[Pkg-javascript-commits] [node-iconv-lite] 04/83: for Simple-Chinese language, gbk and gb2312 encoding is supported

matthew pideil mpideil-guest at moszumanska.debian.org
Tue Apr 1 19:56:45 UTC 2014


This is an automated email from the git hooks/post-receive script.

mpideil-guest pushed a commit to branch master
in repository node-iconv-lite.

commit e559c6b9cbbdf1213ab65d9ed06a7626098d215f
Author: Jinwu Zhan <jenkinv at 163.com>
Date:   Wed Nov 23 15:02:53 2011 +0800

    for Simple-Chinese language, gbk and gb2312 encoding is supported
---
 encodings/gbk.js       |  8 ++++++
 encodings/table/gbk.js |  1 +
 index.js               | 66 ++++++++++++++++++++++++++++++++++++++++++++++++--
 package.json           |  3 ++-
 test/gbk-test.js       | 30 +++++++++++++++++++++++
 test/gbkFile.txt       | 14 +++++++++++
 6 files changed, 119 insertions(+), 3 deletions(-)

diff --git a/encodings/gbk.js b/encodings/gbk.js
new file mode 100644
index 0000000..56883a4
--- /dev/null
+++ b/encodings/gbk.js
@@ -0,0 +1,8 @@
+var gbkTable = require(__dirname + '/table/gbk.js');
+module.exports = {
+	'gb2312': 'gbk',
+	'gbk': {
+		type: 'table',
+		table: gbkTable
+	}
+}
diff --git a/encodings/table/gbk.js b/encodings/table/gbk.js
new file mode 100644
index 0000000..3692a15
--- /dev/null
+++ b/encodings/table/gbk.js
@@ -0,0 +1 @@
+module.exports={33088:19970,33089:19972,33090:19973,33091:19974,33092:19983,33093:19986,33094:19991,33095:19999,33096:20000,33097:20001,33098:20003,33099:20006,33100:20009,33101:20014,33102:20015,33103:20017,33104:20019,33105:20021,33106:20023,33107:20028,33108:20032,33109:20033,33110:20034,33111:20036,33112:20038,33113:20042,33114:20049,33115:20053,33116:20055,33117:20058,33118:20059,33119:20066,33120:20067,33121:20068,33122:20069,33123:20071,33124:20072,33125:20074,33126:20075,33127:20 [...]
\ No newline at end of file
diff --git a/index.js b/index.js
index 36c3c17..5f03f5a 100644
--- a/index.js
+++ b/index.js
@@ -113,12 +113,74 @@ module.exports = iconv = {
                 },
             };
         },
+
+        // Codepage double-byte encodings.
+        table: function(options) {
+            if (!options.table) {
+                throw new Error("Encoding '" + options.type +"' has incorect 'table' option");
+            }
+            var table = options.table, key,
+                revCharsTable = {};
+            for (key in table) {
+                revCharsTable[table[key]] = parseInt(key);
+            }
+            return {
+                toEncoding: function(str) {
+                    str = ensureString(str);
+                    var len = 0, strLen = str.length;
+                    for (var i = 0; i < strLen; i++) {
+                        if (!!(str.charCodeAt(i) >> 8)) {
+                            len += 2;
+                        } else {
+                            len ++;
+                        }
+                    }
+                    var newBuf = new Buffer(len);
+                    for (var i = 0, j = 0; i < strLen; i++) {
+                        var unicode = str.charCodeAt(i);
+                        if (!!(unicode >> 7)) {
+                            var gbkcode = revCharsTable[unicode] || 0xA1F0;//not found in table ,replace it
+                            newBuf[j++] = gbkcode >> 8;//high byte;
+                            newBuf[j++] = gbkcode & 0xFF;//low byte
+                        } else {//ascii
+                            newBuf[j++] = unicode;
+                        }
+                    }
+                    return newBuf;
+                },
+                fromEncoding: function(buf) {
+                    buf = ensureBuffer(buf);
+                    var idx = 0, len = 0,
+                        newBuf = new Buffer(len*2),unicode,gbkcode;
+                    for (var i = 0, _len = buf.length; i < _len; i++, len++) {
+                        if (!!(buf[i] & 0x80)) {//the high bit is 1, so this byte is gbkcode's high byte.skip next byte
+                            i++;
+                        }
+                    }
+                    var newBuf = new Buffer(len*2);
+                    for (var i = 0, j = 0, _len = buf.length; i < _len; i++, j++) {
+                        var temp = buf[i], gbkcode, unicode;
+                        if (temp & 0x80) {
+                            gbkcode = (temp << 8) + buf[++i];
+                            unicode = table[gbkcode] || iconv.defaultCharUnicode.charCodeAt(0);//not found in table, replace with defaultCharUnicode
+                        }else {
+                            unicode = temp;
+                        }
+                        newBuf[j*2] = unicode & 0xFF;//low byte
+                        newBuf[j*2+1] = unicode >> 8;//high byte
+                    }
+                    return newBuf.toString('ucs2');
+                }
+            }
+        },
     },
 }
 
 // Load other encodings from files in /encodings dir.
-var encodingsDir = __dirname+"/encodings/";
-require('fs').readdirSync(encodingsDir).forEach(function(file) {
+var encodingsDir = __dirname+"/encodings/",
+    fs = require('fs');
+fs.readdirSync(encodingsDir).forEach(function(file) {
+    if(fs.statSync(encodingsDir + file).isDirectory()) return;
     var encodings = require(encodingsDir + file)
     for (var key in encodings)
         iconv.encodings[key] = encodings[key]
diff --git a/package.json b/package.json
index 6266c9c..4222638 100644
--- a/package.json
+++ b/package.json
@@ -15,6 +15,7 @@
         "node": ">=0.4.0"
     },
     "devDependencies": {
-        "vows": ""
+        "vows": "",
+        "iconv": ""
     }
 }
diff --git a/test/gbk-test.js b/test/gbk-test.js
new file mode 100644
index 0000000..a40b4cc
--- /dev/null
+++ b/test/gbk-test.js
@@ -0,0 +1,30 @@
+var vows    = require('vows'),
+    fs      = require('fs'),
+    assert  = require('assert'),
+    iconv   = require(__dirname+'/../');
+
+var testString = "中国abc",//unicode contains GBK-code and ascii
+    testStringGBKBuffer = new Buffer([0xd6,0xd0,0xb9,0xfa,0x61,0x62,0x63]);
+
+vows.describe("GBK tests").addBatch({
+    "Vows is working": function() {},
+    "Return values are of correct types": function() {
+        assert.ok(iconv.toEncoding(testString, "utf8") instanceof Buffer);        
+        var s = iconv.fromEncoding(new Buffer(testString), "utf8");
+        assert.strictEqual(Object.prototype.toString.call(s), "[object String]");
+    },
+    "GBK correctly encoded/decoded": function() {    
+        assert.strictEqual(iconv.toEncoding(testString, "GBK").toString('binary'), testStringGBKBuffer.toString('binary'));
+        assert.strictEqual(iconv.fromEncoding(testStringGBKBuffer, "GBK"), testString);
+    },
+    "GB2312 correctly encoded/decoded": function() {    
+        assert.strictEqual(iconv.toEncoding(testString, "GB2312").toString('binary'), testStringGBKBuffer.toString('binary'));
+        assert.strictEqual(iconv.fromEncoding(testStringGBKBuffer, "GB2312"), testString);
+    },
+    "GBK file read decoded,compare with iconv result": function() {
+        var contentBuffer = fs.readFileSync("gbkFile.txt");
+        var str = iconv.fromEncoding(contentBuffer, "GBK");
+        var iconvc = new (require('iconv').Iconv)('GBK','utf8');
+        assert.strictEqual(iconvc.convert(contentBuffer).toString(), str);
+    },
+}).export(module)
diff --git a/test/gbkFile.txt b/test/gbkFile.txt
new file mode 100644
index 0000000..345b5d0
--- /dev/null
+++ b/test/gbkFile.txt
@@ -0,0 +1,14 @@
+<!doctype html><html><head><meta http-equiv="Content-Type" content="text/html;charset=gb2312"><title>�ٶ�һ�£����֪��      </title><style>html{overflow-y:auto}body{font:12px arial;text-align:center;background:#fff}body,p,form,ul,li{margin:0;padding:0;list-style:none}body,form,#fm{position:relative}td{text-align:left}img{border:0}a{color:#00c}a:active{color:#f60}#u{padding:7px 10px 3px 0;text-align:right}#m{width:680px;margin:0 auto}#nv{font-size:16px;margin:0 0 4px;text-align:left;text-inde [...]
+</head>
+
+<body><div id="u"><a href="http://www.baidu.com/gaoji/preferences.html" name="tj_setting">��������</a> | <a href="http://passport.baidu.com/?login&tpl=mn" name="tj_login">��¼</a></div>
+<div id="m"><p id="lg"><img src="http://www.baidu.com/img/baidu_sylogo1.gif" width="270" height="129" usemap="#mp"><map name="mp"><area shape="rect" coords="40,25,230,95" href="http://hi.baidu.com/baidu/" target="_blank" title="��˽��� �ٶȵĿռ�" ></map></p><p id="nv"><a href="http://news.baidu.com">�� ��</a>��<b>�� ҳ</b>��<a href="http://tieba.baidu.com">�� ��</a>��<a href="http://zhidao.baidu.com">֪ ��</a>��<a href="http://mp3.baidu.com">MP3</a>��<a href="http://image.b [...]
+<p id="lk"><a href="http://hi.baidu.com">�ռ�</a>��<a href="http://baike.baidu.com">�ٿ�</a>��<a href="http://www.hao123.com">hao123</a><span> | <a href="/more/">����>></a></span></p><p id="lm"></p><p><a id="seth" onClick="this.setHomePage('http://www.baidu.com')" href="http://utility.baidu.com/traf/click.php?id=215&url=http://www.baidu.com" onmousedown="return ns_c({'fm':'behs','tab':'homepage','pos':0})">�Ѱٶ���Ϊ��ҳ</a><a id="setf" onClick="fa(this)" href="javascript:void(0)" onmous [...]
+<p id="lh"><a href="http://e.baidu.com/?refer=888">����ٶ��ƹ�</a> | <a href="http://top.baidu.com">�������ư�</a> | <a href="http://home.baidu.com">���ڰٶ�</a> | <a href="http://ir.baidu.com">About Baidu</a></p><p id="cp">©2011 Baidu <a href="/duty/">ʹ�ðٶ�ǰ�ض�</a> <a href="http://www.miibeian.gov.cn" target="_blank">��ICP֤030173��</a> <img src="http://gimg.baidu.com/img/gs.gif"></p></div></body>
+
+<script>var w=window,d=document,n=navigator,k=d.f.wd,a=d.getElementById("nv").getElementsByTagName("a"),isIE=n.userAgent.indexOf("MSIE")!=-1&&!window.opera;for(var i=0;i<a.length;i++){a[i].onclick=function(){if(k.value.length>0){var C=this,A=C.href,B=encodeURIComponent(k.value);if(A.indexOf("q=")!=-1){C.href=A.replace(/q=[^&\x24]*/,"q="+B)}else{this.href+="?q="+B}}}}(function(){if(/q=([^&]+)/.test(location.search)){k.value=decodeURIComponent(RegExp["\x241"])}})();if(n.cookieEnabled&&!/su [...]
+
+
+<script type="text/javascript" src="http://www.baidu.com/cache/hps/js/hps-1.2.js"></script>
+
+</html><!--b762345d979562e8-->
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-iconv-lite.git



More information about the Pkg-javascript-commits mailing list