[Pkg-javascript-commits] [node-entities] 41/63: moved code around, use new maps, removed {en,de}codeHTML4
Wolfgang Borgert
debacle at moszumanska.debian.org
Mon Sep 22 08:15:47 UTC 2014
This is an automated email from the git hooks/post-receive script.
debacle pushed a commit to branch master
in repository node-entities.
commit 56c8b5e56a2694d866104b5e6878928d37a89e46
Author: fb55 <me at feedic.com>
Date: Mon Mar 17 20:27:30 2014 +0100
moved code around, use new maps, removed {en,de}codeHTML4
HTML4 methods are aliased as their HTML5 equivalents
---
compile.js | 86 ++++++++++++++++++++-----------------------------
decode.js | 72 +++++++++++++++++++++++++++++++++++++++++
index.js | 106 +++++++++++--------------------------------------------------
3 files changed, 126 insertions(+), 138 deletions(-)
diff --git a/compile.js b/compile.js
index d547fa1..726f860 100644
--- a/compile.js
+++ b/compile.js
@@ -1,64 +1,48 @@
-var modes = ["XML", "HTML4", "HTML5"];
+var inverseXML = getInverseObj(require("./entities/xml.json")),
+ xmlReplacer = getInverseReplacer(inverseXML);
-modes.reduce(function(prev, name, i){
- var obj = require("./entities/" + name.toLowerCase() + ".json");
+exports.XML = getInverse(inverseXML, xmlReplacer);
- if(prev){
- Object.keys(prev).forEach(function(name){
- obj[name] = prev[name];
- });
- }
-
- var inverse = getInverse(obj);
+var inverseHTML = getInverseObj(require("./entities/entities.json")),
+ htmlReplacer = getInverseReplacer(inverseHTML);
- module.exports[name] = {
- strict: getStrictReplacer(obj),
- //there is no non-strict mode for XML
- normal: i === 0 ? null : getReplacer(obj),
- inverse: getInverseReplacer(inverse),
- inverseObj: inverse,
- obj: obj
- };
+exports.HTML = getInverse(inverseHTML, htmlReplacer);
- return obj;
-}, null);
-
-function sortDesc(a, b){
- return a < b ? 1 : -1;
+function getInverseObj(obj){
+ return Object.keys(obj).sort().reduce(function(inverse, name){
+ inverse[obj[name]] = name + ";";
+ return inverse;
+ }, {});
}
-function getReplacer(obj){
- var keys = Object.keys(obj).sort(sortDesc);
- var re = keys.join("|")//.replace(/(\w+);\|\1/g, "$1;?");
-
- // also match hex and char codes
- re += "|#[xX][\\da-fA-F]+;?|#\\d+;?";
-
- return new RegExp("&(?:" + re + ")", "g");
+function getInverseReplacer(inverse){
+ return new RegExp("\\" + Object.keys(inverse).sort().join("|\\"), "g");
}
-function getStrictReplacer(obj){
- var keys = Object.keys(obj).sort(sortDesc).filter(RegExp.prototype.test, /;$/);
- var re = keys.map(function(name){
- return name.slice(0, -1); //remove trailing semicolon
- }).join("|");
+var re_nonASCII = /[^\0-\x7F]/g,
+ re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
- // also match hex and char codes
- re += "|#[xX][\\da-fA-F]+|#\\d+";
-
- return new RegExp("&(?:" + re + ");", "g");
+function nonUTF8Replacer(c){
+ return "&#x" + c.charCodeAt(0).toString(16).toUpperCase() + ";";
}
-function getInverse(obj){
- return Object.keys(obj).filter(function(name){
- //prefer identifiers with a semicolon
- return name.substr(-1) === ";" || obj[name + ";"] !== obj[name];
- }).reduce(function(inverse, name){
- inverse[obj[name]] = name;
- return inverse;
- }, {});
+function astralReplacer(c){
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+ var high = c.charCodeAt(0);
+ var low = c.charCodeAt(1);
+ var codePoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000;
+ return "&#x" + codePoint.toString(16).toUpperCase() + ";";
}
-function getInverseReplacer(inverse){
- return new RegExp("\\" + Object.keys(inverse).sort().join("|\\"), "g");
-}
+function getInverse(inverse, re){
+ function func(name){
+ return "&" + inverse[name];
+ }
+
+ return function(data){
+ return data
+ .replace(re, func)
+ .replace(re_astralSymbols, astralReplacer)
+ .replace(re_nonASCII, nonUTF8Replacer);
+ };
+}
\ No newline at end of file
diff --git a/decode.js b/decode.js
new file mode 100644
index 0000000..ebf586d
--- /dev/null
+++ b/decode.js
@@ -0,0 +1,72 @@
+var entityMap = require("./entities/entities.json"),
+ legacyMap = require("./entities/legacy.json"),
+ xmlMap = require("./entities/xml.json"),
+ decodeCodePoint = require("./decode_codepoint.js");
+
+var decodeXMLStrict = getStrictDecoder(xmlMap),
+ decodeHTMLStrict = getStrictDecoder(entityMap);
+
+function getStrictDecoder(map){
+ var keys = Object.keys(map).join("|"),
+ replace = getReplacer(map);
+
+ keys += "|#[xX][\\da-fA-F]+|#\\d+";
+
+ var re = new RegExp("&(?:" + keys + ");", "g");
+
+ return function(str){
+ return String(str).replace(re, replace);
+ };
+}
+
+var decodeHTML = (function(){
+ var legacy = Object.keys(legacyMap)
+ .sort(sorter);
+
+ var keys = Object.keys(entityMap)
+ .sort(sorter);
+
+ for(var i = 0, j = 0; i < keys.length; i++){
+ if(legacy[j] === keys[i]){
+ keys[i] += ";?";
+ j++;
+ } else {
+ keys[i] += ";";
+ }
+ }
+
+ var re = new RegExp("&(?:" + keys.join("|") + "|#[xX][\\da-fA-F]+;?|#\\d+;?)", "g"),
+ replace = getReplacer(entityMap);
+
+ function replacer(str){
+ if(str.substr(-1) !== ";") str += ";";
+ return replace(str);
+ }
+
+ //TODO consider creating a merged map
+ return function(str){
+ return String(str).replace(re, replacer);
+ };
+}());
+
+function sorter(a, b){
+ return a < b ? 1 : -1;
+}
+
+function getReplacer(map){
+ return function replace(str){
+ if(str.charAt(1) === "#"){
+ if(str.charAt(2) === "X" || str.charAt(2) === "x"){
+ return decodeCodePoint(parseInt(str.substr(3), 16));
+ }
+ return decodeCodePoint(parseInt(str.substr(2), 10));
+ }
+ return map[str.slice(1, -1)];
+ };
+}
+
+module.exports = {
+ XML: decodeXMLStrict,
+ HTML: decodeHTML,
+ HTMLStrict: decodeHTMLStrict
+};
\ No newline at end of file
diff --git a/index.js b/index.js
index 8470541..f8d1737 100644
--- a/index.js
+++ b/index.js
@@ -1,99 +1,31 @@
-var compiled = require("./compile.js"),
- modes = ["XML", "HTML4", "HTML5"];
-
-var levels = modes.map(function(name, i){
- var obj = compiled[name],
- strict = genReplaceFunc(obj.strict, getStrictReplacer(obj.obj)),
- //there is no non-strict mode for XML
- normal = i === 0 ? strict : genReplaceFunc(obj.normal, getReplacer(obj.obj)),
- inverse = getInverse(obj.inverseObj, obj.inverse);
-
- exports["decode" + name + "Strict"] = strict;
- exports["decode" + name] = normal;
- exports["encode" + name] = inverse;
-
- return {
- strict: strict,
- normal: normal,
- inverse: inverse
- };
-});
-
-var decode = levels.map(function(l){ return l.normal; }),
- decodeStrict = levels.map(function(l){ return l.strict; }),
- encode = levels.map(function(l){ return l.inverse; });
+var encode = require("./compile.js"),
+ decode = require("./decode.js");
exports.decode = function(data, level){
- if(!(level >= 0 && level < 3)) level = 0;
- return decode[level](data);
+ return (!level || level <= 0 ? decode.XML : decode.HTML)(data);
};
+
exports.decodeStrict = function(data, level){
- if(!(level >= 0 && level < 3)) level = 0;
- return decodeStrict[level](data);
+ return (!level || level <= 0 ? decode.XML : decode.HTMLStrict)(data);
};
+
exports.encode = function(data, level){
- if(!(level >= 0 && level < 3)) level = 0;
- return encode[level](data);
+ return (!level || level <= 0 ? encode.XML : encode.HTML)(data);
};
-function getReplacer(obj){
- return function normalReplacer(name){
- if(name.charAt(1) === "#"){
- if(name.charAt(2).toLowerCase() === "x"){
- return codePointToSymbol(parseInt(name.substr(3), 16));
- }
- return codePointToSymbol(parseInt(name.substr(2), 10));
- }
- return obj[name.substr(1)];
- };
-}
-
-function codePointToSymbol(entity){
- return String.fromCharCode(entity); //TODO
-}
-
-function getStrictReplacer(obj){
- return function strictReplacer(name){
- if(name.charAt(1) === "#"){
- if(name.charAt(2).toLowerCase() === "x"){
- return String.fromCharCode(parseInt(name.substr(3), 16));
- }
- return String.fromCharCode(parseInt(name.substr(2), 10));
- }
- return obj[name.substr(1)];
- };
-}
-
-var re_nonASCII = /[^\0-\x7F]/g,
- re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
-
-function nonUTF8Replacer(c){
- return "&#x" + c.charCodeAt(0).toString(16).toUpperCase() + ";";
-}
+exports.encodeXML = encode.XML;
-function astralReplacer(c){
- // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
- var high = c.charCodeAt(0);
- var low = c.charCodeAt(1);
- var codePoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000;
- return "&#x" + codePoint.toString(16).toUpperCase() + ";";
-}
+exports.encodeHTML4 =
+exports.encodeHTML5 =
+exports.encodeHTML = encode.HTML;
-function getInverse(inverse, re){
- function func(name){
- return "&" + inverse[name];
- }
+exports.decodeXML =
+exports.decodeXMLStrict = decode.XML;
- return function(data){
- return data
- .replace(re, func)
- .replace(re_astralSymbols, astralReplacer)
- .replace(re_nonASCII, nonUTF8Replacer);
- };
-}
+exports.decodeHTML4 =
+exports.decodeHTML5 =
+exports.decodeHTML = decode.HTML;
-function genReplaceFunc(regex, func){
- return function(data){
- return data.replace(regex, func);
- };
-}
+exports.decodeHTML4Strict =
+exports.decodeHTML5Strict =
+exports.decodeHTMLStrict = decode.HTMLStrict;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-entities.git
More information about the Pkg-javascript-commits mailing list