[Pkg-javascript-commits] [node-entities] 41/63: moved code around, use new maps, removed {en,de}codeHTML4

Wolfgang Borgert debacle at moszumanska.debian.org
Mon Sep 22 08:15:47 UTC 2014


This is an automated email from the git hooks/post-receive script.

debacle pushed a commit to branch master
in repository node-entities.

commit 56c8b5e56a2694d866104b5e6878928d37a89e46
Author: fb55 <me at feedic.com>
Date:   Mon Mar 17 20:27:30 2014 +0100

    moved code around, use new maps, removed {en,de}codeHTML4
    
    HTML4 methods are aliased as their HTML5 equivalent
---
 compile.js |  86 ++++++++++++++++++++-----------------------------
 decode.js  |  72 +++++++++++++++++++++++++++++++++++++++++
 index.js   | 106 +++++++++++--------------------------------------------------
 3 files changed, 126 insertions(+), 138 deletions(-)

diff --git a/compile.js b/compile.js
index d547fa1..726f860 100644
--- a/compile.js
+++ b/compile.js
@@ -1,64 +1,48 @@
-var modes = ["XML", "HTML4", "HTML5"];
+var inverseXML = getInverseObj(require("./entities/xml.json")),
+    xmlReplacer = getInverseReplacer(inverseXML);
 
-modes.reduce(function(prev, name, i){
-	var obj = require("./entities/" + name.toLowerCase() + ".json");
+exports.XML = getInverse(inverseXML, xmlReplacer);
 
-	if(prev){
-		Object.keys(prev).forEach(function(name){
-			obj[name] = prev[name];
-		});
-	}
-
-	var inverse = getInverse(obj);
+var inverseHTML = getInverseObj(require("./entities/entities.json")),
+    htmlReplacer = getInverseReplacer(inverseHTML);
 
-	module.exports[name] = {
-		strict: getStrictReplacer(obj),
-		//there is no non-strict mode for XML
-		normal: i === 0 ? null : getReplacer(obj),
-		inverse: getInverseReplacer(inverse),
-		inverseObj: inverse,
-		obj: obj
-	};
+exports.HTML = getInverse(inverseHTML, htmlReplacer);
 
-	return obj;
-}, null);
-
-function sortDesc(a, b){
-	return a < b ? 1 : -1;
+function getInverseObj(obj){
+	return Object.keys(obj).sort().reduce(function(inverse, name){
+		inverse[obj[name]] = name + ";";
+		return inverse;
+	}, {});
 }
 
-function getReplacer(obj){
-	var keys = Object.keys(obj).sort(sortDesc);
-	var re = keys.join("|")//.replace(/(\w+);\|\1/g, "$1;?");
-
-	// also match hex and char codes
-	re += "|#[xX][\\da-fA-F]+;?|#\\d+;?";
-
-	return new RegExp("&(?:" + re + ")", "g");
+function getInverseReplacer(inverse){
+	return new RegExp("\\" + Object.keys(inverse).sort().join("|\\"), "g");
 }
 
-function getStrictReplacer(obj){
-	var keys = Object.keys(obj).sort(sortDesc).filter(RegExp.prototype.test, /;$/);
-	var re = keys.map(function(name){
-		return name.slice(0, -1); //remove trailing semicolon
-	}).join("|");
+var re_nonASCII = /[^\0-\x7F]/g,
+    re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
 
-	// also match hex and char codes
-	re += "|#[xX][\\da-fA-F]+|#\\d+";
-
-	return new RegExp("&(?:" + re + ");", "g");
+function nonUTF8Replacer(c){
+	return "&#x" + c.charCodeAt(0).toString(16).toUpperCase() + ";";
 }
 
-function getInverse(obj){
-	return Object.keys(obj).filter(function(name){
-		//prefer identifiers with a semicolon
-		return name.substr(-1) === ";" || obj[name + ";"] !== obj[name];
-	}).reduce(function(inverse, name){
-		inverse[obj[name]] = name;
-		return inverse;
-	}, {});
+function astralReplacer(c){
+	// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+	var high = c.charCodeAt(0);
+	var low  = c.charCodeAt(1);
+	var codePoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000;
+	return "&#x" + codePoint.toString(16).toUpperCase() + ";";
 }
 
-function getInverseReplacer(inverse){
-	return new RegExp("\\" + Object.keys(inverse).sort().join("|\\"), "g");
-}
+function getInverse(inverse, re){
+	function func(name){
+		return "&" + inverse[name];
+	}
+
+	return function(data){
+		return data
+				.replace(re, func)
+				.replace(re_astralSymbols, astralReplacer)
+				.replace(re_nonASCII, nonUTF8Replacer);
+	};
+}
\ No newline at end of file
diff --git a/decode.js b/decode.js
new file mode 100644
index 0000000..ebf586d
--- /dev/null
+++ b/decode.js
@@ -0,0 +1,72 @@
+var entityMap = require("./entities/entities.json"),
+    legacyMap = require("./entities/legacy.json"),
+    xmlMap    = require("./entities/xml.json"),
+    decodeCodePoint = require("./decode_codepoint.js");
+
+var decodeXMLStrict  = getStrictDecoder(xmlMap),
+    decodeHTMLStrict = getStrictDecoder(entityMap);
+
+function getStrictDecoder(map){
+	var keys = Object.keys(map).join("|"),
+	    replace = getReplacer(map);
+
+	keys += "|#[xX][\\da-fA-F]+|#\\d+";
+
+	var re = new RegExp("&(?:" + keys + ");", "g");
+
+	return function(str){
+		return String(str).replace(re, replace);
+	};
+}
+
+var decodeHTML = (function(){
+	var legacy = Object.keys(legacyMap)
+		.sort(sorter);
+
+	var keys = Object.keys(entityMap)
+		.sort(sorter);
+
+	for(var i = 0, j = 0; i < keys.length; i++){
+		if(legacy[j] === keys[i]){
+			keys[i] += ";?";
+			j++;
+		} else {
+			keys[i] += ";";
+		}
+	}
+
+	var re = new RegExp("&(?:" + keys.join("|") + "|#[xX][\\da-fA-F]+;?|#\\d+;?)", "g"),
+	    replace = getReplacer(entityMap);
+
+	function replacer(str){
+		if(str.substr(-1) !== ";") str += ";";
+		return replace(str);
+	}
+
+	//TODO consider creating a merged map
+	return function(str){
+		return String(str).replace(re, replacer);
+	};
+}());
+
+function sorter(a, b){
+	return a < b ? 1 : -1;
+}
+
+function getReplacer(map){
+	return function replace(str){
+		if(str.charAt(1) === "#"){
+			if(str.charAt(2) === "X" || str.charAt(2) === "x"){
+				return decodeCodePoint(parseInt(str.substr(3), 16));
+			}
+			return decodeCodePoint(parseInt(str.substr(2), 10));
+		}
+		return map[str.slice(1, -1)];
+	};
+}
+
+module.exports = {
+	XML: decodeXMLStrict,
+	HTML: decodeHTML,
+	HTMLStrict: decodeHTMLStrict
+};
\ No newline at end of file
diff --git a/index.js b/index.js
index 8470541..f8d1737 100644
--- a/index.js
+++ b/index.js
@@ -1,99 +1,31 @@
-var compiled = require("./compile.js"),
-    modes = ["XML", "HTML4", "HTML5"];
-
-var levels = modes.map(function(name, i){
-	var obj = compiled[name],
-	    strict = genReplaceFunc(obj.strict, getStrictReplacer(obj.obj)),
-	    //there is no non-strict mode for XML
-	    normal = i === 0 ? strict : genReplaceFunc(obj.normal, getReplacer(obj.obj)),
-	    inverse = getInverse(obj.inverseObj, obj.inverse);
-
-	exports["decode" + name + "Strict"] = strict;
-	exports["decode" + name] = normal;
-	exports["encode" + name] = inverse;
-
-	return {
-		strict:  strict,
-		normal:  normal,
-		inverse: inverse
-	};
-});
-
-var decode = levels.map(function(l){ return l.normal; }),
-    decodeStrict = levels.map(function(l){ return l.strict; }),
-    encode = levels.map(function(l){ return l.inverse; });
+var encode = require("./compile.js"),
+    decode = require("./decode.js");
 
 exports.decode = function(data, level){
-	if(!(level >= 0 && level < 3)) level = 0;
-	return decode[level](data);
+	return (!level || level <= 0 ? decode.XML : decode.HTML)(data);
 };
+
 exports.decodeStrict = function(data, level){
-	if(!(level >= 0 && level < 3)) level = 0;
-	return decodeStrict[level](data);
+	return (!level || level <= 0 ? decode.XML : decode.HTMLStrict)(data);
 };
+
 exports.encode = function(data, level){
-	if(!(level >= 0 && level < 3)) level = 0;
-	return encode[level](data);
+	return (!level || level <= 0 ? encode.XML : encode.HTML)(data);
 };
 
-function getReplacer(obj){
-	return function normalReplacer(name){
-		if(name.charAt(1) === "#"){
-			if(name.charAt(2).toLowerCase() === "x"){
-				return codePointToSymbol(parseInt(name.substr(3), 16));
-			}
-			return codePointToSymbol(parseInt(name.substr(2), 10));
-		}
-		return obj[name.substr(1)];
-	};
-}
-
-function codePointToSymbol(entity){
-	return String.fromCharCode(entity); //TODO
-}
-
-function getStrictReplacer(obj){
-	return function strictReplacer(name){
-		if(name.charAt(1) === "#"){
-			if(name.charAt(2).toLowerCase() === "x"){
-				return String.fromCharCode(parseInt(name.substr(3), 16));
-			}
-			return String.fromCharCode(parseInt(name.substr(2), 10));
-		}
-		return obj[name.substr(1)];
-	};
-}
-
-var re_nonASCII = /[^\0-\x7F]/g,
-    re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
-
-function nonUTF8Replacer(c){
-	return "&#x" + c.charCodeAt(0).toString(16).toUpperCase() + ";";
-}
+exports.encodeXML = encode.XML;
 
-function astralReplacer(c){
-	// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
-	var high = c.charCodeAt(0);
-	var low  = c.charCodeAt(1);
-	var codePoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000;
-	return "&#x" + codePoint.toString(16).toUpperCase() + ";";
-}
+exports.encodeHTML4 =
+exports.encodeHTML5 =
+exports.encodeHTML  = encode.HTML;
 
-function getInverse(inverse, re){
-	function func(name){
-		return "&" + inverse[name];
-	}
+exports.decodeXML =
+exports.decodeXMLStrict = decode.XML;
 
-	return function(data){
-		return data
-				.replace(re, func)
-				.replace(re_astralSymbols, astralReplacer)
-				.replace(re_nonASCII, nonUTF8Replacer);
-	};
-}
+exports.decodeHTML4 =
+exports.decodeHTML5 =
+exports.decodeHTML = decode.HTML;
 
-function genReplaceFunc(regex, func){
-	return function(data){
-		return data.replace(regex, func);
-	};
-}
+exports.decodeHTML4Strict =
+exports.decodeHTML5Strict =
+exports.decodeHTMLStrict = decode.HTMLStrict;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-entities.git



More information about the Pkg-javascript-commits mailing list