[Pkg-javascript-commits] [node-entities] 24/63: Added encode<name>Strict methods, refactored

Wolfgang Borgert debacle at moszumanska.debian.org
Mon Sep 22 08:15:45 UTC 2014


This is an automated email from the git hooks/post-receive script.

debacle pushed a commit to branch master
in repository node-entities.

commit 5d6cb929c828da749aea35f153524fa29a81a1a2
Author: Felix Böhm <me at feedic.com>
Date:   Sat May 18 10:21:27 2013 +0200

    Added encode<name>Strict methods, refactored
---
 index.js | 160 +++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 100 insertions(+), 60 deletions(-)

diff --git a/index.js b/index.js
index 35a0f50..b536538 100644
--- a/index.js
+++ b/index.js
@@ -1,43 +1,3 @@
-var re_notUTF8 = /[\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]/g,
-	charCode_func = function(c){ return "&#" + c.charCodeAt(0) + ";";};
-
-var fetch = function(filename, inherits){
-	var obj = require("./entities/" + filename + ".json");
-	
-	if(inherits) for(var name in inherits) obj[name] = inherits[name];
-	
-	var re = Object.keys(obj).sort().join("|").replace(/(\w+)\|\1;/g, "$1;?");
-	
-	// also match hex and char codes
-	re += "|#[xX][0-9a-fA-F]+;?|#\\d+;?";
-
-	return {
-		func: function(name){
-			if (name.charAt(1) === "#") {
-				if (name.charAt(2).toLowerCase() === "x") {
-					return String.fromCharCode(parseInt(name.substr(3), 16));
-				}
-				return String.fromCharCode(parseInt(name.substr(2), 10));
-			}
-			return obj[name.substr(1)];
-		},
-		re: new RegExp("&(?:" + re + ")", "g"),
-		obj: obj
-	};
-};
-
-var getReverse = function(obj){
-	var reverse = Object.keys(obj).reduce(function(reverse, name){
-		reverse[obj[name]] = name;
-		return reverse;
-	}, {});
-	
-	return {
-		func: function(name){ return "&" + reverse[name]; },
-		re: new RegExp("\\" + Object.keys(reverse).sort().join("|\\"), "g")
-	};
-};
-
 var modes = ["XML", "HTML4", "HTML5"];
 
 module.exports = {
@@ -45,33 +5,113 @@ module.exports = {
 		if(!modes[level]) level = 0;
 		return module.exports["decode" + modes[level]](data);
 	},
+	decodeStrict: function(data, level){
+		if(!modes[level]) level = 0;
+		return module.exports["decode" + modes[level] + "Strict"](data);
+	},
 	encode: function(data, level){
 		if(!modes[level]) level = 0;
 		return module.exports["encode" + modes[level]](data);
 	}
 };
 
-var tmp;
+modes.reduce(function(prev, name){
+	var obj = require("./entities/" + name.toLowerCase() + ".json");
+
+	if(prev){
+		Object.keys(prev).forEach(function(name){
+			obj[name] = prev[name];
+		});
+	}
+
+	module.exports["decode" + name + "Strict"] = getStrictReplacer(obj);
+
+	if(name === "XML"){
+		//there is no non-strict mode for XML
+		module.exports.decodeXML = module.exports.decodeXMLStrict;
+	} else {
+		module.exports["decode" + name] = getReplacer(obj);
+	}
+
+	module.exports["encode" + name] = getReverse(obj);
+
+	return obj;
+}, null);
+
+function getReplacer(obj){
+	var keys = Object.keys(obj).sort();
+	var re = keys.join("|").replace(/(\w+)\|\1;/g, "$1;?");
+
+	// also match hex and char codes
+	re += "|#[xX][\\da-fA-F]+;?|#\\d+;?";
 
-modes.forEach(function(name){
-	var obj = fetch(name.toLowerCase(), tmp),
-		regex = obj.re,
-		func = obj.func;
-	
-	tmp = obj.obj;
-	
-	module.exports["decode" + name] = function(data){
+	return genReplaceFunc(
+		new RegExp("&(?:" + re + ")", "g"),
+		function func(name){
+			if(name.charAt(1) === "#"){
+				if(name.charAt(2).toLowerCase() === "x"){
+					return String.fromCharCode(parseInt(name.substr(3), 16));
+				}
+				return String.fromCharCode(parseInt(name.substr(2), 10));
+			}
+			return obj[name.substr(1)];
+		}
+	);
+}
+
+function getStrictReplacer(obj){
+	var keys = Object.keys(obj).sort().filter(RegExp.prototype.test, /;$/);
+	var re = keys.map(function(name){
+		return name.slice(0, -1); //remove trailing semicolon
+	}).join("|");
+
+	// also match hex and char codes
+	re += "|#[xX][\\da-fA-F]+|#\\d+";
+
+	var expr = new RegExp("&(?:" + re + ");", "g");
+
+	return genReplaceFunc(expr, func);
+
+	function func(name){
+			if(name.charAt(1) === "#"){
+				if(name.charAt(2).toLowerCase() === "x"){
+					return String.fromCharCode(parseInt(name.substr(3), 16));
+				}
+				return String.fromCharCode(parseInt(name.substr(2), 10));
+			}
+			return obj[name.substr(1)];
+		}
+}
+
+var re_nonUTF8 = /[\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]/g;
+
+function nonUTF8Replacer(c){
+	return "&#" + c.charCodeAt(0) + ";";
+}
+
+function getReverse(obj){
+	var reverse = Object.keys(obj).filter(function(name){
+		//prefer identifiers with a semicolon
+		return name.substr(-1) === ";" || obj[name + ";"] !== obj[name];
+	}).reduce(function(reverse, name){
+		reverse[obj[name]] = name;
+		return reverse;
+	}, {});
+
+	var regex = new RegExp("\\" + Object.keys(reverse).sort().join("|\\"), "g");
+	function func(name){
+		return "&" + reverse[name];
+	}
+
+	return function(data){
 		return data
-			.replace(regex, func);
+				.replace(regex, func)
+				.replace(re_nonUTF8, nonUTF8Replacer);
 	};
-	
-	var reverse = getReverse(obj.obj),
-		reverse_re = reverse.re,
-		reverse_func = reverse.func;
-	
-	module.exports["encode" + name] = function(data){
-		return data
-			.replace(reverse_re, reverse_func)
-			.replace(re_notUTF8, charCode_func);
+}
+
+function genReplaceFunc(regex, func){
+	return function(data){
+		return data.replace(regex, func);
 	};
-});
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-entities.git



More information about the Pkg-javascript-commits mailing list