[Pkg-javascript-commits] [node-acorn-jsx] 09/484: Less reliance on regexps, more on hacks

Bastien Roucariès rouca at moszumanska.debian.org
Sat Aug 19 14:19:56 UTC 2017


This is an automated email from the git hooks/post-receive script.

rouca pushed a commit to branch master
in repository node-acorn-jsx.

commit ec279617030e4f660c73d1aab18fc2f9f608ebab
Author: Marijn Haverbeke <marijnh at gmail.com>
Date:   Mon Sep 24 16:15:33 2012 +0200

    Less reliance on regexps, more on hacks
---
 acorn.js              | 285 +++++++++++++++++++++++++++++++++-----------------
 test/jquery-string.js |   3 +-
 2 files changed, 189 insertions(+), 99 deletions(-)

diff --git a/acorn.js b/acorn.js
index 878ea36..8566b49 100644
--- a/acorn.js
+++ b/acorn.js
@@ -233,7 +233,7 @@
         next();
         expect(_parenL);
         clause.param = parseIdent();
-        if (strict && strictBadWords.test(clause.param.name))
+        if (strict && isStrictBadIdWord(clause.param.name))
           raise(clause.param.start, "Binding " + clause.param.name + " in strict mode");
         expect(_parenR);
         clause.guard = null;
@@ -344,7 +344,7 @@
     for (;;) {
       var decl = startNode();
       decl.id = parseIdent();
-      if (strict && strictBadWords.test(decl.id.name))
+      if (strict && isStrictBadIdWord(decl.id.name))
         raise(decl.id.start, "Binding " + decl.id.name + " in strict mode");
       decl.init = eat(_eq) ? parseExpression(true, noIn) : null;
       node.declarations.push(finishNode(decl, "VariableDeclarator"));
@@ -386,8 +386,7 @@
     if (strict || node.body.body.length && isUseStrict(node.body.body[0])) {
       for (var i = node.id ? -1 : 0; i < node.params.length; ++i) {
         var id = i < 0 ? node.id : node.params[i];
-        if (strictReservedWords.test(id.name) ||
-            strictBadWords.test(id.name))
+        if (isStrictReservedWord(id.name) || isStrictBadIdWord(id.name))
           raise(id.start, "Defining '" + id.name + "' in strict mode");
         if (i >= 0) for (var j = 0; j < i; ++j) if (id.name === node.params[j].name)
           raise(id.start, "Argument name clash in strict mode");
@@ -433,25 +432,27 @@
         if (options.allowTrailingCommas && eat(_braceR)) break;
       } else first = false;
 
-      var prop = {key: parsePropertyName()};
+      var prop = {key: parsePropertyName()}, isGetSet, kind;
       if (eat(_colon)) {
         prop.value = parseExpression(true);
-        prop.kind = "init";
+        kind = prop.kind = "init";
       } else if (options.ecmaVersion >= 5 && prop.key.type === "Identifier" &&
-                 /^[sg]et$/.test(prop.key.name)) {
-        prop.kind = prop.key.name;
+                 (prop.key.name === "get" || prop.key.name === "set")) {
+        isGetSet = true;
+        kind = prop.kind = prop.key.name;
         prop.key = parsePropertyName();
         if (!tokType === _parenL) unexpected();
         prop.value = parseFunction(startNode(), false);
       } else unexpected();
-      if (prop.key.type === "Identifier") for (var i = 0; i < node.properties.length; ++i) {
-        var other = node.properties[i];
-        if (other.key.name === prop.key.name) {
-          var haveGetSet = /.et/.test(prop.kind) || /.et/.test(other.kind);
-          var haveInit = prop.kind === "init" || other.kind === "init";
-          if (haveGetSet && (other.kind === prop.kind || haveInit) ||
-              strict && prop.kind === "init" && other.kind === "init")
-            raise(prop.key.start, "Redefinition of property");
+      if (prop.key.type === "Identifier" && (strict || isGetSet)) {
+        for (var i = 0; i < node.properties.length; ++i) {
+          var other = node.properties[i];
+          if (other.key.name === prop.key.name) {
+            var conflict = kind == other.kind || isGetSet && other.kind === "init" ||
+              kind === "init" && (other.kind === "get" || other.kind === "set");
+            if (conflict && !strict && kind === "init" && other.kind === "init") conflict = false;
+            if (conflict) raise(prop.key.start, "Redefinition of property");
+          }
         }
       }
       node.properties.push(prop);
@@ -618,9 +619,9 @@
   }
 
   function checkLVal(expr) {
-    if (!/^(?:Identifier|Member)/.test(expr.type))
+    if (expr.type !== "Identifier" && expr.type !== "MemberExpression")
       raise(expr.start, "Assigning to rvalue");
-    if (strict && expr.type === "Identifier" && strictBadWords.test(expr.name))
+    if (strict && expr.type === "Identifier" && isStrictBadIdWord(expr.name))
       raise(expr.start, "Assigning to " + expr.name + " in strict mode");
   }
 
@@ -649,31 +650,61 @@
                       "typeof": {keyword: "typeof", prefix: true},
                       "void": {keyword: "void", prefix: true},
                       "delete": {keyword: "delete", prefix: true}};
-  var keywords = /^(?:break|case|catch|continue|debugger|default|do|else|finally|for|function|if|return|switch|throw|try|var|while|with|null|true|false|instanceof|typeof|void|delete|new|in)$/;
+  var isKeyword = makePredicate("break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in");
 
-  var _bracketL = {type: "[", beforeExpr: true}, _bracketR = {type: "]"}, _braceL = {type: "{", beforeExpr: true}, _braceR = {type: "}"}, _parenL = {type: "(", beforeExpr: true}, _parenR = {type: ")"}, _comma = {type: ",", beforeExpr: true}, _semi = {type: ";", beforeExpr: true}, _colon = {type: ":", beforeExpr: true}, _dot = {type: "."}, _question = {type: "?", beforeExpr: true};
-  var puncTypes = {"[": _bracketL, "]": _bracketR, "{": _braceL, "}": _braceR, "(": _parenL,
-                   ")": _parenR, ",": _comma, ";": _semi, ":": _colon, ".": _dot, "?": _question};
-  var puncChars = /[\[\]\{\}\(\),;:\.\?]/;
+  var _bracketL = {type: "[", beforeExpr: true}, _bracketR = {type: "]"}, _braceL = {type: "{", beforeExpr: true};
+  var _braceR = {type: "}"}, _parenL = {type: "(", beforeExpr: true}, _parenR = {type: ")"};
+  var _comma = {type: ",", beforeExpr: true}, _semi = {type: ";", beforeExpr: true};
+  var _colon = {type: ":", beforeExpr: true}, _dot = {type: "."}, _question = {type: "?", beforeExpr: true};
 
   var _slash = {binop: 10, beforeExpr: true}, _eq = {isAssign: true, beforeExpr: true};
-  var opTypes = {"/": _slash, "=": _eq};
-  opTypes["+"] = opTypes["-"] = {binop: 9, prefix: true, beforeExpr: true};
-  opTypes["++"] = opTypes["--"] = {postfix: true, prefix: true, isUpdate: true};
-  opTypes["~"] = opTypes["!"] = {prefix: true, beforeExpr: true};
-  var binopPrecs = [1, "||", 2, "&&", 3, "|", 4, "^", 5, "&", 6, "==", "===", "!=", "!==",
-                    7, "<", ">", "<=", ">=", 8, ">>", "<<", ">>>", 10, "*", "%"];
-  for (var i = 0, curPrec; i < binopPrecs.length; ++i)
-    if (typeof binopPrecs[i] === "number") curPrec = binopPrecs[i];
-    else opTypes[binopPrecs[i]] = {binop: curPrec, beforeExpr: true};
-  var assignOps = ["+=", "-=", "/=", "*=", "%=", ">>=", ">>>=", "<<=", "|=", "^=", "&="];
-  for (var i = 0, typeObj = {isAssign: true, beforeExpr: true}; i < assignOps.length; ++i)
-    opTypes[assignOps[i]] = typeObj;
-
-  var reservedWords3 = /^(?:abstract|boolean|byte|char|class|double|enum|export|extends|final|float|goto|implements|import|int|interface|long|native|package|private|protected|public|short|static|super|synchronized|throws|transient|volatile)$/;
-  var reservedWords5 = /^(?:class|enum|extends|super|const|export|import)$/;
-  var strictReservedWords = /^(?:implements|interface|let|package|private|protected|public|static|yield)$/;
-  var strictBadWords = /^(?:eval|arguments)$/;
+  var _assign = {isAssign: true, beforeExpr: true}, _plusmin = {binop: 9, prefix: true, beforeExpr: true};
+  var _incdec = {postfix: true, prefix: true, isUpdate: true}, _prefix = {prefix: true, beforeExpr: true};
+  var _bin1 = {binop: 1, beforeExpr: true}, _bin2 = {binop: 2, beforeExpr: true};
+  var _bin3 = {binop: 3, beforeExpr: true}, _bin4 = {binop: 4, beforeExpr: true};
+  var _bin5 = {binop: 5, beforeExpr: true}, _bin6 = {binop: 6, beforeExpr: true};
+  var _bin7 = {binop: 7, beforeExpr: true}, _bin8 = {binop: 8, beforeExpr: true};
+  var _bin10 = {binop: 10, beforeExpr: true};
+
+  var isReservedWord3 = makePredicate("abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile");
+  var isReservedWord5 = makePredicate("class enum extends super const export import");
+
+  function isStrictBadIdWord(str) {
+    return str === "eval" || str === "arguments";
+  }
+  var isStrictReservedWord = makePredicate("implements interface let package private protected public static yield");
+
+  function makePredicate(words) {
+    words = words.split(" ");
+    var f = "(function(str){", cats = [];
+    out: for (var i = 0; i < words.length; ++i) {
+      for (var j = 0; j < cats.length; ++j)
+        if (cats[j][0].length == words[i].length) {
+          cats[j].push(words[i]);
+          continue out;
+        }
+      cats.push([words[i]]);
+    }
+    function compareTo(arr) {
+      if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";";
+      f += "switch(str){";
+      for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
+      f += "return true}return false;";
+    }
+    if (cats.length > 3) {
+      cats.sort(function(a, b) {return b.length - a.length;});
+      f += "switch(str.length){";
+      for (var i = 0; i < cats.length; ++i) {
+        var cat = cats[i];
+        f += "case " + cat[0].length + ":";
+        compareTo(cat);
+      }
+      f += "}";
+    } else {
+      compareTo(words);
+    }
+    return (1, eval)(f + "})");
+  }
 
   // CHARACTER CATEGORIES
 
@@ -685,8 +716,6 @@
   var nonASCIIidentifierChars = "\u0371-\u0374\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a8 [...]
   var nonASCIIidentifierStart = new RegExp("[" + nonASCIIidentifierStartChars + "]");
   var nonASCIIidentifier = new RegExp("[" + nonASCIIidentifierStartChars + nonASCIIidentifierChars + "]");
-  var identifierStart = /[a-zA-Z_$]/, identifier = /[a-zA-Z_$0-9]+/, identifierG = /[a-zA-Z_$0-9]+/g
-  var digit = /\d/, sign = /[-+]/;
   var newline = /[\n\r\u2028\u2029]/;
   var lineBreak = /\r\n?|[\n\r\u2028\u2029]/g;
 
@@ -786,22 +815,86 @@
     if (forceRegexp) return readRegexp();
     if (tokPos >= inputLen) return finishToken(_eof);
 
-    var ch = input.charAt(tokPos), next;
-    if (identifierStart.test(ch)) return readWord();
-    if (ch === "." && digit.test(input.charAt(tokPos+1))) return readNumber(ch);
-    if (puncChars.test(ch)) {++tokPos; return finishToken(puncTypes[ch], null);}
-    if (digit.test(ch)) {
-      var nextCh = input.charAt(tokPos+1);
-      if (ch === "0" && nextCh === "x" || nextCh === "X") return readHexNumber();
-      else return readNumber(ch);
-    }
-    if (ch === '"' || ch === "'") return readString(ch);
-    if (ch === "/" && tokRegexpAllowed) {++tokPos; return readRegexp();}
-    if (operatorChar.test(ch)) return readOperator(ch);
-    if (ch === "\\" || nonASCIIidentifierStart.test(ch)) return readWord(true);
+    var code = input.charCodeAt(tokPos), next = input.charCodeAt(tokPos+1);
+    if (isIdentifierStart(code) || code === 92) return readWord();
+
+    switch(code) {
+    case 46: // '.'
+      if (next >= 48 && next <= 57) return readNumber(String.fromCharCode(code));
+      ++tokPos;
+      return finishToken(_dot);
+    case 40: ++tokPos; return finishToken(_parenL);
+    case 41: ++tokPos; return finishToken(_parenR);
+    case 59: ++tokPos; return finishToken(_semi);
+    case 44: ++tokPos; return finishToken(_comma);
+    case 91: ++tokPos; return finishToken(_bracketL);
+    case 93: ++tokPos; return finishToken(_bracketR);
+    case 123: ++tokPos; return finishToken(_braceL);
+    case 125: ++tokPos; return finishToken(_braceR);
+    case 58: ++tokPos; return finishToken(_colon);
+    case 63: ++tokPos; return finishToken(_question);
+    case 48: case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: // 0-9
+      if (code === 48 && (next === 120 || next === 88)) return readHexNumber();
+      return readNumber(String.fromCharCode(code));
+    case 34: case 39: // '"', "'"
+      return readString(String.fromCharCode(code));
+
+    // Operators
+
+    case 47: // '/'
+      if (tokRegexpAllowed) {++tokPos; return readRegexp();}
+      if (next === 61) return finishOp(_assign, 2);
+      return finishOp(_slash, 1);
+
+    case 37: case 42: // '%*'
+      if (next === 61) return finishOp(_assign, 2);
+      return finishOp(_bin10, 1);
+
+    case 124: case 38: // '|&'
+      if (next === code) return finishOp(code === 124 ? _bin1 : _bin2, 2);
+      if (next === 61) return finishOp(_assign, 2);
+      return finishOp(code === 124 ? _bin3 : _bin5, 1);
+
+    case 94: // '^'
+      if (next === 61) return finishOp(_assign, 2);
+      return finishOp(_bin4, 1);
+
+    case 43: case 45: // '+-'
+      if (next === code) return finishOp(_incdec, 2);
+      if (next === 61) return finishOp(_assign, 2);
+      return finishOp(_plusmin, 1);
+
+    case 60: case 62: // '<>'
+      var size = 1;
+      if (next === code) {
+        size = code === 62 && input.charCodeAt(tokPos+2) === 62 ? 3 : 2;
+        if (input.charCodeAt(tokPos + size) === 61) return finishOp(_assign, size + 1);
+        return finishOp(_bin8, size);
+      }
+      if (next === 61)
+        size = input.charCodeAt(tokPos+2) === 61 ? 3 : 2;
+      return finishOp(_bin7, size);
+
+    case 61: case 33: // '=!'
+      if (next === 61) return finishOp(_bin6, input.charCodeAt(tokPos+2) === 61 ? 3 : 2);
+      return finishOp(code === 61 ? _eq : _prefix, 1);
+
+    case 126: // '~'
+      if (next === 61) return finishOp(_assign, 2);
+      return finishOp(_prefix, 1);
+    }
+
+    var ch = String.fromCharCode(code);
+    if (ch === "\\" || nonASCIIidentifierStart.test(ch)) return readWord();
     raise(tokPos, "Unexpected character '" + ch + "'");
   }
 
+  function finishOp(type, size) {
+    var str = input.slice(tokPos, tokPos + size);
+    tokPos += size;
+    finishToken(type, str);
+  }
+
   function readRegexp() {
     var content = "", escaped, inClass, start = tokPos;
     for (;;) {
@@ -818,7 +911,7 @@
     }
     var content = input.slice(start, tokPos);
     ++tokPos;
-    var mods = readWord1(true);
+    var mods = readWord1();
     if (!reModifiers.test(mods)) raise(start, "Invalid regexp flag");
     return finishToken(_regexp, new RegExp(content, mods));
   }
@@ -844,7 +937,7 @@
     tokPos += 2; // 0x
     var val = readInt(16);
     if (val == null) raise(tokStart + 2, "Expected hexadecimal number");
-    if (identifierStart.test(input.charAt(tokPos))) raise(tokPos, "Identifier directly after number");
+    if (isIdentifierStart(input.charCodeAt(tokPos))) raise(tokPos, "Identifier directly after number");
     return finishToken(_num, val);
   }
 
@@ -852,16 +945,18 @@
     var start = tokPos, isFloat = ch === ".";
     if (!isFloat && readInt(10) == null) raise(start, "Invalid number");
     if (isFloat || input.charAt(tokPos) === ".") {
-      if (sign.test(input.charAt(++tokPos))) ++tokPos;
-      if (readInt(10) == null) raise(start, "Invalid number");
+      var next = input.charAt(++tokPos);
+      if (next === "-" || next === "+") ++tokPos;
+      if (readInt(10) === null) raise(start, "Invalid number");
       isFloat = true;
     }
     if (/e/i.test(input.charAt(tokPos))) {
-      if (sign.test(input.charAt(++tokPos))) ++tokPos;
-      if (readInt(10) == null) raise(start, "Invalid number")
+      var next = input.charAt(++tokPos);
+      if (next === "-" || next === "+") ++tokPos;
+      if (readInt(10) === null) raise(start, "Invalid number")
       isFloat = true;
     }
-    if (identifierStart.test(input.charAt(tokPos))) raise(tokPos, "Identifier directly after number");
+    if (isIdentifierStart(input.charCodeAt(tokPos))) raise(tokPos, "Identifier directly after number");
 
     var str = input.slice(start, tokPos), val;
     if (isFloat) val = parseFloat(str);
@@ -924,65 +1019,61 @@
   }
 
   function readHexChar(len) {
+    var aa = tokPos;
     var n = readInt(16, len);
-    if (n == null) raise(tokStart, "Bad character escape sequence");
+    if (n === null) raise(tokStart, "Bad character escape sequence");
     return String.fromCharCode(n);
   }
 
-  function readOperator(op) {
-    for (;;) {
-      var ch = input.charAt(++tokPos), nextOp;
-      if (!operatorChar.test(ch) || !opTypes.hasOwnProperty(nextOp = op + ch)) break;
-      op = nextOp;
-    }
-    return finishToken(opTypes[op], op);
+  function isIdentifierStart(code) {
+    return (code >= 65 && code <= 90) || (code >= 97 && code <= 122) ||
+      code === 36 || code === 95 ||
+      (code >= 0xaa && nonASCIIidentifierStart.test(String.fromCharCode(code)));
+  }
+
+  function isIdentifierChar(ch) {
+    return ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") ||
+            (ch >= "0" && ch <= "9") || ch === "$" || ch === "_" ||
+            (ch >= "\xaa" && nonASCIIidentifier.test(ch)));
   }
 
   var containsEsc;
-  function readWord1(startSpecial) {
+  function readWord1() {
     containsEsc = false;
-    var word, first, escaped;
-    if (startSpecial) {
-      word = ""; first = true;
-    } else {
-      identifierG.lastIndex = tokPos;
-      word = identifierG.exec(input)[0];
-      tokPos += word.length;
-      var code = input.charCodeAt(tokPos);
-      if (code < 256 && code !== 92) return word;
-    }
+    var word = "", first = true;
     for (;;) {
       var ch = input.charAt(tokPos);
-      if (!escaped) {
-        if (ch === "\\") {}
-        else if ((ch <= "\xff" ? identifier : nonASCIIidentifier).test(ch)) word += ch;
-        else break;
+      if ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") ||
+          (ch >= "0" && ch <= "9") || ch === "$" || ch === "_" ||
+          (ch >= "\xaa" && nonASCIIidentifier.test(ch))) {
+        word += ch;
         ++tokPos;
-      } else {
-        if (ch !== "u") raise(tokPos, "Expecting Unicode escape sequence \\uXXXX");
+      } else if (ch === "\\") {
+        containsEsc = true;
+        if (input.charAt(++tokPos) != "u")
+          raise(tokPos, "Expecting Unicode escape sequence \\uXXXX");
         ++tokPos;
         var esc = readHexChar(4);
         if (!esc) raise(tokPos - 1, "Invalid Unicode escape");
-        var re = esc <= "\xff" ? (first ? identifier : identifierStart)
-                               : (first ? nonASCIIidentifier : nonASCIIidentifierStart);
-        if (!re.test(esc)) raise(tokPos - 4, "Invalid Unicode escape");
+        if (!(first ? isIdentifierStart(esc.charCodeAt(0)) : isIdentifierChar(esc)))
+          raise(tokPos - 4, "Invalid Unicode escape");
         word += esc;
+      } else {
+        break;
       }
-      escaped = ch === "\\";
-      if (escaped) containsEsc = true;
       first = false;
     }
     return word;
   }
 
-  function readWord(startSpecial) {
-    var word = readWord1(startSpecial);
+  function readWord() {
+    var word = readWord1();
     var type = _name;
     if (!containsEsc) {
-      if (keywords.test(word)) type = keywordTypes[word];
+      if (isKeyword(word)) type = keywordTypes[word];
       else if (options.forbidReserved &&
-               (options.ecmaVersion === 3 ? reservedWords3 : reservedWords5).test(word) ||
-               strict && strictReservedWords.test(word))
+               (options.ecmaVersion === 3 ? isReservedWord3 : isReservedWord5)(word) ||
+               strict && isStrictReservedWord(word))
         raise(tokStart, "The keyword '" + word + "' is reserved");
     }
     return finishToken(type, word);
diff --git a/test/jquery-string.js b/test/jquery-string.js
index 74975f2..5bb27bb 100644
--- a/test/jquery-string.js
+++ b/test/jquery-string.js
@@ -1,5 +1,4 @@
-// Silly noise to be able to load in source code without doing an XMLHttpReq
-var jquery164 = "/*!\n\
+/* Silly noise to be able to load in source code without doing an XMLHttpReq */var jquery164 = "/*!\n\
  * jQuery JavaScript Library v1.6.4\n\
  * http://jquery.com/\n\
  *\n\

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-acorn-jsx.git



More information about the Pkg-javascript-commits mailing list