[Pkg-javascript-commits] [node-regjsparser] 01/07: New upstream version 0.3.0+ds
Julien Puydt
julien.puydt at laposte.net
Mon Feb 19 23:44:05 UTC 2018
This is an automated email from the git hooks/post-receive script.
jpuydt-guest pushed a commit to branch master
in repository node-regjsparser.
commit 8e788f1e58e9eccbe3fa019ac265efa52bc0f621
Author: Julien Puydt <julien.puydt at laposte.net>
Date: Thu Jan 18 08:44:49 2018 +0100
New upstream version 0.3.0+ds
---
README.md | 10 ++
parser.js | 200 ++++++++++++++++++++++++++++++++++---
tools/generate-identifier-regex.js | 20 ++--
3 files changed, 205 insertions(+), 25 deletions(-)
diff --git a/README.md b/README.md
index 83f67e9..8ed945c 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,16 @@ var parse = require('regjsparser').parse;
var parseTree = parse('^a'); // /^a/
console.log(parseTree);
+
+// Toggle on/off additional features:
+var parseTree = parse('^a', {
+ // SEE: https://github.com/jviereck/regjsparser/pull/78
+ unicodePropertyEscape: true,
+
+ // SEE: https://github.com/jviereck/regjsparser/pull/83
+ namedGroups: true
+});
+console.log(parseTree);
```
## Testing
diff --git a/parser.js b/parser.js
index 22a054a..93ff428 100644
--- a/parser.js
+++ b/parser.js
@@ -48,7 +48,7 @@
// .
// \ AtomEscape
// CharacterClass
-// ( Disjunction )
+// ( GroupSpecifier Disjunction )
// ( ? : Disjunction )
//
// PatternCharacter ::
@@ -58,6 +58,7 @@
// DecimalEscape
// CharacterEscape
// CharacterClassEscape
+// k GroupName
//
// CharacterEscape[U] ::
// ControlEscape
@@ -115,9 +116,80 @@
// b
// CharacterEscape
// CharacterClassEscape
+//
+// GroupSpecifier ::
+// [empty]
+// ? GroupName
+//
+// GroupName ::
+// < RegExpIdentifierName >
+//
+// RegExpIdentifierName ::
+// RegExpIdentifierStart
+// RegExpIdentifierName RegExpIdentifierContinue
+//
+// RegExpIdentifierStart ::
+// UnicodeIDStart
+// $
+// _
+// \ RegExpUnicodeEscapeSequence
+//
+// RegExpIdentifierContinue ::
+// UnicodeIDContinue
+// $
+// _
+// \ RegExpUnicodeEscapeSequence
+// <ZWNJ>
+// <ZWJ>
(function() {
+ var fromCodePoint = String.fromCodePoint || (function() { // Prefer the native String.fromCodePoint; the function built below is only used when it is missing.
+ // Implementation taken from
+ // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint
+
+ var stringFromCharCode = String.fromCharCode;
+ var floor = Math.floor;
+
+ return function fromCodePoint() { // Takes any number of code points as arguments and returns the string they encode; throws RangeError on an invalid one.
+ var MAX_SIZE = 0x4000; // the codeUnits buffer is flushed into `result` once it grows past this many entries
+ var codeUnits = [];
+ var highSurrogate;
+ var lowSurrogate;
+ var index = -1;
+ var length = arguments.length;
+ if (!length) { // no arguments: empty string
+ return '';
+ }
+ var result = '';
+ while (++index < length) {
+ var codePoint = Number(arguments[index]); // coerce each argument to a number before validating it
+ if (
+ !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
+ codePoint < 0 || // not a valid Unicode code point
+ codePoint > 0x10FFFF || // not a valid Unicode code point
+ floor(codePoint) != codePoint // not an integer
+ ) {
+ throw RangeError('Invalid code point: ' + codePoint);
+ }
+ if (codePoint <= 0xFFFF) { // BMP code point
+ codeUnits.push(codePoint);
+ } else { // Astral code point; split in surrogate halves
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+ codePoint -= 0x10000;
+ highSurrogate = (codePoint >> 10) + 0xD800; // upper 10 bits of the offset
+ lowSurrogate = (codePoint % 0x400) + 0xDC00; // lower 10 bits of the offset
+ codeUnits.push(highSurrogate, lowSurrogate);
+ }
+ if (index + 1 == length || codeUnits.length > MAX_SIZE) { // flush on the last argument or when the buffer is over the threshold
+ result += stringFromCharCode.apply(null, codeUnits);
+ codeUnits.length = 0; // empty the buffer in place so it can be reused
+ }
+ }
+ return result;
+ };
+ }());
+
function parse(str, flags, features) {
if (!features) {
features = {};
@@ -220,6 +292,17 @@
});
}
+ function createNamedReference(name) { // Builds a 'reference' node whose `name` property is the identifier node passed in; the node goes through addRaw before being returned.
+ return addRaw({
+ type: 'reference',
+ name: name,
+ range: [
+ name.range[0] - 3, // starts 3 characters before the identifier - presumably to cover the `\k<` prefix; confirm against the caller
+ pos // ends at the current value of `pos` (parseNamedReference calls skip('>') before calling this)
+ ]
+ });
+ }
+
function createGroup(behavior, disjunction, from, to) {
return addRaw({
type: 'group',
@@ -424,6 +507,10 @@
return false;
}
+ return finishGroup(type, from);
+ }
+
+ function finishGroup(type, from) {
var body = parseDisjunction();
if (!body) {
bail('Expected disjunction');
@@ -524,7 +611,7 @@
// .
// \ AtomEscape
// CharacterClass
- // ( Disjunction )
+ // ( GroupSpecifier Disjunction )
// ( ? : Disjunction )
var res;
@@ -551,6 +638,13 @@
else if (res = parseCharacterClass()) {
return res;
}
+ else if (features.namedGroups && match("(?<")) {
+ var name = parseIdentifier();
+ skip(">");
+ var group = finishGroup("normal", name.range[0] - 3);
+ group.name = name;
+ return group;
+ }
else {
// ( Disjunction )
// ( ? : Disjunction )
@@ -593,10 +687,11 @@
// DecimalEscape
// CharacterEscape
// CharacterClassEscape
+ // k GroupName
var res, from = pos;
- res = parseDecimalEscape();
+ res = parseDecimalEscape() || parseNamedReference();
if (res) {
return res;
}
@@ -681,6 +776,27 @@
return false;
}
+ function parseNamedReference() { // Parses the `k GroupName` atom escape into a 'reference' node; only attempted when features.namedGroups is truthy.
+ if (features.namedGroups && matchReg(/^k<(?=.*?>)/)) { // the lookahead only lets `k<` match when a `>` appears later on
+ var name = parseIdentifier();
+ skip('>');
+ return createNamedReference(name);
+ }
+ } // no explicit return on the other path, so the caller's `||` chain sees undefined
+
+ function parseRegExpUnicodeEscapeSequence() { // Tries `uXXXX` first, then `u{...}` (only when hasUnicodeFlag is set); returns the escape node, or nothing if neither form matches.
+ var res;
+ if (res = matchReg(/^u([0-9a-fA-F]{4})/)) {
+ // UnicodeEscapeSequence
+ return parseUnicodeSurrogatePairEscape( // presumably lets the helper combine this escape with a following surrogate half - helper not visible here, confirm
+ createEscaped('unicodeEscape', parseInt(res[1], 16), res[1], 2)
+ );
+ } else if (hasUnicodeFlag && (res = matchReg(/^u\{([0-9a-fA-F]+)\}/))) {
+ // RegExpUnicodeEscapeSequence (ES6 Unicode code point escape)
+ return createEscaped('unicodeCodePointEscape', parseInt(res[1], 16), res[1], 4);
+ }
+ }
+
function parseCharacterEscape() {
// CharacterEscape ::
// ControlEscape
@@ -707,14 +823,8 @@
} else if (res = matchReg(/^x([0-9a-fA-F]{2})/)) {
// HexEscapeSequence
return createEscaped('hexadecimalEscape', parseInt(res[1], 16), res[1], 2);
- } else if (res = matchReg(/^u([0-9a-fA-F]{4})/)) {
- // UnicodeEscapeSequence
- return parseUnicodeSurrogatePairEscape(
- createEscaped('unicodeEscape', parseInt(res[1], 16), res[1], 2)
- );
- } else if (hasUnicodeFlag && (res = matchReg(/^u\{([0-9a-fA-F]+)\}/))) {
- // RegExpUnicodeEscapeSequence (ES6 Unicode code point escape)
- return createEscaped('unicodeCodePointEscape', parseInt(res[1], 16), res[1], 4);
+ } else if (res = parseRegExpUnicodeEscapeSequence()) {
+ return res;
} else if (features.unicodePropertyEscape && hasUnicodeFlag && (res = matchReg(/^([pP])\{([^\}]+)\}/))) {
// https://github.com/jviereck/regjsparser/issues/77
return addRaw({
@@ -730,17 +840,77 @@
}
}
- // Taken from the Esprima parser.
- function isIdentifierPart(ch) {
+ function parseIdentifierAtom(check) { // Reads one identifier character (literal, or a backslash followed by a unicode escape) that `check` accepts; returns it as a string, or undefined when the next character is rejected.
+ var ch = lookahead();
+ var from = pos;
+ if (!check(ch.charCodeAt(0))) return; // NOTE(review): would throw a TypeError if lookahead() can yield a non-string at end of input - confirm
+ incr();
+ if (ch === '\\') { // only reachable when `check` accepts code 92; isIdentifierStart does
+ var esc = parseRegExpUnicodeEscapeSequence();
+ if (!esc || !check(esc.codePoint)) { // NOTE(review): isIdentifierStart/isIdentifierPart test non-ASCII values via String.fromCharCode, which keeps only 16 bits, so a code point above 0xFFFF would be checked as a different character - confirm
+ bail('Invalid escape sequence', null, from, pos);
+ }
+ return fromCodePoint(esc.codePoint); // the decoded character, not the escape text, is returned
+ }
+ return ch;
+ }
+
+ function parseIdentifier() { // Parses a RegExpIdentifierName starting at `pos` and returns an 'identifier' node (via addRaw) holding the decoded name and its range.
+ // RegExpIdentifierName ::
+ // RegExpIdentifierStart
+ // RegExpIdentifierName RegExpIdentifierContinue
+ //
+ // RegExpIdentifierStart ::
+ // UnicodeIDStart
+ // $
+ // _
+ // \ RegExpUnicodeEscapeSequence
+ //
+ // RegExpIdentifierContinue ::
+ // UnicodeIDContinue
+ // $
+ // _
+ // \ RegExpUnicodeEscapeSequence
+ // <ZWNJ>
+ // <ZWJ>
+
+ var start = pos;
+ var res = parseIdentifierAtom(isIdentifierStart); // NOTE(review): isIdentifierStart also returns true for the digits 0..9, so a name may begin with a digit although the grammar above lists none for RegExpIdentifierStart - confirm intended
+ if (!res) {
+ bail('Invalid identifier'); // the first character was rejected
+ }
+
+ var ch;
+ while (ch = parseIdentifierAtom(isIdentifierPart)) { // keep appending until a character is rejected
+ res += ch;
+ }
+
+ return addRaw({
+ type: 'identifier',
+ value: res, // escapes have already been converted to their characters by parseIdentifierAtom
+ range: [start, pos]
+ });
+ }
+
+ function isIdentifierStart(ch) {
// Generated by `tools/generate-identifier-regex.js`.
- var NonAsciiIdentifierPart = new RegExp('[\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0300-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u0483-\u0487\u048A-\u052F\u0531-\u0556\u0559\u0561-\u0587\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u05D0-\u05EA\u05F0-\u05F2\u0610-\u061A\u0620-\u0669\u066E-\u06D3\u06D5-\u06DC\u06DF-\u06E8\u06EA-\u06FC\u06FF\u0710-\u074A\u074D-\u07B1\u07C0-\u07F5\ [...]
+ var NonAsciiIdentifierStart = /[\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0561-\u0587\u05D0-\u05EA\u05F0-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u [...]
return (ch === 36) || (ch === 95) || // $ (dollar) and _ (underscore)
(ch >= 65 && ch <= 90) || // A..Z
(ch >= 97 && ch <= 122) || // a..z
(ch >= 48 && ch <= 57) || // 0..9
(ch === 92) || // \ (backslash)
- ((ch >= 0x80) && NonAsciiIdentifierPart.test(String.fromCharCode(ch)));
+ ((ch >= 0x80) && NonAsciiIdentifierStart.test(String.fromCharCode(ch)));
+ }
+
+ // Taken from the Esprima parser.
+ function isIdentifierPart(ch) {
+ // Generated by `tools/generate-identifier-regex.js`.
+ var NonAsciiIdentifierPartOnly = /[\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u0669\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u06F0-\u06F9\u0711\u0730-\u074A\u07A6-\u07B0\u07C0-\u07C9\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08E4-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0966-\u096F\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u09E [...]
+
+ return isIdentifierStart(ch) ||
+ ((ch >= 0x80) && NonAsciiIdentifierPartOnly.test(String.fromCharCode(ch)));
}
function parseIdentityEscape() {
diff --git a/tools/generate-identifier-regex.js b/tools/generate-identifier-regex.js
index bd06403..af1fc06 100644
--- a/tools/generate-identifier-regex.js
+++ b/tools/generate-identifier-regex.js
@@ -28,24 +28,24 @@ var generateES5Regex = function() { // ES 5.1
.add(Lu, Ll, Lt, Lm, Lo, Nl)
.removeRange(0x010000, 0x10FFFF) // remove astral symbols
.removeRange(0x0, 0x7F); // remove ASCII symbols (regjsparser-specific)
- var identifierPart = identifierStart.clone()
- .add('\u200C', '\u200D', Mn, Mc, Nd, Pc)
+ var identifierPartOnly = regenerate('\u200C', '\u200D')
+ .add(Mn, Mc, Nd, Pc)
.removeRange(0x010000, 0x10FFFF) // remove astral symbols
.removeRange(0x0, 0x7F); // remove ASCII symbols (regjsparser-specific)
return {
'NonAsciiIdentifierStart': identifierStart.toString(),
- 'NonAsciiIdentifierPart': identifierPart.toString()
+ 'NonAsciiIdentifierPartOnly': identifierPartOnly.toString()
};
};
var result = generateES5Regex();
-// console.log(
-// '// ECMAScript 5.1/Unicode v%s NonAsciiIdentifierStart:\n\n%s\n',
-// version,
-// result.NonAsciiIdentifierStart
-// );
console.log(
- '// ECMAScript 5.1/Unicode v%s NonAsciiIdentifierPart:\n\n%s',
+ '// ECMAScript 5.1/Unicode v%s NonAsciiIdentifierStart:\n\n%s\n',
version,
- result.NonAsciiIdentifierPart
+ result.NonAsciiIdentifierStart
+);
+console.log(
+ '// ECMAScript 5.1/Unicode v%s NonAsciiIdentifierPartOnly:\n\n%s',
+ version,
+ result.NonAsciiIdentifierPartOnly
);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-regjsparser.git
More information about the Pkg-javascript-commits
mailing list