[jruby-joni] 13/194: split parseExp
Hideki Yamane
henrich at moszumanska.debian.org
Thu Feb 1 12:04:09 UTC 2018
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit b60059cbab29841635be9b4bb6b0ee520928ce70
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date: Fri Dec 15 04:12:04 2017 +0100
split parseExp
---
src/org/joni/Parser.java | 345 ++++++++++++++++++++++++++---------------------
1 file changed, 193 insertions(+), 152 deletions(-)
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index 0316d1e..172796b 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -118,14 +118,6 @@ class Parser extends Lexer {
return true; /* 1: is not POSIX bracket, but no error. */
}
- private CClassNode parseCharProperty() {
- int ctype = fetchCharPropertyToCType();
- CClassNode n = new CClassNode();
- n.addCType(ctype, false, env, this);
- if (token.getPropNot()) n.setNot();
- return n;
- }
-
private boolean codeExistCheck(int code, boolean ignoreEscaped) {
mark();
@@ -764,56 +756,11 @@ class Parser extends Lexer {
return parseExpTkByte(group); // goto tk_byte
}
case LINEBREAK:
- byte[]buflb = new byte[Config.ENC_CODE_TO_MBC_MAXLEN * 2];
- int len1 = enc.codeToMbc(0x0D, buflb, 0);
- int len2 = enc.codeToMbc(0x0A, buflb, len1);
- StringNode left = new StringNode(buflb, 0, len1 + len2);
- left.setRaw();
- /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
- CClassNode right = new CClassNode();
- if (enc.minLength() > 1) {
- right.addCodeRange(env, 0x0A, 0x0D);
- } else {
- right.bs.setRange(0x0A, 0x0D);
- }
-
- if (enc.toString().startsWith("UTF")) {
- /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
- right.addCodeRange(env, 0x85, 0x85);
- right.addCodeRange(env, 0x2028, 0x2029);
- }
- /* (?>...) */
- EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK);
- en.setTarget(ConsAltNode.newAltNode(left, ConsAltNode.newAltNode(right, null)));
- node = en;
+ node = parseLineBreak();
break;
case EXTENDED_GRAPHEME_CLUSTER:
- if (Config.USE_UNICODE_PROPERTIES) {
- if (enc instanceof UnicodeEncoding) {
- int ctype = enc.propertyNameToCType(new byte[]{(byte)'M'}, 0, 1);
- if (ctype > 0) {
- CClassNode cc1 = new CClassNode(); /* \P{M} */
- cc1.addCType(ctype, false, env, this);
- cc1.setNot();
- CClassNode cc2 = new CClassNode(); /* \p{M}* */
- cc1.addCType(ctype, false, env, this);
- QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
- qn.setTarget(cc2);
- /* (?>...) */
- EncloseNode en2 = new EncloseNode(EncloseType.STOP_BACKTRACK);
- /* \P{M}\p{M}* */
- en2.setTarget(ConsAltNode.newListNode(cc1, ConsAltNode.newListNode(qn, null)));
- node = en2;
- }
- }
- }
- if (node == null) {
- AnyCharNode np1 = new AnyCharNode();
- EncloseNode on = new EncloseNode(bsOnOff(env.option, Option.MULTILINE, false), 0);
- on.setTarget(np1);
- node = np1;
- }
+ node = parseExtendedGraphemeCluster(node);
break;
case KEEP:
@@ -825,54 +772,17 @@ class Parser extends Lexer {
case RAW_BYTE:
return parseExpTkRawByte(group); // tk_raw_byte:
+
case CODE_POINT:
- byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
- int num = enc.codeToMbc(token.getCode(), buf, 0);
- // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
- node = new StringNode(buf, 0, num);
+ node = parseCodePoint();
break;
case QUOTE_OPEN:
- int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'};
- int qstart = p;
- Ptr nextChar = new Ptr();
- int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar);
- if (qend == -1) nextChar.p = qend = stop;
- node = new StringNode(bytes, qstart, qend);
- p = nextChar.p;
+ node = parseQuoteOpen();
break;
case CHAR_TYPE:
- switch(token.getPropCType()) {
- case CharacterType.D:
- case CharacterType.S:
- case CharacterType.W:
- if (Config.NON_UNICODE_SDW) {
- CClassNode cc = new CClassNode();
- cc.addCType(token.getPropCType(), false, env, this);
- if (token.getPropNot()) cc.setNot();
- node = cc;
- }
- break;
-
- case CharacterType.WORD:
- node = new CTypeNode(token.getPropCType(), token.getPropNot(), false);
- break;
-
- case CharacterType.SPACE:
- case CharacterType.DIGIT:
- case CharacterType.XDIGIT:
- // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
- CClassNode ccn = new CClassNode();
- ccn.addCType(token.getPropCType(), false, env, this);
- if (token.getPropNot()) ccn.setNot();
- node = ccn;
- break;
-
- default:
- newInternalException(ERR_PARSER_BUG);
-
- } // inner switch
+ node = parseCharType(node);
break;
case CHAR_PROPERTY:
@@ -880,16 +790,7 @@ class Parser extends Lexer {
break;
case CC_CC_OPEN:
- CClassNode cc = parseCharClass();
- node = cc;
- if (isIgnoreCase(env.option)) {
- ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
- enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
-
- if (arg.altRoot != null) {
- node = ConsAltNode.newAltNode(node, arg.altRoot);
- }
- }
+ node = parseCcCcOpen();
break;
case ANYCHAR:
@@ -897,58 +798,15 @@ class Parser extends Lexer {
break;
case ANYCHAR_ANYTIME:
- node = new AnyCharNode();
- QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
- qn.setTarget(node);
- node = qn;
+ node = parseAnycharAnytime();
break;
case BACKREF:
- if (syntax.op2OptionECMAScript() && token.getBackrefNum() == 1 && env.memNodes != null) {
- EncloseNode encloseNode = (EncloseNode) env.memNodes[token.getBackrefRef1()];
- boolean shouldIgnore = false;
- if (encloseNode != null && encloseNode.containingAnchor != null) {
- shouldIgnore = true;
- for (Node anchorNode : env.precReadNotNodes) {
- if (anchorNode == encloseNode.containingAnchor) {
- shouldIgnore = false;
- break;
- }
- }
- }
- if (shouldIgnore) {
- node = StringNode.EMPTY;
- } else {
- node = new BackRefNode(token.getBackrefNum(),
- new int[]{token.getBackrefRef1()},
- token.getBackrefByName(),
- token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
- token.getBackrefLevel(), // ...
- env);
- }
- } else {
- int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
- node = new BackRefNode(token.getBackrefNum(),
- backRefs,
- token.getBackrefByName(),
- token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
- token.getBackrefLevel(), // ...
- env);
- }
-
+ node = parseBackref();
break;
case CALL:
- if (Config.USE_SUBEXP_CALL) {
- int gNum = token.getCallGNum();
- if (gNum < 0 || token.getCallRel()) {
- if (gNum > 0) gNum--;
- gNum = backrefRelToAbs(gNum);
- if (gNum <= 0) newValueException(ERR_INVALID_BACKREF);
- }
- node = new CallNode(bytes, token.getCallNameP(), token.getCallNameEnd(), gNum);
- env.numCall++;
- } // USE_SUBEXP_CALL
+ if (Config.USE_SUBEXP_CALL) node = parseCall();
break;
case ANCHOR:
@@ -979,6 +837,60 @@ class Parser extends Lexer {
return parseExpRepeat(node, group); // repeat:
}
+ private Node parseLineBreak() {
+ byte[]buflb = new byte[Config.ENC_CODE_TO_MBC_MAXLEN * 2];
+ int len1 = enc.codeToMbc(0x0D, buflb, 0);
+ int len2 = enc.codeToMbc(0x0A, buflb, len1);
+ StringNode left = new StringNode(buflb, 0, len1 + len2);
+ left.setRaw();
+ /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
+ CClassNode right = new CClassNode();
+ if (enc.minLength() > 1) {
+ right.addCodeRange(env, 0x0A, 0x0D);
+ } else {
+ right.bs.setRange(0x0A, 0x0D);
+ }
+
+ if (enc.toString().startsWith("UTF")) {
+ /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
+ right.addCodeRange(env, 0x85, 0x85);
+ right.addCodeRange(env, 0x2028, 0x2029);
+ }
+ /* (?>...) */
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK);
+ en.setTarget(ConsAltNode.newAltNode(left, ConsAltNode.newAltNode(right, null)));
+ return en;
+ }
+
+ private Node parseExtendedGraphemeCluster(Node node) {
+ if (Config.USE_UNICODE_PROPERTIES) {
+ if (enc instanceof UnicodeEncoding) {
+ int ctype = enc.propertyNameToCType(new byte[]{(byte)'M'}, 0, 1);
+ if (ctype > 0) {
+ CClassNode cc1 = new CClassNode(); /* \P{M} */
+ cc1.addCType(ctype, false, env, this);
+ cc1.setNot();
+ CClassNode cc2 = new CClassNode(); /* \p{M}* */
+ cc1.addCType(ctype, false, env, this);
+ QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+ qn.setTarget(cc2);
+ /* (?>...) */
+ EncloseNode en2 = new EncloseNode(EncloseType.STOP_BACKTRACK);
+ /* \P{M}\p{M}* */
+ en2.setTarget(ConsAltNode.newListNode(cc1, ConsAltNode.newListNode(qn, null)));
+ node = en2;
+ }
+ }
+ }
+ if (node == null) {
+ AnyCharNode np1 = new AnyCharNode();
+ EncloseNode on = new EncloseNode(bsOnOff(env.option, Option.MULTILINE, false), 0);
+ on.setTarget(np1);
+ node = np1;
+ }
+ return node;
+ }
+
private Node parseExpTkByte(boolean group) {
StringNode node = new StringNode(bytes, token.backP, p); // tk_byte:
while (true) {
@@ -1094,6 +1006,135 @@ class Parser extends Lexer {
return top;
}
+ private Node parseCodePoint() {
+ byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
+ int num = enc.codeToMbc(token.getCode(), buf, 0);
+ // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
+ return new StringNode(buf, 0, num);
+ }
+
+ private Node parseQuoteOpen() {
+ int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'};
+ int qstart = p;
+ Ptr nextChar = new Ptr();
+ int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar);
+ if (qend == -1) nextChar.p = qend = stop;
+ Node node = new StringNode(bytes, qstart, qend);
+ p = nextChar.p;
+ return node;
+ }
+
+ private Node parseCharType(Node node) {
+ switch(token.getPropCType()) {
+ case CharacterType.D:
+ case CharacterType.S:
+ case CharacterType.W:
+ if (Config.NON_UNICODE_SDW) {
+ CClassNode cc = new CClassNode();
+ cc.addCType(token.getPropCType(), false, env, this);
+ if (token.getPropNot()) cc.setNot();
+ node = cc;
+ }
+ break;
+
+ case CharacterType.WORD:
+ node = new CTypeNode(token.getPropCType(), token.getPropNot(), false);
+ break;
+
+ case CharacterType.SPACE:
+ case CharacterType.DIGIT:
+ case CharacterType.XDIGIT:
+ // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
+ CClassNode ccn = new CClassNode();
+ ccn.addCType(token.getPropCType(), false, env, this);
+ if (token.getPropNot()) ccn.setNot();
+ node = ccn;
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } // inner switch
+ return node;
+ }
+
+ private CClassNode parseCharProperty() {
+ int ctype = fetchCharPropertyToCType();
+ CClassNode n = new CClassNode();
+ n.addCType(ctype, false, env, this);
+ if (token.getPropNot()) n.setNot();
+ return n;
+ }
+
+ private Node parseCcCcOpen() {
+ CClassNode cc = parseCharClass();
+ Node node = cc;
+ if (isIgnoreCase(env.option)) {
+ ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
+ enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
+
+ if (arg.altRoot != null) {
+ node = ConsAltNode.newAltNode(node, arg.altRoot);
+ }
+ }
+ return node;
+ }
+
+ private Node parseAnycharAnytime() {
+ Node node = new AnyCharNode();
+ QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+ qn.setTarget(node);
+ return qn;
+ }
+
+ private Node parseBackref() {
+ final Node node;
+ if (syntax.op2OptionECMAScript() && token.getBackrefNum() == 1 && env.memNodes != null) {
+ EncloseNode encloseNode = (EncloseNode) env.memNodes[token.getBackrefRef1()];
+ boolean shouldIgnore = false;
+ if (encloseNode != null && encloseNode.containingAnchor != null) {
+ shouldIgnore = true;
+ for (Node anchorNode : env.precReadNotNodes) {
+ if (anchorNode == encloseNode.containingAnchor) {
+ shouldIgnore = false;
+ break;
+ }
+ }
+ }
+ if (shouldIgnore) {
+ node = StringNode.EMPTY;
+ } else {
+ node = new BackRefNode(token.getBackrefNum(),
+ new int[]{token.getBackrefRef1()},
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+ }
+ } else {
+ int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
+ node = new BackRefNode(token.getBackrefNum(),
+ backRefs,
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+ }
+ return node;
+ }
+
+ private Node parseCall() {
+ int gNum = token.getCallGNum();
+ if (gNum < 0 || token.getCallRel()) {
+ if (gNum > 0) gNum--;
+ gNum = backrefRelToAbs(gNum);
+ if (gNum <= 0) newValueException(ERR_INVALID_BACKREF);
+ }
+ Node node = new CallNode(bytes, token.getCallNameP(), token.getCallNameEnd(), gNum);
+ env.numCall++;
+ return node;
+ }
+
private Node parseBranch(TokenType term) {
Node node = parseExp(term);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits
mailing list