[jruby-joni] 262/279: Imported Upstream version 2.1.6
Hideki Yamane
henrich at moszumanska.debian.org
Mon Nov 16 11:27:48 UTC 2015
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit 39fa758d7880e22cd65425c6c5ce273ab84076a8
Author: Hideki Yamane <henrich at debian.org>
Date: Wed Apr 29 23:22:30 2015 +0900
Imported Upstream version 2.1.6
---
pom.xml | 2 +-
src/org/joni/Analyser.java | 33 ++--
src/org/joni/ArrayCompiler.java | 89 ++++++----
src/org/joni/ByteCodeMachine.java | 9 +
src/org/joni/ByteCodePrinter.java | 8 +
src/org/joni/Lexer.java | 37 +++-
src/org/joni/Option.java | 40 +++--
src/org/joni/Parser.java | 128 +++++++++++++-
src/org/joni/Regex.java | 7 +-
src/org/joni/StackMachine.java | 5 +-
src/org/joni/Syntax.java | 54 +++++-
src/org/joni/Token.java | 13 +-
src/org/joni/ast/AnchorNode.java | 38 ++--
src/org/joni/ast/CClassNode.java | 50 ++++--
src/org/joni/ast/CTypeNode.java | 9 +-
src/org/joni/ast/EncloseNode.java | 25 +--
src/org/joni/constants/EncloseType.java | 1 +
src/org/joni/constants/OPCode.java | 192 ++++++++++++---------
src/org/joni/constants/OPSize.java | 1 +
src/org/joni/constants/RegexState.java | 28 ---
src/org/joni/constants/SyntaxProperties.java | 17 +-
src/org/joni/constants/TokenType.java | 3 +
src/org/joni/exception/ErrorMessages.java | 1 +
test/org/joni/test/TestA.java | 28 +++
.../org/joni/test/TestAU.java | 36 +++-
test/org/joni/test/TestJoni.java | 10 ++
.../org/joni/test/TestPerl.java | 37 +++-
test/org/joni/test/TestU8.java | 8 +-
28 files changed, 632 insertions(+), 277 deletions(-)
diff --git a/pom.xml b/pom.xml
index f2d345d..9ecfd33 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<packaging>jar</packaging>
- <version>2.1.5</version>
+ <version>2.1.6</version>
<name>Joni</name>
<description>
Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma
diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 9aa9acc..d871f77 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -51,7 +51,6 @@ import org.joni.ast.StringNode;
import org.joni.constants.AnchorType;
import org.joni.constants.EncloseType;
import org.joni.constants.NodeType;
-import org.joni.constants.RegexState;
import org.joni.constants.StackPopLevel;
import org.joni.constants.TargetInfo;
@@ -62,8 +61,6 @@ final class Analyser extends Parser {
}
protected final void compile() {
- regex.state = RegexState.COMPILING;
-
if (Config.DEBUG) {
Config.log.println(regex.encStringToString(bytes, getBegin(), getEnd()));
}
@@ -173,13 +170,11 @@ final class Analyser extends Parser {
if (Config.DEBUG_COMPILE) {
if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString());
- Config.log.println("stack used: " + regex.stackNeeded);
+ Config.log.println("stack used: " + regex.requireStack);
if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n");
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
} // DEBUG_COMPILE
-
- regex.state = RegexState.NORMAL;
}
private void noNameDisableMapFor_cosAlt(Node node, int[]map, Ptr counter) {
@@ -225,10 +220,10 @@ final class Analyser extends Parser {
private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) {
AnchorNode an = (AnchorNode)node;
switch (an.type) {
- case AnchorNode.PREC_READ:
- case AnchorNode.PREC_READ_NOT:
- case AnchorNode.LOOK_BEHIND:
- case AnchorNode.LOOK_BEHIND_NOT:
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
an.setTarget(noNameDisableMap(an.target, map, counter));
}
}
@@ -267,6 +262,10 @@ final class Analyser extends Parser {
break;
case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.type == EncloseType.CONDITION) {
+ en.regNum = map[en.regNum];
+ }
renumberByMap(((EncloseNode)node).target, map);
break;
@@ -383,6 +382,7 @@ final class Analyser extends Parser {
case EncloseType.OPTION:
case EncloseNode.STOP_BACKTRACK:
+ case EncloseNode.CONDITION:
info = quantifiersMemoryInfo(en.target);
break;
@@ -498,6 +498,7 @@ final class Analyser extends Parser {
case EncloseType.OPTION:
case EncloseType.STOP_BACKTRACK:
+ case EncloseNode.CONDITION:
min = getMinMatchLength(en.target);
break;
} // inner switch
@@ -603,6 +604,7 @@ final class Analyser extends Parser {
case EncloseType.OPTION:
case EncloseType.STOP_BACKTRACK:
+ case EncloseNode.CONDITION:
max = getMaxMatchLength(en.target);
break;
} // inner switch
@@ -715,6 +717,7 @@ final class Analyser extends Parser {
case EncloseType.OPTION:
case EncloseType.STOP_BACKTRACK:
+ case EncloseNode.CONDITION:
len = getCharLengthTree(en.target, level);
break;
} // inner switch
@@ -938,6 +941,7 @@ final class Analyser extends Parser {
case EncloseType.MEMORY:
case EncloseType.STOP_BACKTRACK:
+ case EncloseNode.CONDITION:
n = getHeadValueNode(en.target, exact);
break;
} // inner switch
@@ -1890,6 +1894,14 @@ final class Analyser extends Parser {
}
break;
+ case EncloseNode.CONDITION:
+ if (Config.USE_NAMED_GROUP) {
+ if (!en.isNameRef() && env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) {
+ newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+ }
+ }
+ setupTree(en.target, state);
+ break;
} // inner switch
break;
@@ -2203,6 +2215,7 @@ final class Analyser extends Parser {
break;
case EncloseType.STOP_BACKTRACK:
+ case EncloseType.CONDITION:
optimizeNodeLeft(en.target, opt, oenv);
break;
} // inner switch
diff --git a/src/org/joni/ArrayCompiler.java b/src/org/joni/ArrayCompiler.java
index ac21d0e..62e44fc 100644
--- a/src/org/joni/ArrayCompiler.java
+++ b/src/org/joni/ArrayCompiler.java
@@ -96,6 +96,7 @@ final class ArrayCompiler extends Compiler {
do {
len = compileLengthTree(aln.car);
if (aln.cdr != null) {
+ regex.requireStack = true;
addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP);
}
compileTree(aln.car);
@@ -161,6 +162,7 @@ final class ArrayCompiler extends Compiler {
int savedNumNullCheck = regex.numNullCheck;
if (emptyInfo != 0) {
+ regex.requireStack = true;
addOpcode(OPCode.NULL_CHECK_START);
addMemNum(regex.numNullCheck); /* NULL CHECK ID */
regex.numNullCheck++;
@@ -419,6 +421,7 @@ final class ArrayCompiler extends Compiler {
}
private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) {
+ regex.requireStack = true;
int numRepeat = regex.numRepeat;
addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG);
addMemNum(numRepeat); /* OP_REPEAT ID */
@@ -517,6 +520,7 @@ final class ArrayCompiler extends Compiler {
@Override
protected void compileCECQuantifierNode(QuantifierNode qn) {
+ regex.requireStack = true;
boolean infinite = isRepeatInfinite(qn.upper);
int emptyInfo = qn.targetEmptyInfo;
@@ -691,6 +695,7 @@ final class ArrayCompiler extends Compiler {
@Override
protected void compileNonCECQuantifierNode(QuantifierNode qn) {
+ regex.requireStack = true;
boolean infinite = isRepeatInfinite(qn.upper);
int emptyInfo = qn.targetEmptyInfo;
@@ -858,6 +863,21 @@ final class ArrayCompiler extends Compiler {
}
break;
+ case EncloseType.CONDITION:
+ len = OPSize.CONDITION;
+ if (node.target.getType() == NodeType.ALT) {
+ ConsAltNode x = (ConsAltNode)node.target;
+ tlen = compileLengthTree(x.car); /* yes-node */
+ len += tlen + OPSize.JUMP;
+ if (x.cdr == null) newInternalException(ERR_PARSER_BUG);
+ x = x.cdr;
+ tlen = compileLengthTree(x.car); /* no-node */
+ len += tlen;
+ if (x.cdr != null) newSyntaxException(ERR_INVALID_CONDITION_PATTERN);
+ } else {
+ newInternalException(ERR_PARSER_BUG);
+ }
+ break;
default:
newInternalException(ERR_PARSER_BUG);
return 0; // not reached
@@ -872,6 +892,7 @@ final class ArrayCompiler extends Compiler {
case EncloseType.MEMORY:
if (Config.USE_SUBEXP_CALL) {
if (node.isCalled()) {
+ regex.requireStack = true;
addOpcode(OPCode.CALL);
node.callAddr = codeLength + OPSize.ABSADDR + OPSize.JUMP;
node.setAddrFixed();
@@ -888,6 +909,7 @@ final class ArrayCompiler extends Compiler {
} // USE_SUBEXP_CALL
if (bsAt(regex.btMemStart, node.regNum)) {
+ regex.requireStack = true;
addOpcode(OPCode.MEMORY_START_PUSH);
} else {
addOpcode(OPCode.MEMORY_START);
@@ -915,6 +937,7 @@ final class ArrayCompiler extends Compiler {
break;
case EncloseType.STOP_BACKTRACK:
+ regex.requireStack = true;
if (node.isStopBtSimpleRepeat()) {
QuantifierNode qn = (QuantifierNode)node.target;
@@ -932,6 +955,27 @@ final class ArrayCompiler extends Compiler {
}
break;
+ case EncloseType.CONDITION:
+ addOpcode(OPCode.CONDITION);
+ addMemNum(node.regNum);
+ if (node.target.getType() == NodeType.ALT) {
+ ConsAltNode x = (ConsAltNode)node.target;
+ len = compileLengthTree(x.car); /* yes-node */
+ if (x.cdr == null) newInternalException(ERR_PARSER_BUG);
+ x = x.cdr;
+ int len2 = compileLengthTree(x.car); /* no-node */
+ if (x.cdr != null) newSyntaxException(ERR_INVALID_CONDITION_PATTERN);
+ x = (ConsAltNode)node.target;
+ addRelAddr(len + OPSize.JUMP);
+ compileTree(x.car); /* yes-node */
+ addOpcodeRelAddr(OPCode.JUMP, len2);
+ x = x.cdr;
+ compileTree(x.car); /* no-node */
+ } else {
+ newInternalException(ERR_PARSER_BUG);
+ }
+ break;
+
default:
newInternalException(ERR_PARSER_BUG);
break;
@@ -1002,13 +1046,19 @@ final class ArrayCompiler extends Compiler {
addOpcode(enc.isSingleByte() ? OPCode.WORD_END_SB : OPCode.WORD_END);
break;
+ case AnchorType.KEEP:
+ addOpcode(OPCode.KEEP);
+ break;
+
case AnchorType.PREC_READ:
+ regex.requireStack = true;
addOpcode(OPCode.PUSH_POS);
compileTree(node.target);
addOpcode(OPCode.POP_POS);
break;
case AnchorType.PREC_READ_NOT:
+ regex.requireStack = true;
len = compileLengthTree(node.target);
addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS);
compileTree(node.target);
@@ -1028,6 +1078,7 @@ final class ArrayCompiler extends Compiler {
break;
case AnchorType.LOOK_BEHIND_NOT:
+ regex.requireStack = true;
len = compileLengthTree(node.target);
addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT);
if (node.charLength < 0) {
@@ -1182,44 +1233,6 @@ final class ArrayCompiler extends Compiler {
private void addOpcode(int opcode) {
addInt(opcode);
-
- switch(opcode) {
- case OPCode.ANYCHAR_STAR:
- case OPCode.ANYCHAR_STAR_SB:
- case OPCode.ANYCHAR_ML_STAR:
- case OPCode.ANYCHAR_ML_STAR_SB:
- case OPCode.ANYCHAR_STAR_PEEK_NEXT:
- case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
- case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
- case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
- case OPCode.STATE_CHECK_ANYCHAR_STAR:
- case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
- case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
- case OPCode.MEMORY_START_PUSH:
- case OPCode.MEMORY_END_PUSH:
- case OPCode.MEMORY_END_PUSH_REC:
- case OPCode.MEMORY_END_REC:
- case OPCode.NULL_CHECK_START:
- case OPCode.NULL_CHECK_END_MEMST_PUSH:
- case OPCode.PUSH:
- case OPCode.STATE_CHECK_PUSH:
- case OPCode.STATE_CHECK_PUSH_OR_JUMP:
- case OPCode.STATE_CHECK:
- case OPCode.PUSH_OR_JUMP_EXACT1:
- case OPCode.PUSH_IF_PEEK_NEXT:
- case OPCode.REPEAT:
- case OPCode.REPEAT_NG:
- case OPCode.REPEAT_INC_SG:
- case OPCode.REPEAT_INC_NG:
- case OPCode.REPEAT_INC_NG_SG:
- case OPCode.PUSH_POS:
- case OPCode.PUSH_POS_NOT:
- case OPCode.PUSH_STOP_BT:
- case OPCode.PUSH_LOOK_BEHIND_NOT:
- case OPCode.CALL:
- case OPCode.RETURN: // it will appear only with CALL though
- regex.stackNeeded = true;
- }
}
private void addStateCheckNum(int num) {
diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
index d5b318c..a6a0cdd 100644
--- a/src/org/joni/ByteCodeMachine.java
+++ b/src/org/joni/ByteCodeMachine.java
@@ -326,6 +326,7 @@ class ByteCodeMachine extends StackMachine {
case OPCode.EXACT1_IC_SB: opExact1ICSb(); break;
case OPCode.EXACTN_IC_SB: opExactNICSb(); continue;
+ case OPCode.CONDITION: opCondition(); continue;
case OPCode.FINISH:
return finish();
@@ -702,6 +703,14 @@ class ByteCodeMachine extends StackMachine {
sprev = s - 1;
}
+ private void opCondition() {
+ int mem = code[ip++];
+ int addr = code[ip++];
+ if (mem > regex.numMem || repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX) {
+ ip += addr;
+ }
+ }
+
private boolean isInBitSet() {
int c = bytes[s] & 0xff;
return ((code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0);
diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java
index 77938da..3ffe9c0 100644
--- a/src/org/joni/ByteCodePrinter.java
+++ b/src/org/joni/ByteCodePrinter.java
@@ -386,6 +386,14 @@ class ByteCodePrinter {
sb.append(":" + scn + ":(" + addr + ")");
break;
+ case OPCode.CONDITION:
+ mem = code[bp];
+ bp += OPSize.MEMNUM;
+ addr = code[bp];
+ bp += OPSize.RELADDR;
+ sb.append(":" + mem + ":" + addr);
+ break;
+
default:
throw new InternalException("undefined code: " + code[--bp]);
}
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 0b70271..c1feb68 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -19,6 +19,7 @@
*/
package org.joni;
+import static org.joni.Option.isAsciiRange;
import static org.joni.Option.isSingleline;
import static org.joni.ast.QuantifierNode.isRepeatInfinite;
@@ -756,7 +757,7 @@ class Lexer extends ScannerSupport {
private void fetchTokenFor_anchor(int subType) {
token.type = TokenType.ANCHOR;
- token.setAnchor(subType);
+ token.setAnchorSubtype(subType);
}
private void fetchTokenFor_xBrace() {
@@ -976,13 +977,13 @@ class Lexer extends ScannerSupport {
}
}
- protected final TokenType fetchToken() {
+ protected final void fetchToken() {
// mark(); // out
start:
while(true) {
if (!left()) {
token.type = TokenType.EOT;
- return token.type;
+ return;
}
token.type = TokenType.STRING;
@@ -1029,16 +1030,28 @@ class Lexer extends ScannerSupport {
if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
break;
case 'b':
- if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.WORD_BOUND);
+ if (syntax.opEscBWordBound()) {
+ fetchTokenFor_anchor(AnchorType.WORD_BOUND);
+ token.setAnchorASCIIRange(isAsciiRange(env.option));
+ }
break;
case 'B':
- if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
+ if (syntax.opEscBWordBound()) {
+ fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
+ token.setAnchorASCIIRange(isAsciiRange(env.option));
+ }
break;
case '<':
- if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_BEGIN);
+ if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
+ fetchTokenFor_anchor(AnchorType.WORD_BEGIN);
+ token.setAnchorASCIIRange(isAsciiRange(env.option));
+ }
break;
case '>':
- if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_END);
+ if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
+ fetchTokenFor_anchor(AnchorType.WORD_END);
+ token.setAnchorASCIIRange(isAsciiRange(env.option));
+ }
break;
case 's':
if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
@@ -1109,7 +1122,14 @@ class Lexer extends ScannerSupport {
case 'P':
fetchTokenFor_charProperty();
break;
-
+ case 'R':
+ if (syntax.op2EscCapitalRLinebreak()) token.type = TokenType.LINEBREAK;
+ break;
+ case 'X':
+ if (syntax.op2EscCapitalXExtendedGraphemeCluster()) token.type = TokenType.EXTENDED_GRAPHEME_CLUSTER;
+ break;
+ case 'K':
+ if (syntax.op2EscCapitalKKeep()) token.type = TokenType.KEEP;
default:
unfetch();
int num = fetchEscapedValue();
@@ -1219,7 +1239,6 @@ class Lexer extends ScannerSupport {
break;
} // while
- return token.type;
}
private void greedyCheck() {
diff --git a/src/org/joni/Option.java b/src/org/joni/Option.java
index 13fbba3..607a6e0 100644
--- a/src/org/joni/Option.java
+++ b/src/org/joni/Option.java
@@ -23,21 +23,31 @@ public class Option {
/* options */
public static final int NONE = 0;
- public static final int IGNORECASE = (1<<0);
- public static final int EXTEND = (1<<1);
- public static final int MULTILINE = (1<<2);
- public static final int SINGLELINE = (1<<3);
- public static final int FIND_LONGEST = (1<<4);
- public static final int FIND_NOT_EMPTY = (1<<5);
- public static final int NEGATE_SINGLELINE = (1<<6);
- public static final int DONT_CAPTURE_GROUP = (1<<7);
- public static final int CAPTURE_GROUP = (1<<8);
+ public static final int IGNORECASE = (1 << 0);
+ public static final int EXTEND = (1 << 1);
+ public static final int MULTILINE = (1 << 2);
+ public static final int SINGLELINE = (1 << 3);
+ public static final int FIND_LONGEST = (1 << 4);
+ public static final int FIND_NOT_EMPTY = (1 << 5);
+ public static final int NEGATE_SINGLELINE = (1 << 6);
+ public static final int DONT_CAPTURE_GROUP = (1 << 7);
+ public static final int CAPTURE_GROUP = (1 << 8);
/* options (search time) */
- public static final int NOTBOL = (1<<9);
- public static final int NOTEOL = (1<<10);
- public static final int POSIX_REGION = (1<<11);
- public static final int MAXBIT = (1<<12); /* limit */
+ public static final int NOTBOL = (1 << 9);
+ public static final int NOTEOL = (1 << 10);
+ public static final int POSIX_REGION = (1 << 11);
+
+ /* options (ctype range) */
+ public static final int ASCII_RANGE = (1 << 12);
+ public static final int POSIX_BRACKET_ALL_RANGE = (1 << 13);
+ public static final int WORD_BOUND_ALL_RANGE = (1 << 14);
+ /* options (newline) */
+ public static final int NEWLINE_CRLF = (1 << 15);
+ public static final int NOTBOS = (1 << 16);
+ public static final int NOTEOS = (1 << 17);
+
+ public static final int MAXBIT = (1 << 18); /* limit */
public static final int DEFAULT = NONE;
@@ -112,6 +122,10 @@ public class Option {
return (option & POSIX_REGION) != 0;
}
+ public static boolean isAsciiRange(int option) {
+ return (option & ASCII_RANGE) != 0;
+ }
+
/* OP_SET_OPTION is required for these options. ??? */
// public static boolean isDynamic(int option) {
// return (option & (MULTILINE | IGNORECASE)) != 0;
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index 419993f..77352b7 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -27,10 +27,14 @@ import static org.joni.Option.isIgnoreCase;
import org.jcodings.Ptr;
import org.jcodings.constants.CharacterType;
import org.jcodings.constants.PosixBracket;
+import org.jcodings.unicode.UnicodeEncoding;
import org.joni.ast.AnchorNode;
import org.joni.ast.AnyCharNode;
import org.joni.ast.BackRefNode;
import org.joni.ast.CClassNode;
+import org.joni.ast.CClassNode.CCSTATE;
+import org.joni.ast.CClassNode.CCStateArg;
+import org.joni.ast.CClassNode.CCVALTYPE;
import org.joni.ast.CTypeNode;
import org.joni.ast.CallNode;
import org.joni.ast.ConsAltNode;
@@ -38,10 +42,7 @@ import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
-import org.joni.ast.CClassNode.CCStateArg;
import org.joni.constants.AnchorType;
-import org.joni.constants.CCSTATE;
-import org.joni.constants.CCVALTYPE;
import org.joni.constants.EncloseType;
import org.joni.constants.NodeType;
import org.joni.constants.TokenType;
@@ -493,6 +494,56 @@ class Parser extends Lexer {
}
break;
+ case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
+ if (syntax.op2QMarkLParenCondition()) {
+ int num = -1;
+ int name = -1;
+ fetch();
+ if (enc.isDigit(c)) { /* (n) */
+ unfetch();
+ num = fetchName('(', true);
+ if (syntax.strictCheckBackref()) {
+ if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF);
+ }
+ } else {
+ if (Config.USE_NAMED_GROUP) {
+ if (c == '<' || c == '\'') { /* (<name>), ('name') */
+ name = p;
+ num = fetchName(c, false);
+ int nameEnd = value;
+ fetch();
+ if (c != ')') newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ NameEntry e = env.reg.nameToGroupNumbers(bytes, name, nameEnd);
+ if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, name, nameEnd);
+ if (syntax.strictCheckBackref()) {
+ if (e.backNum == 1) {
+ if (e.backRef1 > env.numMem ||
+ env.memNodes == null ||
+ env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF);
+ } else {
+ for (int i=0; i<e.backNum; i++) {
+ if (e.backRefs[i] > env.numMem ||
+ env.memNodes == null ||
+ env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF);
+ }
+ }
+ }
+
+ num = e.backNum == 1 ? e.backRef1 : e.backRefs[0]; /* XXX: use left most named group as Perl */
+ }
+ } else { // USE_NAMED_GROUP
+ newSyntaxException(ERR_INVALID_CONDITION_PATTERN);
+ }
+ }
+ EncloseNode en = new EncloseNode(EncloseType.CONDITION);
+ en.regNum = num;
+ if (name != -1) en.setNameRef();
+ node = en;
+ } else {
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ }
+ break;
+
// case 'p': #ifdef USE_POSIXLINE_OPTION
case '-':
case 'i':
@@ -534,6 +585,10 @@ class Parser extends Lexer {
// option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
// break;
+ case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
+ if ((syntax.op2OptionPerl() || syntax.op2OptionRuby()) && !neg) {
+ option = bsOnOff(option, Option.ASCII_RANGE, neg);
+ }
default:
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} // switch
@@ -580,7 +635,7 @@ class Parser extends Lexer {
Node target = parseSubExp(term);
if (node.getType() == NodeType.ANCHOR) {
- AnchorNode an = (AnchorNode) node;
+ AnchorNode an = (AnchorNode)node;
an.setTarget(target);
if (syntax.op2OptionECMAScript() && an.type == AnchorType.PREC_READ_NOT) {
env.popPrecReadNotNode(an);
@@ -594,6 +649,10 @@ class Parser extends Lexer {
}
/* Don't move this to previous of parse_subexp() */
env.setMemNode(en.regNum, node);
+ } else if (en.type == EncloseType.CONDITION) {
+ if (target.getType() != NodeType.ALT) { /* convert (?(cond)yes) to (?(cond)yes|empty) */
+ en.setTarget(ConsAltNode.newAltNode(target, ConsAltNode.newAltNode(StringNode.EMPTY, null)));
+ }
}
}
returnCode = 0;
@@ -675,6 +734,63 @@ class Parser extends Lexer {
} else {
return parseExpTkByte(group); // goto tk_byte
}
+ case LINEBREAK:
+ byte[]buflb = new byte[Config.ENC_CODE_TO_MBC_MAXLEN * 2];
+ int len1 = enc.codeToMbc(0x0D, buflb, 0);
+ int len2 = enc.codeToMbc(0x0A, buflb, len1);
+ StringNode left = new StringNode(buflb, 0, len1 + len2);
+ left.setRaw();
+ /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
+ CClassNode right = new CClassNode();
+ if (enc.minLength() > 1) {
+ right.addCodeRange(env, 0x0A, 0x0D);
+ } else {
+ right.bs.setRange(0x0A, 0x0D);
+ }
+
+ if (enc.toString().startsWith("UTF")) {
+ /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
+ right.addCodeRange(env, 0x85, 0x85);
+ right.addCodeRange(env, 0x2028, 0x2029);
+ }
+ /* (?>...) */
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK);
+ en.setTarget(ConsAltNode.newAltNode(left, ConsAltNode.newAltNode(right, null)));
+ node = en;
+ break;
+
+ case EXTENDED_GRAPHEME_CLUSTER:
+ if (Config.USE_UNICODE_PROPERTIES) {
+ if (enc instanceof UnicodeEncoding) {
+ int ctype = enc.propertyNameToCType(new byte[]{(byte)'M'}, 0, 1);
+ if (ctype > 0) {
+ CClassNode cc1 = new CClassNode(); /* \P{M} */
+ cc1.addCType(ctype, false, env, this);
+ cc1.setNot();
+ CClassNode cc2 = new CClassNode(); /* \p{M}* */
+ cc1.addCType(ctype, false, env, this);
+ QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+ qn.setTarget(cc2);
+ /* (?>...) */
+ EncloseNode en2 = new EncloseNode(EncloseType.STOP_BACKTRACK);
+ /* \P{M}\p{M}* */
+ en2.setTarget(ConsAltNode.newListNode(cc1, ConsAltNode.newListNode(qn, null)));
+ node = en2;
+ }
+ }
+ }
+ if (node == null) {
+ AnyCharNode np1 = new AnyCharNode();
+ EncloseNode on = new EncloseNode(bsOnOff(env.option, Option.MULTILINE, false), 0);
+ on.setTarget(np1);
+ node = np1;
+ }
+ break;
+
+ case KEEP:
+ node = new AnchorNode(AnchorType.KEEP);
+ break;
+
case STRING:
return parseExpTkByte(group); // tk_byte:
@@ -711,7 +827,7 @@ class Parser extends Lexer {
break;
case CharacterType.WORD:
- node = new CTypeNode(token.getPropCType(), token.getPropNot());
+ node = new CTypeNode(token.getPropCType(), token.getPropNot(), false);
break;
case CharacterType.SPACE:
@@ -807,7 +923,7 @@ class Parser extends Lexer {
break;
case ANCHOR:
- node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
+ node = new AnchorNode(token.getAnchorSubtype());
break;
case OP_REPEAT:
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
index 03adf2b..f5eadbd 100644
--- a/src/org/joni/Regex.java
+++ b/src/org/joni/Regex.java
@@ -27,25 +27,22 @@ import java.util.IllegalFormatConversionException;
import java.util.Iterator;
import org.jcodings.Encoding;
-import org.jcodings.EncodingDB;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.util.BytesHash;
import org.joni.constants.AnchorType;
-import org.joni.constants.RegexState;
import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;
import org.joni.exception.ValueException;
-public final class Regex implements RegexState {
+public final class Regex {
int[] code; /* compiled pattern */
int codeLength;
- boolean stackNeeded;
+ boolean requireStack;
Object[]operands; /* e.g. shared CClassNode */
int operandLength;
- int state; /* normal, searching, compiling */ // remove
int numMem; /* used memory(...) num counted from 1 */
int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
diff --git a/src/org/joni/StackMachine.java b/src/org/joni/StackMachine.java
index 7b16780..b3d1106 100644
--- a/src/org/joni/StackMachine.java
+++ b/src/org/joni/StackMachine.java
@@ -43,7 +43,7 @@ abstract class StackMachine extends Matcher implements StackType {
protected StackMachine(Regex regex, byte[]bytes, int p , int end) {
super(regex, bytes, p, end);
- this.stack = regex.stackNeeded ? fetchStack() : null;
+ this.stack = regex.requireStack ? fetchStack() : null;
int n = regex.numRepeat + (regex.numMem << 1);
this.repeatStk = n > 0 ? new int[n] : null;
@@ -502,6 +502,7 @@ abstract class StackMachine extends Matcher implements StackType {
int endp;
isNull = 1;
while (k < stk) {
+ e = stack[k++];
if (e.type == MEM_START) {
if (e.getMemEnd() == INVALID_INDEX) {
isNull = 0;
@@ -519,8 +520,6 @@ abstract class StackMachine extends Matcher implements StackType {
isNull = -1; /* empty, but position changed */
}
}
- k++;
- e = stack[k]; // !!
}
break;
}
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index 4e7b5e7..5ec30f7 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -189,6 +189,9 @@ public final class Syntax implements SyntaxProperties{
return isOp(OP_ESC_X_BRACE_HEX8);
}
+ public boolean opEscOBraceOctal() {
+ return isOp(OP_ESC_O_BRACE_OCTAL);
+ }
/**
* OP
@@ -278,10 +281,55 @@ public final class Syntax implements SyntaxProperties{
return isOp2(OP2_INEFFECTIVE_ESCAPE);
}
+ public boolean op2EscCapitalRLinebreak() {
+ return isOp2(OP2_ESC_CAPITAL_R_LINEBREAK);
+ }
+
+ public boolean op2EscCapitalXExtendedGraphemeCluster() {
+ return isOp2(OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER);
+ }
+
+ public boolean op2EscVVerticalWhiteSpace() {
+ return isOp2(OP2_ESC_V_VERTICAL_WHITESPACE);
+ }
+
+ public boolean op2EscHHorizontalWhiteSpace() {
+ return isOp2(OP2_ESC_H_HORIZONTAL_WHITESPACE);
+ }
+
+ public boolean op2EscCapitalKKeep() {
+ return isOp2(OP2_ESC_CAPITAL_K_KEEP);
+ }
+
+ public boolean op2EscGBraceBackref() {
+ return isOp2(OP2_ESC_G_BRACE_BACKREF);
+ }
+
+ public boolean op2QMarkSubexpCall() {
+ return isOp2(OP2_QMARK_SUBEXP_CALL);
+ }
+
+ public boolean op2QMarkBarBranchReset() {
+ return isOp2(OP2_QMARK_BAR_BRANCH_RESET);
+ }
+
+ public boolean op2QMarkLParenCondition() {
+ return isOp2(OP2_QMARK_LPAREN_CONDITION);
+ }
+
+ public boolean op2QMarkCapitalPNamedGroup() {
+ return isOp2(OP2_QMARK_CAPITAL_P_NAMED_GROUP);
+ }
+
+ public boolean op2OptionJava() {
+ return isOp2(OP2_OPTION_JAVA);
+ }
+
public boolean op2OptionECMAScript() {
return isOp2(OP2_OPTION_ECMASCRIPT);
}
+
/**
* BEHAVIOR
*
@@ -371,7 +419,11 @@ public final class Syntax implements SyntaxProperties{
OP2_PLUS_POSSESSIVE_REPEAT |
OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL |
OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB |
- OP2_ESC_H_XDIGIT ),
+ OP2_ESC_H_XDIGIT |
+ OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
+ OP2_QMARK_LPAREN_CONDITION |
+ OP2_ESC_CAPITAL_R_LINEBREAK // | OP2_ESC_CAPITAL_K_KEEP
+ ),
( GNU_REGEX_BV |
ALLOW_INTERVAL_LOW_ABBREV |
diff --git a/src/org/joni/Token.java b/src/org/joni/Token.java
index 8ad7330..321ad91 100644
--- a/src/org/joni/Token.java
+++ b/src/org/joni/Token.java
@@ -46,18 +46,19 @@ final class Token {
INT1 = code;
}
- int getAnchor() {
+ int getAnchorSubtype() {
return INT1;
}
- void setAnchor(int anchor) {
+ void setAnchorSubtype(int anchor) {
INT1 = anchor;
}
- int getSubtype() {
- return INT1;
+ boolean getAnchorASCIIRange() {
+ return INT2 == 1;
}
- void setSubtype(int subtype) {
- INT1 = subtype;
+
+ void setAnchorASCIIRange(boolean ascii) {
+ INT2 = ascii ? 1 : 0;
}
// repeat union member
diff --git a/src/org/joni/ast/AnchorNode.java b/src/org/joni/ast/AnchorNode.java
index 988d994..498f1a6 100644
--- a/src/org/joni/ast/AnchorNode.java
+++ b/src/org/joni/ast/AnchorNode.java
@@ -21,8 +21,9 @@ package org.joni.ast;
import org.joni.constants.AnchorType;
-public final class AnchorNode extends Node implements AnchorType {
- public int type;
+public final class AnchorNode extends Node {
+ public final int type;
+ public boolean asciiRange;
public Node target;
public int charLength;
@@ -60,28 +61,29 @@ public final class AnchorNode extends Node implements AnchorType {
public String toString(int level) {
StringBuilder value = new StringBuilder();
value.append("\n type: " + typeToString());
+ value.append("\n ascii: " + asciiRange);
value.append("\n target: " + pad(target, level + 1));
return value.toString();
}
public String typeToString() {
StringBuilder type = new StringBuilder();
- if (isType(BEGIN_BUF)) type.append("BEGIN_BUF ");
- if (isType(BEGIN_LINE)) type.append("BEGIN_LINE ");
- if (isType(BEGIN_POSITION)) type.append("BEGIN_POSITION ");
- if (isType(END_BUF)) type.append("END_BUF ");
- if (isType(SEMI_END_BUF)) type.append("SEMI_END_BUF ");
- if (isType(END_LINE)) type.append("END_LINE ");
- if (isType(WORD_BOUND)) type.append("WORD_BOUND ");
- if (isType(NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND ");
- if (isType(WORD_BEGIN)) type.append("WORD_BEGIN ");
- if (isType(WORD_END)) type.append("WORD_END ");
- if (isType(PREC_READ)) type.append("PREC_READ ");
- if (isType(PREC_READ_NOT)) type.append("PREC_READ_NOT ");
- if (isType(LOOK_BEHIND)) type.append("LOOK_BEHIND ");
- if (isType(LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT ");
- if (isType(ANYCHAR_STAR)) type.append("ANYCHAR_STAR ");
- if (isType(ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML ");
+ if (isType(AnchorType.BEGIN_BUF)) type.append("BEGIN_BUF ");
+ if (isType(AnchorType.BEGIN_LINE)) type.append("BEGIN_LINE ");
+ if (isType(AnchorType.BEGIN_POSITION)) type.append("BEGIN_POSITION ");
+ if (isType(AnchorType.END_BUF)) type.append("END_BUF ");
+ if (isType(AnchorType.SEMI_END_BUF)) type.append("SEMI_END_BUF ");
+ if (isType(AnchorType.END_LINE)) type.append("END_LINE ");
+ if (isType(AnchorType.WORD_BOUND)) type.append("WORD_BOUND ");
+ if (isType(AnchorType.NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND ");
+ if (isType(AnchorType.WORD_BEGIN)) type.append("WORD_BEGIN ");
+ if (isType(AnchorType.WORD_END)) type.append("WORD_END ");
+ if (isType(AnchorType.PREC_READ)) type.append("PREC_READ ");
+ if (isType(AnchorType.PREC_READ_NOT)) type.append("PREC_READ_NOT ");
+ if (isType(AnchorType.LOOK_BEHIND)) type.append("LOOK_BEHIND ");
+ if (isType(AnchorType.LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT ");
+ if (isType(AnchorType.ANYCHAR_STAR)) type.append("ANYCHAR_STAR ");
+ if (isType(AnchorType.ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML ");
return type.toString();
}
diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java
index 6dd5d7f..f47b100 100644
--- a/src/org/joni/ast/CClassNode.java
+++ b/src/org/joni/ast/CClassNode.java
@@ -29,8 +29,6 @@ import org.joni.BitSet;
import org.joni.CodeRangeBuffer;
import org.joni.Config;
import org.joni.ScanEnvironment;
-import org.joni.constants.CCSTATE;
-import org.joni.constants.CCVALTYPE;
import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;
import org.joni.exception.SyntaxException;
@@ -267,24 +265,24 @@ public final class CClassNode extends Node {
if (!not) {
for (int i=0; i<n; i++) {
- for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
+ for (int j=CR_FROM(mbr, i); j<=CR_TO(mbr, i); j++) {
if (j >= sbOut) {
if (Config.VANILLA) {
- if (j == mbr[i * 2 + 2]) {
+ if (j == CR_TO(mbr, i)) {
i++;
- } else if (j > mbr[i * 2 + 1]) {
- addCodeRangeToBuf(j, mbr[i * 2 + 2]);
+ } else if (j > CR_FROM(mbr, i)) {
+ addCodeRangeToBuf(j, CR_TO(mbr, i));
i++;
}
} else {
- if (j >= mbr[i * 2 + 1]) {
- addCodeRangeToBuf(j, mbr[i * 2 + 2]);
+ if (j >= CR_FROM(mbr, i)) {
+ addCodeRangeToBuf(j, CR_TO(mbr, i));
i++;
}
}
// !goto sb_end!, remove duplication!
for (; i<n; i++) {
- addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
+ addCodeRangeToBuf(CR_FROM(mbr, i), CR_TO(mbr, i));
}
return;
}
@@ -293,27 +291,27 @@ public final class CClassNode extends Node {
}
// !sb_end:!
for (int i=0; i<n; i++) {
- addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
+ addCodeRangeToBuf(CR_FROM(mbr, i), CR_TO(mbr, i));
}
} else {
int prev = 0;
for (int i=0; i<n; i++) {
- for (int j=prev; j < mbr[2 * i + 1]; j++) {
+ for (int j=prev; j < CR_FROM(mbr, i); j++) {
if (j >= sbOut) {
// !goto sb_end2!, remove duplication
prev = sbOut;
for (i=0; i<n; i++) {
- if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
- prev = mbr[i * 2 + 2] + 1;
+ if (prev < CR_FROM(mbr, i)) addCodeRangeToBuf(prev, CR_FROM(mbr, i) - 1);
+ prev = CR_TO(mbr, i) + 1;
}
if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
return;
}
bs.set(j);
}
- prev = mbr[2 * i + 2] + 1;
+ prev = CR_TO(mbr, i) + 1;
}
for (int j=prev; j<sbOut; j++) {
@@ -323,8 +321,8 @@ public final class CClassNode extends Node {
// !sb_end2:!
prev = sbOut;
for (int i=0; i<n; i++) {
- if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
- prev = mbr[i * 2 + 2] + 1;
+ if (prev < CR_FROM(mbr, i)) addCodeRangeToBuf(prev, CR_FROM(mbr, i) - 1);
+ prev = CR_TO(mbr, i) + 1;
}
if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
}
@@ -425,6 +423,19 @@ public final class CClassNode extends Node {
} // switch
}
+ public static enum CCVALTYPE {
+ SB,
+ CODE_POINT,
+ CLASS
+ }
+
+ public static enum CCSTATE {
+ VALUE,
+ RANGE,
+ COMPLETE,
+ START
+ }
+
public static final class CCStateArg {
public int v;
public int vs;
@@ -567,4 +578,11 @@ public final class CClassNode extends Node {
return (flags & FLAG_NCCLASS_SHARE) != 0;
}
+ private static int CR_FROM(int[] range, int i) {
+ return range[(i * 2) + 1];
+ }
+
+ private static int CR_TO(int[] range, int i) {
+ return range[(i * 2) + 2];
+ }
}
diff --git a/src/org/joni/ast/CTypeNode.java b/src/org/joni/ast/CTypeNode.java
index 017ce54..0472e9a 100644
--- a/src/org/joni/ast/CTypeNode.java
+++ b/src/org/joni/ast/CTypeNode.java
@@ -20,12 +20,14 @@
package org.joni.ast;
public final class CTypeNode extends Node {
- public int ctype;
- public boolean not;
+ public final int ctype;
+ public final boolean not;
+ public final boolean asciiRange;
- public CTypeNode(int type, boolean not) {
+ public CTypeNode(int type, boolean not, boolean asciiRange) {
this.ctype= type;
this.not = not;
+ this.asciiRange = asciiRange;
}
@Override
@@ -43,6 +45,7 @@ public final class CTypeNode extends Node {
StringBuilder value = new StringBuilder();
value.append("\n ctype: " + ctype);
value.append("\n not: " + not);
+ value.append("\n ascii: " + asciiRange);
return value.toString();
}
diff --git a/src/org/joni/ast/EncloseNode.java b/src/org/joni/ast/EncloseNode.java
index 7c45d14..0ce827e 100644
--- a/src/org/joni/ast/EncloseNode.java
+++ b/src/org/joni/ast/EncloseNode.java
@@ -101,6 +101,7 @@ public final class EncloseNode extends StateNode implements EncloseType {
if (isStopBacktrack()) types.append("STOP_BACKTRACK ");
if (isMemory()) types.append("MEMORY ");
if (isOption()) types.append("OPTION ");
+ if (isCondition()) types.append("CONDITION ");
return types.toString();
}
@@ -113,36 +114,16 @@ public final class EncloseNode extends StateNode implements EncloseType {
state &= ~flag;
}
- public void clearMemory() {
- type &= ~MEMORY;
- }
-
- public void setMemory() {
- type |= MEMORY;
- }
-
public boolean isMemory() {
return (type & MEMORY) != 0;
}
- public void clearOption() {
- type &= ~OPTION;
- }
-
- public void setOption() {
- type |= OPTION;
- }
-
public boolean isOption() {
return (type & OPTION) != 0;
}
- public void clearStopBacktrack() {
- type &= ~STOP_BACKTRACK;
- }
-
- public void setStopBacktrack() {
- type |= STOP_BACKTRACK;
+ public boolean isCondition() {
+ return (type & CONDITION) != 0;
}
public boolean isStopBacktrack() {
diff --git a/src/org/joni/constants/EncloseType.java b/src/org/joni/constants/EncloseType.java
index 125af0c..13d42b6 100644
--- a/src/org/joni/constants/EncloseType.java
+++ b/src/org/joni/constants/EncloseType.java
@@ -23,6 +23,7 @@ public interface EncloseType {
final int MEMORY = 1<<0;
final int OPTION = 1<<1;
final int STOP_BACKTRACK = 1<<2;
+ final int CONDITION = 1<<3;
final int ALLOWED_IN_LB = MEMORY;
final int ALLOWED_IN_LB_NOT = 0;
diff --git a/src/org/joni/constants/OPCode.java b/src/org/joni/constants/OPCode.java
index 05d1f8b..06d0feb 100644
--- a/src/org/joni/constants/OPCode.java
+++ b/src/org/joni/constants/OPCode.java
@@ -63,94 +63,102 @@ public interface OPCode {
final int WORD_BEGIN = 33;
final int WORD_END = 34;
- final int BEGIN_BUF = 35;
- final int END_BUF = 36;
- final int BEGIN_LINE = 37;
- final int END_LINE = 38;
- final int SEMI_END_BUF = 39;
- final int BEGIN_POSITION = 40;
-
- final int BACKREF1 = 41;
- final int BACKREF2 = 42;
- final int BACKREFN = 43;
- final int BACKREFN_IC = 44;
- final int BACKREF_MULTI = 45;
- final int BACKREF_MULTI_IC = 46;
- final int BACKREF_WITH_LEVEL = 47; /* \k<xxx+n>, \k<xxx-n> */
-
- final int MEMORY_START = 48;
- final int MEMORY_START_PUSH = 49; /* push back-tracker to stack */
- final int MEMORY_END_PUSH = 50; /* push back-tracker to stack */
- final int MEMORY_END_PUSH_REC = 51; /* push back-tracker to stack */
- final int MEMORY_END = 52;
- final int MEMORY_END_REC = 53; /* push marker to stack */
-
- final int FAIL = 54; /* pop stack and move */
- final int JUMP = 55;
- final int PUSH = 56;
- final int POP = 57;
- final int PUSH_OR_JUMP_EXACT1 = 58; /* if match exact then push, else jump. */
- final int PUSH_IF_PEEK_NEXT = 59; /* if match exact then push, else none. */
-
- final int REPEAT = 60; /* {n,m} */
- final int REPEAT_NG = 61; /* {n,m}? (non greedy) */
- final int REPEAT_INC = 62;
- final int REPEAT_INC_NG = 63; /* non greedy */
- final int REPEAT_INC_SG = 64; /* search and get in stack */
- final int REPEAT_INC_NG_SG = 65; /* search and get in stack (non greedy) */
-
- final int NULL_CHECK_START = 66; /* null loop checker start */
- final int NULL_CHECK_END = 67; /* null loop checker end */
- final int NULL_CHECK_END_MEMST = 68; /* null loop checker end (with capture status) */
- final int NULL_CHECK_END_MEMST_PUSH = 69; /* with capture status and push check-end */
-
- final int PUSH_POS = 70; /* (?=...) start */
- final int POP_POS = 71; /* (?=...) end */
- final int PUSH_POS_NOT = 72; /* (?!...) start */
- final int FAIL_POS = 73; /* (?!...) end */
- final int PUSH_STOP_BT = 74; /* (?>...) start */
- final int POP_STOP_BT = 75; /* (?>...) end */
- final int LOOK_BEHIND = 76; /* (?<=...) start (no needs end opcode) */
- final int PUSH_LOOK_BEHIND_NOT = 77; /* (?<!...) start */
- final int FAIL_LOOK_BEHIND_NOT = 78; /* (?<!...) end */
-
- final int CALL = 79; /* \g<name> */
- final int RETURN = 80;
-
- final int STATE_CHECK_PUSH = 81; /* combination explosion check and push */
- final int STATE_CHECK_PUSH_OR_JUMP = 82; /* check ok -> push, else jump */
- final int STATE_CHECK = 83; /* check only */
- final int STATE_CHECK_ANYCHAR_STAR = 84;
- final int STATE_CHECK_ANYCHAR_ML_STAR = 85;
+ final int ASCII_WORD = 35;
+ final int NOT_ASCII_WORD = 36;
+ final int ASCII_WORD_BOUND = 37;
+ final int NOT_ASCII_WORD_BOUND = 38;
+ final int ASCII_WORD_BEGIN = 39;
+ final int ASCII_WORD_END = 40;
+
+ final int BEGIN_BUF = 41;
+ final int END_BUF = 42;
+ final int BEGIN_LINE = 43;
+ final int END_LINE = 44;
+ final int SEMI_END_BUF = 45;
+ final int BEGIN_POSITION = 46;
+
+ final int BACKREF1 = 47;
+ final int BACKREF2 = 48;
+ final int BACKREFN = 49;
+ final int BACKREFN_IC = 50;
+ final int BACKREF_MULTI = 51;
+ final int BACKREF_MULTI_IC = 52;
+ final int BACKREF_WITH_LEVEL = 53; /* \k<xxx+n>, \k<xxx-n> */
+
+ final int MEMORY_START = 54;
+ final int MEMORY_START_PUSH = 55; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH = 56; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH_REC = 57; /* push back-tracker to stack */
+ final int MEMORY_END = 58;
+ final int MEMORY_END_REC = 59; /* push marker to stack */
+
+ final int KEEP = 60;
+ final int FAIL = 61; /* pop stack and move */
+ final int JUMP = 62;
+ final int PUSH = 63;
+ final int POP = 64;
+ final int PUSH_OR_JUMP_EXACT1 = 65; /* if match exact then push, else jump. */
+ final int PUSH_IF_PEEK_NEXT = 66; /* if match exact then push, else none. */
+
+ final int REPEAT = 67; /* {n,m} */
+ final int REPEAT_NG = 68; /* {n,m}? (non greedy) */
+ final int REPEAT_INC = 69;
+ final int REPEAT_INC_NG = 70; /* non greedy */
+ final int REPEAT_INC_SG = 71; /* search and get in stack */
+ final int REPEAT_INC_NG_SG = 72; /* search and get in stack (non greedy) */
+
+ final int NULL_CHECK_START = 73; /* null loop checker start */
+ final int NULL_CHECK_END = 74; /* null loop checker end */
+ final int NULL_CHECK_END_MEMST = 75; /* null loop checker end (with capture status) */
+ final int NULL_CHECK_END_MEMST_PUSH = 76; /* with capture status and push check-end */
+
+ final int PUSH_POS = 77; /* (?=...) start */
+ final int POP_POS = 78; /* (?=...) end */
+ final int PUSH_POS_NOT = 79; /* (?!...) start */
+ final int FAIL_POS = 80; /* (?!...) end */
+ final int PUSH_STOP_BT = 81; /* (?>...) start */
+ final int POP_STOP_BT = 82; /* (?>...) end */
+ final int LOOK_BEHIND = 83; /* (?<=...) start (no needs end opcode) */
+ final int PUSH_LOOK_BEHIND_NOT = 84; /* (?<!...) start */
+ final int FAIL_LOOK_BEHIND_NOT = 85; /* (?<!...) end */
+
+ final int CALL = 86; /* \g<name> */
+ final int RETURN = 87;
+ final int CONDITION = 88;
+
+ final int STATE_CHECK_PUSH = 89; /* combination explosion check and push */
+ final int STATE_CHECK_PUSH_OR_JUMP = 90; /* check ok -> push, else jump */
+ final int STATE_CHECK = 91; /* check only */
+ final int STATE_CHECK_ANYCHAR_STAR = 92;
+ final int STATE_CHECK_ANYCHAR_ML_STAR = 93;
/* no need: IS_DYNAMIC_OPTION() == 0 */
- final int SET_OPTION_PUSH = 86; /* set option and push recover option */
- final int SET_OPTION = 87; /* set option */
+ final int SET_OPTION_PUSH = 94; /* set option and push recover option */
+ final int SET_OPTION = 95; /* set option */
// single byte versions
- final int ANYCHAR_SB = 88; /* "." */
- final int ANYCHAR_ML_SB = 89; /* "." multi-line */
- final int ANYCHAR_STAR_SB = 90; /* ".*" */
- final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
- final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
- final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
- final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
- final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
-
- final int CCLASS_SB = 96;
- final int CCLASS_NOT_SB = 97;
- final int WORD_SB = 98;
- final int NOT_WORD_SB = 99;
- final int WORD_BOUND_SB = 100;
- final int NOT_WORD_BOUND_SB = 101;
- final int WORD_BEGIN_SB = 102;
- final int WORD_END_SB = 103;
-
- final int LOOK_BEHIND_SB = 104;
-
- final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
- final int EXACTN_IC_SB = 106; /* single byte, ignore case */
-
+ final int ANYCHAR_SB = 96; /* "." */
+ final int ANYCHAR_ML_SB = 97; /* "." multi-line */
+ final int ANYCHAR_STAR_SB = 98; /* ".*" */
+ final int ANYCHAR_ML_STAR_SB = 99; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT_SB = 100;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 101;
+ final int STATE_CHECK_ANYCHAR_STAR_SB = 102;
+ final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 103;
+
+ final int CCLASS_SB = 104;
+ final int CCLASS_NOT_SB = 105;
+ final int WORD_SB = 106;
+ final int NOT_WORD_SB = 107;
+ final int WORD_BOUND_SB = 108;
+ final int NOT_WORD_BOUND_SB = 109;
+ final int WORD_BEGIN_SB = 110;
+ final int WORD_END_SB = 111;
+
+ final int LOOK_BEHIND_SB = 112;
+
+ final int EXACT1_IC_SB = 113; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC_SB = 114; /* single byte, ignore case */
public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] {
"finish", /*OP_FINISH*/
@@ -188,6 +196,12 @@ public interface OPCode {
"not-word-bound", /*OP_NOT_WORD_BOUND*/
"word-begin", /*OP_WORD_BEGIN*/
"word-end", /*OP_WORD_END*/
+ "ascii-word", /*OP_ASCII_WORD*/
+ "not-ascii-word", /*OP_NOT_ASCII_WORD*/
+ "ascii-word-bound", /*OP_ASCII_WORD_BOUND*/
+ "not-ascii-word-bound", /*OP_NOT_ASCII_WORD_BOUND*/
+ "ascii-word-begin", /*OP_ASCII_WORD_BEGIN*/
+ "ascii-word-end", /*OP_ASCII_WORD_END*/
"begin-buf", /*OP_BEGIN_BUF*/
"end-buf", /*OP_END_BUF*/
"begin-line", /*OP_BEGIN_LINE*/
@@ -207,6 +221,7 @@ public interface OPCode {
"mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
"mem-end", /*OP_MEMORY_END*/
"mem-end-rec", /*OP_MEMORY_END_REC*/
+ "keep", /*OP_KEEP*/
"fail", /*OP_FAIL*/
"jump", /*OP_JUMP*/
"push", /*OP_PUSH*/
@@ -234,6 +249,7 @@ public interface OPCode {
"fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
"call", /*OP_CALL*/
"return", /*OP_RETURN*/
+ "condition", /*OP_CONDITION*/
"state-check-push", /*OP_STATE_CHECK_PUSH*/
"state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
"state-check", /*OP_STATE_CHECK*/
@@ -305,6 +321,12 @@ public interface OPCode {
Arguments.NON, /*OP_NOT_WORD_BOUND*/
Arguments.NON, /*OP_WORD_BEGIN*/
Arguments.NON, /*OP_WORD_END*/
+ Arguments.NON, /*OP_ASCII_WORD*/
+ Arguments.NON, /*OP_NOT_ASCII_WORD*/
+ Arguments.NON, /*OP_ASCII_WORD_BOUND*/
+ Arguments.NON, /*OP_NOT_ASCII_WORD_BOUND*/
+ Arguments.NON, /*OP_ASCII_WORD_BEGIN*/
+ Arguments.NON, /*OP_ASCII_WORD_END*/
Arguments.NON, /*OP_BEGIN_BUF*/
Arguments.NON, /*OP_END_BUF*/
Arguments.NON, /*OP_BEGIN_LINE*/
@@ -324,6 +346,7 @@ public interface OPCode {
Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
Arguments.MEMNUM, /*OP_MEMORY_END*/
Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
+ Arguments.NON, /*OP_KEEP*/
Arguments.NON, /*OP_FAIL*/
Arguments.RELADDR, /*OP_JUMP*/
Arguments.RELADDR, /*OP_PUSH*/
@@ -351,6 +374,7 @@ public interface OPCode {
Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
Arguments.ABSADDR, /*OP_CALL*/
Arguments.NON, /*OP_RETURN*/
+ Arguments.SPECIAL, /*OP_CONDITION*/
Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
diff --git a/src/org/joni/constants/OPSize.java b/src/org/joni/constants/OPSize.java
index d5595ad..fa3d3c6 100644
--- a/src/org/joni/constants/OPSize.java
+++ b/src/org/joni/constants/OPSize.java
@@ -67,6 +67,7 @@ public interface OPSize {
final int FAIL_LOOK_BEHIND_NOT = OPCODE;
final int CALL = (OPCODE + ABSADDR);
final int RETURN = OPCODE;
+ final int CONDITION = (OPCODE + MEMNUM + RELADDR);
// #ifdef USE_COMBINATION_EXPLOSION_CHECK
final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM);
diff --git a/src/org/joni/constants/RegexState.java b/src/org/joni/constants/RegexState.java
deleted file mode 100644
index 72dd3ff..0000000
--- a/src/org/joni/constants/RegexState.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished to do
- * so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-package org.joni.constants;
-
-// we dont need this ATM
-public interface RegexState {
- final int NORMAL = 0;
- final int SEARCHING = 1;
- final int COMPILING = -1;
- final int MODIFY = -2;
-}
diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java
index 075324c..46861f2 100644
--- a/src/org/joni/constants/SyntaxProperties.java
+++ b/src/org/joni/constants/SyntaxProperties.java
@@ -52,10 +52,11 @@ public interface SyntaxProperties {
final int OP_ESC_OCTAL3 = (1<<28); /* \OOO */
final int OP_ESC_X_HEX2 = (1<<29); /* \xHH */
final int OP_ESC_X_BRACE_HEX8 = (1<<30); /* \x{7HHHHHHH} */
+ final int OP_ESC_O_BRACE_OCTAL = (1<<31); /* \o{OOO} */
final int OP2_ESC_CAPITAL_Q_QUOTE = (1<<0); /* \Q...\E */
final int OP2_QMARK_GROUP_EFFECT = (1<<1); /* (?...); */
- final int OP2_OPTION_PERL = (1<<2); /* (?imsx);,(?-imsx); */
+ final int OP2_OPTION_PERL = (1<<2); /* (?imsxadlu), (?-imsx), (?^imsxalu) */
final int OP2_OPTION_RUBY = (1<<3); /* (?imx);, (?-imx); */
final int OP2_PLUS_POSSESSIVE_REPEAT = (1<<4); /* ?+,*+,++ */
final int OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5); /* {n,m}+ */
@@ -74,7 +75,18 @@ public interface SyntaxProperties {
/* final int OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18); */
final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */
final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */
- final int OP2_OPTION_ECMASCRIPT = (1<<21); /* EcmaScript quirks */
+ final int OP2_ESC_CAPITAL_R_LINEBREAK = (1<<21); /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
+ final int OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER = (1<<22); /* \X as (?:\P{M}\p{M}*) */
+ final int OP2_ESC_V_VERTICAL_WHITESPACE = (1<<23); /* \v, \V -- Perl */
+ final int OP2_ESC_H_HORIZONTAL_WHITESPACE= (1<<24); /* \h, \H -- Perl */
+ final int OP2_ESC_CAPITAL_K_KEEP = (1<<25); /* \K */
+ final int OP2_ESC_G_BRACE_BACKREF = (1<<26); /* \g{name}, \g{n} */
+ final int OP2_QMARK_SUBEXP_CALL = (1<<27); /* (?&name), (?n), (?R), (?0) */
+ final int OP2_QMARK_BAR_BRANCH_RESET = (1<<28); /* (?|...) */
+ final int OP2_QMARK_LPAREN_CONDITION = (1<<29); /* (?(cond)yes...|no...) */
+ final int OP2_QMARK_CAPITAL_P_NAMED_GROUP= (1<<30); /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
+ final int OP2_OPTION_JAVA = (1<<31); /* (?idmsux), (?-idmsux) */
+ final int OP2_OPTION_ECMASCRIPT = (1<<32); /* EcmaScript quirks */
/* syntax (behavior); */
final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented */
@@ -88,6 +100,7 @@ public interface SyntaxProperties {
final int CAPTURE_ONLY_NAMED_GROUP = (1<<7); /* see doc/RE */
final int ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8); /* (?<x>);(?<x>); */
final int FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9); /* a{n}?=(?:a{n});? */
+ final int ALLOW_MULTIPLEX_DEFINITION_NAME_CALL = (1<<10); /* (?<x>)(?<x>)(?&x) */
/* syntax (behavior); in char class [...] */
final int NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20); /* [^...] */
diff --git a/src/org/joni/constants/TokenType.java b/src/org/joni/constants/TokenType.java
index 59aa094..80f3079 100644
--- a/src/org/joni/constants/TokenType.java
+++ b/src/org/joni/constants/TokenType.java
@@ -39,6 +39,9 @@ public enum TokenType {
CC_OPEN,
QUOTE_OPEN,
CHAR_PROPERTY, /* \p{...}, \P{...} */
+ LINEBREAK,
+ EXTENDED_GRAPHEME_CLUSTER,
+ KEEP,
/* in cc */
CC_CLOSE,
CC_RANGE,
diff --git a/src/org/joni/exception/ErrorMessages.java b/src/org/joni/exception/ErrorMessages.java
index 683ff62..008ea0b 100644
--- a/src/org/joni/exception/ErrorMessages.java
+++ b/src/org/joni/exception/ErrorMessages.java
@@ -63,6 +63,7 @@ public interface ErrorMessages extends org.jcodings.exception.ErrorMessages {
final String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type";
final String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind";
final String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}";
+ final String ERR_INVALID_CONDITION_PATTERN = "invalid conditional pattern";
/* values error (syntax error) */
final String ERR_TOO_BIG_NUMBER = "too big number";
diff --git a/test/org/joni/test/TestA.java b/test/org/joni/test/TestA.java
index d9403da..de5e3b5 100644
--- a/test/org/joni/test/TestA.java
+++ b/test/org/joni/test/TestA.java
@@ -500,6 +500,34 @@ public class TestA extends Test {
x2s("(?<=\\babc)d", " abcd", 4, 5);
x2s("(?<=\\Babc)d", "aabcd", 4, 5);
+ x2s("([^\\s]+),(.*)+", " xxxx, xxx xxxxxx xxxxxxxxx xxxxxxx, xxxx xxxxx xxxxx ", 1, 62);
+ x2s(",(.*)+", " xxxx, xxx xxxxxx xxxxxxxxx xxxxxxx, xxxx xxxxx xxxxx ", 5, 62);
+
+ x2s("(?:(a)|(b))(?(1)cd)e", "acde", 0, 4);
+ ns("(?:(a)|(b))(?(1)cd)e", "ae");
+ x2s("(?:(a)|(b))(?(2)cd)e", "ae", 0, 2);
+ ns("(?:(a)|(b))(?(2)cd)e", "acde");
+ x2s("(?:(a)|(b))(?(1)c|d)", "ac", 0, 2);
+ x2s("(?:(a)|(b))(?(1)c|d)", "bd", 0, 2);
+ ns("(?:(a)|(b))(?(1)c|d)", "ad");
+ ns("(?:(a)|(b))(?(1)c|d)", "bc");
+ x2s("(?:(a)|(b))(?:(?(1)cd)e|fg)", "acde", 0, 4);
+ x2s("(?:(a)|(b))(?:(?(1)cd|x)e|fg)", "bxe", 0, 3);
+ ns("(?:(a)|(b))(?:(?(2)cd|x)e|fg)", "bxe");
+ x2s("(?:(?<x>a)|(?<y>b))(?:(?(<x>)cd|x)e|fg)", "bxe", 0, 3);
+ ns("(?:(?<x>a)|(?<y>b))(?:(?(<y>)cd|x)e|fg)", "bxe");
+ x2s("((?<=a))?(?(1)b|c)", "abc", 1, 2);
+ x2s("((?<=a))?(?(1)b|c)", "bc", 1, 2);
+ x2s("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xy", 0, 2);
+ x2s("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yx", 0, 2);
+ ns("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xx");
+ ns("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yy");
+
+ x2s("\\R", "\n", 0, 1);
+ x2s("\\R", "\r", 0, 1);
+ x2s("\\R{3}", "\r\r\n\n", 0, 4);
+
+ x2s("\\X{5}", "ăăab\n", 0, 5);
}
public static void main(String[] args) throws Throwable{
diff --git a/src/org/joni/constants/CCVALTYPE.java b/test/org/joni/test/TestAU.java
similarity index 62%
rename from src/org/joni/constants/CCVALTYPE.java
rename to test/org/joni/test/TestAU.java
index b2bcb30..0d37a88 100644
--- a/src/org/joni/constants/CCVALTYPE.java
+++ b/test/org/joni/test/TestAU.java
@@ -17,10 +17,36 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-package org.joni.constants;
+package org.joni.test;
-public enum CCVALTYPE {
- SB,
- CODE_POINT,
- CLASS
+import org.jcodings.Encoding;
+import org.jcodings.specific.ASCIIEncoding;
+import org.joni.Option;
+import org.joni.Syntax;
+
+public class TestAU extends Test {
+
+ public int option() {
+ return Option.DEFAULT;
+ }
+
+ public Encoding encoding() {
+ return ASCIIEncoding.INSTANCE;
+ }
+
+ public String testEncoding() {
+ return "utf-8";
+ }
+
+ public Syntax syntax() {
+ return Syntax.DEFAULT;
+ }
+
+ public void test() throws InterruptedException {
+ x2s("\\X{5}", "あいab\n", 0, 5);
+ }
+
+ public static void main(String[] args) throws Throwable {
+ new TestAU().run(); // standalone entry point: run this class's own test() suite
+ }
}
diff --git a/test/org/joni/test/TestJoni.java b/test/org/joni/test/TestJoni.java
index b7d5744..d7f4fe6 100644
--- a/test/org/joni/test/TestJoni.java
+++ b/test/org/joni/test/TestJoni.java
@@ -30,6 +30,8 @@ public class TestJoni extends TestCase {
private Test testLookBehind;
private Test testu8;
private Test testInterrupt;
+ private Test testPerl;
+ private Test testAsciiViaUtf;
protected void setUp() {
testa = new TestA();
@@ -39,6 +41,8 @@ public class TestJoni extends TestCase {
testu8 = new TestU8();
testLookBehind = new TestLookBehind();
testInterrupt = new TestInterrupt();
+ testPerl = new TestPerl();
+ testAsciiViaUtf = new TestAU();
}
protected void tearDown() {
@@ -52,6 +56,7 @@ public class TestJoni extends TestCase {
public void testAscii() {
testJoniTest(testa);
+ testJoniTest(testAsciiViaUtf);
}
public void testEUCJP() {
@@ -71,4 +76,9 @@ public class TestJoni extends TestCase {
public void testInterrupt() {
testJoniTest(testInterrupt);
}
+
+ public void testPerl() {
+ testJoniTest(testPerl);
+ }
+
}
diff --git a/src/org/joni/constants/CCSTATE.java b/test/org/joni/test/TestPerl.java
similarity index 63%
rename from src/org/joni/constants/CCSTATE.java
rename to test/org/joni/test/TestPerl.java
index 669b821..334f334 100644
--- a/src/org/joni/constants/CCSTATE.java
+++ b/test/org/joni/test/TestPerl.java
@@ -17,11 +17,36 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-package org.joni.constants;
+package org.joni.test;
-public enum CCSTATE {
- VALUE,
- RANGE,
- COMPLETE,
- START
+import org.joni.Option;
+import org.joni.Syntax;
+import org.jcodings.Encoding;
+import org.jcodings.specific.ASCIIEncoding;
+
+public class TestPerl extends Test {
+
+ public int option() {
+ return Option.DEFAULT;
+ }
+
+ public Encoding encoding() {
+ return ASCIIEncoding.INSTANCE;
+ }
+
+ public String testEncoding() {
+ return "iso-8859-2";
+ }
+
+ public Syntax syntax() {
+ return Syntax.PerlNG;
+ }
+
+ public void test() throws InterruptedException {
+
+ }
+
+ public static void main(String[] args) throws Throwable{
+ new TestPerl().run();
+ }
}
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index 2479e74..9723100 100644
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -35,7 +35,7 @@ public class TestU8 extends Test {
}
public String testEncoding() {
- return "iso-8859-1";
+ return "utf-8";
}
public Syntax syntax() {
@@ -80,6 +80,12 @@ public class TestU8 extends Test {
x2s("(?i:!\\[CDAT)", "![CDAT", 0, 6);
x2s("(?i:\\!\\[CDAa)", "\\![CDAa", 1, 7);
x2s("(?i:\\!\\[CDAb)", "\\![CDAb", 1, 7);
+
+ x2s("\\R", "\u0085", 0, 2);
+ x2s("\\R", "\u2028", 0, 3);
+ x2s("\\R", "\u2029", 0, 3);
+
+ x2s("\\X", "\u306F\u309A\n", 0, 3);
}
public static void main(String[] args) throws Throwable {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits
mailing list