[jruby-joni] 58/279: Imported Upstream version 1.1.3
Hideki Yamane
henrich at moszumanska.debian.org
Mon Nov 16 11:26:45 UTC 2015
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit fe86a384b8c781f0f030a22b9c37b9f75be1c1ba
Author: Torsten Werner <twerner at debian.org>
Date: Sat Jul 31 18:51:30 2010 +0200
Imported Upstream version 1.1.3
---
MANIFEST.MF | 2 +
build.xml | 39 +
pom.xml | 131 ++
src/org/joni/Analyser.java | 2209 +++++++++++++++++++++++++
src/org/joni/ApplyCaseFold.java | 99 ++
src/org/joni/ApplyCaseFoldArg.java | 35 +
src/org/joni/ArrayCompiler.java | 1237 ++++++++++++++
src/org/joni/AsmCompiler.java | 109 ++
src/org/joni/AsmCompilerSupport.java | 267 +++
src/org/joni/BitSet.java | 115 ++
src/org/joni/BitStatus.java | 55 +
src/org/joni/ByteCodeMachine.java | 1665 +++++++++++++++++++
src/org/joni/ByteCodePrinter.java | 360 ++++
src/org/joni/CaptureTreeNode.java | 74 +
src/org/joni/CodeRangeBuffer.java | 380 +++++
src/org/joni/Compiler.java | 190 +++
src/org/joni/Config.java | 87 +
src/org/joni/Lexer.java | 1385 ++++++++++++++++
src/org/joni/Matcher.java | 574 +++++++
src/org/joni/MatcherFactory.java | 31 +
src/org/joni/MinMaxLen.java | 139 ++
src/org/joni/NameEntry.java | 97 ++
src/org/joni/NativeMachine.java | 27 +
src/org/joni/NodeOptInfo.java | 127 ++
src/org/joni/OptAnchorInfo.java | 92 +
src/org/joni/OptEnvironment.java | 39 +
src/org/joni/OptExactInfo.java | 171 ++
src/org/joni/OptMapInfo.java | 129 ++
src/org/joni/Option.java | 122 ++
src/org/joni/Parser.java | 1028 ++++++++++++
src/org/joni/Regex.java | 430 +++++
src/org/joni/Region.java | 66 +
src/org/joni/ScanEnvironment.java | 140 ++
src/org/joni/ScannerSupport.java | 179 ++
src/org/joni/SearchAlgorithm.java | 528 ++++++
src/org/joni/StackEntry.java | 164 ++
src/org/joni/StackMachine.java | 621 +++++++
src/org/joni/Syntax.java | 606 +++++++
src/org/joni/Token.java | 172 ++
src/org/joni/UnsetAddrList.java | 69 +
src/org/joni/WarnCallback.java | 32 +
src/org/joni/ast/AnchorNode.java | 92 +
src/org/joni/ast/AnyCharNode.java | 40 +
src/org/joni/ast/BackRefNode.java | 98 ++
src/org/joni/ast/CClassNode.java | 531 ++++++
src/org/joni/ast/CTypeNode.java | 50 +
src/org/joni/ast/CallNode.java | 86 +
src/org/joni/ast/ConsAltNode.java | 154 ++
src/org/joni/ast/EncloseNode.java | 151 ++
src/org/joni/ast/Node.java | 136 ++
src/org/joni/ast/QuantifierNode.java | 272 +++
src/org/joni/ast/StateNode.java | 232 +++
src/org/joni/ast/StringNode.java | 209 +++
src/org/joni/bench/AbstractBench.java | 50 +
src/org/joni/bench/BenchGreedyBacktrack.java | 7 +
src/org/joni/bench/BenchRailsRegs.java | 31 +
src/org/joni/bench/BenchSeveralRegexps.java | 17 +
src/org/joni/constants/AnchorType.java | 58 +
src/org/joni/constants/Arguments.java | 31 +
src/org/joni/constants/AsmConstants.java | 49 +
src/org/joni/constants/CCSTATE.java | 27 +
src/org/joni/constants/CCVALTYPE.java | 26 +
src/org/joni/constants/EncloseType.java | 29 +
src/org/joni/constants/MetaChar.java | 31 +
src/org/joni/constants/NodeStatus.java | 39 +
src/org/joni/constants/NodeType.java | 66 +
src/org/joni/constants/OPCode.java | 387 +++++
src/org/joni/constants/OPSize.java | 75 +
src/org/joni/constants/Reduce.java | 60 +
src/org/joni/constants/RegexState.java | 28 +
src/org/joni/constants/StackPopLevel.java | 27 +
src/org/joni/constants/StackType.java | 51 +
src/org/joni/constants/StringType.java | 27 +
src/org/joni/constants/SyntaxProperties.java | 124 ++
src/org/joni/constants/TargetInfo.java | 27 +
src/org/joni/constants/TokenType.java | 48 +
src/org/joni/constants/Traverse.java | 26 +
src/org/joni/exception/ErrorMessages.java | 92 +
src/org/joni/exception/InternalException.java | 28 +
src/org/joni/exception/JOniException.java | 28 +
src/org/joni/exception/SyntaxException.java | 28 +
src/org/joni/exception/ValueException.java | 37 +
test/org/joni/test/Test.java | 194 +++
test/org/joni/test/TestA.java | 481 ++++++
test/org/joni/test/TestC.java | 736 ++++++++
test/org/joni/test/TestCornerCases.java | 62 +
test/org/joni/test/TestCrnl.java | 86 +
test/org/joni/test/TestJoni.java | 37 +
test/org/joni/test/TestU.java | 770 +++++++++
89 files changed, 19993 insertions(+)
diff --git a/MANIFEST.MF b/MANIFEST.MF
new file mode 100644
index 0000000..ae40ba2
--- /dev/null
+++ b/MANIFEST.MF
@@ -0,0 +1,2 @@
+Implementation-Title: Joni (java port of Oniguruma)
+Implementation-Version: 1.1.1
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..04a54ef
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" ?>
+<!-- Ant build for Joni: compiles the sources and packs them into a jar; "build" is the default target. -->
+<project name="Joni" default="build">
+
+ <!-- Directory layout and artifact name shared by the targets below. -->
+ <property name="src.dir" value="src" />
+ <property name="bin.dir" value="target/classes" />
+ <property name="dist.dir" value="target" />
+ <property name="jar.name" value="joni.jar" />
+
+ <!-- Removes build output. bin.dir lies underneath dist.dir, so the second delete also covers the first. -->
+ <target name="clean">
+ <delete dir="${bin.dir}" />
+ <delete dir="${dist.dir}" />
+
+ </target>
+
+ <!-- Compiles everything under src.dir into bin.dir. No classpath is configured on the javac task here. -->
+ <target name="compile">
+ <mkdir dir="${bin.dir}" />
+ <javac srcdir="${src.dir}" destdir="${bin.dir}"/>
+ </target>
+
+ <!-- Packs the compiled classes into dist.dir/jar.name, based on MANIFEST.MF plus build metadata. -->
+ <target name="build" depends="compile">
+ <mkdir dir="${dist.dir}" />
+
+ <!-- Captures the build date and time for the manifest attributes below. -->
+ <tstamp>
+ <format property="buildDate" pattern="yyyy-MM-dd" />
+ <format property="buildTime" pattern="HH:mm:ss" />
+ </tstamp>
+
+ <jar destfile="${dist.dir}/${jar.name}" manifest="MANIFEST.MF">
+ <fileset dir="${bin.dir}" />
+
+ <!-- Extra attributes added on top of the entries read from MANIFEST.MF. -->
+ <manifest>
+ <attribute name="Built-By" value="${user.name}" />
+ <attribute name="Built-Date" value="${buildDate}" />
+ <attribute name="Built-Time" value="${buildTime}" />
+ </manifest>
+ </jar>
+
+ </target>
+</project>
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..606f9d9
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,131 @@
+<?xml version="1.0" ?>
+<!-- Maven descriptor for the joni jar (org.jruby.joni:joni). -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.jruby.joni</groupId>
+ <artifactId>joni</artifactId>
+ <packaging>jar</packaging>
+ <version>1.1.3</version>
+ <name>Joni</name>
+ <description>
+ Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma
+ that uses byte arrays directly instead of java Strings and chars
+ </description>
+
+ <issueManagement>
+ <system>JIRA</system>
+ <url>http://jira.codehaus.org/browse/JRUBY</url>
+ </issueManagement>
+
+ <scm>
+ <connection>scm:svn:http://svn.codehaus.org/jruby</connection>
+ <developerConnection>scm:svn:https://svn.codehaus.org/jruby</developerConnection>
+ <url>http://svn.codehaus.org/jruby</url>
+ </scm>
+
+ <licenses>
+ <license>
+ <name>MIT License</name>
+ <url>http://www.opensource.org/licenses/mit-license.php</url>
+ <distribution>repo</distribution>
+ </license>
+ </licenses>
+
+ <!-- Deployment targets; the dav: URLs rely on the wagon-webdav build extension declared further down. -->
+ <distributionManagement>
+ <repository>
+ <id>codehaus-jruby-repository</id>
+ <name>JRuby Central Repository</name>
+ <url>dav:https://dav.codehaus.org/repository/jruby</url>
+ </repository>
+ <snapshotRepository>
+ <id>codehaus-jruby-snapshot-repository</id>
+ <name>JRuby Central Development Repository</name>
+ <url>dav:https://dav.codehaus.org/snapshots.repository/jruby</url>
+ </snapshotRepository>
+ <site>
+ <id>codehaus-jruby-site</id>
+ <name>JRuby Maven site</name>
+ <url>dav:https://dav.codehaus.org/jruby/info</url>
+ </site>
+ </distributionManagement>
+
+ <!-- Additional repository used for dependency resolution: releases only, snapshots disabled. -->
+ <repositories>
+ <repository>
+ <id>codehaus</id>
+ <name>Codehaus Repository</name>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ <url>http://repository.codehaus.org</url>
+ </repository>
+ </repositories>
+
+ <developers>
+ <developer>
+ <id>lopex</id>
+ <name>Marcin Mielzynski</name>
+ <email>lopx at gazeta.pl</email>
+ </developer>
+ </developers>
+
+ <!-- jcodings is the only dependency without a restricted scope; junit is test-only and asm is "provided". -->
+ <dependencies>
+ <dependency>
+ <groupId>org.jruby.jcodings</groupId>
+ <artifactId>jcodings</artifactId>
+ <version>1.0.2</version>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ <version>3.0</version>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
+
+ <!-- Non-default layout: sources live in src/ and tests in test/ (same src directory the Ant build uses). -->
+ <build>
+ <sourceDirectory>src</sourceDirectory>
+ <testSourceDirectory>test</testSourceDirectory>
+ <finalName>joni</finalName>
+ <extensions>
+ <!-- NOTE(review): no version is pinned for this extension; confirm the build resolves it reproducibly. -->
+ <extension>
+ <groupId>org.apache.maven.wagon</groupId>
+ <artifactId>wagon-webdav</artifactId>
+ </extension>
+ </extensions>
+ <plugins>
+ <!-- Compile for Java 5 source and bytecode level. -->
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>1.5</source>
+ <target>1.5</target>
+ </configuration>
+ </plugin>
+ <!-- Only TestJoni.java is matched as a test class; presumably it aggregates the other test classes (verify). -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <includes>
+ <include>**/TestJoni.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ <!-- Reuse the hand-written MANIFEST.MF for the jar. -->
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <archive>
+ <manifestFile>MANIFEST.MF</manifestFile>
+ </archive>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
new file mode 100644
index 0000000..cce91ba
--- /dev/null
+++ b/src/org/joni/Analyser.java
@@ -0,0 +1,2209 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAll;
+import static org.joni.BitStatus.bsAt;
+import static org.joni.BitStatus.bsClear;
+import static org.joni.BitStatus.bsOnAt;
+import static org.joni.BitStatus.bsOnAtSimple;
+import static org.joni.Option.isCaptureGroup;
+import static org.joni.Option.isFindCondition;
+import static org.joni.Option.isIgnoreCase;
+import static org.joni.Option.isMultiline;
+import static org.joni.ast.ConsAltNode.newAltNode;
+import static org.joni.ast.ConsAltNode.newListNode;
+import static org.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import java.util.HashSet;
+
+import org.jcodings.CaseFoldCodeItem;
+import org.jcodings.constants.CharacterType;
+import org.joni.ast.AnchorNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.constants.AnchorType;
+import org.joni.constants.EncloseType;
+import org.joni.constants.NodeType;
+import org.joni.constants.RegexState;
+import org.joni.constants.StackPopLevel;
+import org.joni.constants.TargetInfo;
+
+final class Analyser extends Parser {
+
+ /**
+ * Creates an analyser; every argument is forwarded unchanged to the {@code Parser} constructor.
+ *
+ * @param env scan environment handed to the parser
+ * @param bytes pattern bytes
+ * @param p presumably the start offset of the pattern inside {@code bytes} (confirm against Parser)
+ * @param end presumably the end offset of the pattern inside {@code bytes} (confirm against Parser)
+ */
+ protected Analyser(ScanEnvironment env, byte[]bytes, int p, int end) {
+ super(env, bytes, p, end);
+ }
+
+    /**
+     * Runs the whole compilation pipeline for the pattern held by this analyser:
+     * resets the per-regex counters, parses the pattern, resolves group numbering and
+     * subexpression calls, analyses the tree ({@code setupTree}), derives the backtrack
+     * memory masks and the stack pop level, gathers optimization info and finally emits
+     * the program through {@link ArrayCompiler}.
+     *
+     * {@code regex.state} is {@code COMPILING} while this runs and {@code NORMAL} once it
+     * completes normally.
+     */
+    protected final void compile() {
+        regex.state = RegexState.COMPILING;
+
+        if (Config.DEBUG) {
+            Config.log.println(regex.encStringToString(bytes, getBegin(), getEnd()));
+        }
+
+        reset();
+
+        regex.numMem = 0;
+        regex.numRepeat = 0;
+        regex.numNullCheck = 0;
+        //regex.repeatRangeAlloc = 0;
+        regex.repeatRangeLo = null;
+        regex.repeatRangeHi = null;
+        regex.numCombExpCheck = 0;
+
+        parse();
+
+        if (Config.USE_NAMED_GROUP) {
+            /* mixed use named group and no-named group */
+            if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(regex.options)) {
+                if (env.numNamed != env.numMem) {
+                    root = disableNoNameGroupCapture(root);
+                } else {
+                    numberedRefCheck(root);
+                }
+            }
+        } // USE_NAMED_GROUP
+
+        if (Config.USE_NAMED_GROUP) {
+            if (env.numCall > 0) {
+                env.unsetAddrList = new UnsetAddrList(env.numCall);
+                setupSubExpCall(root);
+                subexpRecursiveCheckTrav(root);
+                // reports never-ending recursion by raising a value exception
+                subexpInfRecursiveCheckTrav(root);
+                regex.numCall = env.numCall;
+            } else {
+                regex.numCall = 0;
+            }
+        } // USE_NAMED_GROUP
+
+        setupTree(root, 0);
+        if (Config.DEBUG_PARSE_TREE) {
+            root.verifyTree(new HashSet<Node>(),env.reg.warnings);
+            Config.log.println(root + "\n");
+        }
+
+        regex.captureHistory = env.captureHistory;
+        // As in Oniguruma's onig_compile: groups that record capture history are added to
+        // the memory-start mask as well as to the memory-end mask below.
+        regex.btMemStart = env.btMemStart | regex.captureHistory;
+
+        if (isFindCondition(regex.options)) {
+            regex.btMemEnd = bsAll();
+        } else {
+            regex.btMemEnd = env.btMemEnd | regex.captureHistory;
+        }
+
+        if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+            if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) {
+                setupCombExpCheck(root, 0);
+
+                if (Config.USE_SUBEXP_CALL && env.hasRecursion) {
+                    env.numCombExpCheck = 0;
+                } else if (env.combExpMaxRegNum > 0) {
+                    // Group numbers are 1-based and combExpMaxRegNum is itself a valid group
+                    // number, so the upper bound is inclusive (the reference loop uses <=).
+                    for (int i=1; i<=env.combExpMaxRegNum; i++) {
+                        if (bsAt(env.backrefedMem, i)) {
+                            env.numCombExpCheck = 0;
+                            break;
+                        }
+                    }
+                }
+            }
+            regex.numCombExpCheck = env.numCombExpCheck;
+        } // USE_COMBINATION_EXPLOSION_CHECK
+
+        regex.clearOptimizeInfo();
+
+        if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root);
+
+        env.memNodes = null;
+
+        if (regex.numRepeat != 0 || regex.btMemEnd != 0) {
+            regex.stackPopLevel = StackPopLevel.ALL;
+        } else if (regex.btMemStart != 0) {
+            regex.stackPopLevel = StackPopLevel.MEM_START;
+        } else {
+            regex.stackPopLevel = StackPopLevel.FREE;
+        }
+
+        new ArrayCompiler(this).compile();
+        //new AsmCompiler(this).compile();
+
+        if (Config.DEBUG_COMPILE) {
+            if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString());
+            Config.log.println("stack used: " + regex.stackNeeded);
+            Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
+        } // DEBUG_COMPILE
+
+        regex.state = RegexState.NORMAL;
+    }
+
+ /**
+ * Rewrites the subtree rooted at {@code node} so that only named memory groups remain.
+ * Named groups are renumbered consecutively and the mapping old number to new number is
+ * recorded in {@code map}; unnamed memory groups are replaced by their own (recursively
+ * processed) target.
+ *
+ * @param node subtree to rewrite
+ * @param map indexed by old group number; receives the new number of every named group
+ * @param counter single-element array holding the last group number handed out
+ * @return the node that should take the place of {@code node} in its parent
+ */
+ private Node noNameDisableMap(Node node, int[]map, int[]counter) {
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ can.setCar(noNameDisableMap(can.car, map, counter));
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ Node target = qn.target;
+ Node old = target;
+ target = noNameDisableMap(target, map, counter);
+
+ // Only touch the quantifier when its target was actually replaced; if the replacement is
+ // itself a quantifier, the two nested quantifiers are reduced.
+ if (target != old) {
+ qn.setTarget(target);
+ if (target.getType() == NodeType.QTFR) qn.reduceNestedQuantifier((QuantifierNode)target);
+ }
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.type == EncloseType.MEMORY) {
+ if (en.isNamedGroup()) {
+ // Named group: hand out the next number and remember old -> new in the map.
+ counter[0]++;
+ map[en.regNum] = counter[0];
+ en.regNum = counter[0];
+ //en.target = noNameDisableMap(en.target, map, counter);
+ en.setTarget(noNameDisableMap(en.target, map, counter)); // ???
+ } else {
+ // Unnamed group: detach the enclose node and return its processed target instead.
+ node = en.target;
+ en.target = null; // remove first enclose: /(a)(?<b>c)/
+ node = noNameDisableMap(node, map, counter);
+ }
+ } else {
+ //en.target = noNameDisableMap(en.target, map, counter);
+ en.setTarget(noNameDisableMap(en.target, map, counter)); // ???
+ }
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return node;
+ }
+
+ private void renumberByMap(Node node, int[]map) {
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ renumberByMap(can.car, map);
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ renumberByMap(((QuantifierNode)node).target, map);
+ break;
+
+ case NodeType.ENCLOSE:
+ renumberByMap(((EncloseNode)node).target, map);
+ break;
+
+ case NodeType.BREF:
+ ((BackRefNode)node).renumber(map);
+ break;
+
+ default:
+ break;
+ } // switch
+ }
+
+    /**
+     * Verifies that the subtree rooted at {@code node} contains no back-reference that is
+     * not a name reference; the first offending node is reported through
+     * {@code newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)}.
+     */
+    protected final void numberedRefCheck(Node node) {
+        final int type = node.getType();
+
+        if (type == NodeType.LIST || type == NodeType.ALT) {
+            for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+                numberedRefCheck(cell.car);
+            }
+            return;
+        }
+
+        if (type == NodeType.QTFR) {
+            numberedRefCheck(((QuantifierNode)node).target);
+            return;
+        }
+
+        if (type == NodeType.ENCLOSE) {
+            numberedRefCheck(((EncloseNode)node).target);
+            return;
+        }
+
+        if (type == NodeType.BREF) {
+            BackRefNode backRef = (BackRefNode)node;
+            if (!backRef.isNameRef()) newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+        }
+    }
+
+    /**
+     * Drops every unnamed capture group from the tree and renumbers the named ones:
+     * rewrites the tree, renumbers back-references, compacts {@code env.memNodes},
+     * translates the capture-history bits to the new numbers and renumbers the regex name table.
+     *
+     * @param root tree to rewrite
+     * @return the (possibly replaced) root of the rewritten tree
+     */
+    protected final Node disableNoNameGroupCapture(Node root) {
+        // A fresh int[] is zero-filled, so every old group number starts out unmapped (0).
+        final int[] map = new int[env.numMem + 1];
+        final int[] counter = {0};
+
+        root = noNameDisableMap(root, map, counter);
+        renumberByMap(root, map);
+
+        // Slide the surviving group nodes down over the slots of the removed ones.
+        int pos = 1;
+        for (int i = 1; i <= env.numMem; i++) {
+            if (map[i] <= 0) continue;
+            env.memNodes[pos++] = env.memNodes[i];
+        }
+
+        // Re-express the capture-history bit set in terms of the new group numbers.
+        final int oldHistory = env.captureHistory;
+        env.captureHistory = bsClear();
+        for (int i = 1; i <= Config.MAX_CAPTURE_HISTORY_GROUP; i++) {
+            if (bsAt(oldHistory, i)) env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]);
+        }
+
+        env.numMem = env.numNamed;
+        regex.numMem = env.numNamed;
+        regex.renumberNameTable(map);
+
+        return root;
+    }
+
+ private void swap(Node a, Node b) {
+ a.swap(b);
+
+ if (root == b) {
+ root = a;
+ } else if (root == a) {
+ root = b;
+ }
+ }
+
+    // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+    /**
+     * Classifies the subtree rooted at {@code node} for the empty-loop check of a quantifier.
+     *
+     * @return {@code TargetInfo.IS_EMPTY_REC} as soon as a recursive call is found,
+     *         {@code TargetInfo.IS_EMPTY_MEM} as soon as a memory group is found,
+     *         otherwise the largest value reported by any child, or 0 if none applies
+     */
+    private int quantifiersMemoryInfo(Node node) {
+        int info = 0;
+
+        switch(node.getType()) {
+        case NodeType.LIST:
+        case NodeType.ALT:
+            ConsAltNode can = (ConsAltNode)node;
+            do {
+                int v = quantifiersMemoryInfo(can.car);
+                if (v > info) info = v;
+            } while ((can = can.cdr) != null);
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (cn.isRecursion()) {
+                    return TargetInfo.IS_EMPTY_REC; /* tiny version */
+                }
+                info = quantifiersMemoryInfo(cn.target);
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            // A {0} repeat never enters its target, so the target cannot contribute.
+            if (qn.upper != 0) {
+                info = quantifiersMemoryInfo(qn.target);
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY:
+                return TargetInfo.IS_EMPTY_MEM;
+
+            case EncloseType.OPTION:
+            // Referenced through EncloseType, like every other enclose switch in this class.
+            case EncloseType.STOP_BACKTRACK:
+                info = quantifiersMemoryInfo(en.target);
+                break;
+
+            default:
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.BREF:
+        case NodeType.STR:
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+        case NodeType.ANCHOR:
+        default:
+            break;
+        } // switch
+
+        return info;
+    }
+
+    /**
+     * Computes a lower bound, in bytes, for the length of any match of {@code node}.
+     * For memory groups the result is cached on the node when subexpression calls are enabled.
+     *
+     * An out-of-range back-reference is reported through
+     * {@code newValueException(ERR_INVALID_BACKREF)}.
+     *
+     * @return the minimum match length; 0 for anchors, recursive back-references and
+     *         quantifiers whose lower bound is 0
+     */
+    private int getMinMatchLength(Node node) {
+        int min = 0;
+
+        switch (node.getType()) {
+        case NodeType.BREF:
+            BackRefNode br = (BackRefNode)node;
+            if (br.isRecursion()) break;
+
+            // With several candidate groups the shortest one determines the bound.
+            if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+            min = getMinMatchLength(env.memNodes[br.back[0]]);
+
+            for (int i=1; i<br.backNum; i++) {
+                if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+                int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
+                if (min > tmin) min = tmin;
+            }
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (cn.isRecursion()) {
+                    // Do not descend into a recursive call; use the cached value if there is one.
+                    EncloseNode en = (EncloseNode)cn.target;
+                    if (en.isMinFixed()) min = en.minLength;
+                } else {
+                    min = getMinMatchLength(cn.target);
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.LIST:
+            ConsAltNode can = (ConsAltNode)node;
+            do {
+                min += getMinMatchLength(can.car);
+            } while ((can = can.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode y = (ConsAltNode)node;
+            do {
+                Node x = y.car;
+                int tmin = getMinMatchLength(x);
+                if (y == node) {
+                    min = tmin;
+                } else if (min > tmin) {
+                    min = tmin;
+                }
+            } while ((y = y.cdr) != null);
+            break;
+
+        case NodeType.STR:
+            min = ((StringNode)node).length();
+            break;
+
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+            min = 1;
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            if (qn.lower > 0) {
+                min = getMinMatchLength(qn.target);
+                min = MinMaxLen.distanceMultiply(min, qn.lower);
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL) {
+                    if (en.isMinFixed()) {
+                        min = en.minLength;
+                    } else {
+                        min = getMinMatchLength(en.target);
+                        en.minLength = min;
+                        en.setMinFixed();
+                    }
+                    break;
+                } // USE_SUBEXP_CALL
+                // Without subexp calls there is no cache: fall through and measure the
+                // target directly, as the #ifdef in the C original does.
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK:
+                min = getMinMatchLength(en.target);
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+        default:
+            break;
+        } // switch
+
+        return min;
+    }
+
+    /**
+     * Computes an upper bound, in bytes, for the length of any match of {@code node}.
+     * Recursive references and unbounded repeats of a non-empty target yield
+     * {@code MinMaxLen.INFINITE_DISTANCE}. For memory groups the result is cached on the
+     * node when subexpression calls are enabled.
+     *
+     * An out-of-range back-reference is reported through
+     * {@code newValueException(ERR_INVALID_BACKREF)}.
+     */
+    private int getMaxMatchLength(Node node) {
+        int max = 0;
+
+        switch (node.getType()) {
+        case NodeType.LIST:
+            ConsAltNode ln = (ConsAltNode)node;
+            do {
+                int tmax = getMaxMatchLength(ln.car);
+                max = MinMaxLen.distanceAdd(max, tmax);
+            } while ((ln = ln.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode an = (ConsAltNode)node;
+            do {
+                int tmax = getMaxMatchLength(an.car);
+                if (max < tmax) max = tmax;
+            } while ((an = an.cdr) != null);
+            break;
+
+        case NodeType.STR:
+            max = ((StringNode)node).length();
+            break;
+
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+            max = enc.maxLengthDistance();
+            break;
+
+        case NodeType.BREF:
+            BackRefNode br = (BackRefNode)node;
+            if (br.isRecursion()) {
+                max = MinMaxLen.INFINITE_DISTANCE;
+                break;
+            }
+
+            // With several candidate groups the longest one determines the bound.
+            for (int i=0; i<br.backNum; i++) {
+                if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+                int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
+                if (max < tmax) max = tmax;
+            }
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (!cn.isRecursion()) {
+                    max = getMaxMatchLength(cn.target);
+                } else {
+                    max = MinMaxLen.INFINITE_DISTANCE;
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            if (qn.upper != 0) {
+                max = getMaxMatchLength(qn.target);
+                // A target that can only match empty stays at 0 however often it repeats.
+                if (max != 0) {
+                    if (!isRepeatInfinite(qn.upper)) {
+                        max = MinMaxLen.distanceMultiply(max, qn.upper);
+                    } else {
+                        max = MinMaxLen.INFINITE_DISTANCE;
+                    }
+                }
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL) {
+                    if (en.isMaxFixed()) {
+                        max = en.maxLength;
+                    } else {
+                        max = getMaxMatchLength(en.target);
+                        en.maxLength = max;
+                        en.setMaxFixed();
+                    }
+                    break;
+                } // USE_SUBEXP_CALL
+                // Without subexp calls there is no cache: fall through and measure the
+                // target directly instead of reporting a maximum of 0 for the group.
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK:
+                max = getMaxMatchLength(en.target);
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+        default:
+            break;
+        } // switch
+
+        return max;
+    }
+
+ // Values stored in returnCode by getCharLengthTree(Node, int) when no fixed length exists:
+ // the subtree has a variable character length ...
+ private static final int GET_CHAR_LEN_VARLEN = -1;
+ // ... or the variable length comes from an alternation reached at nesting level 1.
+ private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2;
+ /**
+ * Entry point: computes the fixed character length of {@code node}, starting at nesting level 0.
+ * The result is only meaningful when {@code returnCode} is 0 after the call.
+ */
+ protected final int getCharLengthTree(Node node) {
+ return getCharLengthTree(node, 0);
+ }
+
+    /**
+     * Computes the length of {@code node} in characters, provided that length is fixed.
+     *
+     * The outcome is reported through the {@code returnCode} field, which is reset to 0 on
+     * entry: 0 means the returned length is valid, {@code GET_CHAR_LEN_VARLEN} means the
+     * subtree has no fixed length, and {@code GET_CHAR_LEN_TOP_ALT_VARLEN} means the
+     * variable length comes from an alternation at nesting level 1.
+     *
+     * @param node subtree to measure
+     * @param level nesting depth of the caller; incremented on entry
+     * @return the fixed character length; only meaningful when {@code returnCode} is 0
+     */
+    private int getCharLengthTree(Node node, int level) {
+        level++;
+
+        int len = 0;
+        returnCode = 0;
+
+        switch(node.getType()) {
+        case NodeType.LIST:
+            ConsAltNode ln = (ConsAltNode)node;
+            do {
+                int tlen = getCharLengthTree(ln.car, level);
+                if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen);
+            } while (returnCode == 0 && (ln = ln.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode an = (ConsAltNode)node;
+            boolean varLen = false;
+
+            // Every branch must have the same length as the first one.
+            int tlen = getCharLengthTree(an.car, level);
+            while (returnCode == 0 && (an = an.cdr) != null) {
+                int tlen2 = getCharLengthTree(an.car, level);
+                if (returnCode == 0) {
+                    if (tlen != tlen2) varLen = true;
+                }
+            }
+
+            if (returnCode == 0) {
+                if (varLen) {
+                    if (level == 1) {
+                        returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN;
+                    } else {
+                        returnCode = GET_CHAR_LEN_VARLEN;
+                    }
+                } else {
+                    len = tlen;
+                }
+            }
+            break;
+
+        case NodeType.STR:
+            StringNode sn = (StringNode)node;
+            len = sn.length(enc);
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            // Only an exact repeat count {n,n} can have a fixed length.
+            if (qn.lower == qn.upper) {
+                tlen = getCharLengthTree(qn.target, level);
+                if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower);
+            } else {
+                returnCode = GET_CHAR_LEN_VARLEN;
+            }
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (!cn.isRecursion()) {
+                    len = getCharLengthTree(cn.target, level);
+                } else {
+                    returnCode = GET_CHAR_LEN_VARLEN;
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+            len = 1;
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch(en.type) {
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL) {
+                    if (en.isCLenFixed()) {
+                        len = en.charLength;
+                    } else {
+                        len = getCharLengthTree(en.target, level);
+                        // Cache only lengths that were actually determined.
+                        if (returnCode == 0) {
+                            en.charLength = len;
+                            en.setCLenFixed();
+                        }
+                    }
+                    break;
+                } // USE_SUBEXP_CALL
+                // Without subexp calls there is no cache: fall through and measure the
+                // target directly instead of reporting a length of 0 for the group.
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK:
+                len = getCharLengthTree(en.target, level);
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+            break;
+
+        default:
+            returnCode = GET_CHAR_LEN_VARLEN;
+        } // switch
+        return len;
+    }
+
+ /* x is not included y ==> 1 : 0 */
+ /**
+ * Conservative exclusivity test between two nodes: returns {@code true} only for the
+ * combinations handled below where what {@code x} matches is provably not matched by
+ * {@code y}; every unhandled combination returns {@code false}.
+ *
+ * The handled pairs are CTYPE/CTYPE, CCLASS/CTYPE(WORD), CCLASS/CCLASS, STR/CTYPE(WORD),
+ * STR/CCLASS and STR/STR. Pairs given in the opposite order are swapped and retried,
+ * which emulates the {@code goto retry} noted in the inline comments.
+ */
+ private boolean isNotIncluded(Node x, Node y) {
+ Node tmp;
+
+ // !retry:!
+ retry:while(true) {
+
+ int yType = y.getType();
+
+ switch(x.getType()) {
+ case NodeType.CTYPE:
+ switch(yType) {
+ case NodeType.CTYPE:
+ // Same character type with opposite negation: the two are complementary.
+ CTypeNode cny = (CTypeNode)y;
+ CTypeNode cnx = (CTypeNode)x;
+ return cny.ctype == cnx.ctype && cny.not != cnx.not;
+
+ case NodeType.CCLASS:
+ // !swap:!
+ tmp = x;
+ x = y;
+ y = tmp;
+ // !goto retry;!
+ continue retry;
+
+ case NodeType.STR:
+ // !goto swap;!
+ tmp = x;
+ x = y;
+ y = tmp;
+ continue retry;
+
+ default:
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CCLASS:
+ CClassNode xc = (CClassNode)x;
+
+ switch(yType) {
+ case NodeType.CTYPE:
+ switch(((CTypeNode)y).ctype) {
+ case CharacterType.WORD:
+ if (!((CTypeNode)y).not) {
+ // y is \w: exclusive only if x is a plain (not negated, no multibyte buffer)
+ // class that contains no single-byte word character.
+ if (xc.mbuf == null && !xc.isNot()) {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (xc.bs.at(i)) {
+ if (enc.isSbWord(i)) return false;
+ }
+ }
+ return true;
+ }
+ return false;
+ } else {
+ // y is \W: x must not accept any single-byte non-word character.
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (!enc.isSbWord(i)) {
+ if (!xc.isNot()) {
+ if (xc.bs.at(i)) return false;
+ } else {
+ if (!xc.bs.at(i)) return false;
+ }
+ }
+ }
+ return true;
+ }
+ // break; not reached
+
+ default:
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CCLASS:
+ CClassNode yc = (CClassNode)y;
+
+ // Any single-byte value accepted by both classes means they overlap.
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ boolean v = xc.bs.at(i);
+ if ((v && !xc.isNot()) || (!v && xc.isNot())) {
+ v = yc.bs.at(i);
+ if ((v && !yc.isNot()) || (!v && yc.isNot())) return false;
+ }
+ }
+ // No single-byte overlap: exclusive if at least one side has no multibyte buffer and is not negated.
+ if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true;
+ return false;
+ // break; not reached
+
+ case NodeType.STR:
+ // !goto swap;!
+ tmp = x;
+ x = y;
+ y = tmp;
+ continue retry;
+
+ default:
+ break;
+
+ } // inner switch
+ break; // case NodeType.CCLASS
+
+ case NodeType.STR:
+ StringNode xs = (StringNode)x;
+ // An empty string proves nothing.
+ if (xs.length() == 0) break;
+
+ switch (yType) {
+ case NodeType.CTYPE:
+ CTypeNode cy = ((CTypeNode)y);
+ switch (cy.ctype) {
+ case CharacterType.WORD:
+ // Result depends on whether the word test on the string's leading character agrees with y's negation.
+ if (enc.isMbcWord(xs.bytes, xs.p, xs.end)) {
+ return cy.not;
+ } else {
+ return !cy.not;
+ }
+
+ default:
+ break;
+
+ } // inner switch
+ break;
+
+ case NodeType.CCLASS:
+ // Decode the first character of the string and test it against the class.
+ CClassNode cc = (CClassNode)y;
+ int code = enc.mbcToCode(xs.bytes, xs.p, xs.p + enc.maxLength());
+ return !cc.isCodeInCC(enc, code);
+
+ case NodeType.STR:
+ StringNode ys = (StringNode)y;
+ int len = xs.length();
+ if (len > ys.length()) len = ys.length();
+ if (xs.isAmbig() || ys.isAmbig()) {
+ /* tiny version */
+ return false;
+ } else {
+ // Compare the common prefix byte by byte; any difference makes the strings exclusive.
+ for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) {
+ if (ys.bytes[p] != xs.bytes[q]) return true;
+ }
+ }
+ break;
+
+ default:
+ break;
+ } // inner switch
+
+ break; // case NodeType.STR
+
+ } // switch
+
+ break;
+ } // retry:while
+ return false;
+ }
+
+    /**
+     * Finds the node that determines how a match of {@code node} must begin.
+     *
+     * @param node subtree to inspect
+     * @param exact when {@code true}, character types and classes are rejected, and so are
+     *              non-raw strings while the ignore-case option is active
+     * @return the head node, or {@code null} when none can be determined
+     */
+    private Node getHeadValueNode(Node node, boolean exact) {
+        switch(node.getType()) {
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+            return exact ? null : node;
+
+        case NodeType.LIST:
+            return getHeadValueNode(((ConsAltNode)node).car, exact);
+
+        case NodeType.STR: {
+            StringNode str = (StringNode)node;
+            if (str.end <= str.p) return null; // ???
+
+            boolean rejected = exact && !str.isRaw() && isIgnoreCase(regex.options);
+            return rejected ? null : node;
+        }
+
+        case NodeType.QTFR: {
+            QuantifierNode quantifier = (QuantifierNode)node;
+            if (quantifier.lower <= 0) return null;
+            if (quantifier.headExact != null) return quantifier.headExact;
+            return getHeadValueNode(quantifier.target, exact);
+        }
+
+        case NodeType.ENCLOSE: {
+            EncloseNode enclose = (EncloseNode)node;
+
+            if (enclose.type == EncloseType.OPTION) {
+                // Inspect the target under the group's own options, then restore ours.
+                int savedOptions = regex.options;
+                regex.options = enclose.option;
+                Node head = getHeadValueNode(enclose.target, exact);
+                regex.options = savedOptions;
+                return head;
+            }
+
+            if (enclose.type == EncloseType.MEMORY || enclose.type == EncloseType.STOP_BACKTRACK) {
+                return getHeadValueNode(enclose.target, exact);
+            }
+            return null;
+        }
+
+        case NodeType.ANCHOR: {
+            AnchorNode anchor = (AnchorNode)node;
+            if (anchor.type != AnchorType.PREC_READ) return null;
+            return getHeadValueNode(anchor.target, exact);
+        }
+
+        default:
+            // BREF, ALT, CANY, CALL and anything else provide no head value
+            return null;
+        } // switch
+    }
+
+    // true: invalid
+    /**
+     * Checks that every node in the subtree is allowed by the given bit masks.
+     *
+     * @param typeMask allowed node types, tested against {@code getType2Bit()}
+     * @param encloseMask allowed enclose types
+     * @param anchorMask allowed anchor types
+     * @return {@code true} as soon as a disallowed node is found, {@code false} otherwise
+     */
+    private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) {
+        if ((node.getType2Bit() & typeMask) == 0) return true;
+
+        switch(node.getType()) {
+        case NodeType.LIST:
+        case NodeType.ALT:
+            for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+                if (checkTypeTree(cell.car, typeMask, encloseMask, anchorMask)) return true;
+            }
+            return false;
+
+        case NodeType.QTFR:
+            return checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
+
+        case NodeType.ENCLOSE: {
+            EncloseNode enclose = (EncloseNode)node;
+            if ((enclose.type & encloseMask) == 0) return true;
+            return checkTypeTree(enclose.target, typeMask, encloseMask, anchorMask);
+        }
+
+        case NodeType.ANCHOR: {
+            AnchorNode anchor = (AnchorNode)node;
+            if ((anchor.type & anchorMask) == 0) return true;
+            if (anchor.target == null) return false;
+            return checkTypeTree(anchor.target, typeMask, encloseMask, anchorMask);
+        }
+
+        default:
+            return false;
+        } // switch
+    }
+
+ // Result codes of subexpInfRecursiveCheck: a recursion was found / it is never-ending.
+ private static final int RECURSION_EXIST = 1;
+ private static final int RECURSION_INFINITE = 2;
+ /**
+ * Looks for a path from {@code node} back to an enclose node carrying mark1 (set by the
+ * caller on the group being checked).
+ *
+ * @param node subtree to inspect
+ * @param head {@code true} while nothing with a non-zero minimum match length precedes the
+ * current position inside the enclosing list
+ * @return 0 when no recursion is found, {@code RECURSION_EXIST} when the marked group is
+ * reached with {@code head} false, {@code RECURSION_INFINITE} when it is reached
+ * with {@code head} still true
+ */
+ private int subexpInfRecursiveCheck(Node node, boolean head) {
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ int min;
+ ConsAltNode x = (ConsAltNode)node;
+ do {
+ int ret = subexpInfRecursiveCheck(x.car, head);
+ if (ret == RECURSION_INFINITE) return ret;
+ r |= ret;
+ // Once an element with a non-zero minimum length has been passed, later elements are no longer at the head.
+ if (head) {
+ min = getMinMatchLength(x.car);
+ if (min != 0) head = false;
+ }
+ } while ((x = x.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ // r stays RECURSION_EXIST only if every branch reports it (results are AND-ed).
+ r = RECURSION_EXIST;
+ do {
+ int ret = subexpInfRecursiveCheck(can.car, head);
+ if (ret == RECURSION_INFINITE) return ret;
+ r &= ret;
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ r = subexpInfRecursiveCheck(qn.target, head);
+ // A target that may repeat zero times can be skipped, so its recursion does not count.
+ if (r == RECURSION_EXIST) {
+ if (qn.lower == 0) r = 0;
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpInfRecursiveCheck(an.target, head);
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CALL:
+ r = subexpInfRecursiveCheck(((CallNode)node).target, head);
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.isMark2()) {
+ // Already on the current descent path: stop here to avoid looping.
+ return 0;
+ } else if (en.isMark1()) {
+ // Back at the group under inspection.
+ return !head ? RECURSION_EXIST : RECURSION_INFINITE;
+ // throw exception here ???
+ } else {
+ // mark2 flags this group as being visited for the duration of the descent.
+ en.setMark2();
+ r = subexpInfRecursiveCheck(en.target, head);
+ en.clearMark2();
+ }
+ break;
+
+ default:
+ break;
+ } // switch
+ return r;
+ }
+
+ protected final int subexpInfRecursiveCheckTrav(Node node) { // traverses the tree and runs subexpInfRecursiveCheck on every enclose flagged as recursion
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ r = subexpInfRecursiveCheckTrav(can.car);
+ } while (r == 0 && (can = can.cdr) != null); // stop at the first non-zero result
+ break;
+
+ case NodeType.QTFR:
+ r = subexpInfRecursiveCheckTrav(((QuantifierNode)node).target);
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpInfRecursiveCheckTrav(an.target); // only these four anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.isRecursion()) {
+ en.setMark1(); // flag this enclose so the check below can recognise a path leading back to it
+ r = subexpInfRecursiveCheck(en.target, true);
+ if (r > 0) newValueException(ERR_NEVER_ENDING_RECURSION); // both RECURSION_EXIST and RECURSION_INFINITE are reported; presumably throws - confirm in newValueException
+ en.clearMark1();
+ }
+ r = subexpInfRecursiveCheckTrav(en.target); // continue the traversal inside the enclose regardless
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ private int subexpRecursiveCheck(Node node) { // returns non-zero when an enclose flagged mark1 is reachable from node; marks the CallNodes on such paths as recursive
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ r |= subexpRecursiveCheck(can.car); // every element is visited; results are OR-ed
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ r = subexpRecursiveCheck(((QuantifierNode)node).target);
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpRecursiveCheck(an.target); // only these four anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CALL:
+ CallNode cn = (CallNode)node;
+ r = subexpRecursiveCheck(cn.target);
+ if (r != 0) cn.setRecursion(); // this call lies on a path back to the enclose under test
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.isMark2()) {
+ return 0; // mark2 is set while this walk is inside the enclose (see below), so do not re-enter it
+ } else if (en.isMark1()) {
+ return 1; /* recursion */
+ } else {
+ en.setMark2();
+ r = subexpRecursiveCheck(en.target);
+ en.clearMark2();
+ }
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ private static final int FOUND_CALLED_NODE = 1; // result flag: the subtree contains an enclose for which isCalled() is true
+ protected final int subexpRecursiveCheckTrav(Node node) { // traverses the tree, flags called encloses that are recursive, and reports FOUND_CALLED_NODE
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ int ret = subexpRecursiveCheckTrav(can.car);
+ if (ret == FOUND_CALLED_NODE) {
+ r = FOUND_CALLED_NODE;
+ }
+ // else if (ret < 0) return ret; ???
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ r = subexpRecursiveCheckTrav(qn.target);
+ if (qn.upper == 0) {
+ if (r == FOUND_CALLED_NODE) qn.isRefered = true; // a zero-repeat quantifier whose target contains a called group is marked as referred to
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpRecursiveCheckTrav(an.target); // only these four anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (!en.isRecursion()) {
+ if (en.isCalled()) {
+ en.setMark1(); // flag this enclose so subexpRecursiveCheck can detect a path leading back to it
+ r = subexpRecursiveCheck(en.target);
+ if (r != 0) en.setRecursion();
+ en.clearMark1();
+ }
+ }
+ r = subexpRecursiveCheckTrav(en.target); // overwrites r from the check above; only the traversal result is returned
+ if (en.isCalled()) r |= FOUND_CALLED_NODE;
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ protected final void setupSubExpCall(Node node) { // resolves the target group of every CallNode in the tree and flags that group as called
+
+ switch(node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode ln = (ConsAltNode)node;
+ do {
+ setupSubExpCall(ln.car);
+ } while ((ln = ln.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ setupSubExpCall(can.car);
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ setupSubExpCall(((QuantifierNode)node).target);
+ break;
+
+ case NodeType.ENCLOSE:
+ setupSubExpCall(((EncloseNode)node).target);
+ break;
+
+ case NodeType.CALL:
+ CallNode cn = (CallNode)node;
+
+ if (cn.groupNum != 0) { // call by group number
+ int gNum = cn.groupNum;
+
+ if (Config.USE_NAMED_GROUP) {
+ if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) {
+ newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+ }
+ } // USE_NAMED_GROUP
+ if (gNum > env.numMem) newValueException(ERR_UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd); // group number exceeds the number of groups recorded in env
+
+ // !goto set_call_attr!; // remove duplication ?
+ cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes!
+ if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+
+ ((EncloseNode)cn.target).setCalled();
+ env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum);
+ cn.unsetAddrList = env.unsetAddrList;
+ } else { // groupNum == 0: call by name, resolved only when named groups are enabled
+ if (Config.USE_NAMED_GROUP) {
+ NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd);
+
+ if (ne == null) {
+ newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+ } else if (ne.backNum > 1) {
+ newValueException(ERR_MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd); // the name maps to more than one group
+ } else {
+ cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ?
+ // !set_call_attr:! (same steps as the numbered branch above)
+ cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes!
+ if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+
+ ((EncloseNode)cn.target).setCalled();
+ env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum);
+ cn.unsetAddrList = env.unsetAddrList;
+ }
+ }
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ setupSubExpCall(an.target); // only these four anchor types are descended into
+ break;
+ }
+ break;
+
+ } // switch
+ }
+
+ /* divide different length alternatives in look-behind.
+ (?<=A|B) ==> (?<=A)|(?<=B)
+ (?<!A|B) ==> (?<!A)(?<!B)
+ */
+ private void divideLookBehindAlternatives(Node node) { // node must be an AnchorNode whose target is a ConsAltNode (both are cast unconditionally below)
+ AnchorNode an = (AnchorNode)node;
+ int anchorType = an.type;
+
+ Node head = an.target;
+ Node np = ((ConsAltNode)head).car; // first alternative
+
+
+ swap(node, head); // helper defined elsewhere; presumably exchanges the two nodes' places in the tree - TODO confirm
+
+ Node tmp = node; // exchange the local references as well: node now names the alternation, head the anchor
+ node = head;
+ head = tmp;
+
+ ((ConsAltNode)node).setCar(head); // the original anchor becomes the first alternative ...
+ ((AnchorNode)head).setTarget(np); // ... and wraps only the first original alternative
+ np = node;
+
+ while ((np = ((ConsAltNode)np).cdr) != null) { // wrap each remaining alternative in a new anchor of the same type
+ AnchorNode insert = new AnchorNode(anchorType);
+ insert.setTarget(((ConsAltNode)np).car);
+ ((ConsAltNode)np).setCar(insert);
+ }
+
+ if (anchorType == AnchorType.LOOK_BEHIND_NOT) { // negative look-behind: all parts must hold, so the chain becomes a list
+ np = node;
+ do {
+ ((ConsAltNode)np).toListNode(); /* alt -> list */
+ } while ((np = ((ConsAltNode)np).cdr) != null);
+ }
+ }
+
+ private void setupLookBehind(Node node) { // stores the look-behind target's character length on the anchor, or splits/rejects it when the length varies
+ AnchorNode an = (AnchorNode)node;
+
+ int len = getCharLengthTree(an.target);
+ switch(returnCode) { // returnCode is a field not assigned here; presumably set by getCharLengthTree - TODO confirm
+ case 0:
+ an.charLength = len;
+ break;
+ case GET_CHAR_LEN_VARLEN:
+ newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ break;
+ case GET_CHAR_LEN_TOP_ALT_VARLEN:
+ if (syntax.differentLengthAltLookBehind()) { // the syntax permits alternatives of different lengths: split them into separate look-behinds
+ divideLookBehindAlternatives(node);
+ } else {
+ newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ }
+ }
+ }
+
+ private void nextSetup(Node node, Node nextNode) { // uses the node that follows (nextNode) to tune a greedy infinite quantifier found at node
+ // retry: the loop below re-runs with node replaced by the target of a memory enclose
+ retry: while(true) {
+
+ int type = node.getType();
+ if (type == NodeType.QTFR) {
+ QuantifierNode qn = (QuantifierNode)node;
+ if (qn.greedy && isRepeatInfinite(qn.upper)) {
+ if (Config.USE_QTFR_PEEK_NEXT) {
+ StringNode n = (StringNode)getHeadValueNode(nextNode, true);
+ /* '\0': for UTF-16BE etc... */
+ if (n != null && n.bytes[n.p] != 0) { // ?????????
+ qn.nextHeadExact = n; // remember the exact head of the following node on the quantifier
+ }
+ } // USE_QTFR_PEEK_NEXT
+ /* automatic possessification a*b ==> (?>a*)b */
+ if (qn.lower <= 1) {
+ if (qn.target.isSimple()) {
+ Node x = getHeadValueNode(qn.target, false);
+ if (x != null) {
+ Node y = getHeadValueNode(nextNode, false);
+ if (y != null && isNotIncluded(x, y)) { // applied only when isNotIncluded holds for the two head nodes
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
+ en.setStopBtSimpleRepeat();
+ //en.setTarget(qn.target); // optimize it ??
+ swap(node, en); // helper defined elsewhere; presumably puts en where node was in the tree - TODO confirm
+
+ en.setTarget(node);
+ }
+ }
+ }
+ }
+ }
+ } else if (type == NodeType.ENCLOSE) {
+ EncloseNode en = (EncloseNode)node;
+ if (en.isMemory()) {
+ node = en.target; // look through the memory enclose and examine its target instead
+ // !goto retry;!
+ continue retry;
+ }
+ }
+
+ break;
+ } // while
+ }
+
+ private void updateStringNodeCaseFold(Node node) { // replaces the string node's bytes with the bytes produced by enc.mbcCaseFold
+ StringNode sn = (StringNode)node;
+
+ byte[]sbuf = new byte[sn.length() << 1]; // initial capacity: twice the source length; doubled below when exhausted
+ int sp = 0;
+
+ value = sn.p; // 'value' is a field of this object, used as the read position
+ int end = sn.end;
+
+ byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+ while (value < end) {
+ int len = enc.mbcCaseFold(regex.caseFoldFlag, sn.bytes, this, end, buf); // passes 'this'; presumably mbcCaseFold advances this.value - TODO confirm
+ for (int i=0; i<len; i++) {
+ if (sp >= sbuf.length) {
+ byte[]tmp = new byte[sbuf.length << 1];
+ System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
+ sbuf = tmp;
+ }
+ sbuf[sp++] = buf[i];
+ }
+ }
+
+ sn.set(sbuf, 0, sp);
+ }
+
+ private Node expandCaseFoldMakeRemString(byte[]bytes, int p, int end) { // builds a case-folded string node for bytes[p..end) that is left unexpanded
+ StringNode node = new StringNode(bytes, p, end);
+
+ updateStringNodeCaseFold(node);
+ node.setAmbig(); // flagged ambiguous and excluded from optimization-info gathering
+ node.setDontGetOptInfo();
+ return node;
+ }
+
+ private boolean expandCaseFoldStringAlt(int itemNum, CaseFoldCodeItem[]items, // builds an alternation of the original slen bytes at p and each case-fold item; result is stored in node[0]
+ byte[]bytes, int p, int slen, int end, Node[]node) { // returns true when some item's byteLen differs from slen
+ boolean varlen = false;
+
+ for (int i=0; i<itemNum; i++) {
+ if (items[i].byteLen != slen) {
+ varlen = true;
+ break;
+ }
+ }
+
+ ConsAltNode varANode = null, anode, xnode;
+ if (varlen) { // outer alternation (varANode) whose first branch is a list holding the same-length alternation (anode)
+ node[0] = varANode = newAltNode(null, null);
+
+ xnode = newListNode(null, null);
+ varANode.setCar(xnode);
+
+ anode = newAltNode(null, null);
+ xnode.setCar(anode);
+ } else {
+ node[0] = anode = newAltNode(null, null);
+ }
+
+ StringNode snode = new StringNode(bytes, p, p + slen); // first alternative: the original bytes
+ anode.setCar(snode);
+
+ for (int i=0; i<itemNum; i++) {
+ snode = new StringNode();
+
+ for (int j=0; j<items[i].codeLen; j++) { // encode each code point of the item into the new string node
+ snode.ensure(Config.ENC_CODE_TO_MBC_MAXLEN);
+ snode.end += enc.codeToMbc(items[i].code[j], snode.bytes, snode.end);
+ }
+
+ ConsAltNode an = newAltNode(null, null);
+ if (items[i].byteLen != slen) { // different-length item: appended to the outer alternation
+ int q = p + items[i].byteLen;
+ if (q < end) { // bytes remain after the item: pair it with a folded remainder string
+ Node rem = expandCaseFoldMakeRemString(bytes, q, end);
+
+ xnode = ConsAltNode.listAdd(null, snode);
+ ConsAltNode.listAdd(xnode, rem);
+ an.setCar(xnode);
+ } else {
+ an.setCar(snode);
+ }
+ varANode.setCdr(an);
+ varANode = an;
+ } else { // same-length item: appended to the inner alternation
+ an.setCar(snode);
+ anode.setCdr(an);
+ anode = an;
+ }
+ }
+ return varlen;
+ }
+
+ private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; // expansion stops once the product of alternative counts exceeds this
+ private void expandCaseFoldString(Node node) { // replaces a string node by a tree of literal pieces and case-fold alternations
+ StringNode sn = (StringNode)node;
+
+ if (sn.isAmbig() || sn.length() <= 0) return; // nothing to do for ambiguous or empty strings
+
+ byte[]bytes = sn.bytes;
+ int p = sn.p;
+ int end = sn.end;
+ int altNum = 1; // running product of (items.length + 1) over the expanded characters
+
+ ConsAltNode topRoot = null, root = null;
+ Node[]prevNode = new Node[]{null}; // one-element array so expandCaseFoldStringAlt can hand back the node it builds
+ StringNode snode = null; // literal run currently being accumulated, or null
+
+ while (p < end) {
+ CaseFoldCodeItem[]items = enc.caseFoldCodesByString(regex.caseFoldFlag, bytes, p, end);
+ int len = enc.length(bytes, p, end);
+
+ if (items.length == 0) { // no case-fold variants at p: append the character to the current literal run
+ if (snode == null) {
+ if (root == null && prevNode[0] != null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode[0]);
+ }
+
+ prevNode[0] = snode = new StringNode(); // onig_node_new_str(NULL, NULL);
+
+ if (root != null) {
+ ConsAltNode.listAdd(root, snode);
+ }
+
+ }
+
+ snode.cat(bytes, p, p + len);
+ } else {
+ altNum *= (items.length + 1);
+ if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; // too many combinations: the rest is handled after the loop
+
+ if (root == null && prevNode[0] != null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode[0]);
+ }
+
+ boolean r = expandCaseFoldStringAlt(items.length, items, bytes, p, len, end, prevNode);
+ if (r) { // if (r == 1)
+ if (root == null) {
+ topRoot = (ConsAltNode)prevNode[0];
+ } else {
+ ConsAltNode.listAdd(root, prevNode[0]);
+ }
+
+ root = (ConsAltNode)((ConsAltNode)prevNode[0]).car; // continue appending inside the first branch of the new alternation
+ } else { /* r == 0 */
+ if (root != null) {
+ ConsAltNode.listAdd(root, prevNode[0]);
+ }
+ }
+ snode = null; // the literal run is closed by the alternation
+ }
+ p += len;
+ }
+
+ if (p < end) { // the loop stopped early: add the remainder as one folded string
+ Node srem = expandCaseFoldMakeRemString(bytes, p, end);
+
+ if (prevNode[0] != null && root == null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode[0]);
+ }
+
+ if (root == null) {
+ prevNode[0] = srem;
+ } else {
+ ConsAltNode.listAdd(root, srem);
+ }
+ }
+ /* ending */
+ Node xnode = topRoot != null ? topRoot : prevNode[0];
+ swap(node, xnode); // helper defined elsewhere; presumably puts xnode where node was in the tree - TODO confirm
+ }
+
+ private static final int CEC_THRES_NUM_BIG_REPEAT = 512; // a repeat whose (upper - lower) reaches this counts as "big"
+ private static final int CEC_INFINITE_NUM = 0x7fffffff; // variable count used for an infinite repeat
+
+ private static final int CEC_IN_INFINITE_REPEAT = (1<<0); // state bit: inside an infinite repeat
+ private static final int CEC_IN_FINITE_REPEAT = (1<<1); // state bit: inside a finite repeat with upper > 1
+ private static final int CEC_CONT_BIG_REPEAT = (1<<2); // state bit: a big repeat has been seen
+
+ protected final int setupCombExpCheck(Node node, int state) { // assigns combExpCheckNum to quantifiers based on the CEC_* state bits; returns the resulting state
+ int r = state;
+ int ret;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode ln = (ConsAltNode)node;
+
+ do {
+ r = setupCombExpCheck(ln.car, r); // the state is threaded through the list elements
+ //prev = ((ConsAltNode)node).car;
+ } while (r >= 0 && (ln = ln.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode an = (ConsAltNode)node;
+ do {
+ ret = setupCombExpCheck(an.car, state); // every branch starts from the incoming state; results are OR-ed
+ r |= ret;
+ } while (ret >= 0 && (an = an.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ int childState = state;
+ int addState = 0;
+ int varNum;
+
+ if (!isRepeatInfinite(qn.upper)) {
+ if (qn.upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ childState |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env.backrefedMem == 0) {
+ if (qn.target.getType() == NodeType.ENCLOSE) {
+ EncloseNode en = (EncloseNode)qn.target;
+ if (en.type == EncloseType.MEMORY) {
+ if (en.target.getType() == NodeType.QTFR) {
+ QuantifierNode q = (QuantifierNode)en.target;
+ if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) {
+ qn.upper = qn.lower == 0 ? 1 : qn.lower; // rewrites the outer quantifier's upper bound
+ if (qn.upper == 1) childState = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if ((state & CEC_IN_FINITE_REPEAT) != 0) {
+ qn.combExpCheckNum = -1;
+ } else {
+ if (isRepeatInfinite(qn.upper)) {
+ varNum = CEC_INFINITE_NUM;
+ childState |= CEC_IN_INFINITE_REPEAT;
+ } else {
+ varNum = qn.upper - qn.lower;
+ }
+
+ if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn.combExpCheckNum == 0) { // not numbered yet: take the next check number
+ env.numCombExpCheck++;
+ qn.combExpCheckNum = env.numCombExpCheck;
+ if (env.currMaxRegNum > env.combExpMaxRegNum) {
+ env.combExpMaxRegNum = env.currMaxRegNum;
+ }
+ }
+ }
+ }
+ r = setupCombExpCheck(qn.target, childState);
+ r |= addState;
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ switch( en.type) {
+ case EncloseNode.MEMORY:
+ if (env.currMaxRegNum < en.regNum) {
+ env.currMaxRegNum = en.regNum; // track the highest register number seen so far
+ }
+ r = setupCombExpCheck(en.target, state);
+ break;
+
+ default:
+ r = setupCombExpCheck(en.target, state);
+ } // inner switch
+ break;
+
+ case NodeType.CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ CallNode cn = (CallNode)node;
+ if (cn.isRecursion()) {
+ env.hasRecursion = true; // recursive calls are recorded on env and not followed
+ } else {
+ r = setupCombExpCheck(cn.target, state);
+ }
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ default:
+ break;
+
+ } // switch
+
+ return r;
+ }
+
+ private static final int IN_ALT = (1<<0); // state bit: inside an alternation
+ private static final int IN_NOT = (1<<1); // state bit: inside a negative look-ahead or look-behind
+ private static final int IN_REPEAT = (1<<2); // state bit: inside a quantifier target
+ private static final int IN_VAR_REPEAT = (1<<3); // state bit: inside a quantifier whose lower and upper bounds differ
+ private static final int EXPAND_STRING_MAX_LENGTH = 100; // limit on repeat count and on expanded byte length when unrolling a repeated string
+
+ /* setup_tree does the following work.
+ 1. check empty loop. (set qn->target_empty_info)
+ 2. expand ignore-case in char class.
+ 3. set memory status bit flags. (reg->mem_stats)
+ 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+ 5. find invalid patterns in look-behind.
+ 6. expand repeated string.
+ */
+ protected final void setupTree(Node node, int state) { // state: combination of the IN_* bits describing the enclosing context
+ switch (node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode lin = (ConsAltNode)node;
+ Node prev = null;
+ do {
+ setupTree(lin.car, state);
+ if (prev != null) {
+ nextSetup(prev, lin.car); // let the previous element see the element that follows it
+ }
+ prev = lin.car;
+ } while ((lin = lin.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode aln = (ConsAltNode)node;
+ do {
+ setupTree(aln.car, (state | IN_ALT));
+ } while ((aln = aln.cdr) != null);
+ break;
+
+ case NodeType.CCLASS:
+ break;
+
+ case NodeType.STR:
+ if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
+ expandCaseFoldString(node); // only non-raw strings under ignore-case are expanded
+ }
+ break;
+
+ case NodeType.CTYPE:
+ case NodeType.CANY:
+ break;
+
+ case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ?
+ break;
+
+ case NodeType.BREF:
+ BackRefNode br = (BackRefNode)node;
+ for (int i=0; i<br.backNum; i++) {
+ if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+ env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
+ env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
+ if (Config.USE_BACKREF_WITH_LEVEL) {
+ if (br.isNestLevel()) {
+ env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]);
+ }
+ } // USE_BACKREF_AT_LEVEL
+ ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed(); // flag the referenced group itself
+ }
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ Node target = qn.target;
+
+ if ((state & IN_REPEAT) != 0) qn.setInRepeat();
+
+ if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
+ int d = getMinMatchLength(target);
+ if (d == 0) { // the target can match the empty string
+ qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
+ if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
+ int info = quantifiersMemoryInfo(target);
+ if (info > 0) qn.targetEmptyInfo = info;
+ } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ // strange stuff here (turned off)
+ }
+ }
+
+ state |= IN_REPEAT;
+ if (qn.lower != qn.upper) state |= IN_VAR_REPEAT;
+
+ setupTree(target, state);
+
+ /* expand string */
+ if (target.getType() == NodeType.STR) {
+ if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
+ qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
+ StringNode sn = (StringNode)target;
+ int len = sn.length();
+
+ if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
+ StringNode str = qn.convertToString();
+ // if (str.parent == null) root = str;
+ int n = qn.lower;
+ for (int i=0; i<n; i++) {
+ str.cat(sn.bytes, sn.p, sn.end); // append the target's bytes once per repetition
+ }
+ }
+ break; /* break case NT_QTFR: */
+ }
+ }
+ if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
+ if (qn.greedy && qn.targetEmptyInfo != 0) {
+ if (target.getType() == NodeType.QTFR) {
+ QuantifierNode tqn = (QuantifierNode)target;
+ if (tqn.headExact != null) { // move the inner quantifier's headExact up to this quantifier
+ qn.headExact = tqn.headExact;
+ tqn.headExact = null;
+ }
+ } else {
+ qn.headExact = getHeadValueNode(qn.target, true);
+ }
+ }
+ } // USE_OP_PUSH_OR_JUMP_EXACT
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ switch (en.type) {
+ case EncloseType.OPTION:
+ int options = regex.options; // saved so the enclose's option applies only while its target is processed
+ regex.options = en.option;
+ setupTree(en.target, state);
+ regex.options = options;
+ break;
+
+ case EncloseType.MEMORY:
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+ env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
+ /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+
+ }
+ setupTree(en.target, state);
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ setupTree(en.target, state);
+ if (en.target.getType() == NodeType.QTFR) {
+ QuantifierNode tqn = (QuantifierNode)en.target;
+ if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
+ /* (?>a*), a*+ etc... */
+ if (tqn.target.isSimple()) en.setStopBtSimpleRepeat();
+ }
+ }
+ break;
+
+ } // inner switch
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ setupTree(an.target, state);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ setupTree(an.target, (state | IN_NOT));
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ boolean lbInvalid = checkTypeTree(an.target, NodeType.ALLOWED_IN_LB,
+ EncloseType.ALLOWED_IN_LB,
+ AnchorType.ALLOWED_IN_LB);
+
+ if (lbInvalid) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ setupLookBehind(node);
+ setupTree(an.target, state);
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ boolean lbnInvalid = checkTypeTree(an.target, NodeType.ALLOWED_IN_LB,
+ EncloseType.ALLOWED_IN_LB,
+ AnchorType.ALLOWED_IN_LB);
+
+ if (lbnInvalid) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+
+ setupLookBehind(node);
+ setupTree(an.target, (state | IN_NOT));
+ break;
+
+ } // inner switch
+ break;
+
+ default:
+ break;
+
+ } // switch
+ }
+
+ private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; // a MEMORY enclose visited more often than this is no longer descended into (see the ENCLOSE case)
+ private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd; fills opt (length, exb/exm/expr, map, anchor) for node
+ opt.clear();
+ opt.setBoundNode(oenv.mmd);
+
+ switch (node.getType()) {
+ case NodeType.LIST: {
+ OptEnvironment nenv = new OptEnvironment();
+ NodeOptInfo nopt = new NodeOptInfo();
+ nenv.copy(oenv);
+ ConsAltNode lin = (ConsAltNode)node;
+ do {
+ optimizeNodeLeft(lin.car, nopt, nenv);
+ nenv.mmd.add(nopt.length); // accumulate the element's length into the distance used for the next element
+ opt.concatLeftNode(nopt, enc);
+ } while ((lin = lin.cdr) != null);
+ break;
+ }
+
+ case NodeType.ALT: {
+ NodeOptInfo nopt = new NodeOptInfo();
+ ConsAltNode aln = (ConsAltNode)node;
+ do {
+ optimizeNodeLeft(aln.car, nopt, oenv);
+ if (aln == node) { // first alternative: take its info as the starting point
+ opt.copy(nopt);
+ } else {
+ opt.altMerge(nopt, oenv);
+ }
+ } while ((aln = aln.cdr) != null);
+ break;
+ }
+
+ case NodeType.STR: {
+ StringNode sn = (StringNode)node;
+
+ int slen = sn.length();
+
+ if (!sn.isAmbig()) {
+ opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
+
+ if (slen > 0) {
+ opt.map.addChar(sn.bytes[sn.p], enc);
+ }
+
+ opt.length.set(slen, slen);
+ } else {
+ int max;
+ if (sn.isDontGetOptInfo()) { // no exact/map info is collected; only a maximum length is computed
+ int n = sn.length(enc);
+ max = enc.maxLengthDistance() * n;
+ } else {
+ opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
+ opt.exb.ignoreCase = true;
+
+ if (slen > 0) {
+ opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag);
+ }
+
+ max = slen;
+ }
+ opt.length.set(slen, max);
+ }
+
+ if (opt.exb.length == slen) {
+ opt.exb.reachEnd = true; // the exact info covers the whole string
+ }
+ break;
+ }
+
+ case NodeType.CCLASS: {
+ CClassNode cc = (CClassNode)node;
+ /* no need to check ignore case. (set in setup_tree()) */
+ if (cc.mbuf != null || cc.isNot()) {
+ int min = enc.minLength();
+ int max = enc.maxLengthDistance();
+ opt.length.set(min, max);
+ } else {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ boolean z = cc.bs.at(i);
+ if ((z && !cc.isNot()) || (!z && cc.isNot())) {
+ opt.map.addChar((byte)i, enc);
+ }
+ }
+ opt.length.set(1, 1);
+ }
+ break;
+ }
+
+ case NodeType.CTYPE: {
+ int min;
+ int max = enc.maxLengthDistance();
+ if (max == 1) { // the per-byte map is filled only when maxLengthDistance() is 1
+ min = 1;
+ CTypeNode cn = (CTypeNode)node;
+
+ switch (cn.ctype) {
+ case CharacterType.WORD:
+ if (cn.not) {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (!enc.isWord(i)) {
+ opt.map.addChar((byte)i, enc);
+ }
+ }
+ } else {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (enc.isWord(i)) {
+ opt.map.addChar((byte)i, enc);
+ }
+ }
+ }
+ break;
+ } // inner switch
+ } else {
+ min = enc.minLength();
+ }
+ opt.length.set(min, max);
+ break;
+ }
+
+ case NodeType.CANY: {
+ opt.length.set(enc.minLength(), enc.maxLengthDistance());
+ break;
+ }
+
+ case NodeType.ANCHOR: {
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.BEGIN_BUF:
+ case AnchorType.BEGIN_POSITION:
+ case AnchorType.BEGIN_LINE:
+ case AnchorType.END_BUF:
+ case AnchorType.SEMI_END_BUF:
+ case AnchorType.END_LINE:
+ opt.anchor.add(an.type);
+ break;
+
+ case AnchorType.PREC_READ:
+ NodeOptInfo nopt = new NodeOptInfo();
+ optimizeNodeLeft(an.target, nopt, oenv);
+ if (nopt.exb.length > 0) { // prefer exb, fall back to exm
+ opt.expr.copy(nopt.exb);
+ } else if (nopt.exm.length > 0) {
+ opt.expr.copy(nopt.exm);
+ }
+ opt.expr.reachEnd = false;
+ if (nopt.map.value > 0) opt.map.copy(nopt.map);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */
+ case AnchorType.LOOK_BEHIND_NOT:
+ break;
+
+ } // inner switch
+ break;
+ }
+
+ case NodeType.BREF: {
+ BackRefNode br = (BackRefNode)node;
+
+ if (br.isRecursion()) {
+ opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
+ break;
+ }
+
+ Node[]nodes = oenv.scanEnv.memNodes;
+
+ int min = getMinMatchLength(nodes[br.back[0]]);
+ int max = getMaxMatchLength(nodes[br.back[0]]);
+
+ for (int i=1; i<br.backNum; i++) { // smallest minimum and largest maximum over all referenced groups
+ int tmin = getMinMatchLength(nodes[br.back[i]]);
+ int tmax = getMaxMatchLength(nodes[br.back[i]]);
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
+ }
+ opt.length.set(min, max);
+ break;
+ }
+
+ case NodeType.CALL: {
+ if (Config.USE_SUBEXP_CALL) {
+ CallNode cn = (CallNode)node;
+ if (cn.isRecursion()) {
+ opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
+ } else {
+ int safe = oenv.options; // saved so the target's option applies only during the recursive call
+ oenv.options = ((EncloseNode)cn.target).option;
+ optimizeNodeLeft(cn.target, opt, oenv);
+ oenv.options = safe;
+ }
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+ }
+
+ case NodeType.QTFR: {
+ NodeOptInfo nopt = new NodeOptInfo();
+ QuantifierNode qn = (QuantifierNode)node;
+ optimizeNodeLeft(qn.target, nopt, oenv);
+ if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
+ if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { // greedy any-char star with mmd.max == 0
+ if (isMultiline(oenv.options)) {
+ opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
+ } else {
+ opt.anchor.add(AnchorType.ANYCHAR_STAR);
+ }
+ }
+ } else {
+ if (qn.lower > 0) {
+ opt.copy(nopt);
+ if (nopt.exb.length > 0) {
+ if (nopt.exb.reachEnd) {
+ int i;
+ for (i=1; i<qn.lower && !opt.exb.isFull(); i++) { // repeat the exact info up to lower times or until exb is full
+ opt.exb.concat(nopt.exb, enc);
+ }
+ if (i < qn.lower) {
+ opt.exb.reachEnd = false;
+ }
+ }
+ }
+ if (qn.lower != qn.upper) {
+ opt.exb.reachEnd = false;
+ opt.exm.reachEnd = false;
+ }
+ if (qn.lower > 1) {
+ opt.exm.reachEnd = false;
+ }
+
+ }
+ }
+ int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
+ int max;
+ if (isRepeatInfinite(qn.upper)) {
+ max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
+ } else {
+ max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
+ }
+ opt.length.set(min, max);
+ break;
+ }
+
+ case NodeType.ENCLOSE: {
+ EncloseNode en = (EncloseNode)node;
+ switch (en.type) {
+ case EncloseType.OPTION:
+ int save = oenv.options; // saved so the enclose's option applies only during the recursive call
+ oenv.options = en.option;
+ optimizeNodeLeft(en.target, opt, oenv);
+ oenv.options = save;
+ break;
+
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { // visited too often: use the enclose's fixed lengths, if any, instead of descending
+ int min = 0;
+ int max = MinMaxLen.INFINITE_DISTANCE;
+ if (en.isMinFixed()) min = en.minLength;
+ if (en.isMaxFixed()) max = en.maxLength;
+ opt.length.set(min, max);
+ } else { // USE_SUBEXP_CALL
+ optimizeNodeLeft(en.target, opt, oenv);
+ if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
+ if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { // the any-char-star anchor is dropped when this group is back-referenced
+ opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
+ }
+ }
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ optimizeNodeLeft(en.target, opt, oenv);
+ break;
+ } // inner switch
+ break;
+ }
+
+ default:
+ newInternalException(ERR_PARSER_BUG); // any node type not handled above is reported as a parser bug
+ } // switch
+ }
+
+ protected final void setOptimizedInfoFromTree(Node node) { // runs optimizeNodeLeft on the tree and stores the selected anchor/exact/map info on regex
+ NodeOptInfo opt = new NodeOptInfo();
+ OptEnvironment oenv = new OptEnvironment();
+
+ oenv.enc = regex.enc;
+ oenv.options = regex.options;
+ oenv.caseFoldFlag = regex.caseFoldFlag;
+ oenv.scanEnv = env;
+ oenv.mmd.clear(); // ??
+
+ optimizeNodeLeft(node, opt, oenv);
+
+ regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | // keep only these left-anchor bits
+ AnchorType.BEGIN_POSITION |
+ AnchorType.ANYCHAR_STAR |
+ AnchorType.ANYCHAR_STAR_ML);
+
+ regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | // and only these right-anchor bits
+ AnchorType.SEMI_END_BUF);
+
+ if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) {
+ regex.anchorDmin = opt.length.min; // with an end anchor, record the tree's min/max length
+ regex.anchorDmax = opt.length.max;
+ }
+
+ if (opt.exb.length > 0 || opt.exm.length > 0) {
+ opt.exb.select(opt.exm, enc);
+ if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { // the comparison favours the map: use it instead of the exact info
+ // !goto set_map;!
+ regex.setOptimizeMapInfo(opt.map);
+ regex.setSubAnchor(opt.map.anchor);
+ } else {
+ regex.setExactInfo(opt.exb);
+ regex.setSubAnchor(opt.exb.anchor);
+ }
+ } else if (opt.map.value > 0) {
+ // !set_map:!
+ regex.setOptimizeMapInfo(opt.map);
+ regex.setSubAnchor(opt.map.anchor);
+ } else { // neither exact nor map info: only line anchors are kept as sub-anchors
+ regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
+ if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
+ }
+
+ if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
+ Config.log.println(regex.optimizeInfoToString());
+ }
+ }
+}
diff --git a/src/org/joni/ApplyCaseFold.java b/src/org/joni/ApplyCaseFold.java
new file mode 100644
index 0000000..54a1aa0
--- /dev/null
+++ b/src/org/joni/ApplyCaseFold.java
@@ -0,0 +1,99 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.ApplyAllCaseFoldFunction;
+import org.jcodings.Encoding;
+import org.joni.ast.CClassNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.StringNode;
+
+final class ApplyCaseFold implements ApplyAllCaseFoldFunction { // callback that adds case-fold equivalents to the character class carried in an ApplyCaseFoldArg
+
+ // i_apply_case_fold: 'from' folds to the 'length' code points in 'to'; 'o' must be an ApplyCaseFoldArg (cast unconditionally)
+ public void apply(int from, int[]to, int length, Object o) {
+ ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o;
+
+ ScanEnvironment env = arg.env;
+ Encoding enc = env.enc;
+ CClassNode cc = arg.cc;
+ BitSet bs = cc.bs;
+
+ if (length == 1) { // single code point target: extend the class itself
+ boolean inCC = cc.isCodeInCC(enc, from);
+
+ if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
+ if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
+ if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) { // does not fit the single-byte bit set: add as a code range
+ cc.addCodeRange(env, to[0], to[0]);
+ } else {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ bs.set(to[0]);
+ }
+ }
+ } else {
+ if (inCC) {
+ if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
+ if (cc.isNot()) cc.clearNotFlag(enc);
+ cc.addCodeRange(env, to[0], to[0]);
+ } else {
+ if (cc.isNot()) {
+ bs.clear(to[0]);
+ } else {
+ bs.set(to[0]);
+ }
+ }
+ }
+ } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+
+ } else { // multi code point target: build a string alternative and append it to the arg's alt chain
+ if (cc.isCodeInCC(enc, from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) {
+ StringNode node = null;
+ for (int i=0; i<length; i++) {
+ if (i == 0) {
+ node = new StringNode();
+ node.ensure(Config.ENC_CODE_TO_MBC_MAXLEN);
+ node.end += enc.codeToMbc(to[i], node.bytes, node.end);
+
+ /* char-class expanded multi-char only
+ compare with string folded at match time. */
+ node.setAmbig();
+ } else {
+ node.ensure(Config.ENC_CODE_TO_MBC_MAXLEN);
+ node.end += enc.codeToMbc(to[i], node.bytes, node.end);
+ }
+ }
+
+ ConsAltNode alt = ConsAltNode.newAltNode(node, null);
+
+ if (arg.tail == null) { // first alternative becomes the root; later ones are chained after the tail
+ arg.altRoot = alt;
+ } else {
+ arg.tail.setCdr(alt);
+ }
+ arg.tail = alt;
+ }
+
+ }
+
+ }
+
+ static final ApplyCaseFold INSTANCE = new ApplyCaseFold(); // shared instance; the class declares no instance fields
+}
diff --git a/src/org/joni/ApplyCaseFoldArg.java b/src/org/joni/ApplyCaseFoldArg.java
new file mode 100644
index 0000000..92dd2fb
--- /dev/null
+++ b/src/org/joni/ApplyCaseFoldArg.java
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.CClassNode;
+import org.joni.ast.ConsAltNode;
+
+/**
+ * Argument bundle handed to the case-fold callback: the scan environment and the
+ * character class being processed, plus the head and tail of a linked list of
+ * alternatives that the callback may build up.
+ */
+public final class ApplyCaseFoldArg {
+ final ScanEnvironment env;
+ final CClassNode cc;
+ // first alternative of the list; stays null until one is added
+ ConsAltNode altRoot;
+ // last alternative of the list, kept so new entries can be appended directly
+ ConsAltNode tail;
+
+ /** Stores the environment and character class; the alternative list starts out empty. */
+ public ApplyCaseFoldArg(ScanEnvironment env, CClassNode cc) {
+ this.env = env;
+ this.cc = cc;
+ }
+}
diff --git a/src/org/joni/ArrayCompiler.java b/src/org/joni/ArrayCompiler.java
new file mode 100644
index 0000000..f23ce58
--- /dev/null
+++ b/src/org/joni/ArrayCompiler.java
@@ -0,0 +1,1237 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+import static org.joni.Option.isDynamic;
+import static org.joni.Option.isIgnoreCase;
+import static org.joni.Option.isMultiline;
+import static org.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import org.jcodings.constants.CharacterType;
+import org.joni.ast.AnchorNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.constants.AnchorType;
+import org.joni.constants.EncloseType;
+import org.joni.constants.NodeType;
+import org.joni.constants.OPCode;
+import org.joni.constants.OPSize;
+import org.joni.constants.TargetInfo;
+
+final class ArrayCompiler extends Compiler {
+
+ /** Creates a compiler for the given analyser; all setup is delegated to the superclass constructor. */
+ ArrayCompiler(Analyser analyser) {
+ super(analyser);
+ }
+
+    /**
+     * Allocates the initial code array, sized from the pattern span
+     * (analyser.stop - analyser.p), and resets the emitted length to zero.
+     */
+    @Override
+    protected final void prepare() {
+        final int patternSpan = analyser.stop - analyser.p;
+        final int initialCapacity = patternSpan * 2 + 1; // (+1: empty regex)
+        regex.code = new int[initialCapacity];
+        regex.codeLength = 0;
+    }
+
+ /**
+ * Terminates the emitted program with END and FINISH, lets the pending
+ * unset-address list (if any) patch the regex, and installs the default matcher factory.
+ */
+ @Override
+ protected final void finish() {
+ addOpcode(OPCode.END);
+ addOpcode(OPCode.FINISH); // for stack bottom
+
+ // resolve recorded addresses once all code is emitted, then drop the list
+ if (Config.USE_SUBEXP_CALL && analyser.env.unsetAddrList != null) {
+ analyser.env.unsetAddrList.fix(regex);
+ analyser.env.unsetAddrList = null;
+ }
+
+ regex.factory = MatcherFactory.DEFAULT;
+ }
+
+ @Override
+ protected void compileAltNode(ConsAltNode node) {
+ ConsAltNode aln = node;
+ int len = 0;
+
+ do {
+ len += compileLengthTree(aln.car);
+ if (aln.cdr != null) {
+ len += OPSize.PUSH + OPSize.JUMP;
+ }
+ } while ((aln = aln.cdr) != null);
+
+ int pos = regex.codeLength + len; /* goal position */
+
+ aln = node;
+ do {
+ len = compileLengthTree(aln.car);
+ if (aln.cdr != null) {
+ addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP);
+ }
+ compileTree(aln.car);
+ if (aln.cdr != null) {
+ len = pos - (regex.codeLength + OPSize.JUMP);
+ addOpcodeRelAddr(OPCode.JUMP, len);
+ }
+ } while ((aln = aln.cdr) != null);
+ }
+
+    /** Tells whether the given exact-string opcode is followed by an explicit length operand. */
+    private boolean isNeedStrLenOpExact(int op) {
+        switch (op) {
+        case OPCode.EXACTN:
+        case OPCode.EXACTMB2N:
+        case OPCode.EXACTMB3N:
+        case OPCode.EXACTMBN:
+        case OPCode.EXACTN_IC:
+        case OPCode.EXACTN_IC_SB:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    /**
+     * Chooses the exact-match opcode for a run of {@code strLength} characters that are
+     * each {@code mbLength} bytes wide.
+     *
+     * With {@code ignoreCase} only the run length matters, and the *_SB variants are used
+     * when the encoding exposes a lower-case table. Otherwise the opcode is picked by
+     * byte width first and run length second.
+     *
+     * Fix: the 3-byte case used to fall through into the default branch (missing
+     * {@code break}), so EXACTMB3N was always overwritten by EXACTMBN. Both callers
+     * (addCompileString and addCompileStringlength) derive operands and sizes from the
+     * opcode returned here, so they stay consistent with each other.
+     *
+     * @param mbLength   byte width of every character in the run
+     * @param strLength  number of characters in the run
+     * @param ignoreCase whether a case-insensitive opcode is required
+     * @return the selected OPCode constant
+     */
+    private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) {
+        if (ignoreCase) {
+            boolean hasLowerTable = enc.toLowerCaseTable() != null;
+            if (strLength == 1) {
+                return hasLowerTable ? OPCode.EXACT1_IC_SB : OPCode.EXACT1_IC;
+            }
+            return hasLowerTable ? OPCode.EXACTN_IC_SB : OPCode.EXACTN_IC;
+        }
+
+        switch (mbLength) {
+        case 1:
+            switch (strLength) {
+            case 1: return OPCode.EXACT1;
+            case 2: return OPCode.EXACT2;
+            case 3: return OPCode.EXACT3;
+            case 4: return OPCode.EXACT4;
+            case 5: return OPCode.EXACT5;
+            default:return OPCode.EXACTN;
+            } // inner switch
+        case 2:
+            switch (strLength) {
+            case 1: return OPCode.EXACTMB2N1;
+            case 2: return OPCode.EXACTMB2N2;
+            case 3: return OPCode.EXACTMB2N3;
+            default:return OPCode.EXACTMB2N;
+            } // inner switch
+        case 3:
+            return OPCode.EXACTMB3N;
+        default:
+            return OPCode.EXACTMBN;
+        } // switch
+    }
+
+ private void compileTreeEmptyCheck(Node node, int emptyInfo) {
+ int savedNumNullCheck = regex.numNullCheck;
+
+ if (emptyInfo != 0) {
+ addOpcode(OPCode.NULL_CHECK_START);
+ addMemNum(regex.numNullCheck); /* NULL CHECK ID */
+ regex.numNullCheck++;
+ }
+
+ compileTree(node);
+
+ if (emptyInfo != 0) {
+ switch(emptyInfo) {
+ case TargetInfo.IS_EMPTY:
+ addOpcode(OPCode.NULL_CHECK_END);
+ break;
+ case TargetInfo.IS_EMPTY_MEM:
+ addOpcode(OPCode.NULL_CHECK_END_MEMST);
+ break;
+ case TargetInfo.IS_EMPTY_REC:
+ addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH);
+ break;
+ } // switch
+
+ addMemNum(savedNumNullCheck); /* NULL CHECK ID */
+ }
+ }
+
+ private int addCompileStringlength(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
+ int op = selectStrOpcode(mbLength, strLength, ignoreCase);
+
+ int len = OPSize.OPCODE;
+
+ if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
+ if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
+
+ len += mbLength * strLength;
+ return len;
+ }
+
+    /**
+     * Emits one exact-match instruction: opcode, the operands that opcode requires
+     * (byte width for EXACTMBN, then a length where needed) and finally the raw bytes.
+     */
+    @Override
+    protected final void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
+        final int op = selectStrOpcode(mbLength, strLength, ignoreCase);
+        final int byteCount = mbLength * strLength;
+
+        addOpcode(op);
+
+        if (op == OPCode.EXACTMBN) {
+            addLength(mbLength);
+        }
+
+        if (isNeedStrLenOpExact(op)) {
+            // the case-insensitive forms carry a byte count, the others a character count
+            boolean caseInsensitive = op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB;
+            addLength(caseInsensitive ? byteCount : strLength);
+        }
+
+        addBytes(bytes, p, byteCount);
+    }
+
+ private int compileLengthStringNode(Node node) {
+ StringNode sn = (StringNode)node;
+ if (sn.length() <= 0) return 0;
+ boolean ambig = sn.isAmbig();
+
+ int p, prev;
+ p = prev = sn.p;
+ int end = sn.end;
+ byte[]bytes = sn.bytes;
+ int prevLen = enc.length(bytes, p, end);
+ p += prevLen;
+
+ int slen = 1;
+ int rlen = 0;
+
+ while (p < end) {
+ int len = enc.length(bytes, p, end);
+ if (len == prevLen) {
+ slen++;
+ } else {
+ int r = addCompileStringlength(bytes, prev, prevLen, slen, ambig);
+ rlen += r;
+ prev = p;
+ slen = 1;
+ prevLen = len;
+ }
+ p += len;
+ }
+ int r = addCompileStringlength(bytes, prev, prevLen, slen, ambig);
+ rlen += r;
+ return rlen;
+ }
+
+    /** Code size of a raw string node: its bytes are sized as one case-sensitive single-byte run. */
+    private int compileLengthStringRawNode(StringNode sn) {
+        if (sn.length() > 0) {
+            return addCompileStringlength(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false);
+        }
+        return 0;
+    }
+
+ /** Emits a code-range buffer: first its used length, then that many ints of its contents. */
+ private void addMultiByteCClass(CodeRangeBuffer mbuf) {
+ addLength(mbuf.used);
+ addInts(mbuf.p, mbuf.used);
+ }
+
+    /**
+     * Code size of a character class: opcode + pointer for a shared class, otherwise the
+     * opcode plus the bit set and/or the length-prefixed code-range buffer.
+     */
+    private int compileLengthCClassNode(CClassNode cc) {
+        if (cc.isShare()) return OPSize.OPCODE + OPSize.POINTER;
+
+        if (cc.mbuf == null) return OPSize.OPCODE + BitSet.BITSET_SIZE;
+
+        // the bit set is left out when it cannot contribute
+        boolean rangesOnly = enc.minLength() > 1 || cc.bs.isEmpty();
+        int len = rangesOnly ? OPSize.OPCODE : OPSize.OPCODE + BitSet.BITSET_SIZE;
+
+        len += OPSize.LENGTH + cc.mbuf.used;
+        return len;
+    }
+
+    /**
+     * Emits a character class. Shared classes are emitted as CCLASS_NODE plus an operand
+     * reference; otherwise one of the CCLASS / CCLASS_MB / CCLASS_MIX opcodes (or its NOT
+     * form) is emitted, followed by the bit set and/or the code-range buffer.
+     */
+    @Override
+    protected void compileCClassNode(CClassNode cc) {
+        if (cc.isShare()) { // shared char class
+            addOpcode(OPCode.CCLASS_NODE);
+            addPointer(cc);
+            return;
+        }
+
+        if (cc.mbuf == null) {
+            // bit set only
+            int op;
+            if (cc.isNot()) {
+                op = enc.isSingleByte() ? OPCode.CCLASS_NOT_SB : OPCode.CCLASS_NOT;
+            } else {
+                op = enc.isSingleByte() ? OPCode.CCLASS_SB : OPCode.CCLASS;
+            }
+            addOpcode(op);
+            addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
+            return;
+        }
+
+        if (enc.minLength() > 1 || cc.bs.isEmpty()) {
+            // code ranges only
+            addOpcode(cc.isNot() ? OPCode.CCLASS_MB_NOT : OPCode.CCLASS_MB);
+        } else {
+            // store the bit set and mbuf themselves
+            addOpcode(cc.isNot() ? OPCode.CCLASS_MIX_NOT : OPCode.CCLASS_MIX);
+            addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
+        }
+        addMultiByteCClass(cc.mbuf);
+    }
+
+ @Override
+ protected void compileCTypeNode(CTypeNode node) {
+ CTypeNode cn = node;
+ int op;
+ switch (cn.ctype) {
+ case CharacterType.WORD:
+ if (cn.not) {
+ op = enc.isSingleByte() ? OPCode.NOT_WORD_SB : OPCode.NOT_WORD;
+ } else {
+ op = enc.isSingleByte() ? OPCode.WORD_SB : OPCode.WORD;
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ return; // not reached
+ } // inner switch
+ addOpcode(op);
+ }
+
+ @Override
+ protected void compileAnyCharNode() {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_SB : OPCode.ANYCHAR_ML);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_SB : OPCode.ANYCHAR);
+ }
+ }
+
+ /**
+ * Emits a CALL with a placeholder address. The position of the address slot and the
+ * call target are recorded in the node's unset-address list.
+ * NOTE(review): presumably the placeholder is patched later via UnsetAddrList.fix (see finish()) - confirm.
+ */
+ @Override
+ protected void compileCallNode(CallNode node) {
+ addOpcode(OPCode.CALL);
+ // regex.codeLength now points at the address operand emitted on the next line
+ node.unsetAddrList.add(regex.codeLength, node.target);
+ addAbsAddr(0); /*dummy addr.*/
+ }
+
+    /**
+     * Emits a back-reference.
+     *
+     * Forms, in order of precedence:
+     *  - nest-level reference: BACKREF_WITH_LEVEL, ignore-case option bit, nest level, member list;
+     *  - single group: BACKREFN_IC + group when ignoring case, else BACKREF1 / BACKREF2 for
+     *    groups 1 and 2, or BACKREFN + group for any other group;
+     *  - several groups: BACKREF_MULTI(_IC) + member list.
+     *
+     * Fix: the group operand of BACKREFN was emitted with addOpcode(), which also runs the
+     * operand through the opcode switch and can set regex.stackNeeded when the group number
+     * happens to equal a stack-using opcode value. It is now emitted with addMemNum(), like
+     * every other group operand in this method; the emitted code word is unchanged.
+     */
+    @Override
+    protected void compileBackrefNode(BackRefNode node) {
+        BackRefNode br = node;
+
+        if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
+            addOpcode(OPCode.BACKREF_WITH_LEVEL);
+            addOption(regex.options & Option.IGNORECASE);
+            addLength(br.nestLevel);
+            addBackrefMemNums(br);
+            return;
+        }
+
+        // USE_BACKREF_AT_LEVEL
+        if (br.backNum == 1) {
+            if (isIgnoreCase(regex.options)) {
+                addOpcode(OPCode.BACKREFN_IC);
+                addMemNum(br.back[0]);
+            } else {
+                switch (br.back[0]) {
+                case 1:
+                    addOpcode(OPCode.BACKREF1);
+                    break;
+                case 2:
+                    addOpcode(OPCode.BACKREF2);
+                    break;
+                default:
+                    addOpcode(OPCode.BACKREFN);
+                    addMemNum(br.back[0]); // operand, not an opcode
+                    break;
+                } // switch
+            }
+        } else {
+            addOpcode(isIgnoreCase(regex.options) ? OPCode.BACKREF_MULTI_IC : OPCode.BACKREF_MULTI);
+            addBackrefMemNums(br);
+        }
+    }
+
+    /** Emits the member list of a back-reference: the count, then the group numbers from last to first. */
+    private void addBackrefMemNums(BackRefNode br) {
+        addLength(br.backNum);
+        for (int i = br.backNum - 1; i >= 0; i--) addMemNum(br.back[i]);
+    }
+
+    /** Initial size and growth step of the repeat-range tables. */
+    private static final int REPEAT_RANGE_ALLOC = 8;
+
+    /**
+     * Records the bounds of repeat {@code id} in the regex's repeat-range tables,
+     * creating or enlarging the tables as needed. An infinite upper bound is stored
+     * as the largest int.
+     */
+    private void entryRepeatRange(int id, int lower, int upper) {
+        if (regex.repeatRangeLo == null) {
+            regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC];
+            regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC];
+        } else if (id >= regex.repeatRangeLo.length) {
+            regex.repeatRangeLo = extendRepeatRange(regex.repeatRangeLo);
+            regex.repeatRangeHi = extendRepeatRange(regex.repeatRangeHi);
+        }
+
+        regex.repeatRangeLo[id] = lower;
+        regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? Integer.MAX_VALUE : upper;
+    }
+
+    /** Returns a copy of {@code table} that is REPEAT_RANGE_ALLOC entries longer. */
+    private static int[] extendRepeatRange(int[]table) {
+        int[]grown = new int[table.length + REPEAT_RANGE_ALLOC];
+        System.arraycopy(table, 0, grown, 0, table.length);
+        return grown;
+    }
+
+ private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) {
+ int numRepeat = regex.numRepeat;
+ addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG);
+ addMemNum(numRepeat); /* OP_REPEAT ID */
+ regex.numRepeat++;
+ addRelAddr(targetLen + OPSize.REPEAT_INC);
+
+ entryRepeatRange(numRepeat, qn.lower, qn.upper);
+
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+
+ if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) {
+ addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG);
+ } else {
+ addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG);
+ }
+
+ addMemNum(numRepeat); /* OP_REPEAT ID */
+ }
+
+ private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // was 50
+
+ /** True when {@code ckn} is a positive state-check number, i.e. state checking applies to the node. */
+ private static boolean cknOn(int ckn) {
+ return ckn > 0;
+ }
+
+ /**
+ * Size counterpart of compileCECQuantifierNode: returns the code length of a quantifier
+ * node for the variant that takes state-check numbers into account. The branch structure
+ * parallels the emitter and must be kept in step with it.
+ */
+ private int compileCECLengthQuantifierNode(QuantifierNode qn) {
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target);
+ // state-check number is only honoured when the regex has any such checks at all
+ int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;
+ int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0;
+
+ /* anychar repeat */
+ if (qn.target.getType() == NodeType.CANY) {
+ if (qn.greedy && infinite) {
+ if (qn.nextHeadExact != null && !cknOn(ckn)) {
+ return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen;
+ } else {
+ return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen;
+ }
+ }
+ }
+
+ // target length including the null-check bracket when the target may match empty
+ int modTLen;
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+
+ int len;
+ if (infinite && qn.lower <= 1) {
+ if (qn.greedy) {
+ if (qn.lower == 1) {
+ len = OPSize.JUMP;
+ } else {
+ len = 0;
+ }
+ len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP;
+ } else {
+ if (qn.lower == 0) {
+ len = OPSize.JUMP;
+ } else {
+ len = 0;
+ }
+ len += modTLen + OPSize.PUSH + cklen;
+ }
+ } else if (qn.upper == 0) {
+ if (qn.isRefered) { /* /(?<n>..){0}/ */
+ len = OPSize.JUMP + tlen;
+ } else {
+ len = 0;
+ }
+ } else if (qn.upper == 1 && qn.greedy) {
+ if (qn.lower == 0) {
+ if (cknOn(ckn)) {
+ len = OPSize.STATE_CHECK_PUSH + tlen;
+ } else {
+ len = OPSize.PUSH + tlen;
+ }
+ } else {
+ len = tlen;
+ }
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ len = OPSize.PUSH + cklen + OPSize.JUMP + tlen;
+ } else {
+ // counted repeat: REPEAT (opcode + reladdr + memnum) ... REPEAT_INC
+ len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
+
+ if (cknOn(ckn)) {
+ len += OPSize.STATE_CHECK;
+ }
+ }
+ return len;
+ }
+
+ /**
+ * Emits a quantifier node in the variant that supports state-check numbers.
+ * Greedy infinite any-char repeats get dedicated ANYCHAR_*_STAR opcodes; other shapes are
+ * built from PUSH/JUMP (or their STATE_CHECK_* forms) around the target, and everything
+ * else falls back to a counted REPEAT via compileRangeRepeatNode.
+ */
+ @Override
+ protected void compileCECQuantifierNode(QuantifierNode qn) {
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target);
+
+ // state-check number is only honoured when the regex has any such checks at all
+ int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;
+
+ if (qn.isAnyCharStar()) {
+ // mandatory repetitions first, then the star opcode
+ compileTreeNTimes(qn.target, qn.lower);
+ if (qn.nextHeadExact != null && !cknOn(ckn)) {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT);
+ }
+ // NOTE(review): cannot be true here - the enclosing branch already requires !cknOn(ckn)
+ if (cknOn(ckn)) {
+ addStateCheckNum(ckn);
+ }
+ // operand: first byte of the string that follows the repeat
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ addBytes(sn.bytes, sn.p, 1);
+ return;
+ } else {
+ if (isMultiline(regex.options)) {
+ if (cknOn(ckn)) {
+ addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_ML_STAR);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR);
+ }
+ } else {
+ if (cknOn(ckn)) {
+ addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_STAR);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR);
+ }
+ }
+ if (cknOn(ckn)) {
+ addStateCheckNum(ckn);
+ }
+ return;
+ }
+ }
+
+ // target length including the null-check bracket when the target may match empty
+ int modTLen;
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+ if (infinite && qn.lower <= 1) {
+ if (qn.greedy) {
+ if (qn.lower == 1) {
+ // skip the PUSH on first entry so the target is executed at least once
+ addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH :
+ OPSize.PUSH);
+ }
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(modTLen + OPSize.JUMP);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
+ }
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ // loop back to the PUSH
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ?
+ OPSize.STATE_CHECK_PUSH :
+ OPSize.PUSH)));
+ } else {
+ if (qn.lower == 0) {
+ // jump over the target to the trailing PUSH
+ addOpcodeRelAddr(OPCode.JUMP, modTLen);
+ }
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP);
+ addStateCheckNum(ckn);
+ addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP));
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
+ }
+ }
+ } else if (qn.upper == 0) {
+ if (qn.isRefered) { /* /(?<n>..){0}/ */
+ // the target is emitted but jumped over
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } // else r=0 ???
+ } else if (qn.upper == 1 && qn.greedy) {
+ if (qn.lower == 0) {
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(tlen);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, tlen);
+ }
+ }
+ compileTree(qn.target);
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(OPSize.JUMP);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
+ }
+
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } else {
+ compileRangeRepeatNode(qn, modTLen, emptyInfo);
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK);
+ addStateCheckNum(ckn);
+ }
+ }
+ }
+
+ /**
+ * Size counterpart of compileNonCECQuantifierNode: returns the code length of a quantifier
+ * node for the variant without state checks. The branch structure parallels the emitter
+ * and must be kept in step with it.
+ */
+ private int compileNonCECLengthQuantifierNode(QuantifierNode qn) {
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target);
+
+ /* anychar repeat */
+ if (qn.target.getType() == NodeType.CANY) {
+ if (qn.greedy && infinite) {
+ if (qn.nextHeadExact != null) {
+ return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
+ } else {
+ return OPSize.ANYCHAR_STAR + tlen * qn.lower;
+ }
+ }
+ }
+
+ // target length including the null-check bracket when the target may match empty
+ int modTLen = 0;
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+
+ int len;
+ if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ // mandatory part: a single JUMP for a large once-repeated target, else lower inline copies
+ if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ len = OPSize.JUMP;
+ } else {
+ len = tlen * qn.lower;
+ }
+
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP;
+ } else if (qn.nextHeadExact != null) {
+ len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP;
+ } else {
+ len += OPSize.PUSH + modTLen + OPSize.JUMP;
+ }
+ } else {
+ len += OPSize.JUMP + modTLen + OPSize.PUSH;
+ }
+
+ } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
+ len = OPSize.JUMP + tlen;
+ } else if (!infinite && qn.greedy &&
+ (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) {
+ // small bounded greedy repeat: lower inline copies plus (upper - lower) PUSH-guarded copies
+ len = tlen * qn.lower;
+ len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower);
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ len = OPSize.PUSH + OPSize.JUMP + tlen;
+ } else {
+ // counted repeat: REPEAT (opcode + reladdr + memnum) ... REPEAT_INC
+ len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
+ }
+ return len;
+ }
+
+ /**
+ * Emits a quantifier node in the variant without state checks.
+ * Greedy infinite any-char repeats get dedicated ANYCHAR_*_STAR opcodes; small repeats are
+ * expanded inline (bounded by QUANTIFIER_EXPAND_LIMIT_SIZE); loops are built from
+ * PUSH/JUMP variants; everything else falls back to a counted REPEAT.
+ */
+ @Override
+ protected void compileNonCECQuantifierNode(QuantifierNode qn) {
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target);
+
+ if (qn.isAnyCharStar()) {
+ // mandatory repetitions first, then the star opcode
+ compileTreeNTimes(qn.target, qn.lower);
+ if (qn.nextHeadExact != null) {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT);
+ }
+ // operand: first byte of the string that follows the repeat
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ addBytes(sn.bytes, sn.p, 1);
+ return;
+ } else {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR);
+ }
+ return;
+ }
+ }
+
+ // target length including the null-check bracket when the target may match empty
+ int modTLen;
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+ if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ // large target repeated at least once: jump past the loop head instead of duplicating the target
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1);
+ } else if (qn.nextHeadExact != null) {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT);
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH);
+ }
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP);
+ }
+ } else {
+ compileTreeNTimes(qn.target, qn.lower);
+ }
+
+ if (qn.greedy) {
+ // loop: <push variant> target JUMP(back to push)
+ if (qn.headExact != null) {
+ addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP);
+ StringNode sn = (StringNode)qn.headExact;
+ addBytes(sn.bytes, sn.p, 1);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1));
+ } else if (qn.nextHeadExact != null) {
+ addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP);
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ addBytes(sn.bytes, sn.p, 1);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT));
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH));
+ }
+ } else {
+ // non-greedy loop: JUMP(to push) target PUSH(back to target)
+ addOpcodeRelAddr(OPCode.JUMP, modTLen);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
+ }
+ } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
+ // the target is emitted but jumped over
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } else if (!infinite && qn.greedy &&
+ (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ int n = qn.upper - qn.lower;
+ compileTreeNTimes(qn.target, qn.lower);
+
+ // each optional copy is guarded by a PUSH whose offset spans all remaining copies
+ for (int i=0; i<n; i++) {
+ addOpcodeRelAddr(OPCode.PUSH, (n - i) * tlen + (n - i - 1) * OPSize.PUSH);
+ compileTree(qn.target);
+ }
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } else {
+ compileRangeRepeatNode(qn, modTLen, emptyInfo);
+ }
+ }
+
+ private int compileLengthOptionNode(EncloseNode node) {
+ int prev = regex.options;
+ regex.options = node.option;
+ int tlen = compileLengthTree(node.target);
+ regex.options = prev;
+
+ if (isDynamic(prev ^ node.option)) {
+ return OPSize.SET_OPTION_PUSH + OPSize.SET_OPTION + OPSize.FAIL + tlen + OPSize.SET_OPTION;
+ } else {
+ return tlen;
+ }
+ }
+
+ @Override
+ protected void compileOptionNode(EncloseNode node) {
+ int prev = regex.options;
+
+ if (isDynamic(prev ^ node.option)) {
+ addOpcodeOption(OPCode.SET_OPTION_PUSH, node.option);
+ addOpcodeOption(OPCode.SET_OPTION, prev);
+ addOpcode(OPCode.FAIL);
+ }
+
+ regex.options = node.option;
+ compileTree(node.target);
+ regex.options = prev;
+
+ if (isDynamic(prev ^ node.option)) {
+ addOpcodeOption(OPCode.SET_OPTION, prev);
+ }
+ }
+
+ private int compileLengthEncloseNode(EncloseNode node) {
+ if (node.isOption()) {
+ return compileLengthOptionNode(node);
+ }
+
+ int tlen;
+ if (node.target != null) {
+ tlen = compileLengthTree(node.target);
+ } else {
+ tlen = 0;
+ }
+
+ int len;
+ switch (node.type) {
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL && node.isCalled()) {
+ len = OPSize.MEMORY_START_PUSH + tlen + OPSize.CALL + OPSize.JUMP + OPSize.RETURN;
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
+ } else {
+ len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
+ }
+ } else { // USE_SUBEXP_CALL
+ if (bsAt(regex.btMemStart, node.regNum)) {
+ len = OPSize.MEMORY_START_PUSH;
+ } else {
+ len = OPSize.MEMORY_START;
+ }
+ len += tlen + (bsAt(regex.btMemEnd, node.regNum) ? OPSize.MEMORY_END_PUSH : OPSize.MEMORY_END);
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ if (node.isStopBtSimpleRepeat()) {
+ QuantifierNode qn = (QuantifierNode)node.target;
+ tlen = compileLengthTree(qn.target);
+ len = tlen * qn.lower + OPSize.PUSH + tlen + OPSize.POP + OPSize.JUMP;
+ } else {
+ len = OPSize.PUSH_STOP_BT + tlen + OPSize.POP_STOP_BT;
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ return 0; // not reached
+ } // switch
+ return len;
+ }
+
+ /**
+ * Emits a capture group (MEMORY) or a stop-backtrack group.
+ * A called capture group is emitted as CALL + JUMP over a body that ends in RETURN;
+ * a stop-backtrack group around a simple repeat is emitted as a PUSH/POP loop.
+ * Any other enclose type is reported as a parser bug.
+ */
+ @Override
+ protected void compileEncloseNode(EncloseNode node) {
+ int len;
+ switch (node.type) {
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL) {
+ if (node.isCalled()) {
+ addOpcode(OPCode.CALL);
+ // the group body starts right after this CALL's address operand and the following JUMP
+ node.callAddr = regex.codeLength + OPSize.ABSADDR + OPSize.JUMP;
+ node.setAddrFixed();
+ addAbsAddr(node.callAddr);
+ // length of the body (start, target, end, RETURN) that the JUMP must skip
+ len = compileLengthTree(node.target);
+ len += OPSize.MEMORY_START_PUSH + OPSize.RETURN;
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
+ } else {
+ len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
+ }
+ addOpcodeRelAddr(OPCode.JUMP, len);
+ }
+ } // USE_SUBEXP_CALL
+
+ if (bsAt(regex.btMemStart, node.regNum)) {
+ addOpcode(OPCode.MEMORY_START_PUSH);
+ } else {
+ addOpcode(OPCode.MEMORY_START);
+ }
+
+ addMemNum(node.regNum);
+ compileTree(node.target);
+
+ if (Config.USE_SUBEXP_CALL && node.isCalled()) {
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ addOpcode(node.isRecursion() ? OPCode.MEMORY_END_PUSH_REC : OPCode.MEMORY_END_PUSH);
+ } else {
+ addOpcode(node.isRecursion() ? OPCode.MEMORY_END_REC : OPCode.MEMORY_END);
+ }
+ addMemNum(node.regNum);
+ addOpcode(OPCode.RETURN);
+ } else { // USE_SUBEXP_CALL
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ addOpcode(OPCode.MEMORY_END_PUSH);
+ } else {
+ addOpcode(OPCode.MEMORY_END);
+ }
+ addMemNum(node.regNum);
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ if (node.isStopBtSimpleRepeat()) {
+ QuantifierNode qn = (QuantifierNode)node.target;
+
+ // mandatory repetitions, then a loop: PUSH target POP JUMP(back to PUSH)
+ compileTreeNTimes(qn.target, qn.lower);
+
+ len = compileLengthTree(qn.target);
+ addOpcodeRelAddr(OPCode.PUSH, len + OPSize.POP + OPSize.JUMP);
+ compileTree(qn.target);
+ addOpcode(OPCode.POP);
+ addOpcodeRelAddr(OPCode.JUMP, -(OPSize.PUSH + len + OPSize.POP + OPSize.JUMP));
+ } else {
+ addOpcode(OPCode.PUSH_STOP_BT);
+ compileTree(node.target);
+ addOpcode(OPCode.POP_STOP_BT);
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ break;
+ } // switch
+ }
+
+ private int compileLengthAnchorNode(AnchorNode node) {
+ int tlen;
+ if (node.target != null) {
+ tlen = compileLengthTree(node.target);
+ } else {
+ tlen = 0;
+ }
+
+ int len;
+ switch (node.type) {
+ case AnchorType.PREC_READ:
+ len = OPSize.PUSH_POS + tlen + OPSize.POP_POS;
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS;
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ len = OPSize.LOOK_BEHIND + tlen;
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT;
+ break;
+
+ default:
+ len = OPSize.OPCODE;
+ break;
+ } // switch
+ return len;
+ }
+
+ /**
+ * Emits an anchor. Position and word anchors are single opcodes (with *_SB variants for
+ * single-byte encodings); look-ahead and look-behind anchors wrap their compiled target.
+ * Look-behind additionally emits the target's character length, computed through the
+ * analyser when the node does not already carry it.
+ */
+ @Override
+ protected void compileAnchorNode(AnchorNode node) {
+ int len;
+ int n;
+
+ switch (node.type) {
+ case AnchorType.BEGIN_BUF: addOpcode(OPCode.BEGIN_BUF); break;
+ case AnchorType.END_BUF: addOpcode(OPCode.END_BUF); break;
+ case AnchorType.BEGIN_LINE: addOpcode(OPCode.BEGIN_LINE); break;
+ case AnchorType.END_LINE: addOpcode(OPCode.END_LINE); break;
+ case AnchorType.SEMI_END_BUF: addOpcode(OPCode.SEMI_END_BUF); break;
+ case AnchorType.BEGIN_POSITION: addOpcode(OPCode.BEGIN_POSITION); break;
+
+ case AnchorType.WORD_BOUND:
+ addOpcode(enc.isSingleByte() ? OPCode.WORD_BOUND_SB : OPCode.WORD_BOUND);
+ break;
+
+ case AnchorType.NOT_WORD_BOUND:
+ addOpcode(enc.isSingleByte() ? OPCode.NOT_WORD_BOUND_SB : OPCode.NOT_WORD_BOUND);
+ break;
+
+ case AnchorType.WORD_BEGIN:
+ // emits nothing when USE_WORD_BEGIN_END is off
+ if (Config.USE_WORD_BEGIN_END)
+ addOpcode(enc.isSingleByte() ? OPCode.WORD_BEGIN_SB : OPCode.WORD_BEGIN);
+ break;
+
+ case AnchorType.WORD_END:
+ // emits nothing when USE_WORD_BEGIN_END is off
+ if (Config.USE_WORD_BEGIN_END)
+ addOpcode(enc.isSingleByte() ? OPCode.WORD_END_SB : OPCode.WORD_END);
+ break;
+
+ case AnchorType.PREC_READ:
+ addOpcode(OPCode.PUSH_POS);
+ compileTree(node.target);
+ addOpcode(OPCode.POP_POS);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ len = compileLengthTree(node.target);
+ addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS);
+ compileTree(node.target);
+ addOpcode(OPCode.FAIL_POS);
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ addOpcode(enc.isSingleByte() ? OPCode.LOOK_BEHIND_SB : OPCode.LOOK_BEHIND);
+ // a negative charLength means "not known yet": ask the analyser
+ if (node.charLength < 0) {
+ n = analyser.getCharLengthTree(node.target);
+ if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ } else {
+ n = node.charLength;
+ }
+ addLength(n);
+ compileTree(node.target);
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ len = compileLengthTree(node.target);
+ addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT);
+ // a negative charLength means "not known yet": ask the analyser
+ if (node.charLength < 0) {
+ n = analyser.getCharLengthTree(node.target);
+ if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ } else {
+ n = node.charLength;
+ }
+ addLength(n);
+ compileTree(node.target);
+ addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT);
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+ }
+
+ /**
+ * Returns the code length of the given subtree by dispatching on the node type to the
+ * per-node length routines. Callers in this class use the result to compute relative
+ * jump offsets before the subtree itself is emitted. An unknown node type is reported
+ * as a parser bug.
+ */
+ private int compileLengthTree(Node node) {
+ int len = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ // concatenation: sum of the element lengths
+ ConsAltNode lin = (ConsAltNode)node;
+ do {
+ len += compileLengthTree(lin.car);
+ } while ((lin = lin.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ // alternation: branch lengths plus a PUSH/JUMP pair for every branch but the last
+ ConsAltNode aln = (ConsAltNode)node;
+ int n = 0;
+ do {
+ len += compileLengthTree(aln.car);
+ n++;
+ } while ((aln = aln.cdr) != null);
+ len += (OPSize.PUSH + OPSize.JUMP) * (n - 1);
+ break;
+
+ case NodeType.STR:
+ StringNode sn = (StringNode)node;
+ if (sn.isRaw()) {
+ len = compileLengthStringRawNode(sn);
+ } else {
+ len = compileLengthStringNode(sn);
+ }
+ break;
+
+ case NodeType.CCLASS:
+ len = compileLengthCClassNode((CClassNode)node);
+ break;
+
+ case NodeType.CTYPE:
+ case NodeType.CANY:
+ len = OPSize.OPCODE;
+ break;
+
+ case NodeType.BREF:
+ BackRefNode br = (BackRefNode)node;
+
+ if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
+ len = OPSize.OPCODE + OPSize.OPTION + OPSize.LENGTH +
+ OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
+ } else { // USE_BACKREF_AT_LEVEL
+ if (br.backNum == 1) {
+ // case-sensitive references to group 1 or 2 have dedicated opcodes without an operand
+ len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2)
+ ? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
+ } else {
+ len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
+ }
+ }
+ break;
+
+ case NodeType.CALL:
+ // stays 0 when USE_SUBEXP_CALL is off
+ if (Config.USE_SUBEXP_CALL) {
+ len = OPSize.CALL;
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case NodeType.QTFR:
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ len = compileCECLengthQuantifierNode((QuantifierNode)node);
+ } else {
+ len = compileNonCECLengthQuantifierNode((QuantifierNode)node);
+ }
+ break;
+
+ case NodeType.ENCLOSE:
+ len = compileLengthEncloseNode((EncloseNode)node);
+ break;
+
+ case NodeType.ANCHOR:
+ len = compileLengthAnchorNode((AnchorNode)node);
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } //switch
+ return len;
+ }
+
+    /**
+     * Makes sure the code array is strictly longer than {@code size}, growing it by
+     * repeated doubling and copying the existing contents when it is not.
+     */
+    private void ensure(int size) {
+        if (size < regex.code.length) return;
+
+        int capacity = regex.code.length << 1;
+        while (capacity <= size) {
+            capacity <<= 1;
+        }
+
+        int[]grown = new int[capacity];
+        System.arraycopy(regex.code, 0, grown, 0, regex.code.length);
+        regex.code = grown;
+    }
+
+ private void addInt(int i) {
+ if (regex.codeLength >= regex.code.length) {
+ int[]tmp = new int[regex.code.length << 1];
+ System.arraycopy(regex.code, 0, tmp, 0, regex.code.length);
+ regex.code = tmp;
+ }
+ regex.code[regex.codeLength++] = i;
+ }
+
+ /**
+ * Overwrites the code slot at {@code offset} with {@code i}, growing the code array
+ * first if the offset lies beyond it. The emitted length (codeLength) is not changed.
+ */
+ void setInt(int i, int offset) {
+ ensure(offset);
+ regex.code[offset] = i;
+ }
+
+ private void addObject(Object o) {
+ if (regex.operands == null) {
+ regex.operands = new Object[4];
+ } else if (regex.operandLength >= regex.operands.length) {
+ Object[]tmp = new Object[regex.operands.length << 1];
+ System.arraycopy(regex.operands, 0, tmp, 0, regex.operands.length);
+ regex.operands = tmp;
+ }
+ addInt(regex.operandLength);
+ regex.operands[regex.operandLength++] = o;
+ }
+
+    /** Appends {@code length} bytes of {@code bytes}, starting at {@code p}, one byte per code slot. */
+    private void addBytes(byte[]bytes, int p ,int length) {
+        ensure(regex.codeLength + length);
+
+        for (int i = p, stop = p + length; i < stop; i++) {
+            regex.code[regex.codeLength++] = bytes[i];
+        }
+    }
+
+ /** Appends the first {@code length} ints of {@code ints} to the code array in one bulk copy. */
+ private void addInts(int[]ints, int length) {
+ ensure(regex.codeLength + length);
+ System.arraycopy(ints, 0, regex.code, regex.codeLength, length);
+ regex.codeLength += length;
+ }
+
+    /**
+     * Appends an opcode to the code array and flags the regex as needing a stack when the
+     * opcode is one of the listed ones.
+     *
+     * Fix: STATE_CHECK_ANYCHAR_ML_STAR_SB is emitted by compileCECQuantifierNode but was the
+     * only one of the four STATE_CHECK_ANYCHAR_* opcodes missing from the list; it is now
+     * treated like its siblings.
+     *
+     * REPEAT_INC is not listed: in this class it is only emitted by compileRangeRepeatNode,
+     * after a REPEAT / REPEAT_NG that has already set the flag.
+     */
+    private void addOpcode(int opcode) {
+        addInt(opcode);
+
+        switch(opcode) {
+        case OPCode.ANYCHAR_STAR:
+        case OPCode.ANYCHAR_STAR_SB:
+        case OPCode.ANYCHAR_ML_STAR:
+        case OPCode.ANYCHAR_ML_STAR_SB:
+        case OPCode.ANYCHAR_STAR_PEEK_NEXT:
+        case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
+        case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
+        case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
+        case OPCode.STATE_CHECK_ANYCHAR_STAR:
+        case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
+        case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
+        case OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB:
+        case OPCode.MEMORY_START_PUSH:
+        case OPCode.MEMORY_END_PUSH:
+        case OPCode.MEMORY_END_PUSH_REC:
+        case OPCode.MEMORY_END_REC:
+        case OPCode.NULL_CHECK_START:
+        case OPCode.NULL_CHECK_END_MEMST_PUSH:
+        case OPCode.PUSH:
+        case OPCode.STATE_CHECK_PUSH:
+        case OPCode.STATE_CHECK_PUSH_OR_JUMP:
+        case OPCode.STATE_CHECK:
+        case OPCode.PUSH_OR_JUMP_EXACT1:
+        case OPCode.PUSH_IF_PEEK_NEXT:
+        case OPCode.REPEAT:
+        case OPCode.REPEAT_NG:
+        case OPCode.REPEAT_INC_SG:
+        case OPCode.REPEAT_INC_NG:
+        case OPCode.REPEAT_INC_NG_SG:
+        case OPCode.PUSH_POS:
+        case OPCode.PUSH_POS_NOT:
+        case OPCode.PUSH_STOP_BT:
+        case OPCode.PUSH_LOOK_BEHIND_NOT:
+        case OPCode.CALL:
+        case OPCode.RETURN: // it will appear only with CALL though
+            regex.stackNeeded = true;
+        }
+    }
+
+ // The helpers below give operand kinds their own names. Each plain operand is stored
+ // as a single int in the code array; a pointer goes through the operand table.
+
+ /** Appends a state-check number operand. */
+ private void addStateCheckNum(int num) {
+ addInt(num);
+ }
+
+ /** Appends a relative address operand. */
+ private void addRelAddr(int addr) {
+ addInt(addr);
+ }
+
+ /** Appends an absolute address operand. */
+ private void addAbsAddr(int addr) {
+ addInt(addr);
+ }
+
+ /** Appends a length operand. */
+ private void addLength(int length) {
+ addInt(length);
+ }
+
+ /** Appends a memory (group / check id) number operand. */
+ private void addMemNum(int num) {
+ addInt(num);
+ }
+
+ /** Appends an object operand: the object is stored in the operand table and its index is emitted. */
+ private void addPointer(Object o) {
+ addObject(o);
+ }
+
+ /** Appends an option-bits operand. */
+ private void addOption(int option) {
+ addInt(option);
+ }
+
+ /** Appends an opcode followed by a relative address operand. */
+ private void addOpcodeRelAddr(int opcode, int addr) {
+ addOpcode(opcode);
+ addRelAddr(addr);
+ }
+
+ /** Appends an opcode followed by an option-bits operand. */
+ private void addOpcodeOption(int opcode, int option) {
+ addOpcode(opcode);
+ addOption(option);
+ }
+}
diff --git a/src/org/joni/AsmCompiler.java b/src/org/joni/AsmCompiler.java
new file mode 100644
index 0000000..433d324
--- /dev/null
+++ b/src/org/joni/AsmCompiler.java
@@ -0,0 +1,109 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.AnchorNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.QuantifierNode;
+
+ /**
+  * Skeleton of the ASM-based compiler.
+  * <p>
+  * Only the class scaffolding ({@link #prepare()} / {@link #finish()}) and the
+  * installation of string templates and bit sets into the generated factory
+  * are implemented; every other node handler is an intentionally empty stub,
+  * so no matching code is emitted for those nodes.
+  */
+ final class AsmCompiler extends AsmCompilerSupport {
+
+     public AsmCompiler(Analyser analyser) {
+         super(analyser);
+     }
+
+     /**
+      * Starts the generated machine and factory classes. The machine is
+      * prepared first because the factory's {@code create} method refers to
+      * the machine's class name.
+      */
+     @Override
+     protected void prepare() {
+         // Bumps the shared class-name suffix; this is a plain static increment.
+         REG_NUM++;
+         prepareMachine();
+         prepareMachineInit();
+         prepareMachineMatch();
+
+         prepareFactory();
+         prepareFactoryInit();
+     }
+
+     /** Closes all generated methods, then defines both classes and installs the factory. */
+     @Override
+     protected void finish() {
+         setupFactoryInit();
+
+         setupMachineInit();
+         setupMachineMatch();
+
+         setupClasses();
+     }
+
+     @Override
+     protected void compileAltNode(ConsAltNode node) {
+     }
+
+     /**
+      * Registers the string as a template field of the generated factory.
+      * No matching code is emitted yet; {@code mbLength} and {@code ignoreCase}
+      * are currently unused.
+      */
+     @Override
+     protected void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
+         installTemplate(bytes, p, strLength);
+     }
+
+     /**
+      * Registers the class's bit set (when it has one) as a field of the
+      * generated factory. No matching code is emitted yet.
+      */
+     @Override
+     protected void compileCClassNode(CClassNode node) {
+         if (node.bs != null) {
+             installBitSet(node.bs.bits);
+         }
+     }
+
+     @Override
+     protected void compileCTypeNode(CTypeNode node) {
+     }
+
+     @Override
+     protected void compileAnyCharNode() {
+     }
+
+     @Override
+     protected void compileBackrefNode(BackRefNode node) {
+     }
+
+     @Override
+     protected void compileCallNode(CallNode node) {
+     }
+
+     @Override
+     protected void compileCECQuantifierNode(QuantifierNode node) {
+     }
+
+     @Override
+     protected void compileNonCECQuantifierNode(QuantifierNode node) {
+     }
+
+     @Override
+     protected void compileOptionNode(EncloseNode node) {
+     }
+
+     @Override
+     protected void compileEncloseNode(EncloseNode node) {
+     }
+
+     @Override
+     protected void compileAnchorNode(AnchorNode node) {
+     }
+ }
diff --git a/src/org/joni/AsmCompilerSupport.java b/src/org/joni/AsmCompilerSupport.java
new file mode 100644
index 0000000..217aa8b
--- /dev/null
+++ b/src/org/joni/AsmCompilerSupport.java
@@ -0,0 +1,267 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.joni.constants.AsmConstants;
+import org.objectweb.asm.ClassWriter;
+import org.objectweb.asm.MethodVisitor;
+import org.objectweb.asm.Opcodes;
+
+abstract class AsmCompilerSupport extends Compiler implements Opcodes, AsmConstants {
+ protected ClassWriter factory; // matcher allocator, also bit set, code rage and string template container
+ protected MethodVisitor factoryInit;// factory constructor
+ protected String factoryName;
+
+ protected ClassWriter machine; // matcher
+ protected MethodVisitor machineInit;// matcher constructor
+ protected MethodVisitor match; // actual matcher implementation (the matchAt method)
+ protected String machineName;
+
+ // we will? try to manage visitMaxs ourselves for efficiency
+ protected int maxStack = 1;
+ protected int maxVars = LAST_INDEX;
+
+ // for field generation
+ protected int bitsets, ranges, templates;
+
+ // simple class name postfix scheme for now
+ static int REG_NUM = 0;
+
+ // dummy class loader for now
+ private static final class DummyClassLoader extends ClassLoader {
+ public Class<?> defineClass(String name, byte[] bytes) {
+ return super.defineClass(name, bytes, 0, bytes.length);
+ }
+ };
+
+ private static final DummyClassLoader loader = new DummyClassLoader();
+
+ AsmCompilerSupport(Analyser analyser) {
+ super(analyser);
+ }
+
+ protected final void prepareFactory() {
+ factory = new ClassWriter(ClassWriter.COMPUTE_MAXS);
+ factoryName = "org/joni/MatcherFactory" + REG_NUM;
+
+ factory.visit(V1_4, ACC_PUBLIC + ACC_FINAL, factoryName, null, "org/joni/MatcherFactory", null);
+
+ MethodVisitor create = factory.visitMethod(ACC_SYNTHETIC, "create", "(Lorg/joni/Regex;[BII)Lorg/joni/Matcher;", null, null);
+ create.visitTypeInsn(NEW, machineName);
+ create.visitInsn(DUP); // instance
+ create.visitVarInsn(ALOAD, 1); // Regex
+ create.visitVarInsn(ALOAD, 2); // bytes[]
+ create.visitVarInsn(ILOAD, 3); // p
+ create.visitVarInsn(ILOAD, 4); // end
+ create.visitMethodInsn(INVOKESPECIAL, machineName, "<init>", "(Lorg/joni/Regex;[BII)V");
+ create.visitInsn(ARETURN);
+ create.visitMaxs(0, 0);
+ //create.visitMaxs(6, 5);
+ create.visitEnd();
+ }
+
+ protected final void prepareFactoryInit() {
+ factoryInit = factory.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null);
+ factoryInit.visitVarInsn(ALOAD, 0);
+ factoryInit.visitMethodInsn(INVOKESPECIAL, "org/joni/MatcherFactory", "<init>", "()V");
+ }
+
+ protected final void setupFactoryInit() {
+ factoryInit.visitInsn(RETURN);
+ factoryInit.visitMaxs(0, 0);
+ //init.visitMaxs(1, 1);
+ factoryInit.visitEnd();
+ }
+
+ protected final void prepareMachine() {
+ machine = new ClassWriter(ClassWriter.COMPUTE_MAXS);
+ machineName = "org/joni/NativeMachine" + REG_NUM;
+ }
+
+ protected final void prepareMachineInit() {
+ machine.visit(V1_4, ACC_PUBLIC + ACC_FINAL, machineName, null, "org/joni/NativeMachine", null);
+ machineInit = machine.visitMethod(ACC_PROTECTED, "<init>", "(Lorg/joni/Regex;[BII)V", null, null);
+ machineInit.visitVarInsn(ALOAD, THIS); // this
+ machineInit.visitVarInsn(ALOAD, 1); // Regex
+ machineInit.visitVarInsn(ALOAD, 2); // bytes[]
+ machineInit.visitVarInsn(ILOAD, 3); // p
+ machineInit.visitVarInsn(ILOAD, 4); // end
+ machineInit.visitMethodInsn(INVOKESPECIAL, "org/joni/NativeMachine", "<init>", "(Lorg/joni/Regex;[BII)V");
+ }
+
+ protected final void setupMachineInit() {
+ if (bitsets + ranges + templates > 0) { // ok, some of these are in use, we'd like to cache the factory
+ machine.visitField(ACC_PRIVATE + ACC_FINAL, "factory", "L" + factoryName + ";", null, null);
+ machineInit.visitVarInsn(ALOAD, THIS); // this
+ machineInit.visitVarInsn(ALOAD, 1); // this, Regex
+ machineInit.visitFieldInsn(GETFIELD, "org/joni/Regex", "factory", "Lorg/joni/MatcherFactory;"); // this, factory
+ machineInit.visitTypeInsn(CHECKCAST, factoryName);
+ machineInit.visitFieldInsn(PUTFIELD, machineName, "factory", "L" + factoryName + ";"); // []
+ }
+
+ machineInit.visitInsn(RETURN);
+ machineInit.visitMaxs(0, 0);
+ //init.visitMaxs(5, 5);
+ machineInit.visitEnd();
+ }
+
+ protected final void prepareMachineMatch() {
+ match = machine.visitMethod(ACC_SYNTHETIC, "matchAt", "(III)I", null, null);
+ move(S, SSTART); // s = sstart
+ load("bytes", "[B"); //
+ astore(BYTES); // byte[]bytes = this.bytes
+ }
+
+ protected final void setupMachineMatch() {
+ match.visitInsn(ICONST_M1);
+ match.visitInsn(IRETURN);
+
+ match.visitMaxs(maxStack, maxVars);
+ match.visitEnd();
+ }
+
+ protected final void setupClasses() {
+ byte[]factoryCode = factory.toByteArray();
+ byte[]machineCode = machine.toByteArray();
+
+ if (Config.DEBUG_ASM) {
+ try {
+ FileOutputStream fos;
+ fos = new FileOutputStream(factoryName.substring(factoryName.lastIndexOf('/') + 1) + ".class");
+ fos.write(factoryCode);
+ fos.close();
+ fos = new FileOutputStream(machineName.substring(machineName.lastIndexOf('/') + 1) + ".class");
+ fos.write(machineCode);
+ fos.close();
+ } catch (IOException ioe) {
+ ioe.printStackTrace(Config.err);
+ }
+ }
+
+ loader.defineClass(machineName.replace('/', '.'), machineCode);
+ Class<?> cls = loader.defineClass(factoryName.replace('/', '.'), factoryCode);
+ try {
+ regex.factory = (MatcherFactory)cls.newInstance();
+ } catch(Exception e) {
+ e.printStackTrace(Config.err);
+ }
+ }
+
+ protected final void aload(int var) {
+ match.visitVarInsn(ALOAD, var);
+ }
+
+ protected final void astore(int var) {
+ match.visitVarInsn(ASTORE, var);
+ }
+
+ protected final void loadThis() {
+ match.visitVarInsn(ALOAD, THIS);
+ }
+
+ protected final void load(int var) {
+ match.visitVarInsn(ILOAD, var);
+ }
+
+ protected final void store(int var) {
+ match.visitVarInsn(ISTORE, var);
+ }
+
+ protected final void move(int to, int from) {
+ load(from);
+ store(to);
+ }
+
+ protected final void load(String field, String singature) {
+ loadThis();
+ match.visitFieldInsn(GETFIELD, machineName, field, singature);
+ }
+
+ protected final void load(String field) {
+ load(field, "I");
+ }
+
+ protected final void store(String field, String singature) {
+ loadThis();
+ match.visitFieldInsn(PUTFIELD, machineName, field, singature);
+ }
+
+ protected final void store(String field) {
+ store(field, "I");
+ }
+
+ protected final String installTemplate(byte[]arr, int p, int length) {
+ String templateName = TEMPLATE + ++templates;
+ installArray(templateName, arr, p, length);
+ return templateName;
+ }
+
+ protected final String installCodeRange(int[]arr) {
+ String coreRangeName = CODERANGE + ++ranges;
+ installArray(coreRangeName, arr);
+ return coreRangeName;
+ }
+
+ protected final String installBitSet(int[]arr) {
+ String bitsetName = BITSET + ++bitsets;
+ installArray(bitsetName, arr);
+ return bitsetName;
+ }
+
+ private void installArray(String name, int[]arr) {
+ factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[I", null, null);
+ factoryInit.visitVarInsn(ALOAD, THIS); // this;
+ loadInt(factoryInit, arr.length); // this, length
+ factoryInit.visitIntInsn(NEWARRAY, T_INT); // this, arr
+ for (int i=0;i < arr.length; i++) buildArray(i, arr[i], IASTORE);
+ factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[I");
+ }
+
+ private void installArray(String name, byte[]arr, int p, int length) {
+ factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[B", null, null);
+ factoryInit.visitVarInsn(ALOAD, THIS); // this;
+ loadInt(factoryInit, arr.length); // this, length
+ factoryInit.visitIntInsn(NEWARRAY, T_BYTE); // this, arr
+ for (int i=p, j=0; i < p + length; i++, j++) buildArray(j, arr[i] & 0xff, BASTORE);
+ factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[B");
+ }
+
+ private void buildArray(int index, int value, int type) {
+ factoryInit.visitInsn(DUP); // ... arr, arr
+ loadInt(factoryInit, index); // ... arr, arr, index
+ loadInt(factoryInit, value); // ... arr, arr, index, value
+ factoryInit.visitInsn(type); // ... arr
+ }
+
+ private void loadInt(MethodVisitor mv, int value) {
+ if (value >= -1 && value <= 5) {
+ mv.visitInsn(value + ICONST_0); // ICONST_0 == 3
+ } else if (value >= 6 && value <= 127 || value >= -128 && value <= -2) {
+ mv.visitIntInsn(BIPUSH, value);
+ } else if (value >= 128 && value <= 32767 || value >= -32768 && value <= -129) {
+ mv.visitIntInsn(SIPUSH, value);
+ } else {
+ mv.visitLdcInsn(new Integer(value));
+ }
+ }
+}
diff --git a/src/org/joni/BitSet.java b/src/org/joni/BitSet.java
new file mode 100644
index 0000000..3d9cf99
--- /dev/null
+++ b/src/org/joni/BitSet.java
@@ -0,0 +1,115 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Fixed-size set of {@link #SINGLE_BYTE_SIZE} bits (one per possible byte
+  * value), stored in {@link #BITSET_SIZE} int words ("rooms") of 32 bits each.
+  * Positions are not range-checked beyond what the backing array enforces.
+  */
+ public final class BitSet {
+     static final int BITS_PER_BYTE = 8;
+     public static final int SINGLE_BYTE_SIZE = (1 << BITS_PER_BYTE);
+     private static final int BITS_IN_ROOM = 4 * BITS_PER_BYTE;
+     static final int BITSET_SIZE = (SINGLE_BYTE_SIZE / BITS_IN_ROOM);
+     static final int ROOM_SHIFT = log2(BITS_IN_ROOM);
+
+     final int[] bits = new int[BITSET_SIZE];
+
+     private static final int BITS_TO_STRING_WRAP = 4;
+
+     /** Renders all bits as '0'/'1', broken into {@code BITS_TO_STRING_WRAP} rows. */
+     @Override
+     public String toString() {
+         final int rowLength = SINGLE_BYTE_SIZE / BITS_TO_STRING_WRAP;
+         StringBuilder out = new StringBuilder("BitSet");
+         for (int pos = 0; pos < SINGLE_BYTE_SIZE; pos++) {
+             if (pos % rowLength == 0) {
+                 out.append("\n ");
+             }
+             out.append(at(pos) ? '1' : '0');
+         }
+         return out.toString();
+     }
+
+     /** @return whether the bit at {@code pos} is set */
+     public boolean at(int pos) {
+         final int room = bits[pos >>> ROOM_SHIFT];
+         return (room & bit(pos)) != 0;
+     }
+
+     /** Turns the bit at {@code pos} on. */
+     public void set(int pos) {
+         final int room = pos >>> ROOM_SHIFT;
+         bits[room] = bits[room] | bit(pos);
+     }
+
+     /** Turns the bit at {@code pos} off. */
+     public void clear(int pos) {
+         final int room = pos >>> ROOM_SHIFT;
+         bits[room] = bits[room] & ~bit(pos);
+     }
+
+     /** Flips the bit at {@code pos}. */
+     public void invert(int pos) {
+         final int room = pos >>> ROOM_SHIFT;
+         bits[room] = bits[room] ^ bit(pos);
+     }
+
+     /** Turns every bit off. */
+     public void clear() {
+         for (int room = BITSET_SIZE - 1; room >= 0; room--) {
+             bits[room] = 0;
+         }
+     }
+
+     /** @return true when no bit is set */
+     public boolean isEmpty() {
+         int any = 0;
+         for (int room = 0; room < BITSET_SIZE; room++) {
+             any |= bits[room];
+         }
+         return any == 0;
+     }
+
+     /** Sets all bits in {@code from..to} (inclusive); positions past the last bit are ignored. */
+     public void setRange(int from, int to) {
+         final int last = Math.min(to, SINGLE_BYTE_SIZE - 1);
+         for (int pos = from; pos <= last; pos++) {
+             set(pos);
+         }
+     }
+
+     /** Turns every bit on. */
+     public void setAll() {
+         for (int room = BITSET_SIZE - 1; room >= 0; room--) {
+             bits[room] = -1;
+         }
+     }
+
+     /** Flips every bit in place. */
+     public void invert() {
+         invertTo(this);
+     }
+
+     /** Stores the complement of this set into {@code to}; this set is left unchanged unless {@code to == this}. */
+     public void invertTo(BitSet to) {
+         for (int room = 0; room < BITSET_SIZE; room++) {
+             to.bits[room] = ~bits[room];
+         }
+     }
+
+     /** Intersects this set with {@code other}, in place. */
+     public void and(BitSet other) {
+         for (int room = 0; room < BITSET_SIZE; room++) {
+             bits[room] = bits[room] & other.bits[room];
+         }
+     }
+
+     /** Unites this set with {@code other}, in place. */
+     public void or(BitSet other) {
+         for (int room = 0; room < BITSET_SIZE; room++) {
+             bits[room] = bits[room] | other.bits[room];
+         }
+     }
+
+     /** Overwrites this set with the contents of {@code other}. */
+     public void copy(BitSet other) {
+         System.arraycopy(other.bits, 0, bits, 0, BITSET_SIZE);
+     }
+
+     /** @return the number of bits that are set */
+     public int numOn() {
+         int total = 0;
+         for (int room = 0; room < BITSET_SIZE; room++) {
+             total += Integer.bitCount(bits[room]);
+         }
+         return total;
+     }
+
+     /** @return the single-bit mask of {@code pos} within its room */
+     static int bit(int pos){
+         // Only the low five bits of a shift distance are used for int shifts,
+         // so masking with (BITS_IN_ROOM - 1) selects the same bit as any
+         // reduction modulo a multiple of 32.
+         return 1 << (pos & (BITS_IN_ROOM - 1));
+     }
+
+     /** Floor of the base-2 logarithm; yields 0 for inputs below 2. */
+     private static int log2(int n){
+         int log = 0;
+         for (int rest = n >>> 1; rest != 0; rest >>>= 1) {
+             log++;
+         }
+         return log;
+     }
+
+ }
diff --git a/src/org/joni/BitStatus.java b/src/org/joni/BitStatus.java
new file mode 100644
index 0000000..1440170
--- /dev/null
+++ b/src/org/joni/BitStatus.java
@@ -0,0 +1,55 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Static helpers for a 32-bit status word held in a plain int.
+  * Indexes of {@link #BIT_STATUS_BITS_NUM} or more do not fit the word; the
+  * individual methods document how they treat them.
+  */
+ final class BitStatus {
+     public static final int BIT_STATUS_BITS_NUM = 4 * 8;
+
+     /** @return a status word with no bit set */
+     public static int bsClear() {
+         return 0;
+     }
+
+     /** @return a status word with every bit set */
+     public static int bsAll() {
+         return ~0;
+     }
+
+     /** Tests bit {@code n}; indexes that do not fit the word are answered from bit 0. */
+     public static boolean bsAt(int stats, int n) {
+         final int mask = n < BIT_STATUS_BITS_NUM ? 1 << n : 1;
+         return (stats & mask) != 0;
+     }
+
+     /** Returns {@code stats} with bit {@code n} set; indexes that do not fit the word set bit 0 instead. */
+     public static int bsOnAt(int stats, int n) {
+         final int mask = n < BIT_STATUS_BITS_NUM ? 1 << n : 1;
+         return stats | mask;
+     }
+
+     /** Returns {@code stats} with bit {@code n} set; indexes that do not fit the word leave it unchanged. */
+     public static int bsOnAtSimple(int stats, int n) {
+         return n < BIT_STATUS_BITS_NUM ? stats | (1 << n) : stats;
+     }
+
+     /** Clears the bits of {@code f} in {@code v} when {@code negative}, otherwise sets them. */
+     public static int bsOnOff(int v, int f, boolean negative) {
+         return negative ? v & ~f : v | f;
+     }
+ }
diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
new file mode 100644
index 0000000..bcaea73
--- /dev/null
+++ b/src/org/joni/ByteCodeMachine.java
@@ -0,0 +1,1665 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+import static org.joni.Option.isFindCondition;
+import static org.joni.Option.isFindLongest;
+import static org.joni.Option.isFindNotEmpty;
+import static org.joni.Option.isNotBol;
+import static org.joni.Option.isNotEol;
+import static org.joni.Option.isPosixRegion;
+
+import org.jcodings.CodeRange;
+import org.jcodings.Encoding;
+import org.jcodings.IntHolder;
+import org.joni.ast.CClassNode;
+import org.joni.constants.OPCode;
+import org.joni.constants.OPSize;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+class ByteCodeMachine extends StackMachine {
+ private int bestLen; // return value
+ private int s = 0; // current char
+
+ private int range; // right range
+ private int sprev;
+ private int sstart;
+ private int sbegin;
+
+ private final int[]code; // byte code
+ private int ip; // instruction pointer
+
+ ByteCodeMachine(Regex regex, byte[]bytes, int p, int end) { // all four arguments go straight to the StackMachine constructor
+ super(regex, bytes, p, end);
+ this.code = regex.code; // keep a direct reference to the regex's int[] byte code
+ }
+
+ protected int stkp; // a temporary
+ /**
+  * Builds the capture-history children of {@code node} by scanning the
+  * stack from the shared cursor {@link #stkp} towards the stack top
+  * ({@code stk}).
+  * <p>
+  * A MEM_START entry of a group that is flagged in
+  * {@code regex.captureHistory} opens a child node and is processed
+  * recursively; the MEM_END entry carrying {@code node.group} closes
+  * {@code node}. All other entries are skipped.
+  *
+  * @param node the node whose children (and end position) are being filled in
+  * @return {@code false} when the MEM_END entry of {@code node} was found
+  *         ({@link #stkp} then indexes that entry); {@code true} when the
+  *         stack top was reached first, which ends the whole traversal
+  */
+ private boolean makeCaptureHistoryTree(CaptureTreeNode node) {
+     int k = stkp;
+
+     while (k < stk) {
+         StackEntry e = stack[k];
+         if (e.type == MEM_START) {
+             int n = e.getMemNum();
+             if (n <= Config.MAX_CAPTURE_HISTORY_GROUP && bsAt(regex.captureHistory, n)) {
+                 CaptureTreeNode child = new CaptureTreeNode();
+                 child.group = n;
+                 child.beg = e.getMemPStr() - str;
+                 node.addChild(child);
+                 stkp = k + 1;
+                 if (makeCaptureHistoryTree(child)) return true;
+
+                 // The recursion stopped on the child's MEM_END entry: resume
+                 // there and take the end position from that entry, not from
+                 // the MEM_START entry still referenced by e.
+                 k = stkp;
+                 child.end = stack[k].getMemPStr() - str;
+             }
+         } else if (e.type == MEM_END) {
+             if (e.getMemNum() == node.group) {
+                 node.end = e.getMemPStr() - str;
+                 stkp = k;
+                 return false;
+             }
+         }
+         // Advance past the entry just examined; without this step any entry
+         // that is neither a tracked MEM_START nor the awaited MEM_END would
+         // be re-examined forever.
+         k++;
+     }
+     return true; /* 1: root node ending. */
+ }
+
+ /**
+  * (Re)creates the capture-history tree of {@code region} for the current
+  * match: the root node is reused (after clearing) or allocated, set up as
+  * group 0 spanning {@code sstart..s}, and then populated from the stack.
+  */
+ private void checkCaptureHistory(Region region) {
+     CaptureTreeNode root = region.historyRoot;
+     if (root != null) {
+         root.clear();
+     } else {
+         root = new CaptureTreeNode();
+         region.historyRoot = root;
+     }
+
+     // group 0 stands for the whole match
+     root.group = 0;
+     root.beg = sstart - str;
+     root.end = s - str;
+
+     // scan the stack from its bottom
+     stkp = 0;
+     makeCaptureHistoryTree(root);
+ }
+
+ private byte[]cfbuf;
+ private byte[]cfbuf2;
+
+ /** @return the first case-fold scratch buffer, allocated on first use and reused afterwards */
+ protected final byte[]cfbuf() {
+     if (cfbuf == null) {
+         cfbuf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+     }
+     return cfbuf;
+ }
+
+ /** @return the second case-fold scratch buffer, allocated on first use and reused afterwards */
+ protected final byte[]cfbuf2() {
+     if (cfbuf2 == null) {
+         cfbuf2 = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+     }
+     return cfbuf2;
+ }
+
+ private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen, int textEnd) {
+ byte[]buf1 = cfbuf();
+ byte[]buf2 = cfbuf2();
+
+ int s2 = ps2.value;
+ int end1 = s1 + mbLen;
+
+ while (s1 < end1) {
+ value = s1;
+ int len1 = enc.mbcCaseFold(caseFlodFlag, bytes, this, textEnd, buf1);
+ s1 = value;
+ value = s2;
+ int len2 = enc.mbcCaseFold(caseFlodFlag, bytes, this, textEnd, buf2);
+ s2 = value;
+
+ if (len1 != len2) return false;
+ int p1 = 0;
+ int p2 = 0;
+
+ while (len1-- > 0) {
+ if (buf1[p1] != buf2[p2]) return false;
+ p1++; p2++;
+ }
+ }
+ ps2.value = s2;
+ return true;
+ }
+
+ private void debugMatchBegin() {
+ Config.log.println("match_at: " +
+ "str: " + str +
+ ", end: " + end +
+ ", start: " + this.sstart +
+ ", sprev: " + this.sprev);
+ Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str));
+ }
+
+ private void debugMatchLoop() { // per-instruction trace: text offset, a short preview of the text at s, then the instruction at ip
+ if (Config.DEBUG_MATCH) {
+ Config.log.printf("%4d", (s - str)).print("> \""); // here str is still the field: prints the offset of s relative to it
+ int q, i;
+ for (i=0, q=s; i<7 && q<end && s>=0; i++) { // preview at most 7 characters
+ int len = enc.length(bytes, q, end);
+ while (len-- > 0) if (q < end) Config.log.print(new String(new byte[]{bytes[q++]})); // printed byte by byte, decoded with the platform default charset
+ }
+ String str = q < end ? "...\"" : "\""; // NOTE: this local shadows the str field for the rest of the block
+ q += str.length(); // count the closing text so the padding below lines up
+ Config.log.print(str);
+ for (i=0; i<20-(q-s);i++) Config.log.print(" "); // pad the preview column to 20 characters
+ StringBuilder sb = new StringBuilder();
+ new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); // render the instruction about to be executed
+ Config.log.println(sb.toString());
+ }
+ }
+
+ protected final int matchAt(int range, int sstart, int sprev) { // interpreter entry point: runs the byte code from ip 0 with the text position starting at sstart
+ this.range = range;
+ this.sstart = sstart;
+ this.sprev = sprev;
+
+ stk = 0; // start with an empty stack
+ ip = 0; // start at the first instruction
+
+ if (Config.DEBUG_MATCH) debugMatchBegin();
+
+ init();
+
+ bestLen = -1; // no match recorded yet
+ s = sstart;
+
+ final int[]code = this.code; // local alias of the byte code array
+ while (true) { // leaves only through return (END / FINISH) or the exception in the default case
+ if (Config.DEBUG_MATCH) debugMatchLoop();
+
+ sbegin = s; // position before this instruction; some handlers store it into sprev
+ switch (code[ip++]) { // nothing follows the switch inside the loop, so break and continue both go on to the next instruction
+ case OPCode.END: if (opEnd()) return finish(); break;
+ case OPCode.EXACT1: opExact1(); break;
+ case OPCode.EXACT2: opExact2(); continue;
+ case OPCode.EXACT3: opExact3(); continue;
+ case OPCode.EXACT4: opExact4(); continue;
+ case OPCode.EXACT5: opExact5(); continue;
+ case OPCode.EXACTN: opExactN(); continue;
+
+ case OPCode.EXACTMB2N1: opExactMB2N1(); break;
+ case OPCode.EXACTMB2N2: opExactMB2N2(); continue;
+ case OPCode.EXACTMB2N3: opExactMB2N3(); continue;
+ case OPCode.EXACTMB2N: opExactMB2N(); continue;
+ case OPCode.EXACTMB3N: opExactMB3N(); continue;
+ case OPCode.EXACTMBN: opExactMBN(); continue;
+
+ case OPCode.EXACT1_IC: opExact1IC(); break;
+ case OPCode.EXACTN_IC: opExactNIC(); continue;
+
+ case OPCode.CCLASS: opCClass(); break;
+ case OPCode.CCLASS_MB: opCClassMB(); break;
+ case OPCode.CCLASS_MIX: opCClassMIX(); break;
+ case OPCode.CCLASS_NOT: opCClassNot(); break;
+ case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break;
+ case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break;
+ case OPCode.CCLASS_NODE: opCClassNode(); break;
+
+ case OPCode.ANYCHAR: opAnyChar(); break;
+ case OPCode.ANYCHAR_ML: opAnyCharML(); break;
+ case OPCode.ANYCHAR_STAR: opAnyCharStar(); break;
+ case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break;
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break;
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_STAR: opStateCheckAnyCharStar(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:opStateCheckAnyCharMLStar();break;
+
+ case OPCode.WORD: opWord(); break;
+ case OPCode.NOT_WORD: opNotWord(); break;
+ case OPCode.WORD_BOUND: opWordBound(); continue;
+ case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue;
+ case OPCode.WORD_BEGIN: opWordBegin(); continue;
+ case OPCode.WORD_END: opWordEnd(); continue;
+
+ case OPCode.BEGIN_BUF: opBeginBuf(); continue;
+ case OPCode.END_BUF: opEndBuf(); continue;
+ case OPCode.BEGIN_LINE: opBeginLine(); continue;
+ case OPCode.END_LINE: opEndLine(); continue;
+ case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue;
+ case OPCode.BEGIN_POSITION: opBeginPosition(); continue;
+
+ case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue;
+ case OPCode.MEMORY_START: opMemoryStart(); continue;
+ case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue;
+ case OPCode.MEMORY_END: opMemoryEnd(); continue;
+ case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue;
+ case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue;
+
+ case OPCode.BACKREF1: opBackRef1(); continue;
+ case OPCode.BACKREF2: opBackRef2(); continue;
+ case OPCode.BACKREFN: opBackRefN(); continue;
+ case OPCode.BACKREFN_IC: opBackRefNIC(); continue;
+ case OPCode.BACKREF_MULTI: opBackRefMulti(); continue;
+ case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue;
+ case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue;
+
+ case OPCode.NULL_CHECK_START: opNullCheckStart(); continue;
+ case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue;
+ case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue;
+ case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue;
+
+ case OPCode.JUMP: opJump(); continue;
+ case OPCode.PUSH: opPush(); continue;
+
+ // CEC
+ case OPCode.STATE_CHECK_PUSH: opStateCheckPush(); continue;
+ case OPCode.STATE_CHECK_PUSH_OR_JUMP: opStateCheckPushOrJump(); continue;
+ case OPCode.STATE_CHECK: opStateCheck(); continue;
+
+ case OPCode.POP: opPop(); continue;
+ case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue;
+ case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue;
+
+ case OPCode.REPEAT: opRepeat(); continue;
+ case OPCode.REPEAT_NG: opRepeatNG(); continue;
+ case OPCode.REPEAT_INC: opRepeatInc(); continue;
+ case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue;
+ case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue;
+ case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue;
+
+ case OPCode.PUSH_POS: opPushPos(); continue;
+ case OPCode.POP_POS: opPopPos(); continue;
+ case OPCode.PUSH_POS_NOT: opPushPosNot(); continue;
+ case OPCode.FAIL_POS: opFailPos(); continue;
+ case OPCode.PUSH_STOP_BT: opPushStopBT(); continue;
+ case OPCode.POP_STOP_BT: opPopStopBT(); continue;
+
+ case OPCode.LOOK_BEHIND: opLookBehind(); continue;
+ case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue;
+ case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue;
+
+ // USE_SUBEXP_CALL
+ case OPCode.CALL: opCall(); continue;
+ case OPCode.RETURN: opReturn(); continue;
+
+ // single byte implementations
+ case OPCode.CCLASS_SB: opCClassSb(); break;
+ case OPCode.CCLASS_NOT_SB: opCClassNotSb(); break;
+
+ case OPCode.ANYCHAR_SB: opAnyCharSb(); break;
+ case OPCode.ANYCHAR_ML_SB: opAnyCharMLSb(); break;
+ case OPCode.ANYCHAR_STAR_SB: opAnyCharStarSb(); break;
+ case OPCode.ANYCHAR_ML_STAR_SB: opAnyCharMLStarSb(); break;
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: opAnyCharStarPeekNextSb(); break;
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: opAnyCharMLStarPeekNextSb(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_STAR_SB: opStateCheckAnyCharStarSb(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB: opStateCheckAnyCharMLStarSb();break;
+
+ case OPCode.WORD_SB: opWordSb(); break;
+ case OPCode.NOT_WORD_SB: opNotWordSb(); break;
+ case OPCode.WORD_BOUND_SB: opWordBoundSb(); continue;
+ case OPCode.NOT_WORD_BOUND_SB: opNotWordBoundSb(); continue;
+ case OPCode.WORD_BEGIN_SB: opWordBeginSb(); continue;
+ case OPCode.WORD_END_SB: opWordEndSb(); continue;
+
+ case OPCode.LOOK_BEHIND_SB: opLookBehindSb(); continue;
+
+ case OPCode.EXACT1_IC_SB: opExact1ICSb(); break;
+ case OPCode.EXACTN_IC_SB: opExactNICSb(); continue;
+
+ case OPCode.FINISH:
+ return finish();
+
+ case OPCode.FAIL: opFail(); continue;
+
+ default: // an opcode without a handler means the byte code is corrupt or unsupported
+ throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE);
+
+ } // main switch
+ } // main while
+ }
+
+ /**
+  * Handles {@code OPCode.END}: the pattern has matched the text
+  * {@code sstart..s}.
+  * <p>
+  * When this match is longer than the best one recorded so far, the
+  * overall match and every capture group are written to the region (or
+  * only to {@code msaBegin}/{@code msaEnd} when no region was supplied).
+  * All stored positions are relative to {@code str}. Otherwise the
+  * previously stored result is cleared, unless the POSIX region option
+  * applies.
+  *
+  * @return {@code true} when matching is finished; {@code false} when a
+  *         retry was started through {@code opFail()}
+  */
+ private boolean opEnd() {
+     int n = s - sstart;
+
+     if (n > bestLen) {
+         if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+             if (isFindLongest(regex.options)) {
+                 if (n > msaBestLen) {
+                     msaBestLen = n;
+                     msaBestS = sstart;
+                 } else {
+                     // not longer than the longest match of the whole search: keep the old result
+                     return endBestLength();
+                 }
+             }
+         } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+
+         bestLen = n;
+         final Region region = msaRegion;
+         if (region != null) {
+             // USE_POSIX_REGION_OPTION ... else ...
+             region.beg[0] = msaBegin = sstart - str;
+             region.end[0] = msaEnd = s - str;
+             for (int i = 1; i <= regex.numMem; i++) {
+                 if (repeatStk[memEndStk + i] != INVALID_INDEX) {
+                     // A group position is either stored directly in repeatStk or,
+                     // for groups flagged in btMemStart/btMemEnd, in the stack entry
+                     // that repeatStk points to. Both variants are absolute
+                     // positions, so the "- str" has to apply to the whole
+                     // conditional expression.
+                     region.beg[i] = (bsAt(regex.btMemStart, i) ?
+                                         stack[repeatStk[memStartStk + i]].getMemPStr() :
+                                         repeatStk[memStartStk + i]) - str;
+
+                     region.end[i] = (bsAt(regex.btMemEnd, i) ?
+                                         stack[repeatStk[memEndStk + i]].getMemPStr() :
+                                         repeatStk[memEndStk + i]) - str;
+                 } else {
+                     // group did not take part in the match
+                     region.beg[i] = region.end[i] = Region.REGION_NOTPOS;
+                 }
+             }
+
+             if (Config.USE_CAPTURE_HISTORY) {
+                 if (regex.captureHistory != 0) checkCaptureHistory(region);
+             }
+         } else {
+             msaBegin = sstart - str;
+             msaEnd = s - str;
+         }
+     } else {
+         Region region = msaRegion;
+         // With the POSIX region option compiled in and requested, the stored
+         // result is left alone; in every other case it is reset.
+         if (!Config.USE_POSIX_API_REGION_OPTION || !isPosixRegion(regex.options)) {
+             if (region != null) {
+                 region.clear();
+             } else {
+                 msaBegin = msaEnd = 0;
+             }
+         }
+     }
+     // end_best_len:
+     /* default behavior: return first-matching result. */
+     return endBestLength();
+ }
+
+ private boolean endBestLength() {
+ if (isFindCondition(regex.options)) {
+ if (isFindNotEmpty(regex.options) && s == sstart) {
+ bestLen = -1;
+ {opFail(); return false;} /* for retry */
+ }
+ if (isFindLongest(regex.options) && s < range) {
+ {opFail(); return false;} /* for retry */
+ }
+ }
+ // goto finish;
+ return true;
+ }
+
+ private void opExact1() {
+ if (s >= range || code[ip] != bytes[s++]) {opFail(); return;}
+ //if (s > range) {opFail(); return;}
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ /** Matches two byte operands against the text; {@code sprev} ends up on the last matched byte. */
+ private void opExact2() {
+     if (s + 2 > range) {opFail(); return;}
+     for (int left = 2; left > 0; left--) {
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         if (left == 1) sprev = s;   // last byte of the literal
+         ip++; s++;
+     }
+ }
+
+ private void opExact3() {
+ if (s + 3 > range) {opFail(); return;}
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ sprev = s;
+ ip++; s++;
+ }
+
+ private void opExact4() {
+ if (s + 4 > range) {opFail(); return;}
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ sprev = s;
+ ip++; s++;
+ }
+
+ private void opExact5() {
+ if (s + 5 > range) {opFail(); return;}
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ sprev = s;
+ ip++; s++;
+ }
+
+ private void opExactN() {
+ int tlen = code[ip++];
+ if (s + tlen > range) {opFail(); return;}
+
+ while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;}
+ sprev = s - 1;
+ }
+
+ /** Matches two consecutive operand bytes (one 2-byte unit) against the input, then sets sprev to sbegin. */
+ private void opExactMB2N1() {
+     if (s + 2 > range) {opFail(); return;}
+     for (int i = 0; i < 2; i++) {
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++;
+         s++;
+     }
+     sprev = sbegin; // break;
+ }
+
+ /** Matches four operand bytes (two 2-byte units); sprev is set to the start of the second unit. */
+ private void opExactMB2N2() {
+     if (s + 4 > range) {opFail(); return;}
+     for (int i = 0; i < 4; i++) {
+         if (i == 2) sprev = s; // second unit starts here
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++;
+         s++;
+     }
+ }
+
+ /** Matches six operand bytes (three 2-byte units); sprev is set to the start of the third unit. */
+ private void opExactMB2N3() {
+     if (s + 6 > range) {opFail(); return;}
+     for (int i = 0; i < 6; i++) {
+         if (i == 4) sprev = s; // third unit starts here
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++;
+         s++;
+     }
+ }
+
+ /**
+  * Reads a unit-count operand and matches that many 2-byte units from the
+  * bytecode against the input, leaving sprev two bytes before the new s.
+  * The bounds check is relative to the current position s (s + 2 * tlen must
+  * not exceed range), matching the check used by opExactMBN; the previous
+  * form compared only 2 * tlen with range and could read past range.
+  */
+ private void opExactMB2N() {
+     int tlen = code[ip++];
+     if (s + tlen * 2 > range) {opFail(); return;}
+
+     while (tlen-- > 0) {
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++; s++;
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++; s++;
+     }
+     sprev = s - 2;
+ }
+
+ /**
+  * Reads a unit-count operand and matches that many 3-byte units from the
+  * bytecode against the input, leaving sprev three bytes before the new s.
+  * The bounds check is relative to the current position s (s + 3 * tlen must
+  * not exceed range), matching the check used by opExactMBN; the previous
+  * form compared only 3 * tlen with range and could read past range.
+  */
+ private void opExactMB3N() {
+     int tlen = code[ip++];
+     if (s + tlen * 3 > range) {opFail(); return;}
+
+     while (tlen-- > 0) {
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++; s++;
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++; s++;
+         if (code[ip] != bytes[s]) {opFail(); return;}
+         ip++; s++;
+     }
+     sprev = s - 3;
+ }
+
+ private void opExactMBN() {
+ int tlen = code[ip++]; /* mb-len */
+ int tlen2= code[ip++]; /* string len */
+
+ tlen2 *= tlen;
+ if (s + tlen2 > range) {opFail(); return;}
+
+ while(tlen2-- > 0) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ sprev = s - tlen;
+ }
+
+ private void opExact1IC() {
+ if (s >= range) {opFail(); return;}
+
+ byte[]lowbuf = cfbuf();
+
+ value = s;
+ int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf);
+ s = value;
+
+ if (s > range) {opFail(); return;}
+
+ int q = 0;
+ while (len-- > 0) {
+ if (code[ip] != lowbuf[q]) {opFail(); return;}
+ ip++; q++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** Compares the operand with the input byte at s mapped through the encoding's lower-case table. */
+ private void opExact1ICSb() {
+     if (s >= range) {opFail(); return;}
+     if (code[ip] != enc.toLowerCaseTable()[bytes[s++] & 0xff]) {opFail(); return;}
+     ip++;
+     sprev = sbegin; // break;
+ }
+
+ private void opExactNIC() {
+ int tlen = code[ip++];
+ int endp = ip + tlen;
+
+ byte[]lowbuf = cfbuf();
+
+ while (ip < endp) {
+ sprev = s;
+ if (s >= range) {opFail(); return;}
+
+ value = s;
+ int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf);
+ s = value;
+
+ if (s > range) {opFail(); return;}
+ int q = 0;
+ while (len-- > 0) {
+ if (code[ip] != lowbuf[q]) {opFail(); return;}
+ ip++; q++;
+ }
+ }
+ }
+
+ private void opExactNICSb() {
+ int tlen = code[ip++];
+ if (s + tlen > range) {opFail(); return;}
+ byte[]toLowerTable = enc.toLowerCaseTable();
+ while (tlen-- > 0) if (code[ip++] != toLowerTable[bytes[s++] & 0xff]) {opFail(); return;}
+ sprev = s - 1;
+ }
+
+ private boolean isInBitSet() {
+ int c = bytes[s] & 0xff;
+ return ((code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0);
+ }
+
+ /** Requires the byte at s to be in the inline bit set, then skips the set and advances s by one character. */
+ private void opCClass() {
+     if (s >= range) {opFail(); return;}
+     if (!isInBitSet()) {opFail(); return;}
+     ip += BitSet.BITSET_SIZE;
+     s += enc.length(bytes, s, end); /* OP_CCLASS can match mb-code. \D, \S */
+     sprev = sbegin; // break;
+ }
+
+ /** Requires the byte at s to be in the inline bit set, then skips the set and advances s by one byte. */
+ private void opCClassSb() {
+     if (s >= range) {opFail(); return;}
+     if (!isInBitSet()) {opFail(); return;}
+     ip += BitSet.BITSET_SIZE;
+     s++;
+     sprev = sbegin; // break;
+ }
+
+ private boolean isInClassMB() {
+ int tlen = code[ip++];
+ if (s >= range) return false;
+ int mbLen = enc.length(bytes, s, end);
+ if (s + mbLen > range) return false;
+ int ss = s;
+ s += mbLen;
+ int c = enc.mbcToCode(bytes, ss, s);
+ if (!CodeRange.isInCodeRange(code, ip, c)) return false;
+ ip += tlen;
+ return true;
+ }
+
+ /** Requires a multibyte head at s whose character is in the inline code range. */
+ private void opCClassMB() {
+     // beyond string check
+     if (s >= range) {opFail(); return;}
+     if (!enc.isMbcHead(bytes, s, end)) {opFail(); return;}
+     if (!isInClassMB()) {opFail(); return;} // not!!!
+     sprev = sbegin; // break;
+ }
+
+ /**
+  * Class test with both a bit set and a code range in the bytecode: a
+  * multibyte head is checked against the code range, any other byte against
+  * the bit set (the code range is then skipped).
+  */
+ private void opCClassMIX() {
+     if (s >= range) {opFail(); return;}
+     if (!enc.isMbcHead(bytes, s, end)) {
+         if (!isInBitSet()) {opFail(); return;}
+         ip += BitSet.BITSET_SIZE;
+         ip += code[ip] + 1; // skip the length operand and the code range it describes
+         s++;
+     } else {
+         ip += BitSet.BITSET_SIZE;
+         if (!isInClassMB()) {opFail(); return;}
+     }
+     sprev = sbegin; // break;
+ }
+
+ /** Requires the byte at s NOT to be in the inline bit set, then advances s by one character. */
+ private void opCClassNot() {
+     if (s >= range) {opFail(); return;}
+     if (isInBitSet()) {opFail(); return;}
+     ip += BitSet.BITSET_SIZE;
+     s += enc.length(bytes, s, end);
+     sprev = sbegin; // break;
+ }
+
+ /** Requires the byte at s NOT to be in the inline bit set, then advances s by one byte. */
+ private void opCClassNotSb() {
+     if (s >= range) {opFail(); return;}
+     if (isInBitSet()) {opFail(); return;}
+     ip += BitSet.BITSET_SIZE;
+     s++;
+     sprev = sbegin; // break;
+ }
+
+ private boolean isNotInClassMB() {
+ int tlen = code[ip++];
+ int mbLen = enc.length(bytes, s, end);
+
+ if (!(s + mbLen <= range)) {
+ if (s >= range) return false;
+ s = end;
+ ip += tlen;
+ return true;
+ }
+
+ int ss = s;
+ s += mbLen;
+ int c = enc.mbcToCode(bytes, ss, s);
+
+ if (CodeRange.isInCodeRange(code, ip, c)) return false;
+ ip += tlen;
+ return true;
+ }
+
+ /** Negated code-range class: a non-head byte matches immediately; a multibyte head must be outside the range. */
+ private void opCClassMBNot() {
+     if (s >= range) {opFail(); return;}
+     if (enc.isMbcHead(bytes, s, end)) {
+         if (!isNotInClassMB()) {opFail(); return;}
+     } else {
+         s++;
+         ip += code[ip] + 1; // skip the length operand and the code range it describes
+     }
+     sprev = sbegin; // break;
+ }
+
+ /**
+  * Negated class test with both a bit set and a code range: a multibyte head
+  * must be outside the code range, any other byte must be absent from the
+  * bit set (the code range is then skipped).
+  */
+ private void opCClassMIXNot() {
+     if (s >= range) {opFail(); return;}
+     if (!enc.isMbcHead(bytes, s, end)) {
+         if (isInBitSet()) {opFail(); return;}
+         ip += BitSet.BITSET_SIZE;
+         ip += code[ip] + 1; // skip the length operand and the code range it describes
+         s++;
+     } else {
+         ip += BitSet.BITSET_SIZE;
+         if (!isNotInClassMB()) {opFail(); return;}
+     }
+     sprev = sbegin; // break;
+ }
+
+ private void opCClassNode() {
+ if (s >= range) {opFail(); return;}
+ CClassNode cc = (CClassNode)regex.operands[code[ip++]];
+ int mbLen = enc.length(bytes, s, end);
+ int ss = s;
+ s += mbLen;
+ if (s > range) {opFail(); return;}
+ int c = enc.mbcToCode(bytes, ss, s);
+ if (!cc.isCodeInCCLength(mbLen, c)) {opFail(); return;}
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyChar() {
+ if (s >= range) {opFail(); return;}
+ int n = enc.length(bytes, s, end);
+ if (s + n > range) {opFail(); return;}
+ if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
+ s += n;
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharSb() {
+ if (s >= range) {opFail(); return;}
+ if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharML() {
+ if (s >= range) {opFail(); return;}
+ int n = enc.length(bytes, s, end);
+ if (s + n > range) {opFail(); return;}
+ s += n;
+ sprev = sbegin; // break;
+ }
+
+ /** Consumes one byte at s, whatever its value, as long as s is before range. */
+ private void opAnyCharMLSb() {
+     if (s < range) {
+         s++;
+         sprev = sbegin; // break;
+         return;
+     }
+     opFail();
+ }
+
+ /**
+  * Greedy any-char loop: before each character an alternative is pushed for
+  * the current position, then the character is consumed. Fails on a newline
+  * or on a character that would extend past range.
+  */
+ private void opAnyCharStar() {
+     final byte[] input = this.bytes;
+     while (s < range) {
+         pushAlt(ip, s, sprev);
+         final int charLen = enc.length(input, s, end);
+         if (s + charLen > range) {opFail(); return;}
+         if (enc.isNewLine(input, s, end)) {opFail(); return;}
+         sprev = s;
+         s += charLen;
+     }
+     sprev = sbegin; // break;
+ }
+
+ /** Byte-wise greedy any-char loop: pushes an alternative per position and fails on Encoding.NEW_LINE. */
+ private void opAnyCharStarSb() {
+     final byte[] input = this.bytes;
+     for (; s < range; s++) {
+         pushAlt(ip, s, sprev);
+         if (input[s] == Encoding.NEW_LINE) {opFail(); return;}
+         sprev = s;
+     }
+     sprev = sbegin; // break;
+ }
+
+ /** Greedy any-char loop that also consumes newlines; pushes an alternative before each character. */
+ private void opAnyCharMLStar() {
+     final byte[] input = this.bytes;
+     while (s < range) {
+         pushAlt(ip, s, sprev);
+         final int charLen = enc.length(input, s, end);
+         if (s + charLen > range) {opFail(); return;}
+         sprev = s;
+         s += charLen;
+     }
+     sprev = sbegin; // break;
+ }
+
+ /** Byte-wise greedy loop up to range; pushes an alternative for every position it passes. */
+ private void opAnyCharMLStarSb() {
+     for (; s < range; s++) {
+         pushAlt(ip, s, sprev);
+         sprev = s;
+     }
+     sprev = sbegin; // break;
+ }
+
+ /**
+  * Greedy any-char loop that pushes an alternative (resuming after the
+  * operand) only at positions whose byte equals the peek operand. Fails on a
+  * newline or on a character that would extend past range.
+  */
+ private void opAnyCharStarPeekNext() {
+     final byte peek = (byte)code[ip];
+     final byte[] input = this.bytes;
+     while (s < range) {
+         if (peek == input[s]) pushAlt(ip + 1, s, sprev);
+         final int charLen = enc.length(input, s, end);
+         if (s + charLen > range) {opFail(); return;}
+         if (enc.isNewLine(input, s, end)) {opFail(); return;}
+         sprev = s;
+         s += charLen;
+     }
+     ip++;
+     sprev = sbegin; // break;
+ }
+
+ /** Byte-wise variant of the peek-next loop: alternatives only where the byte equals the operand; fails on Encoding.NEW_LINE. */
+ private void opAnyCharStarPeekNextSb() {
+     final byte peek = (byte)code[ip];
+     final byte[] input = this.bytes;
+     for (; s < range; s++) {
+         final byte current = input[s];
+         if (peek == current) pushAlt(ip + 1, s, sprev);
+         if (current == Encoding.NEW_LINE) {opFail(); return;}
+         sprev = s;
+     }
+     ip++;
+     sprev = sbegin; // break;
+ }
+
+ /** Peek-next loop that also consumes newlines; alternatives only where the byte equals the operand. */
+ private void opAnyCharMLStarPeekNext() {
+     final byte peek = (byte)code[ip];
+     final byte[] input = this.bytes;
+     while (s < range) {
+         if (peek == input[s]) pushAlt(ip + 1, s, sprev);
+         final int charLen = enc.length(input, s, end);
+         if (s + charLen > range) {opFail(); return;}
+         sprev = s;
+         s += charLen;
+     }
+     ip++;
+     sprev = sbegin; // break;
+ }
+
+ /** Byte-wise peek-next loop up to range; alternatives only where the byte equals the operand. */
+ private void opAnyCharMLStarPeekNextSb() {
+     final byte peek = (byte)code[ip];
+     final byte[] input = this.bytes;
+     for (; s < range; s++) {
+         if (peek == input[s]) pushAlt(ip + 1, s, sprev);
+         sprev = s;
+     }
+     ip++;
+     sprev = sbegin; // break;
+ }
+
+ // CEC
+ /**
+  * Any-char loop with state checking: at each position fails when
+  * stateCheckVal(s, mem) reports true, otherwise pushes a state-checked
+  * alternative and consumes one character (no newline, within range).
+  */
+ private void opStateCheckAnyCharStar() {
+     final int checkId = code[ip++];
+     final byte[] input = this.bytes;
+     while (s < range) {
+         if (stateCheckVal(s, checkId)) {opFail(); return;}
+         pushAltWithStateCheck(ip, s, sprev, checkId);
+         final int charLen = enc.length(input, s, end);
+         if (s + charLen > range) {opFail(); return;}
+         if (enc.isNewLine(input, s, end)) {opFail(); return;}
+         sprev = s;
+         s += charLen;
+     }
+     sprev = sbegin; // break;
+ }
+
+ /** Byte-wise state-checked any-char loop; fails on a positive state check or on Encoding.NEW_LINE. */
+ private void opStateCheckAnyCharStarSb() {
+     final int checkId = code[ip++];
+     final byte[] input = this.bytes;
+     for (; s < range; s++) {
+         if (stateCheckVal(s, checkId)) {opFail(); return;}
+         pushAltWithStateCheck(ip, s, sprev, checkId);
+         if (input[s] == Encoding.NEW_LINE) {opFail(); return;}
+         sprev = s;
+     }
+     sprev = sbegin; // break;
+ }
+
+ // CEC
+ /** State-checked any-char loop that also consumes newlines. */
+ private void opStateCheckAnyCharMLStar() {
+     final int checkId = code[ip++];
+     final byte[] input = this.bytes;
+     while (s < range) {
+         if (stateCheckVal(s, checkId)) {opFail(); return;}
+         pushAltWithStateCheck(ip, s, sprev, checkId);
+         final int charLen = enc.length(input, s, end);
+         if (s + charLen > range) {opFail(); return;}
+         sprev = s;
+         s += charLen;
+     }
+     sprev = sbegin; // break;
+ }
+
+ private void opStateCheckAnyCharMLStarSb() {
+ int mem = code[ip++];
+
+ while (s < range) {
+ if (stateCheckVal(s, mem)) {opFail(); return;}
+ pushAltWithStateCheck(ip, s, sprev, mem);
+ sprev = s;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** Consumes one character at s when the encoding classifies it as a word character. */
+ private void opWord() {
+     if (s >= range) {opFail(); return;}
+     if (!enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+     s += enc.length(bytes, s, end);
+     sprev = sbegin; // break;
+ }
+
+ /** Consumes one byte at s when enc.isWord accepts its unsigned value. */
+ private void opWordSb() {
+     if (s >= range) {opFail(); return;}
+     if (!enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+     s++;
+     sprev = sbegin; // break;
+ }
+
+ /** Consumes one character at s when the encoding does NOT classify it as a word character. */
+ private void opNotWord() {
+     if (s >= range) {opFail(); return;}
+     if (enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+     s += enc.length(bytes, s, end);
+     sprev = sbegin; // break;
+ }
+
+ /** Consumes one byte at s when enc.isWord rejects its unsigned value. */
+ private void opNotWordSb() {
+     if (s >= range) {opFail(); return;}
+     if (enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+     s++;
+     sprev = sbegin; // break;
+ }
+
+ private void opWordBound() {
+ if (s == str) {
+ if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+ } else if (s == end) {
+ if (!enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ } else {
+ if (enc.isMbcWord(bytes, s, end) == enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ }
+ }
+
+ private void opWordBoundSb() {
+ if (s == str) {
+ if (s >= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+ } else if (s == end) {
+ if (sprev >= end || !enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ } else {
+ if (enc.isWord(bytes[s] & 0xff) == enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ }
+ }
+
+ private void opNotWordBound() {
+ if (s == str) {
+ if (s < range && enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+ } else if (s == end) {
+ if (enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ } else {
+ if (enc.isMbcWord(bytes, s, end) != enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ }
+ }
+
+ private void opNotWordBoundSb() {
+ if (s == str) {
+ if (s < range && enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+ } else if (s == end) {
+ if (sprev < end && enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ } else {
+ if (enc.isWord(bytes[s] & 0xff) != enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ }
+ }
+
+ private void opWordBegin() {
+ if (s < range && enc.isMbcWord(bytes, s, end)) {
+ if (s == str || !enc.isMbcWord(bytes, sprev, end)) return;
+ }
+ opFail();
+ }
+
+ private void opWordBeginSb() {
+ if (s < range && enc.isWord(bytes[s] & 0xff)) {
+ if (s == str || !enc.isWord(bytes[sprev] & 0xff)) return;
+ }
+ opFail();
+ }
+
+ private void opWordEnd() {
+ if (s != str && enc.isMbcWord(bytes, sprev, end)) {
+ if (s == end || !enc.isMbcWord(bytes, s, end)) return;
+ }
+ opFail();
+ }
+
+ private void opWordEndSb() {
+ if (s != str && enc.isWord(bytes[sprev] & 0xff)) {
+ if (s == end || !enc.isWord(bytes[s] & 0xff)) return;
+ }
+ opFail();
+ }
+
+ /** Asserts that s is at str; fails otherwise. */
+ private void opBeginBuf() {
+     if (s == str) return;
+     opFail();
+ }
+
+ /** Asserts that s is at end; fails otherwise. */
+ private void opEndBuf() {
+     if (s == end) return;
+     opFail();
+ }
+
+ private void opBeginLine() {
+ if (s == str) {
+ if (isNotBol(msaOptions)) opFail();
+ return;
+ } else if (enc.isNewLine(bytes, sprev, end) && s != end) {
+ return;
+ }
+ opFail();
+ }
+
+ private void opEndLine() {
+ if (s == end) {
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ if (str == end || !enc.isNewLine(bytes, sprev, end)) {
+ if (isNotEol(msaOptions)) opFail();
+ }
+ return;
+ } else {
+ if (isNotEol(msaOptions)) opFail();
+ return;
+ }
+ } else if (enc.isNewLine(bytes, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end))) {
+ return;
+ }
+ opFail();
+ }
+
+ private void opSemiEndBuf() {
+ if (s == end) {
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ if (str == end || !enc.isNewLine(bytes, sprev, end)) {
+ if (isNotEol(msaOptions)) opFail();
+ }
+ return;
+ } else {
+ if (isNotEol(msaOptions)) opFail();
+ return;
+ }
+ } else if (enc.isNewLine(bytes, s, end) && (s + enc.length(bytes, s, end)) == end) {
+ return;
+ } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end)) {
+ int ss = s + enc.length(bytes, s, end);
+ ss += enc.length(bytes, ss, end);
+ if (ss == end) return;
+ }
+ opFail();
+ }
+
+ /** Asserts that s equals msaStart; fails otherwise. */
+ private void opBeginPosition() {
+     if (s == msaStart) return;
+     opFail();
+ }
+
+ private void opMemoryStartPush() {
+ int mem = code[ip++];
+ pushMemStart(mem, s);
+ }
+
+ private void opMemoryStart() {
+ int mem = code[ip++];
+ repeatStk[memStartStk + mem] = s;
+ }
+
+ private void opMemoryEndPush() {
+ int mem = code[ip++];
+ pushMemEnd(mem, s);
+ }
+
+ private void opMemoryEnd() {
+ int mem = code[ip++];
+ repeatStk[memEndStk + mem] = s;
+ }
+
+ private void opMemoryEndPushRec() {
+ int mem = code[ip++];
+ int stkp = getMemStart(mem); /* should be before push mem-end. */
+ pushMemEnd(mem, s);
+ repeatStk[memStartStk + mem] = stkp;
+ }
+
+ private void opMemoryEndRec() {
+ int mem = code[ip++];
+ repeatStk[memEndStk + mem] = s;
+ int stkp = getMemStart(mem);
+
+ if (BitStatus.bsAt(regex.btMemStart, mem)) {
+ repeatStk[memStartStk + mem] = stkp;
+ } else {
+ repeatStk[memStartStk + mem] = stack[stkp].getMemPStr();
+ }
+
+ pushMemEndMark(mem);
+ }
+
+ /** Reports whether either the end slot or the start slot of the group holds INVALID_INDEX. */
+ private boolean backrefInvalid(int mem) {
+     if (repeatStk[memEndStk + mem] == INVALID_INDEX) return true;
+     return repeatStk[memStartStk + mem] == INVALID_INDEX;
+ }
+
+ private int backrefStart(int mem) {
+ return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem];
+ }
+
+ private int backrefEnd(int mem) {
+ return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem];
+ }
+
+ private void backref(int mem) {
+ /* if you want to remove following line,
+ you should check in parse and compile time. (numMem) */
+ if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
+
+ int pstart = backrefStart(mem);
+ int pend = backrefEnd(mem);
+
+ int n = pend - pstart;
+ if (s + n > range) {opFail(); return;}
+ sprev = s;
+
+ // STRING_CMP
+ while(n-- > 0) if (bytes[pstart++] != bytes[s++]) {opFail(); return;}
+
+ int len;
+
+ // beyond string check
+ if (sprev < range) {
+ while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
+ }
+ }
+
+ private void opBackRef1() {
+ backref(1);
+ }
+
+ private void opBackRef2() {
+ backref(2);
+ }
+
+ private void opBackRefN() {
+ backref(code[ip++]);
+ }
+
+ /**
+  * Case-insensitive back-reference to the group named by the operand. The
+  * comparison is done by stringCmpIC, which receives and returns the input
+  * position through the value field. On success sprev is moved to the head
+  * of the last character before the new s.
+  */
+ private void opBackRefNIC() {
+     int mem = code[ip++];
+     /* if you want to remove following line,
+        you should check in parse and compile time. (numMem) */
+     if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
+
+     int pstart = backrefStart(mem);
+     int pend = backrefEnd(mem);
+
+     int n = pend - pstart;
+     if (s + n > range) {opFail(); return;}
+     sprev = s;
+
+     value = s;
+     if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;}
+     s = value;
+
+     int len;
+     // beyond string check (same guard as backref()): when sprev is already at
+     // range nothing was consumed, so do not ask the encoding for the length
+     // of a character that may lie past the end of the input.
+     if (sprev < range) {
+         while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
+     }
+ }
+
+ // Back-reference with several candidate groups: reads a count operand followed
+ // by that many group numbers and tries each group in turn. The first group
+ // whose captured bytes equal the input at s wins; fails when none matches.
+ private void opBackRefMulti() {
+ int tlen = code[ip++];
+
+ int i;
+ loop:for (i=0; i<tlen; i++) {
+ int mem = code[ip++];
+ // groups with an unset start or end slot are skipped, not treated as failure
+ if (backrefInvalid(mem)) continue;
+
+ int pstart = backrefStart(mem);
+ int pend = backrefEnd(mem);
+
+ int n = pend - pstart;
+ if (s + n > range) {opFail(); return;}
+
+ sprev = s;
+ // compare on a scratch position so that s is untouched when this candidate mismatches
+ int swork = s;
+
+ while (n-- > 0) {
+ if (bytes[pstart++] != bytes[swork++]) continue loop;
+ }
+
+ s = swork;
+
+ int len;
+
+ // beyond string check
+ if (sprev < range) {
+ while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
+ }
+
+ // skip the group-number operands that were not examined
+ ip += tlen - i - 1; // * SIZE_MEMNUM (1)
+ break; /* success */
+ }
+ // the loop ran to completion without a break: no candidate matched
+ if (i == tlen) {opFail(); return;}
+ }
+
+ /**
+  * Case-insensitive back-reference with several candidate groups: reads a
+  * count operand followed by that many group numbers and tries each group in
+  * turn via stringCmpIC (input position exchanged through the value field).
+  * The first matching group wins; fails when none matches.
+  */
+ private void opBackRefMultiIC() {
+     int tlen = code[ip++];
+
+     int i;
+     loop:for (i=0; i<tlen; i++) {
+         int mem = code[ip++];
+         // groups with an unset start or end slot are skipped
+         if (backrefInvalid(mem)) continue;
+
+         int pstart = backrefStart(mem);
+         int pend = backrefEnd(mem);
+
+         int n = pend - pstart;
+         if (s + n > range) {opFail(); return;}
+
+         sprev = s;
+
+         value = s;
+         if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) continue loop; // STRING_CMP_VALUE_IC
+         s = value;
+
+         int len;
+         // beyond string check (same guard as opBackRefMulti): when sprev is
+         // already at range nothing was consumed, so do not ask the encoding
+         // for the length of a character that may lie past the input.
+         if (sprev < range) {
+             while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
+         }
+
+         // skip the group-number operands that were not examined
+         ip += tlen - i - 1; // * SIZE_MEMNUM (1)
+         break; /* success */
+     }
+     if (i == tlen) {opFail(); return;}
+ }
+
+ private boolean memIsInMemp(int mem, int num, int memp) {
+ for (int i=0; i<num; i++) {
+ int m = code[memp++];
+ if (mem == m) return true;
+ }
+ return false;
+ }
+
+ // USE_BACKREF_AT_LEVEL // (s) and (end) implicit
+ // Walks the stack from the top downwards looking for the most recent capture,
+ // at call-nesting depth `nest`, of any of the memNum groups listed in code at
+ // memp, and compares the captured text with the input at s.
+ // Returns true (with s advanced past the matched text) on a match; false on a
+ // mismatch, when the capture is longer than the remaining input, or when no
+ // suitable MEM_END/MEM_START pair is found.
+ private boolean backrefMatchAtNestedLevel(boolean ignoreCase, int caseFoldFlag,
+ int nest, int memNum, int memp) {
+ int pend = -1;
+ int level = 0;
+ int k = stk - 1;
+
+ while (k >= 0) {
+ StackEntry e = stack[k];
+
+ // the scan runs backwards, so a CALL_FRAME lowers and a RETURN raises the tracked level
+ if (e.type == CALL_FRAME) {
+ level--;
+ } else if (e.type == RETURN) {
+ level++;
+ } else if (level == nest) {
+ if (e.type == MEM_START) {
+ if (memIsInMemp(e.getMemNum(), memNum, memp)) {
+ int pstart = e.getMemPStr();
+ // only usable once a MEM_END entry has been seen above it on the stack
+ if (pend != -1) {
+ if (pend - pstart > end - s) return false; /* or goto next_mem; */
+ int p = pstart;
+
+ // value carries the input position through the comparison
+ value = s;
+ if (ignoreCase) {
+ if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) {
+ return false; /* or goto next_mem; */
+ }
+ } else {
+ while (p < pend) {
+ if (bytes[p++] != bytes[value++]) return false; /* or goto next_mem; */
+ }
+ }
+ s = value;
+
+ return true;
+ }
+ }
+ } else if (e.type == MEM_END) {
+ if (memIsInMemp(e.getMemNum(), memNum, memp)) {
+ pend = e.getMemPStr();
+ }
+ }
+ }
+ k--;
+ }
+ return false;
+ }
+
+ /**
+  * Back-reference resolved at a given nesting level. Operands: ignore-case
+  * flag, level, number of group numbers, then the group numbers themselves.
+  * On success sprev is moved to the head of the last character before the
+  * new s and ip skips the group numbers.
+  */
+ private void opBackRefAtLevel() {
+     int ic = code[ip++];
+     int level = code[ip++];
+     int tlen = code[ip++];
+
+     sprev = s;
+     if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
+         int len;
+         // Only walk forward when something was consumed: for an empty match
+         // sprev == s and the walk is a no-op, but it would still ask the
+         // encoding for a character length at s, possibly at the end of input.
+         if (sprev < s) {
+             while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
+         }
+         ip += tlen; // * SIZE_MEMNUM
+     } else {
+         {opFail(); return;}
+     }
+ }
+
+ /* no need: IS_DYNAMIC_OPTION() == 0 */
+ private void opSetOptionPush() {
+ // option = code[ip++]; // final for now
+ pushAlt(ip, s, sprev);
+ ip += OPSize.SET_OPTION + OPSize.FAIL;
+ }
+
+ /** Intentionally does nothing: the option operand is neither read nor applied. */
+ private void opSetOption() {
+     // option = code[ip++]; // final for now
+     return;
+ }
+
+ private void opNullCheckStart() {
+ int mem = code[ip++];
+ pushNullCheckStart(mem, s);
+ }
+
+ private void nullCheckFound() {
+ // null_check_found:
+ /* empty loop founded, skip next instruction */
+ switch(code[ip++]) {
+ case OPCode.JUMP:
+ case OPCode.PUSH:
+ ip++; // p += SIZE_RELADDR;
+ break;
+ case OPCode.REPEAT_INC:
+ case OPCode.REPEAT_INC_NG:
+ case OPCode.REPEAT_INC_SG:
+ case OPCode.REPEAT_INC_NG_SG:
+ ip++; // p += SIZE_MEMNUM;
+ break;
+ default:
+ throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE);
+ } // switch
+ }
+
+ private void opNullCheckEnd() {
+ int mem = code[ip++];
+ int isNull = nullCheck(mem, s); /* mem: null check id */
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s);
+ }
+
+ nullCheckFound();
+ }
+ }
+
+ // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ private void opNullCheckEndMemST() {
+ int mem = code[ip++]; /* mem: null check id */
+ int isNull = nullCheckMemSt(mem, s);
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s);
+ }
+
+ if (isNull == -1) {opFail(); return;}
+ nullCheckFound();
+ }
+ }
+
+ // USE_SUBEXP_CALL
+ private void opNullCheckEndMemSTPush() {
+ int mem = code[ip++]; /* mem: null check id */
+
+ int isNull;
+ if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
+ isNull = nullCheckMemStRec(mem, s);
+ } else {
+ isNull = nullCheckRec(mem, s);
+ }
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s);
+ }
+
+ if (isNull == -1) {opFail(); return;}
+ nullCheckFound();
+ } else {
+ pushNullCheckEnd(mem);
+ }
+ }
+
+ private void opJump() {
+ ip += code[ip] + 1;
+ }
+
+ private void opPush() {
+ int addr = code[ip++];
+ pushAlt(ip + addr, s, sprev);
+ }
+
+ // CEC
+ private void opStateCheckPush() {
+ int mem = code[ip++];
+ if (stateCheckVal(s, mem)) {opFail(); return;}
+ int addr = code[ip++];
+ pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ }
+
+ // CEC
+ private void opStateCheckPushOrJump() {
+ int mem = code[ip++];
+ int addr= code[ip++];
+
+ if (stateCheckVal(s, mem)) {
+ ip += addr;
+ } else {
+ pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ }
+ }
+
+ // CEC
+ private void opStateCheck() {
+ int mem = code[ip++];
+ if (stateCheckVal(s, mem)) {opFail(); return;}
+ pushStateCheck(s, mem);
+ }
+
+ /** Delegates to popOne(). */
+ private void opPop() {
+     this.popOne();
+ }
+
+ /**
+  * Operands: a relative address followed by one byte. When the input byte at
+  * s equals that byte, an alternative for the relative target is pushed and
+  * execution continues after the operands; otherwise execution jumps to the
+  * relative target.
+  * The range test is done before bytes[s] is read, as in opPushIfPeekNext,
+  * so that a position at the end of the input is never dereferenced.
+  */
+ private void opPushOrJumpExact1() {
+     int addr = code[ip++];
+     // beyond string check
+     if (s < range && code[ip] == bytes[s]) {
+         ip++;
+         pushAlt(ip + addr, s, sprev);
+         return;
+     }
+     ip += addr + 1;
+ }
+
+ private void opPushIfPeekNext() {
+ int addr = code[ip++];
+ // beyond string check
+ if (s < range && code[ip] == bytes[s]) {
+ ip++;
+ pushAlt(ip + addr, s, sprev);
+ return;
+ }
+ ip++;
+ }
+
+ private void opRepeat() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int addr= code[ip++];
+
+ // ensure1();
+ repeatStk[mem] = stk;
+ pushRepeat(mem, ip);
+
+ if (regex.repeatRangeLo[mem] == 0) { // lower
+ pushAlt(ip + addr, s, sprev);
+ }
+ }
+
+ private void opRepeatNG() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int addr= code[ip++];
+
+ // ensure1();
+ repeatStk[mem] = stk;
+ pushRepeat(mem, ip);
+
+ if (regex.repeatRangeLo[mem] == 0) {
+ pushAlt(ip, s, sprev);
+ ip += addr;
+ }
+ }
+
+ private void repeatInc(int mem, int si) {
+ StackEntry e = stack[si];
+
+ e.increaseRepeatCount();
+
+ if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
+ /* end of repeat. Nothing to do. */
+ } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
+ pushAlt(ip, s, sprev);
+ ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
+ } else {
+ ip = e.getRepeatPCode();
+ }
+ pushRepeatInc(si);
+ }
+
+ private void opRepeatInc() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int si = repeatStk[mem];
+ repeatInc(mem, si);
+ }
+
+ private void opRepeatIncSG() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int si = getRepeat(mem);
+ repeatInc(mem, si);
+ }
+
+ private void repeatIncNG(int mem, int si) {
+ StackEntry e = stack[si];
+
+ e.increaseRepeatCount();
+
+ if (e.getRepeatCount() < regex.repeatRangeHi[mem]) {
+ if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
+ int pcode = e.getRepeatPCode();
+ pushRepeatInc(si);
+ pushAlt(pcode, s, sprev);
+ } else {
+ ip = e.getRepeatPCode();
+ pushRepeatInc(si);
+ }
+ } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) {
+ pushRepeatInc(si);
+ }
+ }
+
+ private void opRepeatIncNG() {
+ int mem = code[ip++];
+ int si = repeatStk[mem];
+ repeatIncNG(mem, si);
+ }
+
+ private void opRepeatIncNGSG() {
+ int mem = code[ip++];
+ int si = getRepeat(mem);
+ repeatIncNG(mem, si);
+ }
+
+ /** Saves the current s and sprev through pushPos. */
+ private void opPushPos() {
+     this.pushPos(s, sprev);
+ }
+
+ private void opPopPos() {
+ StackEntry e = stack[posEnd()];
+ s = e.getStatePStr();
+ sprev= e.getStatePStrPrev();
+ }
+
+ private void opPushPosNot() {
+ int addr = code[ip++];
+ pushPosNot(ip + addr, s, sprev);
+ }
+
+ /** Unwinds the stack via popTilPosNot() and then fails. */
+ private void opFailPos() {
+     this.popTilPosNot();
+     this.opFail();
+ }
+
+ /** Delegates to pushStopBT(). */
+ private void opPushStopBT() {
+     this.pushStopBT();
+ }
+
+ /** Delegates to stopBtEnd(). */
+ private void opPopStopBT() {
+     this.stopBtEnd();
+ }
+
+ private void opLookBehind() {
+ int tlen = code[ip++];
+ s = enc.stepBack(bytes, str, s, end, tlen);
+ if (s == -1) {opFail(); return;}
+ sprev = enc.prevCharHead(bytes, str, s, end);
+ }
+
+ private void opLookBehindSb() {
+ int tlen = code[ip++];
+ s -= tlen;
+ if (s < str) {opFail(); return;}
+ sprev = s == str ? -1 : s - 1;
+ }
+
+ private void opPushLookBehindNot() {
+ int addr = code[ip++];
+ int tlen = code[ip++];
+ int q = enc.stepBack(bytes, str, s, end, tlen);
+ if (q == -1) {
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ ip += addr;
+ // return FAIL;
+ } else {
+ pushLookBehindNot(ip + addr, s, sprev);
+ s = q;
+ sprev = enc.prevCharHead(bytes, str, s, end);
+ }
+ }
+
+ /** Unwinds the stack via popTilLookBehindNot() and then fails. */
+ private void opFailLookBehindNot() {
+     this.popTilLookBehindNot();
+     this.opFail();
+ }
+
+ private void opCall() {
+ int addr = code[ip++];
+ pushCallFrame(ip);
+ ip = addr; // absolute address
+ }
+
+ private void opReturn() {
+ ip = sreturn();
+ pushReturn();
+ }
+
+ private void opFail() {
+ if (stack == null) {
+ ip = regex.codeLength - 1;
+ return;
+ }
+
+
+ StackEntry e = pop();
+ ip = e.getStatePCode();
+ s = e.getStatePStr();
+ sprev = e.getStatePStrPrev();
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.getStateCheck() != 0) {
+ e.type = STATE_CHECK_MARK;
+ stk++;
+ }
+ }
+ }
+
+ private int finish() {
+ return bestLen;
+ }
+}
\ No newline at end of file
diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java
new file mode 100644
index 0000000..5efb353
--- /dev/null
+++ b/src/org/joni/ByteCodePrinter.java
@@ -0,0 +1,360 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+import org.joni.ast.CClassNode;
+import org.joni.constants.Arguments;
+import org.joni.constants.OPCode;
+import org.joni.constants.OPSize;
+import org.joni.exception.InternalException;
+
+/**
+ * Debug helper that renders the compiled instruction array of a {@code Regex} as text.
+ * Every instruction is printed as {@code [NAME...operands]}. The operand layout comes from
+ * {@code OPCode.OpCodeArgTypes}; opcodes whose argument type is {@code Arguments.SPECIAL}
+ * are decoded by a dedicated switch arm.
+ */
+class ByteCodePrinter {
+    int[]code;
+    int codeLength;
+
+    Object[]operands;
+    int operantCount;
+    Encoding enc;
+    WarnCallback warnings;
+
+    /** Takes over the code array, operand table, encoding and warning callback of {@code regex}. */
+    public ByteCodePrinter(Regex regex) {
+        code = regex.code;
+        codeLength = regex.codeLength;
+        operands = regex.operands;
+        operantCount = regex.operandLength;
+        enc = regex.enc;
+        warnings = regex.warnings;
+    }
+
+    /** Returns the textual listing of the whole program. */
+    public String byteCodeListToString() {
+        return compiledByteCodeListToString();
+    }
+
+    /**
+     * Appends {@code count} code words starting at index {@code s}, one character per word.
+     * Only the low eight bits of each word are used and they map straight to the character
+     * with the same value, so the listing does not depend on the platform default charset
+     * and no temporary array or String is created per byte.
+     */
+    private void appendRaw(StringBuilder sb, int count, int s) {
+        for (int i = 0; i < count; i++) sb.append((char)(code[s + i] & 0xff));
+    }
+
+    /** Appends {@code ":"} followed by {@code len} raw bytes read from {@code code[s..]}. */
+    private void pString(StringBuilder sb, int len, int s) {
+        sb.append(":");
+        appendRaw(sb, len, s);
+    }
+
+    /** Appends {@code ":len:"} followed by {@code len * mbLen} raw bytes read from {@code code[s..]}. */
+    private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
+        sb.append(":" + len + ":");
+        appendRaw(sb, len * mbLen, s);
+    }
+
+    /** Copies one bit set image out of the code array at {@code bp} and returns its {@code numOn()} count. */
+    private int bitSetNumOn(int bp) {
+        BitSet bs = new BitSet();
+        System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
+        return bs.numOn();
+    }
+
+    /**
+     * Prints a length-prefixed list of memory numbers, separated by {@code ", "}.
+     *
+     * @param bp index of the length word
+     * @return index of the first word after the list
+     */
+    private int pMemList(StringBuilder sb, int bp) {
+        int count = code[bp];
+        bp += OPSize.LENGTH;
+        for (int i = 0; i < count; i++) {
+            int mem = code[bp];
+            bp += OPSize.MEMNUM;
+            if (i > 0) sb.append(", ");
+            sb.append(mem);
+        }
+        return bp;
+    }
+
+    /**
+     * Appends the textual form of the single instruction that starts at {@code bp}.
+     *
+     * @param sb destination buffer
+     * @param bp index of the opcode word inside {@code code}
+     * @return index of the word that follows the instruction and its operands
+     * @throws InternalException when a {@code SPECIAL} opcode has no decoder here
+     */
+    public int compiledByteCodeToString(StringBuilder sb, int bp) {
+        int len, n, mem, addr, scn, cod;
+        CClassNode cc;
+
+        sb.append("[" + OPCode.OpCodeNames[code[bp]]);
+        int argType = OPCode.OpCodeArgTypes[code[bp]];
+        int ip = bp; // remembered for the optional "@address(size)" suffix
+        if (argType != Arguments.SPECIAL) {
+            bp++;
+            switch (argType) {
+            case Arguments.NON:
+                break;
+
+            case Arguments.RELADDR:
+                sb.append(":(" + code[bp] + ")");
+                bp += OPSize.RELADDR;
+                break;
+
+            case Arguments.ABSADDR:
+                sb.append(":(" + code[bp] + ")");
+                bp += OPSize.ABSADDR;
+                break;
+
+            case Arguments.LENGTH:
+                sb.append(":" + code[bp]);
+                bp += OPSize.LENGTH;
+                break;
+
+            case Arguments.MEMNUM:
+                sb.append(":" + code[bp]);
+                bp += OPSize.MEMNUM;
+                break;
+
+            case Arguments.OPTION:
+                sb.append(":" + code[bp]);
+                bp += OPSize.OPTION;
+                break;
+
+            case Arguments.STATE_CHECK:
+                sb.append(":" + code[bp]);
+                bp += OPSize.STATE_CHECK;
+                break;
+            }
+        } else {
+            switch (code[bp++]) {
+            case OPCode.EXACT1:
+            case OPCode.ANYCHAR_STAR_PEEK_NEXT:
+            case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
+            case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
+            case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
+                pString(sb, 1, bp++);
+                break;
+
+            case OPCode.EXACT2:
+            case OPCode.EXACTMB2N1:
+                pString(sb, 2, bp);
+                bp += 2;
+                break;
+
+            case OPCode.EXACT3:
+                pString(sb, 3, bp);
+                bp += 3;
+                break;
+
+            case OPCode.EXACT4:
+            case OPCode.EXACTMB2N2:
+                pString(sb, 4, bp);
+                bp += 4;
+                break;
+
+            case OPCode.EXACT5:
+                pString(sb, 5, bp);
+                bp += 5;
+                break;
+
+            case OPCode.EXACTMB2N3:
+                pString(sb, 6, bp);
+                bp += 6;
+                break;
+
+            case OPCode.EXACTN:
+            case OPCode.EXACTN_IC:
+            case OPCode.EXACTN_IC_SB:
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                pLenString(sb, len, 1, bp);
+                bp += len;
+                break;
+
+            case OPCode.EXACTMB2N:
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                pLenString(sb, len, 2, bp);
+                bp += len * 2;
+                break;
+
+            case OPCode.EXACTMB3N:
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                pLenString(sb, len, 3, bp);
+                bp += len * 3;
+                break;
+
+            case OPCode.EXACTMBN:
+                int mbLen = code[bp];
+                bp += OPSize.LENGTH;
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                sb.append(":" + mbLen + ":" + len + ":");
+                for (n = len * mbLen; n > 0; n--) sb.append((char)(code[bp++] & 0xff));
+                break;
+
+            case OPCode.EXACT1_IC:
+            case OPCode.EXACT1_IC_SB:
+                // the operand is a single character of unknown byte length: copy a window of
+                // code words into a scratch array and let the encoding report the length
+                final int MAX_CHAR_LENGTH = 6;
+                byte[]bytes = new byte[MAX_CHAR_LENGTH];
+                for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i];
+                len = enc.length(bytes, 0, MAX_CHAR_LENGTH);
+                pString(sb, len, bp);
+                bp += len;
+                break;
+
+            case OPCode.CCLASS:
+            case OPCode.CCLASS_SB:
+            case OPCode.CCLASS_NOT:
+            case OPCode.CCLASS_NOT_SB:
+                n = bitSetNumOn(bp);
+                bp += BitSet.BITSET_SIZE;
+                sb.append(":" + n);
+                break;
+
+            case OPCode.CCLASS_MB:
+            case OPCode.CCLASS_MB_NOT:
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                cod = code[bp];
+                //bp += OPSize.CODE_POINT;
+                bp += len;
+                sb.append(":" + cod + ":" + len);
+                break;
+
+            case OPCode.CCLASS_MIX:
+            case OPCode.CCLASS_MIX_NOT:
+                n = bitSetNumOn(bp);
+                bp += BitSet.BITSET_SIZE;
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                cod = code[bp];
+                //bp += OPSize.CODE_POINT;
+                bp += len;
+                sb.append(":" + n + ":" + cod + ":" + len);
+                break;
+
+            case OPCode.CCLASS_NODE:
+                cc = (CClassNode)operands[code[bp]];
+                bp += OPSize.POINTER;
+                n = cc.bs.numOn();
+                sb.append(":" + cc + ":" + n);
+                break;
+
+            case OPCode.BACKREFN_IC:
+                mem = code[bp];
+                bp += OPSize.MEMNUM;
+                sb.append(":" + mem);
+                break;
+
+            case OPCode.BACKREF_MULTI_IC:
+            case OPCode.BACKREF_MULTI:
+                sb.append(" ");
+                bp = pMemList(sb, bp);
+                break;
+
+            case OPCode.BACKREF_WITH_LEVEL: {
+                int option = code[bp];
+                bp += OPSize.OPTION;
+                sb.append(":" + option);
+                int level = code[bp];
+                bp += OPSize.LENGTH;
+                sb.append(":" + level);
+                sb.append(" ");
+                bp = pMemList(sb, bp);
+                break;
+            }
+
+            case OPCode.REPEAT:
+            case OPCode.REPEAT_NG:
+                mem = code[bp];
+                bp += OPSize.MEMNUM;
+                addr = code[bp];
+                bp += OPSize.RELADDR;
+                sb.append(":" + mem + ":" + addr);
+                break;
+
+            case OPCode.PUSH_OR_JUMP_EXACT1:
+            case OPCode.PUSH_IF_PEEK_NEXT:
+                addr = code[bp];
+                bp += OPSize.RELADDR;
+                sb.append(":(" + addr + ")");
+                pString(sb, 1, bp);
+                bp++;
+                break;
+
+            case OPCode.LOOK_BEHIND:
+            case OPCode.LOOK_BEHIND_SB:
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                sb.append(":" + len);
+                break;
+
+            case OPCode.PUSH_LOOK_BEHIND_NOT:
+                addr = code[bp];
+                bp += OPSize.RELADDR;
+                len = code[bp];
+                bp += OPSize.LENGTH;
+                sb.append(":" + len + ":(" + addr + ")");
+                break;
+
+            case OPCode.STATE_CHECK_PUSH:
+            case OPCode.STATE_CHECK_PUSH_OR_JUMP:
+                scn = code[bp];
+                bp += OPSize.STATE_CHECK_NUM;
+                addr = code[bp];
+                bp += OPSize.RELADDR;
+                sb.append(":" + scn + ":(" + addr + ")");
+                break;
+
+            default:
+                throw new InternalException("undefined code: " + code[--bp]);
+            }
+        }
+
+        sb.append("]");
+
+        // @opcode_address(opcode_size)
+        if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append("@" + ip + "(" + (bp - ip) + ")");
+
+        return bp;
+    }
+
+    /**
+     * Builds the full listing: a "code length" header line followed by every instruction.
+     * Instructions are separated by a space, except that a line break is emitted in front
+     * of every fifth one.
+     */
+    private String compiledByteCodeListToString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("code length: " + codeLength + "\n");
+
+        int ncode = 0;
+        int bp = 0;
+        int end = codeLength;
+
+        while (bp < end) {
+            ncode++;
+
+            if (bp > 0) sb.append(ncode % 5 == 0 ? "\n" : " ");
+
+            bp = compiledByteCodeToString(sb, bp);
+        }
+        sb.append("\n");
+        return sb.toString();
+    }
+}
diff --git a/src/org/joni/CaptureTreeNode.java b/src/org/joni/CaptureTreeNode.java
new file mode 100644
index 0000000..dd6549c
--- /dev/null
+++ b/src/org/joni/CaptureTreeNode.java
@@ -0,0 +1,74 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * One node of a capture history tree: the group number it belongs to, the begin/end
+ * positions of the capture, and a growable array of child nodes.
+ */
+public class CaptureTreeNode {
+
+    int group;
+    int beg;
+    int end;
+    int numChildren;
+    CaptureTreeNode[]children;
+
+    /** Creates an empty node: no position ({@code Region.REGION_NOTPOS}), group -1, no children. */
+    CaptureTreeNode() {
+        beg = Region.REGION_NOTPOS;
+        end = Region.REGION_NOTPOS;
+        group = -1;
+    }
+
+    static final int HISTORY_TREE_INIT_ALLOC_SIZE = 8;
+
+    /**
+     * Appends {@code child}. The child array is created on first use with
+     * {@code HISTORY_TREE_INIT_ALLOC_SIZE} slots and doubled whenever it is full.
+     */
+    void addChild(CaptureTreeNode child) {
+        if (children == null) {
+            children = new CaptureTreeNode[HISTORY_TREE_INIT_ALLOC_SIZE];
+        } else if (numChildren >= children.length) {
+            CaptureTreeNode[]tmp = new CaptureTreeNode[children.length << 1];
+            System.arraycopy(children, 0, tmp, 0, children.length);
+            children = tmp;
+        }
+
+        children[numChildren] = child;
+        numChildren++;
+    }
+
+    /**
+     * Resets the node to the state produced by the constructor. The child array itself is
+     * kept for reuse; its used slots are nulled so this node no longer references the
+     * former children.
+     */
+    void clear() {
+        for (int i=0; i<numChildren; i++) {
+            children[i] = null;
+        }
+        numChildren = 0;
+        beg = end = Region.REGION_NOTPOS;
+        group = -1;
+    }
+
+    /**
+     * Returns a deep copy of this node and all of its descendants.
+     * The group number is copied along with the positions; without it every node of the
+     * copy would report the constructor default of -1.
+     */
+    CaptureTreeNode cloneTree() {
+        CaptureTreeNode clone = new CaptureTreeNode();
+        clone.group = group;
+        clone.beg = beg;
+        clone.end = end;
+
+        for (int i=0; i<numChildren; i++) {
+            CaptureTreeNode child = children[i].cloneTree();
+            clone.addChild(child);
+        }
+        return clone;
+    }
+
+}
diff --git a/src/org/joni/CodeRangeBuffer.java b/src/org/joni/CodeRangeBuffer.java
new file mode 100644
index 0000000..51b77e7
--- /dev/null
+++ b/src/org/joni/CodeRangeBuffer.java
@@ -0,0 +1,380 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+/**
+ * Growable int buffer holding an ascending list of inclusive code point ranges.
+ * Layout: {@code p[0]} is the number of ranges n; range i is stored as
+ * {@code p[2*i + 1]} (first code point) and {@code p[2*i + 2]} (last code point).
+ * {@code used} is the number of ints of {@code p} that currently hold data, i.e.
+ * {@code 2*n + 1}.
+ */
+public final class CodeRangeBuffer {
+    private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
+    private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
+
+    int[]p;
+    int used;
+
+    /**
+     * Wraps an existing range table (count word followed by from/to pairs). The array is
+     * not copied. {@code used} covers the count word plus both ints of every range, so a
+     * later {@code expand} or {@code moveLeftAndReduce} keeps the whole table.
+     */
+    public CodeRangeBuffer(int[]ranges) {
+        p = ranges;
+        used = ranges[0] * 2 + 1;
+    }
+
+    /** Creates an empty buffer (zero ranges). */
+    public CodeRangeBuffer() {
+        p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
+        writeCodePoint(0, 0);
+    }
+
+    /** Returns the backing array itself (not a copy). */
+    public int[]getCodeRange() {
+        return p;
+    }
+
+    /** Copy constructor used by {@link #clone()}: duplicates the whole backing array. */
+    private CodeRangeBuffer(CodeRangeBuffer orig) {
+        p = new int[orig.p.length];
+        System.arraycopy(orig.p, 0, p, 0, p.length);
+        used = orig.used;
+    }
+
+    /** Debug dump: {@code used}, the count word and every range in hexadecimal. */
+    @Override
+    public String toString() {
+        StringBuilder buf = new StringBuilder();
+        buf.append("CodeRange");
+        buf.append("\n used: " + used);
+        buf.append("\n code point: " + p[0]);
+        buf.append("\n ranges: ");
+
+        for (int i=0; i<p[0]; i++) {
+            buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." + rangeNumToString(p[i * 2 + 2]) + "]");
+            if (i > 0 && i % 6 == 0) buf.append("\n ");
+        }
+
+        return buf.toString();
+    }
+
+    private static String rangeNumToString(int num){
+        return "0x" + Integer.toString(num, 16);
+    }
+
+    /** Grows the backing array by doubling (at least once) until it holds {@code low} ints. */
+    public void expand(int low) {
+        int length = p.length;
+        do { length <<= 1; } while (length < low);
+        int[]tmp = new int[length];
+        System.arraycopy(p, 0, tmp, 0, used);
+        p = tmp;
+    }
+
+    /** Like {@link #expand(int)} but leaves the array alone when it is already large enough. */
+    public void ensureSize(int size) {
+        int length = p.length;
+        while (length < size ) { length <<= 1; }
+        if (p.length != length) {
+            int[]tmp = new int[length];
+            System.arraycopy(p, 0, tmp, 0, used);
+            p = tmp;
+        }
+    }
+
+    /** Moves {@code n} ints from {@code from} to the higher index {@code to}, growing the array and {@code used} as needed. */
+    private void moveRight(int from, int to, int n) {
+        if (to + n > p.length) expand(to + n);
+        System.arraycopy(p, from, p, to, n);
+        if (to + n > used) used = to + n;
+    }
+
+    /** Moves {@code n} ints from {@code from} to {@code to}; {@code used} is left unchanged. */
+    protected void moveLeft(int from, int to, int n) {
+        System.arraycopy(p, from, p, to, n);
+    }
+
+    /** Moves everything from {@code from} up to {@code used} down to {@code to} and shrinks {@code used} by the distance. */
+    private void moveLeftAndReduce(int from, int to) {
+        System.arraycopy(p, from, p, to, used - from);
+        used -= from - to;
+    }
+
+    /** Stores {@code b} at index {@code pos}, growing the array and {@code used} when {@code pos} lies beyond them. */
+    public void writeCodePoint(int pos, int b) {
+        int u = pos + 1;
+        if (p.length < u) expand(u);
+        p[pos] = b;
+        if (used < u) used = u;
+    }
+
+    /** Returns an independent copy made through the private copy constructor. */
+    @Override
+    public CodeRangeBuffer clone() {
+        return new CodeRangeBuffer(this);
+    }
+
+    // ugly part: these methods should be made OO
+    // add_code_range_to_buf
+    /**
+     * Inserts the range {@code [from, to]} (the bounds are swapped when given in reverse)
+     * and merges it with the existing ranges it reaches.
+     *
+     * @param pbuf buffer to modify, or {@code null} to start a fresh one
+     * @return the buffer that now contains the range
+     * @throws ValueException when the range count would exceed {@code Config.MAX_MULTI_BYTE_RANGES_NUM}
+     */
+    public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) {
+        if (from > to) {
+            int n = from;
+            from = to;
+            to = n;
+        }
+
+        if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode
+
+        int[]p = pbuf.p;
+        int n = p[0];
+
+        // low: index of the first range whose end is >= from
+        int low = 0;
+        int bound = n;
+
+        while (low < bound) {
+            int x = (low + bound) >>> 1;
+            if (from > p[x * 2 + 2]) {
+                low = x + 1;
+            } else {
+                bound = x;
+            }
+        }
+
+        // high: one past the last range whose start is <= to + 1
+        int high = low;
+        bound = n;
+
+        while (high < bound) {
+            int x = (high + bound) >>> 1;
+            if (to >= p[x * 2 + 1] - 1) {
+                high = x + 1;
+            } else {
+                bound = x;
+            }
+        }
+
+        // ranges low..high-1 collapse into the new one: net change of the range count
+        int incN = low + 1 - high;
+
+        if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES);
+
+        if (incN != 1) {
+            if (from > p[low * 2 + 1]) from = p[low * 2 + 1];
+            if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2];
+        }
+
+        if (incN != 0 && high < n) {
+            int fromPos = 1 + high * 2;
+            int toPos = 1 + (low + 1) * 2;
+            int size = (n - high) * 2;
+
+            if (incN > 0) {
+                pbuf.moveRight(fromPos, toPos, size);
+            } else {
+                pbuf.moveLeftAndReduce(fromPos, toPos);
+            }
+        }
+
+        int pos = 1 + low * 2;
+        // pbuf.ensureSize(pos + 2);
+        pbuf.writeCodePoint(pos, from);
+        pbuf.writeCodePoint(pos + 1, to);
+        n += incN;
+        pbuf.writeCodePoint(0, n);
+
+        return pbuf;
+    }
+
+    // add_code_range, be aware of it returning null!
+    /**
+     * Adds {@code [from, to]} unless the range is reversed; a reversed range is either
+     * ignored (returning {@code pbuf} unchanged, which may be {@code null}) or rejected,
+     * depending on {@code env.syntax.allowEmptyRangeInCC()}.
+     */
+    public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
+        if (from >to) {
+            if (env.syntax.allowEmptyRangeInCC()) {
+                return pbuf;
+            } else {
+                throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+            }
+        }
+        return addCodeRangeToBuff(pbuf, from, to);
+    }
+
+    // SET_ALL_MULTI_BYTE_RANGE
+    /** Adds the range from {@code enc.mbcodeStartPosition()} up to {@code ALL_MULTI_BYTE_RANGE}. */
+    protected static CodeRangeBuffer setAllMultiByteRange(Encoding enc, CodeRangeBuffer pbuf) {
+        return addCodeRangeToBuff(pbuf, enc.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE);
+    }
+
+    // ADD_ALL_MULTI_BYTE_RANGE
+    /** Same as {@code setAllMultiByteRange}, but a no-op for single-byte encodings. */
+    public static CodeRangeBuffer addAllMultiByteRange(Encoding enc, CodeRangeBuffer pbuf) {
+        if (!enc.isSingleByte()) return setAllMultiByteRange(enc, pbuf);
+        return pbuf;
+    }
+
+    // not_code_range_buf
+    /**
+     * Builds the complement of {@code bbuf} within
+     * {@code [enc.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE]}. A {@code null} or empty
+     * input yields the full range.
+     */
+    public static CodeRangeBuffer notCodeRangeBuff(Encoding enc, CodeRangeBuffer bbuf) {
+        CodeRangeBuffer pbuf = null;
+
+        if (bbuf == null) return setAllMultiByteRange(enc, pbuf);
+
+        int[]p = bbuf.p;
+        int n = p[0];
+
+        if (n <= 0) return setAllMultiByteRange(enc, pbuf);
+
+        int pre = enc.mbcodeStartPosition();
+
+        int from;
+        int to = 0;
+        for (int i=0; i<n; i++) {
+            from = p[i * 2 + 1];
+            to = p[i * 2 + 2];
+            if (pre <= from - 1) {
+                pbuf = addCodeRangeToBuff(pbuf, pre, from - 1);
+            }
+            if (to == ALL_MULTI_BYTE_RANGE) break; // nothing can follow; also keeps to + 1 from overflowing
+            pre = to + 1;
+        }
+
+        if (to < ALL_MULTI_BYTE_RANGE) pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE);
+        return pbuf;
+    }
+
+    // or_code_range_buf
+    /**
+     * Union of two range lists, each optionally negated ({@code not1}, {@code not2}).
+     * A {@code null} buffer stands for an empty list. The inputs are not modified.
+     */
+    public static CodeRangeBuffer orCodeRangeBuff(Encoding enc, CodeRangeBuffer bbuf1, boolean not1,
+                                                  CodeRangeBuffer bbuf2, boolean not2) {
+        CodeRangeBuffer pbuf = null;
+
+        if (bbuf1 == null && bbuf2 == null) {
+            if (not1 || not2) {
+                return setAllMultiByteRange(enc, pbuf);
+            }
+            return null;
+        }
+
+        if (bbuf2 == null) {
+            CodeRangeBuffer tbuf;
+            boolean tnot;
+            // swap
+            tnot = not1; not1 = not2; not2 = tnot;
+            tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
+        }
+
+        if (bbuf1 == null) {
+            if (not1) {
+                return setAllMultiByteRange(enc, pbuf);
+            } else {
+                if (!not2) {
+                    return bbuf2.clone();
+                } else {
+                    return notCodeRangeBuff(enc, bbuf2);
+                }
+            }
+        }
+
+        if (not1) {
+            CodeRangeBuffer tbuf;
+            boolean tnot;
+            // swap
+            tnot = not1; not1 = not2; not2 = tnot;
+            tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
+        }
+
+        if (!not2 && !not1) { /* 1 OR 2 */
+            pbuf = bbuf2.clone();
+        } else if (!not1) { /* 1 OR (not 2) */
+            pbuf = notCodeRangeBuff(enc, bbuf2);
+        }
+
+        int[]p1 = bbuf1.p;
+        int n1 = p1[0];
+
+        for (int i=0; i<n1; i++) {
+            int from = p1[i * 2 + 1];
+            int to = p1[i * 2 + 2];
+            pbuf = addCodeRangeToBuff(pbuf, from, to);
+        }
+
+        return pbuf;
+    }
+
+    // and_code_range1
+    /**
+     * Adds to {@code pbuf} the parts of {@code [from1, to1]} that are not covered by any of
+     * the {@code n} ranges stored in {@code data} (same layout as {@code p}: count word at
+     * index 0, pairs afterwards). {@code data} is expected to be in ascending order, as
+     * {@code addCodeRangeToBuff} maintains it.
+     */
+    public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, int[]data, int n) {
+        for (int i=0; i<n; i++) {
+            int from2 = data[i * 2 + 1];
+            int to2 = data[i * 2 + 2];
+            if (from2 < from1) {
+                if (to2 < from1) continue; // excluded range lies entirely before the remainder
+                // The excluded range swallows the rest of [from1, to1]. Returning here also
+                // avoids computing to2 + 1 when to2 is 0x7fffffff, which would wrap negative.
+                if (to2 >= to1) return pbuf;
+                from1 = to2 + 1;
+            } else if (from2 <= to1) {
+                if (to2 < to1) {
+                    // hole in the middle: keep the part in front of it, continue behind it
+                    if (from1 <= from2 - 1) {
+                        pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1);
+                    }
+                    from1 = to2 + 1;
+                } else {
+                    to1 = from2 - 1;
+                }
+            } else {
+                // This excluded range (and every later one) starts after to1, so the
+                // remainder [from1, to1] survives untouched and is added below.
+                break;
+            }
+            if (from1 > to1) break;
+        }
+
+        if (from1 <= to1) {
+            pbuf = addCodeRangeToBuff(pbuf, from1, to1);
+        }
+
+        return pbuf;
+    }
+
+    // and_code_range_buf
+    /**
+     * Intersection of two range lists, each optionally negated. Only "1 AND 2" and
+     * "1 AND (not 2)" (after swapping so that list 1 is the non-negated one) produce
+     * ranges; when both lists are negated the result is {@code null}.
+     */
+    public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
+                                                   CodeRangeBuffer bbuf2, boolean not2) {
+        CodeRangeBuffer pbuf = null;
+
+        if (bbuf1 == null) {
+            if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */
+            return null;
+        } else if (bbuf2 == null) {
+            if (not2) return bbuf1.clone();
+            return null;
+        }
+
+        if (not1) {
+            CodeRangeBuffer tbuf;
+            boolean tnot;
+            // swap
+            tnot = not1; not1 = not2; not2 = tnot;
+            tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
+        }
+
+        int[]p1 = bbuf1.p;
+        int n1 = p1[0];
+        int[]p2 = bbuf2.p;
+        int n2 = p2[0];
+
+        if (!not2 && !not1) { /* 1 AND 2 */
+            for (int i=0; i<n1; i++) {
+                int from1 = p1[i * 2 + 1];
+                int to1 = p1[i * 2 + 2];
+
+                for (int j=0; j<n2; j++) {
+                    int from2 = p2[j * 2 + 1];
+                    int to2 = p2[j * 2 + 2];
+
+                    if (from2 > to1) break;
+                    if (to2 < from1) continue;
+                    int from = from1 > from2 ? from1 : from2;
+                    int to = to1 < to2 ? to1 : to2;
+                    pbuf = addCodeRangeToBuff(pbuf, from, to);
+                }
+            }
+        } else if (!not1) { /* 1 AND (not 2) */
+            for (int i=0; i<n1; i++) {
+                int from1 = p1[i * 2 + 1];
+                int to1 = p1[i * 2 + 2];
+                pbuf = andCodeRange1(pbuf, from1, to1, p2, n2);
+            }
+        }
+
+        return pbuf;
+    }
+}
diff --git a/src/org/joni/Compiler.java b/src/org/joni/Compiler.java
new file mode 100644
index 0000000..c9ea261
--- /dev/null
+++ b/src/org/joni/Compiler.java
@@ -0,0 +1,190 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+import org.joni.ast.AnchorNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.constants.NodeType;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.SyntaxException;
+
+/**
+ * Skeleton of a back end that turns the analysed parse tree into executable form.
+ * This class owns the tree walk ({@link #compileTree}) and the splitting of string nodes
+ * into runs; everything that actually emits output is left to the abstract hooks.
+ */
+abstract class Compiler implements ErrorMessages {
+    protected final Analyser analyser;
+    protected final Encoding enc;
+    protected final Regex regex;
+
+    /** Binds the compiler to {@code analyser}, its regex and that regex's encoding. */
+    protected Compiler(Analyser analyser) {
+        this.analyser = analyser;
+        this.regex = analyser.regex;
+        this.enc = regex.enc;
+    }
+
+    /** Runs the three phases in order: {@code prepare()}, the walk over {@code analyser.root}, {@code finish()}. */
+    final void compile() {
+        prepare();
+        compileTree(analyser.root);
+        finish();
+    }
+
+    protected abstract void prepare();
+    protected abstract void finish();
+
+    protected abstract void compileAltNode(ConsAltNode node);
+
+    /** Emits a raw string node as one run with a per-character length of 1; empty nodes emit nothing. */
+    private void compileStringRawNode(StringNode sn) {
+        final int length = sn.length();
+        if (length > 0) {
+            addCompileString(sn.bytes, sn.p, 1 /*sb*/, length, false);
+        }
+    }
+
+    /**
+     * Emits a string node as a sequence of runs, where a run is a maximal stretch of
+     * consecutive characters that all have the same encoded length. Each run is handed to
+     * {@code addCompileString} with its start offset, per-character length and character
+     * count. Empty nodes emit nothing.
+     */
+    private void compileStringNode(StringNode node) {
+        if (node.length() <= 0) return;
+
+        final boolean ignoreCase = node.isAmbig();
+        final byte[]bytes = node.bytes;
+        final int end = node.end;
+
+        int runStart = node.p;
+        int runCharLen = enc.length(bytes, runStart, end);
+        int runChars = 1;
+
+        for (int p = runStart + runCharLen; p < end; ) {
+            final int charLen = enc.length(bytes, p, end);
+            if (charLen != runCharLen) {
+                // character width changed: flush the finished run and open a new one here
+                addCompileString(bytes, runStart, runCharLen, runChars, ignoreCase);
+                runStart = p;
+                runCharLen = charLen;
+                runChars = 0;
+            }
+            runChars++;
+            p += charLen;
+        }
+        addCompileString(bytes, runStart, runCharLen, runChars, ignoreCase);
+    }
+
+    protected abstract void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase);
+
+    protected abstract void compileCClassNode(CClassNode node);
+    protected abstract void compileCTypeNode(CTypeNode node);
+    protected abstract void compileAnyCharNode();
+    protected abstract void compileCallNode(CallNode node);
+    protected abstract void compileBackrefNode(BackRefNode node);
+    protected abstract void compileCECQuantifierNode(QuantifierNode node);
+    protected abstract void compileNonCECQuantifierNode(QuantifierNode node);
+    protected abstract void compileOptionNode(EncloseNode node);
+    protected abstract void compileEncloseNode(EncloseNode node);
+    protected abstract void compileAnchorNode(AnchorNode node);
+
+    /**
+     * Dispatches on {@code node.getType()} to the matching hook. List nodes are walked
+     * element by element here; an unknown node type is reported through
+     * {@code newInternalException(ERR_PARSER_BUG)}.
+     */
+    protected final void compileTree(Node node) {
+        switch (node.getType()) {
+        case NodeType.LIST:
+            for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+                compileTree(cell.car);
+            }
+            break;
+
+        case NodeType.ALT:
+            compileAltNode((ConsAltNode)node);
+            break;
+
+        case NodeType.STR:
+            final StringNode str = (StringNode)node;
+            if (!str.isRaw()) {
+                compileStringNode(str);
+            } else {
+                compileStringRawNode(str);
+            }
+            break;
+
+        case NodeType.CCLASS:
+            compileCClassNode((CClassNode)node);
+            break;
+
+        case NodeType.CTYPE:
+            compileCTypeNode((CTypeNode)node);
+            break;
+
+        case NodeType.CANY:
+            compileAnyCharNode();
+            break;
+
+        case NodeType.BREF:
+            compileBackrefNode((BackRefNode)node);
+            break;
+
+        case NodeType.CALL:
+            // call nodes are silently skipped when subexpression calls are compiled out
+            if (Config.USE_SUBEXP_CALL) {
+                compileCallNode((CallNode)node);
+            }
+            break;
+
+        case NodeType.QTFR:
+            final QuantifierNode qn = (QuantifierNode)node;
+            if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                compileCECQuantifierNode(qn);
+            } else {
+                compileNonCECQuantifierNode(qn);
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            final EncloseNode en = (EncloseNode)node;
+            if (!en.isOption()) {
+                compileEncloseNode(en);
+            } else {
+                compileOptionNode(en);
+            }
+            break;
+
+        case NodeType.ANCHOR:
+            compileAnchorNode((AnchorNode)node);
+            break;
+
+        default:
+            // undefined node type
+            newInternalException(ERR_PARSER_BUG);
+        } // switch
+    }
+
+    /** Compiles the same subtree {@code n} times in a row (not at all when {@code n <= 0}). */
+    protected final void compileTreeNTimes(Node node, int n) {
+        int remaining = n;
+        while (remaining-- > 0) {
+            compileTree(node);
+        }
+    }
+
+    /** Always throws a {@code SyntaxException} carrying {@code message}. */
+    protected void newSyntaxException(String message) {
+        throw new SyntaxException(message);
+    }
+
+    /** Always throws an {@code InternalException} carrying {@code message}. */
+    protected void newInternalException(String message) {
+        throw new InternalException(message);
+    }
+}
diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java
new file mode 100644
index 0000000..07762f0
--- /dev/null
+++ b/src/org/joni/Config.java
@@ -0,0 +1,87 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import java.io.PrintStream;
+
+/**
+ * Compile-time switches and limits of the engine. Every member is a constant (interface
+ * fields are implicitly {@code public static final}), so the explicit modifier is omitted.
+ */
+public interface Config extends org.jcodings.Config {
+    int CHAR_TABLE_SIZE = 256;
+
+    // ---- feature switches ----
+    boolean USE_NAMED_GROUP = true;
+    boolean USE_SUBEXP_CALL = true;
+    boolean USE_BACKREF_WITH_LEVEL = true; /* \k<name+n>, \k<name-n> */
+
+    boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
+    boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */
+    boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = false;
+
+    boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true;
+
+    boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false;
+    boolean USE_CAPTURE_HISTORY = false;
+    boolean USE_VARIABLE_META_CHARS = true;
+    boolean USE_WORD_BEGIN_END = true; /* "\<": word-begin, "\>": word-end */
+    boolean USE_POSIX_API_REGION_OPTION = true; /* needed for POSIX API support */
+    boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true;
+    boolean USE_COMBINATION_EXPLOSION_CHECK = false;
+
+    // ---- limits ----
+    int NREGION = 10;
+    int MAX_BACKREF_NUM = 1000;
+    int MAX_REPEAT_NUM = 100000;
+    int MAX_MULTI_BYTE_RANGES_NUM = 10000;
+
+    boolean USE_WARN = true;
+
+    // internal config
+    boolean USE_PARSE_TREE_NODE_RECYCLE = true;
+    boolean USE_OP_PUSH_OR_JUMP_EXACT = true;
+    boolean USE_SHARED_CCLASS_TABLE = false;
+    boolean USE_QTFR_PEEK_NEXT = true;
+
+    int INIT_MATCH_STACK_SIZE = 64;
+    int DEFAULT_MATCH_STACK_LIMIT_SIZE = 0; /* unlimited */
+    int NUMBER_OF_POOLED_STACKS = 4;
+
+    boolean DONT_OPTIMIZE = false;
+
+    int MAX_CAPTURE_HISTORY_GROUP = 31;
+
+    int CHECK_STRING_THRESHOLD_LEN = 7;
+    int CHECK_BUFF_MAX_SIZE = 0x4000;
+
+    // ---- output streams ----
+    PrintStream log = System.out;
+    PrintStream err = System.err;
+
+    // ---- debug switches: all follow DEBUG_ALL, except the two ASM flags which are fixed to true ----
+    boolean DEBUG_ALL = false;
+    boolean DEBUG = DEBUG_ALL;
+    boolean DEBUG_PARSE_TREE = DEBUG_ALL;
+    boolean DEBUG_COMPILE = DEBUG_ALL;
+    boolean DEBUG_COMPILE_BYTE_CODE_INFO = DEBUG_ALL;
+    boolean DEBUG_SEARCH = DEBUG_ALL;
+    boolean DEBUG_MATCH = DEBUG_ALL;
+    boolean DEBUG_ASM = true;
+    boolean DEBUG_ASM_EXEC = true;
+}
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
new file mode 100644
index 0000000..172132f
--- /dev/null
+++ b/src/org/joni/Lexer.java
@@ -0,0 +1,1385 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.Option.isSingleline;
+import static org.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import org.jcodings.constants.CharacterType;
+import org.jcodings.exception.CharacterPropertyException;
+import org.joni.ast.QuantifierNode;
+import org.joni.constants.AnchorType;
+import org.joni.constants.MetaChar;
+import org.joni.constants.TokenType;
+import org.joni.exception.ErrorMessages;
+
+class Lexer extends ScannerSupport {
+ protected final ScanEnvironment env;
+ protected final Syntax syntax; // fast access to syntax
+ protected final Token token = new Token(); // current token
+
+ protected Lexer(ScanEnvironment env, byte[]bytes, int p, int end) {
+ super(env.enc, bytes, p, end); // encoding, pattern bytes and the [p, end) range are handed to ScannerSupport
+ this.env = env;
+ this.syntax = env.syntax; // cached copy of env.syntax for fast access
+ }
+
+ /** Parses the inside of an interval quantifier after its '{'; on success token becomes INTERVAL with both bounds set.
+ * @return 0: normal {n,m}, 1: not a valid interval but tolerated by the syntax (callers keep '{' as a normal char), 2: fixed {n}
+ * TODO: introduce a dedicated return-code type here
+ */
+ private int fetchRangeQuantifier() {
+ mark(); // pairs with restore() in invalidRangeQuantifier; presumably also what sets _p, which is compared with p below
+ boolean synAllow = syntax.allowInvalidInterval();
+
+ if (!left()) {
+ if (synAllow) {
+ return 1; /* "....{" : OK! */
+ } else {
+ newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
+ }
+ }
+
+ if (!synAllow) {
+ c = peek();
+ if (c == ')' || c == '(' || c == '|') {
+ newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
+ }
+ }
+
+ int low = scanUnsignedNumber();
+ if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+ if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+
+ boolean nonLow = false;
+ if (p == _p) { /* no digits could be read for the lower bound */
+ if (syntax.allowIntervalLowAbbrev()) {
+ low = 0;
+ nonLow = true;
+ } else {
+ return invalidRangeQuantifier(synAllow);
+ }
+ }
+
+ if (!left()) return invalidRangeQuantifier(synAllow);
+
+ fetch();
+ int up;
+ int ret = 0;
+ if (c == ',') {
+ int prev = p; // position before the upper bound; p == prev afterwards means no digits were read
+ up = scanUnsignedNumber();
+ if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+ if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+
+ if (p == prev) {
+ if (nonLow) return invalidRangeQuantifier(synAllow);
+ up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ }
+ } else {
+ if (nonLow) return invalidRangeQuantifier(synAllow);
+ unfetch(); // the char just read was not ','; put it back so the closing-brace check below sees it
+ up = low; /* {n} : exact n times */
+ ret = 2; /* fixed */
+ }
+
+ if (!left()) return invalidRangeQuantifier(synAllow);
+ fetch();
+
+ if (syntax.opEscBraceInterval()) { // escaped-brace form: the closing '}' must be preceded by the escape char too
+ if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow);
+ fetch();
+ }
+
+ if (c != '}') return invalidRangeQuantifier(synAllow);
+
+ if (!isRepeatInfinite(up) && low > up) {
+ newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
+ }
+
+ token.type = TokenType.INTERVAL;
+ token.setRepeatLower(low);
+ token.setRepeatUpper(up);
+
+ return ret; /* 0: normal {n,m}, 2: fixed {n} */
+ }
+
+ private int invalidRangeQuantifier(boolean synAllow) { // malformed {...}: tolerate it (return 1) or raise, depending on synAllow
+ if (synAllow) {
+ restore(); // presumably rewinds to the position saved by mark() in fetchRangeQuantifier (restore() is defined outside this class)
+ return 1; // the code that fetchToken treats as "normal char"
+ } else {
+ newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
+ return 0; // not reached
+ }
+ }
+
+ /* \M-, \C-, \c, or \... : reads one escaped value that follows the escape char, leaves it in c and returns it */
+ private int fetchEscapedValue() {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+ fetch();
+
+ switch(c) {
+
+ case 'M':
+ if (syntax.op2EscCapitalMBarMeta()) {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
+ fetch();
+ if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX);
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
+ fetch();
+ if (c == syntax.metaCharTable.esc) {
+ c = fetchEscapedValue();
+ }
+ c = ((c & 0xff) | 0x80); // keep the low byte and set bit 7
+ } else {
+ fetchEscapedValueBackSlash();
+ }
+ break;
+
+ case 'C':
+ if (syntax.op2EscCapitalCBarControl()) {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+ fetch();
+ if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX);
+ fetchEscapedValueControl();
+ } else {
+ fetchEscapedValueBackSlash();
+ }
+ break;
+
+ case 'c':
+ if (syntax.opEscCControl()) {
+ fetchEscapedValueControl();
+ break; // the control value is final; it must not be run through the backslash conversion as well
+ }
+ /* fall through: \c is not a control prefix in this syntax */
+ default:
+ fetchEscapedValueBackSlash();
+ } // switch
+
+ return c; // the helpers above update the field c in place; it is returned for convenience
+ }
+
+ private void fetchEscapedValueBackSlash() { // plain escape: replace c with whatever the environment maps it to
+ c = env.convertBackslashValue(c);
+ }
+
+ private void fetchEscapedValueControl() { // reads the character after a control prefix and turns c into its control code
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+ fetch();
+ if (c == '?') {
+ c = 0177; // octal literal, i.e. 0x7f
+ } else {
+ if (c == syntax.metaCharTable.esc) {
+ c = fetchEscapedValue(); // nested escape after the control prefix
+ }
+ c &= 0x9f; // keeps bits 0-4 and bit 7, clears the rest
+ }
+ }
+
+ private int nameEndCodePoint(int start) {
+ // closing delimiter for a name opened with '<' or '\''; 0 for any other opener
+ if (start == '<') {
+ return '>';
+ }
+ if (start == '\'') {
+ return '\'';
+ }
+ return 0;
+ }
+
+ // USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
+ /*
+ \k<name+n>, \k<name-n>
+ \k<num+n>, \k<num-n>
+ \k<-num+n>, \k<-num-n>
+ */
+
+ // Returns true when a +n/-n level suffix was read. The name's end offset is passed back in 'value' (rnameEnd); number and level in rbackNum[0] / rlevel[0].
+ private boolean fetchNameWithLevel(int startCode, int[]rbackNum, int[]rlevel) {
+ int src = p;
+ boolean existLevel = false;
+ int isNum = 0; // 0: textual name, 1: digits seen, 2: leading '-' seen but no digit yet
+ int sign = 1;
+
+ int endCode = nameEndCodePoint(startCode);
+ int pnumHead = p;
+ int nameEnd = stop;
+
+ String err = null;
+ if (!left()) {
+ newValueException(ERR_EMPTY_GROUP_NAME);
+ } else {
+ fetch();
+ if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else if (c == '-') {
+ isNum = 2;
+ sign = -1;
+ pnumHead = p; // the number proper starts after the '-'
+ } else if (!enc.isWord(c)) {
+ err = ERR_INVALID_GROUP_NAME;
+ }
+ }
+
+ while (left()) {
+ nameEnd = p;
+ fetch();
+ if (c == endCode || c == ')' || c == '+' || c == '-') {
+ if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
+ break;
+ }
+
+ if (isNum != 0) {
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ // isNum = 0;
+ }
+ } else if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ boolean isEndCode = false;
+ if (err == null && c != endCode) {
+ if (c == '+' || c == '-') {
+ int flag = c == '-' ? -1 : 1;
+ if (!left()) newValueException(ERR_INVALID_GROUP_NAME, src, stop); // pattern ends right after the sign: nothing to fetch
+ fetch();
+ if (!enc.isDigit(c)) newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+ unfetch();
+ int level = scanUnsignedNumber();
+ if (level < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ rlevel[0] = level * flag;
+ existLevel = true;
+ if (!left()) newValueException(ERR_INVALID_GROUP_NAME, src, stop); // pattern ends before the closing delimiter
+ fetch();
+ isEndCode = c == endCode;
+ }
+
+ if (!isEndCode) {
+ err = ERR_INVALID_GROUP_NAME;
+ nameEnd = stop;
+ }
+ }
+
+ if (err == null) {
+ if (isNum != 0) {
+ mark();
+ p = pnumHead; // temporarily rewind to re-scan the digits as a number
+ int backNum = scanUnsignedNumber();
+ restore();
+ if (backNum < 0) {
+ newValueException(ERR_TOO_BIG_NUMBER);
+ } else if (backNum == 0) {
+ newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+ }
+ rbackNum[0] = backNum * sign;
+ }
+ value = nameEnd;
+ return existLevel;
+ } else {
+ newValueException(err, src, nameEnd); // report the recorded error, as fetchNameForNamedGroup / fetchNameForNoNamedGroup do
+ return false; // not reached
+ }
+ }
+
+ // USE_NAMED_GROUP: reads a group name (or number) up to its closing delimiter; returns the number (0 for a textual name), end offset left in 'value'
+ // ref: false -> define name (don't allow number name)
+ // true -> reference name (allow number name)
+ private int fetchNameForNamedGroup(int startCode, boolean ref) {
+ int src = p;
+ value = 0;
+
+ int isNum = 0; // 0: textual name, 1: digits seen, 2: leading '-' seen but no digit yet
+ int sign = 1;
+
+ int endCode = nameEndCodePoint(startCode);
+ int pnumHead = p;
+ int nameEnd = stop;
+
+ String err = null;
+ if (!left()) {
+ newValueException(ERR_EMPTY_GROUP_NAME);
+ } else {
+ fetch();
+ if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+ if (enc.isDigit(c)) {
+ if (ref) {
+ isNum = 1;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ // isNum = 0;
+ }
+ } else if (c == '-') {
+ if (ref) {
+ isNum = 2;
+ sign = -1;
+ pnumHead = p; // the number proper starts after the '-'
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ // isNum = 0;
+ }
+ } else if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ if (err == null) {
+ while (left()) {
+ nameEnd = p;
+ fetch();
+ if (c == endCode || c == ')') {
+ if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
+ break;
+ }
+
+ if (isNum != 0) {
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else {
+ if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ }
+ // isNum = 0;
+ }
+ } else {
+ if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+ }
+
+ if (c != endCode) {
+ err = ERR_INVALID_GROUP_NAME;
+ nameEnd = stop;
+ }
+ if (err != null) newValueException(err, src, nameEnd); // errors recorded while scanning used to be dropped here and the bad name accepted
+ int backNum = 0;
+ if (isNum != 0) {
+ mark();
+ p = pnumHead; // temporarily rewind to re-scan the digits as a number
+ backNum = scanUnsignedNumber();
+ restore();
+ if (backNum < 0) {
+ newValueException(ERR_TOO_BIG_NUMBER);
+ } else if (backNum == 0) {
+ newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
+ }
+ backNum *= sign;
+ }
+ value = nameEnd;
+ return backNum;
+ } else {
+ while (left()) { // skip to the end of the bad name so the error message can quote all of it
+ nameEnd = p;
+ fetch();
+ if (c == endCode || c == ')') break;
+ }
+ if (!left()) nameEnd = stop;
+ newValueException(err, src, nameEnd);
+ return 0; // not reached
+ }
+ }
+
+ // #else USE_NAMED_GROUP: variant used when named groups are disabled; only a (possibly negative) number is accepted
+ // TODO: make it return nameEnd instead of passing it back through 'value'
+ private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
+ int src = p;
+ value = 0;
+
+ int isNum = 0; // assigned below but never read in this variant
+ int sign = 1;
+
+ int endCode = nameEndCodePoint(startCode);
+ int pnumHead = p;
+ int nameEnd = stop;
+
+ String err = null;
+ if (!left()) {
+ newValueException(ERR_EMPTY_GROUP_NAME);
+ } else {
+ fetch();
+ if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else if (c == '-') {
+ isNum = 2;
+ sign = -1;
+ pnumHead = p; // the number proper starts after the '-'
+ } else {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ while(left()) { // keeps scanning to the delimiter even after an error so nameEnd covers the whole name
+ nameEnd = p;
+
+ fetch();
+ if (c == endCode || c == ')') break;
+ if (!enc.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+
+ if (err == null && c != endCode) {
+ err = ERR_INVALID_GROUP_NAME;
+ nameEnd = stop;
+ }
+
+ if (err == null) {
+ mark();
+ p = pnumHead; // temporarily rewind to re-scan the digits as a number
+ int backNum = scanUnsignedNumber();
+ restore();
+ if (backNum < 0) {
+ newValueException(ERR_TOO_BIG_NUMBER);
+ } else if (backNum == 0){
+ newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
+ }
+ backNum *= sign;
+
+ value = nameEnd;
+ return backNum;
+ } else {
+ newValueException(err, src, nameEnd);
+ return 0; // not reached
+ }
+ }
+
+ protected final int fetchName(int startCode, boolean ref) {
+ // Picks the name reader that matches the Config.USE_NAMED_GROUP switch;
+ // both leave the name's end offset in 'value' and return the group number.
+ return Config.USE_NAMED_GROUP
+ ? fetchNameForNamedGroup(startCode, ref)
+ : fetchNameForNoNamedGroup(startCode, ref);
+ }
+
+ private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
+ int p = this.p;
+ int to = this.stop;
+
+ boolean inEsc = false;
+ int i=0;
+
+ while(p < to) {
+ if (inEsc) {
+ inEsc = false;
+ p += enc.length(bytes, p, to);
+ } else {
+ int x = enc.mbcToCode(bytes, p, to);
+ int q = p + enc.length(bytes, p, to);
+ if (x == s[0]) {
+ for (i=1; i<n && q < to; i++) {
+ x = enc.mbcToCode(bytes, q, to);
+ if (x != s[i]) break;
+ q += enc.length(bytes, q, to);
+ }
+ if (i >= n) return true;
+ p += enc.length(bytes, p, to);
+ } else {
+ x = enc.mbcToCode(bytes, p, to);
+ if (x == bad) return false;
+ else if (x == syntax.metaCharTable.esc) inEsc = true;
+ p = q;
+ }
+ }
+ }
+ return false;
+ }
+
+ private static final int send[] = new int[]{':', ']'};
+
+ protected final TokenType fetchTokenInCC() { // reads the next token while inside a character class; fills this.token and returns its type
+ int last;
+ int c2;
+
+ if (!left()) {
+ token.type = TokenType.EOT;
+ return token.type;
+ }
+
+ fetch();
+ token.type = TokenType.CHAR; // default; refined by the branches below
+ token.base = 0;
+ token.setC(c);
+ token.escaped = false;
+
+ if (c == ']') {
+ token.type = TokenType.CC_CLOSE;
+ } else if (c == '-') {
+ token.type = TokenType.CC_RANGE;
+ } else if (c == syntax.metaCharTable.esc) {
+ if (!syntax.backSlashEscapeInCC()) return token.type; // the escape char is an ordinary CHAR in this syntax
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+ fetch();
+ token.escaped = true;
+ token.setC(c);
+
+ switch (c) {
+
+ case 'w':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.WORD);
+ token.setPropNot(false);
+ break;
+
+ case 'W':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.WORD);
+ token.setPropNot(true);
+ break;
+
+ case 'd':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.DIGIT);
+ token.setPropNot(false);
+ break;
+
+ case 'D':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.DIGIT);
+ token.setPropNot(true);
+ break;
+
+ case 's':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.SPACE);
+ token.setPropNot(false);
+ break;
+
+ case 'S':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.SPACE);
+ token.setPropNot(true);
+ break;
+
+ case 'h':
+ if (!syntax.op2EscHXDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.XDIGIT);
+ token.setPropNot(false);
+ break;
+
+ case 'H':
+ if (!syntax.op2EscHXDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.XDIGIT);
+ token.setPropNot(true);
+ break;
+
+ case 'p':
+ case 'P':
+ c2 = peek(); // !!! migrate to peekIs
+ if (c2 == '{' && syntax.op2EscPBraceCharProperty()) {
+ inc();
+ token.type = TokenType.CHAR_PROPERTY;
+ token.setPropNot(c == 'P');
+
+ if (syntax.op2EscPBraceCircumflexNot()) {
+ c2 = fetchTo(); // NOTE(review): no left() check before this read — confirm fetchTo() is safe at the end of the pattern
+ if (c2 == '^') {
+ token.setPropNot(!token.getPropNot()); // a '^' right after '{' inverts the negation chosen by p/P
+ } else {
+ unfetch();
+ }
+ }
+ }
+ break;
+
+ case 'x':
+ if (!left()) break;
+ last = p;
+
+ if (peekIs('{') && syntax.opEscXBraceHex8()) {
+ inc();
+ int num = scanUnsignedHexadecimalNumber(8);
+ if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
+ if (left()) {
+ c2 = peek();
+ if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
+ }
+
+ if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) { // something was read after the '{' and a '}' follows
+ inc();
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ } else {
+ /* nothing could be read, or the format is invalid: rewind to just after the 'x' */
+ p = last;
+ }
+ } else if (syntax.opEscXHex2()) {
+ int num = scanUnsignedHexadecimalNumber(2);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* no digits could be read... */
+ num = 0; /* ...which is not an error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 16;
+ token.setC(num);
+ }
+ break;
+
+ case 'u':
+ if (!left()) break;
+ last = p;
+
+ if (syntax.op2EscUHex4()) {
+ int num = scanUnsignedHexadecimalNumber(4);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* no digits could be read... */
+ num = 0; /* ...which is not an error */
+ }
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ }
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ if (syntax.opEscOctal3()) {
+ unfetch(); // put the first digit back so it is scanned as part of the number
+ last = p;
+ int num = scanUnsignedOctalNumber(3);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* no digits could be read... */
+ num = 0; /* ...which is not an error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 8;
+ token.setC(num);
+ }
+ break;
+
+ default:
+ unfetch(); // fetchEscapedValue() re-reads the escaped character itself
+ int num = fetchEscapedValue();
+ if (token.getC() != num) { // the escape changed the value, so it is no longer the literal character
+ token.setCode(num);
+ token.type = TokenType.CODE_POINT;
+ }
+ break;
+ } // switch
+
+ } else if (c == '[') {
+ if (syntax.opPosixBracket() && peekIs(':')) {
+ token.backP = p; /* position just after the '[' that was read */
+ inc();
+ if (strExistCheckWithEsc(send, send.length, ']')) { // a closing ":]" exists ahead, before any unescaped ']'
+ token.type = TokenType.POSIX_BRACKET_OPEN;
+ } else {
+ unfetch();
+ // remove duplication, goto cc_in_cc;
+ if (syntax.op2CClassSetOp()) {
+ token.type = TokenType.CC_CC_OPEN;
+ } else {
+ env.ccEscWarn("[");
+ }
+ }
+ } else { // cc_in_cc:
+ if (syntax.op2CClassSetOp()) {
+ token.type = TokenType.CC_CC_OPEN;
+ } else {
+ env.ccEscWarn("[");
+ }
+ }
+ } else if (c == '&') {
+ if (syntax.op2CClassSetOp() && left() && peekIs('&')) {
+ inc();
+ token.type = TokenType.CC_AND;
+ }
+ }
+ return token.type;
+ }
+
+ protected final int backrefRelToAbs(int relNo) { // turns a relative (negative) group number into an absolute one
+ return env.numMem + 1 + relNo; // so relNo == -1 yields env.numMem
+ }
+
+ protected final TokenType fetchToken() { // reads the next token outside a character class; fills this.token and returns its type
+ int last;
+
+ // mark(); // out
+
+ start:
+ while(true) { // restarted via "continue start" after (?#...) comments and extended-mode whitespace/comments
+
+ if (!left()) {
+ token.type = TokenType.EOT;
+ return token.type;
+ }
+
+ token.type = TokenType.STRING; // default; refined by the branches below
+ token.base = 0;
+ token.backP = p;
+
+ fetch();
+
+ if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+
+ token.backP = p;
+ fetch();
+
+ token.setC(c);
+ token.escaped = true;
+ switch(c) {
+
+ case '*':
+ if (!syntax.opEscAsteriskZeroInf()) break;
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(0);
+ token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+ greedyCheck();
+ break;
+
+ case '+':
+ if (!syntax.opEscPlusOneInf()) break;
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(1);
+ token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+ greedyCheck();
+ break;
+
+ case '?':
+ if (!syntax.opEscQMarkZeroOne()) break;
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(0);
+ token.setRepeatUpper(1);
+ greedyCheck();
+ break;
+
+ case '{':
+ if (!syntax.opEscBraceInterval()) break;
+ switch (fetchRangeQuantifier()) {
+ case 0:
+ greedyCheck();
+ break;
+ case 2:
+ if (syntax.fixedIntervalIsGreedyOnly()) {
+ possessiveCheck();
+ } else {
+ greedyCheck();
+ }
+ break;
+ default: /* 1 : normal char */
+ } // inner switch
+ break;
+
+ case '|':
+ if (!syntax.opEscVBarAlt()) break;
+ token.type = TokenType.ALT;
+ break;
+
+ case '(':
+ if (!syntax.opEscLParenSubexp()) break;
+ token.type = TokenType.SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (!syntax.opEscLParenSubexp()) break;
+ token.type = TokenType.SUBEXP_CLOSE;
+ break;
+
+ case 'w':
+ if (!syntax.opEscWWord()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.WORD);
+ token.setPropNot(false);
+ break;
+
+ case 'W':
+ if (!syntax.opEscWWord()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.WORD);
+ token.setPropNot(true);
+ break;
+
+ case 'b':
+ if (!syntax.opEscBWordBound()) break;
+ token.type = TokenType.ANCHOR;
+ token.setAnchor(AnchorType.WORD_BOUND);
+ break;
+
+ case 'B':
+ if (!syntax.opEscBWordBound()) break;
+ token.type = TokenType.ANCHOR;
+ token.setAnchor(AnchorType.NOT_WORD_BOUND);
+ break;
+
+ case '<':
+ if (Config.USE_WORD_BEGIN_END) {
+ if (!syntax.opEscLtGtWordBeginEnd()) break;
+ token.type = TokenType.ANCHOR;
+ token.setAnchor(AnchorType.WORD_BEGIN);
+ break;
+ } // USE_WORD_BEGIN_END
+ break; // ?
+
+ case '>':
+ if (Config.USE_WORD_BEGIN_END) {
+ if (!syntax.opEscLtGtWordBeginEnd()) break;
+ token.type = TokenType.ANCHOR;
+ token.setAnchor(AnchorType.WORD_END);
+ break;
+ } // USE_WORD_BEGIN_END
+ break; // ?
+
+ case 's':
+ if (!syntax.opEscSWhiteSpace()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.SPACE);
+ token.setPropNot(false);
+ break;
+
+ case 'S':
+ if (!syntax.opEscSWhiteSpace()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.SPACE);
+ token.setPropNot(true);
+ break;
+
+ case 'd':
+ if (!syntax.opEscDDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.DIGIT);
+ token.setPropNot(false);
+ break;
+
+ case 'D':
+ if (!syntax.opEscDDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.DIGIT);
+ token.setPropNot(true);
+ break;
+
+ case 'h':
+ if (!syntax.op2EscHXDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.XDIGIT);
+ token.setPropNot(false);
+ break;
+
+ case 'H':
+ if (!syntax.op2EscHXDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.XDIGIT);
+ token.setPropNot(true);
+ break;
+
+ case 'A':
+ if (!syntax.opEscAZBufAnchor()) break;
+ // begin_buf label
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(AnchorType.BEGIN_BUF);
+ break;
+
+ case 'Z':
+ if (!syntax.opEscAZBufAnchor()) break;
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(AnchorType.SEMI_END_BUF);
+ break;
+
+ case 'z':
+ if (!syntax.opEscAZBufAnchor()) break;
+ // end_buf label
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(AnchorType.END_BUF);
+ break;
+
+ case 'G':
+ if (!syntax.opEscCapitalGBeginAnchor()) break;
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(AnchorType.BEGIN_POSITION);
+ break;
+
+ case '`':
+ if (!syntax.op2EscGnuBufAnchor()) break;
+ // goto begin_buf
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(AnchorType.BEGIN_BUF);
+ break;
+
+ case '\'':
+ if (!syntax.op2EscGnuBufAnchor()) break;
+ // goto end_buf
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(AnchorType.END_BUF);
+ break;
+
+ case 'x': // extract to helper for all 'x'
+ if (!left()) break;
+ last = p;
+ if (peekIs('{') && syntax.opEscXBraceHex8()) {
+ inc();
+ int num = scanUnsignedHexadecimalNumber(8);
+ if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
+ if (left()) {
+ if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
+ }
+
+ if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) { // something was read after the '{' and a '}' follows
+ inc();
+ token.type = TokenType.CODE_POINT;
+ token.setCode(num);
+ } else {
+ /* nothing could be read, or the format is invalid: rewind to just after the 'x' */
+ p = last;
+ }
+ } else if (syntax.opEscXHex2()) {
+ int num = scanUnsignedHexadecimalNumber(2);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* no digits could be read... */
+ num = 0; /* ...which is not an error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 16;
+ token.setC(num);
+ }
+ break;
+
+ case 'u': // extract to helper
+ if (!left()) break;
+ last = p;
+
+ if (syntax.op2EscUHex4()) {
+ int num = scanUnsignedHexadecimalNumber(4);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* no digits could be read... */
+ num = 0; /* ...which is not an error */
+ }
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ }
+ break;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ unfetch(); // put the first digit back so it is scanned as part of the number
+ last = p;
+ int num = scanUnsignedNumber();
+ if (num < 0 || num > Config.MAX_BACKREF_NUM) {
+ // goto skip_backref
+ } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
+ if (syntax.strictCheckBackref()) {
+ if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF);
+ }
+ token.type = TokenType.BACKREF;
+ token.setBackrefNum(1);
+ token.setBackrefRef1(num);
+ token.setBackrefByName(false);
+ if (Config.USE_BACKREF_WITH_LEVEL) token.setBackrefExistLevel(false);
+ break;
+ }
+ // skip_backref:
+ if (c == '8' || c == '9') {
+ /* normal char */
+ p = last;
+ inc();
+ break;
+ }
+ p = last; // not a back-reference: rewind and retry the digits as an octal escape
+ /* fall through */
+
+ case '0':
+ if (syntax.opEscOctal3()) {
+ last = p;
+ num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* no digits could be read... */
+ num = 0; /* ...which is not an error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 8;
+ token.setC(num);
+ } else if (c != '0') {
+ inc();
+ }
+ break;
+
+ case 'k':
+ if (Config.USE_NAMED_GROUP) {
+ if (syntax.op2EscKNamedBackref() && left()) { // left(): a pattern ending in the escaped 'k' has no delimiter to fetch
+ fetch();
+ if (c =='<' || c == '\'') {
+ last = p;
+ int backNum;
+ if (Config.USE_BACKREF_WITH_LEVEL) {
+ int[]rbackNum = new int[1];
+ int[]rlevel = new int[1];
+ token.setBackrefExistLevel(fetchNameWithLevel(c, rbackNum, rlevel));
+ token.setBackrefLevel(rlevel[0]);
+ backNum = rbackNum[0];
+ } else {
+ backNum = fetchName(c, true);
+ } // USE_BACKREF_AT_LEVEL
+ int nameEnd = value; // set by fetchNameWithLevel/fetchName
+
+ if (backNum != 0) {
+ if (backNum < 0) {
+ backNum = backrefRelToAbs(backNum);
+ if (backNum <= 0) newValueException(ERR_INVALID_BACKREF);
+ }
+
+ if (syntax.strictCheckBackref() && (backNum > env.numMem || env.memNodes == null)) {
+ newValueException(ERR_INVALID_BACKREF);
+ }
+ token.type = TokenType.BACKREF;
+ token.setBackrefByName(false);
+ token.setBackrefNum(1);
+ token.setBackrefRef1(backNum);
+ } else {
+ NameEntry e = env.reg.nameToGroupNumbers(bytes, last, nameEnd);
+ if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, last, nameEnd);
+
+ if (syntax.strictCheckBackref()) {
+ if (e.backNum == 1) {
+ if (e.backRef1 > env.numMem ||
+ env.memNodes == null ||
+ env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF);
+ } else {
+ for (int i=0; i<e.backNum; i++) {
+ if (e.backRefs[i] > env.numMem ||
+ env.memNodes == null ||
+ env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF);
+ }
+ }
+ }
+
+ token.type = TokenType.BACKREF;
+ token.setBackrefByName(true);
+
+ if (e.backNum == 1) {
+ token.setBackrefNum(1);
+ token.setBackrefRef1(e.backRef1);
+ } else {
+ token.setBackrefNum(e.backNum);
+ token.setBackrefRefs(e.backRefs);
+ }
+ }
+ } else {
+ unfetch();
+ }
+ }
+
+ break;
+ } // USE_NAMED_GROUP
+ break;
+
+ case 'g':
+ if (Config.USE_SUBEXP_CALL) {
+ if (syntax.op2EscGSubexpCall() && left()) { // left(): a pattern ending in the escaped 'g' has no delimiter to fetch
+ fetch();
+ if (c == '<' || c == '\'') {
+ last = p;
+ int gNum = fetchName(c, true);
+ int nameEnd = value; // set by fetchName
+ token.type = TokenType.CALL;
+ token.setCallNameP(last);
+ token.setCallNameEnd(nameEnd);
+ token.setCallGNum(gNum);
+ } else {
+ unfetch();
+ }
+ }
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case 'Q':
+ if (syntax.op2EscCapitalQQuote()) {
+ token.type = TokenType.QUOTE_OPEN;
+ }
+ break;
+
+ case 'p':
+ case 'P':
+ if (peekIs('{') && syntax.op2EscPBraceCharProperty()) {
+ inc();
+ token.type = TokenType.CHAR_PROPERTY;
+ token.setPropNot(c == 'P');
+
+ if (syntax.op2EscPBraceCircumflexNot() && left()) { // left(): nothing to fetch when the pattern ends right after the '{'
+ fetch();
+ if (c == '^') {
+ token.setPropNot(!token.getPropNot()); // a '^' right after '{' inverts the negation chosen by p/P
+ } else {
+ unfetch();
+ }
+ }
+ }
+ break;
+
+ default:
+ unfetch(); // fetchEscapedValue() re-reads the escaped character itself
+ num = fetchEscapedValue();
+
+ /* set_raw: */
+ if (token.getC() != num) {
+ token.type = TokenType.CODE_POINT;
+ token.setCode(num);
+ } else { /* string */
+ p = token.backP + enc.length(bytes, token.backP, stop);
+ }
+ break;
+
+ } // switch (c)
+
+ } else {
+ token.setC(c);
+ token.escaped = false;
+
+ // remove code duplication
+ if (Config.USE_VARIABLE_META_CHARS) {
+ if (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters()) {
+ if (c == syntax.metaCharTable.anyChar) { // goto any_char
+ token.type = TokenType.ANYCHAR;
+ break;
+ } else if (c == syntax.metaCharTable.anyTime) { // goto anytime
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(0);
+ token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+ greedyCheck();
+ break;
+ } else if (c == syntax.metaCharTable.zeroOrOneTime) { // goto zero_or_one_time
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(0);
+ token.setRepeatUpper(1);
+ greedyCheck();
+ break;
+ } else if (c == syntax.metaCharTable.oneOrMoreTime) { // goto one_or_more_time
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(1);
+ token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+ greedyCheck();
+ break;
+ } else if (c == syntax.metaCharTable.anyCharAnyTime) { // any char, any number of times
+ token.type = TokenType.ANYCHAR_ANYTIME;
+ break;
+ // goto out
+ }
+ }
+ } // USE_VARIABLE_META_CHARS
+
+ {
+ switch(c) {
+
+ case '.':
+ if (!syntax.opDotAnyChar()) break;
+ // any_char:
+ token.type = TokenType.ANYCHAR;
+ break;
+
+ case '*':
+ if (!syntax.opAsteriskZeroInf()) break;
+ // anytime:
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(0);
+ token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+ greedyCheck();
+ break;
+
+ case '+':
+ if (!syntax.opPlusOneInf()) break;
+ // one_or_more_time:
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(1);
+ token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+ greedyCheck();
+ break;
+
+ case '?':
+ if (!syntax.opQMarkZeroOne()) break;
+ // zero_or_one_time:
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(0);
+ token.setRepeatUpper(1);
+ greedyCheck();
+ break;
+
+ case '{':
+ if (!syntax.opBraceInterval()) break;
+ switch(fetchRangeQuantifier()) {
+ case 0:
+ greedyCheck();
+ break;
+ case 2:
+ if (syntax.fixedIntervalIsGreedyOnly()) {
+ possessiveCheck();
+ } else {
+ greedyCheck();
+ }
+ break;
+ default: /* 1 : normal char */
+ } // inner switch
+ break;
+
+ case '|':
+ if (!syntax.opVBarAlt()) break;
+ token.type = TokenType.ALT;
+ break;
+
+ case '(':
+ if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
+ inc();
+ if (peekIs('#')) { // (?#...) comment group: skip to its closing ')'
+ fetch();
+ while (true) {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+ fetch();
+ if (c == syntax.metaCharTable.esc) {
+ if (left()) fetch(); // an escaped char inside the comment cannot close it
+ } else {
+ if (c == ')') break;
+ }
+ }
+ continue start; // goto start
+ }
+ unfetch();
+ }
+
+ if (!syntax.opLParenSubexp()) break;
+ token.type = TokenType.SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (!syntax.opLParenSubexp()) break;
+ token.type = TokenType.SUBEXP_CLOSE;
+ break;
+
+ case '^':
+ if (!syntax.opLineAnchor()) break;
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
+ break;
+
+ case '$':
+ if (!syntax.opLineAnchor()) break;
+ token.type = TokenType.ANCHOR;
+ token.setSubtype(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
+ break;
+
+ case '[':
+ if (!syntax.opBracketCC()) break;
+ token.type = TokenType.CC_CC_OPEN;
+ break;
+
+ case ']':
+ //if (*src > env->pattern) /* /].../ is allowed. */
+ //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
+ break;
+
+ case '#':
+ if (Option.isExtend(env.option)) { // extended mode: '#' starts a comment that runs to the end of the line
+ while (left()) {
+ fetch();
+ if (enc.isNewLine(c)) break;
+ }
+ continue start; // goto start
+
+ }
+ break;
+
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ if (Option.isExtend(env.option)) { // extended mode: unescaped whitespace is skipped
+ continue start; // goto start
+ }
+ break;
+
+ default: // string
+ break;
+
+ } // switch
+ }
+ }
+
+ break;
+ } // while
+ return token.type;
+ }
+
+ private void greedyCheck() {
+ // a '?' after a repeat makes it lazy when the syntax allows; otherwise look for a possessive '+'
+ boolean lazy = left() && peekIs('?') && syntax.opQMarkNonGreedy();
+ if (!lazy) {
+ possessiveCheck();
+ return;
+ }
+ fetch(); // consume the '?'
+ token.setRepeatGreedy(false);
+ token.setRepeatPossessive(false);
+ }
+
+ private void possessiveCheck() {
+ // A trailing '+' makes the repeat possessive, but only when the syntax enables
+ // that for this kind of quantifier (interval {n,m} vs. the other repeat operators).
+ boolean possessive = false;
+ if (left() && peekIs('+')) {
+ possessive = token.type == TokenType.INTERVAL
+ ? syntax.op2PlusPossessiveInterval()
+ : syntax.op2PlusPossessiveRepeat();
+ }
+ if (possessive) fetch(); // consume the '+'
+
+ token.setRepeatGreedy(true); // greedy in both outcomes
+ token.setRepeatPossessive(possessive);
+ }
+
+ protected final int fetchCharPropertyToCType() { // reads a property name up to its closing '}' and asks the encoding for its ctype
+ mark(); // _p below is used as the start offset of the property name
+
+ while (left()) {
+ int last = p; // offset of the char about to be read, i.e. the exclusive end of the name if it is '}'
+ fetch();
+ if (c == '}') {
+ return enc.propertyNameToCType(bytes, _p, last);
+ } else if (c == '(' || c == ')' || c == '{' || c == '|') {
+ throw new CharacterPropertyException(ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last);
+ }
+ }
+ newInternalException(ERR_PARSER_BUG); // the pattern ended before a closing '}' was found
+ return 0; // not reached
+ }
+}
diff --git a/src/org/joni/Matcher.java b/src/org/joni/Matcher.java
new file mode 100644
index 0000000..1f0a1dd
--- /dev/null
+++ b/src/org/joni/Matcher.java
@@ -0,0 +1,574 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.joni;
+
+import static org.joni.Option.isFindLongest;
+
+import org.jcodings.Encoding;
+import org.jcodings.IntHolder;
+import org.joni.constants.AnchorType;
+
+public abstract class Matcher extends IntHolder {
+ // Pattern being matched and its encoding (enc is copied from regex.enc in the constructor).
+ protected final Regex regex;
+ protected final Encoding enc;
+
+ // Subject buffer and the [str, end) window passed to the constructor.
+ protected final byte[]bytes;
+ protected final int str;
+ protected final int end;
+
+ // Per-call state; msaStart and msaOptions are (re)set by msaInit().
+ protected int msaStart;
+ protected int msaOptions;
+ // Allocated in the constructor with regex.numMem + 1 entries; null when regex.numMem == 0.
+ protected final Region msaRegion;
+ // Reset to -1 by msaInit() when Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is set;
+ // mismatch() reports msaBestS when msaBestLen >= 0.
+ protected int msaBestLen;
+ protected int msaBestS;
+
+ // Exposed through getBegin()/getEnd(); presumably written by matchAt() implementations -- TODO confirm.
+ protected int msaBegin;
+ protected int msaEnd;
+
+ /**
+  * Creates a matcher over the whole byte array (positions 0 to bytes.length).
+  */
+ public Matcher(Regex regex, byte[]bytes) {
+ this(regex, bytes, 0, bytes.length);
+ }
+
+ /**
+  * Creates a matcher over the window of bytes that starts at p and ends at end.
+  *
+  * @param regex compiled pattern; its encoding (regex.enc) is used for all character stepping
+  * @param bytes subject buffer
+  * @param p     stored as str, the start of the subject window
+  * @param end   stored as end, the end of the subject window
+  */
+ public Matcher(Regex regex, byte[]bytes, int p, int end) {
+ this.regex = regex;
+ this.enc = regex.enc;
+
+ this.bytes = bytes;
+ this.str = p;
+ this.end = end;
+
+ // No region is allocated when the pattern reports no memory slots (numMem == 0).
+ this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
+ }
+
+ // main matching method
+ // Callers in this class pass (range limit, start position, position of the previous
+ // character) and treat a return value of -1 as "no match".
+ protected abstract int matchAt(int range, int sstart, int sprev);
+
+ // State-check buffer hooks; only invoked when Config.USE_COMBINATION_EXPLOSION_CHECK is set.
+ // stateCheckBuffInit is called with (end - str, start offset, regex.numCombExpCheck).
+ protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum);
+ protected abstract void stateCheckBuffClear();
+
+ /** Returns the region allocated in the constructor; null when regex.numMem == 0. */
+ public final Region getRegion() {
+ return msaRegion;
+ }
+
+ /**
+  * Returns msaRegion when it exists, otherwise a new Region built from
+  * msaBegin and msaEnd.
+  */
+ public final Region getEagerRegion() {
+ return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd);
+ }
+
+ /** Returns the current value of msaBegin. */
+ public final int getBegin() {
+ return msaBegin;
+ }
+
+ /** Returns the current value of msaEnd. */
+ public final int getEnd() {
+ return msaEnd;
+ }
+
+ /**
+  * Resets the per-call matching state: records the options and the start
+  * position and, when "find longest across the whole range" is compiled in,
+  * clears the best-match length to -1.
+  */
+ protected final void msaInit(int option, int start) {
+     this.msaStart = start;
+     this.msaOptions = option;
+     if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+         this.msaBestLen = -1;
+     }
+ }
+
+ /**
+  * Tries to match the pattern at exactly one position.
+  *
+  * @param at     absolute position in bytes at which the match must start
+  * @param range  range limit handed to matchAt unless
+  *               Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is set,
+  *               in which case end is used instead
+  * @param option match options, stored in msaOptions
+  * @return whatever matchAt returns (-1 is treated as "no match" elsewhere in this class)
+  */
+ public final int match(int at, int range, int option) {
+     msaInit(option, at);
+
+     if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+         // The state-check buffer is indexed relative to the start of the subject,
+         // so the offset is the distance from str. This used to read "at = str",
+         // an assignment that overwrote the requested start position and passed
+         // an absolute index as the offset.
+         int offset = at - str;
+         stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction?
+     } // USE_COMBINATION_EXPLOSION_CHECK
+
+     int prev = enc.prevCharHead(bytes, str, at, end);
+
+     if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+         return matchAt(end /*range*/, at, prev);
+     } else {
+         return matchAt(range /*range*/, at, prev);
+     }
+ }
+
+ int low, high; // these are the return values
+ /**
+  * Runs the regex's search algorithm forward from s (advanced by regex.dMin)
+  * and looks for a candidate position below range that also satisfies the
+  * sub-anchor (BEGIN_LINE / END_LINE) recorded on the regex.
+  *
+  * On success the fields low and high are written: high is always
+  * candidate - dMin; low is written only when dMax is not INFINITE_DISTANCE.
+  * When lowPrev is non-null, lowPrev.value receives the head of the character
+  * preceding low.
+  *
+  * @return true when a usable candidate was found, false otherwise
+  */
+ private boolean forwardSearchRange(byte[]bytes, int str, int end, int s, int range, IntHolder lowPrev) {
+ int pprev = -1;
+ int p = s;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("forward_search_range: "+
+ "str: " + str +
+ ", end: " + end +
+ ", s: " + s +
+ ", range: " + range);
+ }
+
+ // Skip the minimum distance before the first search; multibyte encodings step character by character.
+ if (regex.dMin > 0) {
+ if (enc.isSingleByte()) {
+ p += regex.dMin;
+ } else {
+ int q = p + regex.dMin;
+ while (p < q) p += enc.length(bytes, p, end);
+ }
+ }
+
+ retry:while (true) {
+ p = regex.searchAlgorithm.search(regex, bytes, p, end, range);
+
+ if (p != -1 && p < range) {
+ // Candidate implies a match start before s: move one character on and search again.
+ if (p - regex.dMin < s) {
+ // retry_gate:
+ pprev = p;
+ p += enc.length(bytes, p, end);
+ continue retry;
+ }
+
+ if (regex.subAnchor != 0) {
+ switch (regex.subAnchor) {
+ case AnchorType.BEGIN_LINE:
+ // Accept only at the buffer start or directly after a newline.
+ if (p != str) {
+ int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end);
+ if (!enc.isNewLine(bytes, prev, end)) {
+ // goto retry_gate;
+ pprev = p;
+ p += enc.length(bytes, p, end);
+ continue retry;
+ }
+ }
+ break;
+
+ case AnchorType.END_LINE:
+ if (p == end) {
+ if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end);
+ if (prev != -1 && enc.isNewLine(bytes, prev, end)) {
+ // goto retry_gate;
+ pprev = p;
+ p += enc.length(bytes, p, end);
+ continue retry;
+ }
+ }
+ } else if (!enc.isNewLine(bytes, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !enc.isMbcCrnl(bytes, p, end))) {
+ //if () break;
+ // goto retry_gate;
+ pprev = p;
+ p += enc.length(bytes, p, end);
+ continue retry;
+ }
+ break;
+ } // switch
+ }
+
+ if (regex.dMax == 0) {
+ low = p;
+ if (lowPrev != null) { // ??? // remove null checks
+ if (low > s) {
+ lowPrev.value = enc.prevCharHead(bytes, s, p, end);
+ } else {
+ lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end);
+ }
+ }
+ } else {
+ // With an infinite dMax, low (and lowPrev) are left untouched.
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ low = p - regex.dMax;
+
+ if (low > s) {
+ low = enc.rightAdjustCharHeadWithPrev(bytes, s, low, end, lowPrev);
+ if (lowPrev != null && lowPrev.value == -1) {
+ lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : s, low, end);
+ }
+ } else {
+ if (lowPrev != null) {
+ lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, low, end);
+ }
+ }
+ }
+ }
+ /* no needs to adjust *high, *high is used as range check only */
+ high = p - regex.dMin;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("forward_search_range success: "+
+ "low: " + (low - str) +
+ ", high: " + (high - str) +
+ ", dmin: " + regex.dMin +
+ ", dmax: " + regex.dMax);
+ }
+
+ return true; /* success */
+ }
+
+ return false; /* fail */
+ } //while
+ }
+
+ // low, high
+ /**
+  * Backward counterpart of forwardSearchRange: runs the regex's backward search
+  * and rejects candidates that fail the sub-anchor (BEGIN_LINE / END_LINE)
+  * check, stepping to the previous character and retrying.
+  *
+  * On success the fields low and high are written only when regex.dMax is not
+  * INFINITE_DISTANCE.
+  *
+  * @return true when a usable candidate was found, false otherwise
+  */
+ private boolean backwardSearchRange(byte[]bytes, int str, int end, int s, int range, int adjrange) {
+ range += regex.dMin;
+ int p = s;
+
+ retry:while (true) {
+ p = regex.searchAlgorithm.searchBackward(regex, bytes, range, adjrange, end, p, s, range);
+
+ if (p != -1) {
+ if (regex.subAnchor != 0) {
+ switch (regex.subAnchor) {
+ case AnchorType.BEGIN_LINE:
+ // Accept only at the buffer start or directly after a newline.
+ if (p != str) {
+ int prev = enc.prevCharHead(bytes, str, p, end);
+ if (!enc.isNewLine(bytes, prev, end)) {
+ p = prev;
+ continue retry;
+ }
+ }
+ break;
+
+ case AnchorType.END_LINE:
+ if (p == end) {
+ if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ int prev = enc.prevCharHead(bytes, adjrange, p, end);
+ if (prev == -1) return false;
+ if (enc.isNewLine(bytes, prev, end)) {
+ p = prev;
+ continue retry;
+ }
+ }
+ } else if (!enc.isNewLine(bytes, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !enc.isMbcCrnl(bytes, p, end))) {
+ p = enc.prevCharHead(bytes, adjrange, p, end);
+ if (p == -1) return false;
+ continue retry;
+ }
+ break;
+ } // switch
+ }
+
+ /* no needs to adjust *high, *high is used as range check only */
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ low = p - regex.dMax;
+ high = p - regex.dMin;
+ high = enc.rightAdjustCharHead(bytes, adjrange, high, end);
+ }
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("backward_search_range: "+
+ "low: " + (low - str) +
+ ", high: " + (high - str));
+ }
+
+ return true;
+ }
+
+ if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail.");
+ return false;
+ } // while
+ }
+
+ // MATCH_AND_RETURN_CHECK
+ /**
+  * Runs matchAt at position s and tells the caller whether the search can stop.
+  *
+  * The range limit handed to matchAt is upperRange when
+  * Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is set and end
+  * otherwise. When Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is set, a
+  * successful match on a find-longest regex does not stop the search.
+  *
+  * @return true when matchAt succeeded and the search should return now
+  */
+ private boolean matchCheck(int upperRange, int s, int prev) {
+     final int limit = Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE ? upperRange : end;
+     if (matchAt(limit, s, prev) == -1) {
+         return false;
+     }
+     return !Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE || !isFindLongest(regex.options);
+ }
+
+ /**
+  * Searches the subject for a match. The scan runs forward when range > start
+  * and backward otherwise. start and range are absolute positions in bytes.
+  *
+  * The method first uses the regex's anchor flags to shrink the window or
+  * reject the search outright, then scans candidate positions (optionally
+  * pre-filtered by forwardSearchRange / backwardSearchRange) and calls
+  * matchCheck at each of them.
+  *
+  * @param start  position at which scanning begins; must lie within [str, end]
+  * @param range  position at which scanning stops
+  * @param option match options, stored through msaInit
+  * @return the match start relative to str, or -1 when nothing matched
+  */
+ public final int search(int start, int range, int option) {
+ int s, prev;
+ int origStart = start;
+ int origRange = range;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search (entry point): "+
+ "str: " + str +
+ ", end: " + (end - str) +
+ ", start: " + (start - str) +
+ ", range " + (range - str));
+ }
+
+ if (start > end || start < str) return -1;
+
+ /* anchor optimize: resume search range */
+ if (regex.anchor != 0 && str < end) {
+ int minSemiEnd, maxSemiEnd;
+
+ if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
+ /* search start-position only */
+ // !begin_position:!
+ if (range > start) {
+ range = start + 1;
+ } else {
+ range = start;
+ }
+ } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
+ /* search str-position only */
+ if (range > start) {
+ if (start != str) return -1; // mismatch_no_msa;
+ range = str + 1;
+ } else {
+ if (range <= str) {
+ start = str;
+ range = str;
+ } else {
+ return -1; // mismatch_no_msa;
+ }
+ }
+ } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
+ minSemiEnd = maxSemiEnd = end;
+ // !end_buf:!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
+ // A trailing newline (or CR-NL pair) lets the anchor sit before it as well as at end.
+ int preEnd = enc.stepBack(bytes, str, end, end, 1);
+ maxSemiEnd = end;
+ if (enc.isNewLine(bytes, preEnd, end)) {
+ minSemiEnd = preEnd;
+ if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
+ preEnd = enc.stepBack(bytes, str, preEnd, end, 1);
+ if (preEnd != -1 && enc.isMbcCrnl(bytes, preEnd, end)) {
+ minSemiEnd = preEnd;
+ }
+ }
+ if (minSemiEnd > str && start <= minSemiEnd) {
+ // !goto end_buf;!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ }
+ } else {
+ minSemiEnd = end;
+ // !goto end_buf;!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ }
+ } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
+ // goto !begin_position;!
+ if (range > start) {
+ range = start + 1;
+ } else {
+ range = start;
+ }
+ }
+
+ } else if (str == end) { /* empty string */
+ // empty address ?
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search: empty string.");
+ }
+
+ // Only a pattern with a zero threshold length is tried against the empty subject.
+ if (regex.thresholdLength == 0) {
+ s = start = str;
+ prev = -1;
+ msaInit(option, start);
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear();
+
+ if (matchCheck(end, s, prev)) return match(s);
+ return mismatch();
+ }
+ return -1; // goto mismatch_no_msa;
+ }
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search(apply anchor): " +
+ "end: " + (end - str) +
+ ", start " + (start - str) +
+ ", range " + (range - str));
+ }
+
+ msaInit(option, origStart);
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ int offset = Math.min(start, range) - str;
+ stateCheckBuffInit(end - str, offset, regex.numCombExpCheck);
+ }
+
+ s = start;
+ if (range > start) { /* forward search */
+ if (s > str) {
+ prev = enc.prevCharHead(bytes, str, s, end);
+ } else {
+ prev = 0; // -1
+ }
+
+ if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
+ // Widen the pre-filter range by dMax (capped at end) so candidates beyond range are still seen.
+ int schRange = range;
+ if (regex.dMax != 0) {
+ if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
+ schRange = end;
+ } else {
+ schRange += regex.dMax;
+ if (schRange > end) schRange = end;
+ }
+ }
+ if ((end - start) < regex.thresholdLength) return mismatch();
+
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ do {
+ // This matcher is passed as the IntHolder, so the "previous char" result arrives in value.
+ if (!forwardSearchRange(bytes, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev
+ if (s < low) {
+ s = low;
+ prev = value;
+ }
+ while (s <= high) {
+ if (matchCheck(origRange, s, prev)) return match(s); // ???
+ prev = s;
+ s += enc.length(bytes, s, end);
+ }
+ } while (s < range);
+ return mismatch();
+
+ } else { /* check only. */
+ if (!forwardSearchRange(bytes, str, end, s, schRange, null)) return mismatch();
+
+ if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
+ do {
+ if (matchCheck(origRange, s, prev)) return match(s);
+ prev = s;
+ s += enc.length(bytes, s, end);
+
+ // Skip ahead to the position following the next newline.
+ while (!enc.isNewLine(bytes, prev, end) && s < range) {
+ prev = s;
+ s += enc.length(bytes, s, end);
+ }
+ } while (s < range);
+ return mismatch();
+ }
+
+ }
+ }
+
+ do {
+ if (matchCheck(origRange, s, prev)) return match(s);
+ prev = s;
+ s += enc.length(bytes, s, end);
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ if (matchCheck(origRange, s, prev)) return match(s);
+ }
+ } else { /* backward search */
+ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+ if (origStart < end) {
+ origStart += enc.length(bytes, origStart, end); // /* is upper range */
+ }
+ }
+
+ if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
+ int adjrange;
+ if (range < end) {
+ adjrange = enc.leftAdjustCharHead(bytes, str, range, end);
+ } else {
+ adjrange = end;
+ }
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
+ do {
+ int schStart = s + regex.dMax;
+ if (schStart > end) schStart = end;
+ if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch(); // low, high
+ if (s > high) s = high;
+ while (s != -1 && s >= low) {
+ prev = enc.prevCharHead(bytes, str, s, end);
+ if (matchCheck(origStart, s, prev)) return match(s);
+ s = prev;
+ }
+ } while (s >= range);
+ return mismatch();
+ } else { /* check only. */
+ if ((end - range) < regex.thresholdLength) return mismatch();
+
+ int schStart = s;
+ if (regex.dMax != 0) {
+ if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
+ schStart = end;
+ } else {
+ schStart += regex.dMax;
+ if (schStart > end) {
+ schStart = end;
+ } else {
+ schStart = enc.leftAdjustCharHead(bytes, start, schStart, end);
+ }
+ }
+ }
+ if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch();
+ }
+ }
+
+ do {
+ prev = enc.prevCharHead(bytes, str, s, end);
+ if (matchCheck(origStart, s, prev)) return match(s);
+ s = prev;
+ } while (s >= range);
+
+ }
+ return mismatch();
+ }
+
+ /**
+  * Checks an end-of-buffer anchored search against the regex's anchor
+  * distances (anchorDmin / anchorDmax) and reports whether the requested
+  * window can possibly contain a match.
+  *
+  * NOTE(review): start and range are int parameters, so the adjusted values
+  * computed below are local to this method and never reach the caller; only
+  * the boolean result is observable. Presumably the window was meant to be
+  * narrowed for the caller as well -- confirm against the original algorithm.
+  *
+  * @return true when the search must fail (the caller returns -1), false otherwise
+  */
+ private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) {
+ // Subject is shorter than the minimum distance to the anchor.
+ if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa;
+
+ if (range > start) {
+ // forward search
+ if ((minSemiEnd - start) > regex.anchorDmax) {
+ start = minSemiEnd - regex.anchorDmax;
+ if (start < end) {
+ start = enc.rightAdjustCharHead(bytes, str, start, end);
+ } else { /* match with empty at end */
+ start = enc.prevCharHead(bytes, str, end, end);
+ }
+ }
+ if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
+ range = maxSemiEnd - regex.anchorDmin + 1;
+ }
+ if (start >= range) return true; // mismatch_no_msa;
+ } else {
+ // backward search
+ if ((minSemiEnd - range) > regex.anchorDmax) {
+ range = minSemiEnd - regex.anchorDmax;
+ }
+ if ((maxSemiEnd - start) < regex.anchorDmin) {
+ start = maxSemiEnd - regex.anchorDmin;
+ start = enc.leftAdjustCharHead(bytes, str, start, end);
+ }
+ if (range > start) return true; // mismatch_no_msa;
+ }
+ return false;
+ }
+
+ // Converts the absolute match position s into an offset relative to str.
+ private int match(int s) {
+ return s - str; // sstart ???
+ }
+
+ /**
+  * Result of a search that ran out of candidates. When "find longest across
+  * the whole range" is compiled in and a best match was recorded
+  * (msaBestLen >= 0), that match's position is reported; otherwise -1.
+  */
+ private int mismatch() {
+     final boolean haveBest = Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE && msaBestLen >= 0;
+     return haveBest ? match(msaBestS) : -1;
+ }
+}
diff --git a/src/org/joni/MatcherFactory.java b/src/org/joni/MatcherFactory.java
new file mode 100644
index 0000000..729eeb0
--- /dev/null
+++ b/src/org/joni/MatcherFactory.java
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Factory for {@link Matcher} instances bound to a regex and a byte window.
+  */
+ public abstract class MatcherFactory {
+     /** Factory that creates {@link ByteCodeMachine} matchers. */
+     static final MatcherFactory DEFAULT = new ByteCodeMatcherFactory();
+
+     /** Creates a matcher for regex over bytes, from position p to end. */
+     public abstract Matcher create(Regex regex, byte[] bytes, int p, int end);
+
+     private static final class ByteCodeMatcherFactory extends MatcherFactory {
+         @Override
+         public Matcher create(Regex regex, byte[] bytes, int p, int end) {
+             return new ByteCodeMachine(regex, bytes, p, end);
+         }
+     }
+ }
diff --git a/src/org/joni/MinMaxLen.java b/src/org/joni/MinMaxLen.java
new file mode 100644
index 0000000..dca90e6
--- /dev/null
+++ b/src/org/joni/MinMaxLen.java
@@ -0,0 +1,139 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * A pair of minimum/maximum byte lengths with saturating arithmetic:
+  * {@link #INFINITE_DISTANCE} stands for "unbounded" and absorbs additions
+  * and multiplications.
+  */
+ final class MinMaxLen {
+     /** Sentinel for an unbounded distance. */
+     static final int INFINITE_DISTANCE = 0x7FFFFFFF;
+
+     /* 1000 / (min-max-dist + 1) */
+     private static final short[] distValues = {
+         1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
+         91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
+         48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
+         32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
+         24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
+         20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
+         16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
+         14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
+         12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+         11, 11, 11, 11, 11, 10, 10, 10, 10, 10
+     };
+
+     int min; /* min byte length */
+     int max; /* max byte length */
+
+     MinMaxLen() {
+     }
+
+     MinMaxLen(int min, int max) {
+         this.min = min;
+         this.max = max;
+     }
+
+     /**
+      * Weight of this range: 0 when max is infinite, the table entry for
+      * (max - min) while that index is inside the table, 1 beyond it.
+      */
+     int distanceValue() {
+         if (max == INFINITE_DISTANCE) {
+             return 0;
+         }
+         final int spread = max - min;
+         /* return dist_vals[d] * 16 / (mm->min + 12); */
+         if (spread < distValues.length) {
+             return distValues[spread];
+         }
+         return 1;
+     }
+
+     /**
+      * Compares v1 weighted by this range against v2 weighted by other.
+      * Returns 1 when other's weighted value is larger, -1 when smaller;
+      * ties are broken by the smaller min (1 when other.min is smaller).
+      * A non-positive v2 yields -1; otherwise a non-positive v1 yields 1.
+      */
+     int compareDistanceValue(MinMaxLen other, int v1, int v2) {
+         if (v2 <= 0) {
+             return -1;
+         }
+         if (v1 <= 0) {
+             return 1;
+         }
+
+         final int mine = v1 * distanceValue();
+         final int theirs = v2 * other.distanceValue();
+         if (theirs != mine) {
+             return theirs > mine ? 1 : -1;
+         }
+         if (other.min != min) {
+             return other.min < min ? 1 : -1;
+         }
+         return 0;
+     }
+
+     boolean equal(MinMaxLen other) {
+         return max == other.max && min == other.min;
+     }
+
+     void set(int min, int max) {
+         this.min = min;
+         this.max = max;
+     }
+
+     void clear() {
+         min = 0;
+         max = 0;
+     }
+
+     void copy(MinMaxLen other) {
+         min = other.min;
+         max = other.max;
+     }
+
+     /** Adds other's bounds to this range, saturating at INFINITE_DISTANCE. */
+     void add(MinMaxLen other) {
+         min = distanceAdd(min, other.min);
+         max = distanceAdd(max, other.max);
+     }
+
+     /** Adds len to both bounds, saturating at INFINITE_DISTANCE. */
+     void addLength(int len) {
+         min = distanceAdd(min, len);
+         max = distanceAdd(max, len);
+     }
+
+     /** Widens this range so that it also covers other. */
+     void altMerge(MinMaxLen other) {
+         min = Math.min(min, other.min);
+         max = Math.max(max, other.max);
+     }
+
+     static int distanceAdd(int d1, int d2) {
+         if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) {
+             return INFINITE_DISTANCE;
+         }
+         return d1 <= INFINITE_DISTANCE - d2 ? d1 + d2 : INFINITE_DISTANCE;
+     }
+
+     static int distanceMultiply(int d, int m) {
+         if (m == 0) {
+             return 0;
+         }
+         return d < INFINITE_DISTANCE / m ? d * m : INFINITE_DISTANCE;
+     }
+
+     /** Formats the pair as "(a)-(b)", printing "inf" for INFINITE_DISTANCE. */
+     static String distanceRangeToString(int a, int b) {
+         return distanceToString(a) + "-" + distanceToString(b);
+     }
+
+     private static String distanceToString(int d) {
+         return d == INFINITE_DISTANCE ? "inf" : "(" + d + ")";
+     }
+ }
diff --git a/src/org/joni/NameEntry.java b/src/org/joni/NameEntry.java
new file mode 100644
index 0000000..794cf1b
--- /dev/null
+++ b/src/org/joni/NameEntry.java
@@ -0,0 +1,97 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * A name (a slice of a byte array) together with the list of back-reference
+  * numbers registered for it. The first reference is kept in a scalar field;
+  * an array is allocated only once a second reference is added.
+  */
+ public final class NameEntry {
+     static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8;
+
+     public final byte[] name;
+     public final int nameP;
+     public final int nameEnd;
+
+     int backNum;
+     int backRef1;
+     int[] backRefs;
+
+     public NameEntry(byte[] bytes, int p, int end) {
+         this.name = bytes;
+         this.nameP = p;
+         this.nameEnd = end;
+     }
+
+     /** Returns a fresh array holding the registered references in insertion order. */
+     public int[] getBackRefs() {
+         if (backNum == 0) {
+             return new int[] {};
+         }
+         if (backNum == 1) {
+             return new int[] {backRef1};
+         }
+         final int[] copy = new int[backNum];
+         System.arraycopy(backRefs, 0, copy, 0, backNum);
+         return copy;
+     }
+
+     /** Appends backRef, allocating or doubling the backing array as needed. */
+     public void addBackref(int backRef) {
+         backNum++;
+
+         if (backNum == 1) {
+             backRef1 = backRef;
+             return;
+         }
+
+         if (backNum == 2) {
+             // second reference: switch from the scalar slot to the array
+             backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM];
+             backRefs[0] = backRef1;
+         } else if (backNum > backRefs.length) {
+             final int[] grown = new int[backRefs.length << 1];
+             System.arraycopy(backRefs, 0, grown, 0, backRefs.length);
+             backRefs = grown;
+         }
+         backRefs[backNum - 1] = backRef;
+     }
+
+     /** Renders the name followed by "-", the single reference, or a comma-separated list. */
+     public String toString() {
+         final StringBuilder out = new StringBuilder(new String(name, nameP, nameEnd - nameP));
+         out.append(" ");
+         if (backNum == 0) {
+             out.append("-");
+         } else if (backNum == 1) {
+             out.append(backRef1);
+         } else {
+             for (int i = 0; i < backNum; i++) {
+                 if (i != 0) {
+                     out.append(", ");
+                 }
+                 out.append(backRefs[i]);
+             }
+         }
+         return out.toString();
+     }
+
+ }
diff --git a/src/org/joni/NativeMachine.java b/src/org/joni/NativeMachine.java
new file mode 100644
index 0000000..6fc5dbb
--- /dev/null
+++ b/src/org/joni/NativeMachine.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Abstract {@link Matcher} subclass that adds no state or behaviour of its
+  * own here; it only forwards its constructor arguments to Matcher.
+  * Presumably a base for natively compiled matchers -- TODO confirm.
+  */
+ public abstract class NativeMachine extends Matcher {
+
+ protected NativeMachine(Regex regex, byte[]bytes, int p, int end) {
+ super(regex, bytes, p, end);
+ }
+ }
diff --git a/src/org/joni/NodeOptInfo.java b/src/org/joni/NodeOptInfo.java
new file mode 100644
index 0000000..20bc0bc
--- /dev/null
+++ b/src/org/joni/NodeOptInfo.java
@@ -0,0 +1,127 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+
+ /**
+  * Optimisation data gathered for one node: its length range, anchor flags,
+  * three exact-string candidates and a character map candidate.
+  */
+ public final class NodeOptInfo {
+ final MinMaxLen length = new MinMaxLen();
+ final OptAnchorInfo anchor = new OptAnchorInfo();
+ final OptExactInfo exb = new OptExactInfo(); /* boundary */
+ final OptExactInfo exm = new OptExactInfo(); /* middle */
+ final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) */
+ final OptMapInfo map = new OptMapInfo(); /* boundary */
+
+ /** Copies mmd into the distance ranges of exb, expr and map (same body as setBound). */
+ public void setBoundNode(MinMaxLen mmd) {
+ exb.mmd.copy(mmd);
+ expr.mmd.copy(mmd);
+ map.mmd.copy(mmd);
+ }
+
+ /** Clears every component. */
+ public void clear() {
+ length.clear();
+ anchor.clear();
+ exb.clear();
+ exm.clear();
+ expr.clear();
+ map.clear();
+ }
+
+ /** Copies every component from other. */
+ public void copy(NodeOptInfo other) {
+ length.copy(other.length);
+ anchor.copy(other.anchor);
+ exb.copy(other.exb);
+ exm.copy(other.exm);
+ expr.copy(other.expr);
+ map.copy(other.map);
+ }
+
+ /**
+  * Merges other into this info as a concatenation, with this node on the
+  * left and other on the right. Note that other is modified as well: its
+  * exb/map anchors may be updated and its exb is cleared once it has been
+  * appended to this node's exb or exm.
+  */
+ public void concatLeftNode(NodeOptInfo other, Encoding enc) {
+ OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ?
+ tanchor.concat(anchor, other.anchor, length.max, other.length.max);
+ anchor.copy(tanchor);
+
+ // When this node is empty (max length 0) its anchors also apply to other's candidates.
+ if (other.exb.length > 0 && length.max == 0) {
+ tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max);
+ other.exb.anchor.copy(tanchor);
+ }
+
+ if (other.map.value > 0 && length.max == 0) {
+ if (other.map.mmd.max == 0) {
+ other.map.anchor.leftAnchor |= anchor.leftAnchor;
+ }
+ }
+
+ // Remember the reachEnd flags before they are reset below.
+ boolean exbReach = exb.reachEnd;
+ boolean exmReach = exm.reachEnd;
+
+ if (other.length.max != 0) {
+ exb.reachEnd = exm.reachEnd = false;
+ }
+
+ // If an exact string reached the end of this node, other's boundary string extends it.
+ if (other.exb.length > 0) {
+ if (exbReach) {
+ exb.concat(other.exb, enc);
+ other.exb.clear();
+ } else if (exmReach) {
+ exm.concat(other.exb, enc);
+ other.exb.clear();
+ }
+ }
+
+ exm.select(other.exb, enc);
+ exm.select(other.exm, enc);
+
+ if (expr.length > 0) {
+ if (other.length.max > 0) {
+ // TODO: make sure it is not an Oniguruma bug (casting unsigned int to int for arithmetic comparison)
+ int otherLengthMax = other.length.max;
+ if (otherLengthMax == MinMaxLen.INFINITE_DISTANCE) otherLengthMax = -1;
+ if (expr.length > otherLengthMax) expr.length = otherLengthMax;
+ if (expr.mmd.max == 0) {
+ exb.select(expr, enc);
+ } else {
+ exm.select(expr, enc);
+ }
+ }
+ } else if (other.expr.length > 0) {
+ expr.copy(other.expr);
+ }
+
+ map.select(other.map);
+ length.add(other.length);
+ }
+
+ /** Merges other into this info as an alternative branch, component by component. */
+ public void altMerge(NodeOptInfo other, OptEnvironment env) {
+ anchor.altMerge(other.anchor);
+ exb.altMerge(other.exb, env);
+ exm.altMerge(other.exm, env);
+ expr.altMerge(other.expr, env);
+ map.altMerge(other.map, env.enc);
+ length.altMerge(other.length);
+ }
+
+ /** Copies mmd into the distance ranges of exb, expr and map (same body as setBoundNode). */
+ public void setBound(MinMaxLen mmd) {
+ exb.mmd.copy(mmd);
+ expr.mmd.copy(mmd);
+ map.mmd.copy(mmd);
+ }
+
+ }
diff --git a/src/org/joni/OptAnchorInfo.java b/src/org/joni/OptAnchorInfo.java
new file mode 100644
index 0000000..9084728
--- /dev/null
+++ b/src/org/joni/OptAnchorInfo.java
@@ -0,0 +1,92 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.constants.AnchorType;
+
+ /**
+  * Anchor flags of a node, kept as two bit sets: one for anchors treated as
+  * left anchors and one for anchors treated as right anchors.
+  */
+ final class OptAnchorInfo implements AnchorType {
+     int leftAnchor;
+     int rightAnchor;
+
+     void clear() {
+         leftAnchor = 0;
+         rightAnchor = 0;
+     }
+
+     void copy(OptAnchorInfo other) {
+         this.leftAnchor = other.leftAnchor;
+         this.rightAnchor = other.rightAnchor;
+     }
+
+     /**
+      * Sets this info to the anchors of left followed by right. The right
+      * operand's left anchors carry over only when the left part has length 0,
+      * and the left operand's right anchors only when the right part has
+      * length 0.
+      */
+     void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) {
+         // Statement order is kept as-is so the result is unchanged if an operand aliases this.
+         this.leftAnchor = left.leftAnchor;
+         if (leftLength == 0) {
+             this.leftAnchor |= right.leftAnchor;
+         }
+
+         this.rightAnchor = right.rightAnchor;
+         if (rightLength == 0) {
+             this.rightAnchor |= left.rightAnchor;
+         }
+     }
+
+     /** True when any bit of anchor is present on either side. */
+     boolean isSet(int anchor) {
+         return ((leftAnchor | rightAnchor) & anchor) != 0;
+     }
+
+     void add(int anchor) {
+         if (!isLeftAnchor(anchor)) {
+             rightAnchor |= anchor;
+             return;
+         }
+         leftAnchor |= anchor;
+     }
+
+     void remove(int anchor) {
+         if (!isLeftAnchor(anchor)) {
+             rightAnchor &= ~anchor;
+             return;
+         }
+         leftAnchor &= ~anchor;
+     }
+
+     /** Keeps only the anchors present in both this info and other. */
+     void altMerge(OptAnchorInfo other) {
+         leftAnchor &= other.leftAnchor;
+         rightAnchor &= other.rightAnchor;
+     }
+
+     static boolean isLeftAnchor(int anchor) { // make a mask for it ?
+         return anchor != END_BUF && anchor != SEMI_END_BUF
+                 && anchor != END_LINE && anchor != PREC_READ
+                 && anchor != PREC_READ_NOT;
+     }
+
+     /** Lists the names of the anchor bits set in anchor, in brackets. */
+     static String anchorToString(int anchor) {
+         final int[] flags = {
+             AnchorType.BEGIN_BUF, AnchorType.BEGIN_LINE, AnchorType.BEGIN_POSITION,
+             AnchorType.END_BUF, AnchorType.SEMI_END_BUF, AnchorType.END_LINE,
+             AnchorType.ANYCHAR_STAR, AnchorType.ANYCHAR_STAR_ML
+         };
+         final String[] labels = {
+             "begin-buf ", "begin-line ", "begin-pos ",
+             "end-buf ", "semi-end-buf ", "end-line ",
+             "anychar-star ", "anychar-star-pl "
+         };
+
+         final StringBuilder out = new StringBuilder("[");
+         for (int i = 0; i < flags.length; i++) {
+             if ((anchor & flags[i]) != 0) {
+                 out.append(labels[i]);
+             }
+         }
+         return out.append("]").toString();
+     }
+ }
diff --git a/src/org/joni/OptEnvironment.java b/src/org/joni/OptEnvironment.java
new file mode 100644
index 0000000..4b59c31
--- /dev/null
+++ b/src/org/joni/OptEnvironment.java
@@ -0,0 +1,39 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+
+// remove this one in future and pass mmd directly
+final class OptEnvironment {
+ final MinMaxLen mmd = new MinMaxLen();
+ Encoding enc;
+ int options;
+ int caseFoldFlag;
+ ScanEnvironment scanEnv;
+
+ void copy(OptEnvironment other) {
+ mmd.copy(other.mmd);
+ enc = other.enc;
+ options = other.options;
+ caseFoldFlag = other.caseFoldFlag;
+ scanEnv = other.scanEnv;
+ }
+}
diff --git a/src/org/joni/OptExactInfo.java b/src/org/joni/OptExactInfo.java
new file mode 100644
index 0000000..45d94fc
--- /dev/null
+++ b/src/org/joni/OptExactInfo.java
@@ -0,0 +1,171 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+
+final class OptExactInfo {
+ static final int OPT_EXACT_MAXLEN = 24;
+
+ final MinMaxLen mmd = new MinMaxLen();
+ final OptAnchorInfo anchor = new OptAnchorInfo();
+
+ boolean reachEnd;
+ boolean ignoreCase;
+ int length;
+
+ final byte s[] = new byte[OPT_EXACT_MAXLEN];
+
+ boolean isFull() {
+ return length >= OPT_EXACT_MAXLEN;
+ }
+
+ void clear() {
+ mmd.clear();
+ anchor.clear();
+
+ reachEnd = false;
+ ignoreCase = false;
+ length = 0;
+ s[0] = 0; // ???
+ }
+
+ void copy(OptExactInfo other) {
+ mmd.copy(other.mmd);
+ anchor.copy(other.anchor);
+ reachEnd = other.reachEnd;
+ ignoreCase = other.ignoreCase;
+ length = other.length;
+
+ System.arraycopy(other.s, 0, s, 0, OPT_EXACT_MAXLEN);
+ }
+
+ void concat(OptExactInfo other, Encoding enc) {
+ if (!ignoreCase && other.ignoreCase) {
+ if (length >= other.length) return; /* avoid */
+ ignoreCase = true;
+ }
+ // append whole characters of other.s to s until OPT_EXACT_MAXLEN would be exceeded
+ int p = 0; // add->s;
+ int end = p + other.length;
+
+ int i;
+ for (i=length; p < end;) {
+ int len = enc.length(other.s, p, end);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (int j=0; j<len && p < end; j++) {
+ s[i++] = other.s[p++]; // arraycopy or even don't copy anything ??
+ }
+ }
+
+ length = i;
+ reachEnd = (p == end ? other.reachEnd : false);
+
+ OptAnchorInfo tmp = new OptAnchorInfo();
+ tmp.concat(anchor, other.anchor, 1, 1);
+ if (!reachEnd) tmp.rightAnchor = 0; // test the merged state: a truncated copy must not keep other's right anchor
+ anchor.copy(tmp);
+ }
+
+ // ?? raw is not used here
+ void concatStr(byte[]bytes, int p, int end, boolean raw, Encoding enc) {
+ int i;
+ for (i = length; p < end && i < OPT_EXACT_MAXLEN;) {
+ int len = enc.length(bytes, p, end);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (int j=0; j<len && p < end; j++) {
+ s[i++] = bytes[p++];
+ }
+ }
+
+ length = i;
+ }
+
+ void altMerge(OptExactInfo other, OptEnvironment env) {
+ if (other.length == 0 || length == 0) {
+ clear();
+ return;
+ }
+
+ if (!mmd.equal(other.mmd)) {
+ clear();
+ return;
+ }
+
+ int i;
+ for (i=0; i<length && i<other.length;) {
+ if (s[i] != other.s[i]) break;
+ int len = env.enc.length(s, i, length);
+
+ int j;
+ for (j=1; j<len; j++) {
+ if (s[i+j] != other.s[i+j]) break;
+ }
+
+ if (j < len) break;
+ i += len;
+ }
+
+ if (!other.reachEnd || i<other.length || i<length) reachEnd = false;
+
+ length = i;
+ ignoreCase |= other.ignoreCase;
+
+ anchor.altMerge(other.anchor);
+
+ if (!reachEnd) anchor.rightAnchor = 0;
+ }
+
+
+ void select(OptExactInfo alt, Encoding enc) {
+ int v1 = length;
+ int v2 = alt.length;
+
+ if (v2 == 0) {
+ return;
+ } else if (v1 == 0) {
+ copy(alt);
+ return;
+ } else if (v1 <= 2 && v2 <= 2) {
+ /* ByteValTable[x] is big value --> low price */
+ v2 = OptMapInfo.positionValue(enc, s[0] & 0xff);
+ v1 = OptMapInfo.positionValue(enc, alt.s[0] & 0xff);
+
+ if (length > 1) v1 += 5;
+ if (alt.length > 1) v2 += 5;
+ }
+
+ if (!ignoreCase) v1 *= 2;
+ if (!alt.ignoreCase) v2 *= 2;
+
+ if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt);
+ }
+
+ // comp_opt_exact_or_map_info
+ private static final int COMP_EM_BASE = 20;
+ int compare(OptMapInfo m) {
+ if (m.value <= 0) return -1;
+
+ int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2);
+ int vm = COMP_EM_BASE * 5 * 2 / m.value;
+
+ return mmd.compareDistanceValue(m.mmd, ve, vm);
+ }
+}
diff --git a/src/org/joni/OptMapInfo.java b/src/org/joni/OptMapInfo.java
new file mode 100644
index 0000000..2763b53
--- /dev/null
+++ b/src/org/joni/OptMapInfo.java
@@ -0,0 +1,129 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.CaseFoldCodeItem;
+import org.jcodings.Encoding;
+
+final class OptMapInfo {
+
+ final MinMaxLen mmd = new MinMaxLen(); /* info position */
+ final OptAnchorInfo anchor = new OptAnchorInfo();
+
+ int value; /* weighted value */
+ final byte map[] = new byte[Config.CHAR_TABLE_SIZE];
+
+ void clear() {
+ mmd.clear();
+ anchor.clear();
+ value = 0;
+ for (int i=0; i<map.length; i++) map[i] = 0;
+ }
+
+ void copy(OptMapInfo other) {
+ mmd.copy(other.mmd);
+ anchor.copy(other.anchor);
+ value = other.value;
+ // overwrite this map with other's entries in one bulk copy (other.map.length bytes)
+ System.arraycopy(other.map, 0, map, 0, other.map.length);
+ }
+
+ void addChar(byte c, Encoding enc) {
+ int c_ = c & 0xff;
+ if (map[c_] == 0) {
+ map[c_] = 1;
+ value += positionValue(enc, c_);
+ }
+ }
+
+ void addCharAmb(byte[]bytes, int p, int end, Encoding enc, int caseFoldFlag) {
+ addChar(bytes[p], enc);
+
+ caseFoldFlag &= ~Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
+ CaseFoldCodeItem[]items = enc.caseFoldCodesByString(caseFoldFlag, bytes, p, end);
+
+ byte[] buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
+ for (int i=0; i<items.length; i++) {
+ enc.codeToMbc(items[i].code[0], buf, 0);
+ addChar(buf[0], enc);
+ }
+ }
+
+ // select_opt_map_info
+ private static final int z = 1<<15; /* 32768: something big value */
+ void select(OptMapInfo alt) {
+ if (alt.value == 0) return;
+ if (value == 0) {
+ copy(alt);
+ return;
+ }
+
+ int v1 = z / value;
+ int v2 = z /alt.value;
+
+ if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt);
+ }
+
+ // alt_merge_opt_map_info
+ void altMerge(OptMapInfo other, Encoding enc) {
+ /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+ if (value == 0) return;
+ if (other.value == 0 || mmd.max < other.mmd.max) {
+ clear();
+ return;
+ }
+
+ mmd.altMerge(other.mmd);
+
+ int val = 0;
+ for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
+ if (other.map[i] != 0) map[i] = 1;
+ if (map[i] != 0) val += positionValue(enc, i);
+ }
+
+ value = val;
+ anchor.altMerge(other.anchor);
+ }
+
+ static final short ByteValTable[] = {
+ 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
+ };
+
+ // map_position_value
+ static int positionValue(Encoding enc, int i) {
+ if (i < ByteValTable.length) {
+ if (i == 0 && enc.minLength() > 1) {
+ return 20;
+ } else {
+ return ByteValTable[i];
+ }
+ } else {
+ return 4; /* Take it easy. */
+ }
+ }
+
+}
diff --git a/src/org/joni/Option.java b/src/org/joni/Option.java
new file mode 100644
index 0000000..adee24f
--- /dev/null
+++ b/src/org/joni/Option.java
@@ -0,0 +1,122 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+public class Option {
+
+ /* options */
+ public static final int NONE = 0;
+ public static final int IGNORECASE = (1<<0);
+ public static final int EXTEND = (1<<1);
+ public static final int MULTILINE = (1<<2);
+ public static final int SINGLELINE = (1<<3);
+ public static final int FIND_LONGEST = (1<<4);
+ public static final int FIND_NOT_EMPTY = (1<<5);
+ public static final int NEGATE_SINGLELINE = (1<<6);
+ public static final int DONT_CAPTURE_GROUP = (1<<7);
+ public static final int CAPTURE_GROUP = (1<<8);
+
+ /* options (search time) */
+ public static final int NOTBOL = (1<<9);
+ public static final int NOTEOL = (1<<10);
+ public static final int POSIX_REGION = (1<<11);
+ public static final int MAXBIT = (1<<12); /* limit */
+
+ public static final int DEFAULT = NONE;
+
+ public static String toString(int option) {
+ String options = "";
+ if (isIgnoreCase(option)) options += "IGNORECASE ";
+ if (isExtend(option)) options += "EXTEND ";
+ if (isMultiline(option)) options += "MULTILINE ";
+ if (isSingleline(option)) options += "SINGLELINE ";
+ if (isFindLongest(option)) options += "FIND_LONGEST ";
+ if (isFindNotEmpty(option)) options += "FIND_NOT_EMPTY ";
+ if (isNegateSingleline(option)) options += "NEGATE_SINGLELINE ";
+ if (isDontCaptureGroup(option)) options += "DONT_CAPTURE_GROUP ";
+ if (isCaptureGroup(option)) options += "CAPTURE_GROUP ";
+
+ if (isNotBol(option)) options += "NOTBOL ";
+ if (isNotEol(option)) options += "NOTEOL ";
+ if (isPosixRegion(option)) options += "POSIX_REGION ";
+
+ return options;
+ }
+
+ public static boolean isIgnoreCase(int option) {
+ return (option & IGNORECASE) != 0;
+ }
+
+ public static boolean isExtend(int option) {
+ return (option & EXTEND) != 0;
+ }
+
+ public static boolean isSingleline(int option) {
+ return (option & SINGLELINE) != 0;
+ }
+
+ public static boolean isMultiline(int option) {
+ return (option & MULTILINE) != 0;
+ }
+
+ public static boolean isFindLongest(int option) {
+ return (option & FIND_LONGEST) != 0;
+ }
+
+ public static boolean isFindNotEmpty(int option) {
+ return (option & FIND_NOT_EMPTY) != 0;
+ }
+
+ public static boolean isFindCondition(int option) {
+ return (option & (FIND_LONGEST | FIND_NOT_EMPTY)) != 0;
+ }
+
+ public static boolean isNegateSingleline(int option) {
+ return (option & NEGATE_SINGLELINE) != 0;
+ }
+
+ public static boolean isDontCaptureGroup(int option) {
+ return (option & DONT_CAPTURE_GROUP) != 0;
+ }
+
+ public static boolean isCaptureGroup(int option) {
+ return (option & CAPTURE_GROUP) != 0;
+ }
+
+ public static boolean isNotBol(int option) {
+ return (option & NOTBOL) != 0;
+ }
+
+ public static boolean isNotEol(int option) {
+ return (option & NOTEOL) != 0;
+ }
+
+ public static boolean isPosixRegion(int option) {
+ return (option & POSIX_REGION) != 0;
+ }
+
+ /* OP_SET_OPTION is required for these options. ??? */
+ // public static boolean isDynamic(int option) {
+ // return (option & (MULTILINE | IGNORECASE)) != 0;
+ // }
+ public static boolean isDynamic(int option) {
+ return false;
+ }
+}
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
new file mode 100644
index 0000000..a787d16
--- /dev/null
+++ b/src/org/joni/Parser.java
@@ -0,0 +1,1028 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsOnAtSimple;
+import static org.joni.BitStatus.bsOnOff;
+import static org.joni.Option.isDontCaptureGroup;
+import static org.joni.Option.isIgnoreCase;
+
+import org.jcodings.constants.CharacterType;
+import org.jcodings.constants.PosixBracket;
+import org.joni.ast.AnchorNode;
+import org.joni.ast.AnyCharNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.ast.CClassNode.CCStateArg;
+import org.joni.constants.AnchorType;
+import org.joni.constants.CCSTATE;
+import org.joni.constants.CCVALTYPE;
+import org.joni.constants.EncloseType;
+import org.joni.constants.NodeType;
+import org.joni.constants.TokenType;
+
+class Parser extends Lexer {
+
+ protected final Regex regex;
+ protected Node root;
+
+ protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
+ // this approach will not affect recursive calls
+
+ protected Parser(ScanEnvironment env, byte[]bytes, int p, int end) {
+ super(env, bytes, p, end);
+ regex = env.reg;
+ }
+
+ // onig_parse_make_tree
+ protected final Node parse() {
+ root = parseRegexp();
+ regex.numMem = env.numMem;
+ return root;
+ }
+
+ private static final int POSIX_BRACKET_NAME_MIN_LEN = 4;
+ private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH = 20;
+ private static final byte BRACKET_END[] = ":]".getBytes();
+ private boolean parsePosixBracket(CClassNode cc) {
+ mark();
+
+ boolean not;
+ if (peekIs('^')) {
+ inc();
+ not = true;
+ } else {
+ not = false;
+ }
+ if (enc.strLength(bytes, p, stop) >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket
+ byte[][] pbs= PosixBracket.PBSNamesLower;
+ for (int i=0; i<pbs.length; i++) {
+ byte[]name = pbs[i];
+ // hash lookup here ?
+ if (enc.strNCmp(bytes, p, stop, name, 0, name.length) == 0) {
+ p = enc.step(bytes, p, stop, name.length);
+ if (enc.strNCmp(bytes, p, stop, BRACKET_END, 0, BRACKET_END.length) != 0) {
+ newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
+ }
+ cc.addCType(PosixBracket.PBSValues[i], not, env, this);
+ inc();
+ inc();
+ return false;
+ }
+ }
+
+ }
+
+ // not_posix_bracket:
+ c = 0;
+ int i= 0;
+ while(left() && ((c=peek()) != ':') && c != ']') {
+ inc();
+ if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
+ }
+
+ if (c == ':' && left()) {
+ inc();
+ if (left()) {
+ fetch();
+ if (c == ']') newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
+ }
+ }
+ restore();
+ return true; /* 1: is not POSIX bracket, but no error. */
+ }
+
+ private CClassNode parseCharProperty() {
+ int ctype = fetchCharPropertyToCType();
+ CClassNode n = new CClassNode();
+ n.addCType(ctype, false, env, this);
+ if (token.getPropNot()) n.setNot();
+ return n;
+ }
+
+ private boolean codeExistCheck(int code, boolean ignoreEscaped) {
+ mark();
+
+ boolean inEsc = false;
+ while(left()) {
+ if (ignoreEscaped && inEsc) {
+ inEsc = false;
+ } else {
+ fetch();
+ if (c == code) {
+ restore();
+ return true;
+ }
+ if (c == syntax.metaCharTable.esc) inEsc = true;
+ }
+ }
+
+ restore();
+ return false;
+ }
+
+ private CClassNode parseCharClass() {
+ fetchTokenInCC();
+ // Consumes tokens up to the matching CC_CLOSE and returns the class built from them ('^' negation and '&&' applied).
+ boolean neg;
+ if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
+ neg = true;
+ fetchTokenInCC();
+ } else {
+ neg = false;
+ }
+
+ if (token.type == TokenType.CC_CLOSE) {
+ if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
+ env.ccEscWarn("]");
+ token.type = TokenType.CHAR; /* allow []...] */
+ }
+
+ CClassNode cc = new CClassNode();
+ CClassNode prevCC = null;
+ CClassNode workCC = null;
+
+ CCStateArg arg = new CCStateArg();
+
+ boolean andStart = false;
+ arg.state = CCSTATE.START;
+
+ while(token.type != TokenType.CC_CLOSE) {
+ boolean fetched = false;
+
+ switch (token.type) {
+
+ case CHAR:
+ int len = enc.codeToMbcLength(token.getC());
+ if (len > 1) {
+ arg.inType = CCVALTYPE.CODE_POINT;
+ } else {
+ // !sb_char:!
+ arg.inType = CCVALTYPE.SB;
+ }
+ arg.v = token.getC();
+ arg.vIsRaw = false;
+ // !goto val_entry2;!
+ valEntry2(cc, arg);
+ break;
+
+ case RAW_BYTE:
+ /* tok->base != 0 : octal or hexadec. */
+ if (!enc.isSingleByte() && token.base != 0) {
+ byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+ int psave = p;
+ int base = token.base;
+ buf[0] = (byte)token.getC();
+ int i;
+ for (i=1; i<enc.maxLength(); i++) {
+ fetchTokenInCC();
+ if (token.type != TokenType.RAW_BYTE || token.base != base) {
+ fetched = true;
+ break;
+ }
+ buf[i] = (byte)token.getC();
+ }
+ if (i < enc.minLength()) newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
+
+ len = enc.length(buf, 0, i);
+ if (i < len) {
+ newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
+ } else if (i > len) { /* fetch back */
+ p = psave;
+ for (i=1; i<len; i++) fetchTokenInCC();
+ fetched = false;
+ }
+ if (i == 1) {
+ arg.v = buf[0] & 0xff;
+ // !goto raw_single!
+ arg.inType = CCVALTYPE.SB;
+ } else {
+ arg.v = enc.mbcToCode(buf, 0, buf.length);
+ arg.inType = CCVALTYPE.CODE_POINT;
+ }
+ } else {
+ arg.v = token.getC();
+ // !raw_single:!
+ arg.inType = CCVALTYPE.SB;
+ }
+ arg.vIsRaw = true;
+ // !goto val_entry2;!
+ valEntry2(cc, arg);
+ break;
+
+ case CODE_POINT:
+ arg.v = token.getCode();
+ arg.vIsRaw = true;
+ // !val_entry:!
+ // !val_entry2:!
+ valEntry(cc, arg);
+ break;
+
+ case POSIX_BRACKET_OPEN:
+ if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
+ env.ccEscWarn("[");
+ p = token.backP;
+ arg.v = token.getC();
+ arg.vIsRaw = false;
+ // !goto val_entry;!
+ valEntry(cc, arg);
+ break;
+ }
+ // !goto next_class;!
+ cc.nextStateClass(arg, env);
+ break;
+
+ case CHAR_TYPE:
+ cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
+ // !next_class:!
+ cc.nextStateClass(arg, env);
+ break;
+
+ case CHAR_PROPERTY:
+ int ctype = fetchCharPropertyToCType();
+ cc.addCType(ctype, token.getPropNot(), env, this);
+ // !goto next_class;!
+ cc.nextStateClass(arg, env);
+ break;
+
+ case CC_RANGE:
+ if (arg.state == CCSTATE.VALUE) {
+ fetchTokenInCC();
+ fetched = true;
+ if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
+ // !range_end_val:!
+ // !goto val_entry;!
+ rangeEndVal(cc, arg);
+ break;
+ } else if (token.type == TokenType.CC_AND) {
+ env.ccEscWarn("-");
+ // goto !range_end_val;!
+ rangeEndVal(cc, arg);
+ break;
+ }
+ arg.state = CCSTATE.RANGE;
+ } else if (arg.state == CCSTATE.START) {
+ /* [-xa] is allowed */
+ arg.v = token.getC();
+ arg.vIsRaw = false;
+ fetchTokenInCC();
+ fetched = true;
+ /* [--x] or [a&&-x] is warned. */
+ if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-");
+ // !goto val_entry;!
+ valEntry(cc, arg);
+ break;
+ } else if (arg.state == CCSTATE.RANGE) {
+ env.ccEscWarn("-");
+ /* [!--x] is allowed */
+ // !goto sb_char;!
+ sbChar(cc, arg);
+ break;
+ } else { /* CCS_COMPLETE */
+ fetchTokenInCC();
+ fetched = true;
+ if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
+ // goto !range_end_val!
+ rangeEndVal(cc, arg);
+ break;
+ } else if (token.type == TokenType.CC_AND) {
+ env.ccEscWarn("-");
+ // goto !range_end_val;!
+ rangeEndVal(cc, arg);
+ break;
+ }
+
+ if (syntax.allowDoubleRangeOpInCC()) {
+ env.ccEscWarn("-");
+ /* [0-9-a] is allowed as [0-9\-a] */
+ // !goto sb_char!
+ sbChar(cc, arg);
+ break;
+ }
+ newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
+ }
+ break;
+
+ case CC_CC_OPEN: /* [ */
+ CClassNode acc = parseCharClass();
+ cc.or(acc, enc);
+ break;
+
+ case CC_AND: /* && */
+ if (arg.state == CCSTATE.VALUE) {
+ arg.v = 0; // ??? safe v ?
+ arg.vIsRaw = false;
+ cc.nextStateValue(arg, env);
+ }
+ /* initialize local variables */
+ andStart = true;
+ arg.state = CCSTATE.START;
+ if (prevCC != null) {
+ prevCC.and(cc, enc);
+ } else {
+ prevCC = cc;
+ }
+ // initialize_cclass(cc): each '&&' operand must start empty, else [a&&b&&c] re-uses b's members for c
+ workCC = new CClassNode();
+ cc = workCC;
+ break;
+
+ case EOT:
+ newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+
+ if (!fetched) fetchTokenInCC();
+
+ } // while
+
+ if (arg.state == CCSTATE.VALUE) {
+ arg.v = 0; // ??? safe v ?
+ arg.vIsRaw = false;
+ cc.nextStateValue(arg, env);
+ }
+
+ if (prevCC != null) {
+ prevCC.and(cc, enc);
+ cc = prevCC;
+ }
+
+ if (neg) {
+ cc.setNot();
+ } else {
+ cc.clearNot();
+ }
+
+ if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
+ if (!cc.isEmpty()) {
+ final int NEW_LINE = 0x0a;
+ if (enc.isNewLine(NEW_LINE)) {
+ if (enc.codeToMbcLength(NEW_LINE) == 1) {
+ cc.bs.set(NEW_LINE);
+ } else {
+ cc.addCodeRange(env, NEW_LINE, NEW_LINE);
+ }
+ }
+ }
+ }
+
+ return cc;
+ }
+
+ private void valEntry2(CClassNode cc, CCStateArg arg) {
+ cc.nextStateValue(arg, env);
+ }
+
+ private void valEntry(CClassNode cc, CCStateArg arg) {
+ int len = enc.codeToMbcLength(arg.v);
+ arg.inType = len == 1 ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
+ // !val_entry2:!
+ valEntry2(cc, arg);
+ }
+
+ private void sbChar(CClassNode cc, CCStateArg arg) {
+ arg.inType = CCVALTYPE.SB;
+ arg.v = token.getC();
+ arg.vIsRaw = false;
+ // !goto val_entry2;!
+ valEntry2(cc, arg);
+ }
+
+ private void rangeEndVal(CClassNode cc, CCStateArg arg) {
+ arg.v = '-';
+ arg.vIsRaw = false;
+ // !goto val_entry;!
+ valEntry(cc, arg);
+ }
+
+ private Node parseEnclose(TokenType term) {
+ Node node = null;
+ // Parses what follows '('; on return returnCode is 1 for a plain group, 2 for option-only "(?i)", 0 otherwise.
+ if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
+
+ int option = env.option;
+
+ if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
+ inc();
+ if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+
+ boolean listCapture = false;
+
+ fetch();
+ switch(c) {
+ case ':': /* (?:...) grouping only */
+ // !group:!
+ fetchToken();
+ node = parseSubExp(term);
+ returnCode = 1; /* group */
+ return node;
+
+ case '=':
+ node = new AnchorNode(AnchorType.PREC_READ);
+ break;
+
+ case '!': /* preceding read */
+ node = new AnchorNode(AnchorType.PREC_READ_NOT);
+ break;
+
+ case '>': /* (?>...) stop backtrack */
+ node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+ break;
+
+ case '\'':
+ if (Config.USE_NAMED_GROUP) {
+ if (syntax.op2QMarkLtNamedGroup()) {
+ // !goto named_group1!;
+ listCapture = false;
+ node = namedGroup2(listCapture);
+ break;
+ } else {
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ }
+ break;
+ } // USE_NAMED_GROUP
+ break;
+ case '<': /* look behind (?<=...), (?<!...) */
+ fetch();
+ if (c == '=') {
+ node = new AnchorNode(AnchorType.LOOK_BEHIND);
+ } else if (c == '!') {
+ node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
+ } else {
+ if (Config.USE_NAMED_GROUP) {
+ if (syntax.op2QMarkLtNamedGroup()) {
+ unfetch();
+ c = '<';
+
+ // !named_group1:!
+ listCapture = false;
+ // !named_group2:!
+ node = namedGroup2(listCapture);
+ break;
+ } else {
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ }
+
+ } else { // USE_NAMED_GROUP
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ } // USE_NAMED_GROUP
+ }
+ break;
+
+ case '@':
+ if (syntax.op2AtMarkCaptureHistory()) {
+ if (Config.USE_NAMED_GROUP) {
+ if (syntax.op2QMarkLtNamedGroup()) {
+ fetch();
+ if (c == '<' || c == '\'') {
+ /* (?@<name>...): namedGroup2 builds the node; leave the switch instead of falling into the unnamed path */
+ node = namedGroup2(true);
+ break;
+ }
+ unfetch();
+ }
+ } // USE_NAMED_GROUP
+ EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
+ int num = env.addMemEntry();
+ if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
+ en.regNum = num;
+ env.captureHistory = bsOnAtSimple(env.captureHistory, num); // record the history bit, as namedGroup2 does
+ node = en;
+ } else {
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ }
+ break;
+
+ // case 'p': #ifdef USE_POSIXLINE_OPTION
+ case '-':
+ case 'i':
+ case 'm':
+ case 's':
+ case 'x':
+ boolean neg = false;
+ while(true) {
+ switch(c) {
+ case ':':
+ case ')':
+ break;
+
+ case '-':
+ neg = true;
+ break;
+
+ case 'x':
+ option = bsOnOff(option, Option.EXTEND, neg);
+ break;
+
+ case 'i':
+ option = bsOnOff(option, Option.IGNORECASE, neg);
+ break;
+
+ case 's':
+ if (syntax.op2OptionPerl()) {
+ option = bsOnOff(option, Option.MULTILINE, neg);
+ } else {
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ }
+ break;
+
+ case 'm':
+ if (syntax.op2OptionPerl()) {
+ option = bsOnOff(option, Option.SINGLELINE, !neg);
+ } else if (syntax.op2OptionRuby()) {
+ option = bsOnOff(option, Option.MULTILINE, neg);
+ } else {
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ }
+ break;
+
+ // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
+ // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
+ // break;
+
+ default:
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ } // switch
+
+ if (c == ')') {
+ EncloseNode en = new EncloseNode(option, 0); // node_new_option
+ node = en;
+ returnCode = 2; /* option only */
+ return node;
+ } else if (c == ':') {
+ int prev = env.option;
+ env.option = option;
+ fetchToken();
+ Node target = parseSubExp(term);
+ env.option = prev;
+ EncloseNode en = new EncloseNode(option, 0); // node_new_option
+ en.setTarget(target);
+ node = en;
+ returnCode = 0;
+ return node;
+ }
+ if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+ fetch();
+ } // while
+
+ default:
+ newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+ } // switch
+
+ } else {
+ if (isDontCaptureGroup(env.option)) {
+ // !goto group;!
+ fetchToken();
+ node = parseSubExp(term);
+ returnCode = 1; /* group */
+ return node;
+ }
+ EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
+ int num = env.addMemEntry();
+ en.regNum = num;
+ node = en;
+ }
+
+ fetchToken();
+ Node target = parseSubExp(term);
+
+ if (node.getType() == NodeType.ANCHOR) {
+ AnchorNode an = (AnchorNode) node;
+ an.setTarget(target);
+ } else {
+ EncloseNode en = (EncloseNode)node;
+ en.setTarget(target);
+ if (en.type == EncloseType.MEMORY) {
+ /* Don't move this to previous of parse_subexp() */
+ env.setMemNode(en.regNum, node);
+ }
+ }
+ returnCode = 0;
+ return node; // ??
+ }
+
+ private Node namedGroup2(boolean listCapture) {
+ // named_group2: read the group name, allocate a memory slot and register the name under it
+ final int nameStart = p;
+ fetchName(c, false); // result unused; presumably leaves the end of the name in 'value' (read next)
+ final int nameEnd = value;
+ final int groupNum = env.addMemEntry();
+ if (listCapture && groupNum >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
+
+ regex.nameAdd(bytes, nameStart, nameEnd, groupNum, syntax);
+ EncloseNode named = new EncloseNode(env.option, true); // node_new_enclose_memory
+ named.regNum = groupNum;
+
+ if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, groupNum);
+ env.numNamed++;
+
+ return named;
+ }
+
+ private int nextChar; // hidden var
+ private int findStrPosition(int[]s, int n, int from, int to) { // index of first s[0..n) in bytes[from,to), else -1
+ int x;
+ int q;
+ int p = from;
+ int i = 0;
+ while(p < to) {
+ x = enc.mbcToCode(bytes, p, to);
+ q = p + enc.length(bytes, p, to);
+ if (x == s[0]) {
+ for (i=1; i<n && q<to; i++) {
+ x = enc.mbcToCode(bytes, q, to);
+ if (x != s[i]) break;
+ q += enc.length(bytes, q, to);
+ }
+ if (i >= n) {
+ nextChar = q; // always report the index just past the match; never read bytes[] at a stale nextChar
+ return p;
+ }
+ }
+ p = q;
+ }
+ return -1;
+ }
+
+ /**
+  * Parses a single expression starting at the current token and returns its node.
+  * <p>
+  * An empty StringNode is returned when the current token is the terminator, ALT or EOT.
+  * Most cases build a node, fall out of the switch, fetch the next token and hand the
+  * node to parseExpRepeat so trailing repeat operators are applied. String and raw-byte
+  * tokens are delegated to parseExpTkByte / parseExpTkRawByte instead.
+  *
+  * @param term token type that terminates the enclosing (sub)expression
+  * @return the parsed node
+  */
+ private Node parseExp(TokenType term) {
+ if (token.type == term) {
+ //!goto end_of_token;!
+ return new StringNode();
+ }
+
+ Node node = null;
+ boolean group = false;
+
+ switch(token.type) {
+ case ALT:
+ case EOT:
+ // !end_of_token:!
+ return new StringNode(); // node_new_empty
+
+ case SUBEXP_OPEN:
+ node = parseEnclose(TokenType.SUBEXP_CLOSE);
+ // returnCode is read right after parseEnclose; presumably set by it (TODO confirm)
+ if (returnCode == 1) {
+ group = true;
+ } else if (returnCode == 2) { /* option only */
+ // parse the rest of the sub-expression under the enclose node's options,
+ // then restore the previous options
+ int prev = env.option;
+ EncloseNode en = (EncloseNode)node;
+ env.option = en.option;
+ fetchToken();
+ Node target = parseSubExp(term);
+ env.option = prev;
+ en.setTarget(target);
+ return node;
+ }
+ break;
+
+ case SUBEXP_CLOSE:
+ if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
+
+ if (token.escaped) {
+ // !goto tk_raw_byte;!
+ return parseExpTkRawByte(group);
+ } else {
+ // !goto tk_byte;!
+ return parseExpTkByte(group);
+ }
+
+ case STRING:
+ // !tk_byte:!
+ return parseExpTkByte(group);
+
+ case RAW_BYTE:
+ // !tk_raw_byte:!
+ return parseExpTkRawByte(group);
+
+ case CODE_POINT:
+ byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
+ int num = enc.codeToMbc(token.getCode(), buf, 0);
+ // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
+ node = new StringNode(buf, 0, num);
+ break;
+
+ case QUOTE_OPEN:
+ // quoted literal: everything up to the <esc>E sequence (or the pattern end) is taken verbatim
+ int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'};
+ int qstart = p;
+ int qend = findStrPosition(endOp, endOp.length, qstart, stop); // will set nextChar!!!
+ if (qend == -1) {
+ nextChar = qend = stop;
+ }
+ node = new StringNode(bytes, qstart, qend);
+ p = nextChar;
+ break;
+
+ case CHAR_TYPE:
+ switch(token.getPropCType()) {
+ case CharacterType.WORD:
+ node = new CTypeNode(token.getPropCType(), token.getPropNot());
+ break;
+
+ case CharacterType.SPACE:
+ case CharacterType.DIGIT:
+ case CharacterType.XDIGIT:
+ // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
+ CClassNode ccn = new CClassNode();
+ ccn.addCType(token.getPropCType(), false, env, this);
+ if (token.getPropNot()) ccn.setNot();
+ node = ccn;
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } // inner switch
+ break;
+
+ case CHAR_PROPERTY:
+ node = parseCharProperty();
+ break;
+
+ case CC_CC_OPEN:
+ CClassNode cc = parseCharClass();
+ node = cc;
+ if (isIgnoreCase(env.option)) {
+ ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
+ enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
+
+ // when case folding produced alternatives, wrap the class and them in an alternation
+ if (arg.altRoot != null) {
+ node = ConsAltNode.newAltNode(node, arg.altRoot);
+ }
+ }
+ break;
+
+ case ANYCHAR:
+ node = new AnyCharNode();
+ break;
+
+ case ANYCHAR_ANYTIME:
+ // any-char wrapped in a quantifier with bounds 0..REPEAT_INFINITE
+ node = new AnyCharNode();
+ QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+ qn.setTarget(node);
+ node = qn;
+ break;
+
+ case BACKREF:
+ int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
+ node = new BackRefNode(token.getBackrefNum(),
+ backRefs,
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+
+ break;
+
+ case CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ int gNum = token.getCallGNum();
+
+ // a negative group number is relative and is converted to an absolute one
+ if (gNum < 0) {
+ gNum = backrefRelToAbs(gNum);
+ if (gNum <= 0) newValueException(ERR_INVALID_BACKREF);
+ }
+ node = new CallNode(bytes, token.getCallNameP(), token.getCallNameEnd(), gNum);
+ env.numCall++;
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case ANCHOR:
+ node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
+ break;
+
+ case OP_REPEAT:
+ case INTERVAL:
+ // a repeat operator with nothing before it: error, empty target, or literal text,
+ // depending on the syntax flags
+ if (syntax.contextIndepRepeatOps()) {
+ if (syntax.contextInvalidRepeatOps()) {
+ newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
+ } else {
+ node = new StringNode(); // node_new_empty
+ }
+ } else {
+ // !goto tk_byte;!
+ return parseExpTkByte(group);
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } //switch
+
+ //targetp = node;
+
+ // !re_entry:!
+ fetchToken();
+
+ // !repeat:!
+ return parseExpRepeat(node, group);
+ }
+
+ private Node parseExpTkByte(boolean group) {
+ // !tk_byte:!
+ StringNode node = new StringNode(bytes, token.backP, p);
+ while (true) {
+ fetchToken();
+ if (token.type != TokenType.STRING) break;
+
+ if (token.backP == node.end) {
+ node.end = p; // non escaped character, remain shared, just increase shared range
+ } else {
+ node.cat(bytes, token.backP, p); // non continuous string stream, need to COW
+ }
+ }
+ // !string_end:!
+ // targetp = node;
+ // !goto repeat;!
+ return parseExpRepeat(node, group);
+ }
+
+ private Node parseExpTkRawByte(boolean group) {
+ // !tk_raw_byte:!
+
+ // important: we don't use 0xff mask here neither in the compiler
+ // (in the template string) so we won't have to mask target
+ // strings when comparing against them in the matcher
+ StringNode node = new StringNode((byte)token.getC());
+ node.setRaw();
+
+ int len = 1;
+ while (true) {
+ if (len >= enc.minLength()) {
+ if (len == enc.length(node.bytes, node.p, node.end)) {
+ fetchToken();
+ node.clearRaw();
+ // !goto string_end;!
+ return parseExpRepeat(node, group);
+ }
+ }
+
+ fetchToken();
+ if (token.type != TokenType.RAW_BYTE) {
+ /* Don't use this, it is wrong for little endian encodings. */
+ // USE_PAD_TO_SHORT_BYTE_CHAR ...
+
+ newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
+ }
+
+ // important: we don't use 0xff mask here neither in the compiler
+ // (in the template string) so we won't have to mask target
+ // strings when comparing against them in the matcher
+ node.cat((byte)token.getC());
+ len++;
+ } // while
+ }
+
+ private Node parseExpRepeat(Node target, boolean group) {
+ // !repeat:!
+ while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) {
+ if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+
+ QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
+ token.getRepeatUpper(),
+ token.type == TokenType.INTERVAL);
+
+ qtfr.greedy = token.getRepeatGreedy();
+ int ret = qtfr.setQuantifier(target, group, env, bytes, getBegin(), getEnd());
+ Node qn = qtfr;
+
+ if (token.getRepeatPossessive()) {
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+ en.setTarget(qn);
+ qn = en;
+ }
+
+ if (ret == 0) {
+ target = qn;
+ } else if (ret == 2) { /* split case: /abc+/ */
+ target = ConsAltNode.newListNode(target, null);
+ ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
+
+ fetchToken();
+ return parseExpRepeatForCar(target, tmp, group);
+ }
+ // !goto re_entry;!
+ fetchToken();
+ }
+ return target;
+ }
+
+ private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
+ // !repeat:!
+ while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) {
+ if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+
+ QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
+ token.getRepeatUpper(),
+ token.type == TokenType.INTERVAL);
+
+ qtfr.greedy = token.getRepeatGreedy();
+ int ret = qtfr.setQuantifier(target.car, group, env, bytes, getBegin(), getEnd());
+ Node qn = qtfr;
+
+ if (token.getRepeatPossessive()) {
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+ en.setTarget(qn);
+ qn = en;
+ }
+
+ if (ret == 0) {
+ target.setCar(qn);
+ } else if (ret == 2) { /* split case: /abc+/ */
+ assert false;
+ }
+ // !goto re_entry;!
+ fetchToken();
+ }
+ return top;
+ }
+
+ private Node parseBranch(TokenType term) {
+ Node node = parseExp(term);
+
+ if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
+ return node;
+ } else {
+ ConsAltNode top = ConsAltNode.newListNode(node, null);
+ ConsAltNode t = top;
+
+ while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
+ node = parseExp(term);
+ if (node.getType() == NodeType.LIST) {
+ t.setCdr((ConsAltNode)node);
+ while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr;
+
+ t = ((ConsAltNode)node);
+ } else {
+ t.setCdr(ConsAltNode.newListNode(node, null));
+ t = t.cdr;
+ }
+ }
+ return top;
+ }
+ }
+
+ /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
+ private Node parseSubExp(TokenType term) {
+ Node node = parseBranch(term);
+
+ if (token.type == term) {
+ return node;
+ } else if (token.type == TokenType.ALT) {
+ ConsAltNode top = ConsAltNode.newAltNode(node, null);
+ ConsAltNode t = top;
+ while (token.type == TokenType.ALT) {
+ fetchToken();
+ node = parseBranch(term);
+
+ t.setCdr(ConsAltNode.newAltNode(node, null));
+ t = t.cdr;
+ }
+
+ if (token.type != term) parseSubExpError(term);
+ return top;
+ } else {
+ parseSubExpError(term);
+ return null; //not reached
+ }
+ }
+
+     /**
+      * Reports a sub-expression that did not end on its terminator: a syntax
+      * error when a closing parenthesis was expected, a parser bug otherwise.
+      *
+      * @param term the terminator that was expected
+      */
+     private void parseSubExpError(TokenType term) {
+         if (term != TokenType.SUBEXP_CLOSE) {
+             newInternalException(ERR_PARSER_BUG);
+         } else {
+             newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
+         }
+     }
+
+ /**
+  * Parses the whole pattern: fetches the first token, then parses a
+  * sub-expression terminated by EOT.
+  *
+  * @return root node of the parsed pattern
+  */
+ private Node parseRegexp() {
+ fetchToken();
+ return parseSubExp(TokenType.EOT);
+ }
+}
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
new file mode 100644
index 0000000..d6308de
--- /dev/null
+++ b/src/org/joni/Regex.java
@@ -0,0 +1,430 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+import static org.joni.Option.isCaptureGroup;
+import static org.joni.Option.isDontCaptureGroup;
+
+import java.util.IllegalFormatConversionException;
+import java.util.Iterator;
+
+import org.jcodings.Encoding;
+import org.jcodings.util.BytesHash;
+import org.joni.constants.AnchorType;
+import org.joni.constants.RegexState;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.ValueException;
+
+public final class Regex implements RegexState {
+
+ int[] code; /* compiled pattern */
+ int codeLength;
+ boolean stackNeeded;
+ Object[]operands; /* e.g. shared CClassNode */
+ int operandLength;
+
+ int state; /* normal, searching, compiling */ // remove
+ int numMem; /* used memory(...) num counted from 1 */
+ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
+ int numCombExpCheck; /* combination explosion check */
+ int numCall; /* number of subexp call */
+ int captureHistory; /* (?@...) flag (1-31) */
+ int btMemStart; /* need backtrack flag */
+ int btMemEnd; /* need backtrack flag */
+
+ int stackPopLevel;
+
+ int[]repeatRangeLo;
+ int[]repeatRangeHi;
+
+ public WarnCallback warnings;
+ public MatcherFactory factory;
+
+ final Encoding enc;
+ int options;
+ int userOptions;
+ Object userObject;
+ //final Syntax syntax;
+ final int caseFoldFlag;
+
+ BytesHash<NameEntry> nameTable; // named entries
+
+ /* optimization info (string search, char-map and anchors) */
+ SearchAlgorithm searchAlgorithm; /* optimize flag */
+ int thresholdLength; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ int anchorDmin; /* (SEMI_)END_BUF anchor distance */
+ int anchorDmax; /* (SEMI_)END_BUF anchor distance */
+ int subAnchor; /* start-anchor for exact or map */
+
+ byte[]exact;
+ int exactP;
+ int exactEnd;
+
+ byte[]map; /* used as BM skip or char-map */
+ int[]intMap; /* BM skip for exact_len > 255 */
+ int[]intMapBackward; /* BM skip for backward search */
+ int dMin; /* min-distance of exact or map */
+ int dMax; /* max-distance of exact or map */
+
+ /** Compiles {@code bytes[p, end)} with Syntax.RUBY and WarnCallback.DEFAULT. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc) {
+ this(bytes, p, end, option, enc, Syntax.RUBY, WarnCallback.DEFAULT);
+ }
+
+ // onig_new
+ /** Compiles {@code bytes[p, end)} with the given syntax, Config.ENC_CASE_FOLD_DEFAULT and WarnCallback.DEFAULT. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax) {
+ this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, WarnCallback.DEFAULT);
+ }
+
+ /** Compiles {@code bytes[p, end)} with Syntax.RUBY and the given warning callback. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc, WarnCallback warnings) {
+ this(bytes, p, end, option, enc, Syntax.RUBY, warnings);
+ }
+
+ // onig_new
+ /** Compiles {@code bytes[p, end)} with the given syntax and warning callback, using Config.ENC_CASE_FOLD_DEFAULT. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax, WarnCallback warnings) {
+ this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, warnings);
+ }
+
+ // onig_alloc_init
+ public Regex(byte[]bytes, int p, int end, int option, int caseFoldFlag, Encoding enc, Syntax syntax, WarnCallback warnings) {
+
+ if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
+ (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
+ throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
+ }
+
+ if ((option & Option.NEGATE_SINGLELINE) != 0) {
+ option |= syntax.options;
+ option &= ~Option.SINGLELINE;
+ } else {
+ option |= syntax.options;
+ }
+
+ this.enc = enc;
+ this.options = option;
+ this.caseFoldFlag = caseFoldFlag;
+ this.warnings = warnings;
+
+ new Analyser(new ScanEnvironment(this, syntax), bytes, p, end).compile();
+
+ this.warnings = null;
+ }
+
+ /** Creates a matcher over the whole of {@code bytes}. */
+ public Matcher matcher(byte[]bytes) {
+ return matcher(bytes, 0, bytes.length);
+ }
+
+ /**
+  * Creates a matcher over {@code bytes[p, end)} through the {@code factory} field.
+  * NOTE(review): factory is not assigned in this class; presumably set during compilation - confirm.
+  */
+ public Matcher matcher(byte[]bytes, int p, int end) {
+ return factory.create(this, bytes, p, end);
+ }
+
+ /** Returns {@code numMem}, the count of used capture memories. */
+ public int numberOfCaptures() {
+ return numMem;
+ }
+
+     /**
+      * Counts the bits set in {@code captureHistory} for positions
+      * 0..Config.MAX_CAPTURE_HISTORY_GROUP.
+      *
+      * @return number of set bits, or 0 when capture history support is disabled
+      */
+     public int numberOfCaptureHistories() {
+         if (!Config.USE_CAPTURE_HISTORY) return 0;
+
+         int count = 0;
+         for (int bit = Config.MAX_CAPTURE_HISTORY_GROUP; bit >= 0; bit--) {
+             if (bsAt(captureHistory, bit)) count++;
+         }
+         return count;
+     }
+
+ String nameTableToString() {
+ StringBuilder sb = new StringBuilder();
+
+ if (nameTable != null) {
+ sb.append("name table\n");
+ for (NameEntry ne : nameTable) {
+ sb.append(" " + ne + "\n");
+ }
+ sb.append("\n");
+ }
+ return sb.toString();
+ }
+
+     /**
+      * Looks up the entry registered for {@code name[nameP, nameEnd)}.
+      *
+      * @return the entry, or null when there is no name table or no such name
+      */
+     NameEntry nameFind(byte[]name, int nameP, int nameEnd) {
+         return nameTable == null ? null : nameTable.get(name, nameP, nameEnd);
+     }
+
+ void renumberNameTable(int[]map) {
+ if (nameTable != null) {
+ for (NameEntry e : nameTable) {
+ if (e.backNum > 1) {
+ for (int i=0; i<e.backNum; i++) {
+ e.backRefs[i] = map[e.backRefs[i]];
+ }
+ } else if (e.backNum == 1) {
+ e.backRef1 = map[e.backRef1];
+ }
+ }
+ }
+ }
+
+     /** Returns the number of entries in the name table, 0 when there is none. */
+     public int numberOfNames() {
+         if (nameTable == null) return 0;
+         return nameTable.size();
+     }
+
+     /**
+      * Registers group number {@code backRef} under {@code name[nameP, nameEnd)},
+      * creating the name table and the entry on first use.
+      *
+      * @throws ValueException when the name is empty, or when it is already
+      *         defined and the syntax does not allow multiplex definitions
+      */
+     void nameAdd(byte[]name, int nameP, int nameEnd, int backRef, Syntax syntax) {
+         if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.ERR_EMPTY_GROUP_NAME);
+
+         NameEntry entry = null;
+         if (nameTable != null) {
+             entry = nameFind(name, nameP, nameEnd);
+         } else {
+             nameTable = new BytesHash<NameEntry>(); // 13, oni defaults to 5
+         }
+
+         if (entry != null) {
+             if (entry.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) {
+                 throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP));
+             }
+         } else {
+             // the name bytes are not duplicated here (oniguruma does; this port does not need to)
+             entry = new NameEntry(name, nameP, nameEnd);
+             nameTable.putDirect(name, nameP, nameEnd, entry);
+         }
+
+         entry.addBackref(backRef);
+     }
+
+ /** Returns the name table entry for {@code name[nameP, nameEnd)}; delegates to nameFind, so it may return null. */
+ NameEntry nameToGroupNumbers(byte[]name, int nameP, int nameEnd) {
+ return nameFind(name, nameP, nameEnd);
+ }
+
+     /**
+      * Resolves a group name to a single group number.
+      * <p>
+      * When the name maps to several groups and {@code region} is given, the
+      * highest-indexed group whose begin position is set wins; otherwise the
+      * last registered group is returned.
+      *
+      * @param region match result used to pick among several groups, may be null
+      * @throws ValueException    when the name is not defined
+      * @throws InternalException when the entry holds no group number
+      */
+     public int nameToBackrefNumber(byte[]name, int nameP, int nameEnd, Region region) {
+         final NameEntry entry = nameToGroupNumbers(name, nameP, nameEnd);
+         if (entry == null) {
+             throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE,
+                                      new String(name, nameP, nameEnd - nameP));
+         }
+
+         if (entry.backNum == 0) throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+         if (entry.backNum == 1) return entry.backRef1;
+
+         final int last = entry.backNum - 1;
+         if (region != null) {
+             for (int i = last; i >= 0; i--) {
+                 final int ref = entry.backRefs[i];
+                 if (region.beg[ref] != Region.REGION_NOTPOS) return ref;
+             }
+         }
+         return entry.backRefs[last];
+     }
+
+     /**
+      * Iterates over the named group entries of this regex.
+      *
+      * @return an iterator over the name table, or an empty iterator when the
+      *         pattern defines no named group (the table is created lazily, so
+      *         it is null in that case)
+      */
+     public Iterator<NameEntry> namedBackrefIterator() {
+         if (nameTable == null) {
+             // fully qualified so that no additional import is required
+             return java.util.Collections.<NameEntry>emptyList().iterator();
+         }
+         return nameTable.iterator();
+     }
+
+     /**
+      * Tells whether unnamed groups capture under the current options.
+      *
+      * @return false when DONT_CAPTURE_GROUP is set, or when named groups exist,
+      *         the syntax captures only named groups and CAPTURE_GROUP is not set;
+      *         true otherwise
+      */
+     public boolean noNameGroupIsActive(Syntax syntax) {
+         if (isDontCaptureGroup(options)) return false;
+
+         final boolean namedOnly = Config.USE_NAMED_GROUP
+                 && numberOfNames() > 0
+                 && syntax.captureOnlyNamedGroup()
+                 && !isCaptureGroup(options);
+         return !namedOnly;
+     }
+
+     /**
+      * Builds the skip table for Boyer-Moore search from {@code exact[exactP, exactEnd)}.
+      * <p>
+      * Every table slot defaults to the pattern length; each byte of the pattern
+      * except the last one gets the distance from its last occurrence to the
+      * pattern end. Patterns shorter than Config.CHAR_TABLE_SIZE use the byte
+      * table {@code map}, longer ones the int table {@code intMap}.
+      */
+     void setupBMSkipMap() {
+         byte[]bytes = exact;
+         int p = exactP;
+         int end = exactEnd;
+         int len = end - p;
+
+         if (len < Config.CHAR_TABLE_SIZE) {
+             // map/skip
+             if (map == null) map = new byte[Config.CHAR_TABLE_SIZE];
+
+             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len;
+             for (int i=0; i<len-1; i++) map[bytes[p + i] & 0xff] = (byte)(len - 1 -i); // 0xff: index by unsigned byte value
+         } else {
+             if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE];
+
+             // Bytes absent from the pattern must skip the whole pattern length.
+             // Without this fill they kept a skip of 0 (or a stale value from a
+             // previously built table).
+             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) intMap[i] = len;
+             for (int i=0; i<len-1; i++) intMap[bytes[p + i] & 0xff] = len - 1 - i; // 0xff: index by unsigned byte value
+         }
+     }
+
+ void setExactInfo(OptExactInfo e) {
+ if (e.length == 0) return;
+
+ // shall we copy that ?
+ exact = e.s;
+ exactP = 0;
+ exactEnd = e.length;
+
+ if (e.ignoreCase) {
+ // encodings won't return toLowerTable for case insensitive search if it's not safe to use it directly
+ searchAlgorithm = enc.toLowerCaseTable() != null ? SearchAlgorithm.SLOW_IC_SB : new SearchAlgorithm.SLOW_IC(this);
+ } else {
+ boolean allowReverse = enc.isReverseMatchAllowed(exact, exactP, exactEnd);
+
+ if (e.length >= 3 || (e.length >= 2 && allowReverse)) {
+ setupBMSkipMap();
+ if (allowReverse) {
+ searchAlgorithm = SearchAlgorithm.BM;
+ } else {
+ searchAlgorithm = SearchAlgorithm.BM_NOT_REV;
+ }
+ } else {
+ searchAlgorithm = enc.isSingleByte() ? SearchAlgorithm.SLOW_SB : SearchAlgorithm.SLOW;
+ }
+ }
+
+ dMin = e.mmd.min;
+ dMax = e.mmd.max;
+
+ if (dMin != MinMaxLen.INFINITE_DISTANCE) {
+ thresholdLength = dMin + (exactEnd - exactP);
+ }
+ }
+
+     /**
+      * Installs a char-map optimization: adopts the map, picks the MAP search
+      * algorithm and copies the distance information.
+      *
+      * @param m char-map information gathered for the pattern
+      */
+     void setOptimizeMapInfo(OptMapInfo m) {
+         // the table is shared with the OptMapInfo rather than copied entry by entry
+         map = m.map;
+
+         if (enc.isSingleByte()) {
+             searchAlgorithm = SearchAlgorithm.MAP_SB;
+         } else {
+             searchAlgorithm = SearchAlgorithm.MAP;
+         }
+
+         dMin = m.mmd.min;
+         dMax = m.mmd.max;
+
+         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
+             thresholdLength = dMin + 1;
+         }
+     }
+
+ void setSubAnchor(OptAnchorInfo anc) {
+ subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
+ subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
+ }
+
+     /** Resets the search algorithm, the anchor fields and the exact string. */
+     void clearOptimizeInfo() {
+         searchAlgorithm = SearchAlgorithm.NONE;
+
+         anchor = anchorDmin = anchorDmax = subAnchor = 0;
+
+         exact = null;
+         exactP = 0;
+         exactEnd = 0;
+     }
+
+     /**
+      * Renders {@code bytes[p, end)} as a printable "PATTERN: /.../" string.
+      * <p>
+      * For encodings whose minimum character length exceeds one byte, each code
+      * point is decoded; values of 0x80 and above are shown as hex. Otherwise
+      * every byte is converted to a string on its own.
+      */
+     public String encStringToString(byte[]bytes, int p, int end) {
+         final StringBuilder out = new StringBuilder("\nPATTERN: /");
+
+         if (enc.minLength() > 1) {
+             for (int pos = p; pos < end; pos += enc.length(bytes, pos, end)) {
+                 final int code = enc.mbcToCode(bytes, pos, end);
+                 if (code < 0x80) {
+                     out.append((char)code);
+                     continue;
+                 }
+                 try {
+                     out.append(String.format(" 0x%04x ", code));
+                 } catch (IllegalFormatConversionException ifce) {
+                     out.append(code);
+                 }
+             }
+         } else {
+             for (int pos = p; pos < end; pos++) {
+                 out.append(new String(new byte[]{bytes[pos]}));
+             }
+         }
+
+         out.append("/");
+         return out.toString();
+     }
+
+ public String optimizeInfoToString() {
+ String s = "";
+ s += "optimize: " + searchAlgorithm.getName() + "\n";
+ s += " anchor: " + OptAnchorInfo.anchorToString(anchor);
+
+ if ((anchor & AnchorType.END_BUF_MASK) != 0) {
+ s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax);
+ }
+
+ s += "\n";
+
+ if (searchAlgorithm != SearchAlgorithm.NONE) {
+ s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n";
+ }
+
+ s += "dmin: " + dMin + " dmax: " + dMax + "\n";
+ s += "threshold length: " + thresholdLength + "\n";
+
+ if (exact != null) {
+ s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n";
+ } else if (searchAlgorithm == SearchAlgorithm.MAP || searchAlgorithm == SearchAlgorithm.MAP_SB) {
+ int n=0;
+ for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
+
+ s += "map: n = " + n + "\n";
+ if (n > 0) {
+ int c=0;
+ s += "[";
+ for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
+ if (map[i] != 0) {
+ if (c > 0) s += ", ";
+ c++;
+ if (enc.maxLength() == 1 && enc.isPrint(i)) s += ((char)i);
+ else s += i;
+ }
+ }
+ s += "]\n";
+ }
+ }
+ return s;
+ }
+
+ /** Returns the encoding this regex was compiled with. */
+ public Encoding getEncoding() {
+ return enc;
+ }
+
+ /** Returns the effective options: the constructor's option argument merged with the syntax defaults. */
+ public int getOptions() {
+ return options;
+ }
+
+ /** Stores a caller-defined options value; it is only stored and returned by this class. */
+ public void setUserOptions(int options) {
+ this.userOptions = options;
+ }
+
+ /** Returns the value last passed to setUserOptions (0 if never set). */
+ public int getUserOptions() {
+ return userOptions;
+ }
+
+ /** Attaches an arbitrary caller-defined object to this regex. */
+ public void setUserObject(Object object) {
+ this.userObject = object;
+ }
+
+ /** Returns the object last passed to setUserObject (null if never set). */
+ public Object getUserObject() {
+ return userObject;
+ }
+}
diff --git a/src/org/joni/Region.java b/src/org/joni/Region.java
new file mode 100644
index 0000000..08b90f9
--- /dev/null
+++ b/src/org/joni/Region.java
@@ -0,0 +1,66 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Begin/end positions of the registers of a match, plus the optional
+  * capture history tree.
+  */
+ public final class Region {
+     /** Marker stored in beg/end by clear() for a register without a position. */
+     static final int REGION_NOTPOS = -1;
+
+     public final int numRegs;
+     public final int[]beg;
+     public final int[]end;
+     public CaptureTreeNode historyRoot;
+
+     /** Creates a region with {@code num} registers, all positions zero. */
+     public Region(int num) {
+         numRegs = num;
+         beg = new int[num];
+         end = new int[num];
+     }
+
+     /** Creates a one-register region spanning {@code begin} to {@code end}. */
+     public Region(int begin, int end) {
+         this.numRegs = 1;
+         this.beg = new int[1];
+         this.end = new int[1];
+         this.beg[0] = begin;
+         this.end[0] = end;
+     }
+
+     /** Returns a copy with its own position arrays and a cloned history tree. */
+     public Region clone() {
+         final Region copy = new Region(numRegs);
+         System.arraycopy(beg, 0, copy.beg, 0, beg.length);
+         System.arraycopy(end, 0, copy.end, 0, end.length);
+         if (historyRoot != null) {
+             copy.historyRoot = historyRoot.cloneTree();
+         }
+         return copy;
+     }
+
+     /** Lists every register as "i: (beg-end)". */
+     public String toString() {
+         final StringBuilder out = new StringBuilder("Region: \n");
+         for (int i = 0; i < beg.length; i++) {
+             out.append(" ").append(i).append(": (").append(beg[i]).append("-").append(end[i]).append(")");
+         }
+         return out.toString();
+     }
+
+     CaptureTreeNode getCaptureTree() {
+         return historyRoot;
+     }
+
+     /** Marks every register as having no position. */
+     void clear() {
+         for (int i = 0; i < beg.length; i++) {
+             end[i] = REGION_NOTPOS;
+             beg[i] = REGION_NOTPOS;
+         }
+     }
+ }
diff --git a/src/org/joni/ScanEnvironment.java b/src/org/joni/ScanEnvironment.java
new file mode 100644
index 0000000..95dd90a
--- /dev/null
+++ b/src/org/joni/ScanEnvironment.java
@@ -0,0 +1,140 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsClear;
+
+import org.jcodings.Encoding;
+import org.joni.ast.Node;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+public final class ScanEnvironment {
+
+ private static final int SCANENV_MEMNODES_SIZE = 8;
+
+ int option;
+ final int caseFoldFlag;
+ final public Encoding enc;
+ final public Syntax syntax;
+ int captureHistory;
+ int btMemStart;
+ int btMemEnd;
+ int backrefedMem;
+
+ final public Regex reg;
+
+ int numCall;
+ UnsetAddrList unsetAddrList; // USE_SUBEXP_CALL
+ public int numMem;
+
+ int numNamed; // USE_NAMED_GROUP
+
+ public Node memNodes[];
+
+ // USE_COMBINATION_EXPLOSION_CHECK
+ int numCombExpCheck;
+ int combExpMaxRegNum;
+ int currMaxRegNum;
+ boolean hasRecursion;
+
+ /**
+  * Creates a scan environment for {@code regex}, copying its options,
+  * case fold flag and encoding, and remembering the syntax to parse with.
+  */
+ public ScanEnvironment(Regex regex, Syntax syntax) {
+ this.reg = regex;
+ option = regex.options;
+ caseFoldFlag = regex.caseFoldFlag;
+ enc = regex.enc;
+ this.syntax = syntax;
+ }
+
+ /**
+  * Resets the per-scan state: the bit status fields, the counters, the memory
+  * node table and the combination explosion bookkeeping. The fields set by the
+  * constructor (reg, option, caseFoldFlag, enc, syntax) are left untouched.
+  */
+ public void clear() {
+ captureHistory = bsClear();
+ btMemStart = bsClear();
+ btMemEnd = bsClear();
+ backrefedMem = bsClear();
+
+ numCall = 0;
+ numMem = 0;
+
+ numNamed = 0;
+
+ // dropped here; addMemEntry allocates it again on the first entry
+ memNodes = null;
+
+ numCombExpCheck = 0;
+ combExpMaxRegNum = 0;
+ currMaxRegNum = 0;
+ hasRecursion = false;
+ }
+
+     /**
+      * Reserves the next memory (capture group) number, allocating the node
+      * table on the first call and doubling it whenever the new number would
+      * not fit.
+      *
+      * @return the newly reserved number (numbering starts at 1)
+      */
+     public int addMemEntry() {
+         final int previous = numMem;
+         numMem = previous + 1;
+
+         if (previous == 0) {
+             memNodes = new Node[SCANENV_MEMNODES_SIZE];
+         } else if (numMem >= memNodes.length) {
+             final Node[]grown = new Node[memNodes.length << 1];
+             System.arraycopy(memNodes, 0, grown, 0, memNodes.length);
+             memNodes = grown;
+         }
+
+         return numMem;
+     }
+
+     /**
+      * Stores {@code node} as the node of memory number {@code num}.
+      *
+      * @throws InternalException when {@code num} is greater than the count of reserved memories
+      */
+     public void setMemNode(int num, Node node) {
+         if (num > numMem) {
+             throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+         }
+         memNodes[num] = node;
+     }
+
+     /**
+      * Maps the character following a backslash to the control character it
+      * denotes, when the syntax enables control character escapes.
+      *
+      * @param c character that followed the backslash
+      * @return the control character, or {@code c} itself when no mapping applies
+      */
+     public int convertBackslashValue(int c) {
+         if (!syntax.opEscControlChars()) return c;
+
+         switch (c) {
+         case 'n': return '\n';
+         case 't': return '\t';
+         case 'r': return '\r';
+         case 'f': return '\f';
+         case 'a': return '\007';
+         case 'b': return '\010';
+         case 'e': return '\033';
+         case 'v': return syntax.op2EscVVtab() ? 11 : c; // 11 is the vertical tab
+         default:  return c;
+         }
+     }
+
+     /**
+      * Warns that {@code s} appears unescaped inside a character class, when
+      * warnings are enabled and the syntax asks for this warning.
+      */
+     void ccEscWarn(String s) {
+         if (Config.USE_WARN && syntax.warnCCOpNotEscaped() && syntax.backSlashEscapeInCC()) {
+             reg.warnings.warn("character class has '" + s + "' without escape");
+         }
+     }
+
+     /**
+      * Warns that {@code s} appears unescaped in the regular expression, when
+      * warnings are enabled and the syntax asks for this warning.
+      */
+     void closeBracketWithoutEscapeWarn(String s) {
+         if (Config.USE_WARN && syntax.warnCCOpNotEscaped()) {
+             reg.warnings.warn("regular expression has '" + s + "' without escape");
+         }
+     }
+}
diff --git a/src/org/joni/ScannerSupport.java b/src/org/joni/ScannerSupport.java
new file mode 100644
index 0000000..370d338
--- /dev/null
+++ b/src/org/joni/ScannerSupport.java
@@ -0,0 +1,179 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.jcodings.Encoding;
+import org.jcodings.IntHolder;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.SyntaxException;
+import org.joni.exception.ValueException;
+
+abstract class ScannerSupport extends IntHolder implements ErrorMessages {
+ protected final Encoding enc; // fast access to encoding
+
+ protected final byte[]bytes; // pattern
+ protected int p; // current scanner position
+ protected int stop; // pattern end (mutable)
+ private int lastFetched; // last fetched value for unfetch support
+ protected int c; // current code point
+
+ private final int begin; // pattern begin position for reset() support
+ private final int end; // pattern end position for reset() support
+ protected int _p; // used by mark()/restore() to mark positions
+
+ /**
+  * Creates a scanner over {@code bytes[p, end)} and positions it at {@code p}.
+  */
+ protected ScannerSupport(Encoding enc, byte[]bytes, int p, int end) {
+ this.enc = enc;
+
+ this.bytes = bytes;
+ this.begin = p;
+ this.end = end;
+
+ reset();
+ }
+
+ /** Returns the pattern begin offset given to the constructor. */
+ protected int getBegin() {
+ return begin;
+ }
+
+ /** Returns the pattern end offset given to the constructor (unaffected by changes to {@code stop}). */
+ protected int getEnd() {
+ return end;
+ }
+
+ private final int INT_SIGN_BIT = 1 << 31;
+
+     /**
+      * Scans a run of decimal digits at the current position and returns its value.
+      * Scanning stops (without consuming it) at the first non-digit.
+      *
+      * @return the value, 0 when no digit is present, or -1 when the value
+      *         would exceed Integer.MAX_VALUE
+      */
+     protected final int scanUnsignedNumber() {
+         int num = 0;
+         while(left()) {
+             fetch();
+             if (enc.isDigit(c)) {
+                 int val = Encoding.digitVal(c);
+                 // Test before multiplying: a sign-bit comparison after the fact misses
+                 // overflows that wrap around to a positive value (e.g. "4294967300").
+                 if ((Integer.MAX_VALUE - val) / 10 < num) return -1;
+                 num = num * 10 + val;
+             } else {
+                 unfetch();
+                 break;
+             }
+         }
+         return num;
+     }
+
+     /**
+      * Scans up to {@code maxLength} hexadecimal digits at the current position
+      * and returns their value. Scanning stops (without consuming it) at the
+      * first non-hex-digit.
+      *
+      * @param maxLength maximum number of digits to consume; a negative value means no limit
+      * @return the value, 0 when no digit is present, or -1 when the value
+      *         would exceed Integer.MAX_VALUE
+      */
+     protected final int scanUnsignedHexadecimalNumber(int maxLength) {
+         int num = 0;
+         while(left() && maxLength-- != 0) {
+             fetch();
+             if (enc.isXDigit(c)) {
+                 int val = enc.xdigitVal(c);
+                 // Test before shifting: with more than 8 digits the high bits are
+                 // shifted out and a sign-bit comparison no longer notices the overflow.
+                 if ((Integer.MAX_VALUE - val) / 16 < num) return -1;
+                 num = (num << 4) + val;
+             } else {
+                 unfetch();
+                 break;
+             }
+         }
+         return num;
+     }
+
+     /**
+      * Scans up to {@code maxLength} octal digits at the current position and
+      * returns their value. Scanning stops (without consuming it) at the first
+      * character that is not an octal digit.
+      *
+      * @param maxLength maximum number of digits to consume; a negative value means no limit
+      * @return the value, 0 when no digit is present, or -1 when the value
+      *         would exceed Integer.MAX_VALUE
+      */
+     protected final int scanUnsignedOctalNumber(int maxLength) {
+         int num = 0;
+         while(left() && maxLength-- != 0) {
+             fetch();
+             if (enc.isDigit(c) && c < '8') {
+                 int val = Encoding.odigitVal(c);
+                 // Test before shifting: from the 11th digit on the high bits are
+                 // shifted out and a sign-bit comparison can miss the overflow.
+                 if ((Integer.MAX_VALUE - val) / 8 < num) return -1;
+                 num = (num << 3) + val;
+             } else {
+                 unfetch();
+                 break;
+             }
+         }
+         return num;
+     }
+
+ /** Moves the scanner back to the pattern begin and restores {@code stop} to the pattern end. */
+ protected final void reset() {
+ p = begin;
+ stop = end;
+ }
+
+ /** Remembers the current position in {@code _p}; only one mark is kept. */
+ protected final void mark() {
+ _p = p;
+ }
+
+ /** Moves the scanner back to the position remembered by mark(). */
+ protected final void restore() {
+ p = _p;
+ }
+
+ /** Skips one character without decoding it; the skipped position is kept for unfetch(). */
+ protected final void inc() {
+ lastFetched = p;
+ p += enc.length(bytes, p, stop);
+ }
+
+ /** Decodes the character at the current position into {@code c} and advances past it. */
+ protected final void fetch() {
+ c = enc.mbcToCode(bytes, p, stop);
+ lastFetched = p;
+ p += enc.length(bytes, p, stop);
+ }
+
+ /** Like fetch(), but returns the decoded code point instead of storing it in {@code c}. */
+ protected int fetchTo() {
+ int to = enc.mbcToCode(bytes, p, stop);
+ lastFetched = p;
+ p += enc.length(bytes, p, stop);
+ return to;
+ }
+
+ /** Steps back to the position before the last inc()/fetch()/fetchTo(); only one step is remembered. */
+ protected final void unfetch() {
+ p = lastFetched;
+ }
+
+ /** Returns the code point at the current position without advancing, or 0 at the end of input. */
+ protected final int peek() {
+ return p < stop ? enc.mbcToCode(bytes, p, stop) : 0;
+ }
+
+ /** Tells whether the next code point equals {@code c}; at the end of input this compares against 0. */
+ protected final boolean peekIs(int c) {
+ return peek() == c;
+ }
+
+ /** Tells whether any input is left before {@code stop}. */
+ protected final boolean left() {
+ return p < stop;
+ }
+
+ /** Throws a SyntaxException with {@code message}; despite the name it never returns. */
+ protected void newSyntaxException(String message) {
+ throw new SyntaxException(message);
+ }
+
+ /** Throws a ValueException with {@code message}; never returns. */
+ protected void newValueException(String message) {
+ throw new ValueException(message);
+ }
+
+ /** Throws a ValueException built from {@code message} and {@code str}; never returns. */
+ protected void newValueException(String message, String str) {
+ throw new ValueException(message, str);
+ }
+
+ /** Throws a ValueException built from {@code message} and the pattern bytes in {@code [p, end)}; never returns. */
+ protected void newValueException(String message, int p, int end) {
+ throw new ValueException(message, new String(bytes, p, end - p));
+ }
+
+ /** Throws an InternalException with {@code message}; never returns. */
+ protected void newInternalException(String message) {
+ throw new InternalException(message);
+ }
+
+}
diff --git a/src/org/joni/SearchAlgorithm.java b/src/org/joni/SearchAlgorithm.java
new file mode 100644
index 0000000..36bf877
--- /dev/null
+++ b/src/org/joni/SearchAlgorithm.java
@@ -0,0 +1,528 @@
+package org.joni;
+
+import org.jcodings.Encoding;
+import org.jcodings.IntHolder;
+
+ /**
+  * Strategies for finding a position in a byte[] text, either by comparing
+  * against the literal bytes {@code regex.exact[exactP, exactEnd)} or by
+  * looking the byte at a position up in {@code regex.map}.
+  *
+  * Every strategy returns an index into the text, or -1 when nothing was found.
+  */
+ public abstract class SearchAlgorithm {
+
+     /** Returns the short identifier of this algorithm, for example {@code "EXACT_BM"}. */
+     public abstract String getName();
+
+     /**
+      * Scans forward, starting at {@code textP}.
+      *
+      * @param regex     supplies the encoding, the literal bytes and the skip/lookup tables
+      * @param text      bytes to scan
+      * @param textP     first position to try
+      * @param textEnd   end of the readable text (exclusive)
+      * @param textRange upper bound used to limit the scan
+      * @return index of the position found, or -1
+      */
+     public abstract int search(Regex regex, byte[]text, int textP, int textEnd, int textRange);
+
+     /**
+      * Scans backward, from high positions down to {@code textP}.
+      *
+      * @param regex      supplies the encoding, the literal bytes and the skip/lookup tables
+      * @param text       bytes to scan
+      * @param textP      lowest position to try
+      * @param adjustText lower bound handed to the encoding's character-head helpers
+      * @param textEnd    end of the readable text (exclusive)
+      * @param textStart  highest position to try
+      * @param s_         only read by {@link #BM} (together with {@code range_}) to choose its method
+      * @param range_     see {@code s_}
+      * @return index of the position found, or -1
+      */
+     public abstract int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_);
+
+
+     /** No searching at all: both directions hand back {@code textP} unchanged. */
+     public static final SearchAlgorithm NONE = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "NONE";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             return textP;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             return textP;
+         }
+
+     };
+
+     /** Byte-wise comparison at every character start; steps with the encoding's character length. */
+     public static final SearchAlgorithm SLOW = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "EXACT";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             Encoding enc = regex.enc;
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+
+             // positions from which the whole target no longer fits are excluded
+             int end = textEnd;
+             end -= targetEnd - targetP - 1;
+
+             if (end > textRange) end = textRange;
+
+             int s = textP;
+
+             while (s < end) {
+                 if (text[s] == target[targetP]) {
+                     int p = s + 1;
+                     int t = targetP + 1;
+                     while (t < targetEnd) {
+                         if (target[t] != text[p++]) break;
+                         t++;
+                     }
+
+                     if (t == targetEnd) return s;
+                 }
+                 s += enc.length(text, s, textEnd);
+             }
+
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             Encoding enc = regex.enc;
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             // highest position from which the whole target still fits
+             int s = textEnd;
+             s -= targetEnd - targetP;
+
+             if (s > textStart) {
+                 s = textStart;
+             } else {
+                 s = enc.leftAdjustCharHead(text, adjustText, s, textEnd);
+             }
+
+             while (s >= textP) {
+                 if (text[s] == target[targetP]) {
+                     int p = s + 1;
+                     int t = targetP + 1;
+                     while (t < targetEnd) {
+                         if (target[t] != text[p++]) break;
+                         t++;
+                     }
+                     if (t == targetEnd) return s;
+                 }
+                 s = enc.prevCharHead(text, adjustText, s, textEnd);
+             }
+             return -1;
+         }
+     };
+
+     /** Same comparison as {@link #SLOW}, but steps one byte at a time and never consults the encoding. */
+     public static final SearchAlgorithm SLOW_SB = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "EXACT_SB";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int end = textEnd;
+             end -= targetEnd - targetP - 1;
+
+             if (end > textRange) end = textRange;
+
+             int s = textP;
+
+             while (s < end) {
+                 if (text[s] == target[targetP]) {
+                     int p = s + 1;
+                     int t = targetP + 1;
+                     while (t < targetEnd) {
+                         if (target[t] != text[p++]) break;
+                         t++;
+                     }
+
+                     if (t == targetEnd) return s;
+                 }
+                 s++;
+             }
+
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int s = textEnd;
+             s -= targetEnd - targetP;
+
+             if (s > textStart) s = textStart;
+
+             while (s >= textP) {
+                 if (text[s] == target[targetP]) {
+                     int p = s + 1;
+                     int t = targetP + 1;
+                     while (t < targetEnd) {
+                         if (target[t] != text[p++]) break;
+                         t++;
+                     }
+                     if (t == targetEnd) return s;
+                 }
+                 //s = s <= adjustText ? -1 : s - 1;
+                 s--;
+             }
+             return -1;
+         }
+     };
+
+
+     /**
+      * Comparison against the case-folded text: each text character is folded
+      * with {@code enc.mbcCaseFold} and the folded bytes are compared with the
+      * target. Instances keep a scratch buffer and position holder as fields.
+      */
+     public static final class SLOW_IC extends SearchAlgorithm {
+         private final byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+         private final IntHolder holder = new IntHolder();
+         private final int caseFoldFlag;
+         private final Encoding enc;
+
+         /** Captures the case-fold flag and encoding of {@code regex}. */
+         public SLOW_IC(Regex regex) {
+             this.caseFoldFlag = regex.caseFoldFlag;
+             this.enc = regex.enc;
+         }
+
+         public final String getName() {
+             return "EXACT_IC";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int end = textEnd;
+             end -= targetEnd - targetP - 1;
+
+             if (end > textRange) end = textRange;
+             int s = textP;
+
+             while (s < end) {
+                 if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s;
+                 s += enc.length(text, s, textEnd);
+             }
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int s = textEnd;
+             s -= targetEnd - targetP;
+
+             if (s > textStart) {
+                 s = textStart;
+             } else {
+                 s = enc.leftAdjustCharHead(text, adjustText, s, textEnd);
+             }
+
+             while (s >= textP) {
+                 if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s;
+                 s = enc.prevCharHead(text, adjustText, s, textEnd);
+             }
+             return -1;
+         }
+
+         /**
+          * Folds the text starting at {@code p} one character at a time and
+          * compares the folded bytes with {@code t[tP, tEnd)}.
+          *
+          * @return true when the whole target range was matched
+          */
+         private boolean lowerCaseMatch(byte[]t, int tP, int tEnd,
+                                        byte[]bytes, int p, int end) {
+
+             holder.value = p;
+             while (tP < tEnd) {
+                 int lowlen = enc.mbcCaseFold(caseFoldFlag, bytes, holder, end, buf);
+                 if (lowlen == 1) {
+                     if (t[tP++] != buf[0]) return false;
+                 } else {
+                     int q = 0;
+                     while (lowlen > 0) {
+                         if (t[tP++] != buf[q++]) return false;
+                         lowlen--;
+                     }
+                 }
+             }
+             return true;
+         }
+     }
+
+     /** Byte-at-a-time comparison after mapping each text byte through {@code enc.toLowerCaseTable()}. */
+     public static final SearchAlgorithm SLOW_IC_SB = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "EXACT_IC_SB";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             final byte[]toLowerTable = regex.enc.toLowerCaseTable();
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int end = textEnd;
+             end -= targetEnd - targetP - 1;
+
+             if (end > textRange) end = textRange;
+             int s = textP;
+
+             while (s < end) {
+                 if (target[targetP] == toLowerTable[text[s] & 0xff]) {
+                     int p = s + 1;
+                     int t = targetP + 1;
+                     while (t < targetEnd) {
+                         if (target[t] != toLowerTable[text[p++] & 0xff]) break;
+                         t++;
+                     }
+
+                     if (t == targetEnd) return s;
+                 }
+                 s++;
+             }
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             final byte[]toLowerTable = regex.enc.toLowerCaseTable();
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int s = textEnd;
+             s -= targetEnd - targetP;
+
+             if (s > textStart) s = textStart;
+
+             while (s >= textP) {
+                 if (target[targetP] == toLowerTable[text[s] & 0xff]) {
+                     int p = s + 1;
+                     int t = targetP + 1;
+                     while (t < targetEnd) {
+                         if (target[t] != toLowerTable[text[p++] & 0xff]) break;
+                         t++;
+                     }
+                     if (t == targetEnd) return s;
+                 }
+                 //s = s <= adjustText ? -1 : s - 1;
+                 s--;
+             }
+             return -1;
+         }
+
+     };
+
+     /**
+      * Skip-table search. Forward, the target is compared from its last byte
+      * and the position advances by {@code regex.map} / {@code regex.intMap};
+      * backward, it is compared from its first byte and the position retreats
+      * by {@code regex.intMapBackward}.
+      */
+     public static final SearchAlgorithm BM = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "EXACT_BM";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int end = textRange + (targetEnd - targetP) - 1;
+             if (end > textEnd) end = textEnd;
+
+             int tail = targetEnd - 1;
+             // s tracks the text position aligned with the last target byte
+             int s = textP + (targetEnd - targetP) - 1;
+
+             if (regex.intMap == null) {
+                 while (s < end) {
+                     int p = s;
+                     int t = tail;
+                     while (t >= targetP && text[p] == target[t]) {
+                         p--; t--;
+                     }
+                     if (t < targetP) return p + 1;
+                     s += regex.map[text[s] & 0xff];
+                 }
+             } else { /* see int_map[] */
+                 while (s < end) {
+                     int p = s;
+                     int t = tail;
+                     while (t >= targetP && text[p] == target[t]) {
+                         p--; t--;
+                     }
+                     if (t < targetP) return p + 1;
+                     s += regex.intMap[text[s] & 0xff];
+                 }
+             }
+             return -1;
+         }
+
+         private static final int BM_BACKWARD_SEARCH_LENGTH_THRESHOLD = 100;
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             Encoding enc = regex.enc;
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             if (regex.intMapBackward == null) {
+                 // no table yet: below the threshold use the plain search, otherwise build the table
+                 if (s_ - range_ < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) {
+                     // goto exact_method;
+                     return SLOW.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_);
+                 }
+                 setBmBackwardSkip(regex, target, targetP, targetEnd);
+             }
+
+             int s = textEnd - (targetEnd - targetP);
+
+             if (textStart < s) {
+                 s = textStart;
+             } else {
+                 s = enc.leftAdjustCharHead(text, adjustText, s, textEnd);
+             }
+
+             while (s >= textP) {
+                 int p = s;
+                 int t = targetP;
+                 while (t < targetEnd && text[p] == target[t]) {
+                     p++; t++;
+                 }
+                 if (t == targetEnd) return s;
+
+                 s -= regex.intMapBackward[text[s] & 0xff];
+                 s = enc.leftAdjustCharHead(text, adjustText, s, textEnd);
+             }
+             return -1;
+         }
+
+
+         /**
+          * Fills {@code regex.intMapBackward} (allocating it when absent) for
+          * the target {@code bytes[p, end)}: every byte value defaults to the
+          * target length, and each target byte after the first gets its
+          * distance from the start of the target (the smallest such distance
+          * wins when a byte occurs more than once).
+          */
+         private void setBmBackwardSkip(Regex regex, byte[]bytes, int p, int end) {
+             int[] skip;
+             if (regex.intMapBackward == null) {
+                 skip = new int[Config.CHAR_TABLE_SIZE];
+                 regex.intMapBackward = skip;
+             } else {
+                 skip = regex.intMapBackward;
+             }
+
+             int len = end - p;
+
+             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) skip[i] = len;
+             // i is an offset inside the target, so the byte lives at p + i
+             for (int i=len-1; i>0; i--) skip[bytes[p + i] & 0xff] = i;
+         }
+     };
+
+     /**
+      * Forward skip-table search that reports the start of the target and
+      * moves forward one encoded character at a time until at least the
+      * table's skip distance has been covered. Backward search is delegated
+      * to {@link #BM}.
+      */
+     public static final SearchAlgorithm BM_NOT_REV = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "EXACT_BM_NOT_REV";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             Encoding enc = regex.enc;
+             byte[]target = regex.exact;
+             int targetP = regex.exactP;
+             int targetEnd = regex.exactEnd;
+
+             int tail = targetEnd - 1;
+             int tlen1 = tail - targetP;
+             int end = textRange;
+
+             if (Config.DEBUG_SEARCH) {
+                 Config.log.println("bm_search_notrev: "+
+                         "text: " + textP +
+                         ", text_end: " + textEnd +
+                         ", text_range: " + textRange);
+             }
+
+             if (end + tlen1 > textEnd) end = textEnd - tlen1;
+
+             int s = textP;
+
+             if (regex.intMap == null) {
+                 while (s < end) {
+                     int p, se;
+                     p = se = s + tlen1;
+                     int t = tail;
+                     while (t >= targetP && text[p] == target[t]) {
+                         p--; t--;
+                     }
+
+                     if (t < targetP) return s;
+
+                     int skip = regex.map[text[se] & 0xff];
+                     t = s;
+                     do {
+                         s += enc.length(text, s, textEnd);
+                     } while ((s - t) < skip && s < end);
+                 }
+             } else {
+                 while (s < end) {
+                     int p, se;
+                     p = se = s + tlen1;
+                     int t = tail;
+                     while (t >= targetP && text[p] == target[t]) {
+                         p--; t--;
+                     }
+
+                     if (t < targetP) return s;
+
+                     int skip = regex.intMap[text[se] & 0xff];
+                     t = s;
+                     do {
+                         s += enc.length(text, s, textEnd);
+                     } while ((s - t) < skip && s < end);
+
+                 }
+             }
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             return BM.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_);
+         }
+     };
+
+
+     /** Finds a character start whose first byte has a non-zero entry in {@code regex.map}. */
+     public static final SearchAlgorithm MAP = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "MAP";
+         }
+
+         // TODO: check 1.9 inconsistent calls to map_search
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             Encoding enc = regex.enc;
+             byte[]map = regex.map;
+             int s = textP;
+
+             while (s < textRange) {
+                 if (map[text[s] & 0xff] != 0) return s;
+                 s += enc.length(text, s, textEnd);
+             }
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             Encoding enc = regex.enc;
+             byte[]map = regex.map;
+             int s = textStart;
+
+             if (s >= textEnd) s = textEnd - 1; // multibyte safe ?
+             while (s >= textP) {
+                 if (map[text[s] & 0xff] != 0) return s;
+                 s = enc.prevCharHead(text, adjustText, s, textEnd);
+             }
+             return -1;
+         }
+     };
+
+     /** Same lookup as {@link #MAP}, but steps one byte at a time and never consults the encoding. */
+     public static final SearchAlgorithm MAP_SB = new SearchAlgorithm() {
+
+         public final String getName() {
+             return "MAP_SB";
+         }
+
+         public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+             byte[]map = regex.map;
+             int s = textP;
+
+             while (s < textRange) {
+                 if (map[text[s] & 0xff] != 0) return s;
+                 s++;
+             }
+             return -1;
+         }
+
+         public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+             byte[]map = regex.map;
+             int s = textStart;
+
+             if (s >= textEnd) s = textEnd - 1;
+             while (s >= textP) {
+                 if (map[text[s] & 0xff] != 0) return s;
+                 s--;
+             }
+             return -1;
+         }
+     };
+
+ }
diff --git a/src/org/joni/StackEntry.java b/src/org/joni/StackEntry.java
new file mode 100644
index 0000000..001c98d
--- /dev/null
+++ b/src/org/joni/StackEntry.java
@@ -0,0 +1,164 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * One entry of the matcher stack: a type tag plus four int slots.
+  *
+  * The slots are shared by several groups of accessors (the original comments
+  * call them "union members"); the groups overlap, so writing through one
+  * group changes what the others read.
+  */
+ final class StackEntry {
+     /** Type tag of the entry. */
+     int type;
+
+     private int slot1;
+     private int slot2;
+     private int slot3;
+     private int slot4;
+
+     // ---- group 1: state (slot1 = byte code position, slot2 = string position,
+     // ----          slot3 = previous char position, slot4 = state check) ----
+
+     /** Stores the byte code position. */
+     void setStatePCode(int pcode) {
+         this.slot1 = pcode;
+     }
+     /** Reads the byte code position. */
+     int getStatePCode() {
+         return this.slot1;
+     }
+     /** Stores the string position. */
+     void setStatePStr(int pstr) {
+         this.slot2 = pstr;
+     }
+     /** Reads the string position. */
+     int getStatePStr() {
+         return this.slot2;
+     }
+     /** Stores the position of the char preceding the string position. */
+     void setStatePStrPrev(int pstrPrev) {
+         this.slot3 = pstrPrev;
+     }
+     /** Reads the position of the char preceding the string position. */
+     int getStatePStrPrev() {
+         return this.slot3;
+     }
+     /** Stores the state check value. */
+     void setStateCheck(int check) {
+         this.slot4 = check;
+     }
+     /** Reads the state check value. */
+     int getStateCheck() {
+         return this.slot4;
+     }
+
+     // ---- group 2: repeat, for OP_REPEAT_INC and OP_REPEAT_INC_NG
+     // ----          (slot1 = count, slot2 = byte code position, slot3 = repeat id) ----
+
+     /** Stores the repeat count. */
+     void setRepeatCount(int count) {
+         this.slot1 = count;
+     }
+     /** Reads the repeat count. */
+     int getRepeatCount() {
+         return this.slot1;
+     }
+     /** Lowers the repeat count by one. */
+     void decreaseRepeatCount() {
+         this.slot1 -= 1;
+     }
+     /** Raises the repeat count by one. */
+     void increaseRepeatCount() {
+         this.slot1 += 1;
+     }
+     /** Stores the byte code position of the head of the repeated target. */
+     void setRepeatPCode(int pcode) {
+         this.slot2 = pcode;
+     }
+     /** Reads the byte code position of the head of the repeated target. */
+     int getRepeatPCode() {
+         return this.slot2;
+     }
+     /** Stores the repeat id. */
+     void setRepeatNum(int num) {
+         this.slot3 = num;
+     }
+     /** Reads the repeat id. */
+     int getRepeatNum() {
+         return this.slot3;
+     }
+
+     // ---- group 3: repeat-inc (slot1 = index of a stack entry) ----
+
+     /** Stores a stack index. */
+     void setSi(int si) {
+         this.slot1 = si;
+     }
+     /** Reads the stack index. */
+     int getSi() {
+         return this.slot1;
+     }
+
+     // ---- group 4: memory (slot1 = memory number, slot2 = start/end position,
+     // ----          slot3 / slot4 = previous start / end info, kept for backtracking "(...)*") ----
+
+     /** Stores the memory number. */
+     void setMemNum(int num) {
+         this.slot1 = num;
+     }
+     /** Reads the memory number. */
+     int getMemNum() {
+         return this.slot1;
+     }
+     /** Stores the start/end position. */
+     void setMemPstr(int pstr) {
+         this.slot2 = pstr;
+     }
+     /** Reads the start/end position. */
+     int getMemPStr() {
+         return this.slot2;
+     }
+     /** Stores the previous start info. */
+     void setMemStart(int start) {
+         this.slot3 = start;
+     }
+     /** Reads the previous start info. */
+     int getMemStart() {
+         return this.slot3;
+     }
+     /** Stores the previous end info. */
+     void setMemEnd(int end) {
+         this.slot4 = end;
+     }
+     /** Reads the previous end info. */
+     int getMemEnd() {
+         return this.slot4;
+     }
+
+     // ---- group 5: null check (slot1 = null check id, slot2 = start position) ----
+
+     /** Stores the null check id. */
+     void setNullCheckNum(int num) {
+         this.slot1 = num;
+     }
+     /** Reads the null check id. */
+     int getNullCheckNum() {
+         return this.slot1;
+     }
+     /** Stores the start position of the null check. */
+     void setNullCheckPStr(int pstr) {
+         this.slot2 = pstr;
+     }
+     /** Reads the start position of the null check. */
+     int getNullCheckPStr() {
+         return this.slot2;
+     }
+
+     // ---- group 6: call frame (slot1 = return address, slot2 = frame number,
+     // ----          slot3 = string position) ----
+
+     /** Stores the byte code position to return to. */
+     void setCallFrameRetAddr(int addr) {
+         this.slot1 = addr;
+     }
+     /** Reads the byte code position to return to. */
+     int getCallFrameRetAddr() {
+         return this.slot1;
+     }
+     /** Stores the call frame number. */
+     void setCallFrameNum(int num) {
+         this.slot2 = num;
+     }
+     /** Reads the call frame number. */
+     int getCallFrameNum() {
+         return this.slot2;
+     }
+     /** Stores the string position of the call frame. */
+     void setCallFramePStr(int pstr) {
+         this.slot3 = pstr;
+     }
+     /** Reads the string position of the call frame. */
+     int getCallFramePStr() {
+         return this.slot3;
+     }
+ }
diff --git a/src/org/joni/StackMachine.java b/src/org/joni/StackMachine.java
new file mode 100644
index 0000000..7fe60a7
--- /dev/null
+++ b/src/org/joni/StackMachine.java
@@ -0,0 +1,621 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+
+import java.lang.ref.WeakReference;
+import java.util.Arrays;
+
+import org.joni.constants.StackPopLevel;
+import org.joni.constants.StackType;
+
+ /**
+  * Matcher base class that owns the stack of {@link StackEntry} objects and
+  * the push / pop / lookup routines operating on it.
+  *
+  * {@code repeatStk} is one int array holding {@code regex.numRepeat} slots
+  * followed by the "memory start" and "memory end" stack indices of each
+  * group; the latter are addressed as {@code memStartStk + n} and
+  * {@code memEndStk + n} with {@code n} starting at 1.
+  */
+ abstract class StackMachine extends Matcher implements StackType {
+     protected static final int INVALID_INDEX = -1;
+
+     protected StackEntry[]stack;
+     protected int stk; // stkEnd
+
+     protected final int[]repeatStk;
+     protected final int memStartStk, memEndStk;
+
+     // CEC
+     protected byte[] stateCheckBuff; // move to int[] ?
+     int stateCheckBuffSize;
+
+     /**
+      * Obtains the stack (only when {@code regex.stackNeeded}) and sizes
+      * {@code repeatStk} for the repeats and groups of {@code regex}.
+      */
+     protected StackMachine(Regex regex, byte[]bytes, int p , int end) {
+         super(regex, bytes, p, end);
+
+         this.stack = regex.stackNeeded ? fetchStack() : null;
+         int n = regex.numRepeat + (regex.numMem << 1);
+         this.repeatStk = n > 0 ? new int[n] : null;
+
+         memStartStk = regex.numRepeat - 1;
+         memEndStk = memStartStk + regex.numMem;
+         /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */
+         /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */
+     }
+
+     /** Creates a stack of the initial size whose first entry is already allocated. */
+     private static StackEntry[] allocateStack() {
+         StackEntry[]stack = new StackEntry[Config.INIT_MATCH_STACK_SIZE];
+         stack[0] = new StackEntry();
+         return stack;
+     }
+
+     /** Replaces the stack with one of twice the length, keeping the existing entries. */
+     private void doubleStack() {
+         StackEntry[] newStack = new StackEntry[stack.length << 1];
+         System.arraycopy(stack, 0, newStack, 0, stack.length);
+         stack = newStack;
+     }
+
+     /** Per-thread stack, held through a weak reference. */
+     static final ThreadLocal<WeakReference<StackEntry[]>> stacks
+             = new ThreadLocal<WeakReference<StackEntry[]>>() {
+         @Override
+         protected WeakReference<StackEntry[]> initialValue() {
+             return new WeakReference<StackEntry[]>(allocateStack());
+         }
+     };
+
+     /** Returns this thread's stack, allocating a new one when the weak reference was cleared. */
+     private static StackEntry[] fetchStack() {
+         WeakReference<StackEntry[]> ref = stacks.get();
+         StackEntry[] stack = ref.get();
+         if (stack == null) {
+             ref = new WeakReference<StackEntry[]>(stack = allocateStack());
+             stacks.set(ref);
+         }
+         return stack;
+     }
+
+     /**
+      * Pushes the bottom ALT entry (when a stack is present) and marks the
+      * start and end index of every group as {@link #INVALID_INDEX}.
+      */
+     protected final void init() {
+         if (stack != null) pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */
+         if (repeatStk != null) {
+             for (int i=1; i<=regex.numMem; i++) {
+                 repeatStk[i + memStartStk] = repeatStk[i + memEndStk] = INVALID_INDEX;
+             }
+         }
+     }
+
+     /**
+      * Makes sure that {@code stack[stk]} exists, growing the array and
+      * allocating the entry on demand, and returns it. Does not advance {@code stk}.
+      */
+     protected final StackEntry ensure1() {
+         if (stk >= stack.length) doubleStack();
+         StackEntry e = stack[stk];
+         if (e == null) stack[stk] = e = new StackEntry();
+         return e;
+     }
+
+     /** Pushes an entry that carries only a type tag. */
+     protected final void pushType(int type) {
+         ensure1().type = type;
+         stk++;
+     }
+
+     // CEC
+
+     // STATE_CHECK_POS
+     /** Bit index inside {@code stateCheckBuff} for string position {@code s} and state number {@code snum}. */
+     private int stateCheckPos(int s, int snum) {
+         return (s - str) * regex.numCombExpCheck + (snum - 1);
+     }
+
+     // STATE_CHECK_VAL
+     /** Returns whether the bit for ({@code s}, {@code snum}) is set; false when there is no buffer. */
+     protected final boolean stateCheckVal(int s, int snum) {
+         if (stateCheckBuff != null) {
+             int x = stateCheckPos(s, snum);
+             return (stateCheckBuff[x / 8] & (1 << (x % 8))) != 0;
+         }
+         return false;
+     }
+
+     // ELSE_IF_STATE_CHECK_MARK
+     /** Sets the bit described by the entry at {@code stack[stk]} (the entry that was just popped). */
+     private void stateCheckMark() {
+         StackEntry e = stack[stk];
+         int x = stateCheckPos(e.getStatePStr(), e.getStateCheck());
+         stateCheckBuff[x / 8] |= (1 << (x % 8));
+     }
+
+     // STATE_CHECK_BUFF_INIT
+     /**
+      * Allocates the state check bit buffer when {@code stateNum} is positive,
+      * the string is at least {@code Config.CHECK_STRING_THRESHOLD_LEN} long
+      * and the computed size is positive, greater than the scaled offset and
+      * below {@code Config.CHECK_BUFF_MAX_SIZE}; otherwise the buffer is dropped.
+      */
+     protected final void stateCheckBuffInit(int strLength, int offset, int stateNum) {
+         if (stateNum > 0 && strLength >= Config.CHECK_STRING_THRESHOLD_LEN) {
+             int size = ((strLength + 1) * stateNum + 7) >>> 3;
+             offset = (offset * stateNum) >>> 3;
+
+             if (size > 0 && offset < size && size < Config.CHECK_BUFF_MAX_SIZE) {
+                 // A freshly created Java array is already zero-filled, so no
+                 // explicit clearing is needed. The former Arrays.fill call
+                 // passed a length where an end index is expected and threw
+                 // IllegalArgumentException whenever offset > size - offset.
+                 stateCheckBuff = new byte[size];
+                 stateCheckBuffSize = size;
+             } else {
+                 stateCheckBuff = null; // reduce
+                 stateCheckBuffSize = 0;
+             }
+         } else {
+             stateCheckBuff = null; // reduce
+             stateCheckBuffSize = 0;
+         }
+     }
+
+     /** Drops the state check buffer. */
+     protected final void stateCheckBuffClear() {
+         stateCheckBuff = null;
+         stateCheckBuffSize = 0;
+     }
+
+     /** Pushes a state entry of the given type with byte code position, string position and previous position. */
+     private void push(int type, int pat, int s, int prev) {
+         StackEntry e = ensure1();
+         e.type = type;
+         e.setStatePCode(pat);
+         e.setStatePStr(s);
+         e.setStatePStrPrev(prev);
+         if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+         stk++;
+     }
+
+     /**
+      * Pushes a state entry without checking capacity: {@code stack[stk]}
+      * must already exist. Only type and byte code position are written.
+      */
+     protected final void pushEnsured(int type, int pat) {
+         StackEntry e = stack[stk];
+         e.type = type;
+         e.setStatePCode(pat);
+         if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+         stk++;
+     }
+
+     /** Pushes an ALT entry that records {@code snum} as its state check number when a check buffer exists. */
+     protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) {
+         StackEntry e = ensure1();
+         e.type = ALT;
+         e.setStatePCode(pat);
+         e.setStatePStr(s);
+         e.setStatePStrPrev(sprev);
+         if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(stateCheckBuff != null ? snum : 0);
+         stk++;
+     }
+
+     /** Pushes a STATE_CHECK_MARK entry; does nothing when there is no check buffer. */
+     protected final void pushStateCheck(int s, int snum) {
+         if (stateCheckBuff != null) {
+             StackEntry e = ensure1();
+             e.type = STATE_CHECK_MARK;
+             e.setStatePStr(s);
+             e.setStateCheck(snum);
+             stk++;
+         }
+     }
+
+     /** Pushes an ALT state entry. */
+     protected final void pushAlt(int pat, int s, int prev) {
+         push(ALT, pat, s, prev);
+     }
+
+     /** Pushes a POS state entry; its byte code position is -1. */
+     protected final void pushPos(int s, int prev) {
+         push(POS, -1 /*NULL_UCHARP*/, s, prev);
+     }
+
+     /** Pushes a POS_NOT state entry. */
+     protected final void pushPosNot(int pat, int s, int prev) {
+         push(POS_NOT, pat, s, prev);
+     }
+
+     /** Pushes a STOP_BT marker. */
+     protected final void pushStopBT() {
+         pushType(STOP_BT);
+     }
+
+     /** Pushes a LOOK_BEHIND_NOT state entry. */
+     protected final void pushLookBehindNot(int pat, int s, int sprev) {
+         push(LOOK_BEHIND_NOT, pat, s, sprev);
+     }
+
+     /** Pushes a REPEAT entry for repeat {@code id} with a count of 0. */
+     protected final void pushRepeat(int id, int pat) {
+         StackEntry e = ensure1();
+         e.type = REPEAT;
+         e.setRepeatNum(id);
+         e.setRepeatPCode(pat);
+         e.setRepeatCount(0);
+         stk++;
+     }
+
+     /** Pushes a REPEAT_INC entry that refers to the stack entry at {@code sindex}. */
+     protected final void pushRepeatInc(int sindex) {
+         StackEntry e = ensure1();
+         e.type = REPEAT_INC;
+         e.setSi(sindex);
+         stk++;
+     }
+
+     /**
+      * Pushes a MEM_START entry for group {@code mnum}. The entry saves the
+      * group's current start/end indices; afterwards the start index points
+      * at this entry and the end index is {@link #INVALID_INDEX}.
+      */
+     protected final void pushMemStart(int mnum, int s) {
+         StackEntry e = ensure1();
+         e.type = MEM_START;
+         e.setMemNum(mnum);
+         e.setMemPstr(s);
+         e.setMemStart(repeatStk[memStartStk + mnum]);
+         e.setMemEnd(repeatStk[memEndStk + mnum]);
+         repeatStk[memStartStk + mnum] = stk;
+         repeatStk[memEndStk + mnum] = INVALID_INDEX;
+         stk++;
+     }
+
+     /**
+      * Pushes a MEM_END entry for group {@code mnum}. The entry saves the
+      * group's current start/end indices; afterwards the end index points at
+      * this entry.
+      */
+     protected final void pushMemEnd(int mnum, int s) {
+         StackEntry e = ensure1();
+         e.type = MEM_END;
+         e.setMemNum(mnum);
+         e.setMemPstr(s);
+         e.setMemStart(repeatStk[memStartStk + mnum]);
+         e.setMemEnd(repeatStk[memEndStk + mnum]);
+         repeatStk[memEndStk + mnum] = stk;
+         stk++;
+     }
+
+     /** Pushes a MEM_END_MARK entry for group {@code mnum}. */
+     protected final void pushMemEndMark(int mnum) {
+         StackEntry e = ensure1();
+         e.type = MEM_END_MARK;
+         e.setMemNum(mnum);
+         stk++;
+     }
+
+     /**
+      * Walks down from the top of the stack to the MEM_START entry of group
+      * {@code mnum} that is not paired with an end entry found above it.
+      *
+      * @return the stack index where the walk stopped (0 when the bottom was reached)
+      */
+     protected final int getMemStart(int mnum) {
+         int level = 0;
+         int stkp = stk;
+
+         while (stkp > 0) {
+             stkp--;
+             StackEntry e = stack[stkp];
+             if ((e.type & MASK_MEM_END_OR_MARK) != 0 && e.getMemNum() == mnum) {
+                 level++;
+             } else if (e.type == MEM_START && e.getMemNum() == mnum) {
+                 if (level == 0) break;
+                 level--;
+             }
+         }
+         return stkp;
+     }
+
+     /** Pushes a NULL_CHECK_START entry with id {@code cnum} and string position {@code s}. */
+     protected final void pushNullCheckStart(int cnum, int s) {
+         StackEntry e = ensure1();
+         e.type = NULL_CHECK_START;
+         e.setNullCheckNum(cnum);
+         e.setNullCheckPStr(s);
+         stk++;
+     }
+
+     /** Pushes a NULL_CHECK_END entry with id {@code cnum}. */
+     protected final void pushNullCheckEnd(int cnum) {
+         StackEntry e = ensure1();
+         e.type = NULL_CHECK_END;
+         e.setNullCheckNum(cnum);
+         stk++;
+     }
+
+     /** Pushes a CALL_FRAME entry whose return address is {@code pat}. */
+     protected final void pushCallFrame(int pat) {
+         StackEntry e = ensure1();
+         e.type = CALL_FRAME;
+         e.setCallFrameRetAddr(pat);
+         stk++;
+     }
+
+     /** Pushes a RETURN entry. */
+     protected final void pushReturn() {
+         StackEntry e = ensure1();
+         e.type = RETURN;
+         stk++;
+     }
+
+     // stack debug routines here
+     // ...
+
+     /** Discards the top entry without inspecting it. */
+     protected final void popOne() {
+         stk--;
+     }
+
+     /**
+      * Pops entries until one whose type is in MASK_POP_USED is found and
+      * returns it. How much state is restored along the way is selected by
+      * {@code regex.stackPopLevel}.
+      */
+     protected final StackEntry pop() {
+         switch (regex.stackPopLevel) {
+         case StackPopLevel.FREE:
+             return popFree();
+         case StackPopLevel.MEM_START:
+             return popMemStart();
+         default:
+             return popDefault();
+         }
+     }
+
+     /** Pop variant that restores nothing (only state check marks are recorded). */
+     private StackEntry popFree() {
+         while (true) {
+             StackEntry e = stack[--stk];
+
+             if ((e.type & MASK_POP_USED) != 0) {
+                 return e;
+             } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     /** Pop variant that restores the group indices saved in MEM_START entries. */
+     private StackEntry popMemStart() {
+         while (true) {
+             StackEntry e = stack[--stk];
+
+             if ((e.type & MASK_POP_USED) != 0) {
+                 return e;
+             } else if (e.type == MEM_START) {
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     /**
+      * Pop variant that restores the group indices saved in MEM_START and
+      * MEM_END entries and undoes repeat increments.
+      */
+     private StackEntry popDefault() {
+         while (true) {
+             StackEntry e = stack[--stk];
+
+             if ((e.type & MASK_POP_USED) != 0) {
+                 return e;
+             } else if (e.type == MEM_START) {
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (e.type == REPEAT_INC) {
+                 //int si = stack[stk + IREPEAT_INC_SI];
+                 //stack[si + IREPEAT_COUNT]--;
+                 stack[e.getSi()].decreaseRepeatCount();
+             } else if (e.type == MEM_END) {
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     /**
+      * Pops down to (and including) the nearest POS_NOT entry, restoring
+      * group indices and repeat counts on the way; {@code stk} ends up at the
+      * index of that entry.
+      */
+     protected final void popTilPosNot() {
+         while (true) {
+             stk--;
+             StackEntry e = stack[stk];
+
+             if (e.type == POS_NOT) {
+                 break;
+             } else if (e.type == MEM_START) {
+                 // restore both saved indices; the end index comes from the
+                 // saved end value, exactly as in the other pop routines
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (e.type == REPEAT_INC) {
+                 //int si = stack[stk + IREPEAT_INC_SI];
+                 //stack[si + IREPEAT_COUNT]--;
+                 stack[e.getSi()].decreaseRepeatCount();
+             } else if (e.type == MEM_END){
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     /**
+      * Pops down to (and including) the nearest LOOK_BEHIND_NOT entry,
+      * restoring group indices and repeat counts on the way; {@code stk} ends
+      * up at the index of that entry.
+      */
+     protected final void popTilLookBehindNot() {
+         while (true) {
+             stk--;
+             StackEntry e = stack[stk];
+
+             if (e.type == LOOK_BEHIND_NOT) {
+                 break;
+             } else if (e.type == MEM_START) {
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (e.type == REPEAT_INC) {
+                 //int si = stack[stk + IREPEAT_INC_SI];
+                 //stack[si + IREPEAT_COUNT]--;
+                 stack[e.getSi()].decreaseRepeatCount();
+             } else if (e.type == MEM_END) {
+                 repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+                 repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+             } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     /**
+      * Turns every entry in MASK_TO_VOID_TARGET above the nearest POS entry,
+      * and that POS entry itself, into VOID. {@code stk} is not changed.
+      *
+      * @return the stack index of the former POS entry
+      */
+     protected final int posEnd() {
+         int k = stk;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+             if ((e.type & MASK_TO_VOID_TARGET) != 0) {
+                 e.type = VOID;
+             } else if (e.type == POS) {
+                 e.type = VOID;
+                 break;
+             }
+         }
+         return k;
+     }
+
+     /**
+      * Turns every entry in MASK_TO_VOID_TARGET above the nearest STOP_BT
+      * entry, and that STOP_BT entry itself, into VOID. {@code stk} is not changed.
+      */
+     protected final void stopBtEnd() {
+         int k = stk;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if ((e.type & MASK_TO_VOID_TARGET) != 0) {
+                 e.type = VOID;
+             } else if (e.type == STOP_BT) {
+                 e.type = VOID;
+                 break;
+             }
+         }
+     }
+
+     // int for consistency with other null check routines
+     /**
+      * Finds the nearest NULL_CHECK_START entry with the given id.
+      *
+      * @return 1 when its recorded string position equals {@code s}, else 0
+      */
+     protected final int nullCheck(int id, int s) {
+         int k = stk;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if (e.type == NULL_CHECK_START) {
+                 if (e.getNullCheckNum() == id) {
+                     return e.getNullCheckPStr() == s ? 1 : 0;
+                 }
+             }
+         }
+     }
+
+     /**
+      * Like {@link #nullCheck(int, int)}, but every NULL_CHECK_END entry met
+      * on the way down makes the search skip one matching NULL_CHECK_START.
+      */
+     protected final int nullCheckRec(int id, int s) {
+         int level = 0;
+         int k = stk;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if (e.type == NULL_CHECK_START) {
+                 if (e.getNullCheckNum() == id) {
+                     if (level == 0) {
+                         return e.getNullCheckPStr() == s ? 1 : 0;
+                     } else {
+                         level--;
+                     }
+                 }
+             } else if (e.type == NULL_CHECK_END) {
+                 level++;
+             }
+         }
+     }
+
+     /**
+      * Null check that also inspects the MEM_START entries pushed after the
+      * matching NULL_CHECK_START.
+      *
+      * @return 0 when the position moved, a group is still open, or a group's
+      *         start and end positions differ; -1 when the groups are empty
+      *         but a group end position differs from {@code s}; otherwise 1
+      */
+     protected final int nullCheckMemSt(int id, int s) {
+         int k = stk;
+         int isNull;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if (e.type == NULL_CHECK_START) {
+                 if (e.getNullCheckNum() == id) {
+                     if (e.getNullCheckPStr() != s) {
+                         isNull = 0;
+                         break;
+                     } else {
+                         int endp;
+                         isNull = 1;
+                         // walk back up from the start marker to the top of the stack
+                         while (k < stk) {
+                             if (e.type == MEM_START) {
+                                 if (e.getMemEnd() == INVALID_INDEX) {
+                                     isNull = 0;
+                                     break;
+                                 }
+                                 if (bsAt(regex.btMemEnd, e.getMemNum())) {
+                                     endp = stack[e.getMemEnd()].getMemPStr();
+                                 } else {
+                                     endp = e.getMemEnd();
+                                 }
+                                 if (stack[e.getMemStart()].getMemPStr() != endp) {
+                                     isNull = 0;
+                                     break;
+                                 } else if (endp != s) {
+                                     isNull = -1; /* empty, but position changed */
+                                 }
+                             }
+                             k++;
+                             e = stack[k]; // !!
+                         }
+                         break;
+                     }
+                 }
+             }
+         }
+         return isNull;
+     }
+
+     /**
+      * Like {@link #nullCheckMemSt(int, int)}, but every NULL_CHECK_END entry
+      * with the same id met on the way down makes the search skip one
+      * matching NULL_CHECK_START.
+      */
+     protected final int nullCheckMemStRec(int id, int s) {
+         int level = 0;
+         int k = stk;
+         int isNull;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if (e.type == NULL_CHECK_START) {
+                 if (e.getNullCheckNum() == id) {
+                     if (level == 0) {
+                         if (e.getNullCheckPStr() != s) {
+                             isNull = 0;
+                             break;
+                         } else {
+                             int endp;
+                             isNull = 1;
+                             while (k < stk) {
+                                 if (e.type == MEM_START) {
+                                     if (e.getMemEnd() == INVALID_INDEX) {
+                                         isNull = 0;
+                                         break;
+                                     }
+                                     if (bsAt(regex.btMemEnd, e.getMemNum())) {
+                                         endp = stack[e.getMemEnd()].getMemPStr();
+                                     } else {
+                                         endp = e.getMemEnd();
+                                     }
+                                     if (stack[e.getMemStart()].getMemPStr() != endp) {
+                                         isNull = 0;
+                                         break;
+                                     } else if (endp != s) {
+                                         isNull = -1; /* empty, but position changed */
+                                     }
+                                 }
+                                 k++;
+                                 e = stack[k];
+                             }
+                             break;
+                         }
+                     } else {
+                         level--;
+                     }
+                 }
+             } else if (e.type == NULL_CHECK_END) {
+                 if (e.getNullCheckNum() == id) level++;
+             }
+         }
+         return isNull;
+     }
+
+     /**
+      * Returns the stack index of the nearest REPEAT entry with the given id
+      * that is seen while the CALL_FRAME / RETURN nesting counter is 0.
+      */
+     protected final int getRepeat(int id) {
+         int level = 0;
+         int k = stk;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if (e.type == REPEAT) {
+                 if (level == 0) {
+                     if (e.getRepeatNum() == id) return k;
+                 }
+             } else if (e.type == CALL_FRAME) {
+                 level--;
+             } else if (e.type == RETURN) {
+                 level++;
+             }
+         }
+     }
+
+     /**
+      * Returns the return address stored in the nearest CALL_FRAME entry that
+      * is not cancelled out by a RETURN entry found above it.
+      */
+     protected final int sreturn() {
+         int level = 0;
+         int k = stk;
+         while (true) {
+             k--;
+             StackEntry e = stack[k];
+
+             if (e.type == CALL_FRAME) {
+                 if (level == 0) {
+                     return e.getCallFrameRetAddr();
+                 } else {
+                     level--;
+                 }
+             } else if (e.type == RETURN) {
+                 level++;
+             }
+         }
+     }
+ }
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
new file mode 100644
index 0000000..b89abe9
--- /dev/null
+++ b/src/org/joni/Syntax.java
@@ -0,0 +1,606 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.constants.MetaChar.INEFFECTIVE_META_CHAR;
+
+import org.joni.constants.SyntaxProperties;
+
+/**
+ * Immutable description of a regular expression syntax: two operator bit
+ * masks ({@code op}, {@code op2}), a behavior bit mask, default options and
+ * a table of meta characters. The bit constants come from
+ * {@link SyntaxProperties}. Predefined syntaxes are exposed as constants.
+ */
+public final class Syntax implements SyntaxProperties{
+ private final int op;
+ private final int op2;
+ private final int behavior;
+ public final int options;
+ public final MetaCharTable metaCharTable;
+
+ /**
+ * @param op bit mask of OP_* flags
+ * @param op2 bit mask of OP2_* flags
+ * @param behavior bit mask of behavior flags
+ * @param options default options of this syntax
+ * @param metaCharTable meta characters of this syntax
+ */
+ public Syntax(int op, int op2, int behavior, int options, MetaCharTable metaCharTable) {
+ this.op = op;
+ this.op2 = op2;
+ this.behavior = behavior;
+ this.options = options;
+ this.metaCharTable = metaCharTable;
+ }
+
+ /** Immutable holder for the meta character codes of a syntax. */
+ public static class MetaCharTable {
+ public final int esc;
+ public final int anyChar;
+ public final int anyTime;
+ public final int zeroOrOneTime;
+ public final int oneOrMoreTime;
+ public final int anyCharAnyTime;
+
+ public MetaCharTable(int esc, int anyChar, int anyTime,
+ int zeroOrOneTime, int oneOrMoreTime, int anyCharAnyTime) {
+ this.esc = esc;
+ this.anyChar = anyChar;
+ this.anyTime = anyTime;
+ this.zeroOrOneTime = zeroOrOneTime;
+ this.oneOrMoreTime = oneOrMoreTime;
+ this.anyCharAnyTime = anyCharAnyTime;
+ }
+ }
+
+ /**
+ * Builds the meta character table shared by every predefined syntax:
+ * backslash as escape, every other meta character ineffective.
+ * A new instance is returned on each call so each syntax keeps its own table.
+ */
+ private static MetaCharTable defaultMetaCharTable() {
+ return new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ );
+ }
+
+ /**
+ * OP
+ *
+ * @return true if any bit of {@code opm} is set in the op mask
+ */
+ protected boolean isOp(int opm) {
+ return (op & opm) != 0;
+ }
+
+ public boolean opVariableMetaCharacters() {
+ return isOp(OP_VARIABLE_META_CHARACTERS);
+ }
+
+ public boolean opDotAnyChar() {
+ return isOp(OP_DOT_ANYCHAR);
+ }
+
+ public boolean opAsteriskZeroInf() {
+ return isOp(OP_ASTERISK_ZERO_INF);
+ }
+
+ public boolean opEscAsteriskZeroInf() {
+ return isOp(OP_ESC_ASTERISK_ZERO_INF);
+ }
+
+ public boolean opPlusOneInf() {
+ return isOp(OP_PLUS_ONE_INF);
+ }
+
+ public boolean opEscPlusOneInf() {
+ return isOp(OP_ESC_PLUS_ONE_INF);
+ }
+
+ public boolean opQMarkZeroOne() {
+ return isOp(OP_QMARK_ZERO_ONE);
+ }
+
+ public boolean opEscQMarkZeroOne() {
+ return isOp(OP_ESC_QMARK_ZERO_ONE);
+ }
+
+ public boolean opBraceInterval() {
+ return isOp(OP_BRACE_INTERVAL);
+ }
+
+ public boolean opEscBraceInterval() {
+ return isOp(OP_ESC_BRACE_INTERVAL);
+ }
+
+ public boolean opVBarAlt() {
+ return isOp(OP_VBAR_ALT);
+ }
+
+ public boolean opEscVBarAlt() {
+ return isOp(OP_ESC_VBAR_ALT);
+ }
+
+ public boolean opLParenSubexp() {
+ return isOp(OP_LPAREN_SUBEXP);
+ }
+
+ public boolean opEscLParenSubexp() {
+ return isOp(OP_ESC_LPAREN_SUBEXP);
+ }
+
+ public boolean opEscAZBufAnchor() {
+ return isOp(OP_ESC_AZ_BUF_ANCHOR);
+ }
+
+ public boolean opEscCapitalGBeginAnchor() {
+ return isOp(OP_ESC_CAPITAL_G_BEGIN_ANCHOR);
+ }
+
+ public boolean opDecimalBackref() {
+ return isOp(OP_DECIMAL_BACKREF);
+ }
+
+ public boolean opBracketCC() {
+ return isOp(OP_BRACKET_CC);
+ }
+
+ public boolean opEscWWord() {
+ return isOp(OP_ESC_W_WORD);
+ }
+
+ public boolean opEscLtGtWordBeginEnd() {
+ return isOp(OP_ESC_LTGT_WORD_BEGIN_END);
+ }
+
+ public boolean opEscBWordBound() {
+ return isOp(OP_ESC_B_WORD_BOUND);
+ }
+
+ public boolean opEscSWhiteSpace() {
+ return isOp(OP_ESC_S_WHITE_SPACE);
+ }
+
+ public boolean opEscDDigit() {
+ return isOp(OP_ESC_D_DIGIT);
+ }
+
+ public boolean opLineAnchor() {
+ return isOp(OP_LINE_ANCHOR);
+ }
+
+ public boolean opPosixBracket() {
+ return isOp(OP_POSIX_BRACKET);
+ }
+
+ public boolean opQMarkNonGreedy() {
+ return isOp(OP_QMARK_NON_GREEDY);
+ }
+
+ public boolean opEscControlChars() {
+ return isOp(OP_ESC_CONTROL_CHARS);
+ }
+
+ public boolean opEscCControl() {
+ return isOp(OP_ESC_C_CONTROL);
+ }
+
+ public boolean opEscOctal3() {
+ return isOp(OP_ESC_OCTAL3);
+ }
+
+ public boolean opEscXHex2() {
+ return isOp(OP_ESC_X_HEX2);
+ }
+
+ public boolean opEscXBraceHex8() {
+ return isOp(OP_ESC_X_BRACE_HEX8);
+ }
+
+
+ /**
+ * OP2
+ *
+ * @return true if any bit of {@code opm} is set in the op2 mask
+ */
+ protected boolean isOp2(int opm) {
+ return (op2 & opm) != 0;
+ }
+
+ public boolean op2EscCapitalQQuote() {
+ return isOp2(OP2_ESC_CAPITAL_Q_QUOTE);
+ }
+
+ public boolean op2QMarkGroupEffect() {
+ return isOp2(OP2_QMARK_GROUP_EFFECT);
+ }
+
+ public boolean op2OptionPerl() {
+ return isOp2(OP2_OPTION_PERL);
+ }
+
+ public boolean op2OptionRuby() {
+ return isOp2(OP2_OPTION_RUBY);
+ }
+
+ public boolean op2PlusPossessiveRepeat() {
+ return isOp2(OP2_PLUS_POSSESSIVE_REPEAT);
+ }
+
+ public boolean op2PlusPossessiveInterval() {
+ return isOp2(OP2_PLUS_POSSESSIVE_INTERVAL);
+ }
+
+ public boolean op2CClassSetOp() {
+ return isOp2(OP2_CCLASS_SET_OP);
+ }
+
+ public boolean op2QMarkLtNamedGroup() {
+ return isOp2(OP2_QMARK_LT_NAMED_GROUP);
+ }
+
+ public boolean op2EscKNamedBackref() {
+ return isOp2(OP2_ESC_K_NAMED_BACKREF);
+ }
+
+ public boolean op2EscGSubexpCall() {
+ return isOp2(OP2_ESC_G_SUBEXP_CALL);
+ }
+
+ public boolean op2AtMarkCaptureHistory() {
+ return isOp2(OP2_ATMARK_CAPTURE_HISTORY);
+ }
+
+ public boolean op2EscCapitalCBarControl() {
+ return isOp2(OP2_ESC_CAPITAL_C_BAR_CONTROL);
+ }
+
+ public boolean op2EscCapitalMBarMeta() {
+ return isOp2(OP2_ESC_CAPITAL_M_BAR_META);
+ }
+
+ public boolean op2EscVVtab() {
+ return isOp2(OP2_ESC_V_VTAB);
+ }
+
+ public boolean op2EscUHex4() {
+ return isOp2(OP2_ESC_U_HEX4);
+ }
+
+ public boolean op2EscGnuBufAnchor() {
+ return isOp2(OP2_ESC_GNU_BUF_ANCHOR);
+ }
+
+ public boolean op2EscPBraceCharProperty() {
+ return isOp2(OP2_ESC_P_BRACE_CHAR_PROPERTY);
+ }
+
+ public boolean op2EscPBraceCircumflexNot() {
+ return isOp2(OP2_ESC_P_BRACE_CIRCUMFLEX_NOT);
+ }
+
+ public boolean op2EscHXDigit() {
+ return isOp2(OP2_ESC_H_XDIGIT);
+ }
+
+ public boolean op2IneffectiveEscape() {
+ return isOp2(OP2_INEFFECTIVE_ESCAPE);
+ }
+
+ /**
+ * BEHAVIOR
+ *
+ * @return true if any bit of {@code bvm} is set in the behavior mask
+ */
+ protected boolean isBehavior(int bvm) {
+ return (behavior & bvm) != 0;
+ }
+
+ public boolean contextIndepRepeatOps() {
+ return isBehavior(CONTEXT_INDEP_REPEAT_OPS);
+ }
+
+ public boolean contextInvalidRepeatOps() {
+ return isBehavior(CONTEXT_INVALID_REPEAT_OPS);
+ }
+
+ public boolean allowUnmatchedCloseSubexp() {
+ return isBehavior(ALLOW_UNMATCHED_CLOSE_SUBEXP);
+ }
+
+ public boolean allowInvalidInterval() {
+ return isBehavior(ALLOW_INVALID_INTERVAL);
+ }
+
+ public boolean allowIntervalLowAbbrev() {
+ return isBehavior(ALLOW_INTERVAL_LOW_ABBREV);
+ }
+
+ public boolean strictCheckBackref() {
+ return isBehavior(STRICT_CHECK_BACKREF);
+ }
+
+ public boolean differentLengthAltLookBehind() {
+ return isBehavior(DIFFERENT_LEN_ALT_LOOK_BEHIND);
+ }
+
+ public boolean captureOnlyNamedGroup() {
+ return isBehavior(CAPTURE_ONLY_NAMED_GROUP);
+ }
+
+ public boolean allowMultiplexDefinitionName() {
+ return isBehavior(ALLOW_MULTIPLEX_DEFINITION_NAME);
+ }
+
+ public boolean fixedIntervalIsGreedyOnly() {
+ return isBehavior(FIXED_INTERVAL_IS_GREEDY_ONLY);
+ }
+
+
+ public boolean notNewlineInNegativeCC() {
+ return isBehavior(NOT_NEWLINE_IN_NEGATIVE_CC);
+ }
+
+ public boolean backSlashEscapeInCC() {
+ return isBehavior(BACKSLASH_ESCAPE_IN_CC);
+ }
+
+ public boolean allowEmptyRangeInCC() {
+ return isBehavior(ALLOW_EMPTY_RANGE_IN_CC);
+ }
+
+ public boolean allowDoubleRangeOpInCC() {
+ return isBehavior(ALLOW_DOUBLE_RANGE_OP_IN_CC);
+ }
+
+ public boolean warnCCOpNotEscaped() {
+ return isBehavior(WARN_CC_OP_NOT_ESCAPED);
+ }
+
+ public boolean warnRedundantNestedRepeat() {
+ return isBehavior(WARN_REDUNDANT_NESTED_REPEAT);
+ }
+
+ /**
+ * Misspelled legacy name of {@link #warnRedundantNestedRepeat()}, kept so
+ * existing callers keep compiling.
+ */
+ public boolean warnReduntantNestedRepeat() {
+ return warnRedundantNestedRepeat();
+ }
+
+ public static final Syntax RUBY = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_QMARK_GROUP_EFFECT |
+ OP2_OPTION_RUBY |
+ OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF |
+ OP2_ESC_G_SUBEXP_CALL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_PLUS_POSSESSIVE_REPEAT |
+ OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB |
+ OP2_ESC_H_XDIGIT ),
+
+ ( GNU_REGEX_BV |
+ ALLOW_INTERVAL_LOW_ABBREV |
+ DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ CAPTURE_ONLY_NAMED_GROUP |
+ ALLOW_MULTIPLEX_DEFINITION_NAME |
+ FIXED_INTERVAL_IS_GREEDY_ONLY |
+ WARN_CC_OP_NOT_ESCAPED |
+ WARN_REDUNDANT_NESTED_REPEAT ),
+
+ Option.NONE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax DEFAULT = RUBY;
+
+ public static final Syntax ASIS = new Syntax(
+ 0,
+
+ OP2_INEFFECTIVE_ESCAPE,
+
+ 0,
+
+ Option.NONE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax PosixBasic = new Syntax(
+ (POSIX_COMMON_OP | OP_ESC_LPAREN_SUBEXP |
+ OP_ESC_BRACE_INTERVAL ),
+
+ 0,
+
+ 0,
+
+ ( Option.SINGLELINE | Option.MULTILINE ),
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax PosixExtended = new Syntax(
+ ( POSIX_COMMON_OP | OP_LPAREN_SUBEXP |
+ OP_BRACE_INTERVAL |
+ OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE |OP_VBAR_ALT ),
+
+ 0,
+
+ ( CONTEXT_INDEP_ANCHORS |
+ CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS |
+ ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ALLOW_DOUBLE_RANGE_OP_IN_CC ),
+
+ ( Option.SINGLELINE | Option.MULTILINE ),
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax Emacs = new Syntax(
+ ( OP_DOT_ANYCHAR | OP_BRACKET_CC |
+ OP_ESC_BRACE_INTERVAL |
+ OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF |
+ OP_QMARK_ZERO_ONE | OP_DECIMAL_BACKREF |
+ OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS ),
+
+ OP2_ESC_GNU_BUF_ANCHOR,
+
+ ALLOW_EMPTY_RANGE_IN_CC,
+
+ Option.NONE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax Grep = new Syntax(
+ ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET |
+ OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP |
+ OP_ESC_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_ESC_PLUS_ONE_INF |
+ OP_ESC_QMARK_ZERO_ONE | OP_LINE_ANCHOR |
+ OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND |
+ OP_ESC_LTGT_WORD_BEGIN_END | OP_DECIMAL_BACKREF ),
+
+ 0,
+
+ ( ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC ),
+
+ Option.NONE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax GnuRegex = new Syntax(
+ GNU_REGEX_OP,
+ 0,
+ GNU_REGEX_BV,
+
+ Option.NONE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax Java = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT |
+ OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT |
+ OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP |
+ OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY ),
+
+ ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),
+
+ Option.SINGLELINE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax Perl = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE |
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ),
+
+ GNU_REGEX_BV,
+
+ Option.SINGLELINE,
+
+ defaultMetaCharTable()
+ );
+
+ public static final Syntax PerlNG = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE |
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_QMARK_LT_NAMED_GROUP |
+ OP2_ESC_K_NAMED_BACKREF |
+ OP2_ESC_G_SUBEXP_CALL ),
+
+ ( GNU_REGEX_BV |
+ CAPTURE_ONLY_NAMED_GROUP |
+ ALLOW_MULTIPLEX_DEFINITION_NAME ),
+
+ Option.SINGLELINE,
+
+ defaultMetaCharTable()
+ );
+}
diff --git a/src/org/joni/Token.java b/src/org/joni/Token.java
new file mode 100644
index 0000000..16e2b1a
--- /dev/null
+++ b/src/org/joni/Token.java
@@ -0,0 +1,172 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.constants.TokenType;
+
+/**
+ * Mutable lexer token. The type-specific payload is stored in a small set of
+ * shared slots (the Java stand-in for a C union); which accessors are
+ * meaningful depends on the kind of token, and accessors of different kinds
+ * deliberately alias the same slot.
+ */
+final class Token {
+ TokenType type;
+ boolean escaped;
+ int base; /* is number: 8, 16 (used in [....]) */
+ int backP;
+
+ // shared payload slots ("union" storage)
+ private int slot1;
+ private int slot2;
+ private int slot3;
+ private int slot4;
+ private int slot5;
+ private int[] slotRefs;
+
+ /** Encodes a boolean the way the slots store it: 1 for true, 0 for false. */
+ private static int pack(boolean flag) {
+ return flag ? 1 : 0;
+ }
+
+ /** Decodes a slot value: anything other than 0 reads as true. */
+ private static boolean unpack(int raw) {
+ return raw != 0;
+ }
+
+ // ---- single-value payloads (all live in slot 1) ----
+ int getC() { return slot1; }
+ void setC(int c) { slot1 = c; }
+
+ int getCode() { return slot1; }
+ void setCode(int code) { slot1 = code; }
+
+ int getAnchor() { return slot1; }
+ void setAnchor(int anchor) { slot1 = anchor; }
+
+ int getSubtype() { return slot1; }
+ void setSubtype(int subtype) { slot1 = subtype; }
+
+ // ---- repeat payload: lower, upper, greedy, possessive ----
+ int getRepeatLower() { return slot1; }
+ void setRepeatLower(int lower) { slot1 = lower; }
+
+ int getRepeatUpper() { return slot2; }
+ void setRepeatUpper(int upper) { slot2 = upper; }
+
+ boolean getRepeatGreedy() { return unpack(slot3); }
+ void setRepeatGreedy(boolean greedy) { slot3 = pack(greedy); }
+
+ boolean getRepeatPossessive() { return unpack(slot4); }
+ void setRepeatPossessive(boolean possessive) { slot4 = pack(possessive); }
+
+ // ---- backref payload: num, ref1, refs, byName (+ level info) ----
+ int getBackrefNum() { return slot1; }
+ void setBackrefNum(int num) { slot1 = num; }
+
+ int getBackrefRef1() { return slot2; }
+ void setBackrefRef1(int ref1) { slot2 = ref1; }
+
+ int[] getBackrefRefs() { return slotRefs; }
+ void setBackrefRefs(int[] refs) { slotRefs = refs; }
+
+ boolean getBackrefByName() { return unpack(slot3); }
+ void setBackrefByName(boolean byName) { slot3 = pack(byName); }
+
+ // USE_BACKREF_AT_LEVEL
+ boolean getBackrefExistLevel() { return unpack(slot4); }
+ void setBackrefExistLevel(boolean existLevel) { slot4 = pack(existLevel); }
+
+ int getBackrefLevel() { return slot5; }
+ void setBackrefLevel(int level) { slot5 = level; }
+
+ // ---- call payload: name start, name end, group number ----
+ int getCallNameP() { return slot1; }
+ void setCallNameP(int nameP) { slot1 = nameP; }
+
+ int getCallNameEnd() { return slot2; }
+ void setCallNameEnd(int nameEnd) { slot2 = nameEnd; }
+
+ int getCallGNum() { return slot3; }
+ void setCallGNum(int gnum) { slot3 = gnum; }
+
+ // ---- prop payload: ctype, not ----
+ int getPropCType() { return slot1; }
+ void setPropCType(int ctype) { slot1 = ctype; }
+
+ boolean getPropNot() { return unpack(slot2); }
+ void setPropNot(boolean not) { slot2 = pack(not); }
+}
diff --git a/src/org/joni/UnsetAddrList.java b/src/org/joni/UnsetAddrList.java
new file mode 100644
index 0000000..8787972
--- /dev/null
+++ b/src/org/joni/UnsetAddrList.java
@@ -0,0 +1,69 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+/**
+ * Growable list of (code offset, target node) pairs whose call address was
+ * not yet known when the referring code was emitted. {@link #fix(Regex)}
+ * writes the resolved addresses into the compiled code afterwards.
+ */
+public final class UnsetAddrList {
+ int num;
+ Node[]targets;
+ int[]offsets;
+
+ /**
+ * @param size initial capacity; the list grows on demand
+ */
+ public UnsetAddrList(int size) {
+ targets = new Node[size];
+ offsets = new int[size];
+ }
+
+ /**
+ * Records that the code word at {@code offset} must later receive the
+ * call address of {@code node}.
+ */
+ public void add(int offset, Node node) {
+ if (num >= offsets.length) {
+ // Doubling a zero-length array yields zero again, so grow to at least one slot.
+ int newSize = Math.max(1, offsets.length << 1);
+ Node []ttmp = new Node[newSize];
+ System.arraycopy(targets, 0, ttmp, 0, num);
+ targets = ttmp;
+ int[]otmp = new int[newSize];
+ System.arraycopy(offsets, 0, otmp, 0, num);
+ offsets = otmp;
+ }
+ targets[num] = node;
+ offsets[num] = offset;
+
+ num++;
+ }
+
+ /**
+ * Patches every recorded offset in {@code regex.code} with the call
+ * address of its target.
+ *
+ * @throws InternalException if a target's address has not been fixed yet
+ */
+ public void fix(Regex regex) {
+ for (int i=0; i<num; i++) {
+ EncloseNode en = (EncloseNode)targets[i];
+ // The exception used to be constructed but never thrown, so an
+ // unfixed address was silently written into the code array.
+ if (!en.isAddrFixed()) throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+ regex.code[offsets[i]] = en.callAddr; // is this safe ?
+ }
+ }
+
+ /** Debug rendering: one "offset + N target: NAME" fragment per entry, concatenated. */
+ public String toString() {
+ StringBuilder value = new StringBuilder();
+ for (int i=0; i<num; i++) {
+ value.append("offset + ").append(offsets[i]).append(" target: ").append(targets[i].getAddressName());
+ }
+ return value.toString();
+ }
+}
diff --git a/src/org/joni/WarnCallback.java b/src/org/joni/WarnCallback.java
new file mode 100644
index 0000000..351146b
--- /dev/null
+++ b/src/org/joni/WarnCallback.java
@@ -0,0 +1,32 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * @author <a href="mailto:ola.bini at gmail.com">Ola Bini</a>
+ */
+public interface WarnCallback {
+ /**
+ * Receives one warning message.
+ *
+ * @param message text of the warning
+ */
+ void warn(String message);
+
+ /** Callback that prints each message on its own line to {@code System.err}. */
+ WarnCallback DEFAULT = new WarnCallback() {
+ public void warn(String text) {
+ System.err.println(text);
+ }
+ };
+}// WarnCallback
diff --git a/src/org/joni/ast/AnchorNode.java b/src/org/joni/ast/AnchorNode.java
new file mode 100644
index 0000000..cccbc49
--- /dev/null
+++ b/src/org/joni/ast/AnchorNode.java
@@ -0,0 +1,92 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.constants.AnchorType;
+
+/**
+ * AST node for an anchor. {@code type} is a bit mask of {@link AnchorType}
+ * flags, {@code target} is the optional child node, and {@code charLength}
+ * starts out as -1.
+ */
+public final class AnchorNode extends Node implements AnchorType {
+ public int type;
+ public Node target;
+ public int charLength;
+
+ public AnchorNode(int type) {
+ this.type = type;
+ this.charLength = -1;
+ }
+
+ @Override
+ public int getType() {
+ return ANCHOR;
+ }
+
+ @Override
+ protected void setChild(Node newChild) {
+ this.target = newChild;
+ }
+
+ @Override
+ protected Node getChild() {
+ return this.target;
+ }
+
+ /** Installs {@code tgt} as the child and makes this node its parent. */
+ public void setTarget(Node tgt) {
+ this.target = tgt;
+ tgt.parent = this;
+ }
+
+ @Override
+ public String getName() {
+ return "Anchor";
+ }
+
+ @Override
+ public String toString(int level) {
+ StringBuilder text = new StringBuilder();
+ text.append("\n type: ").append(typeToString());
+ text.append("\n target: ").append(pad(target, level + 1));
+ return text.toString();
+ }
+
+ /** Lists the names of all flags set in {@code type}, each followed by a space. */
+ public String typeToString() {
+ StringBuilder names = new StringBuilder();
+ appendIfSet(names, BEGIN_BUF, "BEGIN_BUF ");
+ appendIfSet(names, BEGIN_LINE, "BEGIN_LINE ");
+ appendIfSet(names, BEGIN_POSITION, "BEGIN_POSITION ");
+ appendIfSet(names, END_BUF, "END_BUF ");
+ appendIfSet(names, SEMI_END_BUF, "SEMI_END_BUF ");
+ appendIfSet(names, END_LINE, "END_LINE ");
+ appendIfSet(names, WORD_BOUND, "WORD_BOUND ");
+ appendIfSet(names, NOT_WORD_BOUND, "NOT_WORD_BOUND ");
+ appendIfSet(names, WORD_BEGIN, "WORD_BEGIN ");
+ appendIfSet(names, WORD_END, "WORD_END ");
+ appendIfSet(names, PREC_READ, "PREC_READ ");
+ appendIfSet(names, PREC_READ_NOT, "PREC_READ_NOT ");
+ appendIfSet(names, LOOK_BEHIND, "LOOK_BEHIND ");
+ appendIfSet(names, LOOK_BEHIND_NOT, "LOOK_BEHIND_NOT ");
+ appendIfSet(names, ANYCHAR_STAR, "ANYCHAR_STAR ");
+ appendIfSet(names, ANYCHAR_STAR_ML, "ANYCHAR_STAR_ML ");
+ return names.toString();
+ }
+
+ /** Appends {@code label} to {@code out} when {@code flag} is set in this node's type. */
+ private void appendIfSet(StringBuilder out, int flag, String label) {
+ if (isType(flag)) out.append(label);
+ }
+
+ private boolean isType(int type) {
+ return (this.type & type) != 0;
+ }
+
+}
diff --git a/src/org/joni/ast/AnyCharNode.java b/src/org/joni/ast/AnyCharNode.java
new file mode 100644
index 0000000..d349d8c
--- /dev/null
+++ b/src/org/joni/ast/AnyCharNode.java
@@ -0,0 +1,40 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+/** AST node of type {@code CANY}; it carries no state of its own. */
+public final class AnyCharNode extends Node {
+ public AnyCharNode(){}
+
+ @Override
+ public int getType() {
+ return CANY;
+ }
+
+ @Override
+ public String getName() {
+ return "Any Char";
+ }
+
+ /** There are no fields to print, so the detail string is always empty. */
+ @Override
+ public String toString(int level) {
+ return "";
+ }
+}
diff --git a/src/org/joni/ast/BackRefNode.java b/src/org/joni/ast/BackRefNode.java
new file mode 100644
index 0000000..040fb81
--- /dev/null
+++ b/src/org/joni/ast/BackRefNode.java
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.ScanEnvironment;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+/**
+ * AST node for a back reference. {@code back} holds the referenced group
+ * numbers, {@code backNum} how many of them are in use, and
+ * {@code nestLevel} the nesting level when one was given.
+ */
+public final class BackRefNode extends StateNode {
+ public int backNum;
+ public int[] back;
+
+ public int nestLevel;
+
+ /**
+ * @param backNum number of entries of {@code backRefs} to use
+ * @param backRefs referenced group numbers; the first {@code backNum} are copied
+ * @param byName when true the node is marked as a name reference
+ * @param env scan environment used to detect references to groups
+ * that have no node yet
+ */
+ public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) {
+ this.backNum = backNum;
+ if (byName) setNameRef();
+
+ for (int i=0; i<backNum; i++) {
+ if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) {
+ setRecursion(); /* /...(\1).../ */
+ break;
+ }
+ }
+
+ back = new int[backNum];
+ System.arraycopy(backRefs, 0, back, 0, backNum); // shall we really dup it ???
+ }
+
+ // #ifdef USE_BACKREF_AT_LEVEL
+ /**
+ * Same as the four-argument constructor, and additionally records
+ * {@code nestLevel} when {@code existLevel} is true.
+ */
+ public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) {
+ this(backNum, backRefs, byName, env);
+
+ if (existLevel) {
+ setNestLevel();
+ this.nestLevel = nestLevel;
+ }
+ }
+
+ @Override
+ public int getType() {
+ return BREF;
+ }
+
+ @Override
+ public String getName() {
+ return "Back Ref";
+ }
+
+ @Override
+ public String toString(int level) {
+ StringBuilder value = new StringBuilder(super.toString(level));
+ value.append("\n backNum: ").append(backNum);
+ value.append("\n back: ");
+ for (int i=0; i<back.length; i++) value.append(back[i]).append(", ");
+ // the label used to read "nextLevel" although the field printed is nestLevel
+ value.append("\n nestLevel: ").append(nestLevel);
+ return value.toString();
+ }
+
+ /**
+ * Rewrites the referenced group numbers through {@code map}, dropping
+ * every reference whose mapped number is not positive, and shrinks
+ * {@code backNum} to the number of references kept.
+ *
+ * @param map new group number indexed by old group number
+ * @throws ValueException if this node is not a name reference
+ */
+ public void renumber(int[]map) {
+ if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+
+ int oldNum = backNum;
+
+ int pos = 0;
+ for (int i=0; i<oldNum; i++) {
+ int n = map[back[i]];
+ if (n > 0) {
+ back[pos] = n;
+ pos++;
+ }
+ }
+ backNum = pos;
+ }
+
+}
diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java
new file mode 100644
index 0000000..40c7c90
--- /dev/null
+++ b/src/org/joni/ast/CClassNode.java
@@ -0,0 +1,531 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.jcodings.CodeRange;
+import org.jcodings.Encoding;
+import org.jcodings.IntHolder;
+import org.jcodings.constants.CharacterType;
+import org.jcodings.exception.EncodingException;
+import org.joni.BitSet;
+import org.joni.CodeRangeBuffer;
+import org.joni.Config;
+import org.joni.ScanEnvironment;
+import org.joni.constants.CCSTATE;
+import org.joni.constants.CCVALTYPE;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.SyntaxException;
+import org.joni.exception.ValueException;
+
+/**
+ * AST node for a character class. Membership is kept in two places: the bit
+ * set {@link #bs} and the optional code range buffer {@link #mbuf}. Both are
+ * stored relative to the NOT flag, see {@link #isCodeInCCLength(int, int)}.
+ */
+public final class CClassNode extends Node {
+    private static final int FLAG_NCCLASS_NOT = 1<<0;
+    private static final int FLAG_NCCLASS_SHARE = 1<<1;
+
+    /** Bit field built from the FLAG_NCCLASS_* constants. */
+    int flags;
+    public final BitSet bs = new BitSet(); // conditional creation ?
+    public CodeRangeBuffer mbuf; /* multi-byte info or NULL */
+
+    private int ctype; // for hashing purposes
+    private Encoding enc; // ...
+
+
+    // node_new_cclass
+    /** Creates an empty class: no flags, empty bit set, no range buffer. */
+    public CClassNode() {}
+
+    /**
+     * Builds a class from a code range table and remembers {@code ctype} and
+     * {@code enc}, which {@link #equals(Object)} and {@link #hashCode()} use.
+     */
+    public CClassNode(int ctype, Encoding enc, boolean not, int sbOut, int[]ranges) {
+        this(not, sbOut, ranges);
+        this.ctype = ctype;
+        this.enc = enc;
+    }
+
+    // node_new_cclass_by_codepoint_range, only used by shared Char Classes
+    /**
+     * Builds a class from a code range table.
+     *
+     * @param not    when true the NOT flag is set
+     * @param sbOut  code points below this bound are also recorded in {@link #bs}
+     * @param ranges table laid out as {@code [count, from0, to0, from1, to1, ...]}, or null;
+     *               a non-empty table is handed to the range buffer as a whole
+     */
+    public CClassNode(boolean not, int sbOut, int[]ranges) {
+        if (not) setNot();
+        // bs.clear();
+
+        if (sbOut > 0 && ranges != null) {
+            int n = ranges[0];
+            for (int i=0; i<n; i++) {
+                int from = ranges[i * 2 + 1];
+                int to = ranges[i * 2 + 2];
+                for (int j=from; j<=to; j++) {
+                    if (j >= sbOut) {
+                        setupBuffer(ranges);
+                        return;
+                    }
+                    bs.set(j);
+                }
+            }
+        }
+        setupBuffer(ranges);
+    }
+
+    @Override
+    public int getType() {
+        return CCLASS;
+    }
+
+    @Override
+    public String getName() {
+        return "Character Class";
+    }
+
+    /** Two classes are equal when ctype, NOT flag and encoding instance all match. */
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof CClassNode)) return false;
+        CClassNode cc = (CClassNode)other;
+        return ctype == cc.ctype && isNot() == cc.isNot() && enc == cc.enc;
+    }
+
+    /**
+     * Hash over ctype, encoding and NOT flag when the shared class table is
+     * enabled; identity hash otherwise.
+     */
+    @Override
+    public int hashCode() {
+        if (Config.USE_SHARED_CCLASS_TABLE) {
+            int hash = 0;
+            hash += ctype;
+            // enc is only assigned by the (ctype, enc, ...) constructor; guard the
+            // other instances instead of failing with a NullPointerException
+            if (enc != null) hash += enc.hashCode();
+            if (isNot()) hash++;
+            return hash + (hash >> 5);
+        } else {
+            return super.hashCode();
+        }
+    }
+
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder();
+        value.append("\n flags: " + flagsToString());
+        value.append("\n bs: " + pad(bs, level + 1));
+        value.append("\n mbuf: " + pad(mbuf, level + 1));
+
+        return value.toString();
+    }
+
+    /** Returns the names of the flags currently set, each followed by a space. */
+    public String flagsToString() {
+        StringBuilder flags = new StringBuilder();
+        if (isNot()) flags.append("NOT ");
+        if (isShare()) flags.append("SHARE ");
+        return flags.toString();
+    }
+
+    /** Creates {@link #mbuf} from {@code ranges} unless the table is null or declares zero ranges. */
+    private void setupBuffer(int[]ranges) {
+        if (ranges != null) {
+            if (ranges[0] == 0) return;
+            mbuf = new CodeRangeBuffer(ranges);
+        }
+    }
+
+    /** True when there is no range buffer and the bit set reports itself empty. */
+    public boolean isEmpty() {
+        return mbuf == null && bs.isEmpty();
+    }
+
+    /** Adds {@code from..to} via {@code CodeRangeBuffer.addCodeRangeToBuff} and keeps the returned buffer. */
+    public void addCodeRangeToBuf(int from, int to) {
+        mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
+    }
+
+    /** Adds {@code from..to} via {@code CodeRangeBuffer.addCodeRange} and keeps the returned buffer. */
+    public void addCodeRange(ScanEnvironment env, int from, int to) {
+        mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
+    }
+
+    /** Delegates to {@code CodeRangeBuffer.addAllMultiByteRange} and keeps the returned buffer. */
+    public void addAllMultiByteRange(Encoding enc) {
+        mbuf = CodeRangeBuffer.addAllMultiByteRange(enc, mbuf);
+    }
+
+    /**
+     * If the NOT flag is set, inverts the stored bits (and, for encodings that
+     * are not single-byte, the range buffer) and clears the flag.
+     */
+    public void clearNotFlag(Encoding enc) {
+        if (isNot()) {
+            bs.invert();
+
+            if (!enc.isSingleByte()) {
+                mbuf = CodeRangeBuffer.notCodeRangeBuff(enc, mbuf);
+            }
+            clearNot();
+        }
+    }
+
+    // and_cclass
+    /**
+     * AND-combines {@code other} into this class, in place. The NOT flag of
+     * this node is left untouched; the stored bits and ranges stay relative to it.
+     */
+    public void and(CClassNode other, Encoding enc) {
+        boolean not1 = isNot();
+        BitSet bsr1 = bs;
+        CodeRangeBuffer buf1 = mbuf;
+        boolean not2 = other.isNot();
+        BitSet bsr2 = other.bs;
+        CodeRangeBuffer buf2 = other.mbuf;
+
+        // work on inverted copies so neither operand's stored bits are disturbed
+        if (not1) {
+            BitSet bs1 = new BitSet();
+            bsr1.invertTo(bs1);
+            bsr1 = bs1;
+        }
+
+        if (not2) {
+            BitSet bs2 = new BitSet();
+            bsr2.invertTo(bs2);
+            bsr2 = bs2;
+        }
+
+        bsr1.and(bsr2);
+
+        if (bsr1 != bs) {
+            bs.copy(bsr1);
+            bsr1 = bs;
+        }
+
+        // back to the representation relative to this node's NOT flag
+        if (not1) {
+            bs.invert();
+        }
+
+        CodeRangeBuffer pbuf = null;
+
+        if (!enc.isSingleByte()) {
+            if (not1 && not2) {
+                pbuf = CodeRangeBuffer.orCodeRangeBuff(enc, buf1, false, buf2, false);
+            } else {
+                pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
+
+                if (not1) {
+                    pbuf = CodeRangeBuffer.notCodeRangeBuff(enc, pbuf);
+                }
+            }
+            mbuf = pbuf;
+        }
+
+    }
+
+    // or_cclass
+    /**
+     * OR-combines {@code other} into this class, in place. The NOT flag of
+     * this node is left untouched; the stored bits and ranges stay relative to it.
+     */
+    public void or(CClassNode other, Encoding enc) {
+        boolean not1 = isNot();
+        BitSet bsr1 = bs;
+        CodeRangeBuffer buf1 = mbuf;
+        boolean not2 = other.isNot();
+        BitSet bsr2 = other.bs;
+        CodeRangeBuffer buf2 = other.mbuf;
+
+        if (not1) {
+            BitSet bs1 = new BitSet();
+            bsr1.invertTo(bs1);
+            bsr1 = bs1;
+        }
+
+        if (not2) {
+            BitSet bs2 = new BitSet();
+            bsr2.invertTo(bs2);
+            bsr2 = bs2;
+        }
+
+        bsr1.or(bsr2);
+
+        if (bsr1 != bs) {
+            bs.copy(bsr1);
+            bsr1 = bs;
+        }
+
+        if (not1) {
+            bs.invert();
+        }
+
+        if (!enc.isSingleByte()) {
+            CodeRangeBuffer pbuf = null;
+            if (not1 && not2) {
+                pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
+            } else {
+                pbuf = CodeRangeBuffer.orCodeRangeBuff(enc, buf1, not1, buf2, not2);
+                if (not1) {
+                    pbuf = CodeRangeBuffer.notCodeRangeBuff(enc, pbuf);
+                }
+            }
+            mbuf = pbuf;
+        }
+    }
+
+    // add_ctype_to_cc_by_range // Encoding out!
+    /**
+     * Adds a character type, given as a code range table, to this class.
+     * Code points below {@code sbOut} go to {@link #bs}; everything from
+     * {@code sbOut} upwards goes to {@link #mbuf}.
+     *
+     * @param ctype unused here
+     * @param not   when true the complement of the table is added
+     * @param enc   unused here
+     * @param sbOut first code point that is no longer recorded in the bit set
+     * @param mbr   table laid out as {@code [count, from0, to0, from1, to1, ...]};
+     *              presumably sorted ascending, as the scan relies on it - TODO confirm
+     */
+    public void addCTypeByRange(int ctype, boolean not, Encoding enc, int sbOut, int mbr[]) {
+        int n = mbr[0];
+
+        if (!not) {
+            for (int i=0; i<n; i++) {
+                int from = mbr[i * 2 + 1];
+                int to = mbr[i * 2 + 2];
+                for (int j=from; j<=to; j++) {
+                    if (j >= sbOut) {
+                        if (j > from) {
+                            // range i straddles sbOut: its lower part is already in bs,
+                            // the rest (possibly just the single point j == to) goes to mbuf
+                            addCodeRangeToBuf(j, to);
+                            i++;
+                        }
+                        // every remaining range lies entirely at or above sbOut
+                        for (; i<n; i++) {
+                            addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
+                        }
+                        return;
+                    }
+                    bs.set(j);
+                }
+            }
+            // all ranges fit below sbOut: they live in bs only, nothing is added to mbuf
+        } else {
+            int prev = 0;
+
+            // set the gaps between the ranges for as long as they are below sbOut
+            for (int i=0; i<n; i++) {
+                for (int j=prev; j < mbr[2 * i + 1]; j++) {
+                    if (j >= sbOut) {
+                        addComplementFrom(sbOut, mbr, n);
+                        return;
+                    }
+                    bs.set(j);
+                }
+                prev = mbr[2 * i + 2] + 1;
+            }
+
+            for (int j=prev; j<sbOut; j++) {
+                bs.set(j);
+            }
+
+            addComplementFrom(sbOut, mbr, n);
+        }
+    }
+
+    /** Adds to {@link #mbuf} every gap between the {@code n} ranges of {@code mbr}, from {@code sbOut} up to 0x7fffffff. */
+    private void addComplementFrom(int sbOut, int mbr[], int n) {
+        int prev = sbOut;
+        for (int i=0; i<n; i++) {
+            if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
+            prev = mbr[i * 2 + 2] + 1;
+        }
+        if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
+    }
+
+    /**
+     * Adds the character type {@code ctype} (or its complement when
+     * {@code not} is true) to this class. When the encoding supplies a code
+     * range table for the type, that table is used; otherwise each code below
+     * {@code BitSet.SINGLE_BYTE_SIZE} is tested individually.
+     *
+     * @param sbOut holder passed to {@code enc.ctypeCodeRange}; its value is read afterwards
+     * @throws InternalException for a ctype not handled by the fallback switch
+     */
+    public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
+        Encoding enc = env.enc;
+
+        int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
+
+        if (ranges != null) {
+            addCTypeByRange(ctype, not, enc, sbOut.value, ranges);
+            return;
+        }
+
+        switch(ctype) {
+        case CharacterType.ALPHA:
+        case CharacterType.BLANK:
+        case CharacterType.CNTRL:
+        case CharacterType.DIGIT:
+        case CharacterType.LOWER:
+        case CharacterType.PUNCT:
+        case CharacterType.SPACE:
+        case CharacterType.UPPER:
+        case CharacterType.XDIGIT:
+        case CharacterType.ASCII:
+        case CharacterType.ALNUM:
+            if (not) {
+                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                    if (!enc.isCodeCType(c, ctype)) bs.set(c);
+                }
+                addAllMultiByteRange(enc);
+            } else {
+                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                    if (enc.isCodeCType(c, ctype)) bs.set(c);
+                }
+            }
+            break;
+
+        case CharacterType.GRAPH:
+        case CharacterType.PRINT:
+            if (not) {
+                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                    if (!enc.isCodeCType(c, ctype)) bs.set(c);
+                }
+            } else {
+                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                    if (enc.isCodeCType(c, ctype)) bs.set(c);
+                }
+                addAllMultiByteRange(enc);
+            }
+            break;
+
+        case CharacterType.WORD:
+            if (!not) {
+                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                    if (enc.isSbWord(c)) bs.set(c);
+                }
+
+                addAllMultiByteRange(enc);
+            } else {
+                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                    try {
+                        if (enc.codeToMbcLength(c) > 0 && /* check invalid code point */
+                                !enc.isWord(c)) bs.set(c);
+                    } catch (EncodingException ignored) {
+                        // deliberately skipped: a code the encoding rejects is not added
+                    }
+                }
+            }
+            break;
+
+        default:
+            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+        } // switch
+    }
+
+    /** Mutable state threaded through {@link #nextStateClass} and {@link #nextStateValue}. */
+    public static final class CCStateArg {
+        public int v;
+        public int vs;
+        public boolean vsIsRaw;
+        public boolean vIsRaw;
+        public CCVALTYPE inType;
+        public CCVALTYPE type;
+        public CCSTATE state;
+    }
+
+    /**
+     * State transition taken when a class element is encountered: a pending
+     * single value is flushed into the class, then the state becomes VALUE/CLASS.
+     *
+     * @throws SyntaxException if a range is still open
+     */
+    public void nextStateClass(CCStateArg arg, ScanEnvironment env) {
+        if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
+
+        if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
+            if (arg.type == CCVALTYPE.SB) {
+                bs.set(arg.vs);
+            } else if (arg.type == CCVALTYPE.CODE_POINT) {
+                addCodeRange(env, arg.vs, arg.vs);
+            }
+        }
+        arg.state = CCSTATE.VALUE;
+        arg.type = CCVALTYPE.CLASS;
+    }
+
+    /**
+     * State transition taken when a value {@code arg.v} of type
+     * {@code arg.inType} is encountered. A pending value is flushed, an open
+     * range {@code arg.vs..arg.v} is completed, and the new value becomes the
+     * pending one.
+     *
+     * @throws ValueException for a single-byte value above 0xff, or for an
+     *                        empty range when the syntax does not allow one
+     */
+    public void nextStateValue(CCStateArg arg, ScanEnvironment env) {
+
+        switch(arg.state) {
+        case VALUE:
+            if (arg.type == CCVALTYPE.SB) {
+                if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+                bs.set(arg.vs);
+            } else if (arg.type == CCVALTYPE.CODE_POINT) {
+                addCodeRange(env, arg.vs, arg.vs);
+            }
+            break;
+
+        case RANGE:
+            if (arg.inType == arg.type) {
+                if (arg.inType == CCVALTYPE.SB) {
+                    if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+
+                    if (arg.vs > arg.v) {
+                        if (env.syntax.allowEmptyRangeInCC()) {
+                            // goto ccs_range_end
+                            arg.state = CCSTATE.COMPLETE;
+                            break;
+                        } else {
+                            throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+                        }
+                    }
+                    bs.setRange(arg.vs, arg.v);
+                } else {
+                    addCodeRange(env, arg.vs, arg.v);
+                }
+            } else {
+                if (arg.vs > arg.v) {
+                    if (env.syntax.allowEmptyRangeInCC()) {
+                        // goto ccs_range_end
+                        arg.state = CCSTATE.COMPLETE;
+                        break;
+                    } else {
+                        throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+                    }
+                }
+                // mixed end point types: record the range in both representations
+                bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
+                addCodeRange(env, arg.vs, arg.v);
+            }
+            // ccs_range_end:
+            arg.state = CCSTATE.COMPLETE;
+            break;
+
+        case COMPLETE:
+        case START:
+            arg.state = CCSTATE.VALUE;
+            break;
+
+        default:
+            break;
+
+        } // switch
+
+        arg.vsIsRaw = arg.vIsRaw;
+        arg.vs = arg.v;
+        arg.type = arg.inType;
+    }
+
+    // onig_is_code_in_cc_len
+    /**
+     * Tests membership of {@code code}. The range buffer is consulted when
+     * {@code encLength > 1} or the code is at or above
+     * {@code BitSet.SINGLE_BYTE_SIZE}, the bit set otherwise; the result is
+     * negated when the NOT flag is set.
+     */
+    public boolean isCodeInCCLength(int encLength, int code) {
+        boolean found;
+
+        if (encLength > 1 || code >= BitSet.SINGLE_BYTE_SIZE) {
+            if (mbuf == null) {
+                found = false;
+            } else {
+                found = CodeRange.isInCodeRange(mbuf.getCodeRange(), code);
+            }
+        } else {
+            found = bs.at(code);
+        }
+
+        if (isNot()) {
+            return !found;
+        } else {
+            return found;
+        }
+    }
+
+    // onig_is_code_in_cc
+    /**
+     * Tests membership of {@code code} under {@code enc}. Encodings whose
+     * minimum length exceeds one always take the range buffer path.
+     */
+    public boolean isCodeInCC(Encoding enc, int code) {
+        int len;
+        if (enc.minLength() > 1) {
+            len = 2;
+        } else {
+            len = enc.codeToMbcLength(code);
+        }
+        return isCodeInCCLength(len, code);
+    }
+
+    public void setNot() {
+        flags |= FLAG_NCCLASS_NOT;
+    }
+
+    public void clearNot() {
+        flags &= ~FLAG_NCCLASS_NOT;
+    }
+
+    public boolean isNot() {
+        return (flags & FLAG_NCCLASS_NOT) != 0;
+    }
+
+    public void setShare() {
+        flags |= FLAG_NCCLASS_SHARE;
+    }
+
+    public void clearShare() {
+        flags &= ~FLAG_NCCLASS_SHARE;
+    }
+
+    public boolean isShare() {
+        return (flags & FLAG_NCCLASS_SHARE) != 0;
+    }
+
+}
diff --git a/src/org/joni/ast/CTypeNode.java b/src/org/joni/ast/CTypeNode.java
new file mode 100644
index 0000000..093216a
--- /dev/null
+++ b/src/org/joni/ast/CTypeNode.java
@@ -0,0 +1,50 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+/**
+ * AST node carrying a character type code together with a negation flag.
+ */
+public final class CTypeNode extends Node {
+    /** Character type code given at construction. */
+    public int ctype;
+    /** Negation flag given at construction. */
+    public boolean not;
+
+    /**
+     * @param type character type code stored in {@link #ctype}
+     * @param not  negation flag stored in {@link #not}
+     */
+    public CTypeNode(int type, boolean not) {
+        this.not = not;
+        this.ctype = type;
+    }
+
+    @Override
+    public String getName() {
+        return "Character Type";
+    }
+
+    @Override
+    public int getType() {
+        return CTYPE;
+    }
+
+    /** Debug dump listing both fields, one per line. */
+    @Override
+    public String toString(int level) {
+        return new StringBuilder()
+                .append("\n ctype: ").append(ctype)
+                .append("\n not: ").append(not)
+                .toString();
+    }
+
+}
diff --git a/src/org/joni/ast/CallNode.java b/src/org/joni/ast/CallNode.java
new file mode 100644
index 0000000..8261f75
--- /dev/null
+++ b/src/org/joni/ast/CallNode.java
@@ -0,0 +1,86 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import java.util.Set;
+
+import org.joni.UnsetAddrList;
+import org.joni.WarnCallback;
+
+/**
+ * AST node for a subexpression call. The called group is identified by a
+ * name slice and/or a group number; {@link #target} holds the called node
+ * once it has been assigned.
+ */
+public final class CallNode extends StateNode {
+    /** Byte array containing the name; the name occupies {@code nameP..nameEnd}. */
+    public byte[]name;
+    public int nameP;
+    public int nameEnd;
+
+    public int groupNum;
+    public Node target; // is it an EncloseNode always ?
+    public UnsetAddrList unsetAddrList;
+
+    /**
+     * @param name    byte array containing the group name
+     * @param nameP   offset of the first name byte
+     * @param nameEnd offset just past the last name byte
+     * @param gnum    group number; non-zero means call by number
+     */
+    public CallNode(byte[]name, int nameP, int nameEnd, int gnum) {
+        this.name = name;
+        this.nameP = nameP;
+        this.nameEnd = nameEnd;
+        this.groupNum = gnum; /* call by number if gnum != 0 */
+    }
+
+    @Override
+    public int getType() {
+        return CALL;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Assigns the target and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    @Override
+    public String getName() {
+        return "Call";
+    }
+
+    /**
+     * Warns when there is no target, or when the target names this call as
+     * its parent. Does not descend into the target.
+     */
+    @Override
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (target == null || target.parent == this)
+            warnings.warn(this.getAddressName() + " doesn't point to a target or the target has been stolen");
+        // do not recurse here
+    }
+
+    /**
+     * Debug dump. A missing name or target is printed as "NULL" so that a
+     * call whose target has not been assigned yet can still be printed.
+     */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n name: " + (name == null ? "NULL" : new String(name, nameP, nameEnd - nameP)));
+        value.append("\n groupNum: " + groupNum);
+        // verifyTree() treats a null target as a reachable state, so guard it here too;
+        // pad() prints "NULL" for a null value
+        value.append("\n target: " + pad(target == null ? null : target.getAddressName(), level + 1));
+        value.append("\n unsetAddrList: " + pad(unsetAddrList, level + 1));
+
+        return value.toString();
+    }
+
+}
diff --git a/src/org/joni/ast/ConsAltNode.java b/src/org/joni/ast/ConsAltNode.java
new file mode 100644
index 0000000..3d7f784
--- /dev/null
+++ b/src/org/joni/ast/ConsAltNode.java
@@ -0,0 +1,154 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import java.util.Set;
+
+import org.joni.Config;
+import org.joni.WarnCallback;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+/**
+ * Cons cell used for both sequences (LIST) and alternations (ALT):
+ * {@link #car} is the element, {@link #cdr} the next cell or null.
+ */
+public final class ConsAltNode extends Node {
+    public Node car;
+    public ConsAltNode cdr;
+    private int type; // List or Alt
+
+    /** Links both arguments (when non-null) to the new cell as their parent. */
+    private ConsAltNode(Node car, ConsAltNode cdr, int type) {
+        this.type = type;
+
+        this.car = car;
+        if (car != null) car.parent = this;
+
+        this.cdr = cdr;
+        if (cdr != null) cdr.parent = this;
+    }
+
+    /** Creates an ALT cell with the given element and rest. */
+    public static ConsAltNode newAltNode(Node left, ConsAltNode right) {
+        return new ConsAltNode(left, right, ALT);
+    }
+
+    /** Creates a LIST cell with the given element and rest. */
+    public static ConsAltNode newListNode(Node left, ConsAltNode right) {
+        return new ConsAltNode(left, right, LIST);
+    }
+
+    /**
+     * Wraps {@code x} in a new LIST cell and, when {@code list} is not null,
+     * appends that cell after the last cell of {@code list}.
+     *
+     * @return the newly created cell (not the head of the list)
+     */
+    public static ConsAltNode listAdd(ConsAltNode list, Node x) {
+        ConsAltNode appended = newListNode(x, null);
+        if (list == null) return appended;
+
+        ConsAltNode tail = list;
+        while (tail.cdr != null) tail = tail.cdr;
+        tail.setCdr(appended);
+
+        return appended;
+    }
+
+    public void toListNode() {
+        type = LIST;
+    }
+
+    public void toAltNode() {
+        type = ALT;
+    }
+
+    @Override
+    public int getType() {
+        return type;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        car = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return car;
+    }
+
+    /**
+     * Exchanges this node with {@code with}. When this cell has a rest, the
+     * rest is re-parented to {@code with}; if {@code with} is a cons cell too,
+     * the two rests are exchanged as well. The parent links are then swapped
+     * by the superclass.
+     */
+    @Override
+    public void swap(Node with) {
+        ConsAltNode ownRest = cdr;
+        if (ownRest != null) {
+            ownRest.parent = with;
+            if (with instanceof ConsAltNode) {
+                ConsAltNode other = (ConsAltNode)with;
+                ConsAltNode otherRest = other.cdr;
+                otherRest.parent = this;
+                cdr = otherRest;
+                other.cdr = ownRest;
+            }
+        }
+
+        super.swap(with);
+    }
+
+    /**
+     * Checks that element and rest both name this cell as their parent,
+     * warning otherwise, and descends into each. Cells already in {@code set}
+     * are skipped.
+     */
+    @Override
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        // add() reports false when the cell was recorded before
+        if (!set.add(this)) return;
+
+        if (car != null) {
+            if (car.parent != this) {
+                warnings.warn("broken list car: " + this.getAddressName() + " -> " + car.getAddressName());
+            }
+            car.verifyTree(set,warnings);
+        }
+
+        if (cdr != null) {
+            if (cdr.parent != this) {
+                warnings.warn("broken list cdr: " + this.getAddressName() + " -> " + cdr.getAddressName());
+            }
+            cdr.verifyTree(set,warnings);
+        }
+    }
+
+    /** Stores {@code ca} as the element, makes this cell its parent and returns it. */
+    public Node setCar(Node ca) {
+        this.car = ca;
+        ca.parent = this;
+        return ca;
+    }
+
+    /** Stores {@code cd} as the rest, makes this cell its parent and returns it. */
+    public ConsAltNode setCdr(ConsAltNode cd) {
+        this.cdr = cd;
+        cd.parent = this;
+        return cd;
+    }
+
+    /**
+     * @throws InternalException when the type is neither ALT nor LIST
+     */
+    @Override
+    public String getName() {
+        if (type == ALT) return "Alt";
+        if (type == LIST) return "List";
+        throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+    }
+
+    /** Debug dump: the padded element followed by the full dump of the rest. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder();
+        value.append("\n left: ").append(pad(car, level + 1));
+        value.append("\n right: ");
+        if (cdr == null) {
+            value.append("NULL");
+        } else {
+            value.append(cdr.toString());
+        }
+
+        return value.toString();
+    }
+
+}
diff --git a/src/org/joni/ast/EncloseNode.java b/src/org/joni/ast/EncloseNode.java
new file mode 100644
index 0000000..02b2391
--- /dev/null
+++ b/src/org/joni/ast/EncloseNode.java
@@ -0,0 +1,151 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.Config;
+import org.joni.Option;
+import org.joni.constants.EncloseType;
+
+/**
+ * AST node for an enclosing construct. {@link #type} is a bit mask of
+ * {@link EncloseType} values (MEMORY, OPTION, STOP_BACKTRACK).
+ */
+public final class EncloseNode extends StateNode implements EncloseType {
+
+    public int type; // enclose type
+    public int regNum;
+    public int option;
+    public Node target; /* EncloseNode : ENCLOSE_MEMORY */
+    public int callAddr; // AbsAddrType
+    public int minLength; // OnigDistance
+    public int maxLength; // OnigDistance
+    public int charLength;
+    public int optCount; // referenced count in optimize_node_left()
+
+    // node_new_enclose / onig_node_new_enclose
+    /** Creates a node of the given enclose type with {@link #callAddr} set to -1. */
+    public EncloseNode(int type) {
+        this.type = type;
+        callAddr = -1;
+    }
+
+    // node_new_enclose_memory
+    /**
+     * Creates a MEMORY node.
+     *
+     * @param option  stored only when {@code Config.USE_SUBEXP_CALL} is enabled
+     * @param isNamed when true the node is flagged as a named group
+     */
+    public EncloseNode(int option, boolean isNamed) {
+        this(MEMORY);
+        if (isNamed) setNamedGroup();
+        if (Config.USE_SUBEXP_CALL) this.option = option;
+    }
+
+    // node_new_option
+    /**
+     * Creates an OPTION node.
+     *
+     * @param option option bits to store
+     * @param unused never read; it only distinguishes this overload from
+     *               {@link #EncloseNode(int)}. It was formerly named {@code _},
+     *               which is a reserved keyword from Java 9 on.
+     */
+    public EncloseNode(int option, int unused) {
+        this(OPTION);
+        this.option = option;
+    }
+
+    @Override
+    public int getType() {
+        return ENCLOSE;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Assigns the target and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    @Override
+    public String getName() {
+        return "Enclose";
+    }
+
+    /** Debug dump: the inherited text followed by every field, one per line. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n type: " + typeToString());
+        value.append("\n regNum: " + regNum);
+        value.append("\n option: " + Option.toString(option));
+        value.append("\n target: " + pad(target, level + 1));
+        value.append("\n callAddr: " + callAddr);
+        value.append("\n minLength: " + minLength);
+        value.append("\n maxLength: " + maxLength);
+        value.append("\n charLength: " + charLength);
+        value.append("\n optCount: " + optCount);
+
+        return value.toString();
+    }
+
+    /** Returns the names of the enclose type bits currently set, each followed by a space. */
+    public String typeToString() {
+        StringBuilder types = new StringBuilder();
+        if (isStopBacktrack()) types.append("STOP_BACKTRACK ");
+        if (isMemory()) types.append("MEMORY ");
+        if (isOption()) types.append("OPTION ");
+
+        return types.toString();
+    }
+
+    // the two status methods act on the inherited state bits, not on the enclose type
+    public void setEncloseStatus(int flag) {
+        state |= flag;
+    }
+
+    public void clearEncloseStatus(int flag) {
+        state &= ~flag;
+    }
+
+    public void clearMemory() {
+        type &= ~MEMORY;
+    }
+
+    public void setMemory() {
+        type |= MEMORY;
+    }
+
+    public boolean isMemory() {
+        return (type & MEMORY) != 0;
+    }
+
+    public void clearOption() {
+        type &= ~OPTION;
+    }
+
+    public void setOption() {
+        type |= OPTION;
+    }
+
+    public boolean isOption() {
+        return (type & OPTION) != 0;
+    }
+
+    public void clearStopBacktrack() {
+        type &= ~STOP_BACKTRACK;
+    }
+
+    public void setStopBacktrack() {
+        type |= STOP_BACKTRACK;
+    }
+
+    public boolean isStopBacktrack() {
+        return (type & STOP_BACKTRACK) != 0;
+    }
+
+}
diff --git a/src/org/joni/ast/Node.java b/src/org/joni/ast/Node.java
new file mode 100644
index 0000000..eef7790
--- /dev/null
+++ b/src/org/joni/ast/Node.java
@@ -0,0 +1,136 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import java.util.Set;
+
+import org.joni.Config;
+import org.joni.WarnCallback;
+import org.joni.constants.NodeType;
+
+/**
+ * Base class of all AST nodes. Holds the link to the owning node and the
+ * helpers shared by the concrete node types.
+ */
+public abstract class Node implements NodeType {
+    /** Owning node; null is printed as "NULL" and skipped by {@link #swap(Node)}. */
+    public Node parent;
+
+    /** @return the node type constant of the concrete class */
+    public abstract int getType();
+
+    /** @return the node type as a single-bit mask, {@code 1 << getType()} */
+    public final int getType2Bit() {
+        return 1 << getType();
+    }
+
+    /** Replaces the child; does nothing unless overridden. */
+    protected void setChild(Node tgt) {
+    }
+
+    /** @return the child; null unless overridden */
+    protected Node getChild() {
+        return null;
+    }
+
+    /**
+     * Exchanges the tree positions of this node and {@code with}: each
+     * parent's child slot is pointed at the other node and the two parent
+     * links are exchanged. The children of the two nodes are not touched.
+     */
+    public void swap(Node with) {
+        if (parent != null) parent.setChild(with);
+
+        if (with.parent != null) with.parent.setChild(this);
+
+        Node formerParent = parent;
+        parent = with.parent;
+        with.parent = formerParent;
+    }
+
+    // overridden by ConsAltNode and CallNode
+    /**
+     * Warns when the child does not name this node as its parent, then
+     * descends into the child. Nodes already in {@code set}, and nodes
+     * without a child, are skipped (the latter without being recorded).
+     */
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (set.contains(this)) return;
+        if (getChild() == null) return;
+
+        set.add(this);
+        if (getChild().parent != this) {
+            warnings.warn("broken link to child: " + this.getAddressName() + " -> " + getChild().getAddressName());
+        }
+        getChild().verifyTree(set, warnings);
+    }
+
+    /** @return a short human readable name of the node kind */
+    public abstract String getName();
+
+    /** @return the type specific part of the debug dump */
+    protected abstract String toString(int level);
+
+    /** @return the node name followed by the identity hash code in hex */
+    public String getAddressName() {
+        return getName() + ":0x" + Integer.toHexString(System.identityHashCode(this));
+    }
+
+    /** Debug dump: address name, parent, then the type specific part at level 0. */
+    public final String toString() {
+        StringBuilder header = new StringBuilder();
+        header.append("<" + getAddressName() + ">");
+        header.append("\n parent: ");
+        if (parent == null) {
+            header.append("NULL");
+        } else {
+            header.append(parent.getAddressName());
+        }
+        return header + toString(0);
+    }
+
+    /**
+     * Renders {@code value} with an indent inserted after every line break.
+     * The indent is doubled once per level. A null value yields "NULL".
+     */
+    protected static String pad(Object value, int level) {
+        if (value == null) return "NULL";
+
+        StringBuilder indent = new StringBuilder(" ");
+        for (int remaining = level; remaining > 0; remaining--) indent.append(indent);
+
+        return value.toString().replace("\n", "\n" + indent);
+    }
+
+    /**
+     * Always false unless {@code Config.VANILLA} is set. Otherwise true for
+     * an anchor and for an alternation with at least one invalid branch,
+     * false for everything else.
+     */
+    public final boolean isInvalidQuantifier() {
+        if (!Config.VANILLA) return false;
+
+        switch(getType()) {
+
+        case ANCHOR:
+            return true;
+
+        case LIST:
+            // the result is false either way; the walk only stops at the first valid element
+            for (ConsAltNode cell = (ConsAltNode)this; cell != null; cell = cell.cdr) {
+                if (!cell.car.isInvalidQuantifier()) return false;
+            }
+            return false;
+
+        case ALT:
+            for (ConsAltNode cell = (ConsAltNode)this; cell != null; cell = cell.cdr) {
+                if (cell.car.isInvalidQuantifier()) return true;
+            }
+            return false;
+
+        case ENCLOSE:
+            /* allow enclosed elements */
+            /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
+        default:
+            return false;
+        }
+    }
+
+    /** @return whether this node's type bit is part of {@code ALLOWED_IN_LB} */
+    public final boolean isAllowedInLookBehind() {
+        return (getType2Bit() & ALLOWED_IN_LB) != 0;
+    }
+
+    /** @return whether this node's type bit is part of {@code SIMPLE} */
+    public final boolean isSimple() {
+        return (getType2Bit() & SIMPLE) != 0;
+    }
+}
diff --git a/src/org/joni/ast/QuantifierNode.java b/src/org/joni/ast/QuantifierNode.java
new file mode 100644
index 0000000..280f9bf
--- /dev/null
+++ b/src/org/joni/ast/QuantifierNode.java
@@ -0,0 +1,272 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.Config;
+import org.joni.ScanEnvironment;
+import org.joni.constants.Reduce;
+import org.joni.constants.TargetInfo;
+
+/**
+ * AST node for a repetition applied to {@link #target}: the target is
+ * repeated between {@link #lower} and {@link #upper} times, where an upper
+ * bound of {@link #REPEAT_INFINITE} means "no upper limit".
+ */
+public final class QuantifierNode extends StateNode {
+
+    public Node target;
+    public int lower;
+    public int upper;
+    public boolean greedy;
+
+    public int targetEmptyInfo;
+
+    public Node headExact;
+    public Node nextHeadExact;
+    public boolean isRefered;       /* include called node. don't eliminate even if {0} */
+
+    // USE_COMBINATION_EXPLOSION_CHECK
+    public int combExpCheckNum;     /* 1,2,3...: check, 0: no check */
+
+    /**
+     * Creates a greedy quantifier without a target.
+     *
+     * @param lower minimum repeat count
+     * @param upper maximum repeat count, or REPEAT_INFINITE
+     * @param byNumber when true the BY_NUMBER state flag is set on the node
+     */
+    public QuantifierNode(int lower, int upper, boolean byNumber) {
+        this.lower = lower;
+        this.upper = upper;
+        greedy = true;
+        targetEmptyInfo = TargetInfo.ISNOT_EMPTY;
+
+        if (byNumber) setByNumber();
+    }
+
+    @Override
+    public int getType() {
+        return QTFR;
+    }
+
+    /** Replaces the target without touching the parent link of newChild. */
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Installs tgt as target and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    /**
+     * Creates a StringNode carrying the flag of the current target (which
+     * must be a StringNode) and swaps it with this node.
+     *
+     * @return the newly created string node
+     */
+    public StringNode convertToString() {
+        StringNode sn = new StringNode();
+        sn.flag = ((StringNode)target).flag;
+        sn.swap(this);
+        return sn;
+    }
+
+    @Override
+    public String getName() {
+        return "Quantifier";
+    }
+
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n target: " + pad(target, level + 1));
+        value.append("\n lower: " + lower);
+        value.append("\n upper: " + upper);
+        value.append("\n greedy: " + greedy);
+        value.append("\n targetEmptyInfo: " + targetEmptyInfo);
+        value.append("\n headExact: " + pad(headExact, level + 1));
+        value.append("\n nextHeadExact: " + pad(nextHeadExact, level + 1));
+        value.append("\n isRefered: " + isRefered);
+        value.append("\n combExpCheckNum: " + combExpCheckNum);
+
+        return value.toString();
+    }
+
+    /** True for a greedy, unbounded repetition whose target has type CANY. */
+    public boolean isAnyCharStar() {
+        return greedy && isRepeatInfinite(upper) && target.getType() == CANY;
+    }
+
+    /**
+     * Classifies this quantifier as one of the six common operators.
+     *
+     * @return ?:0, *:1, +:2, ??:3, *?:4, +?:5, or -1 for any other bounds
+     */
+    protected int popularNum() {
+        if (greedy) {
+            if (lower == 0) {
+                if (upper == 1) return 0;
+                else if (isRepeatInfinite(upper)) return 1;
+            } else if (lower == 1) {
+                if (isRepeatInfinite(upper)) return 2;
+            }
+        } else {
+            if (lower == 0) {
+                if (upper == 1) return 3;
+                else if (isRepeatInfinite(upper)) return 4;
+            } else if (lower == 1) {
+                if (isRepeatInfinite(upper)) return 5;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Takes over target, bounds, greediness and the analysis fields of other;
+     * other loses its target in the process.
+     */
+    protected void set(QuantifierNode other) {
+        setTarget(other.target);
+        other.target = null;
+        lower = other.lower;
+        upper = other.upper;
+        greedy = other.greedy;
+        targetEmptyInfo = other.targetEmptyInfo;
+
+        //setHeadExact(other.headExact);
+        //setNextHeadExact(other.nextHeadExact);
+        headExact = other.headExact;
+        nextHeadExact = other.nextHeadExact;
+        isRefered = other.isRefered;
+        combExpCheckNum = other.combExpCheckNum;
+    }
+
+    /**
+     * Merges this (outer) quantifier with the nested quantifier other
+     * according to Reduce.REDUCE_TABLE. Nothing happens unless both
+     * quantifiers are one of the six common operators (see popularNum()).
+     * For DEL, A, AQ and QQ the target of other is moved into this node and
+     * other is left without target; for P_QQ, PQ_Q and ASIS other itself
+     * becomes the target of this node.
+     */
+    public void reduceNestedQuantifier(QuantifierNode other) {
+        int pnum = popularNum();
+        int cnum = other.popularNum();
+
+        if (pnum < 0 || cnum < 0) return;
+
+        switch(Reduce.REDUCE_TABLE[cnum][pnum]) {
+        case DEL:
+            // no need to set the parent here...
+            // swap ?
+            set(other); // *pnode = *cnode; ???
+            break;
+
+        case A:
+            setTarget(other.target);
+            lower = 0;
+            upper = REPEAT_INFINITE;
+            greedy = true;
+            break;
+
+        case AQ:
+            setTarget(other.target);
+            lower = 0;
+            upper = REPEAT_INFINITE;
+            greedy = false;
+            break;
+
+        case QQ:
+            setTarget(other.target);
+            lower = 0;
+            upper = 1;
+            greedy = false;
+            break;
+
+        case P_QQ:
+            setTarget(other);
+            lower = 0;
+            upper = 1;
+            greedy = false;
+            other.lower = 1;
+            other.upper = REPEAT_INFINITE;
+            other.greedy = true;
+            return;
+
+        case PQ_Q:
+            setTarget(other);
+            lower = 0;
+            upper = 1;
+            greedy = true;
+            other.lower = 1;
+            other.upper = REPEAT_INFINITE;
+            other.greedy = false;
+            return;
+
+        case ASIS:
+            setTarget(other);
+            return;
+        }
+        // ??? remove the parent from target ???
+        other.target = null; // remove target from reduced quantifier
+    }
+
+    /**
+     * Attaches tgt to this quantifier, applying the special cases for string
+     * and quantifier targets.
+     *
+     * @param tgt node to be repeated
+     * @param group when false, a string target that can be split only
+     *              contributes its last character
+     * @param env scan environment (encoding, syntax and warning sink are used)
+     * @param bytes buffer holding the pattern text quoted in warnings
+     * @param p start offset of the pattern text in bytes
+     * @param end end offset (exclusive) of the pattern text in bytes
+     * @return 1 if this is {1,1} (no target is attached), 2 if a string target
+     *         was split and only its last character was attached, 0 otherwise
+     */
+    public int setQuantifier(Node tgt, boolean group, ScanEnvironment env, byte[]bytes, int p, int end) {
+        if (lower == 1 && upper == 1) return 1;
+
+        switch(tgt.getType()) {
+
+        case STR:
+            if (!group) {
+                StringNode sn = (StringNode)tgt;
+                if (sn.canBeSplit(env.enc)) {
+                    StringNode n = sn.splitLastChar(env.enc);
+                    if (n != null) {
+                        setTarget(n);
+                        return 2;
+                    }
+                }
+            }
+            break;
+
+        case QTFR:
+            /* check redundant double repeat. */
+            /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+            QuantifierNode qnt = (QuantifierNode)tgt;
+            int nestQNum = popularNum();
+            int targetQNum = qnt.popularNum();
+
+            if (Config.USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR) {
+                // popularNum() yields -1 for uncommon bounds; REDUCE_TABLE must
+                // only be indexed with non-negative values.
+                if (targetQNum >= 0 && nestQNum >= 0 &&
+                    !isByNumber() && !qnt.isByNumber() && env.syntax.warnReduntantNestedRepeat()) {
+                    switch(Reduce.REDUCE_TABLE[targetQNum][nestQNum]) {
+                    case ASIS:
+                        break;
+
+                    case DEL:
+                        // String(byte[], offset, length): the third argument is a
+                        // length, so the end offset has to be converted.
+                        env.reg.warnings.warn(new String(bytes, p, end - p) +
+                                " redundant nested repeat operator");
+                        break;
+
+                    default:
+                        env.reg.warnings.warn(new String(bytes, p, end - p) +
+                                " nested repeat operator " + Reduce.PopularQStr[targetQNum] +
+                                " and " + Reduce.PopularQStr[nestQNum] + " was replaced with '" +
+                                Reduce.ReduceQStr[Reduce.REDUCE_TABLE[targetQNum][nestQNum].ordinal()] + "'");
+                    }
+                }
+            } // USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+
+            if (targetQNum >= 0) {
+                if (nestQNum >= 0) {
+                    reduceNestedQuantifier(qnt);
+                    return 0;
+                } else if (targetQNum == 1 || targetQNum == 2) { /* * or + */
+                    /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+                    if (!isRepeatInfinite(upper) && upper > 1 && greedy) {
+                        upper = lower == 0 ? 1 : lower;
+                    }
+                }
+            }
+            // falls through to the default branch, which only breaks
+
+        default:
+            break;
+        }
+
+        setTarget(tgt);
+        return 0;
+    }
+
+    /** Marker stored in an upper bound to express "no upper limit". */
+    public static final int REPEAT_INFINITE = -1;
+
+    /** True when n equals REPEAT_INFINITE. */
+    public static boolean isRepeatInfinite(int n) {
+        return n == REPEAT_INFINITE;
+    }
+
+}
diff --git a/src/org/joni/ast/StateNode.java b/src/org/joni/ast/StateNode.java
new file mode 100644
index 0000000..117d3df
--- /dev/null
+++ b/src/org/joni/ast/StateNode.java
@@ -0,0 +1,232 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.constants.NodeStatus;
+
+/**
+ * Base class of nodes that carry a set of NodeStatus flags, kept as bits of
+ * the state field. For every flag there is an is/set/clear accessor triple.
+ */
+public abstract class StateNode extends Node implements NodeStatus {
+    protected int state;
+
+    @Override
+    public String toString(int level) {
+        return "\n state: " + stateToString();
+    }
+
+    /**
+     * Lists the names of all flags currently set, each followed by a space.
+     * CLEN_FIXED is reported like every other flag; it used to be the only
+     * flag missing from this listing.
+     */
+    public String stateToString() {
+        StringBuilder states = new StringBuilder();
+        if (isMinFixed()) states.append("MIN_FIXED ");
+        if (isMaxFixed()) states.append("MAX_FIXED ");
+        if (isCLenFixed()) states.append("CLEN_FIXED ");
+        if (isMark1()) states.append("MARK1 ");
+        if (isMark2()) states.append("MARK2 ");
+        if (isMemBackrefed()) states.append("MEM_BACKREFED ");
+        if (isStopBtSimpleRepeat()) states.append("STOP_BT_SIMPLE_REPEAT ");
+        if (isRecursion()) states.append("RECURSION ");
+        if (isCalled()) states.append("CALLED ");
+        if (isAddrFixed()) states.append("ADDR_FIXED ");
+        if (isNamedGroup()) states.append("NAMED_GROUP ");
+        if (isNameRef()) states.append("NAME_REF ");
+        if (isInRepeat()) states.append("IN_REPEAT ");
+        if (isNestLevel()) states.append("NEST_LEVEL ");
+        if (isByNumber()) states.append("BY_NUMBER ");
+
+        return states.toString();
+    }
+
+    // ---- NST_MIN_FIXED ----
+
+    public boolean isMinFixed() {
+        return (state & NST_MIN_FIXED) != 0;
+    }
+
+    public void setMinFixed() {
+        state |= NST_MIN_FIXED;
+    }
+
+    public void clearMinFixed() {
+        state &= ~NST_MIN_FIXED;
+    }
+
+    // ---- NST_MAX_FIXED ----
+
+    public boolean isMaxFixed() {
+        return (state & NST_MAX_FIXED) != 0;
+    }
+
+    public void setMaxFixed() {
+        state |= NST_MAX_FIXED;
+    }
+
+    public void clearMaxFixed() {
+        state &= ~NST_MAX_FIXED;
+    }
+
+    // ---- NST_CLEN_FIXED ----
+
+    public boolean isCLenFixed() {
+        return (state & NST_CLEN_FIXED) != 0;
+    }
+
+    public void setCLenFixed() {
+        state |= NST_CLEN_FIXED;
+    }
+
+    public void clearCLenFixed() {
+        state &= ~NST_CLEN_FIXED;
+    }
+
+    // ---- NST_MARK1 ----
+
+    public boolean isMark1() {
+        return (state & NST_MARK1) != 0;
+    }
+
+    public void setMark1() {
+        state |= NST_MARK1;
+    }
+
+    public void clearMark1() {
+        state &= ~NST_MARK1;
+    }
+
+    // ---- NST_MARK2 ----
+
+    public boolean isMark2() {
+        return (state & NST_MARK2) != 0;
+    }
+
+    public void setMark2() {
+        state |= NST_MARK2;
+    }
+
+    public void clearMark2() {
+        state &= ~NST_MARK2;
+    }
+
+    // ---- NST_MEM_BACKREFED ----
+
+    public boolean isMemBackrefed() {
+        return (state & NST_MEM_BACKREFED) != 0;
+    }
+
+    public void setMemBackrefed() {
+        state |= NST_MEM_BACKREFED;
+    }
+
+    public void clearMemBackrefed() {
+        state &= ~NST_MEM_BACKREFED;
+    }
+
+    // ---- NST_STOP_BT_SIMPLE_REPEAT ----
+
+    public boolean isStopBtSimpleRepeat() {
+        return (state & NST_STOP_BT_SIMPLE_REPEAT) != 0;
+    }
+
+    public void setStopBtSimpleRepeat() {
+        state |= NST_STOP_BT_SIMPLE_REPEAT;
+    }
+
+    public void clearStopBtSimpleRepeat() {
+        state &= ~NST_STOP_BT_SIMPLE_REPEAT;
+    }
+
+    // ---- NST_RECURSION ----
+
+    public boolean isRecursion() {
+        return (state & NST_RECURSION) != 0;
+    }
+
+    public void setRecursion() {
+        state |= NST_RECURSION;
+    }
+
+    public void clearRecursion() {
+        state &= ~NST_RECURSION;
+    }
+
+    // ---- NST_CALLED ----
+
+    public boolean isCalled() {
+        return (state & NST_CALLED) != 0;
+    }
+
+    public void setCalled() {
+        state |= NST_CALLED;
+    }
+
+    // The unusual capitalisation is part of the public name and is kept for callers.
+    public void clearCAlled() {
+        state &= ~NST_CALLED;
+    }
+
+    // ---- NST_ADDR_FIXED ----
+
+    public boolean isAddrFixed() {
+        return (state & NST_ADDR_FIXED) != 0;
+    }
+
+    public void setAddrFixed() {
+        state |= NST_ADDR_FIXED;
+    }
+
+    public void clearAddrFixed() {
+        state &= ~NST_ADDR_FIXED;
+    }
+
+    // ---- NST_NAMED_GROUP ----
+
+    public boolean isNamedGroup() {
+        return (state & NST_NAMED_GROUP) != 0;
+    }
+
+    public void setNamedGroup() {
+        state |= NST_NAMED_GROUP;
+    }
+
+    public void clearNamedGroup() {
+        state &= ~NST_NAMED_GROUP;
+    }
+
+    // ---- NST_NAME_REF ----
+
+    public boolean isNameRef() {
+        return (state & NST_NAME_REF) != 0;
+    }
+
+    public void setNameRef() {
+        state |= NST_NAME_REF;
+    }
+
+    public void clearNameRef() {
+        state &= ~NST_NAME_REF;
+    }
+
+    // ---- NST_IN_REPEAT ----
+
+    public boolean isInRepeat() {
+        return (state & NST_IN_REPEAT) != 0;
+    }
+
+    public void setInRepeat() {
+        state |= NST_IN_REPEAT;
+    }
+
+    public void clearInRepeat() {
+        state &= ~NST_IN_REPEAT;
+    }
+
+    // ---- NST_NEST_LEVEL ----
+
+    public boolean isNestLevel() {
+        return (state & NST_NEST_LEVEL) != 0;
+    }
+
+    public void setNestLevel() {
+        state |= NST_NEST_LEVEL;
+    }
+
+    public void clearNestLevel() {
+        state &= ~NST_NEST_LEVEL;
+    }
+
+    // ---- NST_BY_NUMBER ----
+
+    public boolean isByNumber() {
+        return (state & NST_BY_NUMBER) != 0;
+    }
+
+    public void setByNumber() {
+        state |= NST_BY_NUMBER;
+    }
+
+    public void clearByNumber() {
+        state &= ~NST_BY_NUMBER;
+    }
+
+}
diff --git a/src/org/joni/ast/StringNode.java b/src/org/joni/ast/StringNode.java
new file mode 100644
index 0000000..bf8217e
--- /dev/null
+++ b/src/org/joni/ast/StringNode.java
@@ -0,0 +1,209 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.jcodings.Encoding;
+import org.joni.constants.StringType;
+
+/**
+ * AST node holding a run of literal bytes: the content is bytes[p .. end).
+ * A node built from (or set to) an external array is flagged as shared and
+ * copies the content into a private buffer before appending to it.
+ */
+public final class StringNode extends Node implements StringType {
+
+    private static final int NODE_STR_MARGIN = 16;
+    private static final int NODE_STR_BUF_SIZE = 24;
+
+    public byte[]bytes;
+    public int p;
+    public int end;
+
+    int flag;
+
+    /** Creates an empty node with a private buffer of NODE_STR_BUF_SIZE bytes. */
+    public StringNode() {
+        this.bytes = new byte[NODE_STR_BUF_SIZE];
+    }
+
+    /** Creates a node that references bytes[p .. end) without copying; it is flagged as shared. */
+    public StringNode(byte[]bytes, int p, int end) {
+        this.bytes = bytes;
+        this.p = p;
+        this.end = end;
+        setShared();
+    }
+
+    /** Creates a node with a private buffer containing the single byte c. */
+    public StringNode(byte c) {
+        this();
+        bytes[end++] = c;
+    }
+
+    /* Ensure there is ahead bytes available in node's buffer
+     * (assumes that the node is not shared)
+     *
+     * Appends write at index end, so the room is measured from end and not
+     * from the content length. When a new buffer is needed the content is
+     * moved to its start and p/end are adjusted to match.
+     */
+    public void ensure(int ahead) {
+        if (end + ahead >= bytes.length) {
+            moveToNewBuffer((end - p) + ahead + NODE_STR_MARGIN);
+        }
+    }
+
+    /* COW and/or ensure there is ahead bytes available in node's buffer
+     */
+    private void modifyEnsure(int ahead) {
+        if (isShared()) {
+            moveToNewBuffer((end - p) + ahead + NODE_STR_MARGIN);
+            clearShared();
+        } else {
+            ensure(ahead);
+        }
+    }
+
+    /* Copies the content to the start of a new array of the given size and
+     * rebases p and end so that they keep describing the same content.
+     */
+    private void moveToNewBuffer(int size) {
+        byte[]tmp = new byte[size];
+        System.arraycopy(bytes, p, tmp, 0, end - p);
+        bytes = tmp;
+        end -= p;
+        p = 0;
+    }
+
+    @Override
+    public int getType() {
+        return STR;
+    }
+
+    @Override
+    public String getName() {
+        return "String";
+    }
+
+    /** Dumps the content: bytes in 0x20..0x7e as characters, all others in hexadecimal. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder();
+        value.append("\n bytes: ");
+        for (int i=p; i<end; i++) {
+            if ((bytes[i] & 0xff) >= 0x20 && (bytes[i] & 0xff) < 0x7f) {
+                value.append((char)bytes[i]);
+            } else {
+                value.append(String.format(" 0x%02x", bytes[i]));
+            }
+        }
+        return value.toString();
+    }
+
+    /** Content length in bytes. */
+    public int length() {
+        return end - p;
+    }
+
+    /** Content length as reported by enc.strLength. */
+    public int length(Encoding enc) {
+        return enc.strLength(bytes, p, end);
+    }
+
+    /**
+     * Cuts the last character off this node.
+     *
+     * @return a new shared node covering the last character (carrying the RAW
+     *         flag of this node), or null when the node is empty or the last
+     *         character starts at p
+     */
+    public StringNode splitLastChar(Encoding enc) {
+        StringNode n = null;
+
+        if (end > p) {
+            int prev = enc.prevCharHead(bytes, p, end, end);
+            if (prev != -1 && prev > p) { /* can be splitted. */
+                n = new StringNode(bytes, prev, end);
+                if (isRaw()) n.setRaw();
+                end = prev;
+            }
+        }
+        return n;
+    }
+
+    /** True when the content is longer than its first character according to enc.length. */
+    public boolean canBeSplit(Encoding enc) {
+        if (end > p) {
+            return enc.length(bytes, p, end) < (end - p);
+        }
+        return false;
+    }
+
+    /** Points this node at bytes[p .. end) without copying and flags it as shared. */
+    public void set(byte[]bytes, int p, int end) {
+        this.bytes = bytes;
+        this.p = p;
+        this.end = end;
+        setShared();
+    }
+
+    /** Appends cat[catP .. catEnd) to the content. */
+    public void cat(byte[]cat, int catP, int catEnd) {
+        int len = catEnd - catP;
+        modifyEnsure(len);
+        System.arraycopy(cat, catP, bytes, end, len);
+        end += len;
+    }
+
+    /** Appends the single byte c to the content. */
+    public void cat(byte c) {
+        modifyEnsure(1);
+        bytes[end++] = c;
+    }
+
+    /**
+     * Empties the node and resets all flags. A shared array is always replaced
+     * by a private buffer: the SHARED flag is dropped here, so keeping the
+     * external array would let a later cat() write into it.
+     */
+    public void clear() {
+        if (isShared() || bytes.length > NODE_STR_BUF_SIZE) bytes = new byte[NODE_STR_BUF_SIZE];
+        flag = 0;
+        p = end = 0;
+    }
+
+    // ---- NSTR_RAW ----
+
+    public void setRaw() {
+        flag |= NSTR_RAW;
+    }
+
+    public void clearRaw() {
+        flag &= ~NSTR_RAW;
+    }
+
+    public boolean isRaw() {
+        return (flag & NSTR_RAW) != 0;
+    }
+
+    // ---- NSTR_AMBIG ----
+
+    public void setAmbig() {
+        flag |= NSTR_AMBIG;
+    }
+
+    public void clearAmbig() {
+        flag &= ~NSTR_AMBIG;
+    }
+
+    public boolean isAmbig() {
+        return (flag & NSTR_AMBIG) != 0;
+    }
+
+    // ---- NSTR_DONT_GET_OPT_INFO ----
+
+    public void setDontGetOptInfo() {
+        flag |= NSTR_DONT_GET_OPT_INFO;
+    }
+
+    public void clearDontGetOptInfo() {
+        flag &= ~NSTR_DONT_GET_OPT_INFO;
+    }
+
+    public boolean isDontGetOptInfo() {
+        return (flag & NSTR_DONT_GET_OPT_INFO) != 0;
+    }
+
+    // ---- NSTR_SHARED ----
+
+    public void setShared() {
+        flag |= NSTR_SHARED;
+    }
+
+    public void clearShared() {
+        flag &= ~NSTR_SHARED;
+    }
+
+    public boolean isShared() {
+        return (flag & NSTR_SHARED) != 0;
+    }
+}
diff --git a/src/org/joni/bench/AbstractBench.java b/src/org/joni/bench/AbstractBench.java
new file mode 100644
index 0000000..5e058f9
--- /dev/null
+++ b/src/org/joni/bench/AbstractBench.java
@@ -0,0 +1,50 @@
+package org.joni.bench;
+
+import org.jcodings.specific.ASCIIEncoding;
+import org.joni.Option;
+import org.joni.Regex;
+import org.joni.Syntax;
+
+/**
+ * Shared driver for the micro benchmarks: compiles a pattern once and times
+ * repeated searches over a subject string, reporting on System.err.
+ */
+public abstract class AbstractBench {
+
+    /**
+     * Runs warmup rounds of times searches each and prints the duration of
+     * every round.
+     */
+    protected void bench(String _reg, String _str, int warmup, int times) throws Exception {
+        byte[] reg = _reg.getBytes();
+        byte[] str = _str.getBytes();
+
+        Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT);
+
+        System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times");
+
+        for (int round = 0; round < warmup; round++) {
+            long elapsed = timeSearches(p, str, times);
+            System.err.println(": " + elapsed + "ms");
+        }
+    }
+
+    /**
+     * Runs warmup rounds of times searches each, prints a dot per round and
+     * finally the shortest round duration.
+     */
+    protected void benchBestOf(String _reg, String _str, int warmup, int times) throws Exception {
+        byte[] reg = _reg.getBytes();
+        byte[] str = _str.getBytes();
+
+        Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT);
+
+        System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times");
+
+        long best = Long.MAX_VALUE;
+
+        for (int round = 0; round < warmup; round++) {
+            best = Math.min(best, timeSearches(p, str, times));
+            System.err.print(".");
+        }
+        System.err.println(": " + best + "ms");
+    }
+
+    /** Performs times searches of p over the whole of str and returns the elapsed milliseconds. */
+    private static long timeSearches(Regex p, byte[] str, int times) throws Exception {
+        long before = System.currentTimeMillis();
+        for (int i = 0; i < times; i++) {
+            p.matcher(str, 0, str.length).search(0, str.length, Option.NONE);
+        }
+        return System.currentTimeMillis() - before;
+    }
+}
diff --git a/src/org/joni/bench/BenchGreedyBacktrack.java b/src/org/joni/bench/BenchGreedyBacktrack.java
new file mode 100644
index 0000000..dcda986
--- /dev/null
+++ b/src/org/joni/bench/BenchGreedyBacktrack.java
@@ -0,0 +1,7 @@
+package org.joni.bench;
+
+public class BenchGreedyBacktrack extends AbstractBench {
+ public static void main(String[] args) throws Exception {
+ new BenchGreedyBacktrack().bench(".*_p","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/",10,1000000);
+ }
+}
diff --git a/src/org/joni/bench/BenchRailsRegs.java b/src/org/joni/bench/BenchRailsRegs.java
new file mode 100644
index 0000000..c11d208
--- /dev/null
+++ b/src/org/joni/bench/BenchRailsRegs.java
@@ -0,0 +1,31 @@
+package org.joni.bench;
+
+/** Runs benchBestOf over a fixed table of pattern / subject pairs. */
+public class BenchRailsRegs extends AbstractBench {
+    public static void main(String[] args) throws Exception {
+        // each row: { pattern, subject }
+        final String[][] regexps = {
+            {"a.*?[b-z]{2,4}aaaaaa","afdgdsgderaabxxaaaaaaaaaaaaaaaaaaaaaaaa"},
+            {"://","/shop/viewCategory.shtml?category=DOGS"},
+            {"^\\w+\\://[^/]+(/.*|$)$","/shop/viewCategory.shtml?category=DOGS"},
+            {"\\A/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A/shop/signonForm\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A/shop/newAccountForm\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A/shop/newAccount\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A/shop/viewCart\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A/shop/index\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A/shop/viewCategory\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+            {"\\A(?:::)?([A-Z]\\w*(?:::[A-Z]\\w*)*)\\z","CategoriesController"},
+            {"\\Ainsert","SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"},
+            {"\\A\\(?\\s*(select|show)","SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"},
+            {".*?\n","1b341ffe23b5298676d535fcabd3d0d7"},
+            {"^find_(all_by|by)_([_a-zA-Z]\\w*)$","find_by_string_id"},
+            {"\\.rjs$","categories/show.rhtml"},
+            {"^[-a-z]+://","petstore.css"},
+            {"^get$",""},
+            {"^post$",""},
+            {"^[^:]+","www.example.com"},
+            {"(=|\\?|_before_type_cast)$","updated_on"},
+            {"^(.*?)=(.*?);","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/"}
+        };
+
+        for (int row = 0; row < regexps.length; row++) {
+            final String pattern = regexps[row][0];
+            final String subject = regexps[row][1];
+            new BenchRailsRegs().benchBestOf(pattern, subject, 10, 1000000);
+        }
+    }
+}
diff --git a/src/org/joni/bench/BenchSeveralRegexps.java b/src/org/joni/bench/BenchSeveralRegexps.java
new file mode 100644
index 0000000..d393feb
--- /dev/null
+++ b/src/org/joni/bench/BenchSeveralRegexps.java
@@ -0,0 +1,17 @@
+package org.joni.bench;
+
+/** Runs benchBestOf for a handful of patterns, mostly against one cookie-like subject. */
+public class BenchSeveralRegexps extends AbstractBench {
+    public static void main(String[] args) throws Exception {
+        final int base = 1000000;
+        final int rounds = 10;
+        final String cookie = "_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/";
+
+        new BenchSeveralRegexps().benchBestOf("a", " a", rounds, 4 * base);
+
+        new BenchSeveralRegexps().benchBestOf(".*?=", cookie, rounds, base);
+
+        new BenchSeveralRegexps().benchBestOf("^(.*?)=(.*?);", cookie, rounds, base);
+
+        new BenchSeveralRegexps().benchBestOf(".*_p", cookie, rounds, 4 * base);
+
+        new BenchSeveralRegexps().benchBestOf(".*=", cookie, rounds, 4 * base);
+    }
+}
diff --git a/src/org/joni/constants/AnchorType.java b/src/org/joni/constants/AnchorType.java
new file mode 100644
index 0000000..144dd1d
--- /dev/null
+++ b/src/org/joni/constants/AnchorType.java
@@ -0,0 +1,58 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface AnchorType {
+ final int BEGIN_BUF = (1<<0);
+ final int BEGIN_LINE = (1<<1);
+ final int BEGIN_POSITION = (1<<2);
+ final int END_BUF = (1<<3);
+ final int SEMI_END_BUF = (1<<4);
+ final int END_LINE = (1<<5);
+
+ final int WORD_BOUND = (1<<6);
+ final int NOT_WORD_BOUND = (1<<7);
+ final int WORD_BEGIN = (1<<8);
+ final int WORD_END = (1<<9);
+ final int PREC_READ = (1<<10);
+ final int PREC_READ_NOT = (1<<11);
+ final int LOOK_BEHIND = (1<<12);
+ final int LOOK_BEHIND_NOT = (1<<13);
+
+ final int ANYCHAR_STAR = (1<<14); /* ".*" optimize info */
+ final int ANYCHAR_STAR_ML = (1<<15); /* ".*" optimize info (multi-line) */
+
+ final int ANYCHAR_STAR_MASK = (ANYCHAR_STAR | ANYCHAR_STAR_ML);
+ final int END_BUF_MASK = (END_BUF | SEMI_END_BUF);
+
+ final int ALLOWED_IN_LB = ( LOOK_BEHIND |
+ BEGIN_LINE |
+ END_LINE |
+ BEGIN_BUF |
+ BEGIN_POSITION );
+
+ final int ALLOWED_IN_LB_NOT = ( LOOK_BEHIND |
+ LOOK_BEHIND_NOT |
+ BEGIN_LINE |
+ END_LINE |
+ BEGIN_BUF |
+ BEGIN_POSITION );
+
+}
diff --git a/src/org/joni/constants/Arguments.java b/src/org/joni/constants/Arguments.java
new file mode 100644
index 0000000..1aacfdd
--- /dev/null
+++ b/src/org/joni/constants/Arguments.java
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+/**
+ * Integer codes: SPECIAL is -1, the remaining constants count up from 0.
+ * NOTE(review): the names suggest kinds of opcode arguments - confirm against the users of this interface.
+ */
+public interface Arguments {
+    int SPECIAL = -1;
+    int NON = 0;
+    int RELADDR = 1;
+    int ABSADDR = 2;
+    int LENGTH = 3;
+    int MEMNUM = 4;
+    int OPTION = 5;
+    int STATE_CHECK = 6;
+}
diff --git a/src/org/joni/constants/AsmConstants.java b/src/org/joni/constants/AsmConstants.java
new file mode 100644
index 0000000..6329780
--- /dev/null
+++ b/src/org/joni/constants/AsmConstants.java
@@ -0,0 +1,49 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+/**
+ * Slot indexes and field names shared by the code that refers to them.
+ * The spelling of MSA_OPTONS is part of the public name and is kept as is.
+ */
+public interface AsmConstants {
+    int THIS = 0;
+
+    // argument indexes
+    int RANGE = 1;
+    int SSTART = 2;
+    int SPREV = 3;
+
+    // local var indexes
+    int S = 4;                      // current index
+    int BYTES = 5;                  // string
+    int LAST_INDEX = BYTES + 1;
+
+    // frequently used field names (all ints)
+    String STR = "str";
+    String END = "end";
+    String MSA_START = "msaStart";
+    String MSA_OPTONS = "msaOptions";
+    String MSA_BEST_LEN = "msaBestLen";
+    String MSA_BEST_S = "msaBestS";
+    String MSA_BEGIN = "msaBegin";
+    String MSA_END = "msaEnd";
+
+    // generated field names
+    String BITSET = "bitset";
+    String CODERANGE = "range";
+    String TEMPLATE = "template";
+}
diff --git a/src/org/joni/constants/CCSTATE.java b/src/org/joni/constants/CCSTATE.java
new file mode 100644
index 0000000..23baa87
--- /dev/null
+++ b/src/org/joni/constants/CCSTATE.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+/** Enumeration with the four constants VALUE, RANGE, COMPLETE and START, declared in that order. */
+public enum CCSTATE {
+    VALUE, RANGE, COMPLETE, START
+}
diff --git a/src/org/joni/constants/CCVALTYPE.java b/src/org/joni/constants/CCVALTYPE.java
new file mode 100644
index 0000000..b531e30
--- /dev/null
+++ b/src/org/joni/constants/CCVALTYPE.java
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+/** Enumeration with the three constants SB, CODE_POINT and CLASS, declared in that order. */
+public enum CCVALTYPE {
+    SB, CODE_POINT, CLASS
+}
diff --git a/src/org/joni/constants/EncloseType.java b/src/org/joni/constants/EncloseType.java
new file mode 100644
index 0000000..553b5dc
--- /dev/null
+++ b/src/org/joni/constants/EncloseType.java
@@ -0,0 +1,29 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface EncloseType { // enclose types as bit flags: each is a distinct power of two, so they can be OR-ed into masks
+ final int MEMORY = 1<<0;
+ final int OPTION = 1<<1;
+ final int STOP_BACKTRACK = 1<<2;
+
+ final int ALLOWED_IN_LB = MEMORY; // mask with only the MEMORY bit set; NOTE(review): "LB" presumably means look-behind - confirm
+ final int ALLOWED_IN_LB_NOT = 0; // empty mask: no enclose type bit is set
+}
diff --git a/src/org/joni/constants/MetaChar.java b/src/org/joni/constants/MetaChar.java
new file mode 100644
index 0000000..3589aff
--- /dev/null
+++ b/src/org/joni/constants/MetaChar.java
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface MetaChar { // consecutive ids 0..5; NOTE(review): presumably selectors for configurable meta characters - confirm at call sites
+ final int ESCAPE = 0;
+ final int ANYCHAR = 1;
+ final int ANYTIME = 2;
+ final int ZERO_OR_ONE_TIME = 3;
+ final int ONE_OR_MORE_TIME = 4;
+ final int ANYCHAR_ANYTIME = 5;
+
+ final int INEFFECTIVE_META_CHAR = 0; // same numeric value as ESCAPE; NOTE(review): presumably a "disabled" marker from a different value domain than the ids above - confirm before comparing the two
+}
diff --git a/src/org/joni/constants/NodeStatus.java b/src/org/joni/constants/NodeStatus.java
new file mode 100644
index 0000000..901d47d
--- /dev/null
+++ b/src/org/joni/constants/NodeStatus.java
@@ -0,0 +1,39 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface NodeStatus { // node status flags: each constant is a single distinct bit (bits 0..14), so flags can be combined with |
+ /* status bits */
+ final int NST_MIN_FIXED = (1<<0);
+ final int NST_MAX_FIXED = (1<<1);
+ final int NST_CLEN_FIXED = (1<<2);
+ final int NST_MARK1 = (1<<3);
+ final int NST_MARK2 = (1<<4);
+ final int NST_MEM_BACKREFED = (1<<5);
+ final int NST_STOP_BT_SIMPLE_REPEAT= (1<<6);
+ final int NST_RECURSION = (1<<7);
+ final int NST_CALLED = (1<<8);
+ final int NST_ADDR_FIXED = (1<<9);
+ final int NST_NAMED_GROUP = (1<<10);
+ final int NST_NAME_REF = (1<<11);
+ final int NST_IN_REPEAT = (1<<12); /* STK_REPEAT is nested in stack. */
+ final int NST_NEST_LEVEL = (1<<13);
+ final int NST_BY_NUMBER = (1<<14); /* {n,m} */
+}
diff --git a/src/org/joni/constants/NodeType.java b/src/org/joni/constants/NodeType.java
new file mode 100644
index 0000000..dccece2
--- /dev/null
+++ b/src/org/joni/constants/NodeType.java
@@ -0,0 +1,66 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface NodeType { // node type ids plus one mask bit per id, and two masks built from those bits
+ /* node type ids: consecutive values 0..10 */
+ final int STR = 0;
+ final int CCLASS = 1;
+ final int CTYPE = 2;
+ final int CANY = 3;
+ final int BREF = 4;
+ final int QTFR = 5;
+ final int ENCLOSE = 6;
+ final int ANCHOR = 7;
+ final int LIST = 8;
+ final int ALT = 9;
+ final int CALL = 10;
+
+ final int BIT_STR = 1 << STR; /* BIT_x is 1 << x, i.e. the mask bit for node type x */
+ final int BIT_CCLASS = 1 << CCLASS;
+ final int BIT_CTYPE = 1 << CTYPE;
+ final int BIT_CANY = 1 << CANY;
+ final int BIT_BREF = 1 << BREF;
+ final int BIT_QTFR = 1 << QTFR;
+ final int BIT_ENCLOSE = 1 << ENCLOSE;
+ final int BIT_ANCHOR = 1 << ANCHOR;
+ final int BIT_LIST = 1 << LIST;
+ final int BIT_ALT = 1 << ALT;
+ final int BIT_CALL = 1 << CALL;
+
+ /* allowed node types in look-behind: every type above except BREF */
+ final int ALLOWED_IN_LB = ( BIT_LIST |
+ BIT_ALT |
+ BIT_STR |
+ BIT_CCLASS |
+ BIT_CTYPE |
+ BIT_CANY |
+ BIT_ANCHOR |
+ BIT_ENCLOSE |
+ BIT_QTFR |
+ BIT_CALL );
+
+ final int SIMPLE = ( BIT_STR | /* mask of the five types STR, CCLASS, CTYPE, CANY and BREF */
+ BIT_CCLASS |
+ BIT_CTYPE |
+ BIT_CANY |
+ BIT_BREF);
+
+}
diff --git a/src/org/joni/constants/OPCode.java b/src/org/joni/constants/OPCode.java
new file mode 100644
index 0000000..8e06f88
--- /dev/null
+++ b/src/org/joni/constants/OPCode.java
@@ -0,0 +1,387 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+import org.joni.Config;
+
+public interface OPCode { // opcode numbers 0..106 plus two debug tables that list one entry per opcode, in opcode order
+ final int FINISH = 0; /* matching process terminator (no more alternative) */
+ final int END = 1; /* pattern code terminator (success end) */
+
+ final int EXACT1 = 2; /* single byte, N = 1 */
+ final int EXACT2 = 3; /* single byte, N = 2 */
+ final int EXACT3 = 4; /* single byte, N = 3 */
+ final int EXACT4 = 5; /* single byte, N = 4 */
+ final int EXACT5 = 6; /* single byte, N = 5 */
+ final int EXACTN = 7; /* single byte */
+ final int EXACTMB2N1 = 8; /* mb-length = 2 N = 1 */
+ final int EXACTMB2N2 = 9; /* mb-length = 2 N = 2 */
+ final int EXACTMB2N3 = 10; /* mb-length = 2 N = 3 */
+ final int EXACTMB2N = 11; /* mb-length = 2 */
+ final int EXACTMB3N = 12; /* mb-length = 3 */
+ final int EXACTMBN = 13; /* other length */
+
+ final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC = 15; /* single byte, ignore case */
+
+ final int CCLASS = 16;
+ final int CCLASS_MB = 17;
+ final int CCLASS_MIX = 18;
+ final int CCLASS_NOT = 19;
+ final int CCLASS_MB_NOT = 20;
+ final int CCLASS_MIX_NOT = 21;
+ final int CCLASS_NODE = 22; /* pointer to CClassNode node */
+
+ final int ANYCHAR = 23; /* "." */
+ final int ANYCHAR_ML = 24; /* "." multi-line */
+ final int ANYCHAR_STAR = 25; /* ".*" */
+ final int ANYCHAR_ML_STAR = 26; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT = 27;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT = 28;
+
+ final int WORD = 29;
+ final int NOT_WORD = 30;
+ final int WORD_BOUND = 31;
+ final int NOT_WORD_BOUND = 32;
+ final int WORD_BEGIN = 33;
+ final int WORD_END = 34;
+
+ final int BEGIN_BUF = 35;
+ final int END_BUF = 36;
+ final int BEGIN_LINE = 37;
+ final int END_LINE = 38;
+ final int SEMI_END_BUF = 39;
+ final int BEGIN_POSITION = 40;
+
+ final int BACKREF1 = 41;
+ final int BACKREF2 = 42;
+ final int BACKREFN = 43;
+ final int BACKREFN_IC = 44;
+ final int BACKREF_MULTI = 45;
+ final int BACKREF_MULTI_IC = 46;
+ final int BACKREF_WITH_LEVEL = 47; /* \k<xxx+n>, \k<xxx-n> */
+
+ final int MEMORY_START = 48;
+ final int MEMORY_START_PUSH = 49; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH = 50; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH_REC = 51; /* push back-tracker to stack */
+ final int MEMORY_END = 52;
+ final int MEMORY_END_REC = 53; /* push marker to stack */
+
+ final int FAIL = 54; /* pop stack and move */
+ final int JUMP = 55;
+ final int PUSH = 56;
+ final int POP = 57;
+ final int PUSH_OR_JUMP_EXACT1 = 58; /* if match exact then push, else jump. */
+ final int PUSH_IF_PEEK_NEXT = 59; /* if match exact then push, else none. */
+
+ final int REPEAT = 60; /* {n,m} */
+ final int REPEAT_NG = 61; /* {n,m}? (non greedy) */
+ final int REPEAT_INC = 62;
+ final int REPEAT_INC_NG = 63; /* non greedy */
+ final int REPEAT_INC_SG = 64; /* search and get in stack */
+ final int REPEAT_INC_NG_SG = 65; /* search and get in stack (non greedy) */
+
+ final int NULL_CHECK_START = 66; /* null loop checker start */
+ final int NULL_CHECK_END = 67; /* null loop checker end */
+ final int NULL_CHECK_END_MEMST = 68; /* null loop checker end (with capture status) */
+ final int NULL_CHECK_END_MEMST_PUSH = 69; /* with capture status and push check-end */
+
+ final int PUSH_POS = 70; /* (?=...) start */
+ final int POP_POS = 71; /* (?=...) end */
+ final int PUSH_POS_NOT = 72; /* (?!...) start */
+ final int FAIL_POS = 73; /* (?!...) end */
+ final int PUSH_STOP_BT = 74; /* (?>...) start */
+ final int POP_STOP_BT = 75; /* (?>...) end */
+ final int LOOK_BEHIND = 76; /* (?<=...) start (needs no end opcode) */
+ final int PUSH_LOOK_BEHIND_NOT = 77; /* (?<!...) start */
+ final int FAIL_LOOK_BEHIND_NOT = 78; /* (?<!...) end */
+
+ final int CALL = 79; /* \g<name> */
+ final int RETURN = 80;
+
+ final int STATE_CHECK_PUSH = 81; /* combination explosion check and push */
+ final int STATE_CHECK_PUSH_OR_JUMP = 82; /* check ok -> push, else jump */
+ final int STATE_CHECK = 83; /* check only */
+ final int STATE_CHECK_ANYCHAR_STAR = 84;
+ final int STATE_CHECK_ANYCHAR_ML_STAR = 85;
+
+ /* no need: IS_DYNAMIC_OPTION() == 0 */
+ final int SET_OPTION_PUSH = 86; /* set option and push recover option */
+ final int SET_OPTION = 87; /* set option */
+
+ // single byte versions
+ final int ANYCHAR_SB = 88; /* "." */
+ final int ANYCHAR_ML_SB = 89; /* "." multi-line */
+ final int ANYCHAR_STAR_SB = 90; /* ".*" */
+ final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
+ final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
+ final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
+
+ final int CCLASS_SB = 96;
+ final int CCLASS_NOT_SB = 97;
+ final int WORD_SB = 98;
+ final int NOT_WORD_SB = 99;
+ final int WORD_BOUND_SB = 100;
+ final int NOT_WORD_BOUND_SB = 101;
+ final int WORD_BEGIN_SB = 102;
+ final int WORD_END_SB = 103;
+
+ final int LOOK_BEHIND_SB = 104;
+
+ final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC_SB = 106; /* single byte, ignore case */
+
+
+ public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] { // printable names in opcode order (index i names opcode value i); null unless Config.DEBUG_COMPILE
+ "finish", /*OP_FINISH*/
+ "end", /*OP_END*/
+ "exact1", /*OP_EXACT1*/
+ "exact2", /*OP_EXACT2*/
+ "exact3", /*OP_EXACT3*/
+ "exact4", /*OP_EXACT4*/
+ "exact5", /*OP_EXACT5*/
+ "exactn", /*OP_EXACTN*/
+ "exactmb2-n1", /*OP_EXACTMB2N1*/
+ "exactmb2-n2", /*OP_EXACTMB2N2*/
+ "exactmb2-n3", /*OP_EXACTMB2N3*/
+ "exactmb2-n", /*OP_EXACTMB2N*/
+ "exactmb3n", /*OP_EXACTMB3N*/
+ "exactmbn", /*OP_EXACTMBN*/
+ "exact1-ic", /*OP_EXACT1_IC*/
+ "exactn-ic", /*OP_EXACTN_IC*/
+ "cclass", /*OP_CCLASS*/
+ "cclass-mb", /*OP_CCLASS_MB*/
+ "cclass-mix", /*OP_CCLASS_MIX*/
+ "cclass-not", /*OP_CCLASS_NOT*/
+ "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
+ "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
+ "cclass-node", /*OP_CCLASS_NODE*/
+ "anychar", /*OP_ANYCHAR*/
+ "anychar-ml", /*OP_ANYCHAR_ML*/
+ "anychar*", /*OP_ANYCHAR_STAR*/
+ "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
+ "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
+ "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
+ "word", /*OP_WORD*/
+ "not-word", /*OP_NOT_WORD*/
+ "word-bound", /*OP_WORD_BOUND*/
+ "not-word-bound", /*OP_NOT_WORD_BOUND*/
+ "word-begin", /*OP_WORD_BEGIN*/
+ "word-end", /*OP_WORD_END*/
+ "begin-buf", /*OP_BEGIN_BUF*/
+ "end-buf", /*OP_END_BUF*/
+ "begin-line", /*OP_BEGIN_LINE*/
+ "end-line", /*OP_END_LINE*/
+ "semi-end-buf", /*OP_SEMI_END_BUF*/
+ "begin-position", /*OP_BEGIN_POSITION*/
+ "backref1", /*OP_BACKREF1*/
+ "backref2", /*OP_BACKREF2*/
+ "backrefn", /*OP_BACKREFN*/
+ "backrefn-ic", /*OP_BACKREFN_IC*/
+ "backref_multi", /*OP_BACKREF_MULTI*/
+ "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
+ "backref_at_level", /*OP_BACKREF_WITH_LEVEL*/
+ "mem-start", /*OP_MEMORY_START*/
+ "mem-start-push", /*OP_MEMORY_START_PUSH*/
+ "mem-end-push", /*OP_MEMORY_END_PUSH*/
+ "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
+ "mem-end", /*OP_MEMORY_END*/
+ "mem-end-rec", /*OP_MEMORY_END_REC*/
+ "fail", /*OP_FAIL*/
+ "jump", /*OP_JUMP*/
+ "push", /*OP_PUSH*/
+ "pop", /*OP_POP*/
+ "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
+ "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
+ "repeat", /*OP_REPEAT*/
+ "repeat-ng", /*OP_REPEAT_NG*/
+ "repeat-inc", /*OP_REPEAT_INC*/
+ "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
+ "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
+ "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
+ "null-check-start", /*OP_NULL_CHECK_START*/
+ "null-check-end", /*OP_NULL_CHECK_END*/
+ "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
+ "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
+ "push-pos", /*OP_PUSH_POS*/
+ "pop-pos", /*OP_POP_POS*/
+ "push-pos-not", /*OP_PUSH_POS_NOT*/
+ "fail-pos", /*OP_FAIL_POS*/
+ "push-stop-bt", /*OP_PUSH_STOP_BT*/
+ "pop-stop-bt", /*OP_POP_STOP_BT*/
+ "look-behind", /*OP_LOOK_BEHIND*/
+ "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
+ "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
+ "call", /*OP_CALL*/
+ "return", /*OP_RETURN*/
+ "state-check-push", /*OP_STATE_CHECK_PUSH*/
+ "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
+ "state-check", /*OP_STATE_CHECK*/
+ "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
+ "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
+ "set-option-push", /*OP_SET_OPTION_PUSH*/
+ "set-option", /*OP_SET_OPTION*/
+
+ // single byte versions
+ "anychar-sb", /*OP_ANYCHAR_SB*/
+ "anychar-ml-sb", /*OP_ANYCHAR_ML_SB*/
+ "anychar*-sb", /*OP_ANYCHAR_STAR_SB*/
+ "anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR_SB*/
+ "anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
+ "anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
+ "state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
+ "state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/
+
+ "cclass-sb", /*OP_CCLASS_SB*/
+ "cclass-not-sb", /*OP_CCLASS_NOT_SB*/
+
+ "word-sb", /*OP_WORD_SB*/
+ "not-word-sb", /*OP_NOT_WORD_SB*/
+ "word-bound-sb", /*OP_WORD_BOUND_SB*/
+ "not-word-bound-sb", /*OP_NOT_WORD_BOUND_SB*/
+ "word-begin-sb", /*OP_WORD_BEGIN_SB*/
+ "word-end-sb", /*OP_WORD_END_SB*/
+
+ "look-behind-sb", /*OP_LOOK_BEHIND_SB*/
+
+ "exact1-ic-sb", /*OP_EXACT1_IC_SB*/
+ "exactn-ic-sb", /*OP_EXACTN_IC_SB*/
+
+ } : null;
+
+ public final int OpCodeArgTypes[] = Config.DEBUG_COMPILE ? new int[] { // one Arguments.* kind per opcode, in the same opcode order as OpCodeNames; null unless Config.DEBUG_COMPILE
+ Arguments.NON, /*OP_FINISH*/
+ Arguments.NON, /*OP_END*/
+ Arguments.SPECIAL, /*OP_EXACT1*/
+ Arguments.SPECIAL, /*OP_EXACT2*/
+ Arguments.SPECIAL, /*OP_EXACT3*/
+ Arguments.SPECIAL, /*OP_EXACT4*/
+ Arguments.SPECIAL, /*OP_EXACT5*/
+ Arguments.SPECIAL, /*OP_EXACTN*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N1*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N2*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N3*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N*/
+ Arguments.SPECIAL, /*OP_EXACTMB3N*/
+ Arguments.SPECIAL, /*OP_EXACTMBN*/
+ Arguments.SPECIAL, /*OP_EXACT1_IC*/
+ Arguments.SPECIAL, /*OP_EXACTN_IC*/
+ Arguments.SPECIAL, /*OP_CCLASS*/
+ Arguments.SPECIAL, /*OP_CCLASS_MB*/
+ Arguments.SPECIAL, /*OP_CCLASS_MIX*/
+ Arguments.SPECIAL, /*OP_CCLASS_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_NODE*/
+ Arguments.NON, /*OP_ANYCHAR*/
+ Arguments.NON, /*OP_ANYCHAR_ML*/
+ Arguments.NON, /*OP_ANYCHAR_STAR*/
+ Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
+ Arguments.NON, /*OP_WORD*/
+ Arguments.NON, /*OP_NOT_WORD*/
+ Arguments.NON, /*OP_WORD_BOUND*/
+ Arguments.NON, /*OP_NOT_WORD_BOUND*/
+ Arguments.NON, /*OP_WORD_BEGIN*/
+ Arguments.NON, /*OP_WORD_END*/
+ Arguments.NON, /*OP_BEGIN_BUF*/
+ Arguments.NON, /*OP_END_BUF*/
+ Arguments.NON, /*OP_BEGIN_LINE*/
+ Arguments.NON, /*OP_END_LINE*/
+ Arguments.NON, /*OP_SEMI_END_BUF*/
+ Arguments.NON, /*OP_BEGIN_POSITION*/
+ Arguments.NON, /*OP_BACKREF1*/
+ Arguments.NON, /*OP_BACKREF2*/
+ Arguments.MEMNUM, /*OP_BACKREFN*/
+ Arguments.SPECIAL, /*OP_BACKREFN_IC*/
+ Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
+ Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
+ Arguments.SPECIAL, /*OP_BACKREF_WITH_LEVEL*/
+ Arguments.MEMNUM, /*OP_MEMORY_START*/
+ Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
+ Arguments.MEMNUM, /*OP_MEMORY_END*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
+ Arguments.NON, /*OP_FAIL*/
+ Arguments.RELADDR, /*OP_JUMP*/
+ Arguments.RELADDR, /*OP_PUSH*/
+ Arguments.NON, /*OP_POP*/
+ Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
+ Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
+ Arguments.SPECIAL, /*OP_REPEAT*/
+ Arguments.SPECIAL, /*OP_REPEAT_NG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
+ Arguments.NON, /*OP_PUSH_POS*/
+ Arguments.NON, /*OP_POP_POS*/
+ Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
+ Arguments.NON, /*OP_FAIL_POS*/
+ Arguments.NON, /*OP_PUSH_STOP_BT*/
+ Arguments.NON, /*OP_POP_STOP_BT*/
+ Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
+ Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
+ Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
+ Arguments.ABSADDR, /*OP_CALL*/
+ Arguments.NON, /*OP_RETURN*/
+ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
+ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
+ Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
+ Arguments.OPTION, /*OP_SET_OPTION*/
+
+ // single byte versions
+ Arguments.NON, /*OP_ANYCHAR_SB*/
+ Arguments.NON, /*OP_ANYCHAR_ML_SB*/
+ Arguments.NON, /*OP_ANYCHAR_STAR_SB*/
+ Arguments.NON, /*OP_ANYCHAR_ML_STAR_SB*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/
+
+ Arguments.SPECIAL, /*OP_CCLASS_SB*/
+ Arguments.SPECIAL, /*OP_CCLASS_NOT_SB*/
+
+ Arguments.NON, /*OP_WORD_SB*/
+ Arguments.NON, /*OP_NOT_WORD_SB*/
+ Arguments.NON, /*OP_WORD_BOUND_SB*/
+ Arguments.NON, /*OP_NOT_WORD_BOUND_SB*/
+ Arguments.NON, /*OP_WORD_BEGIN_SB*/
+ Arguments.NON, /*OP_WORD_END_SB*/
+
+ Arguments.SPECIAL, /*OP_LOOK_BEHIND_SB*/
+
+ Arguments.SPECIAL, /*OP_EXACT1_IC_SB*/
+ Arguments.SPECIAL, /*OP_EXACTN_IC_SB*/
+ } : null;
+}
diff --git a/src/org/joni/constants/OPSize.java b/src/org/joni/constants/OPSize.java
new file mode 100644
index 0000000..dcd419b
--- /dev/null
+++ b/src/org/joni/constants/OPSize.java
@@ -0,0 +1,75 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface OPSize { // sizes of opcodes and their operands; every unit size below is 1 and composite sizes are sums of units
+
+ // unit sizes, kept as separate names: this might be helpful for a potential byte[] migration
+ final int OPCODE = 1;
+ final int RELADDR = 1;
+ final int ABSADDR = 1;
+ final int LENGTH = 1;
+ final int MEMNUM = 1;
+ final int STATE_CHECK_NUM = 1;
+ final int REPEATNUM = 1;
+ final int OPTION = 1;
+ final int CODE_POINT = 1;
+ final int POINTER = 1;
+
+ /* op-code + arg size */
+
+ final int ANYCHAR_STAR = OPCODE;
+ final int ANYCHAR_STAR_PEEK_NEXT = (OPCODE + 1); // literal + 1 is one extra operand unit; NOTE(review): presumably the peeked character - confirm
+ final int JUMP = (OPCODE + RELADDR);
+ final int PUSH = (OPCODE + RELADDR);
+ final int POP = OPCODE;
+ final int PUSH_OR_JUMP_EXACT1 = (OPCODE + RELADDR + 1); // literal + 1: one extra operand unit beyond the address
+ final int PUSH_IF_PEEK_NEXT = (OPCODE + RELADDR + 1); // literal + 1: one extra operand unit beyond the address
+ final int REPEAT_INC = (OPCODE + MEMNUM);
+ final int REPEAT_INC_NG = (OPCODE + MEMNUM);
+ final int PUSH_POS = OPCODE;
+ final int PUSH_POS_NOT = (OPCODE + RELADDR);
+ final int POP_POS = OPCODE;
+ final int FAIL_POS = OPCODE;
+ final int SET_OPTION = (OPCODE + OPTION);
+ final int SET_OPTION_PUSH = (OPCODE + OPTION);
+ final int FAIL = OPCODE;
+ final int MEMORY_START = (OPCODE + MEMNUM);
+ final int MEMORY_START_PUSH = (OPCODE + MEMNUM);
+ final int MEMORY_END_PUSH = (OPCODE + MEMNUM);
+ final int MEMORY_END_PUSH_REC = (OPCODE + MEMNUM);
+ final int MEMORY_END = (OPCODE + MEMNUM);
+ final int MEMORY_END_REC = (OPCODE + MEMNUM);
+ final int PUSH_STOP_BT = OPCODE;
+ final int POP_STOP_BT = OPCODE;
+ final int NULL_CHECK_START = (OPCODE + MEMNUM);
+ final int NULL_CHECK_END = (OPCODE + MEMNUM);
+ final int LOOK_BEHIND = (OPCODE + LENGTH);
+ final int PUSH_LOOK_BEHIND_NOT = (OPCODE + RELADDR + LENGTH);
+ final int FAIL_LOOK_BEHIND_NOT = OPCODE;
+ final int CALL = (OPCODE + ABSADDR);
+ final int RETURN = OPCODE;
+
+ // #ifdef USE_COMBINATION_EXPLOSION_CHECK (marker kept from the C original; the constants below are always defined here)
+ final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM);
+ final int STATE_CHECK_PUSH = (OPCODE + STATE_CHECK_NUM + RELADDR);
+ final int STATE_CHECK_PUSH_OR_JUMP = (OPCODE + STATE_CHECK_NUM + RELADDR);
+ final int STATE_CHECK_ANYCHAR_STAR = (OPCODE + STATE_CHECK_NUM);
+}
diff --git a/src/org/joni/constants/Reduce.java b/src/org/joni/constants/Reduce.java
new file mode 100644
index 0000000..e62de7f
--- /dev/null
+++ b/src/org/joni/constants/Reduce.java
@@ -0,0 +1,60 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+import static org.joni.constants.Reduce.ReduceType.A;
+import static org.joni.constants.Reduce.ReduceType.AQ;
+import static org.joni.constants.Reduce.ReduceType.ASIS;
+import static org.joni.constants.Reduce.ReduceType.DEL;
+import static org.joni.constants.Reduce.ReduceType.PQ_Q;
+import static org.joni.constants.Reduce.ReduceType.P_QQ;
+import static org.joni.constants.Reduce.ReduceType.QQ;
+
+public interface Reduce { // quantifier reduction: the ReduceType actions, a 6x6 lookup table of them, and two string tables
+
+ enum ReduceType { // ReduceQStr below has one entry per constant, in this declaration order
+ ASIS, /* as is */
+ DEL, /* delete parent */
+ A, /* to '*' */
+ AQ, /* to '*?' */
+ QQ, /* to '??' */
+ P_QQ, /* to '+)??' */
+ PQ_Q, /* to '+?)?' */
+ }
+
+ final ReduceType[][]REDUCE_TABLE = { // 6 rows x 6 columns; rows follow the PopularQStr order; NOTE(review): columns presumably use the same order - confirm which index is parent and which is child at the call site
+ {DEL, A, A, QQ, AQ, ASIS}, /* '?' */
+ {DEL, DEL, DEL, P_QQ, P_QQ, DEL}, /* '*' */
+ {A, A, DEL, ASIS, P_QQ, DEL}, /* '+' */
+ {DEL, AQ, AQ, DEL, AQ, AQ}, /* '??' */
+ {DEL, DEL, DEL, DEL, DEL, DEL}, /* '*?' */
+ {ASIS, PQ_Q, DEL, AQ, AQ, DEL} /* '+?' */
+ };
+
+
+ final String PopularQStr[] = new String[] { // the six quantifier spellings, in the same order as the REDUCE_TABLE row labels
+ "?", "*", "+", "??", "*?", "+?"
+ };
+
+ String ReduceQStr[]= new String[] { // seven entries lining up with ReduceType (ASIS and DEL are ""); implicitly public static final like every interface field
+ "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+ };
+
+}
+
diff --git a/src/org/joni/constants/RegexState.java b/src/org/joni/constants/RegexState.java
new file mode 100644
index 0000000..acc6d84
--- /dev/null
+++ b/src/org/joni/constants/RegexState.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+// we dont need this ATM
+public interface RegexState { // state codes: NORMAL and SEARCHING are >= 0, COMPILING and MODIFY are negative
+ final int NORMAL = 0;
+ final int SEARCHING = 1;
+ final int COMPILING = -1;
+ final int MODIFY = -2;
+}
diff --git a/src/org/joni/constants/StackPopLevel.java b/src/org/joni/constants/StackPopLevel.java
new file mode 100644
index 0000000..f1f93bd
--- /dev/null
+++ b/src/org/joni/constants/StackPopLevel.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface StackPopLevel { // three consecutive levels 0..2; NOTE(review): presumably selects how much work a stack pop must do - confirm in the matcher
+ final int FREE = 0;
+ final int MEM_START = 1;
+ final int ALL = 2;
+
+}
diff --git a/src/org/joni/constants/StackType.java b/src/org/joni/constants/StackType.java
new file mode 100644
index 0000000..34ea41f
--- /dev/null
+++ b/src/org/joni/constants/StackType.java
@@ -0,0 +1,51 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface StackType {
+    /* marker for an invalid stack index */
+    int INVALID_STACK_INDEX = -1;
+
+    /* stack entry types */
+    /* used by normal-POP (the only types with bits inside MASK_POP_USED) */
+    int ALT = 0x0001;
+    int LOOK_BEHIND_NOT = 0x0002;
+    int POS_NOT = 0x0003;
+    /* handled by normal-POP */
+    int MEM_START = 0x0100;
+    int MEM_END = 0x8200;
+    int REPEAT_INC = 0x0300;
+    int STATE_CHECK_MARK = 0x1000;
+    /* avoided by normal-POP */
+    int NULL_CHECK_START = 0x3000;
+    int NULL_CHECK_END = 0x5000; /* for recursive call */
+    int MEM_END_MARK = 0x8400;
+    int POS = 0x0500; /* used when POP-POS */
+    int STOP_BT = 0x0600; /* mark for "(?>...)" */
+    int REPEAT = 0x0700;
+    int CALL_FRAME = 0x0800;
+    int RETURN = 0x0900;
+    int VOID = 0x0a00; /* used to fill a blank */
+
+    /* stack type check masks */
+    int MASK_POP_USED = 0x00ff;
+    int MASK_TO_VOID_TARGET = 0x10ff;
+    int MASK_MEM_END_OR_MARK = 0x8000; /* bit shared by MEM_END and MEM_END_MARK */
+}
diff --git a/src/org/joni/constants/StringType.java b/src/org/joni/constants/StringType.java
new file mode 100644
index 0000000..46972e4
--- /dev/null
+++ b/src/org/joni/constants/StringType.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface StringType { // bit flags: each constant is a distinct single bit
+    int NSTR_RAW = 1 << 0;
+    int NSTR_AMBIG = 1 << 1;
+    int NSTR_DONT_GET_OPT_INFO = 1 << 2;
+    int NSTR_SHARED = 1 << 3;
+}
diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java
new file mode 100644
index 0000000..3fd5b4c
--- /dev/null
+++ b/src/org/joni/constants/SyntaxProperties.java
@@ -0,0 +1,124 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface SyntaxProperties { // OP_*, OP2_* and behavior flags each restart at bit 0
+    /* syntax (operators) */
+    int OP_VARIABLE_META_CHARACTERS = 1 << 0;
+    int OP_DOT_ANYCHAR = 1 << 1; /* . */
+    int OP_ASTERISK_ZERO_INF = 1 << 2; /* * */
+    int OP_ESC_ASTERISK_ZERO_INF = 1 << 3;
+    int OP_PLUS_ONE_INF = 1 << 4; /* + */
+    int OP_ESC_PLUS_ONE_INF = 1 << 5;
+    int OP_QMARK_ZERO_ONE = 1 << 6; /* ? */
+    int OP_ESC_QMARK_ZERO_ONE = 1 << 7;
+    int OP_BRACE_INTERVAL = 1 << 8; /* {lower,upper} */
+    int OP_ESC_BRACE_INTERVAL = 1 << 9; /* \{lower,upper\} */
+    int OP_VBAR_ALT = 1 << 10; /* | */
+    int OP_ESC_VBAR_ALT = 1 << 11; /* \| */
+    int OP_LPAREN_SUBEXP = 1 << 12; /* (...) */
+    int OP_ESC_LPAREN_SUBEXP = 1 << 13; /* \(...\) */
+    int OP_ESC_AZ_BUF_ANCHOR = 1 << 14; /* \A, \Z, \z */
+    int OP_ESC_CAPITAL_G_BEGIN_ANCHOR = 1 << 15; /* \G */
+    int OP_DECIMAL_BACKREF = 1 << 16; /* \num */
+    int OP_BRACKET_CC = 1 << 17; /* [...] */
+    int OP_ESC_W_WORD = 1 << 18; /* \w, \W */
+    int OP_ESC_LTGT_WORD_BEGIN_END = 1 << 19; /* \<, \> */
+    int OP_ESC_B_WORD_BOUND = 1 << 20; /* \b, \B */
+    int OP_ESC_S_WHITE_SPACE = 1 << 21; /* \s, \S */
+    int OP_ESC_D_DIGIT = 1 << 22; /* \d, \D */
+    int OP_LINE_ANCHOR = 1 << 23; /* ^, $ */
+    int OP_POSIX_BRACKET = 1 << 24; /* [:xxxx:] */
+    int OP_QMARK_NON_GREEDY = 1 << 25; /* ??,*?,+?,{n,m}? */
+    int OP_ESC_CONTROL_CHARS = 1 << 26; /* \n,\r,\t,\a ... */
+    int OP_ESC_C_CONTROL = 1 << 27; /* \cx */
+    int OP_ESC_OCTAL3 = 1 << 28; /* \OOO */
+    int OP_ESC_X_HEX2 = 1 << 29; /* \xHH */
+    int OP_ESC_X_BRACE_HEX8 = 1 << 30; /* \x{7HHHHHHH} */
+
+    int OP2_ESC_CAPITAL_Q_QUOTE = 1 << 0; /* \Q...\E */
+    int OP2_QMARK_GROUP_EFFECT = 1 << 1; /* (?...) */
+    int OP2_OPTION_PERL = 1 << 2; /* (?imsx), (?-imsx) */
+    int OP2_OPTION_RUBY = 1 << 3; /* (?imx), (?-imx) */
+    int OP2_PLUS_POSSESSIVE_REPEAT = 1 << 4; /* ?+,*+,++ */
+    int OP2_PLUS_POSSESSIVE_INTERVAL = 1 << 5; /* {n,m}+ */
+    int OP2_CCLASS_SET_OP = 1 << 6; /* [...&&..[..]..] */
+    int OP2_QMARK_LT_NAMED_GROUP = 1 << 7; /* (?<name>...) */
+    int OP2_ESC_K_NAMED_BACKREF = 1 << 8; /* \k<name> */
+    int OP2_ESC_G_SUBEXP_CALL = 1 << 9; /* \g<name>, \g<n> */
+    int OP2_ATMARK_CAPTURE_HISTORY = 1 << 10; /* (?@..), (?@<x>..) */
+    int OP2_ESC_CAPITAL_C_BAR_CONTROL = 1 << 11; /* \C-x */
+    int OP2_ESC_CAPITAL_M_BAR_META = 1 << 12; /* \M-x */
+    int OP2_ESC_V_VTAB = 1 << 13; /* \v as VTAB */
+    int OP2_ESC_U_HEX4 = 1 << 14; /* \\uHHHH */
+    int OP2_ESC_GNU_BUF_ANCHOR = 1 << 15; /* \`, \' */
+    int OP2_ESC_P_BRACE_CHAR_PROPERTY = 1 << 16; /* \p{...}, \P{...} */
+    int OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = 1 << 17; /* \p{^..}, \P{^..} */
+    /* int OP2_CHAR_PROPERTY_PREFIX_IS = 1 << 18; (disabled, so bit 18 is unused here) */
+    int OP2_ESC_H_XDIGIT = 1 << 19; /* \h, \H */
+    int OP2_INEFFECTIVE_ESCAPE = 1 << 20; /* \ */
+
+    /* syntax (behavior) */
+    int CONTEXT_INDEP_ANCHORS = 1 << 31; /* not implemented */
+    int CONTEXT_INDEP_REPEAT_OPS = 1 << 0; /* ?, *, +, {n,m} */
+    int CONTEXT_INVALID_REPEAT_OPS = 1 << 1; /* error or ignore */
+    int ALLOW_UNMATCHED_CLOSE_SUBEXP = 1 << 2; /* ...)... */
+    int ALLOW_INVALID_INTERVAL = 1 << 3; /* {??? */
+    int ALLOW_INTERVAL_LOW_ABBREV = 1 << 4; /* {,n} => {0,n} */
+    int STRICT_CHECK_BACKREF = 1 << 5; /* /(\1)/, /\1()/ .. */
+    int DIFFERENT_LEN_ALT_LOOK_BEHIND = 1 << 6; /* (?<=a|bc) */
+    int CAPTURE_ONLY_NAMED_GROUP = 1 << 7; /* see doc/RE */
+    int ALLOW_MULTIPLEX_DEFINITION_NAME = 1 << 8; /* (?<x>)(?<x>) */
+    int FIXED_INTERVAL_IS_GREEDY_ONLY = 1 << 9; /* a{n}?=(?:a{n})? */
+
+    /* syntax (behavior) in char class [...] */
+    int NOT_NEWLINE_IN_NEGATIVE_CC = 1 << 20; /* [^...] */
+    int BACKSLASH_ESCAPE_IN_CC = 1 << 21; /* [..\w..] etc.. */
+    int ALLOW_EMPTY_RANGE_IN_CC = 1 << 22;
+    int ALLOW_DOUBLE_RANGE_OP_IN_CC = 1 << 23; /* [0-9-a]=[0-9\-a] */
+    /* syntax (behavior) warning */
+    int WARN_CC_OP_NOT_ESCAPED = 1 << 24; /* [,-,] */
+    int WARN_REDUNDANT_NESTED_REPEAT = 1 << 25; /* (?:a*)+ */
+
+    int POSIX_COMMON_OP = OP_DOT_ANYCHAR
+            | OP_POSIX_BRACKET
+            | OP_DECIMAL_BACKREF
+            | OP_BRACKET_CC | OP_ASTERISK_ZERO_INF
+            | OP_LINE_ANCHOR
+            | OP_ESC_CONTROL_CHARS;
+
+    int GNU_REGEX_OP = OP_DOT_ANYCHAR
+            | OP_BRACKET_CC | OP_POSIX_BRACKET
+            | OP_DECIMAL_BACKREF | OP_BRACE_INTERVAL
+            | OP_LPAREN_SUBEXP | OP_VBAR_ALT
+            | OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF
+            | OP_QMARK_ZERO_ONE
+            | OP_ESC_AZ_BUF_ANCHOR | OP_ESC_CAPITAL_G_BEGIN_ANCHOR
+            | OP_ESC_W_WORD
+            | OP_ESC_B_WORD_BOUND | OP_ESC_LTGT_WORD_BEGIN_END
+            | OP_ESC_S_WHITE_SPACE
+            | OP_ESC_D_DIGIT
+            | OP_LINE_ANCHOR;
+
+    int GNU_REGEX_BV = CONTEXT_INDEP_ANCHORS
+            | CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS
+            | ALLOW_INVALID_INTERVAL | BACKSLASH_ESCAPE_IN_CC
+            | ALLOW_DOUBLE_RANGE_OP_IN_CC;
+}
diff --git a/src/org/joni/constants/TargetInfo.java b/src/org/joni/constants/TargetInfo.java
new file mode 100644
index 0000000..3fdbe5a
--- /dev/null
+++ b/src/org/joni/constants/TargetInfo.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface TargetInfo { // consecutive int codes, 0 through 3
+    int ISNOT_EMPTY = 0;
+    int IS_EMPTY = 1;
+    int IS_EMPTY_MEM = 2;
+    int IS_EMPTY_REC = 3;
+}
diff --git a/src/org/joni/constants/TokenType.java b/src/org/joni/constants/TokenType.java
new file mode 100644
index 0000000..9ea159d
--- /dev/null
+++ b/src/org/joni/constants/TokenType.java
@@ -0,0 +1,48 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public enum TokenType { // kinds of token; reordering constants would change their ordinals
+ EOT, /* end of token */
+ RAW_BYTE,
+ CHAR,
+ STRING,
+ CODE_POINT,
+ ANYCHAR,
+ CHAR_TYPE,
+ BACKREF,
+ CALL,
+ ANCHOR,
+ OP_REPEAT,
+ INTERVAL,
+ ANYCHAR_ANYTIME, /* SQL '%' == .* */
+ ALT,
+ SUBEXP_OPEN,
+ SUBEXP_CLOSE,
+ CC_OPEN,
+ QUOTE_OPEN,
+ CHAR_PROPERTY, /* \p{...}, \P{...} */
+ /* in cc (presumably "char class", as in the *_IN_CC syntax flags; confirm) */
+ CC_CLOSE,
+ CC_RANGE,
+ POSIX_BRACKET_OPEN,
+ CC_AND, /* && */
+ CC_CC_OPEN /* [ */
+}
diff --git a/src/org/joni/constants/Traverse.java b/src/org/joni/constants/Traverse.java
new file mode 100644
index 0000000..1c08ea5
--- /dev/null
+++ b/src/org/joni/constants/Traverse.java
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface Traverse { // AT_BOTH is the union of the AT_FIRST and AT_LAST bits
+    int TRAVERSE_CALLBACK_AT_FIRST = 1;
+    int TRAVERSE_CALLBACK_AT_LAST = 2;
+    int TRAVERSE_CALLBACK_AT_BOTH = TRAVERSE_CALLBACK_AT_LAST | TRAVERSE_CALLBACK_AT_FIRST;
+}
diff --git a/src/org/joni/exception/ErrorMessages.java b/src/org/joni/exception/ErrorMessages.java
new file mode 100644
index 0000000..d1c1279
--- /dev/null
+++ b/src/org/joni/exception/ErrorMessages.java
@@ -0,0 +1,92 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+import org.joni.Config;
+
+public interface ErrorMessages extends org.jcodings.exception.ErrorMessages {
+    String MISMATCH = "mismatch";
+    String NO_SUPPORT_CONFIG = "no support in this configuration";
+
+    /* internal errors */
+    String ERR_MEMORY = "fail to memory allocation";
+    String ERR_MATCH_STACK_LIMIT_OVER = "match-stack limit over";
+    String ERR_TYPE_BUG = "undefined type (bug)";
+    String ERR_PARSER_BUG = "internal parser error (bug)";
+    String ERR_STACK_BUG = "stack error (bug)";
+    String ERR_UNDEFINED_BYTECODE = "undefined bytecode (bug)";
+    String ERR_UNEXPECTED_BYTECODE = "unexpected bytecode (bug)";
+    String ERR_DEFAULT_ENCODING_IS_NOT_SETTED = "default multibyte-encoding is not setted";
+    String ERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR = "can't convert to wide-char on specified multibyte-encoding";
+
+    /* general errors */
+    String ERR_INVALID_ARGUMENT = "invalid argument";
+
+    /* syntax errors */
+    String ERR_END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace";
+    String ERR_END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket";
+    String ERR_EMPTY_CHAR_CLASS = "empty char-class";
+    String ERR_PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class";
+    String ERR_END_PATTERN_AT_ESCAPE = "end pattern at escape";
+    String ERR_END_PATTERN_AT_META = "end pattern at meta";
+    String ERR_END_PATTERN_AT_CONTROL = "end pattern at control";
+    String ERR_META_CODE_SYNTAX = "invalid meta-code syntax";
+    String ERR_CONTROL_CODE_SYNTAX = "invalid control-code syntax";
+    String ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range";
+    String ERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range";
+    String ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class";
+    String ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified";
+    String ERR_TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid";
+    String ERR_NESTED_REPEAT_OPERATOR = "nested repeat operator";
+    String ERR_UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis";
+    String ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis";
+    String ERR_END_PATTERN_IN_GROUP = "end pattern in group";
+    String ERR_UNDEFINED_GROUP_OPTION = "undefined group option";
+    String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type";
+    String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind";
+    String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}";
+
+    /* value errors (reported as syntax errors); some messages carry a "%n" placeholder */
+    String ERR_TOO_BIG_NUMBER = "too big number";
+    String ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range";
+    String ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range";
+    String ERR_EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class";
+    String ERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range";
+    String ERR_TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified";
+    String ERR_TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string";
+    String ERR_TOO_BIG_BACKREF_NUMBER = "too big backref number";
+    String ERR_INVALID_BACKREF = Config.USE_NAMED_GROUP ? "invalid backref number/name" : "invalid backref number";
+    String ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)";
+    String ERR_INVALID_WIDE_CHAR_VALUE = "invalid wide-char value";
+    String ERR_EMPTY_GROUP_NAME = "group name is empty";
+    String ERR_INVALID_GROUP_NAME = "invalid group name <%n>";
+    String ERR_INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP ? "invalid char in group name <%n>" : "invalid char in group number <%n>";
+    String ERR_UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference";
+    String ERR_UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference";
+    String ERR_MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>";
+    String ERR_MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call";
+    String ERR_NEVER_ENDING_RECURSION = "never ending recursion";
+    String ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history";
+    String ERR_NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination";
+    String ERR_INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options";
+    String ERR_OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count";
+    String ERR_TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value";
+
+}
diff --git a/src/org/joni/exception/InternalException.java b/src/org/joni/exception/InternalException.java
new file mode 100644
index 0000000..959f44f
--- /dev/null
+++ b/src/org/joni/exception/InternalException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+public class InternalException extends JOniException {
+    private static final long serialVersionUID = -3871816465397927992L;
+    /** Forwards {@code message} unchanged to the {@code JOniException} constructor. */
+    public InternalException(final String message) {
+        super(message);
+    }
+}
diff --git a/src/org/joni/exception/JOniException.java b/src/org/joni/exception/JOniException.java
new file mode 100644
index 0000000..f5d728c
--- /dev/null
+++ b/src/org/joni/exception/JOniException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+public class JOniException extends RuntimeException {
+    private static final long serialVersionUID = -6027192180014164667L;
+    /** Forwards {@code message} unchanged to the {@code RuntimeException} constructor. */
+    public JOniException(final String message) {
+        super(message);
+    }
+}
diff --git a/src/org/joni/exception/SyntaxException.java b/src/org/joni/exception/SyntaxException.java
new file mode 100644
index 0000000..7b00d1c
--- /dev/null
+++ b/src/org/joni/exception/SyntaxException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+public class SyntaxException extends JOniException {
+    private static final long serialVersionUID = 7862720128961874288L;
+    /** Forwards {@code message} unchanged to the {@code JOniException} constructor. */
+    public SyntaxException(final String message) {
+        super(message);
+    }
+}
diff --git a/src/org/joni/exception/ValueException.java b/src/org/joni/exception/ValueException.java
new file mode 100644
index 0000000..b07ac42
--- /dev/null
+++ b/src/org/joni/exception/ValueException.java
@@ -0,0 +1,37 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+public class ValueException extends SyntaxException { // message may carry a "%n" placeholder
+    private static final long serialVersionUID = -196013852479929134L;
+    /** Uses {@code message} as given; no placeholder substitution is performed. */
+    public ValueException(String message) {
+        super(message);
+    }
+    /** Replaces every "%n" in message with str, taken literally ('$' and '\' are not special). */
+    public ValueException(String message, String str) {
+        super(message.replace("%n", str));
+    }
+    /** Decodes bytes[p, end) with the platform default charset and substitutes it for "%n". */
+    public ValueException(String message, byte[]bytes, int p, int end) {
+        this(message, new String(bytes, p, end - p));
+    }
+
+}
diff --git a/test/org/joni/test/Test.java b/test/org/joni/test/Test.java
new file mode 100644
index 0000000..1a9cec6
--- /dev/null
+++ b/test/org/joni/test/Test.java
@@ -0,0 +1,194 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.test;
+
+import java.io.UnsupportedEncodingException;
+
+import org.joni.Config;
+import org.joni.Matcher;
+import org.joni.Option;
+import org.joni.Regex;
+import org.joni.Region;
+import org.joni.Syntax;
+import org.jcodings.Encoding;
+import org.joni.exception.JOniException;
+
+public abstract class Test {
+    /** Number of cases whose outcome matched the expectation. */
+    int nsucc;
+    /** Number of cases aborted by an exception (compile, search or string encoding). */
+    int nerror;
+    /** Number of cases whose outcome contradicted the expectation. */
+    int nfail;
+
+    public abstract int option();
+    public abstract Encoding encoding();
+    /** Charset name the String-based helpers pass to {@code String.getBytes}. */
+    public abstract String testEncoding();
+    public abstract Syntax syntax();
+
+    /** Renders bytes for log output; decodes with the platform default charset. */
+    protected String repr(byte[]bytes) {
+        return new String(bytes);
+    }
+
+    protected int length(byte[]bytes) {
+        return bytes.length;
+    }
+
+    public void xx(byte[]pattern, byte[]str, int from, int to, int mem, boolean not) {
+        xx(pattern, str, from, to, mem, not, option());
+    }
+
+    /**
+     * Compiles pattern, runs search(0, length(str), Option.NONE) on str and records the outcome.
+     * A hit must have region.beg[mem] == from and region.end[mem] == to; not == true expects -1.
+     */
+    public void xx(byte[]pattern, byte[]str, int from, int to, int mem, boolean not, int option) {
+        Regex reg;
+        try {
+            reg = new Regex(pattern, 0, length(pattern), option, encoding(), syntax());
+        } catch (JOniException je) {
+            reportError("ERROR: ", repr(pattern), repr(str), je);
+            return;
+        } catch (Exception e) {
+            reportError("SEVERE ERROR: ", repr(pattern), repr(str), e);
+            return;
+        }
+
+        Matcher m = reg.matcher(str, 0, length(str));
+        Region region;
+        int r = 0;
+        try {
+            r = m.search(0, length(str), Option.NONE);
+            region = m.getEagerRegion();
+        } catch (JOniException je) {
+            reportError("ERROR: ", repr(pattern), repr(str), je);
+            return;
+        } catch (Exception e) {
+            reportError("SEVERE ERROR: ", repr(pattern), repr(str), e);
+            return;
+        }
+
+        if (r == -1) {
+            if (not) {
+                Config.log.println("OK(N): /" + repr(pattern) + "/ '" + repr(str) + "'");
+                nsucc++;
+            } else {
+                Config.log.println("FAIL: /" + repr(pattern) + "/ '" + repr(str) + "'");
+                nfail++;
+            }
+        } else {
+            if (not) {
+                Config.log.println("FAIL(N): /" + repr(pattern) + "/ '" + repr(str) + "'");
+                nfail++;
+            } else {
+                if (region.beg[mem] == from && region.end[mem] == to) {
+                    Config.log.println("OK: /" + repr(pattern) + "/ '" +repr(str) + "'");
+                    nsucc++;
+                } else {
+                    Config.log.println("FAIL: /" + repr(pattern) + "/ '" + repr(str) + "' " +
+                            from + "-" + to + " : " + region.beg[mem] + "-" + region.end[mem]
+                            );
+                    nfail++;
+                }
+            }
+        }
+    }
+
+    /** Logs a case aborted by {@code e} to {@code Config.err} and counts it in {@code nerror}. */
+    private void reportError(String label, String pattern, String str, Exception e) {
+        Config.err.println("Pattern: " + pattern + " Str: " + str);
+        e.printStackTrace(Config.err);
+        Config.err.println(label + e.getMessage());
+        nerror++;
+    }
+
+    protected void x2(byte[]pattern, byte[]str, int from, int to) {
+        xx(pattern, str, from, to, 0, false);
+    }
+
+    protected void x3(byte[]pattern, byte[]str, int from, int to, int mem) {
+        xx(pattern, str, from, to, mem, false);
+    }
+
+    protected void n(byte[]pattern, byte[]str) {
+        xx(pattern, str, 0, 0, 0, true);
+    }
+
+    /** Encodes both strings with {@link #testEncoding()} and runs the byte-based check. */
+    private void xxEncoded(String pattern, String str, int from, int to, int mem, boolean not, int option) {
+        byte[]patternBytes;
+        byte[]strBytes;
+        try {
+            patternBytes = pattern.getBytes(testEncoding());
+            strBytes = str.getBytes(testEncoding());
+        } catch (UnsupportedEncodingException uee) {
+            // The case cannot run: count it as an error rather than silently dropping it.
+            reportError("ERROR: ", pattern, str, uee);
+            return;
+        }
+        xx(patternBytes, strBytes, from, to, mem, not, option);
+    }
+
+    public void xxs(String pattern, String str, int from, int to, int mem, boolean not) {
+        xxs(pattern, str, from, to, mem, not, option());
+    }
+
+    public void xxs(String pattern, String str, int from, int to, int mem, boolean not, int option) {
+        xxEncoded(pattern, str, from, to, mem, not, option);
+    }
+
+    public void x2s(String pattern, String str, int from, int to) {
+        x2s(pattern, str, from, to, option());
+    }
+
+    public void x2s(String pattern, String str, int from, int to, int option) {
+        xxEncoded(pattern, str, from, to, 0, false, option);
+    }
+
+    public void x3s(String pattern, String str, int from, int to, int mem) {
+        x3s(pattern, str, from, to, mem, option());
+    }
+
+    public void x3s(String pattern, String str, int from, int to, int mem, int option) {
+        xxEncoded(pattern, str, from, to, mem, false, option);
+    }
+
+    public void ns(String pattern, String str) {
+        ns(pattern, str, option());
+    }
+
+    public void ns(String pattern, String str, int option) {
+        xxEncoded(pattern, str, 0, 0, 0, true, option);
+    }
+
+    public void printResults() {
+        Config.log.println("\nRESULT SUCC: " + nsucc + ", FAIL: " + nfail + ", ERROR: " + nerror +
+                " (by JONI)");
+    }
+
+    public abstract void test();
+
+    public final void run() {
+        test();
+        printResults();
+    }
+}
diff --git a/test/org/joni/test/TestA.java b/test/org/joni/test/TestA.java
new file mode 100644
index 0000000..bc3ebf5
--- /dev/null
+++ b/test/org/joni/test/TestA.java
@@ -0,0 +1,481 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.test;
+
+import org.joni.Option;
+import org.joni.Syntax;
+import org.jcodings.Encoding;
+import org.jcodings.specific.ASCIIEncoding;
+
+/**
+ * Regular-expression cases executed with {@link ASCIIEncoding}; the Java
+ * string literals are converted to bytes using the "iso-8859-2" charset.
+ *
+ * Helper conventions, as far as they can be read from the calls below
+ * (NOTE(review): the helpers live in the Test base class - confirm there):
+ * x2s(pattern, subject, from, to) expects a match spanning from..to,
+ * x3s(pattern, subject, from, to, mem) expects group mem to span from..to,
+ * ns(pattern, subject) expects no match.
+ */
+public class TestA extends Test {
+
+    /** Regex option used when a case does not pass one explicitly. */
+    public int option() {
+        return Option.DEFAULT;
+    }
+
+    /** Encoding the regex engine is run with for this suite. */
+    public Encoding encoding() {
+        return ASCIIEncoding.INSTANCE;
+    }
+
+    /** Charset name used to turn the string literals below into bytes. */
+    public String testEncoding() {
+        return "iso-8859-2";
+    }
+
+    /** Regex syntax used for this suite. */
+    public Syntax syntax() {
+        return Syntax.DEFAULT;
+    }
+
+    /** Runs every case of the suite in order. */
+    public void test() {
+        x2s("", "", 0, 0);
+        x2s("^", "", 0, 0);
+        x2s("$", "", 0, 0);
+        x2s("\\G", "", 0, 0);
+        x2s("\\A", "", 0, 0);
+        x2s("\\Z", "", 0, 0);
+        x2s("\\z", "", 0, 0);
+        x2s("^$", "", 0, 0);
+        x2s("\\ca", "\001", 0, 1);
+        x2s("\\C-b", "\002", 0, 1);
+        x2s("\\c\\\\", "\034", 0, 1);
+        x2s("q[\\c\\\\]", "q\034", 0, 2);
+        x2s("", "a", 0, 0);
+        x2s("a", "a", 0, 1);
+        x2s("\\x61", "a", 0, 1);
+        x2s("aa", "aa", 0, 2);
+        x2s("aaa", "aaa", 0, 3);
+        x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
+        x2s("ab", "ab", 0, 2);
+        x2s("b", "ab", 1, 2);
+        x2s("bc", "abc", 1, 3);
+        x2s("(?i:#RET#)", "#INS##RET#", 5, 10);
+        x2s("\\17", "\017", 0, 1);
+        x2s("\\x1f", "\u001f", 0, 1);
+        x2s("\\xED\\xF2", "\u00ed\u0148", 0, 2);
+        x2s("a(?#....\\\\JJJJ)b", "ab", 0, 2);
+        x2s("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
+        x2s(".", "a", 0, 1);
+        ns(".", "");
+        x2s("..", "ab", 0, 2);
+        x2s("\\w", "e", 0, 1);
+        ns("\\W", "e");
+        x2s("\\s", " ", 0, 1);
+        x2s("\\S", "b", 0, 1);
+        x2s("\\d", "4", 0, 1);
+        ns("\\D", "4");
+        x2s("\\b", "z ", 0, 0);
+        x2s("\\b", " z", 1, 1);
+        x2s("\\B", "zz ", 1, 1);
+        x2s("\\B", "z ", 2, 2);
+        x2s("\\B", " z", 0, 0);
+        x2s("[ab]", "b", 0, 1);
+        ns("[ab]", "c");
+        x2s("[a-z]", "t", 0, 1);
+        ns("[^a]", "a");
+        x2s("[^a]", "\n", 0, 1);
+        x2s("[]]", "]", 0, 1);
+        ns("[^]]", "]");
+        x2s("[\\^]+", "0^^1", 1, 3);
+        x2s("[b-]", "b", 0, 1);
+        x2s("[b-]", "-", 0, 1);
+        x2s("[\\w]", "z", 0, 1);
+        ns("[\\w]", " ");
+        x2s("[\\W]", "b$", 1, 2);
+        x2s("[\\d]", "5", 0, 1);
+        ns("[\\d]", "e");
+        x2s("[\\D]", "t", 0, 1);
+        ns("[\\D]", "3");
+        x2s("[\\s]", " ", 0, 1);
+        ns("[\\s]", "a");
+        x2s("[\\S]", "b", 0, 1);
+        ns("[\\S]", " ");
+        x2s("[\\w\\d]", "2", 0, 1);
+        ns("[\\w\\d]", " ");
+        x2s("[[:upper:]]", "B", 0, 1);
+        x2s("[*[:xdigit:]+]", "+", 0, 1);
+        x2s("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
+        x2s("[*[:xdigit:]+]", "-@^+", 3, 4);
+        ns("[[:upper]]", "A");
+        x2s("[[:upper]]", ":", 0, 1);
+        x2s("[\\044-\\047]", "\046", 0, 1);
+        x2s("[\\x5a-\\x5c]", "\u005b", 0, 1);
+        x2s("[\\x6A-\\x6D]", "\u006c", 0, 1);
+        ns("[\\x6A-\\x6D]", "\u006e");
+        ns("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
+        x2s("[\\[]", "[", 0, 1);
+        x2s("[\\]]", "]", 0, 1);
+        x2s("[&]", "&", 0, 1);
+        x2s("[[ab]]", "b", 0, 1);
+        x2s("[[ab]c]", "c", 0, 1);
+        ns("[[^a]]", "a");
+        ns("[^[a]]", "a");
+        x2s("[[ab]&&bc]", "b", 0, 1);
+        ns("[[ab]&&bc]", "a");
+        ns("[[ab]&&bc]", "c");
+        x2s("[a-z&&b-y&&c-x]", "w", 0, 1);
+        ns("[^a-z&&b-y&&c-x]", "w");
+        x2s("[[^a&&a]&&a-z]", "b", 0, 1);
+        ns("[[^a&&a]&&a-z]", "a");
+        x2s("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
+        ns("[[^a-z&&bcdef]&&[^c-g]]", "c");
+        x2s("[^[^abc]&&[^cde]]", "c", 0, 1);
+        x2s("[^[^abc]&&[^cde]]", "e", 0, 1);
+        ns("[^[^abc]&&[^cde]]", "f");
+        x2s("[a-&&-a]", "-", 0, 1);
+        ns("[a\\-&&\\-a]", "&");
+        ns("\\wabc", " abc");
+        x2s("a\\Wbc", "a bc", 0, 4);
+        x2s("a.b.c", "aabbc", 0, 5);
+        x2s(".\\wb\\W..c", "abb bcc", 0, 7);
+        x2s("\\s\\wzzz", " zzzz", 0, 5);
+        x2s("aa.b", "aabb", 0, 4);
+        ns(".a", "ab");
+        x2s(".a", "aa", 0, 2);
+        x2s("^a", "a", 0, 1);
+        x2s("^a$", "a", 0, 1);
+        x2s("^\\w$", "a", 0, 1);
+        ns("^\\w$", " ");
+        x2s("^\\wab$", "zab", 0, 3);
+        x2s("^\\wabcdef$", "zabcdef", 0, 7);
+        x2s("^\\w...def$", "zabcdef", 0, 7);
+        // The pattern consumes eight characters (two word chars, a space, a
+        // non-word char, "aaa", a digit), so the subject needs two spaces
+        // to be consistent with the expected end offset 8.
+        x2s("\\w\\w\\s\\Waaa\\d", "aa  aaa4", 0, 8);
+        x2s("\\A\\Z", "", 0, 0);
+        x2s("\\Axyz", "xyz", 0, 3);
+        x2s("xyz\\Z", "xyz", 0, 3);
+        x2s("xyz\\z", "xyz", 0, 3);
+        x2s("a\\Z", "a", 0, 1);
+        x2s("\\Gaz", "az", 0, 2);
+        ns("\\Gz", "bza");
+        ns("az\\G", "az");
+        ns("az\\A", "az");
+        ns("a\\Az", "az");
+        x2s("\\^\\$", "^$", 0, 2);
+        x2s("^x?y", "xy", 0, 2);
+        x2s("^(x?y)", "xy", 0, 2);
+        x2s("\\w", "_", 0, 1);
+        ns("\\W", "_");
+        x2s("(?=z)z", "z", 0, 1);
+        ns("(?=z).", "a");
+        x2s("(?!z)a", "a", 0, 1);
+        ns("(?!z)a", "z");
+        x2s("(?i:a)", "a", 0, 1);
+        x2s("(?i:a)", "A", 0, 1);
+        x2s("(?i:A)", "a", 0, 1);
+        ns("(?i:A)", "b");
+        x2s("(?i:[A-Z])", "a", 0, 1);
+        x2s("(?i:[f-m])", "H", 0, 1);
+        x2s("(?i:[f-m])", "h", 0, 1);
+        ns("(?i:[f-m])", "e");
+        x2s("(?i:[A-c])", "D", 0, 1);
+        x2s("(?i:[!-k])", "Z", 0, 1);
+        x2s("(?i:[!-k])", "7", 0, 1);
+        x2s("(?i:[T-}])", "b", 0, 1);
+        x2s("(?i:[T-}])", "{", 0, 1);
+        x2s("(?i:\\?a)", "?A", 0, 2);
+        x2s("(?i:\\*A)", "*a", 0, 2);
+        ns(".", "\n");
+        x2s("(?m:.)", "\n", 0, 1);
+        x2s("(?m:a.)", "a\n", 0, 2);
+        x2s("(?m:.b)", "a\nb", 1, 3);
+        x2s(".*abc", "dddabdd\nddabc", 8, 13);
+        x2s("(?m:.*abc)", "dddabddabc", 0, 10);
+        ns("(?i)(?-i)a", "A");
+        ns("(?i)(?-i:a)", "A");
+        x2s("a?", "", 0, 0);
+        x2s("a?", "b", 0, 0);
+        x2s("a?", "a", 0, 1);
+        x2s("a*", "", 0, 0);
+        x2s("a*", "a", 0, 1);
+        x2s("a*", "aaa", 0, 3);
+        x2s("a*", "baaaa", 0, 0);
+        ns("a+", "");
+        x2s("a+", "a", 0, 1);
+        x2s("a+", "aaaa", 0, 4);
+        x2s("a+", "aabbb", 0, 2);
+        x2s("a+", "baaaa", 1, 5);
+        x2s(".?", "", 0, 0);
+        x2s(".?", "f", 0, 1);
+        x2s(".?", "\n", 0, 0);
+        x2s(".*", "", 0, 0);
+        x2s(".*", "abcde", 0, 5);
+        x2s(".+", "z", 0, 1);
+        x2s(".+", "zdswer\n", 0, 6);
+        x2s("(.*)a\\1f", "babfbac", 0, 4);
+        x2s("(.*)a\\1f", "bacbabf", 3, 7);
+        x2s("((.*)a\\2f)", "bacbabf", 3, 7);
+        x2s("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
+        x2s("a|b", "a", 0, 1);
+        x2s("a|b", "b", 0, 1);
+        x2s("|a", "a", 0, 0);
+        x2s("(|a)", "a", 0, 0);
+        x2s("ab|bc", "ab", 0, 2);
+        x2s("ab|bc", "bc", 0, 2);
+        x2s("z(?:ab|bc)", "zbc", 0, 3);
+        x2s("a(?:ab|bc)c", "aabc", 0, 4);
+        x2s("ab|(?:ac|az)", "az", 0, 2);
+        x2s("a|b|c", "dc", 1, 2);
+        x2s("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
+        ns("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
+        x2s("a|^z", "ba", 1, 2);
+        x2s("a|^z", "za", 0, 1);
+        x2s("a|\\Gz", "bza", 2, 3);
+        x2s("a|\\Gz", "za", 0, 1);
+        x2s("a|\\Az", "bza", 2, 3);
+        x2s("a|\\Az", "za", 0, 1);
+        x2s("a|b\\Z", "ba", 1, 2);
+        x2s("a|b\\Z", "b", 0, 1);
+        x2s("a|b\\z", "ba", 1, 2);
+        x2s("a|b\\z", "b", 0, 1);
+        x2s("\\w|\\s", " ", 0, 1);
+        ns("\\w|\\w", " ");
+        x2s("\\w|%", "%", 0, 1);
+        x2s("\\w|[&$]", "&", 0, 1);
+        x2s("[b-d]|[^e-z]", "a", 0, 1);
+        x2s("(?:a|[c-f])|bz", "dz", 0, 1);
+        x2s("(?:a|[c-f])|bz", "bz", 0, 2);
+        x2s("abc|(?=zz)..f", "zzf", 0, 3);
+        x2s("abc|(?!zz)..f", "abf", 0, 3);
+        x2s("(?=za)..a|(?=zz)..a", "zza", 0, 3);
+        ns("(?>a|abd)c", "abdc");
+        x2s("(?>abd|a)c", "abdc", 0, 4);
+        x2s("a?|b", "a", 0, 1);
+        x2s("a?|b", "b", 0, 0);
+        x2s("a?|b", "", 0, 0);
+        x2s("a*|b", "aa", 0, 2);
+        x2s("a*|b*", "ba", 0, 0);
+        x2s("a*|b*", "ab", 0, 1);
+        x2s("a+|b*", "", 0, 0);
+        x2s("a+|b*", "bbb", 0, 3);
+        x2s("a+|b*", "abbb", 0, 1);
+        ns("a+|b+", "");
+        x2s("(a|b)?", "b", 0, 1);
+        x2s("(a|b)*", "ba", 0, 2);
+        x2s("(a|b)+", "bab", 0, 3);
+        x2s("(ab|ca)+", "caabbc", 0, 4);
+        x2s("(ab|ca)+", "aabca", 1, 5);
+        x2s("(ab|ca)+", "abzca", 0, 2);
+        x2s("(a|bab)+", "ababa", 0, 5);
+        x2s("(a|bab)+", "ba", 1, 2);
+        x2s("(a|bab)+", "baaaba", 1, 4);
+        x2s("(?:a|b)(?:a|b)", "ab", 0, 2);
+        x2s("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
+        x2s("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
+        x2s("(?:a+|b+){2}", "aaabbb", 0, 6);
+        x2s("h{0,}", "hhhh", 0, 4);
+        x2s("(?:a+|b+){1,2}", "aaabbb", 0, 6);
+        ns("ax{2}*a", "0axxxa1");
+        ns("a.{0,2}a", "0aXXXa0");
+        ns("a.{0,2}?a", "0aXXXa0");
+        ns("a.{0,2}?a", "0aXXXXa0");
+        x2s("^a{2,}?a$", "aaa", 0, 3);
+        x2s("^[a-z]{2,}?$", "aaa", 0, 3);
+        x2s("(?:a+|\\Ab*)cc", "cc", 0, 2);
+        ns("(?:a+|\\Ab*)cc", "abcc");
+        x2s("(?:^a+|b+)*c", "aabbbabc", 6, 8);
+        x2s("(?:^a+|b+)*c", "aabbbbc", 0, 7);
+        x2s("a|(?i)c", "C", 0, 1);
+        x2s("(?i)c|a", "C", 0, 1);
+        x2s("(?i)c|a", "A", 0, 1);
+        x2s("(?i:c)|a", "C", 0, 1);
+        ns("(?i:c)|a", "A");
+        x2s("[abc]?", "abc", 0, 1);
+        x2s("[abc]*", "abc", 0, 3);
+        x2s("[^abc]*", "abc", 0, 0);
+        ns("[^abc]+", "abc");
+        x2s("a??", "aaa", 0, 0);
+        x2s("ba??b", "bab", 0, 3);
+        x2s("a*?", "aaa", 0, 0);
+        x2s("ba*?", "baa", 0, 1);
+        x2s("ba*?b", "baab", 0, 4);
+        x2s("a+?", "aaa", 0, 1);
+        x2s("ba+?", "baa", 0, 2);
+        x2s("ba+?b", "baab", 0, 4);
+        x2s("(?:a?)??", "a", 0, 0);
+        x2s("(?:a??)?", "a", 0, 0);
+        x2s("(?:a?)+?", "aaa", 0, 1);
+        x2s("(?:a+)??", "aaa", 0, 0);
+        x2s("(?:a+)??b", "aaab", 0, 4);
+        x2s("(?:ab)?{2}", "", 0, 0);
+        x2s("(?:ab)?{2}", "ababa", 0, 4);
+        x2s("(?:ab)*{0}", "ababa", 0, 0);
+        x2s("(?:ab){3,}", "abababab", 0, 8);
+        ns("(?:ab){3,}", "abab");
+        x2s("(?:ab){2,4}", "ababab", 0, 6);
+        x2s("(?:ab){2,4}", "ababababab", 0, 8);
+        x2s("(?:ab){2,4}?", "ababababab", 0, 4);
+        x2s("(?:ab){,}", "ab{,}", 0, 5);
+        x2s("(?:abc)+?{2}", "abcabcabc", 0, 6);
+        x2s("(?:X*)(?i:xa)", "XXXa", 0, 4);
+        x2s("(d+)([^abc]z)", "dddz", 0, 4);
+        x2s("([^abc]*)([^abc]z)", "dddz", 0, 4);
+        x2s("(\\w+)(\\wz)", "dddz", 0, 4);
+        x3s("(a)", "a", 0, 1, 1);
+        x3s("(ab)", "ab", 0, 2, 1);
+        x2s("((ab))", "ab", 0, 2);
+        x3s("((ab))", "ab", 0, 2, 1);
+        x3s("((ab))", "ab", 0, 2, 2);
+        x3s("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
+        x3s("(ab)(cd)", "abcd", 0, 2, 1);
+        x3s("(ab)(cd)", "abcd", 2, 4, 2);
+        x3s("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
+        x3s("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
+        x2s("(^a)", "a", 0, 1);
+        x3s("(a)|(a)", "ba", 1, 2, 1);
+        x3s("(^a)|(a)", "ba", 1, 2, 2);
+        x3s("(a?)", "aaa", 0, 1, 1);
+        x3s("(a*)", "aaa", 0, 3, 1);
+        x3s("(a*)", "", 0, 0, 1);
+        x3s("(a+)", "aaaaaaa", 0, 7, 1);
+        x3s("(a+|b*)", "bbbaa", 0, 3, 1);
+        x3s("(a+|b?)", "bbbaa", 0, 1, 1);
+        x3s("(abc)?", "abc", 0, 3, 1);
+        x3s("(abc)*", "abc", 0, 3, 1);
+        x3s("(abc)+", "abc", 0, 3, 1);
+        x3s("(xyz|abc)+", "abc", 0, 3, 1);
+        x3s("([xyz][abc]|abc)+", "abc", 0, 3, 1);
+        x3s("((?i:abc))", "AbC", 0, 3, 1);
+        x2s("(abc)(?i:\\1)", "abcABC", 0, 6);
+        x3s("((?m:a.c))", "a\nc", 0, 3, 1);
+        x3s("((?=az)a)", "azb", 0, 1, 1);
+        x3s("abc|(.abd)", "zabd", 0, 4, 1);
+        x2s("(?:abc)|(ABC)", "abc", 0, 3);
+        x3s("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
+        x3s("a*(.)", "aaaaz", 4, 5, 1);
+        x3s("a*?(.)", "aaaaz", 0, 1, 1);
+        x3s("a*?(c)", "aaaac", 4, 5, 1);
+        x3s("[bcd]a*(.)", "caaaaz", 5, 6, 1);
+        x3s("(\\Abb)cc", "bbcc", 0, 2, 1);
+        ns("(\\Abb)cc", "zbbcc");
+        x3s("(^bb)cc", "bbcc", 0, 2, 1);
+        ns("(^bb)cc", "zbbcc");
+        x3s("cc(bb$)", "ccbb", 2, 4, 1);
+        ns("cc(bb$)", "ccbbb");
+        ns("(\\1)", "");
+        ns("\\1(a)", "aa");
+        ns("(a(b)\\1)\\2+", "ababb");
+        ns("(?:(?:\\1|z)(a))+$", "zaa");
+        x2s("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
+        x2s("(a)(?=\\1)", "aa", 0, 1);
+        ns("(a)$|\\1", "az");
+        x2s("(a)\\1", "aa", 0, 2);
+        ns("(a)\\1", "ab");
+        x2s("(a?)\\1", "aa", 0, 2);
+        x2s("(a??)\\1", "aa", 0, 0);
+        x2s("(a*)\\1", "aaaaa", 0, 4);
+        x3s("(a*)\\1", "aaaaa", 0, 2, 1);
+        x2s("a(b*)\\1", "abbbb", 0, 5);
+        x2s("a(b*)\\1", "ab", 0, 1);
+        x2s("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
+        x2s("(a*)(b*)\\2", "aaabbbb", 0, 7);
+        x2s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
+        x3s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
+        x2s("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
+        x2s("([a-d])\\1", "cc", 0, 2);
+        x2s("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
+        ns("(\\w\\d\\s)\\1", "f5 f5");
+        x2s("(who|[a-c]{3})\\1", "whowho", 0, 6);
+        x2s("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
+        x2s("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
+        x2s("(^a)\\1", "aa", 0, 2);
+        ns("(^a)\\1", "baa");
+        ns("(a$)\\1", "aa");
+        ns("(ab\\Z)\\1", "ab");
+        x2s("(a*\\Z)\\1", "a", 1, 1);
+        x2s(".(a*\\Z)\\1", "ba", 1, 2);
+        x3s("(.(abc)\\2)", "zabcabc", 0, 7, 1);
+        x3s("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
+        x2s("((?i:az))\\1", "AzAz", 0, 4);
+        ns("((?i:az))\\1", "Azaz");
+        x2s("(?<=a)b", "ab", 1, 2);
+        ns("(?<=a)b", "bb");
+        x2s("(?<=a|b)b", "bb", 1, 2);
+        x2s("(?<=a|bc)b", "bcb", 2, 3);
+        x2s("(?<=a|bc)b", "ab", 1, 2);
+        x2s("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
+        x2s("(a)\\g<1>", "aa", 0, 2);
+        x2s("(?<!a)b", "cb", 1, 2);
+        ns("(?<!a)b", "ab");
+        x2s("(?<!a|bc)b", "bbb", 0, 1);
+        ns("(?<!a|bc)z", "bcz");
+        x2s("(?<name1>a)", "a", 0, 1);
+        x2s("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
+        x2s("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
+        x2s("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
+        x2s("(?<n>|a\\g<n>)+", "", 0, 0);
+        x2s("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
+        x3s("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
+        x2s("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
+        x2s("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
+        x2s("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
+        // The match is the 16 characters "fg xaaaaaaaafg x"; the expected span
+        // 2..18 therefore requires two leading spaces in the subject.
+        x2s("(?<name1240>\\w+\\sx)a+\\k<name1240>", "  fg xaaaaaaaafg x", 2, 18);
+        x3s("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
+        x2s("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
+        x2s("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
+        x2s("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
+        x2s("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
+        ns("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
+        x2s("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
+        x3s("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
+        x3s("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
+        x2s("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
+        x2s("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
+        x3s("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
+        x2s("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
+        x2s("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
+        x3s("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
+        x2s("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
+        x2s("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
+        x2s("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
+        x2s("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
+        x2s("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
+        x2s("()*\\1", "", 0, 0);
+        x2s("(?:()|())*\\1\\2", "", 0, 0);
+        x3s("(?:\\1a|())*", "a", 0, 0, 1);
+        x2s("x((.)*)*x", "0x1x2x3", 1, 6);
+        x2s("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
+        x2s("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
+        x2s("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
+
+        x3s("\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "reer", 0, 4, 1);
+        x3s("(?-i:\\g<name>)(?i:(?<name>a)){0}", "A", 0, 1, 1);
+
+        // Small tag grammar built from named groups that are defined with {0}
+        // and only reached through subexpression calls; run with Option.EXTEND.
+        String pat =
+            "(?<element> \\g<stag> \\g<content>* \\g<etag> ){0}" +
+            "(?<stag> < \\g<name> \\s* > ){0}" +
+            "(?<name> [a-zA-Z_:]+ ){0}" +
+            "(?<content> [^<&]+ (\\g<element> | [^<&]+)* ){0}" +
+            "(?<etag> </ \\k<name+1> >){0}" +
+            "\\g<element>";
+
+        String str = "<foo>f<bar>bbb</bar>f</foo>";
+
+        x3s(pat, str, 0, 27, 0, Option.EXTEND);
+        x3s(pat, str, 0, 27, 1, Option.EXTEND);
+        x3s(pat, str, 6, 11, 2, Option.EXTEND);
+        x3s(pat, str, 7, 10, 3, Option.EXTEND);
+        x3s(pat, str, 5, 21, 4, Option.EXTEND);
+        x3s(pat, str, 21, 27, 5, Option.EXTEND);
+
+        x2s("(a)b\\k<1>", "aba", 0, 3);
+    }
+
+    /** Runs the suite from the command line and prints the result summary. */
+    public static void main(String[] args) throws Throwable{
+        new TestA().run();
+    }
+}
diff --git a/test/org/joni/test/TestC.java b/test/org/joni/test/TestC.java
new file mode 100644
index 0000000..94f4218
--- /dev/null
+++ b/test/org/joni/test/TestC.java
@@ -0,0 +1,736 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.test;
+
+import org.joni.Option;
+import org.joni.Syntax;
+import org.jcodings.Config;
+import org.jcodings.Encoding;
+import org.jcodings.specific.EUCJPEncoding;
+
+public class TestC extends Test {
+
+ public int option() {
+ return Option.DEFAULT;
+ }
+
+ public Encoding encoding() {
+ return EUCJPEncoding.INSTANCE;
+ }
+
+ public String testEncoding() {
+ return "cp1250";
+ }
+
+ public Syntax syntax() {
+ return Syntax.DEFAULT;
+ }
+
+ public void test() {
+ x2s("", "", 0, 0);
+ x2s("^", "", 0, 0);
+ x2s("$", "", 0, 0);
+ x2s("\\G", "", 0, 0);
+ x2s("\\A", "", 0, 0);
+ x2s("\\Z", "", 0, 0);
+ x2s("\\z", "", 0, 0);
+ x2s("^$", "", 0, 0);
+ x2s("\\ca", "\001", 0, 1);
+ x2s("\\C-b", "\002", 0, 1);
+ x2s("\\c\\\\", "\034", 0, 1);
+ x2s("q[\\c\\\\]", "q\034", 0, 2);
+ x2s("", "a", 0, 0);
+ x2s("a", "a", 0, 1);
+ x2s("\\x61", "a", 0, 1);
+ x2s("aa", "aa", 0, 2);
+ x2s("aaa", "aaa", 0, 3);
+ x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
+ x2s("ab", "ab", 0, 2);
+ x2s("b", "ab", 1, 2);
+ x2s("bc", "abc", 1, 3);
+ x2s("(?i:#RET#)", "#INS##RET#", 5, 10);
+ x2s("\\17", "\017", 0, 1);
+ x2s("\\x1f", "\u001f", 0, 1);
+ x2s("a(?#....\\\\JJJJ)b", "ab", 0, 2);
+ x2s("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
+ x2s(".", "a", 0, 1);
+ ns(".", "");
+ x2s("..", "ab", 0, 2);
+ x2s("\\w", "e", 0, 1);
+ ns("\\W", "e");
+ x2s("\\s", " ", 0, 1);
+ x2s("\\S", "b", 0, 1);
+ x2s("\\d", "4", 0, 1);
+ ns("\\D", "4");
+ x2s("\\b", "z ", 0, 0);
+ x2s("\\b", " z", 1, 1);
+ x2s("\\B", "zz ", 1, 1);
+ x2s("\\B", "z ", 2, 2);
+ x2s("\\B", " z", 0, 0);
+ x2s("[ab]", "b", 0, 1);
+ ns("[ab]", "c");
+ x2s("[a-z]", "t", 0, 1);
+ ns("[^a]", "a");
+ x2s("[^a]", "\n", 0, 1);
+ x2s("[]]", "]", 0, 1);
+ ns("[^]]", "]");
+ x2s("[\\^]+", "0^^1", 1, 3);
+ x2s("[b-]", "b", 0, 1);
+ x2s("[b-]", "-", 0, 1);
+ x2s("[\\w]", "z", 0, 1);
+ ns("[\\w]", " ");
+ x2s("[\\W]", "b$", 1, 2);
+ x2s("[\\d]", "5", 0, 1);
+ ns("[\\d]", "e");
+ x2s("[\\D]", "t", 0, 1);
+ ns("[\\D]", "3");
+ x2s("[\\s]", " ", 0, 1);
+ ns("[\\s]", "a");
+ x2s("[\\S]", "b", 0, 1);
+ ns("[\\S]", " ");
+ x2s("[\\w\\d]", "2", 0, 1);
+ ns("[\\w\\d]", " ");
+ x2s("[[:upper:]]", "B", 0, 1);
+ x2s("[*[:xdigit:]+]", "+", 0, 1);
+ x2s("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
+ x2s("[*[:xdigit:]+]", "-@^+", 3, 4);
+ ns("[[:upper]]", "A");
+ x2s("[[:upper]]", ":", 0, 1);
+ x2s("[\\044-\\047]", "\046", 0, 1);
+ x2s("[\\x5a-\\x5c]", "\u005b", 0, 1);
+ x2s("[\\x6A-\\x6D]", "\u006c", 0, 1);
+ ns("[\\x6A-\\x6D]", "\u006e");
+ ns("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
+ x2s("[\\[]", "[", 0, 1);
+ x2s("[\\]]", "]", 0, 1);
+ x2s("[&]", "&", 0, 1);
+ x2s("[[ab]]", "b", 0, 1);
+ x2s("[[ab]c]", "c", 0, 1);
+ ns("[[^a]]", "a");
+ ns("[^[a]]", "a");
+ x2s("[[ab]&&bc]", "b", 0, 1);
+ ns("[[ab]&&bc]", "a");
+ ns("[[ab]&&bc]", "c");
+ x2s("[a-z&&b-y&&c-x]", "w", 0, 1);
+ ns("[^a-z&&b-y&&c-x]", "w");
+ x2s("[[^a&&a]&&a-z]", "b", 0, 1);
+ ns("[[^a&&a]&&a-z]", "a");
+ x2s("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
+ ns("[[^a-z&&bcdef]&&[^c-g]]", "c");
+ x2s("[^[^abc]&&[^cde]]", "c", 0, 1);
+ x2s("[^[^abc]&&[^cde]]", "e", 0, 1);
+ ns("[^[^abc]&&[^cde]]", "f");
+ x2s("[a-&&-a]", "-", 0, 1);
+ ns("[a\\-&&\\-a]", "&");
+ ns("\\wabc", " abc");
+ x2s("a\\Wbc", "a bc", 0, 4);
+ x2s("a.b.c", "aabbc", 0, 5);
+ x2s(".\\wb\\W..c", "abb bcc", 0, 7);
+ x2s("\\s\\wzzz", " zzzz", 0, 5);
+ x2s("aa.b", "aabb", 0, 4);
+ ns(".a", "ab");
+ x2s(".a", "aa", 0, 2);
+ x2s("^a", "a", 0, 1);
+ x2s("^a$", "a", 0, 1);
+ x2s("^\\w$", "a", 0, 1);
+ ns("^\\w$", " ");
+ x2s("^\\wab$", "zab", 0, 3);
+ x2s("^\\wabcdef$", "zabcdef", 0, 7);
+ x2s("^\\w...def$", "zabcdef", 0, 7);
+ x2s("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
+ x2s("\\A\\Z", "", 0, 0);
+ x2s("\\Axyz", "xyz", 0, 3);
+ x2s("xyz\\Z", "xyz", 0, 3);
+ x2s("xyz\\z", "xyz", 0, 3);
+ x2s("a\\Z", "a", 0, 1);
+ x2s("\\Gaz", "az", 0, 2);
+ ns("\\Gz", "bza");
+ ns("az\\G", "az");
+ ns("az\\A", "az");
+ ns("a\\Az", "az");
+ x2s("\\^\\$", "^$", 0, 2);
+ x2s("^x?y", "xy", 0, 2);
+ x2s("^(x?y)", "xy", 0, 2);
+ x2s("\\w", "_", 0, 1);
+ ns("\\W", "_");
+ x2s("(?=z)z", "z", 0, 1);
+ ns("(?=z).", "a");
+ x2s("(?!z)a", "a", 0, 1);
+ ns("(?!z)a", "z");
+ x2s("(?i:a)", "a", 0, 1);
+ x2s("(?i:a)", "A", 0, 1);
+ x2s("(?i:A)", "a", 0, 1);
+ ns("(?i:A)", "b");
+ x2s("(?i:[A-Z])", "a", 0, 1);
+ x2s("(?i:[f-m])", "H", 0, 1);
+ x2s("(?i:[f-m])", "h", 0, 1);
+ ns("(?i:[f-m])", "e");
+ x2s("(?i:[A-c])", "D", 0, 1);
+ ns("(?i:[^a-z])", "A");
+ ns("(?i:[^a-z])", "a");
+ x2s("(?i:[!-k])", "Z", 0, 1);
+ x2s("(?i:[!-k])", "7", 0, 1);
+ x2s("(?i:[T-}])", "b", 0, 1);
+ x2s("(?i:[T-}])", "{", 0, 1);
+ x2s("(?i:\\?a)", "?A", 0, 2);
+ x2s("(?i:\\*A)", "*a", 0, 2);
+ ns(".", "\n");
+ x2s("(?m:.)", "\n", 0, 1);
+ x2s("(?m:a.)", "a\n", 0, 2);
+ x2s("(?m:.b)", "a\nb", 1, 3);
+ x2s(".*abc", "dddabdd\nddabc", 8, 13);
+ x2s("(?m:.*abc)", "dddabddabc", 0, 10);
+ ns("(?i)(?-i)a", "A");
+ ns("(?i)(?-i:a)", "A");
+ x2s("a?", "", 0, 0);
+ x2s("a?", "b", 0, 0);
+ x2s("a?", "a", 0, 1);
+ x2s("a*", "", 0, 0);
+ x2s("a*", "a", 0, 1);
+ x2s("a*", "aaa", 0, 3);
+ x2s("a*", "baaaa", 0, 0);
+ ns("a+", "");
+ x2s("a+", "a", 0, 1);
+ x2s("a+", "aaaa", 0, 4);
+ x2s("a+", "aabbb", 0, 2);
+ x2s("a+", "baaaa", 1, 5);
+ x2s(".?", "", 0, 0);
+ x2s(".?", "f", 0, 1);
+ x2s(".?", "\n", 0, 0);
+ x2s(".*", "", 0, 0);
+ x2s(".*", "abcde", 0, 5);
+ x2s(".+", "z", 0, 1);
+ x2s(".+", "zdswer\n", 0, 6);
+ x2s("(.*)a\\1f", "babfbac", 0, 4);
+ x2s("(.*)a\\1f", "bacbabf", 3, 7);
+ x2s("((.*)a\\2f)", "bacbabf", 3, 7);
+ x2s("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
+ x2s("a|b", "a", 0, 1);
+ x2s("a|b", "b", 0, 1);
+ x2s("|a", "a", 0, 0);
+ x2s("(|a)", "a", 0, 0);
+ x2s("ab|bc", "ab", 0, 2);
+ x2s("ab|bc", "bc", 0, 2);
+ x2s("z(?:ab|bc)", "zbc", 0, 3);
+ x2s("a(?:ab|bc)c", "aabc", 0, 4);
+ x2s("ab|(?:ac|az)", "az", 0, 2);
+ x2s("a|b|c", "dc", 1, 2);
+ x2s("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
+ ns("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
+ x2s("a|^z", "ba", 1, 2);
+ x2s("a|^z", "za", 0, 1);
+ x2s("a|\\Gz", "bza", 2, 3);
+ x2s("a|\\Gz", "za", 0, 1);
+ x2s("a|\\Az", "bza", 2, 3);
+ x2s("a|\\Az", "za", 0, 1);
+ x2s("a|b\\Z", "ba", 1, 2);
+ x2s("a|b\\Z", "b", 0, 1);
+ x2s("a|b\\z", "ba", 1, 2);
+ x2s("a|b\\z", "b", 0, 1);
+ x2s("\\w|\\s", " ", 0, 1);
+ ns("\\w|\\w", " ");
+ x2s("\\w|%", "%", 0, 1);
+ x2s("\\w|[&$]", "&", 0, 1);
+ x2s("[b-d]|[^e-z]", "a", 0, 1);
+ x2s("(?:a|[c-f])|bz", "dz", 0, 1);
+ x2s("(?:a|[c-f])|bz", "bz", 0, 2);
+ x2s("abc|(?=zz)..f", "zzf", 0, 3);
+ x2s("abc|(?!zz)..f", "abf", 0, 3);
+ x2s("(?=za)..a|(?=zz)..a", "zza", 0, 3);
+ ns("(?>a|abd)c", "abdc");
+ x2s("(?>abd|a)c", "abdc", 0, 4);
+ x2s("a?|b", "a", 0, 1);
+ x2s("a?|b", "b", 0, 0);
+ x2s("a?|b", "", 0, 0);
+ x2s("a*|b", "aa", 0, 2);
+ x2s("a*|b*", "ba", 0, 0);
+ x2s("a*|b*", "ab", 0, 1);
+ x2s("a+|b*", "", 0, 0);
+ x2s("a+|b*", "bbb", 0, 3);
+ x2s("a+|b*", "abbb", 0, 1);
+ ns("a+|b+", "");
+ x2s("(a|b)?", "b", 0, 1);
+ x2s("(a|b)*", "ba", 0, 2);
+ x2s("(a|b)+", "bab", 0, 3);
+ x2s("(ab|ca)+", "caabbc", 0, 4);
+ x2s("(ab|ca)+", "aabca", 1, 5);
+ x2s("(ab|ca)+", "abzca", 0, 2);
+ x2s("(a|bab)+", "ababa", 0, 5);
+ x2s("(a|bab)+", "ba", 1, 2);
+ x2s("(a|bab)+", "baaaba", 1, 4);
+ x2s("(?:a|b)(?:a|b)", "ab", 0, 2);
+ x2s("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
+ x2s("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
+ x2s("(?:a+|b+){2}", "aaabbb", 0, 6);
+ x2s("h{0,}", "hhhh", 0, 4);
+ x2s("(?:a+|b+){1,2}", "aaabbb", 0, 6);
+ ns("ax{2}*a", "0axxxa1");
+ ns("a.{0,2}a", "0aXXXa0");
+ ns("a.{0,2}?a", "0aXXXa0");
+ ns("a.{0,2}?a", "0aXXXXa0");
+ x2s("^a{2,}?a$", "aaa", 0, 3);
+ x2s("^[a-z]{2,}?$", "aaa", 0, 3);
+ x2s("(?:a+|\\Ab*)cc", "cc", 0, 2);
+ ns("(?:a+|\\Ab*)cc", "abcc");
+ x2s("(?:^a+|b+)*c", "aabbbabc", 6, 8);
+ x2s("(?:^a+|b+)*c", "aabbbbc", 0, 7);
+ x2s("a|(?i)c", "C", 0, 1);
+ x2s("(?i)c|a", "C", 0, 1);
+ x2s("(?i)c|a", "A", 0, 1);
+ x2s("(?i:c)|a", "C", 0, 1);
+ ns("(?i:c)|a", "A");
+ x2s("[abc]?", "abc", 0, 1);
+ x2s("[abc]*", "abc", 0, 3);
+ x2s("[^abc]*", "abc", 0, 0);
+ ns("[^abc]+", "abc");
+ x2s("a??", "aaa", 0, 0);
+ x2s("ba??b", "bab", 0, 3);
+ x2s("a*?", "aaa", 0, 0);
+ x2s("ba*?", "baa", 0, 1);
+ x2s("ba*?b", "baab", 0, 4);
+ x2s("a+?", "aaa", 0, 1);
+ x2s("ba+?", "baa", 0, 2);
+ x2s("ba+?b", "baab", 0, 4);
+ x2s("(?:a?)??", "a", 0, 0);
+ x2s("(?:a??)?", "a", 0, 0);
+ x2s("(?:a?)+?", "aaa", 0, 1);
+ x2s("(?:a+)??", "aaa", 0, 0);
+ x2s("(?:a+)??b", "aaab", 0, 4);
+ x2s("(?:ab)?{2}", "", 0, 0);
+ x2s("(?:ab)?{2}", "ababa", 0, 4);
+ x2s("(?:ab)*{0}", "ababa", 0, 0);
+ x2s("(?:ab){3,}", "abababab", 0, 8);
+ ns("(?:ab){3,}", "abab");
+ x2s("(?:ab){2,4}", "ababab", 0, 6);
+ x2s("(?:ab){2,4}", "ababababab", 0, 8);
+ x2s("(?:ab){2,4}?", "ababababab", 0, 4);
+ x2s("(?:ab){,}", "ab{,}", 0, 5);
+ x2s("(?:abc)+?{2}", "abcabcabc", 0, 6);
+ x2s("(?:X*)(?i:xa)", "XXXa", 0, 4);
+ x2s("(d+)([^abc]z)", "dddz", 0, 4);
+ x2s("([^abc]*)([^abc]z)", "dddz", 0, 4);
+ x2s("(\\w+)(\\wz)", "dddz", 0, 4);
+ x3s("(a)", "a", 0, 1, 1);
+ x3s("(ab)", "ab", 0, 2, 1);
+ x2s("((ab))", "ab", 0, 2);
+ x3s("((ab))", "ab", 0, 2, 1);
+ x3s("((ab))", "ab", 0, 2, 2);
+ x3s("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
+ x3s("(ab)(cd)", "abcd", 0, 2, 1);
+ x3s("(ab)(cd)", "abcd", 2, 4, 2);
+ x3s("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
+ x3s("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
+ x2s("(^a)", "a", 0, 1);
+ x3s("(a)|(a)", "ba", 1, 2, 1);
+ x3s("(^a)|(a)", "ba", 1, 2, 2);
+ x3s("(a?)", "aaa", 0, 1, 1);
+ x3s("(a*)", "aaa", 0, 3, 1);
+ x3s("(a*)", "", 0, 0, 1);
+ x3s("(a+)", "aaaaaaa", 0, 7, 1);
+ x3s("(a+|b*)", "bbbaa", 0, 3, 1);
+ x3s("(a+|b?)", "bbbaa", 0, 1, 1);
+ x3s("(abc)?", "abc", 0, 3, 1);
+ x3s("(abc)*", "abc", 0, 3, 1);
+ x3s("(abc)+", "abc", 0, 3, 1);
+ x3s("(xyz|abc)+", "abc", 0, 3, 1);
+ x3s("([xyz][abc]|abc)+", "abc", 0, 3, 1);
+ x3s("((?i:abc))", "AbC", 0, 3, 1);
+ x2s("(abc)(?i:\\1)", "abcABC", 0, 6);
+ x3s("((?m:a.c))", "a\nc", 0, 3, 1);
+ x3s("((?=az)a)", "azb", 0, 1, 1);
+ x3s("abc|(.abd)", "zabd", 0, 4, 1);
+ x2s("(?:abc)|(ABC)", "abc", 0, 3);
+ x3s("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
+ x3s("a*(.)", "aaaaz", 4, 5, 1);
+ x3s("a*?(.)", "aaaaz", 0, 1, 1);
+ x3s("a*?(c)", "aaaac", 4, 5, 1);
+ x3s("[bcd]a*(.)", "caaaaz", 5, 6, 1);
+ x3s("(\\Abb)cc", "bbcc", 0, 2, 1);
+ ns("(\\Abb)cc", "zbbcc");
+ x3s("(^bb)cc", "bbcc", 0, 2, 1);
+ ns("(^bb)cc", "zbbcc");
+ x3s("cc(bb$)", "ccbb", 2, 4, 1);
+ ns("cc(bb$)", "ccbbb");
+ ns("(\\1)", "");
+ ns("\\1(a)", "aa");
+ ns("(a(b)\\1)\\2+", "ababb");
+ ns("(?:(?:\\1|z)(a))+$", "zaa");
+ x2s("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
+ x2s("(a)(?=\\1)", "aa", 0, 1);
+ ns("(a)$|\\1", "az");
+ x2s("(a)\\1", "aa", 0, 2);
+ ns("(a)\\1", "ab");
+ x2s("(a?)\\1", "aa", 0, 2);
+ x2s("(a??)\\1", "aa", 0, 0);
+ x2s("(a*)\\1", "aaaaa", 0, 4);
+ x3s("(a*)\\1", "aaaaa", 0, 2, 1);
+ x2s("a(b*)\\1", "abbbb", 0, 5);
+ x2s("a(b*)\\1", "ab", 0, 1);
+ x2s("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
+ x2s("(a*)(b*)\\2", "aaabbbb", 0, 7);
+ x2s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
+ x3s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
+ x2s("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
+ x2s("([a-d])\\1", "cc", 0, 2);
+ x2s("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
+ ns("(\\w\\d\\s)\\1", "f5 f5");
+ x2s("(who|[a-c]{3})\\1", "whowho", 0, 6);
+ x2s("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
+ x2s("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
+ x2s("(^a)\\1", "aa", 0, 2);
+ ns("(^a)\\1", "baa");
+ ns("(a$)\\1", "aa");
+ ns("(ab\\Z)\\1", "ab");
+ x2s("(a*\\Z)\\1", "a", 1, 1);
+ x2s(".(a*\\Z)\\1", "ba", 1, 2);
+ x3s("(.(abc)\\2)", "zabcabc", 0, 7, 1);
+ x3s("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
+ x2s("((?i:az))\\1", "AzAz", 0, 4);
+ ns("((?i:az))\\1", "Azaz");
+ x2s("(?<=a)b", "ab", 1, 2);
+ ns("(?<=a)b", "bb");
+ x2s("(?<=a|b)b", "bb", 1, 2);
+ x2s("(?<=a|bc)b", "bcb", 2, 3);
+ x2s("(?<=a|bc)b", "ab", 1, 2);
+ x2s("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
+ x2s("(a)\\g<1>", "aa", 0, 2);
+ x2s("(?<!a)b", "cb", 1, 2);
+ ns("(?<!a)b", "ab");
+ x2s("(?<!a|bc)b", "bbb", 0, 1);
+ ns("(?<!a|bc)z", "bcz");
+ x2s("(?<name1>a)", "a", 0, 1);
+ x2s("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
+ x2s("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
+ x2s("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
+ x2s("(?<n>|a\\g<n>)+", "", 0, 0);
+ x2s("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
+ x3s("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
+ x2s("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
+ x2s("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
+ x2s("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
+ x2s("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
+ x3s("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
+ x2s("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
+ x2s("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
+ x2s("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
+ x2s("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
+ ns("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
+ x2s("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
+ x3s("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
+ x3s("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
+ x2s("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
+ x2s("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
+ x3s("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
+ x2s("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
+ x2s("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
+ x3s("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
+ x2s("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
+ x2s("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
+ x2s("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
+ x2s("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
+ x2s("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
+ x2s("()*\\1", "", 0, 0);
+ x2s("(?:()|())*\\1\\2", "", 0, 0);
+ x3s("(?:\\1a|())*", "a", 0, 0, 1);
+ x2s("x((.)*)*x", "0x1x2x3", 1, 6);
+ x2s("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
+ x2s("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
+ x2s("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
+ if (Config.VANILLA) x2s("\\xED\\xF2", "\u00ed\u0148", 0, 2);
+ x2s("", "\u00a4\u02d8", 0, 0);
+ x2s("\u00a4\u02d8", "\u00a4\u02d8", 0, 2);
+ ns("\u00a4\u00a4", "\u00a4\u02d8");
+ x2s("\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a6\u00a4\u00a6", 0, 4);
+ x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6);
+ x2s("\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", "\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\ [...]
+ x2s("\u00a4\u02d8", "\u00a4\u00a4\u00a4\u02d8", 2, 4);
+ x2s("\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 2, 6);
+ if (Config.VANILLA) x2s("\\xca\\xb8", "\u0118\u00b8", 0, 2);
+ x2s(".", "\u00a4\u02d8", 0, 2);
+ x2s("..", "\u00a4\u00ab\u00a4\u00ad", 0, 4);
+ x2s("\\w", "\u00a4\u015e", 0, 2);
+ ns("\\W", "\u00a4\u02d8");
+ x2s("[\\W]", "\u00a4\u00a6$", 2, 3);
+ x2s("\\S", "\u00a4\u02dd", 0, 2);
+ x2s("\\S", "\u00b4\u00c1", 0, 2);
+ x2s("\\b", "\u00b5\u00a4 ", 0, 0);
+ x2s("\\b", " \u00a4\u0170", 1, 1);
+ x2s("\\B", "\u00a4\u00bb\u00a4\u02dd ", 2, 2);
+ x2s("\\B", "\u00a4\u00a6 ", 3, 3);
+ x2s("\\B", " \u00a4\u00a4", 0, 0);
+ x2s("[\u00a4\u017c\u00a4\u00c1]", "\u00a4\u00c1", 0, 2);
+ ns("[\u00a4\u0118\u00a4\u00cb]", "\u00a4\u011a");
+ x2s("[\u00a4\u00a6-\u00a4\u015e]", "\u00a4\u00a8", 0, 2);
+ ns("[^\u00a4\u00b1]", "\u00a4\u00b1");
+ x2s("[\\w]", "\u00a4\u00cd", 0, 2);
+ ns("[\\d]", "\u00a4\u0150");
+ x2s("[\\D]", "\u00a4\u010e", 0, 2);
+ ns("[\\s]", "\u00a4\u017b");
+ x2s("[\\S]", "\u00a4\u0158", 0, 2);
+ x2s("[\\w\\d]", "\u00a4\u010d", 0, 2);
+ x2s("[\\w\\d]", " \u00a4\u010d", 3, 5);
+ ns("\\w\u00b5\u00b4\u013d\u00d6", " \u00b5\u00b4\u013d\u00d6");
+ x2s("\u00b5\u00b4\\W\u013d\u00d6", "\u00b5\u00b4 \u013d\u00d6", 0, 5);
+ x2s("\u00a4\u02d8.\u00a4\u00a4.\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6", 0, 10);
+ x2s(".\\w\u00a4\u00a6\\W..\u00a4\u013e", "\u00a4\u00a8\u00a4\u00a6\u00a4\u00a6 \u00a4\u00a6\u00a4\u013e\u00a4\u013e", 0, 13);
+ x2s("\\s\\w\u00a4\u0142\u00a4\u0142\u00a4\u0142", " \u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", 0, 9);
+ x2s("\u00a4\u02d8\u00a4\u02d8.\u00a4\u00b1", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00b1\u00a4\u00b1", 0, 8);
+ ns(".\u00a4\u00a4", "\u00a4\u00a4\u00a4\u00a8");
+ x2s(".\u00a4\u015e", "\u00a4\u015e\u00a4\u015e", 0, 4);
+ x2s("^\u00a4\u02d8", "\u00a4\u02d8", 0, 2);
+ x2s("^\u00a4\u0155$", "\u00a4\u0155", 0, 2);
+ x2s("^\\w$", "\u00a4\u00cb", 0, 2);
+ x2s("^\\w\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142$", "z\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 0, 11);
+ x2s("^\\w...\u00a4\u00a6\u00a4\u00a8\u00a4\u015e$", "z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6\u00a4\u00a8\u00a4\u015e", 0, 13);
+ x2s("\\w\\w\\s\\W\u00a4\u015e\u00a4\u015e\u00a4\u015e\\d", "a\u00a4\u015e \u00a4\u015e\u00a4\u015e\u00a4\u015e4", 0, 12);
+ x2s("\\A\u00a4\u017c\u00a4\u00c1\u00a4\u00c4", "\u00a4\u017c\u00a4\u00c1\u00a4\u00c4", 0, 6);
+ x2s("\u00a4\u0155\u00a4\u00e1\u00a4\u00e2\\Z", "\u00a4\u0155\u00a4\u00e1\u00a4\u00e2", 0, 6);
+ x2s("\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\\z", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 6);
+ x2s("\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\\Z", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\n", 0, 6);
+ x2s("\\G\u00a4\u00dd\u00a4\u00d4", "\u00a4\u00dd\u00a4\u00d4", 0, 4);
+ ns("\\G\u00a4\u00a8", "\u00a4\u00a6\u00a4\u00a8\u00a4\u015e");
+ ns("\u00a4\u010c\u00a4\u0106\\G", "\u00a4\u010c\u00a4\u0106");
+ ns("\u00a4\u0162\u00a4\u00df\\A", "\u00a4\u0162\u00a4\u00df");
+ ns("\u00a4\u0162\\A\u00a4\u00df", "\u00a4\u0162\u00a4\u00df");
+ x2s("(?=\u00a4\u00bb)\u00a4\u00bb", "\u00a4\u00bb", 0, 2);
+ ns("(?=\u00a4\u00a6).", "\u00a4\u00a4");
+ x2s("(?!\u00a4\u00a6)\u00a4\u00ab", "\u00a4\u00ab", 0, 2);
+ ns("(?!\u00a4\u010c)\u00a4\u02d8", "\u00a4\u010c");
+ x2s("(?i:\u00a4\u02d8)", "\u00a4\u02d8", 0, 2);
+ x2s("(?i:\u00a4\u00d6\u00a4\u016e)", "\u00a4\u00d6\u00a4\u016e", 0, 4);
+ ns("(?i:\u00a4\u00a4)", "\u00a4\u00a6");
+ x2s("(?m:\u00a4\u010d.)", "\u00a4\u010d\n", 0, 3);
+ x2s("(?m:.\u00a4\u00e1)", "\u00a4\u0162\n\u00a4\u00e1", 2, 5);
+ x2s("\u00a4\u02d8?", "", 0, 0);
+ x2s("\u0118\u0143?", "\u02db\u02dd", 0, 0);
+ x2s("\u0118\u0143?", "\u0118\u0143", 0, 2);
+ x2s("\u00ce\u011a*", "", 0, 0);
+ x2s("\u00ce\u011a*", "\u00ce\u011a", 0, 2);
+ x2s("\u00bb\u0147*", "\u00bb\u0147\u00bb\u0147\u00bb\u0147", 0, 6);
+ x2s("\u00c7\u010e*", "\u013d\u017b\u00c7\u010e\u00c7\u010e\u00c7\u010e\u00c7\u010e", 0, 0);
+ ns("\u00bb\u0142+", "");
+ x2s("\u02db\u010e+", "\u02db\u010e", 0, 2);
+ x2s("\u00bb\u0163+", "\u00bb\u0163\u00bb\u0163\u00bb\u0163\u00bb\u0163", 0, 8);
+ x2s("\u00a4\u00a8+", "\u00a4\u00a8\u00a4\u00a8\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 0, 4);
+ x2s("\u00a4\u00a6+", "\u00a4\u015e\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 2, 10);
+ x2s(".?", "\u00a4\u017c", 0, 2);
+ x2s(".*", "\u00a4\u0143\u00a4\u00d4\u00a4\u00d7\u00a4\u00da", 0, 8);
+ x2s(".+", "\u00a4\u00ed", 0, 2);
+ x2s(".+", "\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u00ab\n", 0, 8);
+ x2s("\u00a4\u02d8|\u00a4\u00a4", "\u00a4\u02d8", 0, 2);
+ x2s("\u00a4\u02d8|\u00a4\u00a4", "\u00a4\u00a4", 0, 2);
+ x2s("\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4", 0, 4);
+ x2s("\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a4\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a6", 0, 4);
+ x2s("\u00a4\u0148(?:\u00a4\u00ab\u00a4\u00ad|\u00a4\u00ad\u00a4\u017b)", "\u00a4\u0148\u00a4\u00ab\u00a4\u00ad", 0, 6);
+ x2s("\u00a4\u0148(?:\u00a4\u00ab\u00a4\u00ad|\u00a4\u00ad\u00a4\u017b)\u00a4\u00b1", "\u00a4\u0148\u00a4\u00ad\u00a4\u017b\u00a4\u00b1", 0, 8);
+ x2s("\u00a4\u02d8\u00a4\u00a4|(?:\u00a4\u02d8\u00a4\u00a6|\u00a4\u02d8\u00a4\u0148)", "\u00a4\u02d8\u00a4\u0148", 0, 4);
+ x2s("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6", "\u00a4\u00a8\u00a4\u00a6", 2, 4);
+ x2s("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8|\u00a4\u015e\u00a4\u00ab\u00a4\u00ad|\u00a4\u017b|\u00a4\u00b1\u00a4\u0142\u00a4\u00b5|\u00a4\u00b7\u00a4\u0105\u00a4\u00bb|\u00a4\u02dd|\u00a4\u017c\u00a4\u00c1|\u00a4\u00c4\u00a4\u0106\u00a4\u010c\u00a4\u0118\u00a4\u00cb|\u00a4\u011a\u00a4\u00cd", "\u00a4\u00b7\u00a4\u0105\u00a4\u00bb", 0, 6);
+ ns("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8|\u00a4\u015e\u00a4\u00ab\u00a4\u00ad|\u00a4\u017b|\u00a4\u00b1\u00a4\u0142\u00a4\u00b5|\u00a4\u00b7\u00a4\u0105\u00a4\u00bb|\u00a4\u02dd|\u00a4\u017c\u00a4\u00c1|\u00a4\u00c4\u00a4\u0106\u00a4\u010c\u00a4\u0118\u00a4\u00cb|\u00a4\u011a\u00a4\u00cd", "\u00a4\u0105\u00a4\u00bb");
+ x2s("\u00a4\u02d8|^\u00a4\u010f", "\u00a4\u00d6\u00a4\u02d8", 2, 4);
+ x2s("\u00a4\u02d8|^\u00a4\u0148", "\u00a4\u0148\u00a4\u02d8", 0, 2);
+ x2s("\u00b5\u00b4|\\G\u013d\u00d6", "\u00a4\u00b1\u013d\u00d6\u00b5\u00b4", 4, 6);
+ x2s("\u00b5\u00b4|\\G\u013d\u00d6", "\u013d\u00d6\u00b5\u00b4", 0, 2);
+ x2s("\u00b5\u00b4|\\A\u013d\u00d6", "b\u013d\u00d6\u00b5\u00b4", 3, 5);
+ x2s("\u00b5\u00b4|\\A\u013d\u00d6", "\u013d\u00d6", 0, 2);
+ x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6\u00b5\u00b4", 2, 4);
+ x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6", 0, 2);
+ x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6\n", 0, 2);
+ x2s("\u00b5\u00b4|\u013d\u00d6\\z", "\u013d\u00d6\u00b5\u00b4", 2, 4);
+ x2s("\u00b5\u00b4|\u013d\u00d6\\z", "\u013d\u00d6", 0, 2);
+ x2s("\\w|\\s", "\u00a4\u015e", 0, 2);
+ x2s("\\w|%", "%\u00a4\u015e", 0, 1);
+ x2s("\\w|[&$]", "\u00a4\u00a6&", 0, 2);
+ x2s("[\u00a4\u00a4-\u00a4\u00b1]", "\u00a4\u00a6", 0, 2);
+ x2s("[\u00a4\u00a4-\u00a4\u00b1]|[^\u00a4\u00ab-\u00a4\u0142]", "\u00a4\u02d8", 0, 2);
+ x2s("[\u00a4\u00a4-\u00a4\u00b1]|[^\u00a4\u00ab-\u00a4\u0142]", "\u00a4\u00ab", 0, 2);
+ x2s("[^\u00a4\u02d8]", "\n", 0, 1);
+ x2s("(?:\u00a4\u02d8|[\u00a4\u00a6-\u00a4\u00ad])|\u00a4\u00a4\u00a4\u0148", "\u00a4\u00a6\u00a4\u0148", 0, 2);
+ x2s("(?:\u00a4\u02d8|[\u00a4\u00a6-\u00a4\u00ad])|\u00a4\u00a4\u00a4\u0148", "\u00a4\u00a4\u00a4\u0148", 0, 4);
+ x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(?=\u00a4\u00b1\u00a4\u00b1)..\u00a4\u0170", "\u00a4\u00b1\u00a4\u00b1\u00a4\u0170", 0, 6);
+ x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(?!\u00a4\u00b1\u00a4\u00b1)..\u00a4\u0170", "\u00a4\u02d8\u00a4\u00a4\u00a4\u0170", 0, 6);
+ if (Config.VANILLA) x2s("(?=\u00a4\u0148\u00a4\u02d8)..\u00a4\u02d8|(?=\u00a4\u0148\u00a4\u0148)..\u00a4\u02d8", "\u00a4\u0148\u00a4\u0148\u00a4\u02d8", 0, 6);
+ x2s("(?<=\u00a4\u02d8|\u00a4\u00a4\u00a4\u00a6)\u00a4\u00a4", "\u00a4\u00a4\u00a4\u00a6\u00a4\u00a4", 4, 6);
+ ns("(?>\u00a4\u02d8|\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8)\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8\u00a4\u00a6");
+ x2s("(?>\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8|\u00a4\u02d8)\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8\u00a4\u00a6", 0, 8);
+ x2s("\u00a4\u02d8?|\u00a4\u00a4", "\u00a4\u02d8", 0, 2);
+ x2s("\u00a4\u02d8?|\u00a4\u00a4", "\u00a4\u00a4", 0, 0);
+ x2s("\u00a4\u02d8?|\u00a4\u00a4", "", 0, 0);
+ x2s("\u00a4\u02d8*|\u00a4\u00a4", "\u00a4\u02d8\u00a4\u02d8", 0, 4);
+ x2s("\u00a4\u02d8*|\u00a4\u00a4*", "\u00a4\u00a4\u00a4\u02d8", 0, 0);
+ x2s("\u00a4\u02d8*|\u00a4\u00a4*", "\u00a4\u02d8\u00a4\u00a4", 0, 2);
+ x2s("[a\u00a4\u02d8]*|\u00a4\u00a4*", "a\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 3);
+ x2s("\u00a4\u02d8+|\u00a4\u00a4*", "", 0, 0);
+ x2s("\u00a4\u02d8+|\u00a4\u00a4*", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 6);
+ x2s("\u00a4\u02d8+|\u00a4\u00a4*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 2);
+ x2s("\u00a4\u02d8+|\u00a4\u00a4*", "a\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 0);
+ ns("\u00a4\u02d8+|\u00a4\u00a4+", "");
+ x2s("(\u00a4\u02d8|\u00a4\u00a4)?", "\u00a4\u00a4", 0, 2);
+ x2s("(\u00a4\u02d8|\u00a4\u00a4)*", "\u00a4\u00a4\u00a4\u02d8", 0, 4);
+ x2s("(\u00a4\u02d8|\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4", 0, 6);
+ x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u00a6\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 0, 8);
+ x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8)+", "\u00a4\u00a6\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 4, 12);
+ x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u02d8", 2, 10);
+ x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u0148\u00a4\u00a6\u00a4\u02d8", 0, 4);
+ x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "$$zzzz\u00a4\u02d8\u00a4\u00a4\u00a4\u0148\u00a4\u00a6\u00a4\u02d8", 6, 10);
+ x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8", 0, 10);
+ x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8", 2, 4);
+ x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8", 2, 8);
+ x2s("(?:\u00a4\u02d8|\u00a4\u00a4)(?:\u00a4\u02d8|\u00a4\u00a4)", "\u00a4\u02d8\u00a4\u00a4", 0, 4);
+ x2s("(?:\u00a4\u02d8*|\u00a4\u00a4*)(?:\u00a4\u02d8*|\u00a4\u00a4*)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 6);
+ x2s("(?:\u00a4\u02d8*|\u00a4\u00a4*)(?:\u00a4\u02d8+|\u00a4\u00a4+)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12);
+ x2s("(?:\u00a4\u02d8+|\u00a4\u00a4+){2}", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12);
+ x2s("(?:\u00a4\u02d8+|\u00a4\u00a4+){1,2}", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12);
+ x2s("(?:\u00a4\u02d8+|\\A\u00a4\u00a4*)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a6\u00a4\u00a6", 0, 4);
+ ns("(?:\u00a4\u02d8+|\\A\u00a4\u00a4*)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6");
+ x2s("(?:^\u00a4\u02d8+|\u00a4\u00a4+)*\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 12, 16);
+ x2s("(?:^\u00a4\u02d8+|\u00a4\u00a4+)*\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6", 0, 14);
+ x2s("\u00a4\u00a6{0,}", "\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 0, 8);
+ x2s("\u00a4\u02d8|(?i)c", "C", 0, 1);
+ x2s("(?i)c|\u00a4\u02d8", "C", 0, 1);
+ x2s("(?i:\u00a4\u02d8)|a", "a", 0, 1);
+ ns("(?i:\u00a4\u02d8)|a", "A");
+ x2s("[\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]?", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 2);
+ x2s("[\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6);
+ x2s("[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 0);
+ ns("[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6");
+ x2s("\u00a4\u02d8??", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 0);
+ x2s("\u00a4\u00a4\u00a4\u02d8??\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4", 0, 6);
+ x2s("\u00a4\u02d8*?", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 0);
+ x2s("\u00a4\u00a4\u00a4\u02d8*?", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 2);
+ x2s("\u00a4\u00a4\u00a4\u02d8*?\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4", 0, 8);
+ x2s("\u00a4\u02d8+?", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 2);
+ x2s("\u00a4\u00a4\u00a4\u02d8+?", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 4);
+ x2s("\u00a4\u00a4\u00a4\u02d8+?\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4", 0, 8);
+ x2s("(?:\u0139\u00b7?)??", "\u0139\u00b7", 0, 0);
+ x2s("(?:\u0139\u00b7??)?", "\u0139\u00b7", 0, 0);
+ x2s("(?:\u011a\u00b4?)+?", "\u011a\u00b4\u011a\u00b4\u011a\u00b4", 0, 2);
+ x2s("(?:\u00c9\u00f7+)??", "\u00c9\u00f7\u00c9\u00f7\u00c9\u00f7", 0, 0);
+ x2s("(?:\u0154\u0103+)??\u00c1\u00fa", "\u0154\u0103\u0154\u0103\u0154\u0103\u00c1\u00fa", 0, 8);
+ x2s("(?:\u00a4\u02d8\u00a4\u00a4)?{2}", "", 0, 0);
+ x2s("(?:\u00b5\u00b4\u013d\u00d6)?{2}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4", 0, 8);
+ x2s("(?:\u00b5\u00b4\u013d\u00d6)*{0}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4", 0, 0);
+ x2s("(?:\u00b5\u00b4\u013d\u00d6){3,}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 16);
+ ns("(?:\u00b5\u00b4\u013d\u00d6){3,}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6");
+ x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 12);
+ x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 16);
+ x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}?", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 8);
+ x2s("(?:\u00b5\u00b4\u013d\u00d6){,}", "\u00b5\u00b4\u013d\u00d6{,}", 0, 7);
+ x2s("(?:\u00a4\u00ab\u00a4\u00ad\u00a4\u017b)+?{2}", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 12);
+ x3s("(\u02db\u0110)", "\u02db\u0110", 0, 2, 1);
+ x3s("(\u02db\u0110\u017c\u013a)", "\u02db\u0110\u017c\u013a", 0, 4, 1);
+ x2s("((\u00bb\u0163\u00b4\u00d6))", "\u00bb\u0163\u00b4\u00d6", 0, 4);
+ x3s("((\u00c9\u00f7\u017c\u013a))", "\u00c9\u00f7\u017c\u013a", 0, 4, 1);
+ x3s("((\u015f\u0148\u0106\u00fc))", "\u015f\u0148\u0106\u00fc", 0, 4, 2);
+ x3s("((((((((((((((((((((\u00ce\u011a\u00bb\u0147))))))))))))))))))))", "\u00ce\u011a\u00bb\u0147", 0, 4, 20);
+ x3s("(\u00a4\u02d8\u00a4\u00a4)(\u00a4\u00a6\u00a4\u00a8)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 0, 4, 1);
+ x3s("(\u00a4\u02d8\u00a4\u00a4)(\u00a4\u00a6\u00a4\u00a8)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 4, 8, 2);
+ x3s("()(\u00a4\u02d8)\u00a4\u00a4\u00a4\u00a6(\u00a4\u00a8\u00a4\u015e\u00a4\u00ab)\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 6, 12, 3);
+ x3s("(()(\u00a4\u02d8)\u00a4\u00a4\u00a4\u00a6(\u00a4\u00a8\u00a4\u015e\u00a4\u00ab)\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 6, 12, 4);
+ x3s(".*(\u0104\u0150\u0104\u00a9)\u0104\u00f3\u02c7\u00a6\u0104\u0162(\u0104\u00f3()\u0104\u00b7\u0104\u013a\u0104\u017c)\u0104\u00a4\u0104\u00f3", "\u0104\u0150\u0104\u00a9\u0104\u00f3\u02c7\u00a6\u0104\u0162\u0104\u00f3\u0104\u00b7\u0104\u013a\u0104\u017c\u0104\u00a4\u0104\u00f3", 10, 18, 2);
+ x2s("(^\u00a4\u02d8)", "\u00a4\u02d8", 0, 2);
+ x3s("(\u00a4\u02d8)|(\u00a4\u02d8)", "\u00a4\u00a4\u00a4\u02d8", 2, 4, 1);
+ x3s("(^\u00a4\u02d8)|(\u00a4\u02d8)", "\u00a4\u00a4\u00a4\u02d8", 2, 4, 2);
+ x3s("(\u00a4\u02d8?)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 2, 1);
+ x3s("(\u00a4\u0162*)", "\u00a4\u0162\u00a4\u0162\u00a4\u0162", 0, 6, 1);
+ x3s("(\u00a4\u010c*)", "", 0, 0, 1);
+ x3s("(\u00a4\u00eb+)", "\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb", 0, 14, 1);
+ x3s("(\u00a4\u0150+|\u00a4\u0158*)", "\u00a4\u0150\u00a4\u0150\u00a4\u0150\u00a4\u0158\u00a4\u0158", 0, 6, 1);
+ x3s("(\u00a4\u02d8+|\u00a4\u00a4?)", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 2, 1);
+ x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)?", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1);
+ x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1);
+ x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1);
+ x3s("(\u00a4\u00b5\u00a4\u00b7\u00a4\u0105|\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1);
+ x3s("([\u00a4\u0118\u00a4\u00cb\u00a4\u011a][\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]|\u00a4\u00ab\u00a4\u00ad\u00a4\u017b)+", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 6, 1);
+ x3s("((?i:\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6))", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1);
+ x3s("((?m:\u00a4\u02d8.\u00a4\u00a6))", "\u00a4\u02d8\n\u00a4\u00a6", 0, 5, 1);
+ x3s("((?=\u00a4\u02d8\u00a4\u00f3)\u00a4\u02d8)", "\u00a4\u02d8\u00a4\u00f3\u00a4\u00a4", 0, 2, 1);
+ x3s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(.\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8)", "\u00a4\u00f3\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8", 0, 8, 1);
+ x3s("\u00a4\u02d8*(.)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 8, 10, 1);
+ x3s("\u00a4\u02d8*?(.)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 0, 2, 1);
+ x3s("\u00a4\u02d8*?(\u00a4\u00f3)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 8, 10, 1);
+ x3s("[\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8]\u00a4\u02d8*(.)", "\u00a4\u00a8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 10, 12, 1);
+ x3s("(\\A\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6", 0, 4, 1);
+ ns("(\\A\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00f3\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6");
+ x3s("(^\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6", 0, 4, 1);
+ ns("(^\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00f3\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6");
+ x3s("\u00a4\u00ed\u00a4\u00ed(\u00a4\u00eb\u00a4\u00eb$)", "\u00a4\u00ed\u00a4\u00ed\u00a4\u00eb\u00a4\u00eb", 4, 8, 1);
+ ns("\u00a4\u00ed\u00a4\u00ed(\u00a4\u00eb\u00a4\u00eb$)", "\u00a4\u00ed\u00a4\u00ed\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb");
+ x2s("(\u011a\u00b5)\\1", "\u011a\u00b5\u011a\u00b5", 0, 4);
+ ns("(\u011a\u00b5)\\1", "\u011a\u00b5\u00c9\u0111");
+ x2s("(\u00b6\u0151?)\\1", "\u00b6\u0151\u00b6\u0151", 0, 4);
+ x2s("(\u00b6\u0151??)\\1", "\u00b6\u0151\u00b6\u0151", 0, 0);
+ x2s("(\u00b6\u0151*)\\1", "\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151", 0, 8);
+ x3s("(\u00b6\u0151*)\\1", "\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151", 0, 4, 1);
+ x2s("\u00a4\u02d8(\u00a4\u00a4*)\\1", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 10);
+ x2s("\u00a4\u02d8(\u00a4\u00a4*)\\1", "\u00a4\u02d8\u00a4\u00a4", 0, 2);
+ x2s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\1\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4", 0, 20);
+ x2s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 14);
+ x3s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 6, 10, 2);
+ x2s("(((((((\u00a4\u00dd*)\u00a4\u00da))))))\u00a4\u00d4\\7", "\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd\u00a4\u00da\u00a4\u00d4\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd", 0, 16);
+ x3s("(((((((\u00a4\u00dd*)\u00a4\u00da))))))\u00a4\u00d4\\7", "\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd\u00a4\u00da\u00a4\u00d4\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd", 0, 6, 7);
+ x2s("(\u00a4\u010e)(\u00a4\u0147)(\u00a4\u0150)\\2\\1\\3", "\u00a4\u010e\u00a4\u0147\u00a4\u0150\u00a4\u0147\u00a4\u010e\u00a4\u0150", 0, 12);
+ x2s("([\u00a4\u00ad-\u00a4\u00b1])\\1", "\u00a4\u017b\u00a4\u017b", 0, 4);
+ x2s("(\\w\\d\\s)\\1", "\u00a4\u02d85 \u00a4\u02d85 ", 0, 8);
+ ns("(\\w\\d\\s)\\1", "\u00a4\u02d85 \u00a4\u02d85");
+ x2s("(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u0102\u017b\u02c7\u00a9\u0102\u017b\u02c7\u00a9", 0, 8);
+ x2s("...(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u00a4\u02d8a\u00a4\u02d8\u0102\u017b\u02c7\u00a9\u0102\u017b\u02c7\u00a9", 0, 13);
+ x2s("(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u00a4\u00a6\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6\u00a4\u00a4\u00a4\u00a6", 0, 12);
+ x2s("(^\u00a4\u0142)\\1", "\u00a4\u0142\u00a4\u0142", 0, 4);
+ ns("(^\u00a4\u0155)\\1", "\u00a4\u00e1\u00a4\u0155\u00a4\u0155");
+ ns("(\u00a4\u02d8$)\\1", "\u00a4\u02d8\u00a4\u02d8");
+ ns("(\u00a4\u02d8\u00a4\u00a4\\Z)\\1", "\u00a4\u02d8\u00a4\u00a4");
+ x2s("(\u00a4\u02d8*\\Z)\\1", "\u00a4\u02d8", 2, 2);
+ x2s(".(\u00a4\u02d8*\\Z)\\1", "\u00a4\u00a4\u00a4\u02d8", 2, 4);
+ x3s("(.(\u00a4\u00e4\u00a4\u00a4\u00a4\u0107)\\2)", "z\u00a4\u00e4\u00a4\u00a4\u00a4\u0107\u00a4\u00e4\u00a4\u00a4\u00a4\u0107", 0, 13, 1);
+ x3s("(.(..\\d.)\\2)", "\u00a4\u02d812341234", 0, 10, 1);
+ x2s("((?i:\u00a4\u02d8v\u00a4\u015f))\\1", "\u00a4\u02d8v\u00a4\u015f\u00a4\u02d8v\u00a4\u015f", 0, 10);
+ x2s("(?<\u00b6\u0148\u00a4\u00ab>\u0118\u0143|\\(\\g<\u00b6\u0148\u00a4\u00ab>\\))", "((((((\u0118\u0143))))))", 0, 14);
+ x2s("\\A(?:\\g<\u00b0\u00a4_1>|\\g<\u00b1\u013e_2>|\\z\u02dd\u015e\u00ce\u00bb (?<\u00b0\u00a4_1>\u00b4\u0143|\u013d\u00ab\\g<\u00b1\u013e_2>\u013d\u00ab)(?<\u00b1\u013e_2>\u015f\u00df|\u0118\u00ee\u00bb\u00a7\\g<\u00b0\u00a4_1>\u0118\u00ee\u00bb\u00a7))$", "\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u015f\u00df\u013d\u00ab\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u0118\u00ee\u00bb\u00a7", 0, 26);
+ x2s("[[\u00a4\u0147\u00a4\u0150]]", "\u00a4\u0150", 0, 2);
+ x2s("[[\u00a4\u00a4\u00a4\u015e\u00a4\u00a6]\u00a4\u00ab]", "\u00a4\u00ab", 0, 2);
+ ns("[[^\u00a4\u02d8]]", "\u00a4\u02d8");
+ ns("[^[\u00a4\u02d8]]", "\u00a4\u02d8");
+ x2s("[^[^\u00a4\u02d8]]", "\u00a4\u02d8", 0, 2);
+ x2s("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u017b", 0, 2);
+ ns("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u00ab");
+ ns("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u00b1");
+ x2s("[\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4-\u00a4\u0148&&\u00a4\u00a6-\u00a4\u0144]", "\u00a4\u0144", 0, 2);
+ ns("[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4-\u00a4\u0148&&\u00a4\u00a6-\u00a4\u0144]", "\u00a4\u0144");
+ x2s("[[^\u00a4\u02d8&&\u00a4\u02d8]&&\u00a4\u02d8-\u00a4\u00f3]", "\u00a4\u00a4", 0, 2);
+ ns("[[^\u00a4\u02d8&&\u00a4\u02d8]&&\u00a4\u02d8-\u00a4\u00f3]", "\u00a4\u02d8");
+ x2s("[[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]&&[^\u00a4\u00a6-\u00a4\u00ab]]", "\u00a4\u00ad", 0, 2);
+ ns("[[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]&&[^\u00a4\u00a6-\u00a4\u00ab]]", "\u00a4\u00a4");
+ x2s("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00a6", 0, 2);
+ x2s("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00a8", 0, 2);
+ ns("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00ab");
+ x2s("[\u00a4\u02d8-&&-\u00a4\u02d8]", "-", 0, 1);
+ x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]q-w]", "\u00a4\u00a8", 0, 2);
+ x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "f", 0, 1);
+ x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "g", 0, 1);
+ ns("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "2");
+ x2s("a<b>\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9<\\/b>", "a<b>\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9</b>", 0, 32);
+ x2s(".<b>\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9<\\/b>", "a<b>\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9</b>", 0, 32);
+ }
+
+ /**
+  * Command-line entry point: builds a {@code TestC} and invokes its {@code run()} method.
+  *
+  * @param args command-line arguments; never read
+  * @throws Throwable anything thrown while constructing or running the suite
+  */
+ public static void main(String[] args) throws Throwable {
+     final TestC suite = new TestC();
+     suite.run();
+ }
+}
diff --git a/test/org/joni/test/TestCornerCases.java b/test/org/joni/test/TestCornerCases.java
new file mode 100644
index 0000000..b2b8377
--- /dev/null
+++ b/test/org/joni/test/TestCornerCases.java
@@ -0,0 +1,62 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.test;
+
+import org.joni.Config;
+import org.joni.Option;
+import org.joni.Regex;
+import org.joni.Region;
+import org.joni.Syntax;
+import org.jcodings.Encoding;
+import org.jcodings.specific.ASCIIEncoding;
+
+ /**
+  * Suite holding one hand-written search case (see {@link #test()}), compiled with
+  * {@code ASCIIEncoding.INSTANCE}, {@code Option.DEFAULT} and {@code Syntax.DEFAULT}.
+  */
+ public class TestCornerCases extends Test {
+
+     /** @return {@code ASCIIEncoding.INSTANCE} */
+     public Encoding encoding() {
+         return ASCIIEncoding.INSTANCE;
+     }
+
+     /**
+      * @return the string {@code "cp1250"}
+      *         (NOTE(review): presumably a charset name consumed by the Test base class; confirm there)
+      */
+     public String testEncoding() {
+         return "cp1250";
+     }
+
+     /** @return {@code Option.DEFAULT} */
+     public int option() {
+         return Option.DEFAULT;
+     }
+
+     /** @return {@code Syntax.DEFAULT} */
+     public Syntax syntax() {
+         return Syntax.DEFAULT;
+     }
+
+     /**
+      * Compiles the pattern {@code l.} and searches {@code "hello,lo"} with
+      * {@code search(3, 0, Option.NONE)}, the case the failure message labels
+      * "with reverse, 3,0". Any result other than 3 is logged to {@code Config.log}
+      * and counted in {@code nfail}.
+      */
+     public void test() {
+         final byte[] pattern = "l.".getBytes();
+         final byte[] subject = "hello,lo".getBytes();
+         final Regex regex =
+                 new Regex(pattern, 0, pattern.length, Option.DEFAULT, ASCIIEncoding.INSTANCE, Syntax.DEFAULT);
+
+         final int found = regex.matcher(subject, 0, subject.length).search(3, 0, Option.NONE);
+         if (found == 3) {
+             // Expected result: nothing to report.
+             return;
+         }
+
+         Config.log.println("FAIL: /l./ 'hello,lo' - with reverse, 3,0");
+         nfail++;
+     }
+
+     /**
+      * Command-line entry point: builds this suite and invokes its {@code run()} method.
+      *
+      * @param args command-line arguments; never read
+      * @throws Throwable anything thrown while constructing or running the suite
+      */
+     public static void main(String[] args) throws Throwable {
+         final TestCornerCases suite = new TestCornerCases();
+         suite.run();
+     }
+ }
diff --git a/test/org/joni/test/TestCrnl.java b/test/org/joni/test/TestCrnl.java
new file mode 100644
index 0000000..a8ef6a6
--- /dev/null
+++ b/test/org/joni/test/TestCrnl.java
@@ -0,0 +1,86 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.test;
+
+import org.joni.Config;
+import org.joni.Option;
+import org.joni.Syntax;
+import org.jcodings.Encoding;
+import org.jcodings.specific.ASCIIEncoding;
+
+/**
+ * Cases that match anchors ({@code ^}, {@code $}, {@code \z}, {@code \Z}), {@code .} and
+ * look-around groups against subjects containing {@code \r} and {@code \n}.
+ * When any case fails or errors, {@link #test()} prints a hint to enable
+ * {@code USE_CRNL_AS_LINE_TERMINATOR}.
+ */
+public class TestCrnl extends Test {
+
+ /** @return {@code Option.DEFAULT} */
+ public int option() {
+ return Option.DEFAULT;
+ }
+
+ /** @return {@code ASCIIEncoding.INSTANCE} */
+ public Encoding encoding() {
+ return ASCIIEncoding.INSTANCE;
+ }
+
+ /**
+ * @return the string {@code "ascii"}
+ *         (NOTE(review): presumably a charset name consumed by the Test base class; confirm there)
+ */
+ public String testEncoding() {
+ return "ascii";
+ }
+
+ /** @return {@code Syntax.DEFAULT} */
+ public Syntax syntax() {
+ return Syntax.DEFAULT;
+ }
+
+ /**
+ * Runs the CR/LF cases in order, then prints a configuration hint to {@code Config.err}
+ * when {@code nfail} or {@code nerror} is above zero.
+ *
+ * NOTE(review): x2s and ns are not defined in this class (they come from Test, which is
+ * not shown here). The calls read as x2s(pattern, subject, from, to) = expected match
+ * range and ns(pattern, subject) = expected no match; confirm against Test.
+ */
+ public void test() {
+ // Empty pattern, '.', '^' and '$' against subjects built from "\r" and "\n".
+ x2s("", "\r\n", 0, 0);
+ x2s(".", "\r\n", 0, 1);
+ ns("..", "\r\n");
+ x2s("^", "\r\n", 0, 0);
+ x2s("\\n^", "\r\nf", 1, 2);
+ x2s("\\n^a", "\r\na", 1, 3);
+ x2s("$", "\r\n", 0, 0);
+ x2s("T$", "T\r\n", 0, 1);
+ x2s("T$", "T\raT\r\n", 3, 4);
+ // End-of-string anchors \z and \Z.
+ x2s("\\z", "\r\n", 2, 2);
+ ns("a\\z", "a\r\n");
+ x2s("\\Z", "\r\n", 0, 0);
+ x2s("\\Z", "\r\na", 3, 3);
+ x2s("\\Z", "\r\n\r\n\n", 4, 4);
+ x2s("\\Z", "\r\n\r\nX", 5, 5);
+ x2s("a\\Z", "a\r\n", 0, 1);
+ x2s("aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15);
+ // Alternations that include '$'.
+ x2s("a|$", "b\r\n", 1, 1);
+ x2s("$|b", "\rb", 1, 2);
+ x2s("a$|ab$", "\r\nab\r\n", 2, 4);
+
+ // The same alternation shapes with \Z in place of '$', followed by look-around groups.
+ x2s("a|\\Z", "b\r\n", 1, 1);
+ x2s("\\Z|b", "\rb", 1, 2);
+ x2s("a\\Z|ab\\Z", "\r\nab\r\n", 2, 4);
+ x2s("(?=a$).", "a\r\n", 0, 1);
+ ns("(?=a$).", "a\r");
+ x2s("(?!a$)..", "a\r", 0, 2);
+ x2s("(?<=a$).\\n", "a\r\n", 1, 3);
+ ns("(?<!a$).\\n", "a\r\n");
+ x2s("(?=a\\Z).", "a\r\n", 0, 1);
+ ns("(?=a\\Z).", "a\r");
+ x2s("(?!a\\Z)..", "a\r", 0, 2);
+
+ // The hint suggests these cases depend on USE_CRNL_AS_LINE_TERMINATOR being enabled.
+ if (nfail > 0 || nerror > 0) Config.err.println("make sure to enable USE_CRNL_AS_LINE_TERMINATOR");
+ }
+
+ /**
+ * Command-line entry point: builds this suite and invokes its {@code run()} method.
+ *
+ * @param args command-line arguments; never read
+ * @throws Throwable anything thrown while constructing or running the suite
+ */
+ public static void main(String[] args) throws Throwable{
+ new TestCrnl().run();
+ }
+}
diff --git a/test/org/joni/test/TestJoni.java b/test/org/joni/test/TestJoni.java
new file mode 100644
index 0000000..6fb14cc
--- /dev/null
+++ b/test/org/joni/test/TestJoni.java
@@ -0,0 +1,37 @@
+package org.joni.test;
+
+import junit.framework.TestCase;
+
+public class TestJoni extends TestCase {
+    // "Test" is this package's suite base class (run(), nerror, nfail), not junit.framework.Test.
+    private Test testa;
+    private Test testc;
+    private Test testu;
+
+    protected void setUp() {
+        testa = new TestA();
+        testc = new TestC();
+        testu = new TestU();
+    }
+
+    protected void tearDown() { }
+
+    // Runs one suite; the JUnit test fails unless the suite counted zero errors and zero failures.
+    private void testJoniTest(Test test) {
+        test.run();
+        assertEquals("suite errors", 0, test.nerror); // JUnit order: (message, expected, actual)
+        assertEquals("suite failures", 0, test.nfail);
+    }
+
+    public void testAscii() {
+        testJoniTest(testa);
+    }
+
+    public void testEUCJP() {
+        testJoniTest(testc);
+    }
+
+    public void testUnicode() {
+        testJoniTest(testu);
+    }
+}
diff --git a/test/org/joni/test/TestU.java b/test/org/joni/test/TestU.java
new file mode 100644
index 0000000..ee12227
--- /dev/null
+++ b/test/org/joni/test/TestU.java
@@ -0,0 +1,770 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.test;
+
+import org.joni.Option;
+import org.joni.Syntax;
+import org.jcodings.Encoding;
+import org.jcodings.specific.UTF16BEEncoding;
+
+public class TestU extends Test {
+
+ /** Option flags for this suite: {@code Option.DEFAULT}.
+  * NOTE(review): presumably read by the Test base class when it compiles each pattern - confirm. */
+ public int option() { return Option.DEFAULT; }
+
+ /** Encoding for this suite: UTF-16 big-endian. The literals in test() are accordingly written
+  * as one high-byte/low-byte pair per character, e.g. "\000a". */
+ public Encoding encoding() { return UTF16BEEncoding.INSTANCE; }
+
+ /** Charset name for this suite. NOTE(review): presumably used by the Test base class to turn
+  * each String char into exactly one byte, so "\000a" becomes the bytes 00 61 - confirm. */
+ public String testEncoding() { return "iso-8859-1"; }
+
+ /** Regex syntax for this suite: {@code Syntax.DEFAULT}.
+  * NOTE(review): presumably handed to the regex compiler by the Test base class - confirm. */
+ public Syntax syntax() { return Syntax.DEFAULT; }
+
+ /** Byte length of {@code bytes} as reported by {@code encoding().strByteLengthNull} over the whole
+  * array. NOTE(review): presumably the length up to the 2-byte NUL terminator - confirm in jcodings. */
+ private int ulen(byte[] bytes) { return encoding().strByteLengthNull(bytes, 0, bytes.length); }
+
+ /** Renders the first {@code len} bytes, read as (high, low) byte pairs, as text: a pair with a zero
+  * high byte prints its low byte (literal if printable ASCII, else octal); other pairs print as two octal escapes. */
+ private String uconv(byte[] bytes, int len) {
+     StringBuilder sb = new StringBuilder();
+     for (int i = 0; i < len; i += 2) { // reads bytes[i + 1] as well, so len is expected to be even
+         int hi = bytes[i] & 0xff;
+         int lo = bytes[i + 1] & 0xff;
+         if (hi != 0) {
+             // High byte set: emit both bytes of the pair as backslash + 3-digit octal escapes.
+             sb.append(String.format("\\%03o\\%03o", hi, lo));
+         } else if (lo < 0x20 || lo >= 0x7f || lo == '\\' || lo == '"') {
+             // Control, DEL or 8-bit value, backslash, double quote: escape instead of printing raw.
+             sb.append(String.format("\\%03o", lo));
+         } else {
+             // Printable ASCII: append it as a char. Decoding the byte with new String(byte[])
+             // would go through the platform default charset, which need not map 0x20-0x7e to ASCII.
+             sb.append((char) lo);
+         }
+     }
+
+     return sb.toString();
+ }
+
+ /** Printable form of a byte string: {@code uconv} applied to the first {@code ulen(bytes)} bytes.
+  * NOTE(review): presumably a reporting hook declared by the Test base class - confirm. */
+ protected String repr(byte[] bytes) { return uconv(bytes, ulen(bytes)); }
+
+ /** Length of {@code bytes} in bytes, as computed by {@code ulen}.
+  * NOTE(review): presumably a hook of the Test base class for sizing patterns/subjects - confirm. */
+ protected int length(byte[] bytes) { return ulen(bytes); }
+
+ public void test() {
+ x2s("\000\000", "\000\000", 0, 0);
+ x2s("\000^\000\000", "\000\000", 0, 0);
+ x2s("\000$\000\000", "\000\000", 0, 0);
+ x2s("\000\134\000G\000\000", "\000\000", 0, 0);
+ x2s("\000\134\000A\000\000", "\000\000", 0, 0);
+ x2s("\000\134\000Z\000\000", "\000\000", 0, 0);
+ x2s("\000\134\000z\000\000", "\000\000", 0, 0);
+ x2s("\000^\000$\000\000", "\000\000", 0, 0);
+ x2s("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2);
+ x2s("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2);
+ x2s("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2);
+ x2s("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4);
+ x2s("\000\000", "\000a\000\000", 0, 0);
+ x2s("\000a\000\000", "\000a\000\000", 0, 2);
+ x2s("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2);
+ x2s("\000a\000a\000\000", "\000a\000a\000\000", 0, 4);
+ x2s("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6);
+ x2s("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70);
+ x2s("\000a\000b\000\000", "\000a\000b\000\000", 0, 4);
+ x2s("\000b\000\000", "\000a\000b\000\000", 2, 4);
+ x2s("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6);
+ x2s("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20);
+ x2s("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2);
+ x2s("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2);
+ x2s("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4);
+ x2s("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 \000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14);
+ x2s("\000.\000\000", "\000a\000\000", 0, 2);
+ ns("\000.\000\000", "\000\000");
+ x2s("\000.\000.\000\000", "\000a\000b\000\000", 0, 4);
+ x2s("\000\134\000w\000\000", "\000e\000\000", 0, 2);
+ ns("\000\134\000W\000\000", "\000e\000\000");
+ x2s("\000\134\000s\000\000", "\000 \000\000", 0, 2);
+ x2s("\000\134\000S\000\000", "\000b\000\000", 0, 2);
+ x2s("\000\134\000d\000\000", "\0004\000\000", 0, 2);
+ ns("\000\134\000D\000\000", "\0004\000\000");
+ x2s("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0);
+ x2s("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2);
+ x2s("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2);
+ x2s("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4);
+ x2s("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0);
+ x2s("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2);
+ ns("\000[\000a\000b\000]\000\000", "\000c\000\000");
+ x2s("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2);
+ ns("\000[\000^\000a\000]\000\000", "\000a\000\000");
+ x2s("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2);
+ x2s("\000[\000]\000]\000\000", "\000]\000\000", 0, 2);
+ ns("\000[\000^\000]\000]\000\000", "\000]\000\000");
+ x2s("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6);
+ x2s("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2);
+ x2s("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2);
+ x2s("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2);
+ ns("\000[\000\134\000w\000]\000\000", "\000 \000\000");
+ x2s("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4);
+ x2s("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2);
+ ns("\000[\000\134\000d\000]\000\000", "\000e\000\000");
+ x2s("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2);
+ ns("\000[\000\134\000D\000]\000\000", "\0003\000\000");
+ x2s("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2);
+ ns("\000[\000\134\000s\000]\000\000", "\000a\000\000");
+ x2s("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2);
+ ns("\000[\000\134\000S\000]\000\000", "\000 \000\000");
+ x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2);
+ ns("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000");
+ x2s("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2);
+ x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2);
+ x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14);
+ x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8);
+ ns("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000");
+ x2s("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2);
+ x2s("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2);
+ x2s("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2);
+ x2s("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2);
+ ns("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000");
+ ns("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", "\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000");
+ x2s("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2);
+ x2s("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2);
+ x2s("\000[\000&\000]\000\000", "\000&\000\000", 0, 2);
+ x2s("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2);
+ x2s("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2);
+ ns("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000");
+ ns("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000");
+ x2s("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2);
+ ns("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000");
+ ns("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000");
+ x2s("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2);
+ ns("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000");
+ x2s("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2);
+ ns("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000");
+ x2s("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2);
+ ns("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000");
+ x2s("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2);
+ x2s("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000e\000\000", 0, 2);
+ ns("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000");
+ x2s("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2);
+ ns("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000");
+ ns("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000");
+ x2s("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8);
+ x2s("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10);
+ x2s("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 \000b\000c\000c\000\000", 0, 14);
+ x2s("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10);
+ x2s("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8);
+ ns("\000.\000a\000\000", "\000a\000b\000\000");
+ x2s("\000.\000a\000\000", "\000a\000a\000\000", 0, 4);
+ x2s("\000^\000a\000\000", "\000a\000\000", 0, 2);
+ x2s("\000^\000a\000$\000\000", "\000a\000\000", 0, 2);
+ x2s("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2);
+ ns("\000^\000\134\000w\000$\000\000", "\000 \000\000");
+ x2s("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6);
+ x2s("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14);
+ x2s("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14);
+ x2s("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 \000a\000a\000a\0004\000\000", 0, 16);
+ x2s("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0);
+ x2s("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6);
+ x2s("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6);
+ x2s("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6);
+ x2s("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2);
+ x2s("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4);
+ ns("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000");
+ ns("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000");
+ ns("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000");
+ ns("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000");
+ x2s("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4);
+ x2s("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4);
+ x2s("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4);
+ x2s("\000\134\000w\000\000", "\000_\000\000", 0, 2);
+ ns("\000\134\000W\000\000", "\000_\000\000");
+ x2s("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2);
+ ns("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000");
+ x2s("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2);
+ ns("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000");
+ x2s("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2);
+ ns("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000");
+ x2s("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2);
+ ns("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000");
+ x2s("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2);
+ ns("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000");
+ ns("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000");
+ x2s("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4);
+ x2s("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4);
+ ns("\000.\000\000", "\000\012\000\000");
+ x2s("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2);
+ x2s("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4);
+ x2s("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6);
+ x2s("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26);
+ x2s("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20);
+ ns("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000");
+ ns("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000");
+ x2s("\000a\000?\000\000", "\000\000", 0, 0);
+ x2s("\000a\000?\000\000", "\000b\000\000", 0, 0);
+ x2s("\000a\000?\000\000", "\000a\000\000", 0, 2);
+ x2s("\000a\000*\000\000", "\000\000", 0, 0);
+ x2s("\000a\000*\000\000", "\000a\000\000", 0, 2);
+ x2s("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6);
+ x2s("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0);
+ ns("\000a\000+\000\000", "\000\000");
+ x2s("\000a\000+\000\000", "\000a\000\000", 0, 2);
+ x2s("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
+ x2s("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4);
+ x2s("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10);
+ x2s("\000.\000?\000\000", "\000\000", 0, 0);
+ x2s("\000.\000?\000\000", "\000f\000\000", 0, 2);
+ x2s("\000.\000?\000\000", "\000\012\000\000", 0, 0);
+ x2s("\000.\000*\000\000", "\000\000", 0, 0);
+ x2s("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10);
+ x2s("\000.\000+\000\000", "\000z\000\000", 0, 2);
+ x2s("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12);
+ x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8);
+ x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14);
+ x2s("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14);
+ x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46);
+ x2s("\000a\000|\000b\000\000", "\000a\000\000", 0, 2);
+ x2s("\000a\000|\000b\000\000", "\000b\000\000", 0, 2);
+ x2s("\000|\000a\000\000", "\000a\000\000", 0, 0);
+ x2s("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0);
+ x2s("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4);
+ x2s("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4);
+ x2s("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6);
+ x2s("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8);
+ x2s("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4);
+ x2s("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4);
+ x2s("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4);
+ ns("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000m\000n\000\000");
+ x2s("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4);
+ x2s("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2);
+ x2s("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6);
+ x2s("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2);
+ x2s("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6);
+ x2s("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2);
+ x2s("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4);
+ x2s("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2);
+ x2s("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4);
+ x2s("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2);
+ x2s("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2);
+ ns("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000");
+ x2s("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2);
+ x2s("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2);
+ x2s("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2);
+ x2s("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2);
+ x2s("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4);
+ x2s("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6);
+ x2s("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6);
+ x2s("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6);
+ ns("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000");
+ x2s("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8);
+ x2s("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 2);
+ x2s("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0);
+ x2s("\000a\000?\000|\000b\000\000", "\000\000", 0, 0);
+ x2s("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4);
+ x2s("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0);
+ x2s("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2);
+ x2s("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0);
+ x2s("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6);
+ x2s("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2);
+ ns("\000a\000+\000|\000b\000+\000\000", "\000\000");
+ x2s("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2);
+ x2s("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4);
+ x2s("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6);
+ x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8);
+ x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10);
+ x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4);
+ x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10);
+ x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4);
+ x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8);
+ x2s("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4);
+ x2s("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6);
+ x2s("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
+ x2s("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
+ x2s("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8);
+ x2s("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
+ ns("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000");
+ ns("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000");
+ ns("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000");
+ ns("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000");
+ x2s("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6);
+ x2s("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6);
+ x2s("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4);
+ ns("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000");
+ x2s("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16);
+ x2s("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14);
+ x2s("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2);
+ x2s("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2);
+ x2s("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2);
+ ns("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000");
+ x2s("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2);
+ x2s("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6);
+ x2s("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0);
+ ns("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000");
+ x2s("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
+ x2s("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6);
+ x2s("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
+ x2s("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2);
+ x2s("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8);
+ x2s("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2);
+ x2s("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4);
+ x2s("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8);
+ x2s("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0);
+ x2s("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0);
+ x2s("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2);
+ x2s("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
+ x2s("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8);
+ x2s("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0);
+ x2s("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8);
+ x2s("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0);
+ x2s("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16);
+ ns("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000");
+ x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12);
+ x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16);
+ x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8);
+ x2s("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10);
+ x2s("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12);
+ x2s("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8);
+ x2s("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
+ x2s("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
+ x2s("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
+ x3s("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1);
+ x3s("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1);
+ x2s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4);
+ x3s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1);
+ x3s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2);
+ x3s("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 20);
+ x3s("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1);
+ x3s("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2);
+ x3s("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3);
+ x3s("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 4);
+ x2s("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2);
+ x3s("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1);
+ x3s("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2);
+ x3s("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1);
+ x3s("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1);
+ x3s("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1);
+ x3s("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1);
+ x3s("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1);
+ x3s("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1);
+ x3s("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+ x3s("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+ x3s("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+ x3s("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+ x3s("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+ x3s("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1);
+ x2s("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", "\000a\000b\000c\000A\000B\000C\000\000", 0, 12);
+ x3s("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1);
+ x3s("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1);
+ x3s("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1);
+ x2s("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6);
+ x3s("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", "\000A\000B\000C\000\000", 0, 6, 1);
+ x3s("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1);
+ x3s("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1);
+ x3s("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1);
+ x3s("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1);
+ x3s("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1);
+ ns("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000");
+ x3s("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1);
+ ns("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000");
+ x3s("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1);
+ ns("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000");
+ ns("\000(\000\134\0001\000)\000\000", "\000\000");
+ ns("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000");
+ ns("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000");
+ ns("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000");
+ x2s("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8);
+ x2s("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2);
+ ns("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000");
+ x2s("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
+ ns("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000");
+ x2s("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
+ x2s("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0);
+ x2s("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8);
+ x3s("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1);
+ x2s("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10);
+ x2s("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2);
+ x2s("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20);
+ x2s("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14);
+ x2s("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16);
+ x3s("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7);
+ x2s("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12);
+ x2s("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4);
+ x2s("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000 \000\000", 0, 12);
+ ns("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000");
+ x2s("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12);
+ x2s("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18);
+ x2s("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12);
+ x2s("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
+ ns("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000");
+ ns("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000");
+ ns("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000");
+ x2s("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2);
+ x2s("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4);
+ x3s("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1);
+ x3s("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1);
+ x2s("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8);
+ ns("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000");
+ x2s("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4);
+ ns("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000");
+ x2s("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4);
+ x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6);
+ x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4);
+ x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4);
+ x2s("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4);
+ x2s("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4);
+ ns("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000");
+ x2s("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2);
+ ns("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000");
+ x2s("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2);
+ x2s("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8);
+ x2s("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16);
+ x2s("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6);
+ x2s("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0);
+ x2s("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12);
+ x3s("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1);
+ x2s("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6);
+ x2s("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
+ x2s("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16);
+ x2s("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36);
+ x3s("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1);
+ x2s("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6);
+ x2s("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4);
+ x2s("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0);
+ x2s("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18);
+ ns("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000\000");
+ x2s("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000 [...]
+ x3s("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000 [...]
+ x3s("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000 [...]
+ x2s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2);
+ x2s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26);
+ x3s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1);
+ x2s("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18);
+ x2s("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6);
+ x3s("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1);
+ x2s("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14);
+ x2s("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
+ x2s("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10);
+ x2s("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20);
+ x2s("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10);
+ x2s("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0);
+ x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0);
+ x3s("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1);
+ x2s("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12);
+ x2s("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18);
+ x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0);
+ x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2);
+ x2s("\217\372\000\000", "\217\372\000\000", 0, 2);
+ x2s("\000\000", "0B\000\000", 0, 0);
+ x2s("0B\000\000", "0B\000\000", 0, 2);
+ ns("0D\000\000", "0B\000\000");
+ x2s("0F0F\000\000", "0F0F\000\000", 0, 4);
+ x2s("0B0D0F\000\000", "0B0D0F\000\000", 0, 6);
+ x2s("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70);
+ x2s("0B\000\000", "0D0B\000\000", 2, 4);
+ x2s("0D0F\000\000", "0B0D0F\000\000", 2, 6);
+ x2s("e\207\000\000", "e\207\000\000", 0, 2);
+ x2s("\000.\000\000", "0B\000\000", 0, 2);
+ x2s("\000.\000.\000\000", "0K0M\000\000", 0, 4);
+ x2s("\000\134\000w\000\000", "0J\000\000", 0, 2);
+ ns("\000\134\000W\000\000", "0B\000\000");
+ x2s("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4);
+ x2s("\000\134\000S\000\000", "0]\000\000", 0, 2);
+ x2s("\000\134\000S\000\000", "o\042\000\000", 0, 2);
+ x2s("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0);
+ x2s("\000\134\000b\000\000", "\000 0{\000\000", 2, 2);
+ x2s("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2);
+ x2s("\000\134\000B\000\000", "0F\000 \000\000", 4, 4);
+ x2s("\000\134\000B\000\000", "\000 0D\000\000", 0, 0);
+ x2s("\000[0_0a\000]\000\000", "0a\000\000", 0, 2);
+ ns("\000[0j0k\000]\000\000", "0l\000\000");
+ x2s("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2);
+ ns("\000[\000^0Q\000]\000\000", "0Q\000\000");
+ x2s("\000[\000\134\000w\000]\000\000", "0m\000\000", 0, 2);
+ ns("\000[\000\134\000d\000]\000\000", "0u\000\000");
+ x2s("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2);
+ ns("\000[\000\134\000s\000]\000\000", "0O\000\000");
+ x2s("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2);
+ x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2);
+ x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8);
+ ns("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000");
+ x2s("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6);
+ x2s("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10);
+ x2s("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14);
+ x2s("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10);
+ x2s("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8);
+ ns("\000.0D\000\000", "0D0H\000\000");
+ x2s("\000.0J\000\000", "0J0J\000\000", 0, 4);
+ x2s("\000^0B\000\000", "0B\000\000", 0, 2);
+ x2s("\000^0\200\000$\000\000", "0\200\000\000", 0, 2);
+ x2s("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2);
+ x2s("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12);
+ x2s("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14);
+ x2s("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16);
+ x2s("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6);
+ x2s("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6);
+ x2s("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6);
+ x2s("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6);
+ x2s("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4);
+ ns("\000\134\000G0H\000\000", "0F0H0J\000\000");
+ ns("0h0f\000\134\000G\000\000", "0h0f\000\000");
+ ns("0~0\177\000\134\000A\000\000", "0~0\177\000\000");
+ ns("0~\000\134\000A0\177\000\000", "0~0\177\000\000");
+ x2s("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2);
+ ns("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000");
+ x2s("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2);
+ ns("\000(\000?\000!0h\000)0B\000\000", "0h\000\000");
+ x2s("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4);
+ ns("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000");
+ x2s("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4);
+ x2s("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6);
+ x2s("0B\000?\000\000", "\000\000", 0, 0);
+ x2s("Y\011\000?\000\000", "S\026\000\000", 0, 0);
+ x2s("Y\011\000?\000\000", "Y\011\000\000", 0, 2);
+ x2s("\221\317\000*\000\000", "\000\000", 0, 0);
+ x2s("\221\317\000*\000\000", "\221\317\000\000", 0, 2);
+ x2s("[P\000*\000\000", "[P[P[P\000\000", 0, 6);
+ x2s("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0);
+ ns("\134q\000+\000\000", "\000\000");
+ x2s("l\263\000+\000\000", "l\263\000\000", 0, 2);
+ x2s("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8);
+ x2s("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4);
+ x2s("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10);
+ x2s("\000.\000?\000\000", "0_\000\000", 0, 2);
+ x2s("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8);
+ x2s("\000.\000+\000\000", "0\215\000\000", 0, 2);
+ x2s("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8);
+ x2s("0B\000|0D\000\000", "0B\000\000", 0, 2);
+ x2s("0B\000|0D\000\000", "0D\000\000", 0, 2);
+ x2s("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4);
+ x2s("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4);
+ x2s("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6);
+ x2s("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8);
+ x2s("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4);
+ x2s("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4);
+ x2s("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0W0Y0[\000\000", 0, 6);
+ ns("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000");
+ x2s("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4);
+ x2s("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2);
+ x2s("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6);
+ x2s("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2);
+ x2s("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6);
+ x2s("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2);
+ x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4);
+ x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2);
+ x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2);
+ x2s("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4);
+ x2s("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2);
+ x2s("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2);
+ x2s("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2);
+ x2s("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2);
+ x2s("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2);
+ x2s("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2);
+ x2s("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2);
+ x2s("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2);
+ x2s("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2);
+ x2s("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4);
+ x2s("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", "0Q0Q0{\000\000", 0, 6);
+ x2s("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6);
+ x2s("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6);
+ x2s("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6);
+ ns("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000");
+ x2s("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8);
+ x2s("0B\000?\000|0D\000\000", "0B\000\000", 0, 2);
+ x2s("0B\000?\000|0D\000\000", "0D\000\000", 0, 0);
+ x2s("0B\000?\000|0D\000\000", "\000\000", 0, 0);
+ x2s("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4);
+ x2s("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0);
+ x2s("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2);
+ x2s("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4);
+ x2s("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0);
+ x2s("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6);
+ x2s("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2);
+ x2s("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0);
+ ns("0B\000+\000|0D\000+\000\000", "\000\000");
+ x2s("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2);
+ x2s("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4);
+ x2s("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6);
+ x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8);
+ x2s("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12);
+ x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10);
+ x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4);
+ x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16);
+ x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10);
+ x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4);
+ x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8);
+ x2s("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4);
+ x2s("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6);
+ x2s("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
+ x2s("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
+ x2s("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
+ x2s("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4);
+ ns("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000");
+ x2s("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16);
+ x2s("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14);
+ x2s("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8);
+ x2s("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2);
+ x2s("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2);
+ x2s("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2);
+ ns("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000");
+ x2s("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2);
+ x2s("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6);
+ x2s("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0);
+ ns("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000");
+ x2s("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0);
+ x2s("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6);
+ x2s("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0);
+ x2s("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2);
+ x2s("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8);
+ x2s("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2);
+ x2s("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4);
+ x2s("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 0, 8);
+ x2s("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 0, 0);
+ x2s("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0);
+ x2s("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", "Y\042Y\042Y\042\000\000", 0, 2);
+ x2s("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0);
+ x2s("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8);
+ x2s("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0);
+ x2s("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8);
+ x2s("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0);
+ x2s("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16);
+ ns("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000");
+ x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12);
+ x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16);
+ x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8);
+ x2s("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10);
+ x2s("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12);
+ x3s("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1);
+ x3s("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1);
+ x2s("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4);
+ x3s("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1);
+ x3s("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2);
+ x3s("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20);
+ x3s("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1);
+ x3s("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2);
+ x3s("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3);
+ x3s("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4);
+ x3s("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2);
+ x2s("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2);
+ x3s("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1);
+ x3s("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2);
+ x3s("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1);
+ x3s("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1);
+ x3s("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1);
+ x3s("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1);
+ x3s("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1);
+ x3s("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1);
+ x3s("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1);
+ x3s("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1);
+ x3s("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1);
+ x3s("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1);
+ x3s("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1);
+ x3s("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1);
+ x3s("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", "0B\000\0120F\000\000", 0, 6, 1);
+ x3s("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1);
+ x3s("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1);
+ x3s("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1);
+ x3s("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1);
+ x3s("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1);
+ x3s("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1);
+ x3s("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1);
+ ns("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000");
+ x3s("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1);
+ ns("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000");
+ x3s("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1);
+ ns("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000");
+ x2s("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4);
+ ns("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000");
+ x2s("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4);
+ x2s("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0);
+ x2s("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8);
+ x3s("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1);
+ x2s("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10);
+ x2s("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2);
+ x2s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20);
+ x2s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14);
+ x3s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2);
+ x2s("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16);
+ x3s("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7);
+ x2s("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12);
+ x2s("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4);
+ x2s("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12);
+ ns("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000");
+ x2s("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8);
+ x2s("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14);
+ x2s("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12);
+ x2s("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4);
+ ns("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000");
+ ns("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000");
+ ns("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000");
+ x2s("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2);
+ x2s("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4);
+ x3s("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1);
+ x3s("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1);
+ x2s("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 0, 12);
+ x2s("\000(\000?\000<a\0320K\000>Y\011\000|\000\134\000(\000\134\000g\000<a\0320K\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26);
+ x2s("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000<N\221\000_\0002\000>\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000<N\221\000_\0002\000>\201\352\000)\000(\000?\000<N\221\000_\0002\000>W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\35 [...]
+ x2s("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2);
+ x2s("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2);
+ ns("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000");
+ ns("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000");
+ x2s("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2);
+ x2s("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2);
+ ns("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000");
+ ns("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000");
+ x2s("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2);
+ ns("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000");
+ x2s("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2);
+ ns("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000");
+ x2s("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2);
+ ns("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000");
+ x2s("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 0, 2);
+ x2s("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2);
+ ns("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0K\000\000");
+ x2s("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2);
+ x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2);
+ x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2);
+ x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2);
+ ns("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000");
+ x2s("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40);
+ x2s("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40);
+ }
+
+ public static void main(String[] args) throws Throwable {
+ new TestU().run();
+ }
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits
mailing list