[jruby-joni] 02/279: Move src to trunk/src
Hideki Yamane
henrich at moszumanska.debian.org
Mon Nov 16 11:26:25 UTC 2015
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit 9573627b1b612475ae3ebd0c4ad15c2e9fd2db69
Author: Nick Sieger <nick at nicksieger.com>
Date: Mon Jan 7 01:50:15 2008 +0000
Move src to trunk/src
git-svn-id: http://svn.codehaus.org/jruby/joni/trunk@5517 961051c9-f516-0410-bf72-c9f7e237a7b7
---
src/org/joni/Analyser.java | 2093 ++++++++++
src/org/joni/ApplyAllCaseFoldFunction.java | 25 +
src/org/joni/ApplyCaseFold.java | 98 +
src/org/joni/ApplyCaseFoldArg.java | 35 +
src/org/joni/BitSet.java | 115 +
src/org/joni/BitStatus.java | 55 +
src/org/joni/ByteCodeMachine.java | 1715 ++++++++
src/org/joni/ByteCodePrinter.java | 350 ++
src/org/joni/CaptureTreeNode.java | 74 +
src/org/joni/CaseFoldCodeItem.java | 34 +
src/org/joni/CodeRangeBuffer.java | 413 ++
src/org/joni/CompileInfo.java | 31 +
src/org/joni/Compiler.java | 1390 +++++++
src/org/joni/Config.java | 111 +
src/org/joni/ErrorInfo.java | 28 +
src/org/joni/IntHolder.java | 25 +
src/org/joni/Lexer.java | 1385 +++++++
src/org/joni/Matcher.java | 542 +++
src/org/joni/MinMaxLen.java | 139 +
src/org/joni/NameEntry.java | 84 +
src/org/joni/NodeOptInfo.java | 126 +
src/org/joni/OptAnchorInfo.java | 94 +
src/org/joni/OptEnvironment.java | 39 +
src/org/joni/OptExactInfo.java | 172 +
src/org/joni/OptMapInfo.java | 128 +
src/org/joni/Option.java | 122 +
src/org/joni/Parser.java | 1032 +++++
src/org/joni/Regex.java | 480 +++
src/org/joni/Region.java | 66 +
src/org/joni/ScanEnvironment.java | 143 +
src/org/joni/ScannerSupport.java | 178 +
src/org/joni/SearchAlgorithm.java | 526 +++
src/org/joni/StackEntry.java | 164 +
src/org/joni/StackMachine.java | 621 +++
src/org/joni/Syntax.java | 606 +++
src/org/joni/Token.java | 172 +
src/org/joni/UnsetAddrList.java | 69 +
src/org/joni/WarnCallback.java | 32 +
src/org/joni/ast/AnchorNode.java | 92 +
src/org/joni/ast/AnyCharNode.java | 40 +
src/org/joni/ast/BackRefNode.java | 98 +
src/org/joni/ast/CClassNode.java | 529 +++
src/org/joni/ast/CTypeNode.java | 50 +
src/org/joni/ast/CallNode.java | 86 +
src/org/joni/ast/ConsAltNode.java | 154 +
src/org/joni/ast/EncloseNode.java | 151 +
src/org/joni/ast/Node.java | 134 +
src/org/joni/ast/QuantifierNode.java | 272 ++
src/org/joni/ast/StateNode.java | 232 ++
src/org/joni/ast/StringNode.java | 224 +
src/org/joni/bench/AbstractBench.java | 52 +
src/org/joni/bench/BenchGreedyBacktrack.java | 7 +
src/org/joni/bench/BenchRailsRegs.java | 31 +
src/org/joni/bench/BenchSeveralRegexps.java | 17 +
src/org/joni/constants/AnchorType.java | 58 +
src/org/joni/constants/Arguments.java | 31 +
src/org/joni/constants/CCSTATE.java | 27 +
src/org/joni/constants/CCVALTYPE.java | 26 +
src/org/joni/constants/CharacterType.java | 57 +
src/org/joni/constants/EncloseType.java | 29 +
src/org/joni/constants/MetaChar.java | 31 +
src/org/joni/constants/NodeStatus.java | 39 +
src/org/joni/constants/NodeType.java | 66 +
src/org/joni/constants/OPCode.java | 387 ++
src/org/joni/constants/OPSize.java | 75 +
src/org/joni/constants/PosixBracket.java | 83 +
src/org/joni/constants/Reduce.java | 60 +
src/org/joni/constants/RegexState.java | 28 +
src/org/joni/constants/ReturnCodes.java | 26 +
src/org/joni/constants/StackPopLevel.java | 27 +
src/org/joni/constants/StackType.java | 51 +
src/org/joni/constants/StringType.java | 27 +
src/org/joni/constants/SyntaxProperties.java | 124 +
src/org/joni/constants/TargetInfo.java | 27 +
src/org/joni/constants/TokenType.java | 48 +
src/org/joni/constants/Traverse.java | 26 +
src/org/joni/encoding/AbstractEncoding.java | 217 +
src/org/joni/encoding/CaseFoldMapEncoding.java | 129 +
src/org/joni/encoding/Encoding.java | 427 ++
src/org/joni/encoding/EucEncoding.java | 42 +
src/org/joni/encoding/ISOEncoding.java | 58 +
src/org/joni/encoding/MultiByteEncoding.java | 131 +
src/org/joni/encoding/SingleByteEncoding.java | 112 +
src/org/joni/encoding/specific/ASCIIEncoding.java | 83 +
src/org/joni/encoding/specific/BIG5Encoding.java | 154 +
src/org/joni/encoding/specific/CP1251Encoding.java | 159 +
src/org/joni/encoding/specific/EUCCNEncoding.java | 93 +
src/org/joni/encoding/specific/EUCJPEncoding.java | 205 +
src/org/joni/encoding/specific/EUCKREncoding.java | 112 +
src/org/joni/encoding/specific/EUCTWEncoding.java | 112 +
.../joni/encoding/specific/ISO8859_10Encoding.java | 155 +
.../joni/encoding/specific/ISO8859_11Encoding.java | 94 +
.../joni/encoding/specific/ISO8859_13Encoding.java | 140 +
.../joni/encoding/specific/ISO8859_14Encoding.java | 156 +
.../joni/encoding/specific/ISO8859_15Encoding.java | 146 +
.../joni/encoding/specific/ISO8859_16Encoding.java | 153 +
.../joni/encoding/specific/ISO8859_1Encoding.java | 233 ++
.../joni/encoding/specific/ISO8859_2Encoding.java | 151 +
.../joni/encoding/specific/ISO8859_3Encoding.java | 147 +
.../joni/encoding/specific/ISO8859_4Encoding.java | 150 +
.../joni/encoding/specific/ISO8859_5Encoding.java | 171 +
.../joni/encoding/specific/ISO8859_6Encoding.java | 94 +
.../joni/encoding/specific/ISO8859_7Encoding.java | 159 +
.../joni/encoding/specific/ISO8859_8Encoding.java | 94 +
.../joni/encoding/specific/ISO8859_9Encoding.java | 140 +
src/org/joni/encoding/specific/KOI8Encoding.java | 200 +
src/org/joni/encoding/specific/SJISEncoding.java | 225 ++
.../joni/encoding/specific/UTF16BEEncoding.java | 185 +
.../joni/encoding/specific/UTF16LEEncoding.java | 170 +
.../joni/encoding/specific/UTF32BEEncoding.java | 148 +
.../joni/encoding/specific/UTF32LEEncoding.java | 146 +
src/org/joni/encoding/specific/UTF8Encoding.java | 250 ++
.../joni/encoding/unicode/UnicodeCTypeNames.java | 160 +
.../joni/encoding/unicode/UnicodeCaseFolds.java | 3841 ++++++++++++++++++
.../joni/encoding/unicode/UnicodeCodeRanges.java | 3529 ++++++++++++++++
src/org/joni/encoding/unicode/UnicodeEncoding.java | 454 +++
.../joni/encoding/unicode/UnicodeProperties.java | 4270 ++++++++++++++++++++
.../encoding/unicode/UnicodePropertiesScripts.java | 866 ++++
src/org/joni/exception/ErrorMessages.java | 95 +
src/org/joni/exception/InternalException.java | 28 +
src/org/joni/exception/JOniException.java | 28 +
src/org/joni/exception/StandardException.java | 28 +
src/org/joni/exception/SyntaxException.java | 28 +
src/org/joni/exception/ValueException.java | 37 +
src/org/joni/util/ArrayCopy.java | 53 +
src/org/joni/util/BytesHash.java | 134 +
src/org/joni/util/Hash.java | 180 +
src/org/joni/util/IntArrayHash.java | 126 +
src/org/joni/util/IntHash.java | 95 +
src/org/joni/util/ObjHash.java | 99 +
130 files changed, 36626 insertions(+)
diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
new file mode 100644
index 0000000..2a83d1b
--- /dev/null
+++ b/src/org/joni/Analyser.java
@@ -0,0 +1,2093 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+import static org.joni.BitStatus.bsClear;
+import static org.joni.BitStatus.bsOnAt;
+import static org.joni.BitStatus.bsOnAtSimple;
+import static org.joni.Option.isCaptureGroup;
+import static org.joni.Option.isIgnoreCase;
+import static org.joni.Option.isMultiline;
+import static org.joni.ast.ConsAltNode.newAltNode;
+import static org.joni.ast.ConsAltNode.newListNode;
+import static org.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import org.joni.ast.AnchorNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.constants.AnchorType;
+import org.joni.constants.CharacterType;
+import org.joni.constants.EncloseType;
+import org.joni.constants.NodeType;
+import org.joni.constants.TargetInfo;
+import org.joni.exception.SyntaxException;
+
+class Analyser extends Parser {
+
+ /**
+  * Builds an analyser by handing the scan environment, the pattern bytes and
+  * the {@code p}/{@code end} positions straight to the {@code Parser} constructor.
+  */
+ protected Analyser(ScanEnvironment env, byte[] bytes, int p, int end) {
+     super(env, bytes, p, end);
+ }
+
+ private Node noNameDisableMap(Node node, int[]map, int[]counter) {
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ can.setCar(noNameDisableMap(can.car, map, counter));
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ Node target = qn.target;
+ Node old = target;
+ target = noNameDisableMap(target, map, counter);
+
+ if (target != old) {
+ qn.setTarget(target);
+ if (target.getType() == NodeType.QTFR) qn.reduceNestedQuantifier((QuantifierNode)target);
+ }
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.type == EncloseType.MEMORY) {
+ if (en.isNamedGroup()) {
+ counter[0]++;
+ map[en.regNum] = counter[0];
+ en.regNum = counter[0];
+ //en.target = noNameDisableMap(en.target, map, counter);
+ en.setTarget(noNameDisableMap(en.target, map, counter)); // ???
+ } else {
+ node = en.target;
+ en.target = null; // remove first enclose: /(a)(?<b>c)/
+ node = noNameDisableMap(node, map, counter);
+ }
+ } else {
+ //en.target = noNameDisableMap(en.target, map, counter);
+ en.setTarget(noNameDisableMap(en.target, map, counter)); // ???
+ }
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return node;
+ }
+
+ private void renumberByMap(Node node, int[]map) {
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ renumberByMap(can.car, map);
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ renumberByMap(((QuantifierNode)node).target, map);
+ break;
+
+ case NodeType.ENCLOSE:
+ renumberByMap(((EncloseNode)node).target, map);
+ break;
+
+ case NodeType.BREF:
+ ((BackRefNode)node).renumber(map);
+ break;
+
+ default:
+ break;
+ } // switch
+ }
+
+ /**
+  * Walks LIST/ALT, QTFR and ENCLOSE nodes and reports
+  * {@code ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED} through {@code newValueException}
+  * for any back-reference that is not a name reference.
+  */
+ protected final void numberedRefCheck(Node node) {
+     final int type = node.getType();
+
+     if (type == NodeType.LIST || type == NodeType.ALT) {
+         for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+             numberedRefCheck(cell.car);
+         }
+     } else if (type == NodeType.QTFR) {
+         numberedRefCheck(((QuantifierNode)node).target);
+     } else if (type == NodeType.ENCLOSE) {
+         numberedRefCheck(((EncloseNode)node).target);
+     } else if (type == NodeType.BREF) {
+         BackRefNode ref = (BackRefNode)node;
+         if (!ref.isNameRef()) newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+     }
+ }
+
+ protected final Node disableNoNameGroupCapture(Node root) {
+ int[]map = new int[env.numMem + 1];
+
+ for (int i=1; i<=env.numMem; i++) map[i] = 0;
+
+ int[]counter = new int[]{0}; // !!! this should be passed as the recursion goes right ?, move to plain int
+ root = noNameDisableMap(root, map, counter); // ???
+ renumberByMap(root, map);
+
+ for (int i=1, pos=1; i<=env.numMem; i++) {
+ if (map[i] > 0) {
+ env.memNodes[pos] = env.memNodes[i];
+ pos++;
+ }
+ }
+
+ int loc = env.captureHistory;
+ env.captureHistory = bsClear();
+
+ for (int i=1; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (bsAt(loc, i)) {
+ env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]);
+ }
+ }
+
+ env.numMem = env.numNamed;
+ regex.numMem = env.numNamed;
+
+ regex.renumberNameTable(map);
+
+ return root;
+ }
+
+ /**
+  * Delegates to {@code a.swap(b)} and, when the {@code root} field referenced one
+  * of the two nodes, re-points it at the other one.
+  */
+ private void swap(Node a, Node b) {
+     a.swap(b);
+
+     if (root != a && root != b) return;
+     root = (root == b) ? a : b;
+ }
+
+ /**
+  * Classifies the target of a quantifier with respect to capture groups and recursion
+  * (tagged USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK in the original source).
+  *
+  * @param node subtree to inspect
+  * @return {@code TargetInfo.IS_EMPTY_MEM} as soon as a MEMORY group is reached,
+  *         {@code TargetInfo.IS_EMPTY_REC} for a recursive call node, the largest value
+  *         found among the elements of a LIST/ALT, and 0 when nothing relevant is found
+  */
+ private int quantifiersMemoryInfo(Node node) {
+     int info = 0;
+
+     switch (node.getType()) {
+     case NodeType.LIST:
+     case NodeType.ALT:
+         ConsAltNode can = (ConsAltNode)node;
+         do {
+             int v = quantifiersMemoryInfo(can.car);
+             if (v > info) info = v;
+         } while ((can = can.cdr) != null);
+         break;
+
+     case NodeType.CALL:
+         if (Config.USE_SUBEXP_CALL) {
+             CallNode cn = (CallNode)node;
+             if (cn.isRecursion()) return TargetInfo.IS_EMPTY_REC; /* tiny version */
+             info = quantifiersMemoryInfo(cn.target);
+         }
+         break;
+
+     case NodeType.QTFR:
+         QuantifierNode qn = (QuantifierNode)node;
+         // a {0} quantifier never runs its target, so the target is not inspected
+         if (qn.upper != 0) info = quantifiersMemoryInfo(qn.target);
+         break;
+
+     case NodeType.ENCLOSE:
+         EncloseNode en = (EncloseNode)node;
+         switch (en.type) {
+         case EncloseType.MEMORY:
+             return TargetInfo.IS_EMPTY_MEM;
+
+         case EncloseType.OPTION:
+         case EncloseType.STOP_BACKTRACK: // was EncloseNode.STOP_BACKTRACK; aligned with the rest of the file
+             info = quantifiersMemoryInfo(en.target);
+             break;
+
+         default:
+             break;
+         } // inner switch
+         break;
+
+     default: // BREF, STR, CTYPE, CCLASS, CANY, ANCHOR and anything else contribute nothing
+         break;
+     } // switch
+
+     return info;
+ }
+
+ /**
+  * Computes a lower bound for the length matched by {@code node}, in the unit
+  * returned by {@code StringNode.length()} (presumably bytes - TODO confirm).
+  *
+  * Results for MEMORY groups are cached on the group ({@code minLength} plus the
+  * min-fixed flag) when sub-expression calls are enabled. When
+  * {@code Config.USE_SUBEXP_CALL} is off, a MEMORY group is measured like an
+  * OPTION/STOP_BACKTRACK group instead of reporting 0.
+  *
+  * @param node subtree to measure
+  * @return the minimum length; 0 for anchors, recursive references and unknown nodes
+  */
+ private int getMinMatchLength(Node node) {
+     int min = 0;
+
+     switch (node.getType()) {
+     case NodeType.BREF:
+         BackRefNode br = (BackRefNode)node;
+         if (br.isRecursion()) break;
+
+         // minimum over all groups this back-reference may point at
+         if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+         min = getMinMatchLength(env.memNodes[br.back[0]]);
+
+         for (int i = 1; i < br.backNum; i++) {
+             if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+             int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
+             if (min > tmin) min = tmin;
+         }
+         break;
+
+     case NodeType.CALL:
+         if (Config.USE_SUBEXP_CALL) {
+             CallNode cn = (CallNode)node;
+             if (cn.isRecursion()) {
+                 // do not descend into a recursive call; use the cached value if there is one
+                 EncloseNode called = (EncloseNode)cn.target;
+                 if (called.isMinFixed()) min = called.minLength;
+             } else {
+                 min = getMinMatchLength(cn.target);
+             }
+         } // USE_SUBEXP_CALL
+         break;
+
+     case NodeType.LIST:
+         ConsAltNode can = (ConsAltNode)node;
+         do {
+             min += getMinMatchLength(can.car);
+         } while ((can = can.cdr) != null);
+         break;
+
+     case NodeType.ALT:
+         ConsAltNode y = (ConsAltNode)node;
+         do {
+             int tmin = getMinMatchLength(y.car);
+             // the first alternative seeds the minimum, later ones can only lower it
+             if (y == node || min > tmin) min = tmin;
+         } while ((y = y.cdr) != null);
+         break;
+
+     case NodeType.STR:
+         min = ((StringNode)node).length();
+         break;
+
+     case NodeType.CTYPE:
+     case NodeType.CCLASS:
+     case NodeType.CANY:
+         min = 1;
+         break;
+
+     case NodeType.QTFR:
+         QuantifierNode qn = (QuantifierNode)node;
+         if (qn.lower > 0) {
+             min = getMinMatchLength(qn.target);
+             min = MinMaxLen.distanceMultiply(min, qn.lower);
+         }
+         break;
+
+     case NodeType.ENCLOSE:
+         EncloseNode en = (EncloseNode)node;
+         switch (en.type) {
+         case EncloseType.MEMORY:
+             if (Config.USE_SUBEXP_CALL) {
+                 if (en.isMinFixed()) {
+                     min = en.minLength;
+                 } else {
+                     min = getMinMatchLength(en.target);
+                     en.minLength = min;
+                     en.setMinFixed();
+                 }
+             } else {
+                 // without sub-expression calls there is no cache; still measure the target
+                 min = getMinMatchLength(en.target);
+             }
+             break;
+
+         case EncloseType.OPTION:
+         case EncloseType.STOP_BACKTRACK:
+             min = getMinMatchLength(en.target);
+             break;
+         } // inner switch
+         break;
+
+     case NodeType.ANCHOR:
+     default:
+         break;
+     } // switch
+
+     return min;
+ }
+
+ /**
+  * Computes an upper bound for the length matched by {@code node};
+  * {@code MinMaxLen.INFINITE_DISTANCE} stands for "unbounded".
+  *
+  * Results for MEMORY groups are cached on the group ({@code maxLength} plus the
+  * max-fixed flag) when sub-expression calls are enabled. When
+  * {@code Config.USE_SUBEXP_CALL} is off, a MEMORY group is measured like an
+  * OPTION/STOP_BACKTRACK group instead of reporting 0.
+  *
+  * @param node subtree to measure
+  * @return the maximum length; 0 for anchors and unknown nodes
+  */
+ private int getMaxMatchLength(Node node) {
+     int max = 0;
+
+     switch (node.getType()) {
+     case NodeType.LIST:
+         ConsAltNode ln = (ConsAltNode)node;
+         do {
+             int tmax = getMaxMatchLength(ln.car);
+             max = MinMaxLen.distanceAdd(max, tmax);
+         } while ((ln = ln.cdr) != null);
+         break;
+
+     case NodeType.ALT:
+         ConsAltNode an = (ConsAltNode)node;
+         do {
+             int tmax = getMaxMatchLength(an.car);
+             if (max < tmax) max = tmax;
+         } while ((an = an.cdr) != null);
+         break;
+
+     case NodeType.STR:
+         max = ((StringNode)node).length();
+         break;
+
+     case NodeType.CTYPE:
+     case NodeType.CCLASS:
+     case NodeType.CANY:
+         // a single character: as long as the encoding reports for its maximum length
+         max = enc.maxLengthDistance();
+         break;
+
+     case NodeType.BREF:
+         BackRefNode br = (BackRefNode)node;
+         if (br.isRecursion()) {
+             max = MinMaxLen.INFINITE_DISTANCE;
+             break;
+         }
+
+         // maximum over all groups this back-reference may point at
+         for (int i = 0; i < br.backNum; i++) {
+             if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+             int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
+             if (max < tmax) max = tmax;
+         }
+         break;
+
+     case NodeType.CALL:
+         if (Config.USE_SUBEXP_CALL) {
+             CallNode cn = (CallNode)node;
+             if (!cn.isRecursion()) {
+                 max = getMaxMatchLength(cn.target);
+             } else {
+                 max = MinMaxLen.INFINITE_DISTANCE;
+             }
+         } // USE_SUBEXP_CALL
+         break;
+
+     case NodeType.QTFR:
+         QuantifierNode qn = (QuantifierNode)node;
+         if (qn.upper != 0) {
+             max = getMaxMatchLength(qn.target);
+             // an empty target stays empty however often it is repeated
+             if (max != 0) {
+                 if (!isRepeatInfinite(qn.upper)) {
+                     max = MinMaxLen.distanceMultiply(max, qn.upper);
+                 } else {
+                     max = MinMaxLen.INFINITE_DISTANCE;
+                 }
+             }
+         }
+         break;
+
+     case NodeType.ENCLOSE:
+         EncloseNode en = (EncloseNode)node;
+         switch (en.type) {
+         case EncloseType.MEMORY:
+             if (Config.USE_SUBEXP_CALL) {
+                 if (en.isMaxFixed()) {
+                     max = en.maxLength;
+                 } else {
+                     max = getMaxMatchLength(en.target);
+                     en.maxLength = max;
+                     en.setMaxFixed();
+                 }
+             } else {
+                 // without sub-expression calls there is no cache; still measure the target
+                 max = getMaxMatchLength(en.target);
+             }
+             break;
+
+         case EncloseType.OPTION:
+         case EncloseType.STOP_BACKTRACK:
+             max = getMaxMatchLength(en.target);
+             break;
+         } // inner switch
+         break;
+
+     case NodeType.ANCHOR:
+     default:
+         break;
+     } // switch
+
+     return max;
+ }
+
+ /** Stored in {@code returnCode} when the subtree has no fixed character length. */
+ private static final int GET_CHAR_LEN_VARLEN = -1;
+ /** Stored in {@code returnCode} when the variable length comes from the outermost alternation. */
+ private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2;
+
+ /**
+  * Computes the fixed character length of {@code node}.
+  *
+  * The status is delivered through the {@code returnCode} field (declared outside this
+  * view): 0 on success, otherwise one of the {@code GET_CHAR_LEN_*} codes, in which
+  * case the returned length is not meaningful.
+  */
+ protected final int getCharLengthTree(Node node) {
+     return getCharLengthTree(node, 0);
+ }
+
+ /**
+  * Recursive worker for {@link #getCharLengthTree(Node)}.
+  *
+  * {@code returnCode} is reset to 0 on every entry, so after each recursive call it
+  * reflects the status of that call only. When {@code Config.USE_SUBEXP_CALL} is off,
+  * a MEMORY group is measured like an OPTION/STOP_BACKTRACK group instead of
+  * reporting length 0.
+  *
+  * @param node  subtree to measure
+  * @param level nesting depth of the caller; 0 for the outermost call
+  * @return the character length when {@code returnCode == 0}
+  */
+ private int getCharLengthTree(Node node, int level) {
+     level++;
+
+     int len = 0;
+     returnCode = 0;
+
+     switch (node.getType()) {
+     case NodeType.LIST:
+         ConsAltNode ln = (ConsAltNode)node;
+         do {
+             int tlen = getCharLengthTree(ln.car, level);
+             if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen);
+         } while (returnCode == 0 && (ln = ln.cdr) != null);
+         break;
+
+     case NodeType.ALT: {
+         ConsAltNode an = (ConsAltNode)node;
+         boolean varLen = false;
+
+         // every alternative must have the same length as the first one
+         int first = getCharLengthTree(an.car, level);
+         while (returnCode == 0 && (an = an.cdr) != null) {
+             int other = getCharLengthTree(an.car, level);
+             if (returnCode == 0 && first != other) varLen = true;
+         }
+
+         if (returnCode == 0) {
+             if (varLen) {
+                 // level == 1 means this alternation is the node passed to the public overload
+                 returnCode = (level == 1) ? GET_CHAR_LEN_TOP_ALT_VARLEN : GET_CHAR_LEN_VARLEN;
+             } else {
+                 len = first;
+             }
+         }
+         break;
+     }
+
+     case NodeType.STR:
+         StringNode sn = (StringNode)node;
+         len = sn.length(enc);
+         break;
+
+     case NodeType.QTFR:
+         QuantifierNode qn = (QuantifierNode)node;
+         if (qn.lower == qn.upper) {
+             int tlen = getCharLengthTree(qn.target, level);
+             if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower);
+         } else {
+             returnCode = GET_CHAR_LEN_VARLEN;
+         }
+         break;
+
+     case NodeType.CALL:
+         if (Config.USE_SUBEXP_CALL) {
+             CallNode cn = (CallNode)node;
+             if (!cn.isRecursion()) {
+                 len = getCharLengthTree(cn.target, level);
+             } else {
+                 returnCode = GET_CHAR_LEN_VARLEN;
+             }
+         } // USE_SUBEXP_CALL
+         break;
+
+     case NodeType.CTYPE: // previously fell through into the next case by a missing break
+     case NodeType.CCLASS:
+     case NodeType.CANY:
+         len = 1;
+         break;
+
+     case NodeType.ENCLOSE:
+         EncloseNode en = (EncloseNode)node;
+         switch (en.type) {
+         case EncloseType.MEMORY:
+             if (Config.USE_SUBEXP_CALL) {
+                 if (en.isCLenFixed()) {
+                     len = en.charLength;
+                 } else {
+                     len = getCharLengthTree(en.target, level);
+                     // cache only a successfully computed length
+                     if (returnCode == 0) {
+                         en.charLength = len;
+                         en.setCLenFixed();
+                     }
+                 }
+             } else {
+                 // without sub-expression calls there is no cache; still measure the target
+                 len = getCharLengthTree(en.target, level);
+             }
+             break;
+
+         case EncloseType.OPTION:
+         case EncloseType.STOP_BACKTRACK:
+             len = getCharLengthTree(en.target, level);
+             break;
+         } // inner switch
+         break;
+
+     case NodeType.ANCHOR:
+         break;
+
+     default:
+         returnCode = GET_CHAR_LEN_VARLEN;
+     } // switch
+     return len;
+ }
+
+ /*
+  * Decides whether x is provably "not included" in y (original note: x is not included y ==> 1 : 0).
+  * Only pairs built from CTYPE, CCLASS and STR nodes are examined; CTYPE comparisons against
+  * CCLASS/STR only handle CharacterType.WORD. Every pair that no branch below decides
+  * leaves the loop and yields false, so false means "could not prove it", not "included".
+  */
+ private boolean isNotIncluded(Node x, Node y) {
+ Node tmp;
+
+ // Emulates the C "retry:" label: the body runs again only via "continue retry", after x and y were swapped.
+ retry:while(true) {
+
+ int yType = y.getType();
+
+ switch(x.getType()) {
+ case NodeType.CTYPE:
+ switch(yType) {
+ case NodeType.CTYPE:
+ CTypeNode cny = (CTypeNode)y;
+ CTypeNode cnx = (CTypeNode)x;
+ return cny.ctype == cnx.ctype && cny.not != cnx.not; // same ctype with opposite negation flags
+
+ case NodeType.CCLASS:
+ // Emulates the C "swap:" label: exchange x and y so the (CCLASS, CTYPE) branch below decides the pair.
+ tmp = x;
+ x = y;
+ y = tmp;
+ // Emulates "goto retry": restart with the swapped operands.
+ continue retry;
+
+ case NodeType.STR:
+ // Emulates "goto swap": exchange x and y so the (STR, CTYPE) branch below decides the pair.
+ tmp = x;
+ x = y;
+ y = tmp;
+ continue retry;
+
+ default:
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CCLASS:
+ CClassNode xc = (CClassNode)x;
+
+ switch(yType) {
+ case NodeType.CTYPE:
+ switch(((CTypeNode)y).ctype) {
+ case CharacterType.WORD:
+ if (!((CTypeNode)y).not) { // y is the non-negated WORD type
+ if (xc.mbuf == null && !xc.isNot()) { // only a non-negated class without mbuf is checked bit by bit
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (xc.bs.at(i)) {
+ if (enc.isSbWord(i)) return false; // a set bit for which isSbWord holds: overlap with y
+ }
+ }
+ return true;
+ }
+ return false;
+ } else { // y is the negated WORD type
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (!enc.isSbWord(i)) {
+ if (!xc.isNot()) {
+ if (xc.bs.at(i)) return false; // non-negated class has this bit set: overlap with y
+ } else {
+ if (!xc.bs.at(i)) return false; // negated class has this bit clear: overlap with y
+ }
+ }
+ }
+ return true;
+ }
+ // break; not reached
+
+ default:
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CCLASS:
+ CClassNode yc = (CClassNode)y;
+
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ boolean v = xc.bs.at(i);
+ if ((v && !xc.isNot()) || (!v && xc.isNot())) { // bit i is effectively in x once negation is applied
+ v = yc.bs.at(i);
+ if ((v && !yc.isNot()) || (!v && yc.isNot())) return false; // and effectively in y as well: overlap
+ }
+ }
+ if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true; // at least one side is a non-negated class with no mbuf
+ return false;
+ // break; not reached
+
+ case NodeType.STR:
+ // Emulates "goto swap": exchange x and y so the (STR, CCLASS) branch below decides the pair.
+ tmp = x;
+ x = y;
+ y = tmp;
+ continue retry;
+
+ default:
+ break;
+
+ } // inner switch
+ break; // case NodeType.CCLASS
+
+ case NodeType.STR:
+ StringNode xs = (StringNode)x;
+ if (xs.length() == 0) break; // empty string: leaves the switch, then the loop, and returns false
+
+ switch (yType) {
+ case NodeType.CTYPE:
+ CTypeNode cy = ((CTypeNode)y);
+ switch (cy.ctype) {
+ case CharacterType.WORD:
+ if (enc.isMbcWord(xs.bytes, xs.p, xs.end)) {
+ return cy.not;
+ } else {
+ return !cy.not;
+ }
+
+ default:
+ break;
+
+ } // inner switch
+ break;
+
+ case NodeType.CCLASS:
+ CClassNode cc = (CClassNode)y;
+ int code = enc.mbcToCode(xs.bytes, xs.p, xs.p + enc.maxLength()); // NOTE(review): end bound is p + maxLength(), not xs.end - confirm this cannot read past the string
+ return !cc.isCodeInCC(enc, code);
+
+ case NodeType.STR:
+ StringNode ys = (StringNode)y;
+ int len = xs.length();
+ if (len > ys.length()) len = ys.length(); // compare only the common prefix length
+ if (xs.isAmbig() || ys.isAmbig()) {
+ /* tiny version: no attempt is made to compare strings flagged as ambiguous */
+ return false;
+ } else {
+ for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) {
+ if (ys.bytes[p] != xs.bytes[q]) return true; // prefixes differ at this byte
+ }
+ }
+ break;
+
+ default:
+ break;
+ } // inner switch
+
+ break; // case NodeType.STR
+
+ } // switch
+
+ break;
+ } // retry:while
+ return false;
+ }
+
+ /**
+  * Looks for the node that determines how a match of {@code node} begins.
+  *
+  * @param node  subtree to inspect
+  * @param exact when true, CTYPE/CCLASS nodes are not accepted, and neither is a
+  *              non-raw string while the ignore-case option is active
+  * @return the head node, or {@code null} when none can be named (BREF, ALT, CANY,
+  *         CALL, empty strings, quantifiers with a lower bound of 0, ...)
+  */
+ private Node getHeadValueNode(Node node, boolean exact) {
+     switch (node.getType()) {
+     case NodeType.CTYPE:
+     case NodeType.CCLASS:
+         return exact ? null : node;
+
+     case NodeType.LIST:
+         return getHeadValueNode(((ConsAltNode)node).car, exact);
+
+     case NodeType.STR: {
+         StringNode str = (StringNode)node;
+         if (str.end <= str.p) return null; // ???
+
+         boolean rejected = exact && !str.isRaw() && isIgnoreCase(regex.options);
+         return rejected ? null : node;
+     }
+
+     case NodeType.QTFR: {
+         QuantifierNode quant = (QuantifierNode)node;
+         if (quant.lower <= 0) return null;
+         if (quant.headExact != null) return quant.headExact;
+         return getHeadValueNode(quant.target, exact);
+     }
+
+     case NodeType.ENCLOSE: {
+         EncloseNode group = (EncloseNode)node;
+         switch (group.type) {
+         case EncloseType.OPTION: {
+             // evaluate the target under the group's own options, then restore
+             int saved = regex.options;
+             regex.options = group.option;
+             Node found = getHeadValueNode(group.target, exact);
+             regex.options = saved;
+             return found;
+         }
+
+         case EncloseType.MEMORY:
+         case EncloseType.STOP_BACKTRACK:
+             return getHeadValueNode(group.target, exact);
+
+         default:
+             return null;
+         }
+     }
+
+     case NodeType.ANCHOR: {
+         AnchorNode anchor = (AnchorNode)node;
+         if (anchor.type != AnchorType.PREC_READ) return null;
+         return getHeadValueNode(anchor.target, exact);
+     }
+
+     default: // BREF, ALT, CANY, CALL and anything else
+         return null;
+     }
+ }
+
+ /**
+  * Validates a subtree against three bit masks.
+  *
+  * @param node        subtree to check
+  * @param typeMask    allowed node types, tested against {@code getType2Bit()}
+  * @param encloseMask allowed enclose types
+  * @param anchorMask  allowed anchor types
+  * @return {@code true} when the tree is INVALID, i.e. some node, enclose type or
+  *         anchor type is not covered by its mask; {@code false} otherwise
+  */
+ private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) {
+     if ((node.getType2Bit() & typeMask) == 0) return true;
+
+     switch (node.getType()) {
+     case NodeType.LIST:
+     case NodeType.ALT:
+         for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+             if (checkTypeTree(cell.car, typeMask, encloseMask, anchorMask)) return true;
+         }
+         return false;
+
+     case NodeType.QTFR:
+         return checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
+
+     case NodeType.ENCLOSE: {
+         EncloseNode group = (EncloseNode)node;
+         if ((group.type & encloseMask) == 0) return true;
+         return checkTypeTree(group.target, typeMask, encloseMask, anchorMask);
+     }
+
+     case NodeType.ANCHOR: {
+         AnchorNode anchor = (AnchorNode)node;
+         if ((anchor.type & anchorMask) == 0) return true;
+         return anchor.target != null && checkTypeTree(anchor.target, typeMask, encloseMask, anchorMask);
+     }
+
+     default:
+         return false;
+     }
+ }
+
+ private static final int RECURSION_EXIST = 1;
+ private static final int RECURSION_INFINITE = 2;
+
+ /**
+  * Checks whether the subtree reaches the group currently flagged with mark1.
+  *
+  * @param node subtree to inspect
+  * @param head true while every node visited before this one in the enclosing lists
+  *             had a minimum match length of 0
+  * @return 0 when the marked group is not reached, {@code RECURSION_EXIST} when it is
+  *         reached with {@code head} false, {@code RECURSION_INFINITE} when it is
+  *         reached while {@code head} is still true
+  */
+ private int subexpInfRecursiveCheck(Node node, boolean head) {
+     switch (node.getType()) {
+     case NodeType.LIST: {
+         int acc = 0;
+         for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+             int ret = subexpInfRecursiveCheck(cell.car, head);
+             if (ret == RECURSION_INFINITE) return ret;
+             acc |= ret;
+             // once an element has a non-zero minimum length, later ones are not at the head
+             if (head && getMinMatchLength(cell.car) != 0) head = false;
+         }
+         return acc;
+     }
+
+     case NodeType.ALT: {
+         // stays RECURSION_EXIST only when every alternative reports it
+         int acc = RECURSION_EXIST;
+         for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+             int ret = subexpInfRecursiveCheck(cell.car, head);
+             if (ret == RECURSION_INFINITE) return ret;
+             acc &= ret;
+         }
+         return acc;
+     }
+
+     case NodeType.QTFR: {
+         QuantifierNode quant = (QuantifierNode)node;
+         int res = subexpInfRecursiveCheck(quant.target, head);
+         // with a lower bound of 0 the target may be skipped entirely
+         return (res == RECURSION_EXIST && quant.lower == 0) ? 0 : res;
+     }
+
+     case NodeType.ANCHOR: {
+         AnchorNode anchor = (AnchorNode)node;
+         switch (anchor.type) {
+         case AnchorType.PREC_READ:
+         case AnchorType.PREC_READ_NOT:
+         case AnchorType.LOOK_BEHIND:
+         case AnchorType.LOOK_BEHIND_NOT:
+             return subexpInfRecursiveCheck(anchor.target, head);
+         default:
+             return 0;
+         }
+     }
+
+     case NodeType.CALL:
+         return subexpInfRecursiveCheck(((CallNode)node).target, head);
+
+     case NodeType.ENCLOSE: {
+         EncloseNode group = (EncloseNode)node;
+         if (group.isMark2()) return 0; // already on the current path
+         if (group.isMark1()) {
+             // throw exception here ???
+             return head ? RECURSION_INFINITE : RECURSION_EXIST;
+         }
+         group.setMark2();
+         int res = subexpInfRecursiveCheck(group.target, head);
+         group.clearMark2();
+         return res;
+     }
+
+     default:
+         return 0;
+     }
+ }
+
+ /**
+  * Traverses the tree and, for every group flagged as recursive, runs
+  * {@code subexpInfRecursiveCheck} on its target with the group marked (mark1).
+  * A positive result is reported as {@code ERR_NEVER_ENDING_RECURSION} through
+  * {@code newValueException}.
+  *
+  * @param node subtree to traverse
+  * @return the result of the last nested traversal; list traversal stops at the first
+  *         non-zero result
+  */
+ protected final int subexpInfRecursiveCheckTrav(Node node) {
+     switch (node.getType()) {
+     case NodeType.LIST:
+     case NodeType.ALT: {
+         int res = 0;
+         for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+             res = subexpInfRecursiveCheckTrav(cell.car);
+             if (res != 0) break;
+         }
+         return res;
+     }
+
+     case NodeType.QTFR:
+         return subexpInfRecursiveCheckTrav(((QuantifierNode)node).target);
+
+     case NodeType.ANCHOR: {
+         AnchorNode anchor = (AnchorNode)node;
+         switch (anchor.type) {
+         case AnchorType.PREC_READ:
+         case AnchorType.PREC_READ_NOT:
+         case AnchorType.LOOK_BEHIND:
+         case AnchorType.LOOK_BEHIND_NOT:
+             return subexpInfRecursiveCheckTrav(anchor.target);
+         default:
+             return 0;
+         }
+     }
+
+     case NodeType.ENCLOSE: {
+         EncloseNode group = (EncloseNode)node;
+         if (group.isRecursion()) {
+             group.setMark1();
+             int inf = subexpInfRecursiveCheck(group.target, true);
+             if (inf > 0) newValueException(ERR_NEVER_ENDING_RECURSION);
+             group.clearMark1();
+         }
+         return subexpInfRecursiveCheckTrav(group.target);
+     }
+
+     default:
+         return 0;
+     }
+ }
+
+ /**
+  * Checks whether the subtree leads back to the group currently flagged with mark1.
+  * Call nodes on such a path are flagged as recursive on the way out.
+  *
+  * @param node subtree to inspect
+  * @return non-zero when the marked group is reachable from {@code node}, 0 otherwise
+  */
+ private int subexpRecursiveCheck(Node node) {
+     switch (node.getType()) {
+     case NodeType.LIST:
+     case NodeType.ALT: {
+         int acc = 0;
+         for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+             acc |= subexpRecursiveCheck(cell.car);
+         }
+         return acc;
+     }
+
+     case NodeType.QTFR:
+         return subexpRecursiveCheck(((QuantifierNode)node).target);
+
+     case NodeType.ANCHOR: {
+         AnchorNode anchor = (AnchorNode)node;
+         switch (anchor.type) {
+         case AnchorType.PREC_READ:
+         case AnchorType.PREC_READ_NOT:
+         case AnchorType.LOOK_BEHIND:
+         case AnchorType.LOOK_BEHIND_NOT:
+             return subexpRecursiveCheck(anchor.target);
+         default:
+             return 0;
+         }
+     }
+
+     case NodeType.CALL: {
+         CallNode call = (CallNode)node;
+         int res = subexpRecursiveCheck(call.target);
+         if (res != 0) call.setRecursion();
+         return res;
+     }
+
+     case NodeType.ENCLOSE: {
+         EncloseNode group = (EncloseNode)node;
+         if (group.isMark2()) return 0; // already on the current path
+         if (group.isMark1()) return 1; /* recursion */
+         group.setMark2();
+         int res = subexpRecursiveCheck(group.target);
+         group.clearMark2();
+         return res;
+     }
+
+     default:
+         return 0;
+     }
+ }
+
+ private static final int FOUND_CALLED_NODE = 1;
+ protected final int subexpRecursiveCheckTrav(Node node) {
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ int ret = subexpRecursiveCheckTrav(can.car);
+ if (ret == FOUND_CALLED_NODE) {
+ r = FOUND_CALLED_NODE;
+ }
+ // else if (ret < 0) return ret; ???
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ r = subexpRecursiveCheckTrav(qn.target);
+ if (qn.upper == 0) {
+ if (r == FOUND_CALLED_NODE) qn.isRefered = true;
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpRecursiveCheckTrav(an.target);
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (!en.isRecursion()) {
+ if (en.isCalled()) {
+ en.setMark1();
+ r = subexpRecursiveCheck(en.target);
+ if (r != 0) en.setRecursion();
+ en.clearMark1();
+ }
+ }
+ r = subexpRecursiveCheckTrav(en.target);
+ if (en.isCalled()) r |= FOUND_CALLED_NODE;
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ /**
+  * Resolves every call node in the tree to the group it refers to.
+  *
+  * A call with a non-zero {@code groupNum} is resolved by number; otherwise, when
+  * {@code Config.USE_NAMED_GROUP} is on, it is resolved by name through the regex
+  * name table. Problems are reported through {@code newValueException}:
+  * numbered calls where only named groups capture, group numbers above
+  * {@code env.numMem}, unknown names, and names defined more than once.
+  *
+  * @param node subtree to process
+  */
+ protected final void setupSubExpCall(Node node) {
+
+     switch (node.getType()) {
+     case NodeType.LIST:
+     case NodeType.ALT:
+         ConsAltNode can = (ConsAltNode)node;
+         do {
+             setupSubExpCall(can.car);
+         } while ((can = can.cdr) != null);
+         break;
+
+     case NodeType.QTFR:
+         setupSubExpCall(((QuantifierNode)node).target);
+         break;
+
+     case NodeType.ENCLOSE:
+         setupSubExpCall(((EncloseNode)node).target);
+         break;
+
+     case NodeType.CALL:
+         CallNode cn = (CallNode)node;
+
+         if (cn.groupNum != 0) {
+             if (Config.USE_NAMED_GROUP) {
+                 if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) {
+                     newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+                 }
+             } // USE_NAMED_GROUP
+             if (cn.groupNum > env.numMem) newValueException(ERR_UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd);
+
+             bindCallTarget(cn);
+         } else if (Config.USE_NAMED_GROUP) {
+             NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd);
+
+             if (ne == null) {
+                 newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+             } else if (ne.backNum > 1) {
+                 newValueException(ERR_MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd);
+             } else {
+                 cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ?
+                 bindCallTarget(cn);
+             }
+         }
+         break;
+
+     case NodeType.ANCHOR:
+         AnchorNode an = (AnchorNode)node;
+         switch (an.type) {
+         case AnchorType.PREC_READ:
+         case AnchorType.PREC_READ_NOT:
+         case AnchorType.LOOK_BEHIND:
+         case AnchorType.LOOK_BEHIND_NOT:
+             setupSubExpCall(an.target);
+             break;
+         }
+         break;
+
+     } // switch
+ }
+
+ /**
+  * Links a call node whose {@code groupNum} is already set to its target group
+  * (the "set_call_attr" step shared by numbered and named calls): stores the target,
+  * flags the group as called, sets the group's bit in {@code env.btMemStart} and
+  * hands the environment's unset-address list to the call node.
+  */
+ private void bindCallTarget(CallNode cn) {
+     cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes!
+     if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+
+     ((EncloseNode)cn.target).setCalled();
+     env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum);
+     cn.unsetAddrList = env.unsetAddrList;
+ }
+
+ /* divide different length alternatives in look-behind.
+ (?<=A|B) ==> (?<=A)|(?<=B)
+ (?<!A|B) ==> (?<!A)(?<!B)
+
+ node must be an AnchorNode whose target is a ConsAltNode (both are cast
+ without a check). anchorType is copied from that anchor and reused for
+ every anchor created here.
+ */
+ private void divideLookBehindAlternatives(Node node) {
+ AnchorNode an = (AnchorNode)node;
+ int anchorType = an.type;
+
+ // head: the alternation under the anchor; np: its first alternative
+ Node head = an.target;
+ Node np = ((ConsAltNode)head).car;
+
+
+ // NOTE(review): swap() is defined elsewhere; presumably it exchanges the
+ // positions of the two nodes in the tree - confirm against its definition.
+ swap(node, head);
+
+ // exchange the local references too: from here on `node` is the
+ // alternation and `head` is the original anchor
+ Node tmp = node;
+ node = head;
+ head = tmp;
+
+ // the original anchor becomes the first element of the alternation and
+ // now wraps only the first alternative
+ ((ConsAltNode)node).setCar(head);
+ ((AnchorNode)head).setTarget(np);
+ np = node;
+
+ // wrap every remaining alternative in its own anchor of the same type
+ while ((np = ((ConsAltNode)np).cdr) != null) {
+ AnchorNode insert = new AnchorNode(anchorType);
+ insert.setTarget(((ConsAltNode)np).car);
+ ((ConsAltNode)np).setCar(insert);
+ }
+
+ // negative look-behind: every alternative must fail, so each cell of the
+ // chain is converted from an alternation cell to a list cell
+ if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
+ np = node;
+ do {
+ ((ConsAltNode)np).toListNode(); /* alt -> list */
+ } while ((np = ((ConsAltNode)np).cdr) != null);
+ }
+ }
+
+    /**
+     * Determines the character length of a look-behind anchor's target.
+     *
+     * The length is obtained from {@code getCharLengthTree}; the outcome is
+     * then read from the {@code returnCode} field (presumably set by that
+     * call - confirm against its definition):
+     * 0 stores the length on the anchor, GET_CHAR_LEN_VARLEN is a syntax
+     * error, and GET_CHAR_LEN_TOP_ALT_VARLEN either splits the alternatives
+     * (when the syntax allows different-length alternatives) or is a syntax
+     * error as well. Any other code leaves the anchor untouched.
+     *
+     * @param node the look-behind anchor (cast to AnchorNode without a check)
+     */
+    private void setupLookBehind(Node node) {
+        AnchorNode lookBehind = (AnchorNode)node;
+
+        int charLen = getCharLengthTree(lookBehind.target);
+        int status = returnCode;
+
+        if (status == 0) {
+            lookBehind.charLength = charLen;
+        } else if (status == GET_CHAR_LEN_VARLEN) {
+            newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+        } else if (status == GET_CHAR_LEN_TOP_ALT_VARLEN) {
+            if (syntax.differentLengthAltLookBehind()) {
+                divideLookBehindAlternatives(node);
+            } else {
+                newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+            }
+        }
+    }
+
+    /**
+     * Uses the node that follows {@code node} in a list to tune a greedy,
+     * unbounded quantifier.
+     *
+     * Capturing enclosures around {@code node} are looked through first; any
+     * other enclosure, or any node that is not a quantifier, ends the work.
+     * For a greedy quantifier with infinite upper bound:
+     * - with USE_QTFR_PEEK_NEXT, the head string of {@code nextNode} is
+     *   recorded as {@code nextHeadExact} unless its first byte is 0;
+     * - if lower is at most 1, the target is simple, and the head values of
+     *   target and {@code nextNode} satisfy {@code isNotIncluded}, the
+     *   quantifier is wrapped in a stop-backtrack enclosure
+     *   (a*b ==> (?>a*)b).
+     *
+     * @param node     the list element to tune
+     * @param nextNode the element that follows it
+     */
+    private void nextSetup(Node node, Node nextNode) {
+        // look through capturing groups down to the node they wrap
+        Node current = node;
+        int type = current.getType();
+        while (type == NodeType.ENCLOSE) {
+            EncloseNode group = (EncloseNode)current;
+            if (!group.isMemory()) return;
+            current = group.target;
+            type = current.getType();
+        }
+        if (type != NodeType.QTFR) return;
+
+        QuantifierNode qn = (QuantifierNode)current;
+        if (!qn.greedy || !isRepeatInfinite(qn.upper)) return;
+
+        if (Config.USE_QTFR_PEEK_NEXT) {
+            StringNode headStr = (StringNode)getHeadValueNode(nextNode, true);
+            /* '\0': for UTF-16BE etc... */
+            if (headStr != null && headStr.bytes[headStr.p] != 0) { // ?????????
+                qn.nextHeadExact = headStr;
+            }
+        } // USE_QTFR_PEEK_NEXT
+
+        /* automatic possessification a*b ==> (?>a*)b */
+        if (qn.lower > 1 || !qn.target.isSimple()) return;
+
+        Node x = getHeadValueNode(qn.target, false);
+        if (x == null) return;
+
+        Node y = getHeadValueNode(nextNode, false);
+        if (y == null || !isNotIncluded(x, y)) return;
+
+        EncloseNode atomic = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
+        atomic.setStopBtSimpleRepeat();
+        //atomic.setTarget(qn.target); // optimize it ??
+        swap(current, atomic);
+
+        atomic.setTarget(current);
+    }
+
+    /**
+     * Replaces the bytes of a string node by their case-folded form.
+     *
+     * The node's bytes are folded chunk by chunk with {@code enc.mbcCaseFold}
+     * into a fresh buffer (initially twice the node length, doubled whenever
+     * it is full); the node is then pointed at that buffer.
+     * The scan position lives in the {@code value} field and {@code this} is
+     * passed to the encoding - presumably so that mbcCaseFold can advance the
+     * position; confirm against the Encoding API.
+     *
+     * @param node the string node to fold (cast to StringNode without a check)
+     */
+    private void updateStringNodeCaseFold(Node node) {
+        StringNode str = (StringNode)node;
+
+        byte[]folded = new byte[str.length() << 1];
+        int used = 0;
+
+        value = str.p;
+        int end = str.end;
+
+        byte[]chunk = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+        while (value < end) {
+            int chunkLen = enc.mbcCaseFold(regex.caseFoldFlag, str.bytes, this, end, chunk);
+            int i = 0;
+            while (i < chunkLen) {
+                if (used >= folded.length) {
+                    // output buffer exhausted: double it, keeping the content
+                    byte[]bigger = new byte[folded.length << 1];
+                    System.arraycopy(folded, 0, bigger, 0, folded.length);
+                    folded = bigger;
+                }
+                folded[used++] = chunk[i++];
+            }
+        }
+
+        str.set(folded, 0, used);
+    }
+
+    /**
+     * Builds the string node used for the part of a string that is not
+     * expanded into alternatives: the bytes {@code [p, end)} are wrapped in a
+     * new string node, case-folded in place, and flagged as ambiguous and as
+     * "don't get opt info".
+     *
+     * @return the newly created node
+     */
+    private Node expandCaseFoldMakeRemString(byte[]bytes, int p, int end) {
+        StringNode remainder = new StringNode(bytes, p, end);
+        updateStringNodeCaseFold(remainder);
+
+        remainder.setAmbig();
+        remainder.setDontGetOptInfo();
+
+        return remainder;
+    }
+
+    /**
+     * Builds the alternation that covers one character of a string together
+     * with its case-fold variants and stores its root in {@code node[0]}.
+     *
+     * Variants whose {@code byteLen} equals {@code slen} are chained after the
+     * original bytes in one alternation. If at least one variant has a
+     * different byte length, an outer alternation is created instead: its
+     * first element is a list starting with the same-length alternation, and
+     * each different-length variant becomes a further outer alternative,
+     * followed by the folded rest of the string when the variant ends before
+     * {@code end}.
+     *
+     * @param itemNum number of entries of {@code items} to use
+     * @param items   case-fold variants for the character at {@code p}
+     * @param bytes   string bytes
+     * @param p       start of the current character
+     * @param slen    byte length of the current character
+     * @param end     end of the string
+     * @param node    one-element out parameter receiving the root
+     * @return true if some variant has a byte length different from slen
+     */
+    private boolean expandCaseFoldStringAlt(int itemNum, CaseFoldCodeItem[]items,
+                                            byte[]bytes, int p, int slen, int end, Node[]node) {
+        boolean varlen = false;
+        for (int i=0; i<itemNum && !varlen; i++) {
+            varlen = items[i].byteLen != slen;
+        }
+
+        ConsAltNode varTail = null;  // last cell of the outer (variable length) alternation
+        ConsAltNode sameTail;        // last cell of the same-length alternation
+        if (varlen) {
+            varTail = newAltNode(null, null);
+            node[0] = varTail;
+
+            ConsAltNode firstList = newListNode(null, null);
+            varTail.setCar(firstList);
+
+            sameTail = newAltNode(null, null);
+            firstList.setCar(sameTail);
+        } else {
+            sameTail = newAltNode(null, null);
+            node[0] = sameTail;
+        }
+
+        // first alternative: the original bytes
+        sameTail.setCar(new StringNode(bytes, p, p + slen));
+
+        for (int i=0; i<itemNum; i++) {
+            CaseFoldCodeItem item = items[i];
+
+            // encode the variant's code points into a new string node
+            StringNode folded = new StringNode();
+            for (int j=0; j<item.codeLen; j++) {
+                folded.ensure(Config.ENC_CODE_TO_MBC_MAXLEN);
+                folded.end += enc.codeToMbc(item.code[j], folded.bytes, folded.end);
+            }
+
+            ConsAltNode cell = newAltNode(null, null);
+            if (item.byteLen == slen) {
+                cell.setCar(folded);
+                sameTail.setCdr(cell);
+                sameTail = cell;
+                continue;
+            }
+
+            // variant consumes a different number of bytes than the character
+            int q = p + item.byteLen;
+            if (q < end) {
+                Node rem = expandCaseFoldMakeRemString(bytes, q, end);
+
+                ConsAltNode seq = ConsAltNode.listAdd(null, folded);
+                ConsAltNode.listAdd(seq, rem);
+                cell.setCar(seq);
+            } else {
+                cell.setCar(folded);
+            }
+            varTail.setCdr(cell);
+            varTail = cell;
+        }
+        return varlen;
+    }
+
+ // Upper bound for the product of (variants + 1) over the expanded
+ // characters; once it is exceeded the rest of the string is left unexpanded.
+ private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8;
+ // Replaces a string node by a tree that spells out its case-fold variants.
+ // Characters without variants are collected into plain string nodes,
+ // characters with variants become alternations (expandCaseFoldStringAlt),
+ // and whatever is left after the threshold is hit becomes one folded
+ // remainder string (expandCaseFoldMakeRemString). Nodes that are already
+ // flagged ambiguous, or empty, are left untouched.
+ private void expandCaseFoldString(Node node) {
+ StringNode sn = (StringNode)node;
+
+ if (sn.isAmbig()) return;
+ if (sn.length() <= 0) return;
+
+
+ byte[]bytes = sn.bytes;
+ int p = sn.p;
+ int end = sn.end;
+ int altNum = 1;
+
+ // topRoot: root of the replacement tree once more than one node exists;
+ // root: list that currently receives new nodes;
+ // prevNode[0]: most recently created node (also the out parameter of
+ // expandCaseFoldStringAlt); snode: plain string currently being extended
+ ConsAltNode topRoot = null, root = null;
+ Node[]prevNode = new Node[]{null};
+ StringNode snode = null;
+
+ while (p < end) {
+ CaseFoldCodeItem[]items = enc.caseFoldCodesByString(regex.caseFoldFlag, bytes, p, end);
+ int len = enc.length(bytes[p]);
+
+ if (items.length == 0) {
+ // no variants: append the character to the current plain string,
+ // starting a new one if necessary
+ if (snode == null) {
+ if (root == null && prevNode[0] != null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode[0]);
+ }
+
+ prevNode[0] = snode = new StringNode(); // onig_node_new_str(NULL, NULL);
+
+ if (root != null) {
+ ConsAltNode.listAdd(root, snode);
+ }
+
+ }
+
+ snode.cat(bytes, p, p + len);
+ } else {
+ // stop expanding once the number of combinations gets too large;
+ // p is not advanced, so the remainder starts at this character
+ altNum *= (items.length + 1);
+ if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
+
+ if (root == null && prevNode[0] != null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode[0]);
+ }
+
+ boolean r = expandCaseFoldStringAlt(items.length, items, bytes, p, len, end, prevNode);
+ if (r) { // if (r == 1)
+ // variable-length variants: continue inside the list that is the
+ // first element of the alternation just built
+ if (root == null) {
+ topRoot = (ConsAltNode)prevNode[0];
+ } else {
+ ConsAltNode.listAdd(root, prevNode[0]);
+ }
+
+ root = (ConsAltNode)((ConsAltNode)prevNode[0]).car;
+ } else { /* r == 0 */
+ if (root != null) {
+ ConsAltNode.listAdd(root, prevNode[0]);
+ }
+ }
+ // the next variant-free character starts a new plain string
+ snode = null;
+ }
+ p += len;
+ }
+
+ // the loop was left early: keep the rest as one folded string
+ if (p < end) {
+ Node srem = expandCaseFoldMakeRemString(bytes, p, end);
+
+ if (prevNode[0] != null && root == null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode[0]);
+ }
+
+ if (root == null) {
+ prevNode[0] = srem;
+ } else {
+ ConsAltNode.listAdd(root, srem);
+ }
+ }
+ /* ending */
+ // NOTE(review): swap() is defined elsewhere; presumably it puts xnode in
+ // the place of the original string node - confirm against its definition.
+ Node xnode = topRoot != null ? topRoot : prevNode[0];
+ swap(node, xnode);
+ }
+
+    /** A repeat whose variable part (upper - lower) reaches this count is a "big" repeat. */
+    private static final int CEC_THRES_NUM_BIG_REPEAT = 512;
+    /** Variable-part count used for a repeat with infinite upper bound. */
+    private static final int CEC_INFINITE_NUM = 0x7fffffff;
+
+    // state flags passed down (and, for CEC_CONT_BIG_REPEAT, back up) the tree
+    private static final int CEC_IN_INFINITE_REPEAT = (1<<0);
+    private static final int CEC_IN_FINITE_REPEAT = (1<<1);
+    private static final int CEC_CONT_BIG_REPEAT = (1<<2);
+
+    /**
+     * Walks the tree and assigns combination-explosion check numbers to
+     * quantifiers.
+     *
+     * A quantifier gets the next number from {@code env.numCombExpCheck} when
+     * it has a variable part and sits inside an infinite repeat, or when it is
+     * a big repeat that follows another big repeat. A quantifier inside a
+     * finite repeat is marked with -1 instead. While descending, the method
+     * also tracks the highest capture group number seen
+     * ({@code env.currMaxRegNum}) and records whether a recursive call exists.
+     *
+     * @param node  root of the (sub)tree to process
+     * @param state CEC_* flags describing the enclosing context
+     * @return the state after this node: list elements receive the result of
+     *         their predecessor, alternatives are OR-ed together, and a
+     *         quantifier adds CEC_CONT_BIG_REPEAT for a big repeat
+     */
+    protected final int setupCombExpCheck(Node node, int state) {
+        int r = state;
+        int ret;
+
+        switch (node.getType()) {
+        case NodeType.LIST:
+            ConsAltNode ln = (ConsAltNode)node;
+
+            do {
+                r = setupCombExpCheck(ln.car, r);
+                //prev = ((ConsAltNode)node).car;
+            } while (r >= 0 && (ln = ln.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode an = (ConsAltNode)node;
+            do {
+                ret = setupCombExpCheck(an.car, state);
+                r |= ret;
+            } while (ret >= 0 && (an = an.cdr) != null);
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            int childState = state;
+            int addState = 0;
+            int varNum;
+
+            if (!isRepeatInfinite(qn.upper)) {
+                if (qn.upper > 1) {
+                    /* {0,1}, {1,1} are allowed */
+                    childState |= CEC_IN_FINITE_REPEAT;
+
+                    /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+                    if (env.backrefedMem == 0) {
+                        if (qn.target.getType() == NodeType.ENCLOSE) {
+                            EncloseNode en = (EncloseNode)qn.target;
+                            if (en.type == EncloseType.MEMORY) {
+                                if (en.target.getType() == NodeType.QTFR) {
+                                    QuantifierNode q = (QuantifierNode)en.target;
+                                    if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) {
+                                        qn.upper = qn.lower == 0 ? 1 : qn.lower;
+                                        if (qn.upper == 1) childState = state;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if ((state & CEC_IN_FINITE_REPEAT) != 0) {
+                qn.combExpCheckNum = -1;
+            } else {
+                if (isRepeatInfinite(qn.upper)) {
+                    varNum = CEC_INFINITE_NUM;
+                    childState |= CEC_IN_INFINITE_REPEAT;
+                } else {
+                    varNum = qn.upper - qn.lower;
+                }
+
+                if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT;
+
+                if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) ||
+                    ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) {
+                    // number the quantifier only once
+                    if (qn.combExpCheckNum == 0) {
+                        env.numCombExpCheck++;
+                        qn.combExpCheckNum = env.numCombExpCheck;
+                        if (env.currMaxRegNum > env.combExpMaxRegNum) {
+                            env.combExpMaxRegNum = env.currMaxRegNum;
+                        }
+                    }
+                }
+            }
+            r = setupCombExpCheck(qn.target, childState);
+            r |= addState;
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY: // same constant set as the en.type test in the QTFR case
+                if (env.currMaxRegNum < en.regNum) {
+                    env.currMaxRegNum = en.regNum;
+                }
+                r = setupCombExpCheck(en.target, state);
+                break;
+
+            default:
+                r = setupCombExpCheck(en.target, state);
+            } // inner switch
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (cn.isRecursion()) {
+                    env.hasRecursion = true;
+                } else {
+                    r = setupCombExpCheck(cn.target, state);
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        default:
+            break;
+
+        } // switch
+
+        return r;
+    }
+
+    // context flags handed down by setupTree
+    private static final int IN_ALT = (1<<0);
+    private static final int IN_NOT = (1<<1);
+    private static final int IN_REPEAT = (1<<2);
+    private static final int IN_VAR_REPEAT = (1<<3);
+    /** Longest string (in bytes) a fixed repeat of a string is expanded to. */
+    private static final int EXPAND_STRING_MAX_LENGTH = 100;
+
+    /* setup_tree does the following work.
+    1. check empty loop. (set qn->target_empty_info)
+    2. expand ignore-case in char class.
+    3. set memory status bit flags. (reg->mem_stats)
+    4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+    5. find invalid patterns in look-behind.
+    6. expand repeated string.
+    */
+    /**
+     * Prepares the tree rooted at {@code node} for compilation (see the list
+     * above).
+     *
+     * {@code regex.options} is switched temporarily while the body of an
+     * option enclosure is processed and is restored afterwards, even when the
+     * nested processing throws.
+     *
+     * @param node  root of the (sub)tree to process
+     * @param state IN_* flags describing the enclosing context
+     */
+    protected final void setupTree(Node node, int state) {
+        switch (node.getType()) {
+        case NodeType.LIST:
+            ConsAltNode lin = (ConsAltNode)node;
+            Node prev = null;
+            do {
+                setupTree(lin.car, state);
+                if (prev != null) {
+                    nextSetup(prev, lin.car);
+                }
+                prev = lin.car;
+            } while ((lin = lin.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode aln = (ConsAltNode)node;
+            do {
+                setupTree(aln.car, (state | IN_ALT));
+            } while ((aln = aln.cdr) != null);
+            break;
+
+        case NodeType.CCLASS:
+            break;
+
+        case NodeType.STR:
+            if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
+                expandCaseFoldString(node);
+            }
+            break;
+
+        case NodeType.CTYPE:
+        case NodeType.CANY:
+            break;
+
+        case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ?
+            break;
+
+        case NodeType.BREF:
+            BackRefNode br = (BackRefNode)node;
+            for (int i=0; i<br.backNum; i++) {
+                if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+                env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
+                env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
+                if (Config.USE_BACKREF_WITH_LEVEL) {
+                    if (br.isNestLevel()) {
+                        env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]);
+                    }
+                } // USE_BACKREF_AT_LEVEL
+                ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
+            }
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            Node target = qn.target;
+
+            if ((state & IN_REPEAT) != 0) qn.setInRepeat();
+
+            // a target that can match the empty string needs an empty-loop check
+            if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
+                int d = getMinMatchLength(target);
+                if (d == 0) {
+                    qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
+                    if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
+                        int info = quantifiersMemoryInfo(target);
+                        if (info > 0) qn.targetEmptyInfo = info;
+                    } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+                    // strange stuff here (turned off)
+                }
+            }
+
+            state |= IN_REPEAT;
+            if (qn.lower != qn.upper) state |= IN_VAR_REPEAT;
+
+            setupTree(target, state);
+
+            /* expand string */
+            if (target.getType() == NodeType.STR) {
+                if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
+                    qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
+                    StringNode sn = (StringNode)target;
+                    int len = sn.length();
+
+                    if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
+                        StringNode str = qn.convertToString();
+                        // if (str.parent == null) root = str;
+                        int n = qn.lower;
+                        for (int i=0; i<n; i++) {
+                            str.cat(sn.bytes, sn.p, sn.end);
+                        }
+                    }
+                    break; /* break case NT_QTFR: */
+                }
+            }
+            if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
+                if (qn.greedy && qn.targetEmptyInfo != 0) {
+                    if (target.getType() == NodeType.QTFR) {
+                        QuantifierNode tqn = (QuantifierNode)target;
+                        if (tqn.headExact != null) {
+                            qn.headExact = tqn.headExact;
+                            tqn.headExact = null;
+                        }
+                    } else {
+                        qn.headExact = getHeadValueNode(qn.target, true);
+                    }
+                }
+            } // USE_OP_PUSH_OR_JUMP_EXACT
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.OPTION:
+                int options = regex.options;
+                regex.options = en.option;
+                try {
+                    setupTree(en.target, state);
+                } finally {
+                    // restore the outer options even if the body is rejected
+                    regex.options = options;
+                }
+                break;
+
+            case EncloseType.MEMORY:
+                if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+                    env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
+                    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+
+                }
+                setupTree(en.target, state);
+                break;
+
+            case EncloseType.STOP_BACKTRACK:
+                setupTree(en.target, state);
+                if (en.target.getType() == NodeType.QTFR) {
+                    QuantifierNode tqn = (QuantifierNode)en.target;
+                    if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
+                        /* (?>a*), a*+ etc... */
+                        if (tqn.target.isSimple()) en.setStopBtSimpleRepeat();
+                    }
+                }
+                break;
+
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+            AnchorNode an = (AnchorNode)node;
+            switch (an.type) {
+            case AnchorType.PREC_READ:
+                setupTree(an.target, state);
+                break;
+
+            case AnchorType.PREC_READ_NOT:
+                setupTree(an.target, (state | IN_NOT));
+                break;
+
+            case AnchorType.LOOK_BEHIND:
+                boolean lbInvalid = checkTypeTree(an.target, NodeType.ALLOWED_IN_LB,
+                                                  EncloseType.ALLOWED_IN_LB,
+                                                  AnchorType.ALLOWED_IN_LB);
+
+                if (lbInvalid) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+                setupLookBehind(node);
+                setupTree(an.target, state);
+                break;
+
+            case AnchorType.LOOK_BEHIND_NOT:
+                boolean lbnInvalid = checkTypeTree(an.target, NodeType.ALLOWED_IN_LB,
+                                                   EncloseType.ALLOWED_IN_LB,
+                                                   AnchorType.ALLOWED_IN_LB);
+
+                if (lbnInvalid) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+
+                setupLookBehind(node);
+                setupTree(an.target, (state | IN_NOT));
+                break;
+
+            } // inner switch
+            break;
+
+        default:
+            break;
+
+        } // switch
+    }
+
+    /** A capture group is analysed at most this many times; afterwards only its recorded fixed lengths are used. */
+    private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5;
+
+    /**
+     * Collects search-optimisation data for {@code node} into {@code opt}:
+     * minimum/maximum match length, anchors, exact strings and the
+     * first-byte map.
+     *
+     * {@code opt} is cleared first and bound to the distance accumulated in
+     * {@code oenv.mmd}. {@code oenv.options} is switched temporarily for
+     * option enclosures and group calls and is restored afterwards, even when
+     * the nested analysis throws.
+     *
+     * @param node node to analyse
+     * @param opt  receives the collected information
+     * @param oenv analysis environment
+     */
+    private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd
+        opt.clear();
+        opt.setBoundNode(oenv.mmd);
+
+        switch (node.getType()) {
+        case NodeType.LIST: {
+            OptEnvironment nenv = new OptEnvironment();
+            NodeOptInfo nopt = new NodeOptInfo();
+            nenv.copy(oenv);
+            ConsAltNode lin = (ConsAltNode)node;
+            do {
+                optimizeNodeLeft(lin.car, nopt, nenv);
+                nenv.mmd.add(nopt.length);
+                opt.concatLeftNode(nopt, enc);
+            } while ((lin = lin.cdr) != null);
+            break;
+        }
+
+        case NodeType.ALT: {
+            NodeOptInfo nopt = new NodeOptInfo();
+            ConsAltNode aln = (ConsAltNode)node;
+            do {
+                optimizeNodeLeft(aln.car, nopt, oenv);
+                if (aln == node) {
+                    // first alternative: take its info as the starting point
+                    opt.copy(nopt);
+                } else {
+                    opt.altMerge(nopt, oenv);
+                }
+            } while ((aln = aln.cdr) != null);
+            break;
+        }
+
+        case NodeType.STR: {
+            StringNode sn = (StringNode)node;
+
+            int slen = sn.length();
+
+            if (!sn.isAmbig()) {
+                opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
+
+                if (slen > 0) {
+                    opt.map.addChar(sn.bytes[sn.p], enc);
+                }
+
+                opt.length.set(slen, slen);
+            } else {
+                int max;
+                if (sn.isDontGetOptInfo()) {
+                    int n = sn.length(enc);
+                    max = enc.maxLengthDistance() * n;
+                } else {
+                    opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
+                    opt.exb.ignoreCase = true;
+
+                    if (slen > 0) {
+                        opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag);
+                    }
+
+                    max = slen;
+                }
+                opt.length.set(slen, max);
+            }
+
+            if (opt.exb.length == slen) {
+                opt.exb.reachEnd = true;
+            }
+            break;
+        }
+
+        case NodeType.CCLASS: {
+            CClassNode cc = (CClassNode)node;
+            /* no need to check ignore case. (setted in setup_tree()) */
+            if (cc.mbuf != null || cc.isNot()) {
+                int min = enc.minLength();
+                int max = enc.maxLengthDistance();
+                opt.length.set(min, max);
+            } else {
+                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                    boolean z = cc.bs.at(i);
+                    if ((z && !cc.isNot()) || (!z && cc.isNot())) {
+                        opt.map.addChar((byte)i, enc);
+                    }
+                }
+                opt.length.set(1, 1);
+            }
+            break;
+        }
+
+        case NodeType.CTYPE: {
+            int min;
+            int max = enc.maxLengthDistance();
+            if (max == 1) {
+                min = 1;
+                CTypeNode cn = (CTypeNode)node;
+
+                switch (cn.ctype) {
+                case CharacterType.WORD:
+                    if (cn.not) {
+                        for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                            if (!enc.isWord(i)) {
+                                opt.map.addChar((byte)i, enc);
+                            }
+                        }
+                    } else {
+                        for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                            if (enc.isWord(i)) {
+                                opt.map.addChar((byte)i, enc);
+                            }
+                        }
+                    }
+                    break;
+                } // inner switch
+            } else {
+                min = enc.minLength();
+            }
+            opt.length.set(min, max);
+            break;
+        }
+
+        case NodeType.CANY: {
+            opt.length.set(enc.minLength(), enc.maxLengthDistance());
+            break;
+        }
+
+        case NodeType.ANCHOR: {
+            AnchorNode an = (AnchorNode)node;
+            switch (an.type) {
+            case AnchorType.BEGIN_BUF:
+            case AnchorType.BEGIN_POSITION:
+            case AnchorType.BEGIN_LINE:
+            case AnchorType.END_BUF:
+            case AnchorType.SEMI_END_BUF:
+            case AnchorType.END_LINE:
+                opt.anchor.add(an.type);
+                break;
+
+            case AnchorType.PREC_READ:
+                NodeOptInfo nopt = new NodeOptInfo();
+                optimizeNodeLeft(an.target, nopt, oenv);
+                if (nopt.exb.length > 0) {
+                    opt.expr.copy(nopt.exb);
+                } else if (nopt.exm.length > 0) {
+                    opt.expr.copy(nopt.exm);
+                }
+                // A look-ahead consumes no input, so whatever follows it must
+                // never be appended to its exact string.
+                opt.expr.reachEnd = false;
+
+                if (nopt.map.value > 0) opt.map.copy(nopt.map);
+                break;
+
+            case AnchorType.PREC_READ_NOT:
+            case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */
+            case AnchorType.LOOK_BEHIND_NOT:
+                break;
+
+            } // inner switch
+            break;
+        }
+
+        case NodeType.BREF: {
+            BackRefNode br = (BackRefNode)node;
+
+            if (br.isRecursion()) {
+                opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
+                break;
+            }
+
+            Node[]nodes = oenv.scanEnv.memNodes;
+
+            // smallest minimum and largest maximum over all referenced groups
+            int min = getMinMatchLength(nodes[br.back[0]]);
+            int max = getMaxMatchLength(nodes[br.back[0]]);
+
+            for (int i=1; i<br.backNum; i++) {
+                int tmin = getMinMatchLength(nodes[br.back[i]]);
+                int tmax = getMaxMatchLength(nodes[br.back[i]]);
+                if (min > tmin) min = tmin;
+                if (max < tmax) max = tmax;
+            }
+            opt.length.set(min, max);
+            break;
+        }
+
+        case NodeType.CALL: {
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (cn.isRecursion()) {
+                    opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
+                } else {
+                    int safe = oenv.options;
+                    oenv.options = ((EncloseNode)cn.target).option;
+                    try {
+                        optimizeNodeLeft(cn.target, opt, oenv);
+                    } finally {
+                        oenv.options = safe;
+                    }
+                }
+            } // USE_SUBEXP_CALL
+            break;
+        }
+
+        case NodeType.QTFR: {
+            NodeOptInfo nopt = new NodeOptInfo();
+            QuantifierNode qn = (QuantifierNode)node;
+            optimizeNodeLeft(qn.target, nopt, oenv);
+            if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
+                if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) {
+                    if (isMultiline(oenv.options)) {
+                        opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
+                    } else {
+                        opt.anchor.add(AnchorType.ANYCHAR_STAR);
+                    }
+                }
+            } else {
+                if (qn.lower > 0) {
+                    opt.copy(nopt);
+                    if (nopt.exb.length > 0) {
+                        if (nopt.exb.reachEnd) {
+                            // opt.exb already holds one copy of the target's
+                            // exact string; append copies 2..lower so that a
+                            // still-set reachEnd really covers all mandatory
+                            // repetitions.
+                            int i;
+                            for (i=2; i<=qn.lower && !opt.exb.isFull(); i++) {
+                                opt.exb.concat(nopt.exb, enc);
+                            }
+                            if (i < qn.lower) {
+                                opt.exb.reachEnd = false;
+                            }
+                        }
+                    }
+                    if (qn.lower != qn.upper) {
+                        opt.exb.reachEnd = false;
+                        opt.exm.reachEnd = false;
+                    }
+                    if (qn.lower > 1) {
+                        opt.exm.reachEnd = false;
+                    }
+
+                }
+            }
+            int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
+            int max;
+            if (isRepeatInfinite(qn.upper)) {
+                max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
+            } else {
+                max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
+            }
+            opt.length.set(min, max);
+            break;
+        }
+
+        case NodeType.ENCLOSE: {
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.OPTION:
+                int save = oenv.options;
+                oenv.options = en.option;
+                try {
+                    optimizeNodeLeft(en.target, opt, oenv);
+                } finally {
+                    oenv.options = save;
+                }
+                break;
+
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) {
+                    // visited too often: fall back to the recorded fixed lengths
+                    int min = 0;
+                    int max = MinMaxLen.INFINITE_DISTANCE;
+                    if (en.isMinFixed()) min = en.minLength;
+                    if (en.isMaxFixed()) max = en.maxLength;
+                    opt.length.set(min, max);
+                } else { // USE_SUBEXP_CALL
+                    optimizeNodeLeft(en.target, opt, oenv);
+                    if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
+                        if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) {
+                            opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
+                        }
+                    }
+                }
+                break;
+
+            case EncloseType.STOP_BACKTRACK:
+                optimizeNodeLeft(en.target, opt, oenv);
+                break;
+            } // inner switch
+            break;
+        }
+
+        default:
+            newInternalException(ERR_PARSER_BUG);
+        } // switch
+    }
+
+ protected final void setOptimizedInfoFromTree(Node node) {
+ NodeOptInfo opt = new NodeOptInfo();
+ OptEnvironment oenv = new OptEnvironment();
+
+ oenv.enc = regex.enc;
+ oenv.options = regex.options;
+ oenv.caseFoldFlag = regex.caseFoldFlag;
+ oenv.scanEnv = env;
+ oenv.mmd.clear(); // ??
+
+ optimizeNodeLeft(node, opt, oenv);
+
+ regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF |
+ AnchorType.BEGIN_POSITION |
+ AnchorType.ANYCHAR_STAR |
+ AnchorType.ANYCHAR_STAR_ML);
+
+ regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF |
+ AnchorType.SEMI_END_BUF);
+
+ if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) {
+ regex.anchorDmin = opt.length.min;
+ regex.anchorDmax = opt.length.max;
+ }
+
+ if (opt.exb.length > 0 || opt.exm.length > 0) {
+ opt.exb.select(opt.exm, enc);
+ if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) {
+ // !goto set_map;!
+ regex.setOptimizeMapInfo(opt.map);
+ regex.setSubAnchor(opt.map.anchor);
+ } else {
+ regex.setExactInfo(opt.exb);
+ regex.setSubAnchor(opt.exb.anchor);
+ }
+ } else if (opt.map.value > 0) {
+ // !set_map:!
+ regex.setOptimizeMapInfo(opt.map);
+ regex.setSubAnchor(opt.map.anchor);
+ } else {
+ regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
+ if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
+ }
+
+ if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
+ Config.log.println(regex.optimizeInfoToString());
+ }
+ }
+}
diff --git a/src/org/joni/ApplyAllCaseFoldFunction.java b/src/org/joni/ApplyAllCaseFoldFunction.java
new file mode 100644
index 0000000..f5ce266
--- /dev/null
+++ b/src/org/joni/ApplyAllCaseFoldFunction.java
@@ -0,0 +1,25 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * Callback that receives one case-fold mapping at a time.
+ */
+public interface ApplyAllCaseFoldFunction {
+    /**
+     * Handles a single mapping.
+     *
+     * @param from     source code point
+     * @param to       code points {@code from} folds to
+     * @param toLength number of entries of {@code to} that are in use
+     * @param o        caller-supplied argument, passed through unchanged
+     */
+    void apply(int from, int[]to, int toLength, Object o);
+}
+
diff --git a/src/org/joni/ApplyCaseFold.java b/src/org/joni/ApplyCaseFold.java
new file mode 100644
index 0000000..6fc436b
--- /dev/null
+++ b/src/org/joni/ApplyCaseFold.java
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.CClassNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.StringNode;
+import org.joni.encoding.Encoding;
+
+final class ApplyCaseFold implements ApplyAllCaseFoldFunction {
+
+ // i_apply_case_fold
+ public void apply(int from, int[]to, int length, Object o) {
+ ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o;
+
+ ScanEnvironment env = arg.env;
+ Encoding enc = env.enc;
+ CClassNode cc = arg.cc;
+ BitSet bs = cc.bs;
+
+ if (length == 1) {
+ boolean inCC = cc.isCodeInCC(enc, from);
+
+ if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
+ if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
+ if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
+ cc.addCodeRange(env, to[0], to[0]);
+ } else {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ bs.set(to[0]);
+ }
+ }
+ } else {
+ if (inCC) {
+ if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
+ if (cc.isNot()) cc.clearNotFlag(enc);
+ cc.addCodeRange(env, to[0], to[0]);
+ } else {
+ if (cc.isNot()) {
+ bs.clear(to[0]);
+ } else {
+ bs.set(to[0]);
+ }
+ }
+ }
+ } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+
+ } else {
+ if (cc.isCodeInCC(enc, from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) {
+ StringNode node = null;
+ for (int i=0; i<length; i++) {
+ if (i == 0) {
+ node = new StringNode();
+ node.ensure(Config.ENC_CODE_TO_MBC_MAXLEN);
+ node.end += enc.codeToMbc(to[i], node.bytes, node.end);
+
+ /* char-class expanded multi-char only
+ compare with string folded at match time. */
+ node.setAmbig();
+ } else {
+ node.ensure(Config.ENC_CODE_TO_MBC_MAXLEN);
+ node.end += enc.codeToMbc(to[i], node.bytes, node.end);
+ }
+ }
+
+ ConsAltNode alt = ConsAltNode.newAltNode(node, null);
+
+ if (arg.tail == null) {
+ arg.altRoot = alt;
+ } else {
+ arg.tail.setCdr(alt);
+ }
+ arg.tail = alt;
+ }
+
+ }
+
+ }
+
+ static final ApplyCaseFold INSTANCE = new ApplyCaseFold();
+}
diff --git a/src/org/joni/ApplyCaseFoldArg.java b/src/org/joni/ApplyCaseFoldArg.java
new file mode 100644
index 0000000..92dd2fb
--- /dev/null
+++ b/src/org/joni/ApplyCaseFoldArg.java
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.CClassNode;
+import org.joni.ast.ConsAltNode;
+
+/**
+ * Argument object handed to {@link ApplyCaseFold}: the scan environment and
+ * character class being processed, plus the head and tail of the chain of
+ * alternatives collected so far.
+ */
+public final class ApplyCaseFoldArg {
+    /** Scan environment of the pattern being parsed. */
+    final ScanEnvironment env;
+    /** Character class that is being widened. */
+    final CClassNode cc;
+
+    /** First collected alternative; not assigned by the constructor. */
+    ConsAltNode altRoot;
+    /** Last collected alternative; not assigned by the constructor. */
+    ConsAltNode tail;
+
+    public ApplyCaseFoldArg(ScanEnvironment env, CClassNode cc) {
+        this.env = env;
+        this.cc = cc;
+    }
+}
diff --git a/src/org/joni/BitSet.java b/src/org/joni/BitSet.java
new file mode 100644
index 0000000..3d9cf99
--- /dev/null
+++ b/src/org/joni/BitSet.java
@@ -0,0 +1,115 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * Fixed-size set of {@link #SINGLE_BYTE_SIZE} bits, one per single-byte value, packed into
+ * 32-bit "rooms" of an {@code int[]}.
+ *
+ * <p>The per-bit methods perform no explicit range check; a position outside
+ * {@code [0, SINGLE_BYTE_SIZE)} fails with {@link ArrayIndexOutOfBoundsException} from the
+ * backing array.
+ */
+public final class BitSet {
+ static final int BITS_PER_BYTE = 8;
+ public static final int SINGLE_BYTE_SIZE = (1 << BITS_PER_BYTE);
+ private static final int BITS_IN_ROOM = 4 * BITS_PER_BYTE;
+ static final int BITSET_SIZE = (SINGLE_BYTE_SIZE / BITS_IN_ROOM);
+ static final int ROOM_SHIFT = log2(BITS_IN_ROOM);
+
+ final int[] bits = new int[BITSET_SIZE];
+
+ private static final int BITS_TO_STRING_WRAP = 4;
+
+ /** Renders every bit as '0' or '1', split over {@code BITS_TO_STRING_WRAP} lines. */
+ @Override
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append("BitSet");
+ for (int i=0; i<SINGLE_BYTE_SIZE; i++) {
+ if ((i % (SINGLE_BYTE_SIZE / BITS_TO_STRING_WRAP)) == 0) buffer.append("\n ");
+ buffer.append(at(i) ? "1" : "0");
+ }
+ return buffer.toString();
+ }
+
+ /** Returns whether the bit at {@code pos} is set. */
+ public boolean at(int pos) {
+ return (bits[pos >>> ROOM_SHIFT] & bit(pos)) != 0;
+ }
+
+ /** Sets the bit at {@code pos}. */
+ public void set(int pos) {
+ bits[pos >>> ROOM_SHIFT] |= bit(pos);
+ }
+
+ /** Clears the bit at {@code pos}. */
+ public void clear(int pos) {
+ bits[pos >>> ROOM_SHIFT] &= ~bit(pos);
+ }
+
+ /** Flips the bit at {@code pos}. */
+ public void invert(int pos) {
+ bits[pos >>> ROOM_SHIFT] ^= bit(pos);
+ }
+
+ /** Clears every bit. */
+ public void clear() {
+ for (int i=0; i<BITSET_SIZE; i++) bits[i]=0;
+ }
+
+ /** Returns {@code true} when no bit is set. */
+ public boolean isEmpty() {
+ for (int i=0; i<BITSET_SIZE; i++) {
+ if (bits[i] != 0) return false;
+ }
+ return true;
+ }
+
+ /** Sets every bit in {@code [from, to]}; {@code to} is clipped to the last valid position. */
+ public void setRange(int from, int to) {
+ for (int i=from; i<=to && i < SINGLE_BYTE_SIZE; i++) set(i);
+ }
+
+ /** Sets every bit. */
+ public void setAll() {
+ for (int i=0; i<BITSET_SIZE; i++) bits[i] = ~0;
+ }
+
+ /** Flips every bit in place. */
+ public void invert() {
+ for (int i=0; i<BITSET_SIZE; i++) bits[i] = ~bits[i];
+ }
+
+ /** Writes the complement of this set into {@code to}; this set is left unchanged. */
+ public void invertTo(BitSet to) {
+ for (int i=0; i<BITSET_SIZE; i++) to.bits[i] = ~bits[i];
+ }
+
+ /** Intersects this set with {@code other} in place. */
+ public void and(BitSet other) {
+ for (int i=0; i<BITSET_SIZE; i++) bits[i] &= other.bits[i];
+ }
+
+ /** Unions {@code other} into this set in place. */
+ public void or(BitSet other) {
+ for (int i=0; i<BITSET_SIZE; i++) bits[i] |= other.bits[i];
+ }
+
+ /** Overwrites this set with the contents of {@code other}. */
+ public void copy(BitSet other) {
+ System.arraycopy(other.bits, 0, bits, 0, BITSET_SIZE);
+ }
+
+ /** Returns the number of set bits. */
+ public int numOn() {
+ int num = 0;
+ // count a whole room at a time instead of probing all 256 positions
+ for (int i=0; i<BITSET_SIZE; i++) num += Integer.bitCount(bits[i]);
+ return num;
+ }
+
+ /**
+ * Returns the single-bit mask for {@code pos} inside its room.
+ *
+ * <p>Java already reduces an {@code int} shift distance to its low five bits, so the
+ * explicit mask does not change the result; it states the room width the shift relies on.
+ */
+ static int bit(int pos){
+ return 1 << (pos & (BITS_IN_ROOM - 1));
+ }
+
+ /** Returns floor(log2(n)) for positive {@code n}, and 0 for {@code n <= 1}. */
+ private static int log2(int n){
+ int log = 0;
+ while ((n >>>= 1) != 0) log++;
+ return log;
+ }
+
+}
diff --git a/src/org/joni/BitStatus.java b/src/org/joni/BitStatus.java
new file mode 100644
index 0000000..1440170
--- /dev/null
+++ b/src/org/joni/BitStatus.java
@@ -0,0 +1,55 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+final class BitStatus {
+ public static final int BIT_STATUS_BITS_NUM = 4 * 8;
+
+ public static int bsClear() {
+ return 0;
+ }
+ public static int bsAll() {
+ return -1;
+ }
+ public static boolean bsAt(int stats, int n) {
+ return (n < BIT_STATUS_BITS_NUM ? stats & (1 << n) : (stats & 1)) != 0;
+ }
+ public static int bsOnAt(int stats, int n) {
+ if (n < BIT_STATUS_BITS_NUM) {
+ stats |= (1 << n);
+ } else {
+ stats |= 1;
+ }
+ return stats;
+ }
+ public static int bsOnAtSimple(int stats, int n) {
+ if (n < BIT_STATUS_BITS_NUM) stats |= (1 << n);
+ return stats;
+ }
+
+ public static int bsOnOff(int v, int f, boolean negative) {
+ if (negative) {
+ v &= ~f;
+ } else {
+ v |= f;
+ }
+ return v;
+ }
+}
diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
new file mode 100644
index 0000000..549a280
--- /dev/null
+++ b/src/org/joni/ByteCodeMachine.java
@@ -0,0 +1,1715 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+import static org.joni.Option.isFindCondition;
+import static org.joni.Option.isFindLongest;
+import static org.joni.Option.isFindNotEmpty;
+import static org.joni.Option.isNotBol;
+import static org.joni.Option.isNotEol;
+import static org.joni.Option.isPosixRegion;
+
+import org.joni.ast.CClassNode;
+import org.joni.constants.OPCode;
+import org.joni.constants.ReturnCodes;
+import org.joni.encoding.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+class ByteCodeMachine extends StackMachine {
+ protected final byte[]bytes;
+ protected final int str;
+ protected final int end;
+
+ private int bestLen; // return value
+ private int s = 0; // current char
+
+ private int range; // right range
+ private int sprev;
+ private int sstart;
+ private int sbegin;
+
+ private int msaStart;
+ private int msaOptions;
+ protected final Region msaRegion;
+ protected int msaBestLen;
+ protected int msaBestS;
+
+ protected int msaBegin;
+ protected int msaEnd;
+
+
+ private final int[]code; // byte code
+ private int ip; // instruction pointer
+
+ // cached values
+ protected final int option;
+ protected final Encoding enc;
+ protected final int caseFoldFlag;
+
+ /**
+ * Binds a matcher to one compiled regex and one input byte range.
+ *
+ * @param regex compiled pattern; its code, options, encoding and case-fold flag are cached
+ * @param bytes input buffer
+ * @param p offset stored as {@code str}
+ * @param end offset stored as {@code end}
+ */
+ ByteCodeMachine(Regex regex, byte[]bytes, int p, int end) {
+ super(regex);
+
+ // values cached from the regex
+ this.enc = regex.enc;
+ this.option = regex.options;
+ this.caseFoldFlag = regex.caseFoldFlag;
+ this.code = regex.code;
+
+ // input window
+ this.bytes = bytes;
+ this.str = p;
+ this.end = end;
+
+ // a region is only allocated when the pattern has capture groups
+ if (regex.numMem == 0) {
+ this.msaRegion = null;
+ } else {
+ this.msaRegion = new Region(regex.numMem + 1);
+ }
+ }
+
+ /** Stores the search options and start offset, and resets the find-longest best length when that feature is enabled. */
+ protected final void msaInit(int option, int start) {
+ msaStart = start;
+ msaOptions = option;
+ if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+ msaBestLen = ReturnCodes.MISMATCH;
+ }
+ }
+
+ // USE_COMBINATION_EXPLOSION_CHECK
+ /** Maps an input position and a 1-based state number to a bit index in the state-check buffer. */
+ private int stateCheckPos(int s, int snum) {
+ int offset = s - str;
+ int slot = snum - 1;
+ return offset * regex.numCombExpCheck + slot; // make numCombExpCheck ivar ?
+ }
+
+ /** Returns whether state {@code snum} is already marked at the current position; always false without a buffer. */
+ private boolean stateCheckVal(int snum) {
+ if (stateCheckBuff == null) return false;
+ int pos = stateCheckPos(s, snum);
+ int mask = 1 << (pos % 8);
+ return (stateCheckBuff[pos / 8] & mask) != 0;
+ }
+
+ // ELSE_IF_STATE_CHECK_MARK
+ /** Marks the state/position pair recorded in the stack entry at {@code stk} as visited. */
+ protected final void stateCheckMark() {
+ StackEntry top = stack[stk];
+ int pos = stateCheckPos(top.getStatePStr(), top.getStateCheck());
+ int mask = 1 << (pos % 8);
+ stateCheckBuff[pos / 8] |= mask;
+ }
+
+ protected int stkp; // a temporary
+ /**
+ * Builds the capture-history subtree of {@code node} by scanning stack entries from
+ * {@code stkp} up to (not including) {@code stk}.
+ *
+ * <p>A MEM_START entry for a group tracked in {@code regex.captureHistory} opens a child node
+ * and recurses; the MEM_END entry whose group equals {@code node.group} closes {@code node}.
+ * On return {@code stkp} holds the index of that closing entry.
+ *
+ * @param node node whose children (and end offset) are being filled in
+ * @return {@code false} when the closing MEM_END of {@code node} was found, {@code true} when
+ * the scan ran off the top of the stack (root node ending)
+ */
+ private boolean makeCaptureHistoryTree(CaptureTreeNode node) {
+ int k = stkp;
+
+ while (k < stk) {
+ StackEntry e = stack[k];
+ if (e.type == MEM_START) {
+ int n = e.getMemNum();
+ if (n <= Config.MAX_CAPTURE_HISTORY_GROUP && bsAt(regex.captureHistory, n)) {
+ CaptureTreeNode child = new CaptureTreeNode();
+ child.group = n;
+ child.beg = e.getMemPStr() - str;
+ node.addChild(child);
+ stkp = k + 1;
+ if (makeCaptureHistoryTree(child)) return true;
+
+ // the recursion left stkp on the child's closing entry; read the end from that
+ // entry, not from the stale MEM_START entry still held in e
+ k = stkp;
+ child.end = stack[k].getMemPStr() - str;
+ }
+ } else if (e.type == MEM_END) {
+ if (e.getMemNum() == node.group) {
+ node.end = e.getMemPStr() - str;
+ stkp = k;
+ return false;
+ }
+ }
+ // advance to the next stack entry; without this the scan never terminates
+ k++;
+ }
+ return true; /* 1: root node ending. */
+ }
+
+ private byte[]cfbuf;
+ private byte[]cfbuf2;
+
+ /** Returns the first case-fold scratch buffer, allocating it on first use. */
+ protected final byte[]cfbuf() {
+ if (cfbuf == null) {
+ cfbuf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+ }
+ return cfbuf;
+ }
+
+ /** Returns the second case-fold scratch buffer, allocating it on first use. */
+ protected final byte[]cfbuf2() {
+ if (cfbuf2 == null) {
+ cfbuf2 = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+ }
+ return cfbuf2;
+ }
+
+ private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen) {
+ byte[]buf1 = cfbuf();
+ byte[]buf2 = cfbuf2();
+
+ int s2 = ps2.value;
+ int end1 = s1 + mbLen;
+ int end2 = s2 + mbLen;
+
+ while (s1 < end1) {
+ value = s1;
+ int len1 = enc.mbcCaseFold(caseFlodFlag, bytes, this, end1, buf1);
+ s1 = value;
+ value = s2;
+ int len2 = enc.mbcCaseFold(caseFlodFlag, bytes, this, end2, buf2);
+ s2 = value;
+
+ if (len1 != len2) return false;
+ int p1 = 0;
+ int p2 = 0;
+
+ while (len1-- > 0) {
+ if (buf1[p1] != buf2[p2]) return false;
+ p1++; p2++;
+ }
+ }
+ ps2.value = s2;
+ return true;
+ }
+
+ /** Logs the input window and start position at the beginning of a match attempt. */
+ private void debugMatchBegin() {
+ String header = "match_at: str: " + str + ", end: " + end
+ + ", start: " + this.sstart + ", sprev: " + this.sprev;
+ Config.log.println(header);
+
+ int size = end - str;
+ int startOffset = this.sstart - str;
+ Config.log.println("size: " + size + ", start offset: " + startOffset);
+ }
+
+ /**
+ * Logs one trace line for the instruction about to execute: the current offset, a preview
+ * of up to seven characters of input starting at {@code s}, padding, and the disassembled
+ * instruction at {@code ip}. Does nothing unless {@code Config.DEBUG_MATCH} is set.
+ */
+ private void debugMatchLoop() {
+ if (Config.DEBUG_MATCH) {
+ Config.log.printf("%4d", (s - str)).print("> \"");
+ int q, i;
+ // print up to 7 characters, byte by byte, without running past end
+ for (i=0, q=s; i<7 && q<end; i++) {
+ int len = enc.length(bytes[q]);
+ while (len-- > 0) if (q < this.end) Config.log.print(new String(new byte[]{bytes[q++]}));
+ }
+ // NOTE: this local String shadows the int field `str` for the rest of the block
+ String str = q < end ? "...\"" : "\"";
+ q += str.length();
+ Config.log.print(str);
+ // pad so the instruction text starts 20 columns after the preview began
+ for (i=0; i<20-(q-s);i++) Config.log.print(" ");
+ StringBuilder sb = new StringBuilder();
+ new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
+ Config.log.println(sb.toString());
+ }
+ }
+
+ /**
+ * Runs the compiled byte code against the input, starting at {@code sstart}.
+ *
+ * <p>Resets the stack index and instruction pointer, calls {@code init()}, then repeatedly
+ * records the current position in {@code sbegin} and dispatches on the opcode at
+ * {@code code[ip++]} until an opcode finishes the match.
+ *
+ * @param range right limit the opcode handlers compare {@code s} against
+ * @param sstart position at which matching starts
+ * @param sprev initial value of {@code sprev}
+ * @return the value of {@code finish()}, reached from END (when {@code opEnd()} returns true)
+ * or from FINISH
+ * @throws InternalException if an unknown opcode is encountered
+ */
+ protected final int matchAt(int range, int sstart, int sprev) {
+ this.range = range;
+ this.sstart = sstart;
+ this.sprev = sprev;
+
+ stk = 0;
+ ip = 0;
+
+ if (Config.DEBUG_MATCH) debugMatchBegin();
+
+ init();
+
+ bestLen = -1;
+ s = sstart;
+
+ final int[]code = this.code;
+ while (true) {
+ if (Config.DEBUG_MATCH) debugMatchLoop();
+
+ sbegin = s;
+ // Nothing follows the switch inside the loop, so `break` and `continue` both proceed
+ // to the next instruction. Among the handlers visible here, the ones dispatched with
+ // `break` are those that end with `sprev = sbegin; // break;`.
+ switch (code[ip++]) {
+ case OPCode.END: if (opEnd()) return finish(); break;
+ case OPCode.EXACT1: opExact1(); break;
+ case OPCode.EXACT2: opExact2(); continue;
+ case OPCode.EXACT3: opExact3(); continue;
+ case OPCode.EXACT4: opExact4(); continue;
+ case OPCode.EXACT5: opExact5(); continue;
+ case OPCode.EXACTN: opExactN(); continue;
+
+ case OPCode.EXACTMB2N1: opExactMB2N1(); break;
+ case OPCode.EXACTMB2N2: opExactMB2N2(); continue;
+ case OPCode.EXACTMB2N3: opExactMB2N3(); continue;
+ case OPCode.EXACTMB2N: opExactMB2N(); continue;
+ case OPCode.EXACTMB3N: opExactMB3N(); continue;
+ case OPCode.EXACTMBN: opExactMBN(); continue;
+
+ case OPCode.EXACT1_IC: opExact1IC(); break;
+ case OPCode.EXACTN_IC: opExactNIC(); continue;
+
+ case OPCode.CCLASS: opCClass(); break;
+ case OPCode.CCLASS_MB: opCClassMB(); break;
+ case OPCode.CCLASS_MIX: opCClassMIX(); break;
+ case OPCode.CCLASS_NOT: opCClassNot(); break;
+ case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break;
+ case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break;
+ case OPCode.CCLASS_NODE: opCClassNode(); break;
+
+ case OPCode.ANYCHAR: opAnyChar(); break;
+ case OPCode.ANYCHAR_ML: opAnyCharML(); break;
+ case OPCode.ANYCHAR_STAR: opAnyCharStar(); break;
+ case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break;
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break;
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_STAR: opStateCheckAnyCharStar(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:opStateCheckAnyCharMLStar();break;
+
+ case OPCode.WORD: opWord(); break;
+ case OPCode.NOT_WORD: opNotWord(); break;
+ case OPCode.WORD_BOUND: opWordBound(); continue;
+ case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue;
+ case OPCode.WORD_BEGIN: opWordBegin(); continue;
+ case OPCode.WORD_END: opWordEnd(); continue;
+
+ case OPCode.BEGIN_BUF: opBeginBuf(); continue;
+ case OPCode.END_BUF: opEndBuf(); continue;
+ case OPCode.BEGIN_LINE: opBeginLine(); continue;
+ case OPCode.END_LINE: opEndLine(); continue;
+ case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue;
+ case OPCode.BEGIN_POSITION: opBeginPosition(); continue;
+
+ case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue;
+ case OPCode.MEMORY_START: opMemoryStart(); continue;
+ case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue;
+ case OPCode.MEMORY_END: opMemoryEnd(); continue;
+ case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue;
+ case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue;
+
+ case OPCode.BACKREF1: opBackRef1(); continue;
+ case OPCode.BACKREF2: opBackRef2(); continue;
+ case OPCode.BACKREFN: opBackRefN(); continue;
+ case OPCode.BACKREFN_IC: opBackRefNIC(); continue;
+ case OPCode.BACKREF_MULTI: opBackRefMulti(); continue;
+ case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue;
+ case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue;
+
+ case OPCode.NULL_CHECK_START: opNullCheckStart(); continue;
+ case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue;
+ case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue;
+ case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue;
+
+ case OPCode.JUMP: opJump(); continue;
+ case OPCode.PUSH: opPush(); continue;
+
+ // CEC
+ case OPCode.STATE_CHECK_PUSH: opStateCheckPush(); continue;
+ case OPCode.STATE_CHECK_PUSH_OR_JUMP: opStateCheckPushOrJump(); continue;
+ case OPCode.STATE_CHECK: opStateCheck(); continue;
+
+ case OPCode.POP: opPop(); continue;
+ case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue;
+ case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue;
+
+ case OPCode.REPEAT: opRepeat(); continue;
+ case OPCode.REPEAT_NG: opRepeatNG(); continue;
+ case OPCode.REPEAT_INC: opRepeatInc(); continue;
+ case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue;
+ case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue;
+ case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue;
+
+ case OPCode.PUSH_POS: opPushPos(); continue;
+ case OPCode.POP_POS: opPopPos(); continue;
+ case OPCode.PUSH_POS_NOT: opPushPosNot(); continue;
+ case OPCode.FAIL_POS: opFailPos(); continue;
+ case OPCode.PUSH_STOP_BT: opPushStopBT(); continue;
+ case OPCode.POP_STOP_BT: opPopStopBT(); continue;
+
+ case OPCode.LOOK_BEHIND: opLookBehind(); continue;
+ case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue;
+ case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue;
+
+ // USE_SUBEXP_CALL
+ case OPCode.CALL: opCall(); continue;
+ case OPCode.RETURN: opReturn(); continue;
+
+ // single byte implementations
+ case OPCode.CCLASS_SB: opCClassSb(); break;
+ case OPCode.CCLASS_NOT_SB: opCClassNotSb(); break;
+
+ case OPCode.ANYCHAR_SB: opAnyCharSb(); break;
+ case OPCode.ANYCHAR_ML_SB: opAnyCharMLSb(); break;
+ case OPCode.ANYCHAR_STAR_SB: opAnyCharStarSb(); break;
+ case OPCode.ANYCHAR_ML_STAR_SB: opAnyCharMLStarSb(); break;
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: opAnyCharStarPeekNextSb(); break;
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: opAnyCharMLStarPeekNextSb(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_STAR_SB: opStateCheckAnyCharStarSb(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB: opStateCheckAnyCharMLStarSb();break;
+
+ case OPCode.WORD_SB: opWordSb(); break;
+ case OPCode.NOT_WORD_SB: opNotWordSb(); break;
+ case OPCode.WORD_BOUND_SB: opWordBoundSb(); continue;
+ case OPCode.NOT_WORD_BOUND_SB: opNotWordBoundSb(); continue;
+ case OPCode.WORD_BEGIN_SB: opWordBeginSb(); continue;
+ case OPCode.WORD_END_SB: opWordEndSb(); continue;
+
+ case OPCode.LOOK_BEHIND_SB: opLookBehindSb(); continue;
+
+ case OPCode.EXACT1_IC_SB: opExact1ICSb(); break;
+ case OPCode.EXACTN_IC_SB: opExactNICSb(); continue;
+
+ case OPCode.FINISH:
+ return finish();
+
+ case OPCode.FAIL: opFail(); continue;
+
+ default:
+ throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE);
+
+ } // main switch
+ } // main while
+ }
+
+ /**
+ * Handles the END opcode: records the current match when it is longer than the best one
+ * seen so far in this attempt.
+ *
+ * <p>For a new best match the overall span and every capture group are written to
+ * {@code msaRegion} (or to {@code msaBegin}/{@code msaEnd} when there is no region), all as
+ * offsets relative to {@code str}. Otherwise the region (or {@code msaBegin}/{@code msaEnd})
+ * is reset, unless the POSIX region option applies.
+ *
+ * @return the result of {@link #endBestLength()}: {@code true} when matching should finish,
+ * {@code false} when a retry was scheduled through {@code opFail()}
+ */
+ private boolean opEnd() {
+ int n = s - sstart;
+
+ if (n > bestLen) {
+ if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+ if (isFindLongest(option)) {
+ if (n > msaBestLen) {
+ msaBestLen = n;
+ msaBestS = sstart;
+ } else {
+ // goto end_best_len;
+ return endBestLength();
+ }
+ }
+ } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+
+ bestLen = n;
+ Region region = msaRegion;
+ if (region != null) {
+ // USE_POSIX_REGION_OPTION ... else ...
+ region.beg[0] = sstart - str;
+ region.end[0] = s - str;
+ for (int i=1; i<=regex.numMem; i++) {
+ // opt!
+ if (repeatStk[memEndStk + i] != INVALID_INDEX) {
+ // a backtracked group stores a stack index, otherwise the position itself
+ region.beg[i] = (bsAt(regex.btMemStart, i) ?
+ stack[repeatStk[memStartStk + i]].getMemPStr() :
+ repeatStk[memStartStk + i]) - str;
+
+ // parenthesised so that both alternatives are made relative to str;
+ // previously only the second one was
+ region.end[i] = (bsAt(regex.btMemEnd, i) ?
+ stack[repeatStk[memEndStk + i]].getMemPStr() :
+ repeatStk[memEndStk + i]) - str;
+
+ } else {
+ region.beg[i] = region.end[i] = Region.REGION_NOTPOS;
+ }
+
+ }
+
+ if (Config.USE_CAPTURE_HISTORY) {
+ if (regex.captureHistory != 0) {
+ CaptureTreeNode node;
+ if (region.historyRoot == null) {
+ node = region.historyRoot = new CaptureTreeNode();
+ } else {
+ node = region.historyRoot;
+ node.clear();
+ }
+
+ // was clear ???
+ node.group = 0;
+ node.beg = sstart - str;
+ node.end = s - str;
+
+ stkp = 0;
+ makeCaptureHistoryTree(region.historyRoot);
+ }
+ }
+ } else {
+ msaBegin = sstart - str;
+ msaEnd = s - str;
+ }
+ } else {
+ Region region = msaRegion;
+ if (Config.USE_POSIX_API_REGION_OPTION) {
+ if (!isPosixRegion(option)) {
+ if (region != null) {
+ region.clear();
+ } else {
+ msaBegin = msaEnd = 0;
+ }
+ }
+ } else {
+ if (region != null) {
+ region.clear();
+ } else {
+ msaBegin = msaEnd = 0;
+ }
+ } // USE_POSIX_REGION_OPTION
+ }
+ // end_best_len:
+ /* default behavior: return first-matching result. */
+ return endBestLength();
+ }
+
+ /**
+ * Decides whether matching may finish after END.
+ *
+ * @return {@code false} after calling {@code opFail()} when a find option demands a retry
+ * (empty match with find-not-empty, or find-longest with input left), else {@code true}
+ */
+ private boolean endBestLength() {
+ if (!isFindCondition(option)) return true; // goto finish;
+
+ if (isFindNotEmpty(option) && s == sstart) {
+ bestLen = ReturnCodes.MISMATCH;
+ opFail(); /* for retry */
+ return false;
+ }
+ if (isFindLongest(option) && s < range) {
+ opFail(); /* for retry */
+ return false;
+ }
+ return true; // goto finish;
+ }
+
+ /** EXACT1: matches one literal byte taken from the code stream. */
+ private void opExact1() {
+ if (s >= range) {opFail(); return;}
+ int expected = code[ip];
+ if (expected != bytes[s++]) {opFail(); return;}
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ /** EXACT2: matches two literal bytes; {@code sprev} ends on the last matched byte. */
+ private void opExact2() {
+ if (s + 2 > range) {opFail(); return;}
+ for (int left = 2; left > 0; left--) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ if (left == 1) sprev = s;
+ ip++; s++;
+ }
+ }
+
+ /** EXACT3: matches three literal bytes; {@code sprev} ends on the last matched byte. */
+ private void opExact3() {
+ if (s + 3 > range) {opFail(); return;}
+ for (int left = 3; left > 0; left--) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ if (left == 1) sprev = s;
+ ip++; s++;
+ }
+ }
+
+ /** EXACT4: matches four literal bytes; {@code sprev} ends on the last matched byte. */
+ private void opExact4() {
+ if (s + 4 > range) {opFail(); return;}
+ for (int left = 4; left > 0; left--) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ if (left == 1) sprev = s;
+ ip++; s++;
+ }
+ }
+
+ /** EXACT5: matches five literal bytes; {@code sprev} ends on the last matched byte. */
+ private void opExact5() {
+ if (s + 5 > range) {opFail(); return;}
+ for (int left = 5; left > 0; left--) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ if (left == 1) sprev = s;
+ ip++; s++;
+ }
+ }
+
+ private void opExactN() {
+ int tlen = code[ip++];
+ if (s + tlen > range) {opFail(); return;}
+
+ while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;}
+ sprev = s - 1;
+ }
+
+ /** EXACTMB2N1: matches one literal two-byte character. */
+ private void opExactMB2N1() {
+ if (s + 2 > range) {opFail(); return;}
+ for (int i = 0; i < 2; i++) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** EXACTMB2N2: matches two literal two-byte characters; {@code sprev} marks the start of the second. */
+ private void opExactMB2N2() {
+ if (s + 4 > range) {opFail(); return;}
+ for (int i = 0; i < 4; i++) {
+ if (i == 2) sprev = s;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ }
+
+ /** EXACTMB2N3: matches three literal two-byte characters; {@code sprev} marks the start of the third. */
+ private void opExactMB2N3() {
+ if (s + 6 > range) {opFail(); return;}
+ for (int i = 0; i < 6; i++) {
+ if (i == 4) sprev = s;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ }
+
+ /**
+ * EXACTMB2N: matches {@code tlen} literal two-byte characters, where {@code tlen} is the
+ * first operand. On success {@code sprev} marks the start of the last matched character.
+ */
+ private void opExactMB2N() {
+ int tlen = code[ip++];
+ // the needed bytes are counted from the current position, as in the sibling EXACT* handlers
+ if (s + tlen * 2 > range) {opFail(); return;}
+
+ while(tlen-- > 0) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ sprev = s - 2;
+ }
+
+ /**
+ * EXACTMB3N: matches {@code tlen} literal three-byte characters, where {@code tlen} is the
+ * first operand. On success {@code sprev} marks the start of the last matched character.
+ */
+ private void opExactMB3N() {
+ int tlen = code[ip++];
+ // the needed bytes are counted from the current position, as in the sibling EXACT* handlers
+ if (s + tlen * 3 > range) {opFail(); return;}
+
+ while (tlen-- > 0) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ sprev = s - 3;
+ }
+
+ private void opExactMBN() {
+ int tlen = code[ip++]; /* mb-len */
+ int tlen2= code[ip++]; /* string len */
+
+ tlen2 *= tlen;
+ if (s + tlen2 > range) {opFail(); return;}
+
+ while(tlen2-- > 0) {
+ if (code[ip] != bytes[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ sprev = s - tlen;
+ }
+
+ private void opExact1IC() {
+ if (s >= range) {opFail(); return;}
+
+ byte[]lowbuf = cfbuf();
+
+ value = s;
+ int len = enc.mbcCaseFold(caseFoldFlag, bytes, this, end, lowbuf);
+ s = value;
+
+ if (s > range) {opFail(); return;}
+
+ int q = 0;
+ while (len-- > 0) {
+ if (code[ip] != lowbuf[q]) {opFail(); return;}
+ ip++; q++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** EXACT1_IC_SB: compares one input byte, mapped through the encoding's lower-case table, with the code stream. */
+ private void opExact1ICSb() {
+ if (s >= range) {opFail(); return;}
+ int expected = code[ip];
+ if (expected != enc.toLowerCaseTable()[bytes[s++] & 0xff]) {opFail(); return;}
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ private void opExactNIC() {
+ int tlen = code[ip++];
+ int endp = ip + tlen;
+
+ byte[]lowbuf = cfbuf();
+
+ while (ip < endp) {
+ sprev = s;
+ if (s >= range) {opFail(); return;}
+
+ value = s;
+ int len = enc.mbcCaseFold(caseFoldFlag, bytes, this, end, lowbuf);
+ s = value;
+
+ if (s > range) {opFail(); return;}
+ int q = 0;
+ while (len-- > 0) {
+ if (code[ip] != lowbuf[q]) {opFail(); return;}
+ ip++; q++;
+ }
+ }
+ }
+
+ private void opExactNICSb() {
+ int tlen = code[ip++];
+ if (s + tlen > range) {opFail(); return;}
+ byte[]toLowerTable = enc.toLowerCaseTable();
+ while (tlen-- > 0) if (code[ip++] != toLowerTable[bytes[s++] & 0xff]) {opFail(); return;}
+ sprev = s - 1;
+ }
+
+ private boolean isInBitSet() {
+ int c = bytes[s] & 0xff;
+ return ((code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0);
+ }
+
+ /** CCLASS: the current byte must be in the inline bit set; consumes one character of encoding-defined length. */
+ private void opCClass() {
+ if (s >= range) {opFail(); return;}
+ if (!isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ s += enc.length(bytes[s]); /* OP_CCLASS can match mb-code. \D, \S */
+ sprev = sbegin; // break;
+ }
+
+ /** CCLASS_SB: the current byte must be in the inline bit set; consumes exactly one byte. */
+ private void opCClassSb() {
+ if (s >= range) {opFail(); return;}
+ if (!isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private boolean isInClassMB() {
+ int tlen = code[ip++];
+ if (s >= range) return false;
+ int mbLen = enc.length(bytes[s]);
+ if (s + mbLen > range) return false;
+ int ss = s;
+ s += mbLen;
+ int c = enc.mbcToCode(bytes, ss, s);
+ if (!CodeRangeBuffer.isInCodeRange(code, ip, c)) return false;
+ ip += tlen;
+ return true;
+ }
+
+ private void opCClassMB() {
+ // beyond string check
+ if (s >= range || !enc.isMbcHead(bytes[s])) {opFail(); return;}
+ if (!isInClassMB()) {opFail(); return;} // not!!!
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassMIX() {
+ if (s >= range) {opFail(); return;}
+ if (enc.isMbcHead(bytes[s])) {
+ ip += BitSet.BITSET_SIZE;
+ if (!isInClassMB()) {opFail(); return;}
+ } else {
+ if (!isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ int tlen = code[ip++]; // by code range length
+ ip += tlen;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** CCLASS_NOT: the current byte must not be in the inline bit set; consumes one character of encoding-defined length. */
+ private void opCClassNot() {
+ if (s >= range) {opFail(); return;}
+ if (isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ s += enc.length(bytes[s]);
+ sprev = sbegin; // break;
+ }
+
+ /** CCLASS_NOT_SB: the current byte must not be in the inline bit set; consumes exactly one byte. */
+ private void opCClassNotSb() {
+ if (s >= range) {opFail(); return;}
+ if (isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private boolean isNotInClassMB() {
+ int tlen = code[ip++];
+ int mbLen = enc.length(bytes[s]);
+
+ if (!(s + mbLen <= range)) {
+ if (s >= range) return false;
+ s = end;
+ ip += tlen;
+ return true;
+ }
+
+ int ss = s;
+ s += mbLen;
+ int c = enc.mbcToCode(bytes, ss, s);
+
+ if (CodeRangeBuffer.isInCodeRange(code, ip, c)) return false;
+ ip += tlen;
+ return true;
+ }
+
+ private void opCClassMBNot() {
+ if (s >= range) {opFail(); return;}
+ if (!enc.isMbcHead(bytes[s])) {
+ s++;
+ int tlen = code[ip++];
+ ip += tlen;
+ sprev = sbegin; // break;
+ return;
+ }
+ if (!isNotInClassMB()) {opFail(); return;}
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassMIXNot() {
+ if (s >= range) {opFail(); return;}
+ if (enc.isMbcHead(bytes[s])) {
+ ip += BitSet.BITSET_SIZE;
+ if (!isNotInClassMB()) {opFail(); return;}
+ } else {
+ if (isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ int tlen = code[ip++];
+ ip += tlen;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassNode() {
+ if (s >= range) {opFail(); return;}
+ CClassNode cc = (CClassNode)regex.operands[code[ip++]];
+ int mbLen = enc.length(bytes[s]);
+ int ss = s;
+ s += mbLen;
+ if (s > range) {opFail(); return;}
+ int c = enc.mbcToCode(bytes, ss, s);
+ if (!cc.isCodeInCCLength(mbLen, c)) {opFail(); return;}
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyChar() {
+ if (s >= range) {opFail(); return;}
+ int n = enc.length(bytes[s]);
+ if (s + n > range) {opFail(); return;}
+ if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
+ s += n;
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_SB: consumes one byte unless it equals {@code Encoding.NEW_LINE}. */
+ private void opAnyCharSb() {
+ if (s >= range || bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharML() {
+ if (s >= range) {opFail(); return;}
+ int n = enc.length(bytes[s]);
+ if (s + n > range) {opFail(); return;}
+ s += n;
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_ML_SB: consumes one byte, whatever it is. */
+ private void opAnyCharMLSb() {
+ if (s < range) {
+ s++;
+ sprev = sbegin; // break;
+ } else {
+ opFail();
+ }
+ }
+
+ /** ANYCHAR_STAR: pushes an alternative before each character it consumes, up to {@code range}; fails on a newline. */
+ private void opAnyCharStar() {
+ final byte[]input = this.bytes;
+ while (s < range) {
+ pushAlt(ip, s, sprev);
+ final int n = enc.length(input[s]);
+ if (s + n > range || enc.isNewLine(input, s, end)) {opFail(); return;}
+ sprev = s;
+ s += n;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_STAR_SB: single-byte variant; pushes an alternative before each byte it consumes and fails on NEW_LINE. */
+ private void opAnyCharStarSb() {
+ final byte[]input = this.bytes;
+ for (; s < range; s++) {
+ pushAlt(ip, s, sprev);
+ if (input[s] == Encoding.NEW_LINE) {opFail(); return;}
+ sprev = s;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_ML_STAR: like ANYCHAR_STAR but newlines are consumed too. */
+ private void opAnyCharMLStar() {
+ final byte[]input = this.bytes;
+ while (s < range) {
+ pushAlt(ip, s, sprev);
+ final int next = s + enc.length(input[s]);
+ if (next > range) {opFail(); return;}
+ sprev = s;
+ s = next;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_ML_STAR_SB: single-byte variant; pushes an alternative before every byte up to {@code range}. */
+ private void opAnyCharMLStarSb() {
+ for (; s < range; s++) {
+ pushAlt(ip, s, sprev);
+ sprev = s;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_STAR_PEEK_NEXT: like ANYCHAR_STAR, but pushes an alternative only where the input byte equals the operand byte. */
+ private void opAnyCharStarPeekNext() {
+ final byte peek = (byte)code[ip];
+ final byte[]input = this.bytes;
+
+ while (s < range) {
+ final byte cur = input[s];
+ if (peek == cur) pushAlt(ip + 1, s, sprev);
+ final int n = enc.length(cur);
+ if (s + n > range || enc.isNewLine(input, s, end)) {opFail(); return;}
+ sprev = s;
+ s += n;
+ }
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_STAR_PEEK_NEXT_SB: single-byte variant of the peek-next star; fails on NEW_LINE. */
+ private void opAnyCharStarPeekNextSb() {
+ final byte peek = (byte)code[ip];
+ final byte[]input = this.bytes;
+
+ for (; s < range; s++) {
+ final byte cur = input[s];
+ if (peek == cur) pushAlt(ip + 1, s, sprev);
+ if (cur == Encoding.NEW_LINE) {opFail(); return;}
+ sprev = s;
+ }
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_ML_STAR_PEEK_NEXT: peek-next star that also consumes newlines. */
+ private void opAnyCharMLStarPeekNext() {
+ final byte peek = (byte)code[ip];
+ final byte[]input = this.bytes;
+
+ while (s < range) {
+ if (peek == input[s]) pushAlt(ip + 1, s, sprev);
+ final int next = s + enc.length(input[s]);
+ if (next > range) {opFail(); return;}
+ sprev = s;
+ s = next;
+ }
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ /** ANYCHAR_ML_STAR_PEEK_NEXT_SB: single-byte peek-next star that also consumes newlines. */
+ private void opAnyCharMLStarPeekNextSb() {
+ final byte peek = (byte)code[ip];
+ final byte[]input = this.bytes;
+
+ for (; s < range; s++) {
+ if (peek == input[s]) pushAlt(ip + 1, s, sprev);
+ sprev = s;
+ }
+ ip++;
+ sprev = sbegin; // break;
+ }
+
+ // CEC
+ /** STATE_CHECK_ANYCHAR_STAR: ANYCHAR_STAR that fails early when the state operand is already marked at the current position. */
+ private void opStateCheckAnyCharStar() {
+ final int stateNum = code[ip++];
+ final byte[]input = this.bytes;
+
+ while (s < range) {
+ if (stateCheckVal(stateNum)) {opFail(); return;}
+ pushAltWithStateCheck(ip, s, sprev, stateNum);
+ final int n = enc.length(input[s]);
+ if (s + n > range || enc.isNewLine(input, s, end)) {opFail(); return;}
+ sprev = s;
+ s += n;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** STATE_CHECK_ANYCHAR_STAR_SB: single-byte variant of the state-checked star; fails on NEW_LINE. */
+ private void opStateCheckAnyCharStarSb() {
+ final int stateNum = code[ip++];
+ final byte[]input = this.bytes;
+
+ for (; s < range; s++) {
+ if (stateCheckVal(stateNum)) {opFail(); return;}
+ pushAltWithStateCheck(ip, s, sprev, stateNum);
+ if (input[s] == Encoding.NEW_LINE) {opFail(); return;}
+ sprev = s;
+ }
+ sprev = sbegin; // break;
+ }
+
+ // CEC
+ /** STATE_CHECK_ANYCHAR_ML_STAR: state-checked star that also consumes newlines. */
+ private void opStateCheckAnyCharMLStar() {
+ final int stateNum = code[ip++];
+ final byte[]input = this.bytes;
+
+ while (s < range) {
+ if (stateCheckVal(stateNum)) {opFail(); return;}
+ pushAltWithStateCheck(ip, s, sprev, stateNum);
+ final int next = s + enc.length(input[s]);
+ if (next > range) {opFail(); return;}
+ sprev = s;
+ s = next;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opStateCheckAnyCharMLStarSb() {
+ int mem = code[ip++];
+
+ while (s < range) {
+ if (stateCheckVal(mem)) {opFail(); return;}
+ pushAltWithStateCheck(ip, s, sprev, mem);
+ sprev = s;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ /** Consumes one character if it lies before range and enc.isMbcWord accepts it; fails otherwise. */
+ private void opWord() {
+     if (s < range && enc.isMbcWord(bytes, s, end)) {
+         s += enc.length(bytes[s]);
+         sprev = sbegin; // break;
+     } else {
+         opFail();
+     }
+ }
+
+ /** Consumes one byte if it lies before range and enc.isWord accepts its unsigned value; fails otherwise. */
+ private void opWordSb() {
+     if (s < range && enc.isWord(bytes[s] & 0xff)) {
+         s++;
+         sprev = sbegin; // break;
+     } else {
+         opFail();
+     }
+ }
+
+ /** Consumes one character if it lies before range and enc.isMbcWord rejects it; fails otherwise. */
+ private void opNotWord() {
+     if (s < range && !enc.isMbcWord(bytes, s, end)) {
+         s += enc.length(bytes[s]);
+         sprev = sbegin; // break;
+     } else {
+         opFail();
+     }
+ }
+
+ /** Consumes one byte if it lies before range and enc.isWord rejects its unsigned value; fails otherwise. */
+ private void opNotWordSb() {
+     if (s < range && !enc.isWord(bytes[s] & 0xff)) {
+         s++;
+         sprev = sbegin; // break;
+     } else {
+         opFail();
+     }
+ }
+
+ private void opWordBound() {
+ if (s == str) {
+ if (s >= range) {opFail(); return;}
+ if (!enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+ } else if (s == end) {
+ if (!enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ } else {
+ if (enc.isMbcWord(bytes, s, end) == enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ }
+ }
+
+ private void opWordBoundSb() {
+ if (s == str) {
+ if (s >= range) {opFail(); return;}
+ if (!enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+ } else if (s == end) {
+ if (!enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ } else {
+ if (enc.isWord(bytes[s] & 0xff) == enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ }
+ }
+
+ private void opNotWordBound() {
+ if (s == str) {
+ if (s < range && enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+ } else if (s == end) {
+ if (enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ } else {
+ if (enc.isMbcWord(bytes, s, end) != enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
+ }
+ }
+
+ private void opNotWordBoundSb() {
+ if (s == str) {
+ if (s < range && enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+ } else if (s == end) {
+ if (enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ } else {
+ if (enc.isWord(bytes[s] & 0xff) != enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
+ }
+ }
+
+ private void opWordBegin() {
+ if (s < range && enc.isMbcWord(bytes, s, end)) {
+ if (s == str || !enc.isMbcWord(bytes, sprev, end)) return;
+ }
+ opFail();
+ }
+
+ private void opWordBeginSb() {
+ if (s < range && enc.isWord(bytes[s] & 0xff)) {
+ if (s == str || !enc.isWord(bytes[sprev] & 0xff)) return;
+ }
+ opFail();
+ }
+
+ private void opWordEnd() {
+ if (s != str && enc.isMbcWord(bytes, sprev, end)) {
+ if (s == end || !enc.isMbcWord(bytes, s, end)) return;
+ }
+ opFail();
+ }
+
+ private void opWordEndSb() {
+ if (s != str && enc.isWord(bytes[sprev] & 0xff)) {
+ if (s == end || !enc.isWord(bytes[s] & 0xff)) return;
+ }
+ opFail();
+ }
+
+ /** Succeeds only when the current position is the start of the string (str). */
+ private void opBeginBuf() {
+     if (s == str) return;
+     opFail();
+ }
+
+ /** Succeeds only when the current position is the end of the string (end). */
+ private void opEndBuf() {
+     if (s == end) return;
+     opFail();
+ }
+
+ private void opBeginLine() {
+ if (s == str) {
+ if (isNotBol(msaOptions)) opFail();
+ return;
+ } else if (enc.isNewLine(bytes, sprev, end) && s != end) {
+ return;
+ }
+ opFail();
+ }
+
+ // End-of-line assertion (consumes nothing). At s == end it succeeds unless
+ // isNotEol(msaOptions) holds; before the end it succeeds when a newline starts
+ // at s, or (when Config.USE_CRNL_AS_LINE_TERMINATOR is on) a CR-NL pair does.
+ private void opEndLine() {
+ if (s == end) {
+ // NOTE(review): '&&' binds tighter than '||', so this condition parses as
+ // (FLAG && str == end) || !isNewLine(...). Both branches below are identical,
+ // so the condition currently has no effect on the outcome -- confirm the
+ // intended behaviour before changing it.
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE &&
+ str == end || !enc.isNewLine(bytes, sprev, end)) {
+ if (isNotEol(msaOptions)) opFail();
+ return;
+ } else {
+ if (isNotEol(msaOptions)) opFail();
+ return;
+ }
+ } else if (enc.isNewLine(bytes, s, end)) {
+ return;
+ } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end)) {
+ return;
+ }
+ opFail();
+ }
+
+ // "Semi end of buffer" assertion (consumes nothing). At s == end it succeeds
+ // unless isNotEol(msaOptions) holds; before the end it succeeds only when the
+ // newline (or, if enabled, the CR-NL pair) starting at s is the last thing in
+ // the string.
+ private void opSemiEndBuf() {
+ if (s == end) {
+ // NOTE(review): '&&' binds tighter than '||' here, and both branches below
+ // are identical, so the condition currently does not change the outcome --
+ // confirm the intended behaviour before changing it.
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE &&
+ str == end || !enc.isNewLine(bytes, sprev, end)) {
+ if (isNotEol(msaOptions)) opFail();
+ return;
+ } else {
+ if (isNotEol(msaOptions)) opFail();
+ return;
+ }
+ } else if (enc.isNewLine(bytes, s, end) && (s + enc.length(bytes[s])) == end) {
+ return;
+ } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end)) {
+ // step over both characters of the CR-NL pair and require that they end the string
+ int ss = s + enc.length(bytes[s]);
+ ss += enc.length(bytes[ss]);
+ if (ss == end) return;
+ }
+ opFail();
+ }
+
+ /** Succeeds only when the current position equals msaStart. */
+ private void opBeginPosition() {
+     if (s == msaStart) return;
+     opFail();
+ }
+
+ private void opMemoryStartPush() {
+ int mem = code[ip++];
+ pushMemStart(mem, s);
+ }
+
+ private void opMemoryStart() {
+ int mem = code[ip++];
+ repeatStk[memStartStk + mem] = s;
+ }
+
+ private void opMemoryEndPush() {
+ int mem = code[ip++];
+ pushMemEnd(mem, s);
+ }
+
+ private void opMemoryEnd() {
+ int mem = code[ip++];
+ repeatStk[memEndStk + mem] = s;
+ }
+
+ private void opMemoryEndPushRec() {
+ int mem = code[ip++];
+ int stkp = getMemStart(mem); /* should be before push mem-end. */
+ pushMemEnd(mem, s);
+ repeatStk[memStartStk + mem] = stkp;
+ }
+
+ private void opMemoryEndRec() {
+ int mem = code[ip++];
+ repeatStk[memEndStk + mem] = s;
+ int stkp = getMemStart(mem);
+
+ if (BitStatus.bsAt(regex.btMemStart, mem)) {
+ repeatStk[memStartStk + mem] = stkp;
+ } else {
+ repeatStk[memStartStk + mem] = stack[stkp].getMemPStr();
+ }
+
+ pushMemEndMark(mem);
+ }
+
+ /** Returns true when either the end slot or the start slot of group mem holds INVALID_INDEX. */
+ private boolean backrefInvalid(int mem) {
+     if (repeatStk[memEndStk + mem] == INVALID_INDEX) return true;
+     return repeatStk[memStartStk + mem] == INVALID_INDEX;
+ }
+
+ /**
+  * Returns the start position of group mem: the slot value itself, or, when
+  * the group is flagged in regex.btMemStart, the position stored in the stack
+  * entry the slot points to.
+  */
+ private int backrefStart(int mem) {
+     if (bsAt(regex.btMemStart, mem)) {
+         return stack[repeatStk[memStartStk + mem]].getMemPStr();
+     }
+     return repeatStk[memStartStk + mem];
+ }
+
+ /**
+  * Returns the end position of group mem: the slot value itself, or, when the
+  * group is flagged in regex.btMemEnd, the position stored in the stack entry
+  * the slot points to.
+  */
+ private int backrefEnd(int mem) {
+     if (bsAt(regex.btMemEnd, mem)) {
+         return stack[repeatStk[memEndStk + mem]].getMemPStr();
+     }
+     return repeatStk[memEndStk + mem];
+ }
+
+ private void backref(int mem) {
+ /* if you want to remove following line,
+ you should check in parse and compile time. (numMem) */
+ if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
+
+ int pstart = backrefStart(mem);
+ int pend = backrefEnd(mem);
+
+ int n = pend - pstart;
+ if (s + n > range) {opFail(); return;}
+ sprev = s;
+
+ // STRING_CMP
+ while(n-- > 0) if (bytes[pstart++] != bytes[s++]) {opFail(); return;}
+
+ int len;
+
+ // beyond string check
+ if (sprev < range) {
+ while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+ }
+ }
+
+ // Back-reference to group 1; delegates to backref(1).
+ private void opBackRef1() {
+ backref(1);
+ }
+
+ // Back-reference to group 2; delegates to backref(2).
+ private void opBackRef2() {
+ backref(2);
+ }
+
+ private void opBackRefN() {
+ backref(code[ip++]);
+ }
+
+ /**
+  * Case-insensitive back-reference to the group whose number is read from the
+  * operand. Fails when the group number is out of range, the group is unset,
+  * the captured text does not fit before range, or stringCmpIC reports a
+  * mismatch. On success s is advanced to the position stringCmpIC left in
+  * value, and sprev is moved to the head of the last matched character.
+  */
+ private void opBackRefNIC() {
+     int mem = code[ip++];
+     /* if you want to remove following line,
+        you should check in parse and compile time. (numMem) */
+     if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
+
+     int pstart = backrefStart(mem);
+     int pend = backrefEnd(mem);
+
+     int n = pend - pstart;
+     if (s + n > range) {opFail(); return;}
+     sprev = s;
+
+     // stringCmpIC reads the start position from, and writes the end position to, 'value'
+     value = s;
+     if (!stringCmpIC(caseFoldFlag, pstart, this, n)) {opFail(); return;}
+     s = value;
+
+     int len;
+     // beyond string check (same guard as backref()): when sprev == range nothing
+     // was consumed, so there is no character to step over and bytes[sprev] may
+     // lie past the end of the array
+     if (sprev < range) {
+         while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+     }
+ }
+
+ // BACKREF_MULTI handler. The operands are a count (tlen) followed by tlen
+ // group numbers. Candidates are tried in order; unset groups and groups whose
+ // text does not match at s are skipped. The first match advances s, fixes up
+ // sprev and skips the remaining operands. If no candidate matches, the
+ // operation fails.
+ private void opBackRefMulti() {
+ int tlen = code[ip++];
+
+ int i;
+ loop:for (i=0; i<tlen; i++) {
+ int mem = code[ip++];
+ if (backrefInvalid(mem)) continue;
+
+ int pstart = backrefStart(mem);
+ int pend = backrefEnd(mem);
+
+ int n = pend - pstart;
+ // a candidate that does not fit before range fails the whole operation
+ if (s + n > range) {opFail(); return;}
+
+ sprev = s;
+ // compare on a scratch cursor so s is untouched when this candidate mismatches
+ int swork = s;
+
+ while (n-- > 0) {
+ if (bytes[pstart++] != bytes[swork++]) continue loop;
+ }
+
+ s = swork;
+
+ int len;
+
+ // beyond string check
+ if (sprev < range) {
+ while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+ }
+
+ // skip the group numbers that were not tried
+ ip += tlen - i - 1; // * SIZE_MEMNUM (1)
+ break; /* success */
+ }
+ // i reaches tlen only when every candidate was skipped
+ if (i == tlen) {opFail(); return;}
+ }
+
+ /**
+  * Case-insensitive BACKREF_MULTI handler. The operands are a count (tlen)
+  * followed by tlen group numbers. Candidates are tried in order; unset groups
+  * and groups for which stringCmpIC reports a mismatch are skipped. The first
+  * match advances s, fixes up sprev and skips the remaining operands. If no
+  * candidate matches, the operation fails.
+  */
+ private void opBackRefMultiIC() {
+     int tlen = code[ip++];
+
+     int i;
+     loop:for (i=0; i<tlen; i++) {
+         int mem = code[ip++];
+         if (backrefInvalid(mem)) continue;
+
+         int pstart = backrefStart(mem);
+         int pend = backrefEnd(mem);
+
+         int n = pend - pstart;
+         // a candidate that does not fit before range fails the whole operation
+         if (s + n > range) {opFail(); return;}
+
+         sprev = s;
+
+         // stringCmpIC reads the start position from, and writes the end position to, 'value'
+         value = s;
+         if (!stringCmpIC(caseFoldFlag, pstart, this, n)) continue loop; // STRING_CMP_VALUE_IC
+         s = value;
+
+         int len;
+         // beyond string check (same guard as opBackRefMulti): when sprev == range
+         // nothing was consumed and bytes[sprev] may lie past the end of the array
+         if (sprev < range) {
+             while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+         }
+
+         // skip the group numbers that were not tried
+         ip += tlen - i - 1; // * SIZE_MEMNUM (1)
+         break; /* success */
+     }
+     // i reaches tlen only when every candidate was skipped
+     if (i == tlen) {opFail(); return;}
+ }
+
+ /** Returns true when mem occurs among the num code words starting at index memp. */
+ private boolean memIsInMemp(int mem, int num, int memp) {
+     int cursor = memp;
+     int remaining = num;
+     while (remaining-- > 0) {
+         if (code[cursor++] == mem) return true;
+     }
+     return false;
+ }
+
+ // USE_BACKREF_AT_LEVEL // (s) and (end) implicit
+ // Walks the stack from the top downwards looking, at call-nesting level
+ // 'nest', for the most recent MEM_END / MEM_START pair belonging to one of
+ // the memNum group numbers stored at code[memp..]. When found, the captured
+ // text is compared with the input at s (case-insensitively if ignoreCase);
+ // on a match s is advanced past it and true is returned. Returns false on a
+ // mismatch, when the text does not fit before end, or when no pair is found.
+ private boolean backrefMatchAtNestedLevel(boolean ignoreCase, int caseFoldFlag,
+ int nest, int memNum, int memp) {
+ int pend = -1;
+ int level = 0;
+ int k = stk - 1;
+
+ while (k >= 0) {
+ StackEntry e = stack[k];
+
+ // walking downwards: a CALL_FRAME lowers the level, a RETURN raises it
+ if (e.type == CALL_FRAME) {
+ level--;
+ } else if (e.type == RETURN) {
+ level++;
+ } else if (level == nest) {
+ if (e.type == MEM_START) {
+ if (memIsInMemp(e.getMemNum(), memNum, memp)) {
+ int pstart = e.getMemPStr();
+ // only usable once a matching MEM_END has been seen above this entry
+ if (pend != -1) {
+ if (pend - pstart > end - s) return false; /* or goto next_mem; */
+ int p = pstart;
+
+ // 'value' is the comparison cursor; s is only updated on success
+ value = s;
+ if (ignoreCase) {
+ if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart)) {
+ return false; /* or goto next_mem; */
+ }
+ } else {
+ while (p < pend) {
+ if (bytes[p++] != bytes[value++]) return false; /* or goto next_mem; */
+ }
+ }
+ s = value;
+
+ return true;
+ }
+ }
+ } else if (e.type == MEM_END) {
+ if (memIsInMemp(e.getMemNum(), memNum, memp)) {
+ pend = e.getMemPStr();
+ }
+ }
+ }
+ k--;
+ }
+ return false;
+ }
+
+ /**
+  * BACKREF_WITH_LEVEL handler. Operands: ignore-case flag, nesting level, the
+  * number of candidate groups (tlen) and then tlen group numbers. Delegates the
+  * search and comparison to backrefMatchAtNestedLevel; on success sprev is
+  * moved to the head of the last matched character and the group-number
+  * operands are skipped, otherwise the operation fails.
+  */
+ private void opBackRefAtLevel() {
+     int ic = code[ip++];
+     int level = code[ip++];
+     int tlen = code[ip++];
+
+     sprev = s;
+     if (backrefMatchAtNestedLevel(ic != 0, caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
+         int len;
+         // beyond string check: when sprev == end nothing was consumed and
+         // bytes[sprev] may lie past the end of the array
+         if (sprev < end) {
+             while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+         }
+         ip += tlen; // * SIZE_MEMNUM
+     } else {
+         opFail();
+     }
+ }
+
+ private void opNullCheckStart() {
+ int mem = code[ip++];
+ pushNullCheckStart(mem, s);
+ }
+
+ private void nullCheckFound() {
+ // null_check_found:
+ /* empty loop founded, skip next instruction */
+ switch(code[ip++]) {
+ case OPCode.JUMP:
+ case OPCode.PUSH:
+ ip++; // p += SIZE_RELADDR;
+ break;
+ case OPCode.REPEAT_INC:
+ case OPCode.REPEAT_INC_NG:
+ case OPCode.REPEAT_INC_SG:
+ case OPCode.REPEAT_INC_NG_SG:
+ ip++; // p += SIZE_MEMNUM;
+ break;
+ default:
+ throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE);
+ } // switch
+ }
+
+ private void opNullCheckEnd() {
+ int mem = code[ip++];
+ int isNull = nullCheck(mem, s); /* mem: null check id */
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s);
+ }
+
+ nullCheckFound();
+ }
+ }
+
+ // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ private void opNullCheckEndMemST() {
+ int mem = code[ip++]; /* mem: null check id */
+ int isNull = nullCheckMemSt(mem, s);
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s);
+ }
+
+ if (isNull == -1) {opFail(); return;}
+ nullCheckFound();
+ }
+ }
+
+ // USE_SUBEXP_CALL
+ private void opNullCheckEndMemSTPush() {
+ int mem = code[ip++]; /* mem: null check id */
+
+ int isNull;
+ if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
+ isNull = nullCheckMemStRec(mem, s);
+ } else {
+ isNull = nullCheckRec(mem, s);
+ }
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s);
+ }
+
+ if (isNull == -1) {opFail(); return;}
+ nullCheckFound();
+ } else {
+ pushNullCheckEnd(mem);
+ }
+ }
+
+ private void opJump() {
+ int addr = code[ip++];
+ ip += addr;
+ }
+
+ private void opPush() {
+ int addr = code[ip++];
+ pushAlt(ip + addr, s, sprev);
+ }
+
+ // CEC
+ private void opStateCheckPush() {
+ int mem = code[ip++];
+ if (stateCheckVal(mem)) {opFail(); return;}
+ int addr = code[ip++];
+ pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ }
+
+ // CEC
+ private void opStateCheckPushOrJump() {
+ int mem = code[ip++];
+ int addr= code[ip++];
+
+ if (stateCheckVal(mem)) {
+ ip += addr;
+ } else {
+ pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ }
+ }
+
+ // CEC
+ private void opStateCheck() {
+ int mem = code[ip++];
+ if (stateCheckVal(mem)) {opFail(); return;}
+ pushStateCheck(s, mem);
+ }
+
+ // POP handler; delegates to popOne().
+ private void opPop() {
+ popOne();
+ }
+
+ /**
+  * PUSH_OR_JUMP_EXACT1 handler. Operands: a relative address followed by one
+  * byte. If the byte at the current position equals the operand byte, an
+  * alternative resuming at the relative address is pushed and execution
+  * continues after the operand; otherwise execution jumps to that address.
+  */
+ private void opPushOrJumpExact1() {
+     int addr = code[ip++];
+     // beyond string check: test s < range before reading bytes[s], as
+     // opPushIfPeekNext does, so the end of the input is never indexed
+     if (s < range && code[ip] == bytes[s]) {
+         ip++;
+         pushAlt(ip + addr, s, sprev);
+         return;
+     }
+     ip += addr + 1;
+ }
+
+ private void opPushIfPeekNext() {
+ int addr = code[ip++];
+ // beyond string check
+ if (s < range && code[ip] == bytes[s]) {
+ ip++;
+ pushAlt(ip + addr, s, sprev);
+ return;
+ }
+ ip++;
+ }
+
+ private void opRepeat() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int addr= code[ip++];
+
+ ensure1();
+ repeatStk[mem] = stk;
+ pushRepeat(mem, ip);
+
+ if (regex.repeatRangeLo[mem] == 0) { // lower
+ pushAlt(ip + addr, s, sprev);
+ }
+ }
+
+ private void opRepeatNG() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int addr= code[ip++];
+
+ ensure1();
+ repeatStk[mem] = stk;
+ pushRepeat(mem, ip);
+
+ if (regex.repeatRangeLo[mem] == 0) {
+ pushAlt(ip, s, sprev);
+ ip += addr;
+ }
+ }
+
+ private void repeatInc(int mem, int si) {
+ StackEntry e = stack[si];
+
+ e.increaseRepeatCount();
+
+ if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
+ /* end of repeat. Nothing to do. */
+ } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
+ pushAlt(ip, s, sprev);
+ ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
+ } else {
+ ip = e.getRepeatPCode();
+ }
+ pushRepeatInc(si);
+ }
+
+ private void opRepeatInc() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int si = repeatStk[mem];
+ repeatInc(mem, si);
+ }
+
+ private void opRepeatIncSG() {
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int si = getRepeat(mem);
+ repeatInc(mem, si);
+ }
+
+ private void repeatIncNG(int mem, int si) {
+ StackEntry e = stack[si];
+
+ e.increaseRepeatCount();
+
+ if (e.getRepeatCount() < regex.repeatRangeHi[mem]) {
+ if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
+ int pcode = e.getRepeatPCode();
+ pushRepeatInc(si);
+ pushAlt(pcode, s, sprev);
+ } else {
+ ip = e.getRepeatPCode();
+ pushRepeatInc(si);
+ }
+ } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) {
+ pushRepeatInc(si);
+ }
+ }
+
+ private void opRepeatIncNG() {
+ int mem = code[ip++];
+ int si = repeatStk[mem];
+ repeatIncNG(mem, si);
+ }
+
+ private void opRepeatIncNGSG() {
+ int mem = code[ip++];
+ int si = getRepeat(mem);
+ repeatIncNG(mem, si);
+ }
+
+ // PUSH_POS handler: saves the current s and sprev on the stack via pushPos.
+ private void opPushPos() {
+ pushPos(s, sprev);
+ }
+
+ private void opPopPos() {
+ StackEntry e = stack[posEnd()];
+ s = e.getStatePStr();
+ sprev= e.getStatePStrPrev();
+ }
+
+ private void opPushPosNot() {
+ int addr = code[ip++];
+ pushPosNot(ip + addr, s, sprev);
+ }
+
+ // FAIL_POS handler: calls popTilPosNot() and then fails.
+ private void opFailPos() {
+ popTilPosNot();
+ opFail();
+ }
+
+ // PUSH_STOP_BT handler; delegates to pushStopBT().
+ private void opPushStopBT() {
+ pushStopBT();
+ }
+
+ // POP_STOP_BT handler; delegates to stopBtEnd().
+ private void opPopStopBT() {
+ stopBtEnd();
+ }
+
+ private void opLookBehind() {
+ int tlen = code[ip++];
+ s = enc.stepBack(bytes, str, s, tlen);
+ if (s == -1) {opFail(); return;}
+ sprev = enc.prevCharHead(bytes, str, s);
+ }
+
+ private void opLookBehindSb() {
+ int tlen = code[ip++];
+ s -= tlen;
+ if (s < str) {opFail(); return;}
+ sprev = s == str ? -1 : s - 1;
+ }
+
+ private void opPushLookBehindNot() {
+ int addr = code[ip++];
+ int tlen = code[ip++];
+ int q = enc.stepBack(bytes, str, s, tlen);
+ if (q == -1) {
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ ip += addr;
+ // return FAIL;
+ } else {
+ pushLookBehindNot(ip + addr, s, sprev);
+ s = q;
+ sprev = enc.prevCharHead(bytes, str, s);
+ }
+ }
+
+ // FAIL_LOOK_BEHIND_NOT handler: calls popTilLookBehindNot() and then fails.
+ private void opFailLookBehindNot() {
+ popTilLookBehindNot();
+ opFail();
+ }
+
+ private void opCall() {
+ int addr = code[ip++];
+ pushCallFrame(ip);
+ ip = addr; // absolute address
+ }
+
+ // RETURN handler: continues at the address returned by sreturn() and pushes a
+ // return entry on the stack.
+ private void opReturn() {
+ ip = sreturn();
+ pushReturn();
+ }
+
+ private void opFail() {
+ if (stack == null) {
+ ip = regex.codeLength - 1;
+ return;
+ }
+
+
+ StackEntry e = pop();
+ ip = e.getStatePCode();
+ s = e.getStatePStr();
+ sprev = e.getStatePStrPrev();
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.getStateCheck() != 0) {
+ e.type = STATE_CHECK_MARK;
+ stk++;
+ }
+ }
+ }
+
+ // Returns bestLen as the result of the match.
+ private int finish() {
+ return bestLen;
+ }
+}
\ No newline at end of file
diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java
new file mode 100644
index 0000000..9f18fb9
--- /dev/null
+++ b/src/org/joni/ByteCodePrinter.java
@@ -0,0 +1,350 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.CClassNode;
+import org.joni.constants.Arguments;
+import org.joni.constants.OPCode;
+import org.joni.constants.OPSize;
+import org.joni.encoding.Encoding;
+
+
+ /**
+  * Debugging aid that renders the compiled program of a Regex (opcodes and
+  * their operands) as text.
+  */
+ class ByteCodePrinter {
+     int[]code;
+     int codeLength;
+
+     Object[]operands;
+     int operantCount;
+     Encoding enc;
+     WarnCallback warnings;
+
+     /** Captures the code array, operand table, encoding and warning sink of regex. */
+     public ByteCodePrinter(Regex regex) {
+         code = regex.code;
+         codeLength = regex.codeLength;
+         operands = regex.operands;
+         operantCount = regex.operandLength;
+         enc = regex.enc;
+         warnings = regex.warnings;
+     }
+
+     /** Returns the whole program as text; see compiledByteCodeListToString. */
+     public String byteCodeListToString() {
+         return compiledByteCodeListToString();
+     }
+
+     /** Appends ':' followed by len code words starting at s, each rendered as a one-byte string. */
+     private void pString(StringBuilder sb, int len, int s) {
+         sb.append(":");
+         while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
+     }
+
+     /** Appends ":len:" followed by len * mbLen code words starting at s, each rendered as a one-byte string. */
+     private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
+         int x = len * mbLen;
+         sb.append(":" + len + ":");
+         while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
+     }
+
+     /** Copies the bit set stored at code[bp..] and returns the number of bits that are on. */
+     private int bitSetNumOn(int bp) {
+         BitSet bs = new BitSet();
+         System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
+         return bs.numOn();
+     }
+
+     /**
+      * Appends a length-prefixed list of memory numbers ("m1, m2, ...") read
+      * from code[bp..] and returns the index just past the list.
+      */
+     private int pMemNumList(StringBuilder sb, int bp) {
+         int len = code[bp];
+         bp += OPSize.LENGTH;
+         for (int i=0; i<len; i++) {
+             int mem = code[bp];
+             bp += OPSize.MEMNUM;
+             if (i > 0) sb.append(", ");
+             sb.append(mem);
+         }
+         return bp;
+     }
+
+     /**
+      * Appends the textual form of the instruction at index bp, enclosed in
+      * square brackets, to sb.
+      *
+      * @param sb destination buffer
+      * @param bp index of the opcode in the code array
+      * @return index of the next instruction
+      */
+     public int compiledByteCodeToString(StringBuilder sb, int bp) {
+         int len, n, mem, addr, scn, cod;
+         CClassNode cc;
+
+         sb.append("[" + OPCode.OpCodeNames[code[bp]]);
+         int argType = OPCode.OpCodeArgTypes[code[bp]];
+         if (argType != Arguments.SPECIAL) {
+             // opcodes with a single operand of a uniform kind
+             bp++;
+             switch (argType) {
+             case Arguments.NON:
+                 break;
+
+             case Arguments.RELADDR:
+                 sb.append(":(" + code[bp] + ")");
+                 bp += OPSize.RELADDR;
+                 break;
+
+             case Arguments.ABSADDR:
+                 sb.append(":(" + code[bp] + ")");
+                 bp += OPSize.ABSADDR;
+                 break;
+
+             case Arguments.LENGTH:
+                 sb.append(":" + code[bp]);
+                 bp += OPSize.LENGTH;
+                 break;
+
+             case Arguments.MEMNUM:
+                 sb.append(":" + code[bp]);
+                 bp += OPSize.MEMNUM;
+                 break;
+
+             case Arguments.OPTION:
+                 sb.append(":" + code[bp]);
+                 bp += OPSize.OPTION;
+                 break;
+
+             case Arguments.STATE_CHECK:
+                 sb.append(":" + code[bp]);
+                 bp += OPSize.STATE_CHECK;
+                 break;
+             }
+         } else {
+             // opcodes whose operand layout is specific to the opcode
+             switch (code[bp++]) {
+             case OPCode.EXACT1:
+             case OPCode.ANYCHAR_STAR_PEEK_NEXT:
+             case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
+                 pString(sb, 1, bp++);
+                 break;
+
+             case OPCode.EXACT2:
+                 pString(sb, 2, bp);
+                 bp += 2;
+                 break;
+
+             case OPCode.EXACT3:
+                 pString(sb, 3, bp);
+                 bp += 3;
+                 break;
+
+             case OPCode.EXACT4:
+                 pString(sb, 4, bp);
+                 bp += 4;
+                 break;
+
+             case OPCode.EXACT5:
+                 pString(sb, 5, bp);
+                 bp += 5;
+                 break;
+
+             case OPCode.EXACTN:
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 pLenString(sb, len, 1, bp);
+                 bp += len;
+                 break;
+
+             case OPCode.EXACTMB2N1:
+                 pString(sb, 2, bp);
+                 bp += 2;
+                 break;
+
+             case OPCode.EXACTMB2N2:
+                 pString(sb, 4, bp);
+                 bp += 4;
+                 break;
+
+             case OPCode.EXACTMB2N3:
+                 pString(sb, 6, bp);
+                 bp += 6;
+                 break;
+
+             case OPCode.EXACTMB2N:
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 pLenString(sb, len, 2, bp);
+                 bp += len * 2;
+                 break;
+
+             case OPCode.EXACTMB3N:
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 pLenString(sb, len, 3, bp);
+                 bp += len * 3;
+                 break;
+
+             case OPCode.EXACTMBN:
+                 int mbLen = code[bp];
+                 bp += OPSize.LENGTH;
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 sb.append(":" + mbLen + ":" + len + ":");
+                 n = len * mbLen;
+                 while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]}));
+                 break;
+
+             case OPCode.EXACT1_IC:
+             case OPCode.EXACT1_IC_SB:
+                 len = enc.length((byte)code[bp]);
+                 pString(sb, len, bp);
+                 bp += len;
+                 break;
+
+             case OPCode.EXACTN_IC:
+             case OPCode.EXACTN_IC_SB:
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 pLenString(sb, len, 1, bp);
+                 bp += len;
+                 break;
+
+             case OPCode.CCLASS:
+             case OPCode.CCLASS_SB:
+             case OPCode.CCLASS_NOT:
+             case OPCode.CCLASS_NOT_SB:
+                 n = bitSetNumOn(bp);
+                 bp += BitSet.BITSET_SIZE;
+                 sb.append(":" + n);
+                 break;
+
+             case OPCode.CCLASS_MB:
+             case OPCode.CCLASS_MB_NOT:
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 cod = code[bp];
+                 //bp += OPSize.CODE_POINT;
+                 bp += len;
+                 sb.append(":" + cod + ":" + len);
+                 break;
+
+             case OPCode.CCLASS_MIX:
+             case OPCode.CCLASS_MIX_NOT:
+                 n = bitSetNumOn(bp);
+                 bp += BitSet.BITSET_SIZE;
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 cod = code[bp];
+                 //bp += OPSize.CODE_POINT;
+                 bp += len;
+                 sb.append(":" + n + ":" + cod + ":" + len);
+                 break;
+
+             case OPCode.CCLASS_NODE:
+                 cc = (CClassNode)operands[code[bp]];
+                 bp += OPSize.POINTER;
+                 n = cc.bs.numOn();
+                 sb.append(":" + cc + ":" + n);
+                 break;
+
+             case OPCode.BACKREFN_IC:
+                 mem = code[bp];
+                 bp += OPSize.MEMNUM;
+                 sb.append(":" + mem);
+                 break;
+
+             case OPCode.BACKREF_MULTI_IC:
+             case OPCode.BACKREF_MULTI:
+                 sb.append(" ");
+                 bp = pMemNumList(sb, bp);
+                 break;
+
+             case OPCode.BACKREF_WITH_LEVEL: {
+                 int option = code[bp];
+                 bp += OPSize.OPTION;
+                 sb.append(":" + option);
+                 int level = code[bp];
+                 bp += OPSize.LENGTH;
+                 sb.append(":" + level);
+                 sb.append(" ");
+                 bp = pMemNumList(sb, bp);
+                 break;
+             }
+
+             case OPCode.REPEAT:
+             case OPCode.REPEAT_NG:
+                 mem = code[bp];
+                 bp += OPSize.MEMNUM;
+                 addr = code[bp];
+                 bp += OPSize.RELADDR;
+                 sb.append(":" + mem + ":" + addr);
+                 break;
+
+             case OPCode.PUSH_OR_JUMP_EXACT1:
+             case OPCode.PUSH_IF_PEEK_NEXT:
+                 addr = code[bp];
+                 bp += OPSize.RELADDR;
+                 sb.append(":(" + addr + ")");
+                 pString(sb, 1, bp);
+                 bp++;
+                 break;
+
+             case OPCode.LOOK_BEHIND:
+             case OPCode.LOOK_BEHIND_SB:
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 sb.append(":" + len);
+                 break;
+
+             case OPCode.PUSH_LOOK_BEHIND_NOT:
+                 addr = code[bp];
+                 bp += OPSize.RELADDR;
+                 len = code[bp];
+                 bp += OPSize.LENGTH;
+                 sb.append(":" + len + ":(" + addr + ")");
+                 break;
+
+             case OPCode.STATE_CHECK_PUSH:
+             case OPCode.STATE_CHECK_PUSH_OR_JUMP:
+                 scn = code[bp];
+                 bp += OPSize.STATE_CHECK_NUM;
+                 addr = code[bp];
+                 bp += OPSize.RELADDR;
+                 sb.append(":" + scn + ":(" + addr + ")");
+                 break;
+
+             default:
+                 // Report the offending opcode value (not its index) and leave bp
+                 // past the opcode so that callers iterating over the program
+                 // still make progress.
+                 warnings.warn("undefined code: " + code[bp - 1]);
+
+             }
+         }
+         sb.append("]");
+         return bp;
+     }
+
+     /**
+      * Renders every instruction of the program: a "code length" header line,
+      * then the instructions separated by spaces with a line break before
+      * every fifth one.
+      */
+     private String compiledByteCodeListToString() {
+         StringBuilder sb = new StringBuilder();
+         sb.append("code length: " + codeLength + "\n");
+
+         int ncode = 0;
+         int bp = 0;
+         int end = codeLength;
+
+         while (bp < end) {
+             ncode++;
+
+             if (bp > 0) sb.append(ncode % 5 == 0 ? "\n" : " ");
+
+             bp = compiledByteCodeToString(sb, bp);
+         }
+         sb.append("\n");
+         return sb.toString();
+     }
+ }
diff --git a/src/org/joni/CaptureTreeNode.java b/src/org/joni/CaptureTreeNode.java
new file mode 100644
index 0000000..dd6549c
--- /dev/null
+++ b/src/org/joni/CaptureTreeNode.java
@@ -0,0 +1,74 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Node of a capture tree: a group number, the begin/end positions of the
+  * capture and a growable array of child nodes.
+  */
+ public class CaptureTreeNode {
+
+
+     int group;
+     int beg;
+     int end;
+     // int allocated;
+     int numChildren;
+     CaptureTreeNode[]children;
+
+     /** Creates an empty node: no positions (Region.REGION_NOTPOS), group -1, no children. */
+     CaptureTreeNode() {
+         beg = Region.REGION_NOTPOS;
+         end = Region.REGION_NOTPOS;
+         group = -1;
+     }
+
+     static final int HISTORY_TREE_INIT_ALLOC_SIZE = 8;
+
+     /** Appends child, allocating the child array on first use and doubling it when full. */
+     void addChild(CaptureTreeNode child) {
+         if (children == null) {
+             children = new CaptureTreeNode[HISTORY_TREE_INIT_ALLOC_SIZE];
+         } else if (numChildren >= children.length) {
+             CaptureTreeNode[]tmp = new CaptureTreeNode[children.length << 1];
+             System.arraycopy(children, 0, tmp, 0, children.length);
+             children = tmp;
+         }
+
+         children[numChildren] = child;
+         numChildren++;
+     }
+
+     /** Resets the node to its freshly constructed state, keeping the allocated child array. */
+     void clear() {
+         for (int i=0; i<numChildren; i++) {
+             children[i] = null; // drop the references so the children can be collected
+         }
+         numChildren = 0;
+         beg = end = Region.REGION_NOTPOS;
+         group = -1;
+     }
+
+     /**
+      * Returns a deep copy of this node and all of its descendants, including
+      * each node's group number and positions.
+      */
+     CaptureTreeNode cloneTree() {
+         CaptureTreeNode clone = new CaptureTreeNode();
+         // the constructor leaves group at -1, so it has to be copied explicitly
+         clone.group = group;
+         clone.beg = beg;
+         clone.end = end;
+
+         for (int i=0; i<numChildren; i++) {
+             CaptureTreeNode child = children[i].cloneTree();
+             clone.addChild(child);
+         }
+         return clone;
+     }
+
+
+ }
diff --git a/src/org/joni/CaseFoldCodeItem.java b/src/org/joni/CaseFoldCodeItem.java
new file mode 100644
index 0000000..615bfbf
--- /dev/null
+++ b/src/org/joni/CaseFoldCodeItem.java
@@ -0,0 +1,34 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Immutable holder for one case-fold entry: a byte length, a code length and
+  * the array of codes. The array is stored as given, not copied.
+  */
+ public class CaseFoldCodeItem {
+     static final int ENC_MAX_COMP_CASE_FOLD_CODE_LEN = 3;
+
+     public final int byteLen;
+     public final int codeLen;
+     public final int[] code;
+
+     /**
+      * @param byteLen value stored in {@link #byteLen}
+      * @param codeLen value stored in {@link #codeLen}
+      * @param code    array stored (by reference) in {@link #code}
+      */
+     public CaseFoldCodeItem(int byteLen, int codeLen, int[]code) {
+         this.code = code;
+         this.codeLen = codeLen;
+         this.byteLen = byteLen;
+     }
+ }
diff --git a/src/org/joni/CodeRangeBuffer.java b/src/org/joni/CodeRangeBuffer.java
new file mode 100644
index 0000000..f409346
--- /dev/null
+++ b/src/org/joni/CodeRangeBuffer.java
@@ -0,0 +1,413 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+public final class CodeRangeBuffer {
+ private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
+ private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
+
+ int[]p;
+ int used;
+
+ /**
+  * Wraps an existing range array without copying it. The first element is
+  * read as the number stored at index 0 and {@code used} is set to that
+  * value plus one.
+  */
+ public CodeRangeBuffer(int[] ranges) {
+     this.p = ranges;
+     this.used = 1 + ranges[0];
+ }
+
+ /**
+  * Creates an empty buffer with the initial capacity and writes a zero
+  * into slot 0 (the slot the range-handling methods read as the range count).
+  */
+ public CodeRangeBuffer() {
+     this.p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
+     this.writeCodePoint(0, 0);
+ }
+
+ /** Returns the backing array itself (not a copy). */
+ public int[] getCodeRange() {
+     return this.p;
+ }
+
+ private CodeRangeBuffer(CodeRangeBuffer orig) {
+ p = new int[orig.p.length];
+ System.arraycopy(orig.p, 0, p, 0, p.length);
+ used = orig.used;
+ }
+
+ /**
+  * Renders the buffer for debugging: the {@code used} mark, the value in
+  * slot 0 and every stored range as {@code [0xfrom..0xto]}.
+  */
+ @Override
+ public String toString() {
+     final StringBuilder out = new StringBuilder("CodeRange");
+     out.append("\n used: ").append(used);
+     out.append("\n code point: ").append(p[0]);
+     out.append("\n ranges: ");
+
+     final int count = p[0];
+     for (int idx = 0; idx < count; idx++) {
+         final int fromSlot = idx * 2 + 1;
+         out.append("[")
+            .append(rangeNumToString(p[fromSlot]))
+            .append("..")
+            .append(rangeNumToString(p[fromSlot + 1]))
+            .append("]");
+         // line break after ranges 6, 12, ...
+         if (idx > 0 && idx % 6 == 0) {
+             out.append("\n ");
+         }
+     }
+
+     return out.toString();
+ }
+
+ private static String rangeNumToString(int num){
+ return "0x" + Integer.toString(num, 16);
+ }
+
+ /**
+  * Grows the backing array: the capacity is doubled at least once and then
+  * doubled again until it is no smaller than {@code low}. The first
+  * {@code used} elements are carried over.
+  */
+ public void expand(int low) {
+     int capacity = p.length << 1;
+     while (capacity < low) {
+         capacity <<= 1;
+     }
+     final int[] grown = new int[capacity];
+     System.arraycopy(p, 0, grown, 0, used);
+     p = grown;
+ }
+
+ /**
+  * Makes sure the backing array holds at least {@code size} elements,
+  * doubling the capacity as often as needed. Nothing is reallocated when
+  * the current array is already large enough.
+  */
+ public void ensureSize(int size) {
+     int capacity = p.length;
+     while (capacity < size) {
+         capacity <<= 1;
+     }
+     if (capacity == p.length) {
+         return;
+     }
+     final int[] grown = new int[capacity];
+     System.arraycopy(p, 0, grown, 0, used);
+     p = grown;
+ }
+
+ private void moveRight(int from, int to, int n) {
+ if (to + n > p.length) expand(to + n);
+ System.arraycopy(p, from, p, to, n);
+ if (to + n > used) used = to + n;
+ }
+
+ protected void moveLeft(int from, int to, int n) {
+ System.arraycopy(p, from, p, to, n);
+ }
+
+ private void moveLeftAndReduce(int from, int to) {
+ System.arraycopy(p, from, p, to, used - from);
+ used -= from - to;
+ }
+
+ /**
+  * Stores {@code b} at index {@code pos}, growing the array when the index
+  * is out of capacity and extending {@code used} to cover the written slot.
+  */
+ public void writeCodePoint(int pos, int b) {
+     final int required = pos + 1;
+     if (required > p.length) {
+         expand(required);
+     }
+     p[pos] = b;
+     if (required > used) {
+         used = required;
+     }
+ }
+
+ /** Returns an independent copy built through the private copy constructor. */
+ public CodeRangeBuffer clone() {
+     final CodeRangeBuffer duplicate = new CodeRangeBuffer(this);
+     return duplicate;
+ }
+
+ // ugly part: these methods should be made OO
+ // add_code_range_to_buf
+ public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) { // inserts [from, to] into the sorted range list (p[0] = count, p[2i+1]/p[2i+2] = bounds of range i), merging with ranges it touches; returns the buffer, allocating one when pbuf is null
+ if (from > to) { // normalise so that from <= to
+ int n = from;
+ from = to;
+ to = n;
+ }
+
+ if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode
+
+ int[]p = pbuf.p; // snapshot of the array; only read below, all writes go through pbuf
+ int n = p[0]; // current number of ranges
+
+ int low = 0;
+ int bound = n;
+
+ while (low < bound) { // binary search: low = first range whose upper bound is >= from
+ int x = (low + bound) >>> 1;
+ if (from > p[x * 2 + 2]) {
+ low = x + 1;
+ } else {
+ bound = x;
+ }
+ }
+
+ int high = low;
+ bound = n;
+
+ while (high < bound) { // binary search: high = first range whose lower bound is > to + 1
+ int x = (high + bound) >>> 1;
+ if (to >= p[x * 2 + 1] - 1) {
+ high = x + 1;
+ } else {
+ bound = x;
+ }
+ }
+
+ int incN = low + 1 - high; // net change in range count: 1 = plain insert, <= 0 = ranges low..high-1 collapse into one
+
+ if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES);
+
+ if (incN != 1) { // merging: widen [from, to] to cover the absorbed ranges
+ if (from > p[low * 2 + 1]) from = p[low * 2 + 1];
+ if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2];
+ }
+
+ if (incN != 0 && high < n) { // ranges after the merged span must be shifted
+ int fromPos = 1 + high * 2;
+ int toPos = 1 + (low + 1) * 2;
+ int size = (n - high) * 2;
+
+ if (incN > 0) {
+ pbuf.moveRight(fromPos, toPos, size); // open a gap for the new range (may reallocate pbuf.p)
+ } else {
+ pbuf.moveLeftAndReduce(fromPos, toPos); // close the gap left by absorbed ranges
+ }
+ }
+
+ int pos = 1 + low * 2; // slot of the new/merged range
+ // pbuf.ensureSize(pos + 2);
+ pbuf.writeCodePoint(pos, from);
+ pbuf.writeCodePoint(pos + 1, to);
+ n += incN;
+ pbuf.writeCodePoint(0, n); // store the updated range count
+
+ return pbuf;
+ }
+
+ // add_code_range, be aware of it returning null!
+ /**
+  * Adds the range {@code [from, to]} to {@code pbuf}. An inverted range
+  * ({@code from > to}) is either ignored, returning {@code pbuf} unchanged
+  * (which may be {@code null}), or rejected, depending on the syntax setting.
+  *
+  * @throws ValueException for an inverted range when the syntax does not
+  *         allow empty ranges in character classes
+  */
+ public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
+     if (from <= to) {
+         return addCodeRangeToBuff(pbuf, from, to);
+     }
+     if (env.syntax.allowEmptyRangeInCC()) {
+         return pbuf;
+     }
+     throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+ }
+
+ // SET_ALL_MULTI_BYTE_RANGE
+ protected static CodeRangeBuffer setAllMultiByteRange(Encoding enc, CodeRangeBuffer pbuf) {
+ return addCodeRangeToBuff(pbuf, enc.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE);
+ }
+
+ // ADD_ALL_MULTI_BYTE_RANGE
+ /**
+  * For single-byte encodings returns {@code pbuf} untouched; otherwise
+  * delegates to {@link #setAllMultiByteRange}.
+  */
+ public static CodeRangeBuffer addAllMultiByteRange(Encoding enc, CodeRangeBuffer pbuf) {
+     return enc.isSingleByte() ? pbuf : setAllMultiByteRange(enc, pbuf);
+ }
+
+ // not_code_range_buf
+ /**
+  * Builds the complement of {@code bbuf} within the interval that starts at
+  * the encoding's multi-byte start position and ends at
+  * {@code ALL_MULTI_BYTE_RANGE}. A {@code null} or empty input yields the
+  * whole interval. The result may be {@code null} when nothing is left.
+  */
+ public static CodeRangeBuffer notCodeRangeBuff(Encoding enc, CodeRangeBuffer bbuf) {
+     if (bbuf == null) {
+         return setAllMultiByteRange(enc, null);
+     }
+
+     final int[] ranges = bbuf.p;
+     final int count = ranges[0];
+     if (count <= 0) {
+         return setAllMultiByteRange(enc, null);
+     }
+
+     CodeRangeBuffer result = null;
+     int gapStart = enc.mbcodeStartPosition();
+     int lastTo = 0;
+
+     for (int idx = 0; idx < count; idx++) {
+         final int slot = (idx << 1) + 1;
+         final int rangeFrom = ranges[slot];
+         lastTo = ranges[slot + 1];
+         // emit the gap that precedes this range, if there is one
+         if (gapStart <= rangeFrom - 1) {
+             result = addCodeRangeToBuff(result, gapStart, rangeFrom - 1);
+         }
+         if (lastTo == ALL_MULTI_BYTE_RANGE) {
+             break;
+         }
+         gapStart = lastTo + 1;
+     }
+
+     // trailing gap after the last range
+     if (lastTo < ALL_MULTI_BYTE_RANGE) {
+         result = addCodeRangeToBuff(result, lastTo + 1, ALL_MULTI_BYTE_RANGE);
+     }
+     return result;
+ }
+
+ // or_code_range_buf
+ /**
+  * Computes the union of two range buffers, each optionally negated
+  * ({@code not1} / {@code not2}). Either buffer may be {@code null}, which
+  * is treated as "no ranges". Returns a new buffer, or {@code null} when
+  * both inputs are {@code null} and neither is negated.
+  */
+ public static CodeRangeBuffer orCodeRangeBuff(Encoding enc, CodeRangeBuffer bbuf1, boolean not1,
+         CodeRangeBuffer bbuf2, boolean not2) {
+     if (bbuf1 == null && bbuf2 == null) {
+         return (not1 || not2) ? setAllMultiByteRange(enc, null) : null;
+     }
+
+     if (bbuf2 == null) {
+         // exchange the operands so that the missing buffer is operand 1
+         final boolean heldNot = not1;
+         not1 = not2;
+         not2 = heldNot;
+         final CodeRangeBuffer heldBuf = bbuf1;
+         bbuf1 = bbuf2;
+         bbuf2 = heldBuf;
+     }
+
+     if (bbuf1 == null) {
+         if (not1) {
+             return setAllMultiByteRange(enc, null);
+         }
+         return not2 ? notCodeRangeBuff(enc, bbuf2) : bbuf2.clone();
+     }
+
+     if (not1) {
+         // exchange the operands so that a negated operand 1 becomes operand 2
+         final boolean heldNot = not1;
+         not1 = not2;
+         not2 = heldNot;
+         final CodeRangeBuffer heldBuf = bbuf1;
+         bbuf1 = bbuf2;
+         bbuf2 = heldBuf;
+     }
+
+     CodeRangeBuffer result = null;
+     if (!not1) {
+         // 1 OR 2, or 1 OR (not 2)
+         result = not2 ? notCodeRangeBuff(enc, bbuf2) : bbuf2.clone();
+     }
+
+     final int[] ranges1 = bbuf1.p;
+     final int count1 = ranges1[0];
+     for (int idx = 0; idx < count1; idx++) {
+         final int slot = idx * 2 + 1;
+         result = addCodeRangeToBuff(result, ranges1[slot], ranges1[slot + 1]);
+     }
+
+     return result;
+ }
+
+ // and_code_range1
+ /**
+  * Adds to {@code pbuf} the parts of {@code [from1, to1]} that are not
+  * covered by the first {@code n} ranges stored in {@code data}
+  * (same layout as a range buffer: bounds of range i at 2i+1 and 2i+2).
+  *
+  * @return the buffer with the remaining pieces added (may be {@code null}
+  *         when {@code pbuf} was {@code null} and nothing remained)
+  */
+ public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, int[] data, int n) {
+     for (int idx = 0; idx < n; idx++) {
+         final int slot = idx * 2 + 1;
+         final int cutFrom = data[slot];
+         final int cutTo = data[slot + 1];
+
+         if (cutFrom < from1) {
+             if (cutTo < from1) {
+                 continue;           // cut lies entirely before the remainder
+             }
+             from1 = cutTo + 1;      // cut clips the left edge
+         } else if (cutFrom > to1) {
+             from1 = cutFrom;
+         } else if (cutTo < to1) {
+             // cut lies inside: keep the piece to its left, continue to its right
+             if (from1 <= cutFrom - 1) {
+                 pbuf = addCodeRangeToBuff(pbuf, from1, cutFrom - 1);
+             }
+             from1 = cutTo + 1;
+         } else {
+             to1 = cutFrom - 1;      // cut clips the right edge
+         }
+
+         if (from1 > to1) {
+             break;
+         }
+     }
+
+     if (from1 <= to1) {
+         pbuf = addCodeRangeToBuff(pbuf, from1, to1);
+     }
+
+     return pbuf;
+ }
+
+ // and_code_range_buf
+ /**
+  * Computes the intersection of two range buffers, each optionally negated.
+  * A {@code null} buffer is treated as "no ranges". Returns a new buffer or
+  * {@code null} when the result is empty (including the case where both
+  * operands are negated, which this method does not compute).
+  */
+ public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
+         CodeRangeBuffer bbuf2, boolean not2) {
+     if (bbuf1 == null) {
+         /* not1 != 0 -> not2 == 0 */
+         return (not1 && bbuf2 != null) ? bbuf2.clone() : null;
+     }
+     if (bbuf2 == null) {
+         return not2 ? bbuf1.clone() : null;
+     }
+
+     if (not1) {
+         // exchange the operands so that a negated operand 1 becomes operand 2
+         final boolean heldNot = not1;
+         not1 = not2;
+         not2 = heldNot;
+         final CodeRangeBuffer heldBuf = bbuf1;
+         bbuf1 = bbuf2;
+         bbuf2 = heldBuf;
+     }
+
+     final int[] ranges1 = bbuf1.p;
+     final int count1 = ranges1[0];
+     final int[] ranges2 = bbuf2.p;
+     final int count2 = ranges2[0];
+
+     CodeRangeBuffer result = null;
+
+     if (!not2 && !not1) { /* 1 AND 2 */
+         for (int a = 0; a < count1; a++) {
+             final int aFrom = ranges1[a * 2 + 1];
+             final int aTo = ranges1[a * 2 + 2];
+
+             for (int b = 0; b < count2; b++) {
+                 final int bFrom = ranges2[b * 2 + 1];
+                 final int bTo = ranges2[b * 2 + 2];
+
+                 if (bFrom > aTo) {
+                     break;
+                 }
+                 if (bTo < aFrom) {
+                     continue;
+                 }
+                 result = addCodeRangeToBuff(result, Math.max(aFrom, bFrom), Math.min(aTo, bTo));
+             }
+         }
+     } else if (!not1) { /* 1 AND (not 2) */
+         for (int a = 0; a < count1; a++) {
+             final int aFrom = ranges1[a * 2 + 1];
+             final int aTo = ranges1[a * 2 + 2];
+             result = andCodeRange1(result, aFrom, aTo, ranges2, count2);
+         }
+     }
+
+     return result;
+ }
+
+ /**
+  * Tests whether {@code code} lies inside one of the ranges stored in
+  * {@code p} ({@code p[0]} = range count, bounds of range i at 2i+1 and
+  * 2i+2, ranges sorted ascending). Uses a binary search.
+  */
+ public static boolean isInCodeRange(int[] p, int code) {
+     final int count = p[0];
+     int lo = 0;
+     int hi = count;
+
+     // find the first range whose upper bound is >= code
+     while (lo < hi) {
+         final int mid = (lo + hi) >> 1;
+         if (p[(mid << 1) + 2] < code) {
+             lo = mid + 1;
+         } else {
+             hi = mid;
+         }
+     }
+
+     if (lo >= count) {
+         return false;
+     }
+     return p[(lo << 1) + 1] <= code;
+ }
+
+ /**
+  * Same membership test as the two-argument overload, but the range table
+  * starts at index {@code offset} inside {@code p}: {@code p[offset]} is
+  * the range count and the bounds follow it.
+  */
+ public static boolean isInCodeRange(int[] p, int offset, int code) {
+     final int count = p[offset];
+     int lo = 0;
+     int hi = count;
+
+     // find the first range whose upper bound is >= code
+     while (lo < hi) {
+         final int mid = (lo + hi) >> 1;
+         if (p[(mid << 1) + 2 + offset] < code) {
+             lo = mid + 1;
+         } else {
+             hi = mid;
+         }
+     }
+
+     if (lo >= count) {
+         return false;
+     }
+     return p[(lo << 1) + 1 + offset] <= code;
+ }
+
+}
diff --git a/src/org/joni/CompileInfo.java b/src/org/joni/CompileInfo.java
new file mode 100644
index 0000000..53cf83f
--- /dev/null
+++ b/src/org/joni/CompileInfo.java
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+/**
+ * Plain mutable bag of compile-time settings. All fields are
+ * package-private and carry no behaviour of their own.
+ */
+public class CompileInfo {
+    // encodings
+    Encoding patternEnc;
+    Encoding targetEnd;     // NOTE(review): name looks like a typo for "targetEnc" -- confirm before renaming
+
+    // grammar and flags
+    Syntax syntax;
+    int option;
+    int caseFoldFlag;
+
+    int elements;
+}
diff --git a/src/org/joni/Compiler.java b/src/org/joni/Compiler.java
new file mode 100644
index 0000000..e8f32bf
--- /dev/null
+++ b/src/org/joni/Compiler.java
@@ -0,0 +1,1390 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAll;
+import static org.joni.BitStatus.bsAt;
+import static org.joni.Option.isCaptureGroup;
+import static org.joni.Option.isDynamic;
+import static org.joni.Option.isFindCondition;
+import static org.joni.Option.isIgnoreCase;
+import static org.joni.Option.isMultiline;
+import static org.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import java.util.HashSet;
+
+import org.joni.ast.AnchorNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.constants.AnchorType;
+import org.joni.constants.CharacterType;
+import org.joni.constants.EncloseType;
+import org.joni.constants.NodeType;
+import org.joni.constants.OPCode;
+import org.joni.constants.OPSize;
+import org.joni.constants.RegexState;
+import org.joni.constants.StackPopLevel;
+import org.joni.constants.TargetInfo;
+
+final class Compiler extends Analyser {
+
+ /** Forwards the scan environment and the pattern slice {@code bytes[p..end)} to the superclass. */
+ protected Compiler(final ScanEnvironment env, final byte[] bytes, final int p, final int end) {
+     super(env, bytes, p, end);
+ }
+
+ protected final void compile() { // drives one full compilation: resets regex state, parses, runs the tree analyses, emits bytecode and picks the stack pop level
+ regex.state = RegexState.COMPILING;
+
+ if (Config.DEBUG) {
+ Config.log.println(regex.encStringToString(bytes, getBegin(), getEnd()));
+ }
+
+ reset();
+
+ regex.code = new int[(stop - p) * 2 + 1]; // +1: empty regex ??
+ regex.codeLength = 0;
+ //regex.operands = new Object[10];
+
+ regex.numMem = 0;
+ regex.numRepeat = 0;
+ regex.numNullCheck = 0;
+ //regex.repeatRangeAlloc = 0;
+ regex.repeatRangeLo = null; // allocated lazily by entryRepeatRange
+ regex.repeatRangeHi = null;
+ regex.numCombExpCheck = 0;
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) regex.numCombExpCheck = 0; // redundant with the assignment just above
+
+ parse();
+
+ if (Config.USE_NAMED_GROUP) {
+ /* mixed use named group and no-named group */
+ if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(regex.options)) {
+ if (env.numNamed != env.numMem) {
+ root = disableNoNameGroupCapture(root);
+ } else {
+ numberedRefCheck(root);
+ }
+ }
+ } // USE_NAMED_GROUP
+
+ if (Config.USE_NAMED_GROUP) { // NOTE(review): guards subexp-call setup with USE_NAMED_GROUP although the fix-up below tests USE_SUBEXP_CALL -- confirm intended
+ if (env.numCall > 0) {
+ env.unsetAddrList = new UnsetAddrList(env.numCall);
+ setupSubExpCall(root);
+ // r != 0 ???
+ subexpRecursiveCheckTrav(root);
+ // r < 0 -< err, FOUND_CALLED_NODE = 1
+ subexpInfRecursiveCheckTrav(root);
+ // r != 0 recursion infinite ???
+ regex.numCall = env.numCall;
+ } else {
+ regex.numCall = 0;
+ }
+ } // USE_NAMED_GROUP
+
+ setupTree(root, 0);
+ if (Config.DEBUG_PARSE_TREE) {
+ root.verifyTree(new HashSet<Node>(),env.reg.warnings);
+ Config.log.println(root + "\n");
+ }
+
+ regex.captureHistory = env.captureHistory;
+ regex.btMemStart = env.btMemStart;
+ regex.btMemEnd = env.btMemEnd; // overwritten by both branches of the if/else below
+
+ if (isFindCondition(regex.options)) {
+ regex.btMemEnd = bsAll();
+ } else {
+ regex.btMemEnd = env.btMemEnd;
+ regex.btMemEnd |= regex.captureHistory;
+ }
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) {
+ setupCombExpCheck(root, 0);
+
+ if (Config.USE_SUBEXP_CALL && env.hasRecursion) {
+ env.numCombExpCheck = 0;
+ } else { // USE_SUBEXP_CALL
+ if (env.combExpMaxRegNum > 0) {
+ for (int i=1; i<env.combExpMaxRegNum; i++) { // NOTE(review): upper bound is exclusive here; upstream Oniguruma appears to use i <= comb_exp_max_regnum -- confirm
+ if (bsAt(env.backrefedMem, i)) {
+ env.numCombExpCheck = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ } // USE_SUBEXP_CALL
+ regex.numCombExpCheck = env.numCombExpCheck;
+ } // USE_COMBINATION_EXPLOSION_CHECK
+
+ regex.clearOptimizeInfo();
+
+ if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root);
+
+ env.memNodes = null;
+
+ compileTree(root); // emit the program body
+ addOpcode(OPCode.END);
+ addOpcode(OPCode.FINISH); // for stack bottom
+
+ if (Config.USE_SUBEXP_CALL && env.unsetAddrList != null) {
+ env.unsetAddrList.fix(regex); // resolves the dummy addresses written by compileCall
+ env.unsetAddrList = null; /// ???
+ }
+
+ if (regex.numRepeat != 0 || regex.btMemEnd != 0) { // choose how much of the stack must be unwound on pop
+ regex.stackPopLevel = StackPopLevel.ALL;
+ } else {
+ if (regex.btMemStart != 0) {
+ regex.stackPopLevel = StackPopLevel.MEM_START;
+ } else {
+ regex.stackPopLevel = StackPopLevel.FREE;
+ }
+ }
+
+ if (Config.DEBUG_COMPILE) {
+ if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString());
+ Config.log.println("stack used: " + regex.stackNeeded);
+ Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
+ } // DEBUG_COMPILE
+
+ regex.state = RegexState.NORMAL;
+ }
+
+ /**
+  * Tells whether {@code op} is one of the variable-length EXACT opcodes,
+  * i.e. one for which the callers in this class emit (and count) an extra
+  * length operand.
+  */
+ private boolean isNeedStrLenOpExact(int op) {
+     if (op == OPCode.EXACTN || op == OPCode.EXACTMBN) {
+         return true;
+     }
+     if (op == OPCode.EXACTMB2N || op == OPCode.EXACTMB3N) {
+         return true;
+     }
+     return op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB;
+ }
+
+ /**
+  * Picks the EXACT* opcode for a run of {@code strLength} characters that
+  * are each {@code mbLength} bytes long.
+  *
+  * <p>Case-insensitive runs use the {@code _IC} opcodes ({@code _IC_SB}
+  * variants when the encoding supplies a lower-case table). Otherwise the
+  * opcode is chosen by bytes-per-character, with dedicated opcodes for
+  * short runs of 1- and 2-byte characters.
+  *
+  * @param mbLength   bytes per character in the run
+  * @param strLength  number of characters in the run
+  * @param ignoreCase whether the run is matched case-insensitively
+  * @return the selected opcode
+  */
+ private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) {
+     int op;
+
+     if (ignoreCase) {
+         switch (strLength) {
+         case 1: op = enc.toLowerCaseTable() != null ? OPCode.EXACT1_IC_SB : OPCode.EXACT1_IC; break;
+         default:op = enc.toLowerCaseTable() != null ? OPCode.EXACTN_IC_SB : OPCode.EXACTN_IC; break;
+         } // switch
+     } else {
+         switch (mbLength) {
+         case 1:
+             switch (strLength) {
+             case 1: op = OPCode.EXACT1; break;
+             case 2: op = OPCode.EXACT2; break;
+             case 3: op = OPCode.EXACT3; break;
+             case 4: op = OPCode.EXACT4; break;
+             case 5: op = OPCode.EXACT5; break;
+             default:op = OPCode.EXACTN; break;
+             } // inner switch
+             break;
+         case 2:
+             switch (strLength) {
+             case 1: op = OPCode.EXACTMB2N1; break;
+             case 2: op = OPCode.EXACTMB2N2; break;
+             case 3: op = OPCode.EXACTMB2N3; break;
+             default:op = OPCode.EXACTMB2N; break;
+             } // inner switch
+             break;
+         case 3:
+             op = OPCode.EXACTMB3N;
+             // without this break the case fell through to the default and
+             // EXACTMB3N could never be selected
+             break;
+         default:
+             op = OPCode.EXACTMBN;
+             break;
+         } // switch
+     }
+     return op;
+ }
+
+ private void compileTreeEmptyCheck(Node node, int emptyInfo) {
+ int savedNumNullCheck = regex.numNullCheck;
+
+ if (emptyInfo != 0) {
+ addOpcode(OPCode.NULL_CHECK_START);
+ addMemNum(regex.numNullCheck); /* NULL CHECK ID */
+ regex.numNullCheck++;
+ }
+
+ compileTree(node);
+
+ if (emptyInfo != 0) {
+ switch(emptyInfo) {
+ case TargetInfo.IS_EMPTY:
+ addOpcode(OPCode.NULL_CHECK_END);
+ break;
+ case TargetInfo.IS_EMPTY_MEM:
+ addOpcode(OPCode.NULL_CHECK_END_MEMST);
+ break;
+ case TargetInfo.IS_EMPTY_REC:
+ addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH);
+ break;
+ } // switch
+
+ addMemNum(savedNumNullCheck); /* NULL CHECK ID */
+ }
+ }
+
+ private void compileCall(CallNode node) {
+ addOpcode(OPCode.CALL);
+ node.unsetAddrList.add(regex.codeLength, node.target);
+ addAbsAddr(0); /*dummy addr.*/
+ }
+
+ /** Compiles {@code node} {@code n} times in a row (nothing is emitted for {@code n <= 0}). */
+ private void compileTreeNTimes(Node node, int n) {
+     int remaining = n;
+     while (remaining > 0) {
+         compileTree(node);
+         remaining--;
+     }
+ }
+
+ private int addCompileStringlength(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
+ int op = selectStrOpcode(mbLength, strLength, ignoreCase);
+
+ int len = OPSize.OPCODE;
+
+ if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
+ if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
+
+ len += mbLength * strLength;
+ return len;
+ }
+
+ /**
+  * Emits one EXACT* instruction for a run of {@code strLength} characters
+  * of {@code mbLength} bytes each, read from {@code bytes} starting at
+  * {@code p}.
+  *
+  * <p>Layout: opcode, then (EXACTMBN only) the bytes-per-character, then
+  * (variable-length opcodes only) a length operand, then the raw bytes.
+  * The length operand is the byte count for the case-insensitive opcodes
+  * and the character count otherwise.
+  *
+  * @param bytes      source of the string bytes
+  * @param p          offset of the first byte in {@code bytes}
+  * @param mbLength   bytes per character
+  * @param strLength  number of characters
+  * @param ignoreCase whether a case-insensitive opcode is selected
+  */
+ private void addCompileString(byte[] bytes, int p, int mbLength, int strLength, boolean ignoreCase) {
+     int op = selectStrOpcode(mbLength, strLength, ignoreCase);
+     addOpcode(op);
+
+     if (op == OPCode.EXACTMBN) addLength(mbLength);
+
+     if (isNeedStrLenOpExact(op)) {
+         if (op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB) {
+             addLength(mbLength * strLength);
+         } else {
+             // strLength is an operand, not an instruction: emit it with
+             // addLength like every other length operand in this method
+             addLength(strLength);
+         }
+     }
+     regex.addBytes(bytes, p, mbLength * strLength);
+ }
+
+ private int compileLengthStringNode(Node node) {
+ StringNode sn = (StringNode)node;
+ if (sn.length() <= 0) return 0; // ??? out
+ boolean ambig = sn.isAmbig();
+
+ int p, prev;
+ p = prev = sn.p;
+ int end = sn.end;
+ byte[]bytes = sn.bytes;
+ int prevLen = enc.length(bytes[p]);
+ p += prevLen;
+
+ int slen = 1;
+ int rlen = 0;
+
+ while (p < end) {
+ int len = enc.length(bytes[p]);
+ if (len == prevLen) {
+ slen++;
+ } else {
+ int r = addCompileStringlength(bytes, prev, prevLen, slen, ambig);
+ rlen += r;
+ prev = p;
+ slen = 1;
+ prevLen = len;
+ }
+ p += len;
+ }
+ int r = addCompileStringlength(bytes, prev, prevLen, slen, ambig);
+ rlen += r;
+ return rlen;
+ }
+
+ private int compileLengthStringRawNode(StringNode sn) {
+ if (sn.length() <= 0) return 0; // ??? throw an exception ??
+ return addCompileStringlength(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false);
+ }
+
+ private void compileStringNode(Node node) {
+ StringNode sn = (StringNode)node;
+ if (sn.length() <= 0) return; // out ?
+ boolean ambig = sn.isAmbig();
+
+ int p, prev;
+ p = prev = sn.p;
+ int end = sn.end;
+ byte[]bytes = sn.bytes;
+ int prevLen = enc.length(bytes[p]);
+ p += prevLen;
+ int slen = 1;
+
+ while (p < end) {
+ int len = enc.length(bytes[p]);
+ if (len == prevLen) {
+ slen++;
+ } else {
+ addCompileString(bytes, prev, prevLen, slen, ambig);
+ prev = p;
+ slen = 1;
+ prevLen = len;
+ }
+ p += len;
+ }
+ addCompileString(bytes, prev, prevLen, slen, ambig);
+ }
+
+ private void compileStringRawNode(StringNode sn) {
+ if (sn.length() <= 0) return; // ??
+ addCompileString(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false);
+ }
+
+ private void addMultiByteCClass(CodeRangeBuffer mbuf) {
+ addLength(mbuf.used);
+ regex.addInts(mbuf.p, mbuf.used);
+ }
+
+ private int compileLengthCClassNode(CClassNode cc) {
+ if (cc.isShare()) return OPSize.OPCODE + OPSize.POINTER;
+
+ int len;
+ if (cc.mbuf == null) {
+ len = OPSize.OPCODE + BitSet.BITSET_SIZE;
+ } else {
+ if (enc.minLength() > 1 || cc.bs.isEmpty()) {
+ len = OPSize.OPCODE;
+ } else {
+ len = OPSize.OPCODE + BitSet.BITSET_SIZE;
+ }
+
+ len += OPSize.LENGTH + cc.mbuf.used;
+ }
+ return len;
+ }
+
+ private void compileCClassNode(CClassNode cc) {
+ if (cc.isShare()) { // shared char class
+ addOpcode(OPCode.CCLASS_NODE);
+ addPointer(cc);
+ return;
+ }
+
+ if (cc.mbuf == null) {
+ if (cc.isNot()) {
+ addOpcode(enc.isSingleByte() ? OPCode.CCLASS_NOT_SB : OPCode.CCLASS_NOT);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.CCLASS_SB : OPCode.CCLASS);
+ }
+ regex.addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
+ } else {
+ if (enc.minLength() > 1 || cc.bs.isEmpty()) {
+ if (cc.isNot()) {
+ addOpcode(OPCode.CCLASS_MB_NOT);
+ } else {
+ addOpcode(OPCode.CCLASS_MB);
+ }
+ addMultiByteCClass(cc.mbuf);
+ } else {
+ if (cc.isNot()) {
+ addOpcode(OPCode.CCLASS_MIX_NOT);
+ } else {
+ addOpcode(OPCode.CCLASS_MIX);
+ }
+ // store the bit set and mbuf themself!
+ regex.addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
+ addMultiByteCClass(cc.mbuf);
+ }
+ }
+ }
+
+ private static final int REPEAT_RANGE_ALLOC = 8;
+ private void entryRepeatRange(int id, int lower, int upper) {
+ if (regex.repeatRangeLo == null) {
+ regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC];
+ regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC];
+ } else if (id >= regex.repeatRangeLo.length){
+ int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC];
+ System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length);
+ regex.repeatRangeLo = tmp;
+ tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC];
+ System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length);
+ regex.repeatRangeHi = tmp;
+ }
+
+ regex.repeatRangeLo[id] = lower;
+ regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper;
+ }
+
+ private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) {
+ int numRepeat = regex.numRepeat;
+ addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG);
+ addMemNum(numRepeat); /* OP_REPEAT ID */
+ regex.numRepeat++;
+ addRelAddr(targetLen + OPSize.REPEAT_INC);
+
+ entryRepeatRange(numRepeat, qn.lower, qn.upper);
+
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+
+ if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) {
+ addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG);
+ } else {
+ addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG);
+ }
+
+ addMemNum(numRepeat); /* OP_REPEAT ID */
+ }
+
+ private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // was 50
+
+ /** A state-check number is considered active when it is strictly positive. */
+ private static boolean cknOn(int ckn) {
+     return 0 < ckn;
+ }
+
+ private int compileCECLengthQuantifierNode(QuantifierNode qn) { // code size of a quantifier when combination-explosion checking is compiled in; the branches parallel compileCECQuantifierNode and presumably must stay in sync with it
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target
+ int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; // state-check number, 0 when checking is off for this regex
+ int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0; // extra operand size when a state check is attached
+
+ /* anychar repeat */
+ if (qn.target.getType() == NodeType.CANY) {
+ if (qn.greedy && infinite) {
+ if (qn.nextHeadExact != null && !cknOn(ckn)) {
+ return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen; // cklen is 0 here because cknOn(ckn) is false
+ } else {
+ return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen;
+ }
+ }
+ }
+
+ int modTLen; // target size including the null-check bracket, if any
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+
+ int len;
+ if (infinite && qn.lower <= 1) { // '*' and '+' style loops
+ if (qn.greedy) {
+ if (qn.lower == 1) {
+ len = OPSize.JUMP;
+ } else {
+ len = 0;
+ }
+ len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP;
+ } else {
+ if (qn.lower == 0) {
+ len = OPSize.JUMP;
+ } else {
+ len = 0;
+ }
+ len += modTLen + OPSize.PUSH + cklen;
+ }
+ } else if (qn.upper == 0) {
+ if (qn.isRefered) { /* /(?<n>..){0}/ */
+ len = OPSize.JUMP + tlen;
+ } else {
+ len = 0;
+ }
+ } else if (qn.upper == 1 && qn.greedy) { // '?' (lower == 0) or exactly once
+ if (qn.lower == 0) {
+ if (cknOn(ckn)) {
+ len = OPSize.STATE_CHECK_PUSH + tlen;
+ } else {
+ len = OPSize.PUSH + tlen;
+ }
+ } else {
+ len = tlen;
+ }
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ len = OPSize.PUSH + cklen + OPSize.JUMP + tlen;
+ } else { // general counted repeat, see compileRangeRepeatNode
+ len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
+
+ if (cknOn(ckn)) {
+ len += OPSize.STATE_CHECK;
+ }
+ }
+ return len;
+ }
+
+ private void compileCECQuantifierNode(QuantifierNode qn) { // emits code for a quantifier when combination-explosion checking is compiled in; state-check opcodes are used whenever the node has an active check number
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target, used for relative jumps
+
+ int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; // state-check number, 0 when checking is off for this regex
+
+ if (qn.isAnyCharStar()) { // dedicated ANYCHAR_STAR opcodes
+ compileTreeNTimes(qn.target, qn.lower); // mandatory repetitions are unrolled first
+ if (qn.nextHeadExact != null && !cknOn(ckn)) {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT);
+ }
+ if (cknOn(ckn)) { // never true here: the enclosing branch requires !cknOn(ckn)
+ addStateCheckNum(ckn);
+ }
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ regex.addBytes(sn.bytes, sn.p, 1); // operand: first byte of the string that follows
+ return;
+ } else {
+ if (isMultiline(regex.options)) {
+ if (cknOn(ckn)) {
+ addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_ML_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_ML_STAR);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR);
+ }
+ } else {
+ if (cknOn(ckn)) {
+ addOpcode(enc.isSingleByte() ? OPCode.STATE_CHECK_ANYCHAR_STAR_SB : OPCode.STATE_CHECK_ANYCHAR_STAR);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR);
+ }
+ }
+ if (cknOn(ckn)) {
+ addStateCheckNum(ckn);
+ }
+ return;
+ }
+ }
+
+ int modTLen; // target size including the null-check bracket, if any
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+ if (infinite && qn.lower <= 1) { // '*' and '+' style loops
+ if (qn.greedy) {
+ if (qn.lower == 1) { // '+': jump over the PUSH so the body runs once unconditionally
+ addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH :
+ OPSize.PUSH);
+ }
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(modTLen + OPSize.JUMP);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
+ }
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ? // jump back to the PUSH at the loop head
+ OPSize.STATE_CHECK_PUSH :
+ OPSize.PUSH)));
+ } else {
+ if (qn.lower == 0) { // non-greedy '*': skip the body on entry
+ addOpcodeRelAddr(OPCode.JUMP, modTLen);
+ }
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP);
+ addStateCheckNum(ckn);
+ addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP));
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
+ }
+ }
+ } else if (qn.upper == 0) {
+ if (qn.isRefered) { /* /(?<n>..){0}/ */
+ addOpcodeRelAddr(OPCode.JUMP, tlen); // body is emitted but jumped over
+ compileTree(qn.target);
+ } // else r=0 ???
+ } else if (qn.upper == 1 && qn.greedy) { // '?' (lower == 0) or exactly once
+ if (qn.lower == 0) {
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(tlen);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, tlen);
+ }
+ }
+ compileTree(qn.target);
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(OPSize.JUMP);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
+ }
+
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } else { // general counted repeat
+ compileRangeRepeatNode(qn, modTLen, emptyInfo);
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK);
+ addStateCheckNum(ckn);
+ }
+ }
+ }
+
+ /**
+  * Code size of a quantifier node; dispatches on the compile-time
+  * {@code USE_COMBINATION_EXPLOSION_CHECK} switch.
+  */
+ private int compileLengthQuantifierNode(QuantifierNode qn) {
+     if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+         return compileCECLengthQuantifierNode(qn);
+     }
+     return compileNonCECLengthQuantifierNode(qn);
+ }
+
+ private int compileNonCECLengthQuantifierNode(QuantifierNode qn) { // code size of a quantifier when combination-explosion checking is not compiled in; presumably must stay in sync with compileNonCECQuantifierNode
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target
+
+ /* anychar repeat */
+ if (qn.target.getType() == NodeType.CANY) {
+ if (qn.greedy && infinite) {
+ if (qn.nextHeadExact != null) {
+ return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
+ } else {
+ return OPSize.ANYCHAR_STAR + tlen * qn.lower;
+ }
+ }
+ }
+
+ int modTLen = 0; // target size including the null-check bracket, if any (always reassigned below)
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+
+ int len;
+ if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { // unbounded loop; the mandatory part is unrolled when small enough
+ if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ len = OPSize.JUMP; // large body: jump into the loop instead of duplicating it
+ } else {
+ len = tlen * qn.lower;
+ }
+
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP;
+ } else if (qn.nextHeadExact != null) {
+ len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP;
+ } else {
+ len += OPSize.PUSH + modTLen + OPSize.JUMP;
+ }
+ } else {
+ len += OPSize.JUMP + modTLen + OPSize.PUSH;
+ }
+
+ } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
+ len = OPSize.JUMP + tlen;
+ } else if (!infinite && qn.greedy &&
+ (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) { // small bounded greedy repeat: fully unrolled
+ len = tlen * qn.lower;
+ len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower);
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ len = OPSize.PUSH + OPSize.JUMP + tlen;
+ } else { // general counted repeat, see compileRangeRepeatNode
+ len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
+ }
+ return len;
+ }
+
+ /**
+  * Emits code for a quantifier node; dispatches on the compile-time
+  * {@code USE_COMBINATION_EXPLOSION_CHECK} switch.
+  */
+ private void compileQuantifierNode(QuantifierNode qn) {
+     if (!Config.USE_COMBINATION_EXPLOSION_CHECK) {
+         compileNonCECQuantifierNode(qn);
+         return;
+     }
+     compileCECQuantifierNode(qn);
+ }
+
+ /**
+ * Emits bytecode for a quantifier node on the path taken when
+ * Config.USE_COMBINATION_EXPLOSION_CHECK is off.
+ *
+ * The emission strategy is picked from the repeat bounds (qn.lower / qn.upper),
+ * greediness, the compiled length of the target and the optional
+ * headExact / nextHeadExact hints stored on the node.
+ */
+ private void compileNonCECQuantifierNode(QuantifierNode qn) {
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ // compiled size of one copy of the target; used for every relative offset below
+ int tlen = compileLengthTree(qn.target);
+
+ // any-char star: emit qn.lower copies of the target, then one dedicated *_STAR opcode
+ if (qn.isAnyCharStar()) {
+ compileTreeNTimes(qn.target, qn.lower);
+ if (qn.nextHeadExact != null) {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_PEEK_NEXT_SB : OPCode.ANYCHAR_STAR_PEEK_NEXT);
+ }
+ // the PEEK_NEXT variants take one operand: the first byte of the following exact string
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ regex.addBytes(sn.bytes, sn.p, 1);
+ return;
+ } else {
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_STAR_SB : OPCode.ANYCHAR_ML_STAR);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_STAR_SB : OPCode.ANYCHAR_STAR);
+ }
+ return;
+ }
+ }
+
+ // target length including the null-check pair that wraps it when emptyInfo is set
+ int modTLen;
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+ if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ // unbounded repeat: mandatory part first, then a PUSH/JUMP loop around the target
+ if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ // large target with lower == 1: instead of duplicating the target, jump over the
+ // loop head (whose size depends on the variant emitted below) straight into the body
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1);
+ } else if (qn.nextHeadExact != null) {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT);
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH);
+ }
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP);
+ }
+ } else {
+ compileTreeNTimes(qn.target, qn.lower);
+ }
+
+ if (qn.greedy) {
+ // greedy loop: <push variant> body JUMP-back; the push offset skips body + JUMP
+ if (qn.headExact != null) {
+ addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP);
+ StringNode sn = (StringNode)qn.headExact;
+ regex.addBytes(sn.bytes, sn.p, 1);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1));
+ } else if (qn.nextHeadExact != null) {
+ addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP);
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ regex.addBytes(sn.bytes, sn.p, 1);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT));
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH));
+ }
+ } else {
+ // non-greedy loop: JUMP over the body to a PUSH that points back at the body
+ addOpcodeRelAddr(OPCode.JUMP, modTLen);
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
+ }
+ } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
+ // the target is still emitted (the node is flagged isRefered) but jumped over inline
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } else if (!infinite && qn.greedy &&
+ (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ // small bounded greedy repeat: unroll lower mandatory copies plus n optional copies
+ int n = qn.upper - qn.lower;
+ compileTreeNTimes(qn.target, qn.lower);
+
+ for (int i=0; i<n; i++) {
+ // each PUSH offset skips all remaining optional copies and their PUSH opcodes
+ addOpcodeRelAddr(OPCode.PUSH, (n - i) * tlen + (n - i - 1) * OPSize.PUSH);
+ compileTree(qn.target);
+ }
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ // PUSH an alternative that lands on the target, then JUMP over the target
+ addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
+ addOpcodeRelAddr(OPCode.JUMP, tlen);
+ compileTree(qn.target);
+ } else {
+ // everything else is delegated to the range-repeat emitter
+ compileRangeRepeatNode(qn, modTLen, emptyInfo);
+ }
+ }
+
+ private int compileLengthOptionNode(EncloseNode node) {
+ int prev = regex.options;
+ regex.options = node.option;
+ int tlen = compileLengthTree(node.target);
+ regex.options = prev;
+
+ if (isDynamic(prev ^ node.option)) {
+ return OPSize.SET_OPTION_PUSH + OPSize.SET_OPTION + OPSize.FAIL + tlen + OPSize.SET_OPTION;
+ } else {
+ return tlen;
+ }
+ }
+
+ private void compileOptionNode(EncloseNode node) {
+ int prev = regex.options;
+
+ if (isDynamic(prev ^ node.option)) {
+ addOpcodeOption(OPCode.SET_OPTION_PUSH, node.option);
+ addOpcodeOption(OPCode.SET_OPTION, prev);
+ addOpcode(OPCode.FAIL);
+ }
+
+ regex.options = node.option;
+ compileTree(node.target);
+ regex.options = prev;
+
+ if (isDynamic(prev ^ node.option)) {
+ addOpcodeOption(OPCode.SET_OPTION, prev);
+ }
+ }
+
+ /**
+ * Computes the bytecode length of an enclose node (option group, capture
+ * group or stop-backtrack group).
+ */
+ private int compileLengthEncloseNode(EncloseNode node) {
+ if (node.isOption()) {
+ return compileLengthOptionNode(node);
+ }
+
+ int targetLen = node.target != null ? compileLengthTree(node.target) : 0;
+ int total;
+
+ switch (node.type) {
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL && node.isCalled()) {
+ // called group: CALL + JUMP prologue and RETURN epilogue around the body
+ total = OPSize.MEMORY_START_PUSH + targetLen + OPSize.CALL + OPSize.JUMP + OPSize.RETURN;
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ total += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
+ } else {
+ total += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
+ }
+ } else {
+ total = bsAt(regex.btMemStart, node.regNum) ? OPSize.MEMORY_START_PUSH : OPSize.MEMORY_START;
+ total += targetLen + (bsAt(regex.btMemEnd, node.regNum) ? OPSize.MEMORY_END_PUSH : OPSize.MEMORY_END);
+ }
+ return total;
+
+ case EncloseType.STOP_BACKTRACK:
+ if (node.isStopBtSimpleRepeat()) {
+ // measured from the quantifier's own target, repeated lower times plus the loop
+ QuantifierNode repeat = (QuantifierNode)node.target;
+ targetLen = compileLengthTree(repeat.target);
+ total = targetLen * repeat.lower + OPSize.PUSH + targetLen + OPSize.POP + OPSize.JUMP;
+ } else {
+ total = OPSize.PUSH_STOP_BT + targetLen + OPSize.POP_STOP_BT;
+ }
+ return total;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ return 0; // not reached
+ } // switch
+ }
+
+ /**
+ * Emits bytecode for an enclose node: an option group (delegated to
+ * compileOptionNode), a capture group (EncloseType.MEMORY) or a
+ * stop-backtrack group (EncloseType.STOP_BACKTRACK).
+ */
+ private void compileEncloseNode(EncloseNode node) {
+ if (node.isOption()) {
+ compileOptionNode(node);
+ return;
+ }
+
+ int len;
+ switch (node.type) {
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL) {
+ if (node.isCalled()) {
+ // called group prologue: CALL <abs addr of body>, then JUMP over body + RETURN
+ addOpcode(OPCode.CALL);
+ // the body starts right after the CALL operand and the JUMP that follows it
+ node.callAddr = regex.codeLength + OPSize.ABSADDR + OPSize.JUMP;
+ node.setAddrFixed();
+ addAbsAddr(node.callAddr);
+ len = compileLengthTree(node.target);
+ len += OPSize.MEMORY_START_PUSH + OPSize.RETURN;
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
+ } else {
+ len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
+ }
+ addOpcodeRelAddr(OPCode.JUMP, len);
+ }
+ } // USE_SUBEXP_CALL
+
+ // group start marker; the PUSH variant is chosen from the btMemStart bit for this group
+ if (bsAt(regex.btMemStart, node.regNum)) {
+ addOpcode(OPCode.MEMORY_START_PUSH);
+ } else {
+ addOpcode(OPCode.MEMORY_START);
+ }
+
+ addMemNum(node.regNum);
+ compileTree(node.target);
+
+ // group end marker; called groups pick the *_REC variants when recursive and add RETURN
+ if (Config.USE_SUBEXP_CALL && node.isCalled()) {
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ addOpcode(node.isRecursion() ? OPCode.MEMORY_END_PUSH_REC : OPCode.MEMORY_END_PUSH);
+ } else {
+ addOpcode(node.isRecursion() ? OPCode.MEMORY_END_REC : OPCode.MEMORY_END);
+ }
+ addMemNum(node.regNum);
+ addOpcode(OPCode.RETURN);
+ } else { // USE_SUBEXP_CALL
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ addOpcode(OPCode.MEMORY_END_PUSH);
+ } else {
+ addOpcode(OPCode.MEMORY_END);
+ }
+ addMemNum(node.regNum);
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ if (node.isStopBtSimpleRepeat()) {
+ // simple repeat: lower mandatory copies, then a PUSH / body / POP / JUMP-back loop
+ QuantifierNode qn = (QuantifierNode)node.target;
+
+ compileTreeNTimes(qn.target, qn.lower);
+
+ len = compileLengthTree(qn.target);
+ addOpcodeRelAddr(OPCode.PUSH, len + OPSize.POP + OPSize.JUMP);
+ compileTree(qn.target);
+ addOpcode(OPCode.POP);
+ // jump back to the PUSH that opens the loop
+ addOpcodeRelAddr(OPCode.JUMP, -(OPSize.PUSH + len + OPSize.POP + OPSize.JUMP));
+ } else {
+ addOpcode(OPCode.PUSH_STOP_BT);
+ compileTree(node.target);
+ addOpcode(OPCode.POP_STOP_BT);
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ break;
+ } // switch
+ }
+
+ private int compileLengthAnchorNode(AnchorNode node) {
+ int tlen;
+ if (node.target != null) {
+ tlen = compileLengthTree(node.target);
+ } else {
+ tlen = 0;
+ }
+
+ int len;
+ switch (node.type) {
+ case AnchorType.PREC_READ:
+ len = OPSize.PUSH_POS + tlen + OPSize.POP_POS;
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS;
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ len = OPSize.LOOK_BEHIND + tlen;
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT;
+ break;
+
+ default:
+ len = OPSize.OPCODE;
+ break;
+ } // switch
+ return len;
+ }
+
+ /**
+ * Emits bytecode for an anchor node. Simple anchors become a single opcode
+ * (single-byte variants are used when enc.isSingleByte()); look-ahead and
+ * look-behind anchors wrap the compiled target.
+ */
+ private void compileAnchorNode(AnchorNode node) {
+ int targetLen;
+ int charLen;
+
+ switch (node.type) {
+ case AnchorType.BEGIN_BUF:
+ addOpcode(OPCode.BEGIN_BUF);
+ break;
+
+ case AnchorType.END_BUF:
+ addOpcode(OPCode.END_BUF);
+ break;
+
+ case AnchorType.BEGIN_LINE:
+ addOpcode(OPCode.BEGIN_LINE);
+ break;
+
+ case AnchorType.END_LINE:
+ addOpcode(OPCode.END_LINE);
+ break;
+
+ case AnchorType.SEMI_END_BUF:
+ addOpcode(OPCode.SEMI_END_BUF);
+ break;
+
+ case AnchorType.BEGIN_POSITION:
+ addOpcode(OPCode.BEGIN_POSITION);
+ break;
+
+ case AnchorType.WORD_BOUND:
+ addOpcode(enc.isSingleByte() ? OPCode.WORD_BOUND_SB : OPCode.WORD_BOUND);
+ break;
+
+ case AnchorType.NOT_WORD_BOUND:
+ addOpcode(enc.isSingleByte() ? OPCode.NOT_WORD_BOUND_SB : OPCode.NOT_WORD_BOUND);
+ break;
+
+ case AnchorType.WORD_BEGIN:
+ if (Config.USE_WORD_BEGIN_END) {
+ addOpcode(enc.isSingleByte() ? OPCode.WORD_BEGIN_SB : OPCode.WORD_BEGIN);
+ }
+ break;
+
+ case AnchorType.WORD_END:
+ if (Config.USE_WORD_BEGIN_END) {
+ addOpcode(enc.isSingleByte() ? OPCode.WORD_END_SB : OPCode.WORD_END);
+ }
+ break;
+
+ case AnchorType.PREC_READ:
+ addOpcode(OPCode.PUSH_POS);
+ compileTree(node.target);
+ addOpcode(OPCode.POP_POS);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ targetLen = compileLengthTree(node.target);
+ addOpcodeRelAddr(OPCode.PUSH_POS_NOT, targetLen + OPSize.FAIL_POS);
+ compileTree(node.target);
+ addOpcode(OPCode.FAIL_POS);
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ addOpcode(enc.isSingleByte() ? OPCode.LOOK_BEHIND_SB : OPCode.LOOK_BEHIND);
+ // a negative charLength means it has not been computed yet
+ charLen = node.charLength;
+ if (charLen < 0) {
+ charLen = getCharLengthTree(node.target);
+ if (returnCode != 0) {
+ newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ }
+ }
+ addLength(charLen);
+ compileTree(node.target);
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ targetLen = compileLengthTree(node.target);
+ addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, targetLen + OPSize.FAIL_LOOK_BEHIND_NOT);
+ charLen = node.charLength;
+ if (charLen < 0) {
+ charLen = getCharLengthTree(node.target);
+ if (returnCode != 0) {
+ newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ }
+ }
+ addLength(charLen);
+ compileTree(node.target);
+ addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT);
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+ }
+
+ /**
+ * Computes the number of code units that compileTree will emit for the given
+ * parse-tree node, dispatching on the node type.
+ */
+ private int compileLengthTree(Node node) {
+ int total = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ for (ConsAltNode item = (ConsAltNode)node; item != null; item = item.cdr) {
+ total += compileLengthTree(item.car);
+ }
+ break;
+
+ case NodeType.ALT:
+ int branches = 0;
+ for (ConsAltNode alt = (ConsAltNode)node; alt != null; alt = alt.cdr) {
+ total += compileLengthTree(alt.car);
+ branches++;
+ }
+ // every branch except the last carries a PUSH in front and a JUMP behind
+ total += (OPSize.PUSH + OPSize.JUMP) * (branches - 1);
+ break;
+
+ case NodeType.STR:
+ StringNode str = (StringNode)node;
+ total = str.isRaw() ? compileLengthStringRawNode(str) : compileLengthStringNode(str);
+ break;
+
+ case NodeType.CCLASS:
+ total = compileLengthCClassNode((CClassNode)node);
+ break;
+
+ case NodeType.CTYPE:
+ case NodeType.CANY:
+ total = OPSize.OPCODE;
+ break;
+
+ case NodeType.BREF:
+ BackRefNode ref = (BackRefNode)node;
+
+ if (Config.USE_BACKREF_WITH_LEVEL && ref.isNestLevel()) {
+ total = OPSize.OPCODE + OPSize.OPTION + OPSize.LENGTH +
+ OPSize.LENGTH + (OPSize.MEMNUM * ref.backNum);
+ } else if (ref.backNum != 1) {
+ total = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * ref.backNum);
+ } else if (!isIgnoreCase(regex.options) && ref.back[0] <= 2) {
+ // references to group 1 or 2 have dedicated opcodes with no operand
+ total = OPSize.OPCODE;
+ } else {
+ total = OPSize.OPCODE + OPSize.MEMNUM;
+ }
+ break;
+
+ case NodeType.CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ total = OPSize.CALL;
+ }
+ break;
+
+ case NodeType.QTFR:
+ total = compileLengthQuantifierNode((QuantifierNode)node);
+ break;
+
+ case NodeType.ENCLOSE:
+ total = compileLengthEncloseNode((EncloseNode)node);
+ break;
+
+ case NodeType.ANCHOR:
+ total = compileLengthAnchorNode((AnchorNode)node);
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } //switch
+ return total;
+ }
+
+ /**
+ * Emits bytecode for the given parse-tree node, dispatching on the node type.
+ * The emitted size is expected to agree with compileLengthTree for the same
+ * node, because relative jump offsets are computed from those lengths.
+ */
+ private void compileTree(Node node) {
+ int len = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode lin = (ConsAltNode)node;
+ do {
+ compileTree(lin.car);
+ } while ((lin = lin.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ // first pass: total size of the alternation, to find the common exit position
+ ConsAltNode aln = (ConsAltNode)node;
+ do {
+ len += compileLengthTree(aln.car);
+ if (aln.cdr != null) {
+ len += OPSize.PUSH + OPSize.JUMP;
+ }
+ } while ((aln = aln.cdr) != null);
+ int pos = regex.codeLength + len; /* goal position */
+
+ // second pass: every branch but the last is PUSH <next branch>, body, JUMP <goal>
+ aln = (ConsAltNode)node;
+ do {
+ len = compileLengthTree(aln.car);
+ if (aln.cdr != null) {
+ addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP);
+ }
+ compileTree(aln.car);
+ if (aln.cdr != null) {
+ len = pos - (regex.codeLength + OPSize.JUMP);
+ addOpcodeRelAddr(OPCode.JUMP, len);
+ }
+
+ } while ((aln = aln.cdr) != null);
+ break;
+
+ case NodeType.STR:
+ StringNode sn = (StringNode)node;
+ if (sn.isRaw()) {
+ compileStringRawNode(sn);
+ } else {
+ compileStringNode(sn);
+ }
+ break;
+
+ case NodeType.CCLASS:
+ compileCClassNode((CClassNode)node);
+ break;
+
+ case NodeType.CTYPE:
+ CTypeNode cn = (CTypeNode)node;
+ int op;
+ switch (cn.ctype) {
+ case CharacterType.WORD:
+ if (cn.not) {
+ op = enc.isSingleByte() ? OPCode.NOT_WORD_SB : OPCode.NOT_WORD;
+ } else {
+ op = enc.isSingleByte() ? OPCode.WORD_SB : OPCode.WORD;
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ return; // not reached
+ } // inner switch
+ addOpcode(op);
+ break;
+
+ case NodeType.CANY:
+ if (isMultiline(regex.options)) {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_ML_SB : OPCode.ANYCHAR_ML);
+ } else {
+ addOpcode(enc.isSingleByte() ? OPCode.ANYCHAR_SB : OPCode.ANYCHAR);
+ }
+ break;
+
+ case NodeType.BREF:
+ BackRefNode br = (BackRefNode)node;
+ if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
+ addOpcode(OPCode.BACKREF_WITH_LEVEL);
+ addOption(regex.options & Option.IGNORECASE);
+ addLength(br.nestLevel);
+ // !goto add_bacref_mems;!
+ addLength(br.backNum);
+ for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
+ break;
+ } else { // USE_BACKREF_AT_LEVEL
+ if (br.backNum == 1) {
+ if (isIgnoreCase(regex.options)) {
+ addOpcode(OPCode.BACKREFN_IC);
+ addMemNum(br.back[0]);
+ } else {
+ switch (br.back[0]) {
+ case 1:
+ addOpcode(OPCode.BACKREF1);
+ break;
+ case 2:
+ addOpcode(OPCode.BACKREF2);
+ break;
+ default:
+ addOpcode(OPCode.BACKREFN);
+ // The operand is a group number, not an opcode. Emitting it through
+ // addOpcode would run the stackNeeded switch on the group number
+ // and could flag the regex as needing a stack by coincidence.
+ addMemNum(br.back[0]);
+ break;
+ } // switch
+ }
+ } else {
+ if (isIgnoreCase(regex.options)) {
+ addOpcode(OPCode.BACKREF_MULTI_IC);
+ } else {
+ addOpcode(OPCode.BACKREF_MULTI);
+ }
+ // !add_bacref_mems:!
+ addLength(br.backNum);
+ for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
+ }
+ }
+ break;
+
+ case NodeType.CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ compileCall((CallNode)node);
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case NodeType.QTFR:
+ compileQuantifierNode((QuantifierNode)node);
+ break;
+
+ case NodeType.ENCLOSE:
+ compileEncloseNode((EncloseNode)node);
+ break;
+
+ case NodeType.ANCHOR:
+ compileAnchorNode((AnchorNode)node);
+ break;
+
+ default:
+ // undefined node type
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+ }
+
+ /**
+ * Appends an opcode to the regex and, for the opcodes listed in the switch
+ * below, marks the regex as needing a stack (regex.stackNeeded).
+ */
+ void addOpcode(int opcode) {
+ regex.addInt(opcode);
+
+ switch (opcode) {
+ case OPCode.ANYCHAR_STAR:
+ case OPCode.ANYCHAR_STAR_SB:
+ case OPCode.ANYCHAR_ML_STAR:
+ case OPCode.ANYCHAR_ML_STAR_SB:
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT:
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
+ case OPCode.STATE_CHECK_ANYCHAR_STAR:
+ case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
+ case OPCode.MEMORY_START_PUSH:
+ case OPCode.MEMORY_END_PUSH:
+ case OPCode.MEMORY_END_PUSH_REC:
+ case OPCode.MEMORY_END_REC:
+ case OPCode.NULL_CHECK_START:
+ case OPCode.NULL_CHECK_END_MEMST_PUSH:
+ case OPCode.PUSH:
+ case OPCode.STATE_CHECK_PUSH:
+ case OPCode.STATE_CHECK_PUSH_OR_JUMP:
+ case OPCode.STATE_CHECK:
+ case OPCode.PUSH_OR_JUMP_EXACT1:
+ case OPCode.PUSH_IF_PEEK_NEXT:
+ case OPCode.REPEAT:
+ case OPCode.REPEAT_NG:
+ case OPCode.REPEAT_INC_SG:
+ case OPCode.REPEAT_INC_NG:
+ case OPCode.REPEAT_INC_NG_SG:
+ case OPCode.PUSH_POS:
+ case OPCode.PUSH_POS_NOT:
+ case OPCode.PUSH_STOP_BT:
+ case OPCode.PUSH_LOOK_BEHIND_NOT:
+ case OPCode.CALL:
+ case OPCode.RETURN: // it will appear only with CALL though
+ regex.stackNeeded = true;
+ break;
+
+ default:
+ // every other opcode leaves the flag untouched
+ break;
+ }
+ }
+
+ /** Appends a state-check number operand to the regex via regex.addInt. */
+ void addStateCheckNum(int num) {
+ regex.addInt(num);
+ }
+
+ /** Appends a relative-address operand to the regex via regex.addInt. */
+ void addRelAddr(int addr) {
+ regex.addInt(addr);
+ }
+
+ /** Appends an absolute-address operand to the regex via regex.addInt. */
+ void addAbsAddr(int addr) {
+ regex.addInt(addr);
+ }
+
+ /** Appends a length operand to the regex via regex.addInt. */
+ void addLength(int length) {
+ regex.addInt(length);
+ }
+
+ /** Appends a memory (capture group) number operand to the regex via regex.addInt. */
+ void addMemNum(int num) {
+ regex.addInt(num);
+ }
+
+ /** Hands an object operand to the regex via regex.addObject. */
+ void addPointer(Object o) {
+ regex.addObject(o);
+ }
+
+ /** Appends an option-bits operand to the regex via regex.addInt. */
+ void addOption(int option) {
+ regex.addInt(option);
+ }
+
+ /** Appends an opcode followed by its relative-address operand. */
+ void addOpcodeRelAddr(int opcode, int addr) {
+ addOpcode(opcode);
+ addRelAddr(addr);
+ }
+
+ /** Appends an opcode followed by its option-bits operand. */
+ void addOpcodeOption(int opcode, int option) {
+ addOpcode(opcode);
+ addOption(option);
+ }
+
+
+
+}
diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java
new file mode 100644
index 0000000..165a245
--- /dev/null
+++ b/src/org/joni/Config.java
@@ -0,0 +1,111 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import java.io.PrintStream;
+
+/**
+ * Compile-time configuration switches and size limits for the regex engine.
+ * Fields of an interface are implicitly public, static and final, so the
+ * modifiers are not repeated on each constant.
+ */
+public interface Config {
+ int CHAR_TABLE_SIZE = 256;
+
+ boolean USE_NAMED_GROUP = true;
+ boolean USE_SUBEXP_CALL = true;
+ boolean USE_BACKREF_WITH_LEVEL = true; /* \k<name+n>, \k<name-n> */
+
+ boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
+ boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */
+ boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = false;
+
+ boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true;
+
+ boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false;
+ boolean USE_CAPTURE_HISTORY = true;
+ boolean USE_VARIABLE_META_CHARS = true;
+ boolean USE_WORD_BEGIN_END = true; /* "\<": word-begin, "\>": word-end */
+ boolean USE_POSIX_API_REGION_OPTION = true; /* needed for POSIX API support */
+ boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true;
+ boolean USE_COMBINATION_EXPLOSION_CHECK = false;
+
+ /* case-fold flags */
+ int ENC_CASE_FOLD_TURKISH_AZERI = (1<<20);
+ int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30);
+ int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
+ int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
+
+ boolean USE_UNICODE_CASE_FOLD_TURKISH_AZERI = false;
+ boolean USE_UNICODE_ALL_LINE_TERMINATORS = false;
+ boolean USE_CRNL_AS_LINE_TERMINATOR = false;
+
+ boolean USE_UNICODE_PROPERTIES = false;
+
+ /* limits */
+ int NREGION = 10;
+ int MAX_BACKREF_NUM = 1000;
+ int MAX_REPEAT_NUM = 100000;
+ int MAX_MULTI_BYTE_RANGES_NUM = 10000;
+
+ boolean USE_WARN = true;
+
+ /* work size */
+ int ENC_CODE_TO_MBC_MAXLEN = 7;
+ int ENC_MBC_CASE_FOLD_MAXLEN = 18;
+
+ int ENC_MAX_COMP_CASE_FOLD_CODE_LEN = 3;
+ int ENC_GET_CASE_FOLD_CODES_MAX_NUM = 13; /* 13 => Unicode:0x1ffc */
+
+ // internal config
+ boolean USE_PARSE_TREE_NODE_RECYCLE = true;
+ boolean USE_OP_PUSH_OR_JUMP_EXACT = true;
+ boolean USE_SHARED_CCLASS_TABLE = false;
+ boolean USE_QTFR_PEEK_NEXT = true;
+
+ int INIT_MATCH_STACK_SIZE = 100;
+ int DEFAULT_MATCH_STACK_LIMIT_SIZE = 0; /* unlimited */
+ int NUMBER_OF_POOLED_STACKS = 4;
+
+ boolean DONT_OPTIMIZE = false;
+
+ int MAX_CAPTURE_HISTORY_GROUP = 31;
+
+ int CHECK_STRING_THRESHOLD_LEN = 7;
+ int CHECK_BUFF_MAX_SIZE = 0x4000;
+
+ // output streams
+ PrintStream log = System.out;
+ PrintStream err = System.err;
+
+ // debug switches; all of them follow DEBUG_ALL
+ boolean DEBUG_ALL = false;
+ boolean DEBUG = DEBUG_ALL;
+ boolean DEBUG_PARSE_TREE = DEBUG_ALL;
+ boolean DEBUG_COMPILE = DEBUG_ALL;
+ boolean DEBUG_SEARCH = DEBUG_ALL;
+ boolean DEBUG_MATCH = DEBUG_ALL;
+}
diff --git a/src/org/joni/ErrorInfo.java b/src/org/joni/ErrorInfo.java
new file mode 100644
index 0000000..a243ee0
--- /dev/null
+++ b/src/org/joni/ErrorInfo.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+/**
+ * Plain mutable holder for error details: an encoding plus two strings.
+ * NOTE(review): par / parEnd presumably carry the offending pattern fragment;
+ * confirm against the code that fills them in.
+ */
+public class ErrorInfo {
+ public Encoding encoding;
+ public String par;
+ public String parEnd;
+}
diff --git a/src/org/joni/IntHolder.java b/src/org/joni/IntHolder.java
new file mode 100644
index 0000000..909ae1e
--- /dev/null
+++ b/src/org/joni/IntHolder.java
@@ -0,0 +1,25 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.joni;
+
+/** Mutable box around a single int, with the value exposed as a public field. */
+public class IntHolder {
+ public int value;
+}
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
new file mode 100644
index 0000000..4fc169d
--- /dev/null
+++ b/src/org/joni/Lexer.java
@@ -0,0 +1,1385 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.Option.isSingleline;
+import static org.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import org.joni.ast.QuantifierNode;
+import org.joni.constants.AnchorType;
+import org.joni.constants.CharacterType;
+import org.joni.constants.MetaChar;
+import org.joni.constants.TokenType;
+import org.joni.exception.ErrorMessages;
+
+class Lexer extends ScannerSupport {
+ protected final ScanEnvironment env;
+ protected final Syntax syntax; // fast access to syntax
+ protected final Token token = new Token(); // current token
+
+ /**
+ * Creates a lexer over the given byte range, taking the encoding and the
+ * syntax from the scan environment.
+ *
+ * @param env scan environment; its enc is passed to the superclass and its syntax is cached
+ * @param bytes pattern bytes, forwarded to the superclass
+ * @param p forwarded to the superclass together with end (presumably the start offset; confirm in ScannerSupport)
+ * @param end forwarded to the superclass (presumably the end offset; confirm in ScannerSupport)
+ */
+ protected Lexer(ScanEnvironment env, byte[]bytes, int p, int end) {
+ super(env.enc, bytes, p, end);
+ this.env = env;
+ this.syntax = env.syntax;
+ }
+
+ /**
+ * Scans an interval quantifier body ({n}, {n,}, {n,m} and, when the syntax
+ * allows an abbreviated lower bound, {,m}) after the opening brace.
+ * On success the current token becomes an INTERVAL token carrying the lower
+ * and upper repeat counts.
+ *
+ * @return 0: normal {n,m}, 2: fixed {n}, 1: not a valid interval but the
+ * syntax allows invalid intervals (the scan position is restored to the
+ * mark, except when the pattern ends right after the brace)
+ * !introduce returnCode here
+ */
+ private int fetchRangeQuantifier() {
+ mark();
+ boolean synAllow = syntax.allowInvalidInterval();
+
+ if (!left()) {
+ if (synAllow) {
+ return 1; /* "....{" : OK! */
+ } else {
+ newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
+ }
+ }
+
+ if (!synAllow) {
+ // strict syntax: a brace directly followed by ')', '(' or '|' is rejected
+ c = peek();
+ if (c == ')' || c == '(' || c == '|') {
+ newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
+ }
+ }
+
+ int low = scanUnsignedNumber();
+ if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+ if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+
+ boolean nonLow = false;
+ if (p == _p) { /* can't read low */
+ if (syntax.allowIntervalLowAbbrev()) {
+ low = 0;
+ nonLow = true;
+ } else {
+ return invalidRangeQuantifier(synAllow);
+ }
+ }
+
+ if (!left()) return invalidRangeQuantifier(synAllow);
+
+ fetch();
+ int up;
+ int ret = 0;
+ if (c == ',') {
+ int prev = p; // ??? last
+ up = scanUnsignedNumber();
+ if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+ if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+
+ // no digits after the comma: open-ended upper bound, unless the lower bound was missing too
+ if (p == prev) {
+ if (nonLow) return invalidRangeQuantifier(synAllow);
+ up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ }
+ } else {
+ if (nonLow) return invalidRangeQuantifier(synAllow);
+ unfetch();
+ up = low; /* {n} : exact n times */
+ ret = 2; /* fixed */
+ }
+
+ if (!left()) return invalidRangeQuantifier(synAllow);
+ fetch();
+
+ // with escaped-brace interval syntax the closing brace must be preceded by the escape char
+ if (syntax.opEscBraceInterval()) {
+ if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow);
+ fetch();
+ }
+
+ if (c != '}') return invalidRangeQuantifier(synAllow);
+
+ if (!isRepeatInfinite(up) && low > up) {
+ newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
+ }
+
+ token.type = TokenType.INTERVAL;
+ token.setRepeatLower(low);
+ token.setRepeatUpper(up);
+
+ return ret; /* 0: normal {n,m}, 2: fixed {n} */
+ }
+
+ /**
+ * Handles a malformed interval quantifier: raises a syntax error when
+ * invalid intervals are not allowed, otherwise restores the scan position
+ * saved by mark() and returns 1.
+ */
+ private int invalidRangeQuantifier(boolean synAllow) {
+ if (!synAllow) {
+ newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
+ return 0; // not reached
+ }
+ restore();
+ return 1;
+ }
+
+ /* \M-, \C-, \c, or \... */
+ /**
+ * Decodes the character value of an escape sequence whose escape character
+ * has already been consumed: \M-x (meta), \C-x and \cx (control), or any
+ * other escaped character, which goes through env.convertBackslashValue.
+ * The result is left in the scanner field c and also returned.
+ *
+ * @return the decoded code value
+ */
+ private int fetchEscapedValue() {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+ fetch();
+
+ switch(c) {
+
+ case 'M':
+ if (syntax.op2EscCapitalMBarMeta()) {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
+ fetch();
+ if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX);
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
+ fetch();
+ if (c == syntax.metaCharTable.esc) {
+ c = fetchEscapedValue();
+ }
+ // meta: force the high bit on the low byte
+ c = ((c & 0xff) | 0x80);
+ } else {
+ fetchEscapedValueBackSlash();
+ }
+ break;
+
+ case 'C':
+ if (syntax.op2EscCapitalCBarControl()) {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+ fetch();
+ if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX);
+ fetchEscapedValueControl();
+ } else {
+ fetchEscapedValueBackSlash();
+ }
+ break;
+
+ case 'c':
+ if (syntax.opEscCControl()) {
+ fetchEscapedValueControl();
+ // the control value is final; it must not be fed to the backslash conversion below
+ break;
+ }
+ /* fall through: \c is an ordinary escaped character in this syntax */
+
+ default:
+ fetchEscapedValueBackSlash();
+ } // switch
+
+ return c; // ???
+ }
+
+ /** Replaces the current character c with the result of env.convertBackslashValue(c). */
+ private void fetchEscapedValueBackSlash() {
+ c = env.convertBackslashValue(c);
+ }
+
+ /**
+ * Decodes the character following a control prefix into c: '?' yields 0177,
+ * anything else (itself possibly an escape sequence) is masked with 0x9f.
+ */
+ private void fetchEscapedValueControl() {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+ fetch();
+
+ if (c == '?') {
+ c = 0177;
+ return;
+ }
+
+ if (c == syntax.metaCharTable.esc) {
+ c = fetchEscapedValue();
+ }
+ c &= 0x9f;
+ }
+
+ /**
+ * Maps the character that opens a group name to the one that closes it:
+ * '<' closes with '>', a single quote closes with a single quote, and any
+ * other opener yields 0.
+ */
+ private int nameEndCodePoint(int start) {
+ if (start == '<') {
+ return '>';
+ }
+ if (start == '\'') {
+ return '\'';
+ }
+ return 0;
+ }
+
+ // USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
+ /*
+ \k<name+n>, \k<name-n>
+ \k<num+n>, \k<num-n>
+ \k<-num+n>, \k<-num-n>
+ */
+
+ // value implicit (rnameEnd)
+ /**
+ * Scans a group reference that may carry a nesting level, e.g. name+n,
+ * name-n, num+n or -num-n, up to the closing delimiter that matches
+ * startCode. The position just before the character that ended the name is
+ * left in the field value.
+ *
+ * @param startCode opening delimiter; the closing one comes from nameEndCodePoint
+ * @param rbackNum out: element 0 receives the signed group number when the name is numeric
+ * @param rlevel out: element 0 receives the signed level when one is present
+ * @return true when a level suffix was present
+ */
+ private boolean fetchNameWithLevel(int startCode, int[]rbackNum, int[]rlevel) {
+ int src = p;
+ boolean existLevel = false;
+ int isNum = 0; // 0: name, 1: number, 2: a lone '-' so far
+ int sign = 1;
+
+ int endCode = nameEndCodePoint(startCode);
+ int pnumHead = p;
+ int nameEnd = stop;
+
+ String err = null;
+ if (!left()) {
+ newValueException(ERR_EMPTY_GROUP_NAME);
+ } else {
+ fetch();
+ if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else if (c == '-') {
+ isNum = 2;
+ sign = -1;
+ pnumHead = p;
+ } else if (!enc.isWord(c)) {
+ // same message as fetchNameForNamedGroup uses for a bad first character
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ while (left()) {
+ nameEnd = p;
+ fetch();
+ if (c == endCode || c == ')' || c == '+' || c == '-') {
+ if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
+ break;
+ }
+
+ if (isNum != 0) {
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ // isNum = 0;
+ }
+ } else if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ boolean isEndCode = false;
+ if (err == null && c != endCode) {
+ if (c == '+' || c == '-') {
+ int flag = c == '-' ? -1 : 1;
+
+ // Only fetch while input remains; a pattern that ends right after the sign or
+ // the level digits is reported as an invalid group name below.
+ if (left()) {
+ fetch();
+ if (!enc.isDigit(c)) newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+ unfetch();
+ int level = scanUnsignedNumber();
+ if (level < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ rlevel[0] = level * flag;
+ existLevel = true;
+
+ if (left()) {
+ fetch();
+ isEndCode = c == endCode;
+ }
+ }
+ }
+
+ if (!isEndCode) {
+ err = ERR_INVALID_GROUP_NAME;
+ nameEnd = stop;
+ }
+ }
+
+ if (err == null) {
+ if (isNum != 0) {
+ // re-scan the digits from the saved head without disturbing the current position
+ mark();
+ p = pnumHead;
+ int backNum = scanUnsignedNumber();
+ restore();
+ if (backNum < 0) {
+ newValueException(ERR_TOO_BIG_NUMBER);
+ } else if (backNum == 0) {
+ newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+ }
+ rbackNum[0] = backNum * sign;
+ }
+ value = nameEnd;
+ return existLevel;
+ } else {
+ // report the specific problem that was recorded, not a blanket message
+ newValueException(err, src, nameEnd);
+ return false; // not reached
+ }
+ }
+
+ // USE_NAMED_GROUP
+ // ref: 0 -> define name (don't allow number name)
+ // 1 -> reference name (allow number name)
+ /**
+ * Scans a group name, or a (possibly negative) group number when ref is
+ * true, up to the closing delimiter that matches startCode. The position
+ * just before the closing delimiter is left in the field value.
+ *
+ * @param startCode opening delimiter; the closing one comes from nameEndCodePoint
+ * @param ref false: defining a name (numbers are rejected); true: referencing (numbers allowed)
+ * @return the signed group number for a numeric reference, 0 for a plain name
+ */
+ private int fetchNameForNamedGroup(int startCode, boolean ref) {
+ int src = p;
+ value = 0;
+
+ int isNum = 0; // 0: name, 1: number, 2: a lone '-' so far
+ int sign = 1;
+
+ int endCode = nameEndCodePoint(startCode);
+ int pnumHead = p;
+ int nameEnd = stop;
+
+ String err = null;
+ if (!left()) {
+ newValueException(ERR_EMPTY_GROUP_NAME);
+ } else {
+ fetch();
+ if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+ if (enc.isDigit(c)) {
+ if (ref) {
+ isNum = 1;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ // isNum = 0;
+ }
+ } else if (c == '-') {
+ if (ref) {
+ isNum = 2;
+ sign = -1;
+ pnumHead = p;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ // isNum = 0;
+ }
+ } else if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ if (err == null) {
+ while (left()) {
+ nameEnd = p;
+ fetch();
+ if (c == endCode || c == ')') {
+ if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
+ break;
+ }
+
+ if (isNum != 0) {
+ if (enc.isDigit(c)) {
+ isNum = 1;
+ } else {
+ if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ } else {
+ err = ERR_INVALID_GROUP_NAME;
+ }
+ // isNum = 0;
+ }
+ } else {
+ if (!enc.isWord(c)) {
+ err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+ }
+
+ if (c != endCode) {
+ err = ERR_INVALID_GROUP_NAME;
+ nameEnd = stop;
+ }
+
+ // An error recorded while scanning the rest of the name (bad character, lone '-',
+ // missing closing delimiter) must be raised here; otherwise the invalid name
+ // would be accepted silently.
+ if (err != null) newValueException(err, src, nameEnd);
+
+ int backNum = 0;
+ if (isNum != 0) {
+ // re-scan the digits from the saved head without disturbing the current position
+ mark();
+ p = pnumHead;
+ backNum = scanUnsignedNumber();
+ restore();
+ if (backNum < 0) {
+ newValueException(ERR_TOO_BIG_NUMBER);
+ } else if (backNum == 0) {
+ newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
+ }
+ backNum *= sign;
+ }
+ value = nameEnd;
+ return backNum;
+ } else {
+ // the first character was already bad: skip to the end of the name for the error range
+ while (left()) {
+ nameEnd = p;
+ fetch();
+ if (c == endCode || c == ')') break;
+ }
+ if (!left()) nameEnd = stop;
+ newValueException(err, src, nameEnd);
+ return 0; // not reached
+ }
+ }
+
+ // #else USE_NAMED_GROUP
+ // make it return nameEnd!
+    /**
+     * Variant of the name reader used when named groups are disabled: only a decimal
+     * number, optionally preceded by '-', is accepted between the delimiters.
+     * <p>
+     * The offset at which the number text ends is stored in {@code value}.
+     *
+     * @param startCode opening delimiter; the terminator comes from
+     *                  {@code nameEndCodePoint(startCode)}
+     * @param ref       not consulted by this variant
+     * @return the signed group number
+     */
+    private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
+        final int nameStart = p;
+        value = 0;
+
+        int sign = 1;
+
+        final int terminator = nameEndCodePoint(startCode);
+        int numberStart = p;
+        int nameEnd = stop;
+
+        String problem = null;
+        if (!left()) {
+            newValueException(ERR_EMPTY_GROUP_NAME);
+        } else {
+            fetch();
+            if (c == terminator) newValueException(ERR_EMPTY_GROUP_NAME);
+
+            final boolean startsWithDigit = enc.isDigit(c);
+            if (!startsWithDigit) {
+                if (c == '-') {
+                    sign = -1;
+                    numberStart = p; // digits begin after the sign
+                } else {
+                    problem = ERR_INVALID_CHAR_IN_GROUP_NAME;
+                }
+            }
+        }
+
+        // Consume up to the terminator, remembering where the number text stops.
+        while (left()) {
+            nameEnd = p;
+
+            fetch();
+            if (c == terminator || c == ')') break;
+            if (!enc.isDigit(c)) problem = ERR_INVALID_CHAR_IN_GROUP_NAME;
+        }
+
+        if (problem == null && c != terminator) {
+            problem = ERR_INVALID_GROUP_NAME;
+            nameEnd = stop;
+        }
+
+        if (problem != null) {
+            newValueException(problem, nameStart, nameEnd);
+            return 0; // not reached
+        }
+
+        // Re-read the digits from their start, then restore the cursor.
+        mark();
+        p = numberStart;
+        int number = scanUnsignedNumber();
+        restore();
+        if (number < 0) {
+            newValueException(ERR_TOO_BIG_NUMBER);
+        } else if (number == 0) {
+            newValueException(ERR_INVALID_GROUP_NAME, nameStart, nameEnd);
+        }
+        number *= sign;
+
+        value = nameEnd;
+        return number;
+    }
+
+    /**
+     * Reads a group name or number, dispatching on {@code Config.USE_NAMED_GROUP} to the
+     * named-group or number-only implementation.
+     *
+     * @param startCode opening delimiter of the name
+     * @param ref       passed through unchanged to the selected implementation
+     * @return whatever the selected implementation returns
+     */
+    protected final int fetchName(int startCode, boolean ref) {
+        return Config.USE_NAMED_GROUP
+                ? fetchNameForNamedGroup(startCode, ref)
+                : fetchNameForNoNamedGroup(startCode, ref);
+    }
+
+ private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
+ int p = this.p;
+ int to = this.stop;
+
+ boolean inEsc = false;
+ int i=0;
+
+ while(p < to) {
+ if (inEsc) {
+ inEsc = false;
+ p += enc.length(bytes[p]);
+ } else {
+ int x = enc.mbcToCode(bytes, p, to);
+ int q = p + enc.length(bytes[p]);
+ if (x == s[0]) {
+ for (i=1; i<n && q < to; i++) {
+ x = enc.mbcToCode(bytes, q, to);
+ if (x != s[i]) break;
+ q += enc.length(bytes[q]);
+ }
+ if (i >= n) return true;
+ p += enc.length(bytes[p]);
+ } else {
+ x = enc.mbcToCode(bytes, p, to);
+ if (x == bad) return false;
+ else if (x == syntax.metaCharTable.esc) inEsc = true;
+ p = q;
+ }
+ }
+ }
+ return false;
+ }
+
+ // Code point sequence ":]" passed to strExistCheckWithEsc by fetchTokenInCC when it
+ // decides whether "[:" opens a POSIX bracket.
+ private static final int send[] = new int[]{':', ']'};
+
+ /**
+ * Scans the next token while inside a character class and stores it in {@code token}.
+ * <p>
+ * The token starts out as a plain CHAR carrying the fetched code point; the branches
+ * below reclassify it for ']', '-', escape sequences, '[' and "&&".
+ *
+ * @return the type of the token that was stored in {@code token}
+ */
+ protected final TokenType fetchTokenInCC() {
+ int last;
+ int c2;
+
+ if (!left()) {
+ token.type = TokenType.EOT;
+ return token.type;
+ }
+
+ fetch();
+ token.type = TokenType.CHAR;
+ token.base = 0;
+ token.setC(c);
+ token.escaped = false;
+
+ if (c == ']') {
+ token.type = TokenType.CC_CLOSE;
+ } else if (c == '-') {
+ token.type = TokenType.CC_RANGE;
+ } else if (c == syntax.metaCharTable.esc) {
+ // When the syntax does not honour escapes inside a class the escape character
+ // itself is returned as a CHAR.
+ if (!syntax.backSlashEscapeInCC()) return token.type;
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+ fetch();
+ token.escaped = true;
+ token.setC(c);
+
+ switch (c) {
+
+ case 'w':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.WORD);
+ token.setPropNot(false);
+ break;
+
+ case 'W':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.WORD);
+ token.setPropNot(true);
+ break;
+
+ case 'd':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.DIGIT);
+ token.setPropNot(false);
+ break;
+
+ case 'D':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.DIGIT);
+ token.setPropNot(true);
+ break;
+
+ case 's':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.SPACE);
+ token.setPropNot(false);
+ break;
+
+ case 'S':
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.SPACE);
+ token.setPropNot(true);
+ break;
+
+ case 'h':
+ if (!syntax.op2EscHXDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.XDIGIT);
+ token.setPropNot(false);
+ break;
+
+ case 'H':
+ if (!syntax.op2EscHXDigit()) break;
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(CharacterType.XDIGIT);
+ token.setPropNot(true);
+ break;
+
+ case 'p':
+ case 'P':
+ // \p{...} / \P{...}: only the opening brace and an optional '^' are consumed here.
+ c2 = peek(); // !!! migrate to peekIs
+ if (c2 == '{' && syntax.op2EscPBraceCharProperty()) {
+ inc();
+ token.type = TokenType.CHAR_PROPERTY;
+ token.setPropNot(c == 'P');
+
+ if (syntax.op2EscPBraceCircumflexNot()) {
+ c2 = fetchTo();
+ if (c2 == '^') {
+ token.setPropNot(!token.getPropNot());
+ } else {
+ unfetch();
+ }
+ }
+ }
+ break;
+
+ case 'x':
+ if (!left()) break;
+ last = p;
+
+ if (peekIs('{') && syntax.opEscXBraceHex8()) {
+ // \x{H...}: up to 8 hex digits followed by '}'
+ inc();
+ int num = scanUnsignedHexadecimalNumber(8);
+ if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
+ if (left()) {
+ c2 = peek();
+ if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
+ }
+
+ if (p > last + enc.length(bytes[last]) && left() && peekIs('}')) {
+ inc();
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ } else {
+ /* can't read nothing or invalid format */
+ p = last;
+ }
+ } else if (syntax.opEscXHex2()) {
+ // \xHH: at most two hex digits, stored as a raw byte
+ int num = scanUnsignedHexadecimalNumber(2);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 16;
+ token.setC(num);
+ }
+ break;
+
+ case 'u':
+ if (!left()) break;
+ last = p;
+
+ if (syntax.op2EscUHex4()) {
+ int num = scanUnsignedHexadecimalNumber(4);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ }
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ if (syntax.opEscOctal3()) {
+ // step back so the scan includes the digit that was just fetched
+ unfetch();
+ last = p;
+ int num = scanUnsignedOctalNumber(3);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 8;
+ token.setC(num);
+ }
+ break;
+
+ default:
+ // Any other escape: let fetchEscapedValue translate it; the token becomes a
+ // CODE_POINT only when the translated value differs from the literal character.
+ unfetch();
+ int num = fetchEscapedValue();
+ if (token.getC() != num) {
+ token.setCode(num);
+ token.type = TokenType.CODE_POINT;
+ }
+ break;
+ } // switch
+
+ } else if (c == '[') {
+ if (syntax.opPosixBracket() && peekIs(':')) {
+ token.backP = p; /* point at '[' is readed */
+ inc();
+ // "[:" opens a POSIX bracket only if a closing ":]" follows before an unescaped ']'
+ if (strExistCheckWithEsc(send, send.length, ']')) {
+ token.type = TokenType.POSIX_BRACKET_OPEN;
+ } else {
+ unfetch();
+ // remove duplication, goto cc_in_cc;
+ if (syntax.op2CClassSetOp()) {
+ token.type = TokenType.CC_CC_OPEN;
+ } else {
+ env.ccEscWarn("[");
+ }
+ }
+ } else { // cc_in_cc:
+ if (syntax.op2CClassSetOp()) {
+ token.type = TokenType.CC_CC_OPEN;
+ } else {
+ env.ccEscWarn("[");
+ }
+ }
+ } else if (c == '&') {
+ // "&&" is the class intersection operator when set operations are enabled
+ if (syntax.op2CClassSetOp() && left() && peekIs('&')) {
+ inc();
+ token.type = TokenType.CC_AND;
+ }
+ }
+ return token.type;
+ }
+
+    /**
+     * Converts a relative back-reference number into an absolute group number, counted
+     * from {@code env.numMem + 1}.
+     *
+     * @param relNo relative group number as written in the pattern
+     * @return {@code env.numMem + 1 + relNo}
+     */
+    protected final int backrefRelToAbs(int relNo) {
+        final int nextGroupNumber = env.numMem + 1;
+        return nextGroupNumber + relNo;
+    }
+
+    /**
+     * Scans the next token outside a character class and stores it in {@code token}.
+     * <p>
+     * The token starts out as a STRING; escape sequences and meta characters reclassify
+     * it, each guarded by the matching {@code syntax} switch. Group comments
+     * ({@code (?#...)}) and, in extended mode, '#' comments and white space are skipped
+     * by restarting the scan loop.
+     *
+     * @return the type of the token that was stored in {@code token}
+     */
+    protected final TokenType fetchToken() {
+        int last;
+
+        // mark(); // out
+
+        start:
+        while(true) {
+
+            if (!left()) {
+                token.type = TokenType.EOT;
+                return token.type;
+            }
+
+            token.type = TokenType.STRING;
+            token.base = 0;
+            token.backP = p;
+
+            fetch();
+
+            if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
+                if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+
+                token.backP = p;
+                fetch();
+
+                token.setC(c);
+                token.escaped = true;
+                switch(c) {
+
+                case '*':
+                    if (!syntax.opEscAsteriskZeroInf()) break;
+                    token.type = TokenType.OP_REPEAT;
+                    token.setRepeatLower(0);
+                    token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+                    greedyCheck();
+                    break;
+
+                case '+':
+                    if (!syntax.opEscPlusOneInf()) break;
+                    token.type = TokenType.OP_REPEAT;
+                    token.setRepeatLower(1);
+                    token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+                    greedyCheck();
+                    break;
+
+                case '?':
+                    if (!syntax.opEscQMarkZeroOne()) break;
+                    token.type = TokenType.OP_REPEAT;
+                    token.setRepeatLower(0);
+                    token.setRepeatUpper(1);
+                    greedyCheck();
+                    break;
+
+                case '{':
+                    if (!syntax.opEscBraceInterval()) break;
+                    switch (fetchRangeQuantifier()) {
+                    case 0:
+                        greedyCheck();
+                        break;
+                    case 2:
+                        if (syntax.fixedIntervalIsGreedyOnly()) {
+                            possessiveCheck();
+                        } else {
+                            greedyCheck();
+                        }
+                        break;
+                    default: /* 1 : normal char */
+                    } // inner switch
+                    break;
+
+                case '|':
+                    if (!syntax.opEscVBarAlt()) break;
+                    token.type = TokenType.ALT;
+                    break;
+
+                case '(':
+                    if (!syntax.opEscLParenSubexp()) break;
+                    token.type = TokenType.SUBEXP_OPEN;
+                    break;
+
+                case ')':
+                    if (!syntax.opEscLParenSubexp()) break;
+                    token.type = TokenType.SUBEXP_CLOSE;
+                    break;
+
+                case 'w':
+                    if (!syntax.opEscWWord()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.WORD);
+                    token.setPropNot(false);
+                    break;
+
+                case 'W':
+                    if (!syntax.opEscWWord()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.WORD);
+                    token.setPropNot(true);
+                    break;
+
+                case 'b':
+                    if (!syntax.opEscBWordBound()) break;
+                    token.type = TokenType.ANCHOR;
+                    token.setAnchor(AnchorType.WORD_BOUND);
+                    break;
+
+                case 'B':
+                    if (!syntax.opEscBWordBound()) break;
+                    token.type = TokenType.ANCHOR;
+                    token.setAnchor(AnchorType.NOT_WORD_BOUND);
+                    break;
+
+                case '<':
+                    if (Config.USE_WORD_BEGIN_END) {
+                        if (!syntax.opEscLtGtWordBeginEnd()) break;
+                        token.type = TokenType.ANCHOR;
+                        token.setAnchor(AnchorType.WORD_BEGIN);
+                        break;
+                    } // USE_WORD_BEGIN_END
+                    break; // ?
+
+                case '>':
+                    if (Config.USE_WORD_BEGIN_END) {
+                        if (!syntax.opEscLtGtWordBeginEnd()) break;
+                        token.type = TokenType.ANCHOR;
+                        token.setAnchor(AnchorType.WORD_END);
+                        break;
+                    } // USE_WORD_BEGIN_END
+                    break; // ?
+
+                case 's':
+                    if (!syntax.opEscSWhiteSpace()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.SPACE);
+                    token.setPropNot(false);
+                    break;
+
+                case 'S':
+                    if (!syntax.opEscSWhiteSpace()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.SPACE);
+                    token.setPropNot(true);
+                    break;
+
+                case 'd':
+                    if (!syntax.opEscDDigit()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.DIGIT);
+                    token.setPropNot(false);
+                    break;
+
+                case 'D':
+                    if (!syntax.opEscDDigit()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.DIGIT);
+                    token.setPropNot(true);
+                    break;
+
+                case 'h':
+                    if (!syntax.op2EscHXDigit()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.XDIGIT);
+                    token.setPropNot(false);
+                    break;
+
+                case 'H':
+                    if (!syntax.op2EscHXDigit()) break;
+                    token.type = TokenType.CHAR_TYPE;
+                    token.setPropCType(CharacterType.XDIGIT);
+                    token.setPropNot(true);
+                    break;
+
+                case 'A':
+                    if (!syntax.opEscAZBufAnchor()) break;
+                    // begin_buf label
+                    token.type = TokenType.ANCHOR;
+                    token.setSubtype(AnchorType.BEGIN_BUF);
+                    break;
+
+                case 'Z':
+                    if (!syntax.opEscAZBufAnchor()) break;
+                    token.type = TokenType.ANCHOR;
+                    token.setSubtype(AnchorType.SEMI_END_BUF);
+                    break;
+
+                case 'z':
+                    if (!syntax.opEscAZBufAnchor()) break;
+                    // end_buf label
+                    token.type = TokenType.ANCHOR;
+                    token.setSubtype(AnchorType.END_BUF);
+                    break;
+
+                case 'G':
+                    if (!syntax.opEscCapitalGBeginAnchor()) break;
+                    token.type = TokenType.ANCHOR;
+                    token.setSubtype(AnchorType.BEGIN_POSITION);
+                    break;
+
+                case '`':
+                    if (!syntax.op2EscGnuBufAnchor()) break;
+                    // goto begin_buf
+                    token.type = TokenType.ANCHOR;
+                    token.setSubtype(AnchorType.BEGIN_BUF);
+                    break;
+
+                case '\'':
+                    if (!syntax.op2EscGnuBufAnchor()) break;
+                    // goto end_buf
+                    token.type = TokenType.ANCHOR;
+                    token.setSubtype(AnchorType.END_BUF);
+                    break;
+
+                case 'x': // extract to helper for all 'x'
+                    if (!left()) break;
+                    last = p;
+                    if (peekIs('{') && syntax.opEscXBraceHex8()) {
+                        // \x{H...}: up to 8 hex digits followed by '}'
+                        inc();
+                        int num = scanUnsignedHexadecimalNumber(8);
+                        if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
+                        if (left()) {
+                            if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
+                        }
+
+                        if (p > last + enc.length(bytes[last]) && left() && peekIs('}')) {
+                            inc();
+                            token.type = TokenType.CODE_POINT;
+                            token.setCode(num);
+                        } else {
+                            /* can't read nothing or invalid format */
+                            p = last;
+                        }
+                    } else if (syntax.opEscXHex2()) {
+                        // \xHH: at most two hex digits, stored as a raw byte
+                        int num = scanUnsignedHexadecimalNumber(2);
+                        if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+                        if (p == last) { /* can't read nothing. */
+                            num = 0; /* but, it's not error */
+                        }
+                        token.type = TokenType.RAW_BYTE;
+                        token.base = 16;
+                        token.setC(num);
+                    }
+                    break;
+
+                case 'u': // extract to helper
+                    if (!left()) break;
+                    last = p;
+
+                    if (syntax.op2EscUHex4()) {
+                        int num = scanUnsignedHexadecimalNumber(4);
+                        if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+                        if (p == last) { /* can't read nothing. */
+                            num = 0; /* but, it's not error */
+                        }
+                        token.type = TokenType.CODE_POINT;
+                        token.base = 16;
+                        token.setCode(num);
+                    }
+                    break;
+
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                    // Decimal back-reference; falls back to an octal escape or a plain
+                    // character when the number cannot be a group reference.
+                    unfetch();
+                    last = p;
+                    int num = scanUnsignedNumber();
+                    if (num < 0 || num > Config.MAX_BACKREF_NUM) {
+                        // goto skip_backref
+                    } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
+                        if (syntax.strictCheckBackref()) {
+                            if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF);
+                        }
+                        token.type = TokenType.BACKREF;
+                        token.setBackrefNum(1);
+                        token.setBackrefRef1(num);
+                        token.setBackrefByName(false);
+                        if (Config.USE_BACKREF_WITH_LEVEL) token.setBackrefExistLevel(false);
+                        break;
+                    }
+                    // skip_backref:
+                    if (c == '8' || c == '9') {
+                        /* normal char */
+                        p = last;
+                        inc();
+                        break;
+                    }
+                    p = last;
+                    /* fall through */
+
+                case '0':
+                    if (syntax.opEscOctal3()) {
+                        last = p;
+                        num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
+                        if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+                        if (p == last) { /* can't read nothing. */
+                            num = 0; /* but, it's not error */
+                        }
+                        token.type = TokenType.RAW_BYTE;
+                        token.base = 8;
+                        token.setC(num);
+                    } else if (c != '0') {
+                        inc();
+                    }
+                    break;
+
+                case 'k':
+                    if (Config.USE_NAMED_GROUP) {
+                        if (syntax.op2EscKNamedBackref()) {
+                            fetch();
+                            if (c =='<' || c == '\'') {
+                                last = p;
+                                int backNum;
+                                if (Config.USE_BACKREF_WITH_LEVEL) {
+                                    int[]rbackNum = new int[1];
+                                    int[]rlevel = new int[1];
+                                    token.setBackrefExistLevel(fetchNameWithLevel(c, rbackNum, rlevel));
+                                    token.setBackrefLevel(rlevel[0]);
+                                    backNum = rbackNum[0];
+                                } else {
+                                    backNum = fetchName(c, true);
+                                } // USE_BACKREF_AT_LEVEL
+                                int nameEnd = value; // set by fetchNameWithLevel/fetchName
+
+                                if (backNum != 0) {
+                                    // numeric reference: \k<n> or \k<-n>
+                                    if (backNum < 0) {
+                                        backNum = backrefRelToAbs(backNum);
+                                        if (backNum <= 0) newValueException(ERR_INVALID_BACKREF);
+                                    }
+
+                                    if (syntax.strictCheckBackref()) {
+                                        // same validation as the decimal and named paths
+                                        if (backNum > env.numMem ||
+                                            env.memNodes == null ||
+                                            env.memNodes[backNum] == null) newValueException(ERR_INVALID_BACKREF);
+                                    }
+                                    // The token is a back-reference whether or not strict
+                                    // checking is enabled; it used to be filled in only
+                                    // inside the strict branch.
+                                    token.type = TokenType.BACKREF;
+                                    token.setBackrefByName(false);
+                                    token.setBackrefNum(1);
+                                    token.setBackrefRef1(backNum);
+                                } else {
+                                    // symbolic reference: resolve the name to its group number(s)
+                                    NameEntry e = env.reg.nameToGroupNumbers(bytes, last, nameEnd);
+                                    if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, last, nameEnd);
+
+                                    if (syntax.strictCheckBackref()) {
+                                        if (e.backNum == 1) {
+                                            if (e.backRef1 > env.numMem ||
+                                                env.memNodes == null ||
+                                                env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF);
+                                        } else {
+                                            for (int i=0; i<e.backNum; i++) {
+                                                if (e.backRefs[i] > env.numMem ||
+                                                    env.memNodes == null ||
+                                                    env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF);
+                                            }
+                                        }
+                                    }
+
+                                    token.type = TokenType.BACKREF;
+                                    token.setBackrefByName(true);
+
+                                    if (e.backNum == 1) {
+                                        token.setBackrefNum(1);
+                                        token.setBackrefRef1(e.backRef1);
+                                    } else {
+                                        token.setBackrefNum(e.backNum);
+                                        token.setBackrefRefs(e.backRefs);
+                                    }
+                                }
+                            } else {
+                                unfetch();
+                            }
+                        }
+
+                        break;
+                    } // USE_NAMED_GROUP
+                    break;
+
+                case 'g':
+                    if (Config.USE_SUBEXP_CALL) {
+                        if (syntax.op2EscGSubexpCall()) {
+                            fetch();
+                            if (c == '<' || c == '\'') {
+                                last = p;
+                                int gNum = fetchName(c, true);
+                                int nameEnd = value;
+                                token.type = TokenType.CALL;
+                                token.setCallNameP(last);
+                                token.setCallNameEnd(nameEnd);
+                                token.setCallGNum(gNum);
+                            } else {
+                                unfetch();
+                            }
+                        }
+                        break;
+                    } // USE_SUBEXP_CALL
+                    break;
+
+                case 'Q':
+                    if (syntax.op2EscCapitalQQuote()) {
+                        token.type = TokenType.QUOTE_OPEN;
+                    }
+                    break;
+
+                case 'p':
+                case 'P':
+                    // \p{...} / \P{...}: only the opening brace and an optional '^' are consumed here
+                    if (peekIs('{') && syntax.op2EscPBraceCharProperty()) {
+                        inc();
+                        token.type = TokenType.CHAR_PROPERTY;
+                        token.setPropNot(c == 'P');
+
+                        if (syntax.op2EscPBraceCircumflexNot()) {
+                            fetch();
+                            if (c == '^') {
+                                token.setPropNot(!token.getPropNot());
+                            } else {
+                                unfetch();
+                            }
+                        }
+                    }
+                    break;
+
+                default:
+                    unfetch();
+                    num = fetchEscapedValue();
+
+                    /* set_raw: */
+                    if (token.getC() != num) {
+                        token.type = TokenType.CODE_POINT;
+                        token.setCode(num);
+                    } else { /* string */
+                        p = token.backP + enc.length(bytes[token.backP]); // backP ???
+                    }
+                    break;
+
+                } // switch (c)
+
+            } else {
+                token.setC(c);
+                token.escaped = false;
+
+                // remove code duplication
+                if (Config.USE_VARIABLE_META_CHARS) {
+                    // Each break below leaves the scan loop and returns the token.
+                    if (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters()) {
+                        if (c == syntax.metaCharTable.anyChar) { // goto any_char
+                            token.type = TokenType.ANYCHAR;
+                            break;
+                        } else if (c == syntax.metaCharTable.anyTime) { // goto anytime
+                            token.type = TokenType.OP_REPEAT;
+                            token.setRepeatLower(0);
+                            token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+                            greedyCheck();
+                            break;
+                        } else if (c == syntax.metaCharTable.zeroOrOneTime) { // goto zero_or_one_time
+                            token.type = TokenType.OP_REPEAT;
+                            token.setRepeatLower(0);
+                            token.setRepeatUpper(1);
+                            greedyCheck();
+                            break;
+                        } else if (c == syntax.metaCharTable.oneOrMoreTime) { // goto one_or_more_time
+                            token.type = TokenType.OP_REPEAT;
+                            token.setRepeatLower(1);
+                            token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+                            greedyCheck();
+                            break;
+                        } else if (c == syntax.metaCharTable.anyCharAnyTime) { // goto one_or_more_time
+                            token.type = TokenType.ANYCHAR_ANYTIME;
+                            break;
+                            // goto out
+                        }
+                    }
+                } // USE_VARIABLE_META_CHARS
+
+                {
+                    switch(c) {
+
+                    case '.':
+                        if (!syntax.opDotAnyChar()) break;
+                        // any_char:
+                        token.type = TokenType.ANYCHAR;
+                        break;
+
+                    case '*':
+                        if (!syntax.opAsteriskZeroInf()) break;
+                        // anytime:
+                        token.type = TokenType.OP_REPEAT;
+                        token.setRepeatLower(0);
+                        token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+                        greedyCheck();
+                        break;
+
+                    case '+':
+                        if (!syntax.opPlusOneInf()) break;
+                        // one_or_more_time:
+                        token.type = TokenType.OP_REPEAT;
+                        token.setRepeatLower(1);
+                        token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
+                        greedyCheck();
+                        break;
+
+                    case '?':
+                        if (!syntax.opQMarkZeroOne()) break;
+                        // zero_or_one_time:
+                        token.type = TokenType.OP_REPEAT;
+                        token.setRepeatLower(0);
+                        token.setRepeatUpper(1);
+                        greedyCheck();
+                        break;
+
+                    case '{':
+                        if (!syntax.opBraceInterval()) break;
+                        switch(fetchRangeQuantifier()) {
+                        case 0:
+                            greedyCheck();
+                            break;
+                        case 2:
+                            if (syntax.fixedIntervalIsGreedyOnly()) {
+                                possessiveCheck();
+                            } else {
+                                greedyCheck();
+                            }
+                            break;
+                        default: /* 1 : normal char */
+                        } // inner switch
+                        break;
+
+                    case '|':
+                        if (!syntax.opVBarAlt()) break;
+                        token.type = TokenType.ALT;
+                        break;
+
+                    case '(':
+                        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
+                            inc();
+                            if (peekIs('#')) {
+                                // (?#...) comment: skip to the closing ')' honouring escapes
+                                fetch();
+                                while (true) {
+                                    if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+                                    fetch();
+                                    if (c == syntax.metaCharTable.esc) {
+                                        if (left()) fetch();
+                                    } else {
+                                        if (c == ')') break;
+                                    }
+                                }
+                                continue start; // goto start
+                            }
+                            unfetch();
+                        }
+
+                        if (!syntax.opLParenSubexp()) break;
+                        token.type = TokenType.SUBEXP_OPEN;
+                        break;
+
+                    case ')':
+                        if (!syntax.opLParenSubexp()) break;
+                        token.type = TokenType.SUBEXP_CLOSE;
+                        break;
+
+                    case '^':
+                        if (!syntax.opLineAnchor()) break;
+                        token.type = TokenType.ANCHOR;
+                        token.setSubtype(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
+                        break;
+
+                    case '$':
+                        if (!syntax.opLineAnchor()) break;
+                        token.type = TokenType.ANCHOR;
+                        token.setSubtype(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
+                        break;
+
+                    case '[':
+                        if (!syntax.opBracketCC()) break;
+                        token.type = TokenType.CC_CC_OPEN;
+                        break;
+
+                    case ']':
+                        //if (*src > env->pattern) /* /].../ is allowed. */
+                        //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
+                        break;
+
+                    case '#':
+                        if (Option.isExtend(env.option)) {
+                            // extended mode: '#' starts a comment that runs to the next newline
+                            while (left()) {
+                                fetch();
+                                if (enc.isNewLine(c)) break;
+                            }
+                            continue start; // goto start
+
+                        }
+                        break;
+
+                    case ' ':
+                    case '\t':
+                    case '\n':
+                    case '\r':
+                    case '\f':
+                        if (Option.isExtend(env.option)) {
+                            continue start; // goto start
+                        }
+                        break;
+
+                    default: // string
+                        break;
+
+                    } // switch
+                }
+            }
+
+            break;
+        } // while
+        return token.type;
+    }
+
+ private void greedyCheck() {
+ if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) {
+
+ fetch();
+
+ token.setRepeatGreedy(false);
+ token.setRepeatPossessive(false);
+ } else {
+ possessiveCheck();
+ }
+ }
+
+ private void possessiveCheck() {
+ if (left() && peekIs('+') &&
+ (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL ||
+ syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) {
+
+ fetch();
+
+ token.setRepeatGreedy(true);
+ token.setRepeatPossessive(true);
+ } else {
+ token.setRepeatGreedy(true);
+ token.setRepeatPossessive(false);
+ }
+ }
+
+ protected final int fetchCharPropertyToCType() {
+ mark();
+
+ while (left()) {
+ int last = p;
+ fetch();
+ if (c == '}') {
+ return enc.propertyNameToCType(bytes, _p, last);
+ } else if (c == '(' || c == ')' || c == '{' || c == '|') {
+ newValueException(ERR_INVALID_CHAR_PROPERTY_NAME, _p, last);
+ }
+ }
+ newInternalException(ERR_PARSER_BUG);
+ return 0; // not reached
+ }
+}
diff --git a/src/org/joni/Matcher.java b/src/org/joni/Matcher.java
new file mode 100644
index 0000000..001ce70
--- /dev/null
+++ b/src/org/joni/Matcher.java
@@ -0,0 +1,542 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.joni;
+
+import static org.joni.Option.isFindLongest;
+
+import org.joni.constants.AnchorType;
+
+public final class Matcher extends ByteCodeMachine {
+
+ /**
+ * Creates a matcher over the whole of {@code bytes} (offsets 0 to {@code bytes.length}).
+ */
+ public Matcher(Regex regex, byte[]bytes) {
+ this(regex, bytes, 0, bytes.length);
+ }
+
+ /**
+ * Creates a matcher by passing {@code regex}, {@code bytes}, {@code p} and {@code end}
+ * straight to the superclass constructor.
+ */
+ public Matcher(Regex regex, byte[]bytes, int p, int end) {
+ super(regex, bytes, p, end);
+ }
+
+ /**
+ * Returns the current {@code msaRegion}; it may be null (see {@code getEagerRegion}).
+ */
+ public final Region getRegion() {
+ return msaRegion;
+ }
+
+    /**
+     * Returns {@code msaRegion} when one exists; otherwise builds a new {@code Region}
+     * from {@code msaBegin} and {@code msaEnd}.
+     */
+    public final Region getEagerRegion() {
+        if (msaRegion == null) {
+            return new Region(msaBegin, msaEnd);
+        }
+        return msaRegion;
+    }
+
+ /**
+ * Returns the current value of {@code msaBegin}.
+ */
+ public final int getBegin() {
+ return msaBegin;
+ }
+
+ /**
+ * Returns the current value of {@code msaEnd}.
+ */
+ public final int getEnd() {
+ return msaEnd;
+ }
+
+    /**
+     * Attempts a match that starts exactly at {@code at}.
+     *
+     * @param at     offset in {@code bytes} at which the match must start
+     * @param range  upper bound passed to {@code matchAt} unless
+     *               {@code Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE} is
+     *               set, in which case {@code end} is used instead
+     * @param option match options handed to {@code msaInit}
+     * @return the result of {@code matchAt}
+     */
+    public final int match(int at, int range, int option) {
+        msaInit(option, at);
+
+        if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+            // Offset of the start position relative to the beginning of the input, as in
+            // search(). This used to read "at = str", which overwrote the requested
+            // start position.
+            int offset = at - str;
+            stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction?
+        } // USE_COMBINATION_EXPLOSION_CHECK
+
+        int prev = enc.prevCharHead(bytes, str, at);
+
+        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+            return matchAt(end /*range*/, at, prev);
+        } else {
+            return matchAt(range /*range*/, at, prev);
+        }
+    }
+
+    // Results of forwardSearchRange/backwardSearchRange: bounds of the window in which
+    // a match attempt is worthwhile.
+    int low, high; // these are the return values
+
+    /**
+     * Uses the regex's search algorithm to find the next candidate position at or after
+     * {@code s} and derives from it the window ({@code low}..{@code high}) of start
+     * positions worth trying.
+     * <p>
+     * Candidates that lie closer than {@code regex.dMin} to {@code s}, or that violate
+     * the regex's line sub-anchor, are rejected and the search resumes one character
+     * further on.
+     *
+     * @param bytes   input being searched
+     * @param str     start of the input
+     * @param end     end of the input
+     * @param s       current search position
+     * @param range   candidates at or beyond this offset count as failure
+     * @param lowPrev if not null, receives the head of the character preceding {@code low}
+     * @return {@code true} when a candidate was found; {@code high} is then set, and
+     *         {@code low} is set unless {@code regex.dMax} is infinite
+     */
+    private boolean forwardSearchRange(byte[]bytes, int str, int end, int s, int range, IntHolder lowPrev) {
+        int pprev = -1;
+        int p = s;
+
+        if (Config.DEBUG_SEARCH) {
+            Config.log.println("forward_search_range: "+
+                    "str: " + str +
+                    ", end: " + end +
+                    ", s: " + s +
+                    ", range: " + range);
+        }
+
+        if (regex.dMin > 0) {
+            // skip the minimum distance, staying on character boundaries
+            if (enc.isSingleByte()) {
+                p += regex.dMin;
+            } else {
+                int q = p + regex.dMin;
+                while (p < q) p += enc.length(bytes[p]);
+            }
+        }
+
+        retry:while (true) {
+            p = regex.searchAlgorithm.search(regex, bytes, p, end, range);
+
+            if (p != -1 && p < range) {
+                if (p - regex.dMin < s) {
+                    // retry_gate:
+                    pprev = p;
+                    p += enc.length(bytes[p]);
+                    continue retry;
+                }
+
+                if (regex.subAnchor != 0) {
+                    switch (regex.subAnchor) {
+                    case AnchorType.BEGIN_LINE:
+                        if (p != str) {
+                            int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p);
+                            if (!enc.isNewLine(bytes, prev, end)) {
+                                // goto retry_gate;
+                                pprev = p;
+                                p += enc.length(bytes[p]);
+                                continue retry;
+                            }
+                        }
+                        break;
+
+                    case AnchorType.END_LINE:
+                        if (p == end) {
+                            // NOTE(review): Oniguruma guards this block with #ifndef; the
+                            // polarity of this Config test should be confirmed.
+                            if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+                                int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p);
+                                if (prev != -1 && enc.isNewLine(bytes, prev, end)) {
+                                    // goto retry_gate;
+                                    pprev = p;
+                                    p += enc.length(bytes[p]);
+                                    continue retry;
+                                }
+                            }
+                        } else if (!enc.isNewLine(bytes, p, end)) {
+                            // Interior candidate that is not followed by a line end. This
+                            // branch belongs to "p == end" (as in backwardSearchRange); it
+                            // was previously nested under the Config test above, so
+                            // interior candidates were never checked.
+                            if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, p, end)) break;
+                            // goto retry_gate;
+                            pprev = p;
+                            p += enc.length(bytes[p]);
+                            continue retry;
+                        }
+                        break;
+                    } // switch
+                }
+
+                if (regex.dMax == 0) {
+                    low = p;
+                    if (lowPrev != null) { // ??? // remove null checks
+                        if (low > s) {
+                            lowPrev.value = enc.prevCharHead(bytes, s, p);
+                        } else {
+                            lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p);
+                        }
+                    }
+                } else {
+                    if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+                        low = p - regex.dMax;
+
+                        if (low > s) {
+                            low = enc.rightAdjustCharHeadWithPrev(bytes, s, low, lowPrev);
+                            if (lowPrev != null && lowPrev.value == -1) {
+                                lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : s, low);
+                            }
+                        } else {
+                            if (lowPrev != null) {
+                                lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, low);
+                            }
+                        }
+                    }
+                }
+                /* no needs to adjust *high, *high is used as range check only */
+                high = p - regex.dMin;
+
+                if (Config.DEBUG_SEARCH) {
+                    Config.log.println("forward_search_range success: "+
+                            "low: " + (low - str) +
+                            ", high: " + (high - str) +
+                            ", dmin: " + regex.dMin +
+                            ", dmax: " + regex.dMax);
+                }
+
+                return true; /* success */
+            }
+
+            return false; /* fail */
+        } //while
+    }
+
+ // low, high
+ /**
+ * Backward counterpart of forwardSearchRange: asks the regex's search algorithm for a
+ * candidate position searching backwards from {@code s}, rejects candidates that violate
+ * the regex's line sub-anchor, and stores the resulting window in {@code low}/{@code high}
+ * (only when {@code regex.dMax} is finite).
+ *
+ * @return {@code true} when an acceptable candidate was found, {@code false} otherwise
+ */
+ private boolean backwardSearchRange(byte[]bytes, int str, int end, int s, int range, int adjrange) {
+ range += regex.dMin;
+ int p = s;
+
+ retry:while (true) {
+ p = regex.searchAlgorithm.searchBackward(regex, bytes, range, adjrange, end, p, s, range);
+
+ if (p != -1) {
+ if (regex.subAnchor != 0) {
+ switch (regex.subAnchor) {
+ case AnchorType.BEGIN_LINE:
+ if (p != str) {
+ int prev = enc.prevCharHead(bytes, str, p);
+ if (!enc.isNewLine(bytes, prev, end)) {
+ // not at a line start: resume the backward search from the previous character
+ p = prev;
+ continue retry;
+ }
+ }
+ break;
+
+ case AnchorType.END_LINE:
+ if (p == end) {
+ // NOTE(review): Oniguruma guards this block with #ifndef; confirm the polarity of this Config test.
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ int prev = enc.prevCharHead(bytes, adjrange, p);
+ if (prev == -1) return false;
+ if (enc.isNewLine(bytes, prev, end)) {
+ p = prev;
+ continue retry;
+ }
+ }
+ } else if (!enc.isNewLine(bytes, p, end)) {
+ // this break leaves the switch: a CR-NL pair is accepted as a line end
+ if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, p, end)) break;
+ p = enc.prevCharHead(bytes, adjrange, p);
+ if (p == -1) return false;
+ continue retry;
+ }
+ break;
+ } // switch
+ }
+
+ /* no needs to adjust *high, *high is used as range check only */
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ low = p - regex.dMax;
+ high = p - regex.dMin;
+ high = enc.rightAdjustCharHead(bytes, adjrange, high);
+ }
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("backward_search_range: "+
+ "low: " + (low - str) +
+ ", high: " + (high - str));
+ }
+
+ return true;
+ }
+
+ if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail.");
+ return false;
+ } // while
+ }
+
+ // MATCH_AND_RETURN_CHECK
+ private boolean matchCheck(int upperRange, int s, int prev) {
+ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+ if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+ //range = upperRange;
+ if (matchAt(upperRange, s, prev) != -1) {
+ if (!isFindLongest(option)) return true;
+ }
+ } else {
+ //range = upperRange;
+ if (matchAt(upperRange, s, prev) != -1) return true;
+ }
+ } else {
+ if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+ if (matchAt(end, s, prev) != -1) {
+ //range = upperRange;
+ if (!isFindLongest(option)) return true;
+ }
+ } else {
+ //range = upperRange;
+ if (matchAt(end, s, prev) != -1) return true;
+ }
+ }
+ return false;
+ }
+
+ public final int search(int start, int range, int option) { // returns the match position relative to str, or -1; scans forward when range > start, backward otherwise
+ int s, prev;
+ int origStart = start; // the anchor handling below may rewrite start/range; the caller's values are still needed later
+ int origRange = range;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search (entry point): "+
+ "str: " + str +
+ ", end: " + (end - str) +
+ ", start: " + (start - str) +
+ ", range " + (range - str));
+ }
+
+ if (start > end || start < str) return -1; // start lies outside [str, end]
+
+ /* anchor optimize: resume search range */
+ if (regex.anchor != 0 && str < end) { // non-empty input and the regex carries anchor flags
+ int minSemiEnd, maxSemiEnd;
+
+ if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
+ /* search start-position only */
+ // !begin_position:!
+ if (range > start) {
+ range = start + 1;
+ } else {
+ range = start;
+ }
+ } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
+ /* search str-position only */
+ if (range > start) {
+ if (start != str) return -1; // mismatch_no_msa;
+ range = str + 1;
+ } else {
+ if (range <= str) {
+ start = str;
+ range = str;
+ } else {
+ return -1; // mismatch_no_msa;
+ }
+ }
+ } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
+ minSemiEnd = maxSemiEnd = end;
+ // !end_buf:!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; NOTE(review): endBuf receives start/range by value, so any narrowing it computes is not visible here
+ } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
+ int preEnd = enc.stepBack(bytes, str, end, 1);
+ maxSemiEnd = end;
+ if (enc.isNewLine(bytes, preEnd, end)) {
+ minSemiEnd = preEnd;
+ if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
+ preEnd = enc.stepBack(bytes, str, preEnd, 1);
+ if (preEnd != -1 && enc.isMbcCrnl(bytes, preEnd, end)) {
+ minSemiEnd = preEnd;
+ }
+ }
+ if (minSemiEnd > str && start <= minSemiEnd) {
+ // !goto end_buf;!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ }
+ } else {
+ minSemiEnd = end;
+ // !goto end_buf;!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ }
+ } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
+ // goto !begin_position;!
+ if (range > start) {
+ range = start + 1;
+ } else {
+ range = start;
+ }
+ }
+
+ } else if (str == end) { /* empty string */
+ // empty address ?
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search: empty string.");
+ }
+
+ if (regex.thresholdLength == 0) { // a match on empty input is only attempted when thresholdLength is 0
+ s = start = str;
+ prev = -1;
+ msaInit(option, start);
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ stateCheckBuff = null;
+ stateCheckBuffSize = 0;
+ }
+
+ if (matchCheck(end, s, prev)) return match(s);
+ return mismatch();
+ }
+ return -1; // goto mismatch_no_msa;
+ }
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search(apply anchor): " +
+ "end: " + (end - str) +
+ ", start " + (start - str) +
+ ", range " + (range - str));
+ }
+
+ msaInit(option, origStart); // initialised with the caller's start, not the anchor-adjusted one
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ int offset = Math.min(start, range) - str;
+ stateCheckBuffInit(end - str, offset, regex.numCombExpCheck);
+ }
+
+ s = start;
+ if (range > start) { /* forward search */
+ if (s > str) {
+ prev = enc.prevCharHead(bytes, str, s);
+ } else {
+ prev = 0; // -1
+ }
+
+ if (regex.searchAlgorithm != SearchAlgorithm.NONE) { // a search algorithm is configured: forwardSearchRange picks the candidate positions
+ int schRange = range;
+ if (regex.dMax != 0) {
+ if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
+ schRange = end;
+ } else {
+ schRange += regex.dMax;
+ if (schRange > end) schRange = end;
+ }
+ }
+ if ((end - start) < regex.thresholdLength) return mismatch(); // fewer than thresholdLength bytes remain after start
+
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ do {
+ if (!forwardSearchRange(bytes, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev
+ if (s < low) {
+ s = low;
+ prev = value;
+ }
+ while (s <= high) {
+ if (matchCheck(origRange, s, prev)) return match(s); // ???
+ prev = s;
+ s += enc.length(bytes[s]);
+ }
+ } while (s < range);
+ return mismatch();
+
+ } else { /* check only. */
+ if (!forwardSearchRange(bytes, str, end, s, schRange, null)) return mismatch();
+
+ if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
+ do {
+ if (matchCheck(origRange, s, prev)) return match(s);
+ prev = s;
+ s += enc.length(bytes[s]);
+
+ while (!enc.isNewLine(bytes, prev, end) && s < range) { // skip ahead until prev sits on a newline or range is reached
+ prev = s;
+ s += enc.length(bytes[s]);
+ }
+ } while (s < range);
+ return mismatch();
+ }
+
+ }
+ }
+
+ do { // plain forward scan: one attempt per position, advancing by enc.length(...) until range
+ if (matchCheck(origRange, s, prev)) return match(s);
+ prev = s;
+ s += enc.length(bytes[s]);
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ if (matchCheck(origRange, s, prev)) return match(s);
+ }
+ } else { /* backward search */
+ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+ if (origStart < end) {
+ origStart += enc.length(bytes[origStart]); // /* is upper range */
+ }
+ }
+
+ if (regex.searchAlgorithm != SearchAlgorithm.NONE) { // a search algorithm is configured: backwardSearchRange picks the candidate positions
+ int adjrange;
+ if (range < end) {
+ adjrange = enc.leftAdjustCharHead(bytes, str, range);
+ } else {
+ adjrange = end;
+ }
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
+ do {
+ int schStart = s + regex.dMax;
+ if (schStart > end) schStart = end;
+ if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch(); // low, high
+ if (s > high) s = high;
+ while (s >= low) {
+ prev = enc.prevCharHead(bytes, str, s);
+ if (matchCheck(origStart, s, prev)) return match(s);
+ s = prev;
+ }
+ } while (s >= range);
+ return mismatch();
+ } else { /* check only. */
+ if ((end - range) < regex.thresholdLength) return mismatch();
+
+ int schStart = s;
+ if (regex.dMax != 0) {
+ if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
+ schStart = end;
+ } else {
+ schStart += regex.dMax;
+ if (schStart > end) {
+ schStart = end;
+ } else {
+ schStart = enc.leftAdjustCharHead(bytes, start, schStart);
+ }
+ }
+ }
+ if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch();
+ }
+ }
+
+ do { // plain backward scan: one attempt per position, stepping to the previous character head until below range
+ prev = enc.prevCharHead(bytes, str, s);
+ if (matchCheck(origStart, s, prev)) return match(s);
+ s = prev;
+ } while (s >= range);
+
+ }
+ return mismatch(); // no attempt ended the search; mismatch() may still report a recorded best match
+ }
+
+ /**
+  * End-of-buffer anchor check used by search(): decides whether the requested window can
+  * possibly satisfy the anchor distance limits (regex.anchorDmin / regex.anchorDmax).
+  *
+  * NOTE(review): start and range are adjusted here, but Java passes ints by value, so the
+  * caller never sees the narrowed window; only the boolean result is observable. Confirm
+  * whether search() was meant to continue with the adjusted values.
+  *
+  * @return true when the search can be abandoned without a match attempt
+  */
+ private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) {
+     if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa;
+
+     if (range <= start) {
+         // backward direction
+         if ((minSemiEnd - range) > regex.anchorDmax) {
+             range = minSemiEnd - regex.anchorDmax;
+         }
+         if ((maxSemiEnd - start) < regex.anchorDmin) {
+             start = enc.leftAdjustCharHead(bytes, str, maxSemiEnd - regex.anchorDmin);
+         }
+         return range > start; // mismatch_no_msa when the window is empty
+     }
+
+     // forward direction
+     if ((minSemiEnd - start) > regex.anchorDmax) {
+         start = minSemiEnd - regex.anchorDmax;
+         start = start < end
+                 ? enc.rightAdjustCharHead(bytes, str, start)
+                 : enc.prevCharHead(bytes, str, end); /* match with empty at end */
+     }
+     if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
+         range = maxSemiEnd - regex.anchorDmin + 1;
+     }
+     return start >= range; // mismatch_no_msa when the window is empty
+ }
+
+ private int match(int s) {
+ return s - str; // sstart ???
+ }
+
+ private int mismatch() {
+ if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+ if (msaBestLen >= 0) {
+ int s = msaBestS;
+ return match(s);
+ }
+ }
+ // falls through finish:
+ return -1;
+ }
+}
diff --git a/src/org/joni/MinMaxLen.java b/src/org/joni/MinMaxLen.java
new file mode 100644
index 0000000..dca90e6
--- /dev/null
+++ b/src/org/joni/MinMaxLen.java
@@ -0,0 +1,139 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * A [min, max] pair of byte lengths; max may be INFINITE_DISTANCE. The static helpers do
+  * arithmetic that saturates at INFINITE_DISTANCE.
+  */
+ final class MinMaxLen {
+     static final int INFINITE_DISTANCE = 0x7FFFFFFF;
+
+     /* weight table indexed by (max - min): 1000 / (min-max-dist + 1) */
+     private static final short distValues[] = {
+         1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
+         91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
+         48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
+         32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
+         24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
+         20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
+         16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
+         14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
+         12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+         11, 11, 11, 11, 11, 10, 10, 10, 10, 10
+     };
+
+     int min; /* min byte length */
+     int max; /* max byte length */
+
+     MinMaxLen() {
+     }
+
+     MinMaxLen(int min, int max) {
+         set(min, max);
+     }
+
+     /** Weight of this range: 0 when max is infinite, the table entry for small spans, 1 beyond the table. */
+     int distanceValue() {
+         if (max == INFINITE_DISTANCE) return 0;
+         final int span = max - min;
+         if (span < distValues.length) {
+             /* return dist_vals[d] * 16 / (mm->min + 12); */
+             return distValues[span];
+         }
+         return 1;
+     }
+
+     /**
+      * Compares this range weighted by v1 against other weighted by v2.
+      *
+      * @return 1 when other scores higher (or, on a tie, has the smaller min), -1 for the
+      *         opposite case, 0 when both are equal
+      */
+     int compareDistanceValue(MinMaxLen other, int v1, int v2) {
+         if (v2 <= 0) return -1;
+         if (v1 <= 0) return 1;
+
+         final int mine = v1 * distanceValue();
+         final int theirs = v2 * other.distanceValue();
+         if (mine != theirs) return theirs > mine ? 1 : -1;
+
+         // equal weights: fall back to comparing the minimum lengths
+         if (other.min != min) return other.min < min ? 1 : -1;
+         return 0;
+     }
+
+     boolean equal(MinMaxLen other) {
+         return other.min == min && other.max == max;
+     }
+
+     void set(int min, int max) {
+         this.min = min;
+         this.max = max;
+     }
+
+     void clear() {
+         min = 0;
+         max = 0;
+     }
+
+     void copy(MinMaxLen other) {
+         min = other.min;
+         max = other.max;
+     }
+
+     /** Adds other's bounds to this range, saturating at INFINITE_DISTANCE. */
+     void add(MinMaxLen other) {
+         min = distanceAdd(min, other.min);
+         max = distanceAdd(max, other.max);
+     }
+
+     /** Shifts both bounds by len, saturating at INFINITE_DISTANCE. */
+     void addLength(int len) {
+         min = distanceAdd(min, len);
+         max = distanceAdd(max, len);
+     }
+
+     /** Widens this range so that it also covers other. */
+     void altMerge(MinMaxLen other) {
+         if (other.min < min) min = other.min;
+         if (other.max > max) max = other.max;
+     }
+
+     /** Saturating addition: any infinite operand or an overflowing sum yields INFINITE_DISTANCE. */
+     static int distanceAdd(int d1, int d2) {
+         if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) return INFINITE_DISTANCE;
+         return d1 <= INFINITE_DISTANCE - d2 ? d1 + d2 : INFINITE_DISTANCE;
+     }
+
+     /** Saturating multiplication; a zero multiplier always yields 0. */
+     static int distanceMultiply(int d, int m) {
+         if (m == 0) return 0;
+         return d < INFINITE_DISTANCE / m ? d * m : INFINITE_DISTANCE;
+     }
+
+     /** Formats a range as "(a)-(b)", printing "inf" for an infinite bound. */
+     static String distanceRangeToString(int a, int b) {
+         final String lo = a == INFINITE_DISTANCE ? "inf" : "(" + a + ")";
+         final String hi = b == INFINITE_DISTANCE ? "inf" : "(" + b + ")";
+         return lo + "-" + hi;
+     }
+ }
diff --git a/src/org/joni/NameEntry.java b/src/org/joni/NameEntry.java
new file mode 100644
index 0000000..b5aacac
--- /dev/null
+++ b/src/org/joni/NameEntry.java
@@ -0,0 +1,84 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * A name, stored as the byte range [nameP, nameEnd) of {@code name}, together with the
+  * back-reference numbers registered for it. The first number lives in backRef1; from the
+  * second one on all numbers (including the first) are kept in backRefs.
+  */
+ public final class NameEntry {
+     static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8;
+
+     final byte[] name;
+     final int nameP;
+     final int nameEnd;
+
+     int backNum;
+     int backRef1;
+     int backRefs[];
+
+     public NameEntry(byte[] bytes, int p, int end) {
+         this.name = bytes;
+         this.nameP = p;
+         this.nameEnd = end;
+     }
+
+     /** Registers one more back-reference number for this name. */
+     public void addBackref(int backRef) {
+         backNum++;
+
+         if (backNum == 1) {
+             backRef1 = backRef;
+             return;
+         }
+
+         if (backNum == 2) {
+             // second reference: switch to array storage and carry the first one over
+             backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM];
+             backRefs[0] = backRef1;
+             backRefs[1] = backRef;
+             return;
+         }
+
+         if (backNum > backRefs.length) {
+             // array is full: double its capacity
+             final int[] grown = new int[backRefs.length << 1];
+             System.arraycopy(backRefs, 0, grown, 0, backRefs.length);
+             backRefs = grown;
+         }
+         backRefs[backNum - 1] = backRef;
+     }
+
+     /** Renders the name followed by "-" (no references), the single reference, or a comma-separated list. */
+     @Override
+     public String toString() {
+         final StringBuilder out = new StringBuilder();
+         out.append(new String(name, nameP, nameEnd - nameP)).append(" ");
+
+         switch (backNum) {
+         case 0:
+             out.append("-");
+             break;
+         case 1:
+             out.append(backRef1);
+             break;
+         default:
+             for (int idx = 0; idx < backNum; idx++) {
+                 if (idx != 0) out.append(", ");
+                 out.append(backRefs[idx]);
+             }
+         }
+         return out.toString();
+     }
+
+ }
diff --git a/src/org/joni/NodeOptInfo.java b/src/org/joni/NodeOptInfo.java
new file mode 100644
index 0000000..a8573c2
--- /dev/null
+++ b/src/org/joni/NodeOptInfo.java
@@ -0,0 +1,126 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+ public final class NodeOptInfo { // optimisation info for one node: length range, anchors, exact-string candidates and a byte map
+ final MinMaxLen length = new MinMaxLen();
+ final OptAnchorInfo anchor = new OptAnchorInfo();
+ final OptExactInfo exb = new OptExactInfo(); /* boundary */
+ final OptExactInfo exm = new OptExactInfo(); /* middle */
+ final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) */
+ final OptMapInfo map = new OptMapInfo(); /* boundary */
+
+ public void setBoundNode(MinMaxLen mmd) { // copies mmd into the position info of exb, expr and map (exm is left alone)
+ exb.mmd.copy(mmd);
+ expr.mmd.copy(mmd);
+ map.mmd.copy(mmd);
+ }
+
+ public void clear() { // resets every component
+ length.clear();
+ anchor.clear();
+ exb.clear();
+ exm.clear();
+ expr.clear();
+ map.clear();
+ }
+
+ public void copy(NodeOptInfo other) { // component-wise copy of other into this
+ length.copy(other.length);
+ anchor.copy(other.anchor);
+ exb.copy(other.exb);
+ exm.copy(other.exm);
+ expr.copy(other.expr);
+ map.copy(other.map);
+ }
+
+ public void concatLeftNode(NodeOptInfo other, Encoding enc) { // appends other (the right-hand operand) to this; note that other.exb and other.map may be modified too
+ OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ?
+ tanchor.concat(anchor, other.anchor, length.max, other.length.max);
+ anchor.copy(tanchor);
+
+ if (other.exb.length > 0 && length.max == 0) { // this side has max length 0: fold this anchor into other's boundary string anchor
+ tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max);
+ other.exb.anchor.copy(tanchor);
+ }
+
+ if (other.map.value > 0 && length.max == 0) {
+ if (other.map.mmd.max == 0) {
+ other.map.anchor.leftAnchor |= anchor.leftAnchor;
+ }
+ }
+
+ boolean exbReach = exb.reachEnd; // remember the flags before they are possibly cleared below
+ boolean exmReach = exm.reachEnd;
+
+ if (other.length.max != 0) { // other has a non-zero max length: exb/exm no longer reach the end of the combined node
+ exb.reachEnd = exm.reachEnd = false;
+ }
+
+ if (other.exb.length > 0) { // extend whichever exact string reached the end with other's boundary string, then consume it
+ if (exbReach) {
+ exb.concat(other.exb, enc);
+ other.exb.clear();
+ } else if (exmReach) {
+ exm.concat(other.exb, enc);
+ other.exb.clear();
+ }
+ }
+
+ exm.select(other.exb, enc); // let exm take over other's candidates when select() rates them better
+ exm.select(other.exm, enc);
+
+ if (expr.length > 0) {
+ if (other.length.max > 0) {
+ if (expr.length > other.length.max) expr.length = other.length.max; // cap expr at other's max length
+
+ if (expr.mmd.max == 0) {
+ exb.select(expr, enc);
+ } else {
+ exm.select(expr, enc);
+ }
+ }
+ } else if (other.expr.length > 0) {
+ expr.copy(other.expr);
+ }
+
+ map.select(other.map);
+
+ length.add(other.length); // done last: the code above relies on this node's length before the concatenation
+ }
+
+ public void altMerge(NodeOptInfo other, OptEnvironment env) { // component-wise altMerge of other into this
+ anchor.altMerge(other.anchor);
+ exb.altMerge(other.exb, env);
+ exm.altMerge(other.exm, env);
+ expr.altMerge(other.expr, env);
+ map.altMerge(other.map, env.enc);
+ length.altMerge(other.length);
+ }
+
+ public void setBound(MinMaxLen mmd) { // same body as setBoundNode
+ exb.mmd.copy(mmd);
+ expr.mmd.copy(mmd);
+ map.mmd.copy(mmd);
+ }
+
+ }
diff --git a/src/org/joni/OptAnchorInfo.java b/src/org/joni/OptAnchorInfo.java
new file mode 100644
index 0000000..8b9ff97
--- /dev/null
+++ b/src/org/joni/OptAnchorInfo.java
@@ -0,0 +1,94 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.constants.AnchorType;
+
+ /**
+  * Two bit sets of AnchorType flags: those recorded for the left edge and those recorded
+  * for the right edge.
+  */
+ final class OptAnchorInfo implements AnchorType {
+     int leftAnchor;
+     int rightAnchor;
+
+     void clear() {
+         leftAnchor = 0;
+         rightAnchor = 0;
+     }
+
+     void copy(OptAnchorInfo other) {
+         leftAnchor = other.leftAnchor;
+         rightAnchor = other.rightAnchor;
+     }
+
+     /**
+      * Sets this info to the anchors of "left followed by right". An operand with length 0
+      * lets the other operand's anchor on that side show through as well.
+      */
+     void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) {
+         clear(); // ??? remove
+
+         leftAnchor = leftLength == 0 ? (left.leftAnchor | right.leftAnchor) : left.leftAnchor;
+         rightAnchor = rightLength == 0 ? (right.rightAnchor | left.rightAnchor) : right.rightAnchor;
+     }
+
+     /** True when any bit of {@code anchor} is present on either side. */
+     boolean isSet(int anchor) {
+         return ((leftAnchor & anchor) != 0) || ((rightAnchor & anchor) != 0);
+     }
+
+     /** Sets {@code anchor} on the side chosen by isLeftAnchor. */
+     void add(int anchor) {
+         if (!isLeftAnchor(anchor)) {
+             rightAnchor |= anchor;
+             return;
+         }
+         leftAnchor |= anchor;
+     }
+
+     /** Clears {@code anchor} on the side chosen by isLeftAnchor. */
+     void remove(int anchor) {
+         final int mask = ~anchor;
+         if (!isLeftAnchor(anchor)) {
+             rightAnchor &= mask;
+             return;
+         }
+         leftAnchor &= mask;
+     }
+
+     /** Keeps only the anchors present in both this and other. */
+     void altMerge(OptAnchorInfo other) {
+         leftAnchor &= other.leftAnchor;
+         rightAnchor &= other.rightAnchor;
+     }
+
+     /** Everything except the five listed right-hand anchor values counts as a left anchor. */
+     static boolean isLeftAnchor(int anchor) { // make a mask for it ?
+         return anchor != END_BUF && anchor != SEMI_END_BUF
+                 && anchor != END_LINE && anchor != PREC_READ
+                 && anchor != PREC_READ_NOT;
+     }
+
+     /** Debug rendering of an anchor bit set, e.g. "[begin-buf end-line ]". */
+     static String anchorToString(int anchor) {
+         final int[] flags = {
+             AnchorType.BEGIN_BUF, AnchorType.BEGIN_LINE, AnchorType.BEGIN_POSITION,
+             AnchorType.END_BUF, AnchorType.SEMI_END_BUF, AnchorType.END_LINE,
+             AnchorType.ANYCHAR_STAR, AnchorType.ANYCHAR_STAR_ML
+         };
+         final String[] labels = {
+             "begin-buf ", "begin-line ", "begin-pos ",
+             "end-buf ", "semi-end-buf ", "end-line ",
+             "anychar-star ", "anychar-star-pl "
+         };
+
+         final StringBuilder text = new StringBuilder("[");
+         for (int i = 0; i < flags.length; i++) {
+             if ((anchor & flags[i]) != 0) text.append(labels[i]);
+         }
+         text.append("]");
+         return text.toString();
+     }
+ }
diff --git a/src/org/joni/OptEnvironment.java b/src/org/joni/OptEnvironment.java
new file mode 100644
index 0000000..3461937
--- /dev/null
+++ b/src/org/joni/OptEnvironment.java
@@ -0,0 +1,39 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+// remove this one in future and pass mmd directly
+ /** Bundle of values handed around during optimisation: position info, encoding, flags and scan environment. */
+ final class OptEnvironment {
+     final MinMaxLen mmd = new MinMaxLen();
+     Encoding enc;
+     int options;
+     int caseFoldFlag;
+     ScanEnvironment scanEnv;
+
+     /** Copies every field of other; mmd is copied by value, enc and scanEnv by reference. */
+     void copy(OptEnvironment other) {
+         this.mmd.copy(other.mmd);
+         this.enc = other.enc;
+         this.options = other.options;
+         this.caseFoldFlag = other.caseFoldFlag;
+         this.scanEnv = other.scanEnv;
+     }
+ }
diff --git a/src/org/joni/OptExactInfo.java b/src/org/joni/OptExactInfo.java
new file mode 100644
index 0000000..33123f4
--- /dev/null
+++ b/src/org/joni/OptExactInfo.java
@@ -0,0 +1,172 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+ /**
+  * Exact-string candidate collected for search optimisation: up to OPT_EXACT_MAXLEN bytes in
+  * {@code s} (only the first {@code length} are meaningful) plus position info (mmd), anchor
+  * info and two flags.
+  */
+ final class OptExactInfo {
+     static final int OPT_EXACT_MAXLEN = 24;
+
+     final MinMaxLen mmd = new MinMaxLen();
+     final OptAnchorInfo anchor = new OptAnchorInfo();
+
+     boolean reachEnd;
+     boolean ignoreCase;
+     int length;
+
+     final byte s[] = new byte[OPT_EXACT_MAXLEN];
+
+     /** True when no further byte fits into the buffer. */
+     boolean isFull() {
+         return length >= OPT_EXACT_MAXLEN;
+     }
+
+     void clear() {
+         mmd.clear();
+         anchor.clear();
+
+         reachEnd = false;
+         ignoreCase = false;
+         length = 0;
+         s[0] = 0; // ???
+     }
+
+     void copy(OptExactInfo other) {
+         mmd.copy(other.mmd);
+         anchor.copy(other.anchor);
+         reachEnd = other.reachEnd;
+         ignoreCase = other.ignoreCase;
+         length = other.length;
+
+         System.arraycopy(other.s, 0, s, 0, OPT_EXACT_MAXLEN);
+     }
+
+     /**
+      * Appends other's bytes to this string, whole characters only, stopping when the buffer
+      * would overflow.
+      *
+      * @param other the string to append; not modified
+      * @param enc   encoding used to determine character lengths
+      */
+     void concat(OptExactInfo other, Encoding enc) {
+         if (!ignoreCase && other.ignoreCase) {
+             if (length >= other.length) return; /* avoid */
+             ignoreCase = true;
+         }
+
+         int p = 0; // add->s;
+         int end = p + other.length;
+
+         int i;
+         for (i = length; p < end;) {
+             int len = enc.length(other.s[p]);
+             if (i + len > OPT_EXACT_MAXLEN) break;
+             for (int j = 0; j < len && p < end; j++) {
+                 s[i++] = other.s[p++]; // arraycopy or even don't copy anything ??
+             }
+         }
+
+         length = i;
+         // The result reaches the end only if every byte of other was copied.
+         reachEnd = (p == end ? other.reachEnd : false);
+
+         // !!! remove this temporary when we know it's safe
+         OptAnchorInfo tmp = new OptAnchorInfo();
+         tmp.concat(anchor, other.anchor, 1, 1);
+         // Fix: decide on the combined string's reachEnd, not on other.reachEnd. A copy that
+         // was truncated by the buffer limit must not keep other's right anchor. This matches
+         // concat_opt_exact_info in Oniguruma's regcomp.c, which tests to->reach_end.
+         if (!reachEnd) tmp.rightAnchor = 0;
+         anchor.copy(tmp);
+     }
+
+     /**
+      * Appends the bytes [p, end) of {@code bytes}, whole characters only, until the buffer is full.
+      *
+      * @param raw unused
+      */
+     // ?? raw is not used here
+     void concatStr(byte[] bytes, int p, int end, boolean raw, Encoding enc) {
+         int i;
+         for (i = length; p < end && i < OPT_EXACT_MAXLEN;) {
+             int len = enc.length(bytes[p]);
+             if (i + len > OPT_EXACT_MAXLEN) break;
+             for (int j = 0; j < len && p < end; j++) {
+                 s[i++] = bytes[p++];
+             }
+         }
+
+         length = i;
+     }
+
+     /**
+      * Merges with an alternative: the result is the common prefix (in whole characters) of
+      * both strings, or empty when either is empty or their position info differs.
+      */
+     void altMerge(OptExactInfo other, OptEnvironment env) {
+         if (other.length == 0 || length == 0) {
+             clear();
+             return;
+         }
+
+         if (!mmd.equal(other.mmd)) {
+             clear();
+             return;
+         }
+
+         // Advance character by character while both strings agree on every byte.
+         int i;
+         for (i = 0; i < length && i < other.length;) {
+             if (s[i] != other.s[i]) break;
+             int len = env.enc.length(s[i]);
+
+             int j;
+             for (j = 1; j < len; j++) {
+                 if (s[i + j] != other.s[i + j]) break;
+             }
+
+             if (j < len) break;
+             i += len;
+         }
+
+         if (!other.reachEnd || i < other.length || i < length) reachEnd = false;
+
+         length = i;
+         ignoreCase |= other.ignoreCase;
+
+         anchor.altMerge(other.anchor);
+
+         if (!reachEnd) anchor.rightAnchor = 0;
+     }
+
+     /** Replaces this candidate with alt when the weighted comparison rates alt higher. */
+     void select(OptExactInfo alt, Encoding enc) {
+         int v1 = length;
+         int v2 = alt.length;
+
+         if (v2 == 0) {
+             return;
+         } else if (v1 == 0) {
+             copy(alt);
+             return;
+         } else if (v1 <= 2 && v2 <= 2) {
+             /* ByteValTable[x] is big value --> low price */
+             // The cross assignment (v2 from this, v1 from alt) is the same as in upstream Oniguruma.
+             v2 = OptMapInfo.positionValue(enc, s[0] & 0xff);
+             v1 = OptMapInfo.positionValue(enc, alt.s[0] & 0xff);
+
+             if (length > 1) v1 += 5;
+             if (alt.length > 1) v2 += 5;
+         }
+
+         // Case-sensitive strings count double.
+         if (!ignoreCase) v1 *= 2;
+         if (!alt.ignoreCase) v2 *= 2;
+
+         if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt);
+     }
+
+     // comp_opt_exact_or_map_info
+     private static final int COMP_EM_BASE = 20;
+
+     /**
+      * Compares this exact string against a map candidate.
+      *
+      * @return -1 when m has no value, otherwise the result of MinMaxLen.compareDistanceValue
+      *         on the two weighted scores
+      */
+     int compare(OptMapInfo m) {
+         if (m.value <= 0) return -1;
+
+         int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2);
+         int vm = COMP_EM_BASE * 5 * 2 / m.value;
+
+         return mmd.compareDistanceValue(m.mmd, ve, vm);
+     }
+ }
diff --git a/src/org/joni/OptMapInfo.java b/src/org/joni/OptMapInfo.java
new file mode 100644
index 0000000..6950a42
--- /dev/null
+++ b/src/org/joni/OptMapInfo.java
@@ -0,0 +1,128 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+ /**
+  * Byte-map candidate collected for search optimisation: {@code map} marks byte values with 1
+  * and {@code value} is the sum of positionValue() over the marked bytes.
+  */
+ final class OptMapInfo {
+
+     final MinMaxLen mmd = new MinMaxLen(); /* info position */
+     final OptAnchorInfo anchor = new OptAnchorInfo();
+
+     int value; /* weighted value */
+     final byte map[] = new byte[Config.CHAR_TABLE_SIZE];
+
+     void clear() {
+         mmd.clear();
+         anchor.clear();
+         value = 0;
+         java.util.Arrays.fill(map, (byte) 0);
+     }
+
+     void copy(OptMapInfo other) {
+         mmd.copy(other.mmd);
+         anchor.copy(other.anchor);
+         value = other.value;
+         System.arraycopy(other.map, 0, map, 0, other.map.length);
+     }
+
+     /** Marks byte c; a byte that is already marked does not change the weight. */
+     void addChar(byte c, Encoding enc) {
+         final int idx = c & 0xff;
+         if (map[idx] != 0) return;
+         map[idx] = 1;
+         value += positionValue(enc, idx);
+     }
+
+     /**
+      * Marks the first byte at p and, for every case-fold item the encoding reports for
+      * [p, end), the first encoded byte of that item's first code.
+      */
+     void addCharAmb(byte[] bytes, int p, int end, Encoding enc, int caseFoldFlag) {
+         addChar(bytes[p], enc);
+
+         // multi-character folds are switched off for this lookup
+         caseFoldFlag &= ~Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
+         final CaseFoldCodeItem[] folds = enc.caseFoldCodesByString(caseFoldFlag, bytes, p, end);
+
+         final byte[] encoded = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
+         for (CaseFoldCodeItem fold : folds) {
+             enc.codeToMbc(fold.code[0], encoded, 0);
+             addChar(encoded[0], enc);
+         }
+     }
+
+     // select_opt_map_info
+     private static final int z = 1 << 15; /* 32768: something big value */
+
+     /** Replaces this map with alt when the weighted comparison rates alt higher. */
+     void select(OptMapInfo alt) {
+         if (alt.value == 0) return;
+         if (value == 0) {
+             copy(alt);
+             return;
+         }
+
+         final int mine = z / value;
+         final int theirs = z / alt.value;
+         if (mmd.compareDistanceValue(alt.mmd, mine, theirs) > 0) copy(alt);
+     }
+
+     // alt_merge_opt_map_info
+     /** Merges with an alternative: the union of both maps with a recomputed weight, or empty when other is unusable. */
+     void altMerge(OptMapInfo other, Encoding enc) {
+         /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+         if (value == 0) return;
+         // NOTE(review): this compares max with max; check against upstream
+         // alt_merge_opt_map_info, which may compare with other's min instead.
+         if (other.value == 0 || mmd.max < other.mmd.max) {
+             clear();
+             return;
+         }
+
+         mmd.altMerge(other.mmd);
+
+         int total = 0;
+         for (int b = 0; b < Config.CHAR_TABLE_SIZE; b++) {
+             if (other.map[b] != 0) map[b] = 1;
+             if (map[b] != 0) total += positionValue(enc, b);
+         }
+         value = total;
+
+         anchor.altMerge(other.anchor);
+     }
+
+     static final short ByteValTable[] = {
+         5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
+         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+         12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
+         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
+         5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
+         5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
+     };
+
+     // map_position_value
+     /** Weight of byte value i: the table entry below 128 (20 for byte 0 when enc.minLength() > 1), 4 otherwise. */
+     static int positionValue(Encoding enc, int i) {
+         if (i >= ByteValTable.length) return 4; /* Take it easy. */
+         if (i == 0 && enc.minLength() > 1) return 20;
+         return ByteValTable[i];
+     }
+
+ }
diff --git a/src/org/joni/Option.java b/src/org/joni/Option.java
new file mode 100644
index 0000000..adee24f
--- /dev/null
+++ b/src/org/joni/Option.java
@@ -0,0 +1,122 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * Bit flags controlling regex compilation and searching, plus static
+ * predicates that test a flag word for each of them.
+ */
+public class Option {
+
+    /* options */
+    public static final int NONE = 0;
+    public static final int IGNORECASE = (1<<0);
+    public static final int EXTEND = (1<<1);
+    public static final int MULTILINE = (1<<2);
+    public static final int SINGLELINE = (1<<3);
+    public static final int FIND_LONGEST = (1<<4);
+    public static final int FIND_NOT_EMPTY = (1<<5);
+    public static final int NEGATE_SINGLELINE = (1<<6);
+    public static final int DONT_CAPTURE_GROUP = (1<<7);
+    public static final int CAPTURE_GROUP = (1<<8);
+
+    /* options (search time) */
+    public static final int NOTBOL = (1<<9);
+    public static final int NOTEOL = (1<<10);
+    public static final int POSIX_REGION = (1<<11);
+    public static final int MAXBIT = (1<<12); /* limit */
+
+    public static final int DEFAULT = NONE;
+
+    /* flags and their printable names, in the order toString() reports them */
+    private static final int[] REPORTED_FLAGS = {
+        IGNORECASE, EXTEND, MULTILINE, SINGLELINE, FIND_LONGEST, FIND_NOT_EMPTY,
+        NEGATE_SINGLELINE, DONT_CAPTURE_GROUP, CAPTURE_GROUP,
+        NOTBOL, NOTEOL, POSIX_REGION
+    };
+    private static final String[] REPORTED_NAMES = {
+        "IGNORECASE", "EXTEND", "MULTILINE", "SINGLELINE", "FIND_LONGEST", "FIND_NOT_EMPTY",
+        "NEGATE_SINGLELINE", "DONT_CAPTURE_GROUP", "CAPTURE_GROUP",
+        "NOTBOL", "NOTEOL", "POSIX_REGION"
+    };
+
+    /**
+     * Lists the names of all flags set in {@code option}; each name is
+     * followed by one space, and the result is empty when none is set.
+     */
+    public static String toString(int option) {
+        StringBuilder names = new StringBuilder();
+        for (int i = 0; i < REPORTED_FLAGS.length; i++) {
+            if (hasAny(option, REPORTED_FLAGS[i])) {
+                names.append(REPORTED_NAMES[i]).append(' ');
+            }
+        }
+        return names.toString();
+    }
+
+    /* true when option shares at least one bit with mask */
+    private static boolean hasAny(int option, int mask) {
+        return (option & mask) != 0;
+    }
+
+    public static boolean isIgnoreCase(int option) {
+        return hasAny(option, IGNORECASE);
+    }
+
+    public static boolean isExtend(int option) {
+        return hasAny(option, EXTEND);
+    }
+
+    public static boolean isSingleline(int option) {
+        return hasAny(option, SINGLELINE);
+    }
+
+    public static boolean isMultiline(int option) {
+        return hasAny(option, MULTILINE);
+    }
+
+    public static boolean isFindLongest(int option) {
+        return hasAny(option, FIND_LONGEST);
+    }
+
+    public static boolean isFindNotEmpty(int option) {
+        return hasAny(option, FIND_NOT_EMPTY);
+    }
+
+    /** True when either FIND_LONGEST or FIND_NOT_EMPTY is set. */
+    public static boolean isFindCondition(int option) {
+        return hasAny(option, FIND_LONGEST | FIND_NOT_EMPTY);
+    }
+
+    public static boolean isNegateSingleline(int option) {
+        return hasAny(option, NEGATE_SINGLELINE);
+    }
+
+    public static boolean isDontCaptureGroup(int option) {
+        return hasAny(option, DONT_CAPTURE_GROUP);
+    }
+
+    public static boolean isCaptureGroup(int option) {
+        return hasAny(option, CAPTURE_GROUP);
+    }
+
+    public static boolean isNotBol(int option) {
+        return hasAny(option, NOTBOL);
+    }
+
+    public static boolean isNotEol(int option) {
+        return hasAny(option, NOTEOL);
+    }
+
+    public static boolean isPosixRegion(int option) {
+        return hasAny(option, POSIX_REGION);
+    }
+
+    /* OP_SET_OPTION is required for these options. ??? */
+    // The MULTILINE | IGNORECASE test is disabled; this currently always answers false:
+    // public static boolean isDynamic(int option) {
+    //     return (option & (MULTILINE | IGNORECASE)) != 0;
+    // }
+    public static boolean isDynamic(int option) {
+        return false;
+    }
+}
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
new file mode 100644
index 0000000..6b1d696
--- /dev/null
+++ b/src/org/joni/Parser.java
@@ -0,0 +1,1032 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsOnAtSimple;
+import static org.joni.BitStatus.bsOnOff;
+import static org.joni.Option.isDontCaptureGroup;
+import static org.joni.Option.isIgnoreCase;
+
+import org.joni.ast.AnchorNode;
+import org.joni.ast.AnyCharNode;
+import org.joni.ast.BackRefNode;
+import org.joni.ast.CClassNode;
+import org.joni.ast.CTypeNode;
+import org.joni.ast.CallNode;
+import org.joni.ast.ConsAltNode;
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.ast.QuantifierNode;
+import org.joni.ast.StringNode;
+import org.joni.ast.CClassNode.CCStateArg;
+import org.joni.constants.AnchorType;
+import org.joni.constants.CCSTATE;
+import org.joni.constants.CCVALTYPE;
+import org.joni.constants.CharacterType;
+import org.joni.constants.EncloseType;
+import org.joni.constants.NodeType;
+import org.joni.constants.PosixBracket;
+import org.joni.constants.TokenType;
+
+class Parser extends Lexer {
+
+ // Regex being built; assigned from env.reg in the constructor.
+ protected final Regex regex;
+ // Tree produced by the most recent parse() call.
+ protected Node root;
+
+ // Secondary result of parseEnclose(), whose return value is already taken by
+ // the parsed node: 0 = ordinary enclosure, 1 = plain group, 2 = option-only
+ // group. parseExp() reads it directly after parseEnclose() returns, so nested
+ // (recursive) parses that ran in between do not disturb the value it sees.
+ protected int returnCode;
+
+    /**
+     * Creates a parser over {@code bytes[p..end)}; the lexer state is set up
+     * by the superclass and the target regex is taken from {@code env.reg}.
+     */
+    protected Parser(ScanEnvironment env, byte[]bytes, int p, int end) {
+        super(env, bytes, p, end);
+        this.regex = env.reg;
+    }
+
+    // onig_parse_make_tree
+    /**
+     * Parses the whole pattern, remembers the tree in {@code root}, copies the
+     * number of memory groups from the environment to the regex and returns
+     * the tree.
+     */
+    protected final Node parse() {
+        final Node tree = parseRegexp();
+        root = tree;
+        regex.numMem = env.numMem;
+        return tree;
+    }
+
+    private static final int POSIX_BRACKET_NAME_MIN_LEN = 4;
+    private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH = 20;
+    // ":]" as explicit ASCII bytes; String.getBytes() without a charset would
+    // encode with the platform default charset.
+    private static final byte BRACKET_END[] = {':', ']'};
+
+    /**
+     * Tries to read a POSIX bracket body (the part after "[:") into {@code cc}.
+     *
+     * @param cc character class that receives the bracket's character type
+     * @return {@code false} when a known bracket name followed by ":]" was
+     *         consumed and added to {@code cc}; {@code true} when the text is
+     *         not a POSIX bracket, in which case the read position is restored
+     *         to where it was on entry
+     *
+     * Reports ERR_INVALID_POSIX_BRACKET_TYPE through newSyntaxException when a
+     * known name is not followed by ":]", or when the text merely looks like a
+     * bracket (a ':' immediately followed by ']' within the scan limit).
+     */
+    private boolean parsePosixBracket(CClassNode cc) {
+        mark();
+
+        boolean not;
+        if (peekIs('^')) {
+            inc();
+            not = true;
+        } else {
+            not = false;
+        }
+        if (enc.strLength(bytes, p, stop) >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket
+            byte[][] pbs= PosixBracket.PBSNamesLower;
+            for (int i=0; i<pbs.length; i++) {
+                byte[]name = pbs[i];
+                // hash lookup here ?
+                if (enc.strNCmp(bytes, p, stop, name, 0, name.length) == 0) {
+                    p = enc.step(bytes, p, stop, name.length);
+                    if (enc.strNCmp(bytes, p, stop, BRACKET_END, 0, BRACKET_END.length) != 0) {
+                        newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
+                    }
+                    cc.addCType(PosixBracket.PBSValues[i], not, env, this);
+                    // step over the closing ":]"
+                    inc();
+                    inc();
+                    return false;
+                }
+            }
+
+        }
+
+        // not_posix_bracket:
+        // scan ahead (bounded) for ':' or ']' to detect a malformed bracket
+        c = 0;
+        int i= 0;
+        while(left() && ((c=peek()) != ':') && c != ']') {
+            inc();
+            if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
+        }
+
+        if (c == ':' && left()) {
+            inc();
+            if (left()) {
+                fetch();
+                if (c == ']') newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
+            }
+        }
+        restore();
+        return true; /* 1: is not POSIX bracket, but no error. */
+    }
+
+ private CClassNode parseCharProperty() {
+ int ctype = fetchCharPropertyToCType();
+ CClassNode n = new CClassNode();
+ n.addCType(ctype, false, env, this);
+ if (token.getPropNot()) n.setNot();
+ return n;
+ }
+
+    /**
+     * Looks ahead for {@code code} in the rest of the pattern without moving
+     * the read position (it is marked on entry and restored before returning).
+     *
+     * @param code          code point to look for
+     * @param ignoreEscaped when true, one loop pass after an escape character
+     *                      only resets the escape state and fetches nothing
+     * @return true when a fetched character equals {@code code}
+     */
+    // NOTE(review): the pass that clears the escape state does not consume the
+    // escaped character, so that character is still compared on the next pass --
+    // confirm whether this is intended.
+    private boolean codeExistCheck(int code, boolean ignoreEscaped) {
+        mark();
+
+        boolean found = false;
+        boolean afterEscape = false;
+        while (!found && left()) {
+            if (ignoreEscaped && afterEscape) {
+                afterEscape = false;
+                continue;
+            }
+            fetch();
+            if (c == code) {
+                found = true;
+            } else if (c == syntax.metaCharTable.esc) {
+                afterEscape = true;
+            }
+        }
+
+        restore();
+        return found;
+    }
+
+    /**
+     * Parses a bracketed character class; the opening '[' has already been
+     * consumed. Handles a leading '^', literal and raw-byte members, ranges,
+     * POSIX brackets, character types/properties, nested classes and the
+     * '&&' intersection operator.
+     *
+     * Two corrections relative to the straight port:
+     *  - every operand that follows '&&' is collected in a fresh CClassNode;
+     *    previously the working class was reused without being cleared, so in
+     *    [a&&b&&c] the members of b leaked into the third operand;
+     *  - in the "[0-9-a]" case (a '-' directly after a completed range, with
+     *    the double-range syntax option on) the '-' itself is now added; the
+     *    old sbChar() call read the character of the token that had already
+     *    been fetched after the '-'.
+     *
+     * @return the parsed class, negated when it started with '^'
+     */
+    private CClassNode parseCharClass() {
+        fetchTokenInCC();
+
+        boolean neg;
+        if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
+            neg = true;
+            fetchTokenInCC();
+        } else {
+            neg = false;
+        }
+
+        if (token.type == TokenType.CC_CLOSE) {
+            if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
+            env.ccEscWarn("]");
+            token.type = TokenType.CHAR; /* allow []...] */
+        }
+
+        CClassNode cc = new CClassNode();
+        CClassNode prevCC = null;   // accumulated left-hand side of '&&', null until the first '&&'
+
+        CCStateArg arg = new CCStateArg();
+
+        boolean andStart = false;
+        arg.state = CCSTATE.START;
+
+        while(token.type != TokenType.CC_CLOSE) {
+            boolean fetched = false;    // true when the next token has already been read
+
+            switch (token.type) {
+
+            case CHAR:
+                int len = enc.codeToMbcLength(token.getC());
+                if (len > 1) {
+                    arg.inType = CCVALTYPE.CODE_POINT;
+                } else {
+                    // !sb_char:!
+                    arg.inType = CCVALTYPE.SB;
+                }
+                arg.v = token.getC();
+                arg.vIsRaw = false;
+                // !goto val_entry2;!
+                valEntry2(cc, arg);
+                break;
+
+            case RAW_BYTE:
+                /* tok->base != 0 : octal or hexadec. */
+                if (!enc.isSingleByte() && token.base != 0) {
+                    // gather consecutive raw bytes of the same base into one multi-byte character
+                    byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+                    int psave = p;
+                    int base = token.base;
+                    buf[0] = (byte)token.getC();
+                    int i;
+                    for (i=1; i<enc.maxLength(); i++) {
+                        fetchTokenInCC();
+                        if (token.type != TokenType.RAW_BYTE || token.base != base) {
+                            fetched = true;
+                            break;
+                        }
+                        buf[i] = (byte)token.getC();
+                    }
+                    if (i < enc.minLength()) newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
+
+                    len = enc.length(buf[0]);
+                    if (i < len) {
+                        newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
+                    } else if (i > len) { /* fetch back */
+                        // too many bytes were read: rewind and re-read only those of this character
+                        p = psave;
+                        for (i=1; i<len; i++) fetchTokenInCC();
+                        fetched = false;
+                    }
+                    if (i == 1) {
+                        arg.v = buf[0] & 0xff;
+                        // !goto raw_single!
+                        arg.inType = CCVALTYPE.SB;
+                    } else {
+                        arg.v = enc.mbcToCode(buf, 0, buf.length);
+                        arg.inType = CCVALTYPE.CODE_POINT;
+                    }
+                } else {
+                    arg.v = token.getC();
+                    // !raw_single:!
+                    arg.inType = CCVALTYPE.SB;
+                }
+                arg.vIsRaw = true;
+                // !goto val_entry2;!
+                valEntry2(cc, arg);
+                break;
+
+            case CODE_POINT:
+                arg.v = token.getCode();
+                arg.vIsRaw = true;
+                // !val_entry:!
+                // !val_entry2:!
+                valEntry(cc, arg);
+                break;
+
+            case POSIX_BRACKET_OPEN:
+                if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
+                    env.ccEscWarn("[");
+                    p = token.backP;
+                    arg.v = token.getC();
+                    arg.vIsRaw = false;
+                    // !goto val_entry;!
+                    valEntry(cc, arg);
+                    break;
+                }
+                // !goto next_class;!
+                cc.nextStateClass(arg, env);
+                break;
+
+            case CHAR_TYPE:
+                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
+                // !next_class:!
+                cc.nextStateClass(arg, env);
+                break;
+
+            case CHAR_PROPERTY:
+                int ctype = fetchCharPropertyToCType();
+                cc.addCType(ctype, token.getPropNot(), env, this);
+                // !goto next_class;!
+                cc.nextStateClass(arg, env);
+                break;
+
+            case CC_RANGE:
+                if (arg.state == CCSTATE.VALUE) {
+                    fetchTokenInCC();
+                    fetched = true;
+                    if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
+                        // !range_end_val:!
+                        // !goto val_entry;!
+                        rangeEndVal(cc, arg);
+                        break;
+                    } else if (token.type == TokenType.CC_AND) {
+                        env.ccEscWarn("-");
+                        // goto !range_end_val;!
+                        rangeEndVal(cc, arg);
+                        break;
+                    }
+                    arg.state = CCSTATE.RANGE;
+                } else if (arg.state == CCSTATE.START) {
+                    /* [-xa] is allowed */
+                    arg.v = token.getC();
+                    arg.vIsRaw = false;
+                    fetchTokenInCC();
+                    fetched = true;
+                    /* [--x] or [a&&-x] is warned. */
+                    if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-");
+                    // !goto val_entry;!
+                    valEntry(cc, arg);
+                    break;
+                } else if (arg.state == CCSTATE.RANGE) {
+                    env.ccEscWarn("-");
+                    /* [!--x] is allowed */
+                    // !goto sb_char;!
+                    sbChar(cc, arg);
+                    break;
+                } else { /* CCS_COMPLETE */
+                    fetchTokenInCC();
+                    fetched = true;
+                    if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
+                        // goto !range_end_val!
+                        rangeEndVal(cc, arg);
+                        break;
+                    } else if (token.type == TokenType.CC_AND) {
+                        env.ccEscWarn("-");
+                        // goto !range_end_val;!
+                        rangeEndVal(cc, arg);
+                        break;
+                    }
+
+                    if (syntax.allowDoubleRangeOpInCC()) {
+                        env.ccEscWarn("-");
+                        /* [0-9-a] is allowed as [0-9\-a] */
+                        // The current token is already the one after '-', so the
+                        // literal '-' has to be added explicitly (range_end_val).
+                        rangeEndVal(cc, arg);
+                        break;
+                    }
+                    newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
+                }
+                break;
+
+            case CC_CC_OPEN: /* [ */
+                CClassNode acc = parseCharClass();
+                cc.or(acc, enc);
+                break;
+
+            case CC_AND:     /* && */
+                if (arg.state == CCSTATE.VALUE) {
+                    arg.v = 0; // ??? safe v ?
+                    arg.vIsRaw = false;
+                    cc.nextStateValue(arg, env);
+                }
+                /* initialize local variables */
+                andStart = true;
+                arg.state = CCSTATE.START;
+                if (prevCC != null) {
+                    prevCC.and(cc, enc);
+                } else {
+                    prevCC = cc;
+                }
+                // initialize_cclass(cc): the next operand starts from an empty class
+                cc = new CClassNode();
+                break;
+
+            case EOT:
+                newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
+
+            default:
+                newInternalException(ERR_PARSER_BUG);
+            } // switch
+
+            if (!fetched) fetchTokenInCC();
+
+        } // while
+
+        // flush a pending single value
+        if (arg.state == CCSTATE.VALUE) {
+            arg.v = 0; // ??? safe v ?
+            arg.vIsRaw = false;
+            cc.nextStateValue(arg, env);
+        }
+
+        // fold the last '&&' operand into the accumulated class
+        if (prevCC != null) {
+            prevCC.and(cc, enc);
+            cc = prevCC;
+        }
+
+        if (neg) {
+            cc.setNot();
+        } else {
+            cc.clearNot();
+        }
+
+        // with this syntax option the newline code is added to a non-empty negated class
+        if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
+            if (!cc.isEmpty()) {
+                final int NEW_LINE = 0x0a;
+                if (enc.isNewLine(NEW_LINE)) {
+                    if (enc.codeToMbcLength(NEW_LINE) == 1) {
+                        cc.bs.set(NEW_LINE);
+                    } else {
+                        cc.addCodeRange(env, NEW_LINE, NEW_LINE);
+                    }
+                }
+            }
+        }
+
+        return cc;
+    }
+
+ // Stand-in for the "val_entry2" label: hands the value already prepared in
+ // arg to cc.nextStateValue().
+ private void valEntry2(CClassNode cc, CCStateArg arg) {
+ cc.nextStateValue(arg, env);
+ }
+
+    /**
+     * Stand-in for the "val_entry" label: classifies {@code arg.v} as a single
+     * byte or a code point from its encoded length, then continues with
+     * {@link #valEntry2}.
+     */
+    private void valEntry(CClassNode cc, CCStateArg arg) {
+        if (enc.codeToMbcLength(arg.v) == 1) {
+            arg.inType = CCVALTYPE.SB;
+        } else {
+            arg.inType = CCVALTYPE.CODE_POINT;
+        }
+        // !val_entry2:!
+        valEntry2(cc, arg);
+    }
+
+    /**
+     * Stand-in for the "sb_char" label: feeds the current token's character
+     * to the class as a non-raw single-byte value.
+     */
+    private void sbChar(CClassNode cc, CCStateArg arg) {
+        arg.v = token.getC();
+        arg.vIsRaw = false;
+        arg.inType = CCVALTYPE.SB;
+        // !goto val_entry2;!
+        valEntry2(cc, arg);
+    }
+
+    /**
+     * Stand-in for the "range_end_val" label: adds a literal '-' to the class
+     * as a non-raw value via {@link #valEntry}.
+     */
+    private void rangeEndVal(CClassNode cc, CCStateArg arg) {
+        arg.vIsRaw = false;
+        arg.v = '-';
+        // !goto val_entry;!
+        valEntry(cc, arg);
+    }
+
+    /**
+     * Parses the construct that follows an opening parenthesis: a capturing
+     * group, or one of the "(?" forms handled below (non-capturing group,
+     * look-ahead/behind, stop-backtrack, named group, capture history,
+     * inline options).
+     *
+     * Besides the returned node, {@code returnCode} tells the caller what was
+     * parsed: 0 = enclosure with its target attached, 1 = plain group (the
+     * returned node is the group's content), 2 = option-only group such as
+     * "(?i)" whose target the caller must still parse.
+     *
+     * Two corrections in the "(?@" case:
+     *  - "(?@&lt;name&gt;...)" leaves the switch once namedGroup2() has built the
+     *    node; previously it fell through, un-fetched a character and replaced
+     *    the named node with a second, unnamed memory group;
+     *  - the unnamed "(?@...)" form now records its group in
+     *    env.captureHistory, as namedGroup2() does for the named form.
+     *
+     * @param term token type that terminates the enclosed sub-expression
+     */
+    private Node parseEnclose(TokenType term) {
+        Node node = null;
+
+        if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
+
+        int option = env.option;
+
+        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
+            inc();
+            if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+
+            boolean listCapture = false;
+
+            fetch();
+            switch(c) {
+            case ':':  /* (?:...) grouping only */
+                // !group:!
+                fetchToken();
+                node = parseSubExp(term);
+                returnCode = 1; /* group */
+                return node;
+
+            case '=':
+                node = new AnchorNode(AnchorType.PREC_READ);
+                break;
+
+            case '!':  /* preceding read */
+                node = new AnchorNode(AnchorType.PREC_READ_NOT);
+                break;
+
+            case '>':  /* (?>...) stop backtrack */
+                node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+                break;
+
+            case '\'':
+                if (Config.USE_NAMED_GROUP) {
+                    if (syntax.op2QMarkLtNamedGroup()) {
+                        // !goto named_group1!;
+                        listCapture = false;
+                        node = namedGroup2(listCapture);
+                        break;
+                    } else {
+                        newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                    }
+                    break;
+                } // USE_NAMED_GROUP
+                break;
+            case '<':  /* look behind (?<=...), (?<!...) */
+                fetch();
+                if (c == '=') {
+                    node = new AnchorNode(AnchorType.LOOK_BEHIND);
+                } else if (c == '!') {
+                    node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
+                } else {
+                    if (Config.USE_NAMED_GROUP) {
+                        if (syntax.op2QMarkLtNamedGroup()) {
+                            // put back the first name character; namedGroup2 reads c as the opener
+                            unfetch();
+                            c = '<';
+
+                            // !named_group1:!
+                            listCapture = false;
+                            // !named_group2:!
+                            node = namedGroup2(listCapture);
+                            break;
+                        } else {
+                            newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                        }
+
+                    } else { // USE_NAMED_GROUP
+                        newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                    } // USE_NAMED_GROUP
+                }
+                break;
+
+            case '@':
+                if (syntax.op2AtMarkCaptureHistory()) {
+                    if (Config.USE_NAMED_GROUP) {
+                        if (syntax.op2QMarkLtNamedGroup()) {
+                            fetch();
+                            if (c == '<' || c == '\'') {
+                                listCapture = true;
+                                // /* (?@<name>...) */
+                                // goto !named_group2;!
+                                node = namedGroup2(listCapture);
+                                break; // named group is complete; do not fall into the unnamed path
+                            }
+                            unfetch();
+                        }
+                    } // USE_NAMED_GROUP
+                    EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
+                    int num = env.addMemEntry();
+                    if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
+                    en.regNum = num;
+                    // mark this group as a capture-history group
+                    env.captureHistory = bsOnAtSimple(env.captureHistory, num);
+                    node = en;
+                } else {
+                    newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                }
+                break;
+
+            // case 'p': #ifdef USE_POSIXLINE_OPTION
+            case '-':
+            case 'i':
+            case 'm':
+            case 's':
+            case 'x':
+                // inline options: letters switch a flag on, or off once '-' has been seen
+                boolean neg = false;
+                while(true) {
+                    switch(c) {
+                    case ':':
+                    case ')':
+                        break;
+
+                    case '-':
+                        neg = true;
+                        break;
+
+                    case 'x':
+                        option = bsOnOff(option, Option.EXTEND, neg);
+                        break;
+
+                    case 'i':
+                        option = bsOnOff(option, Option.IGNORECASE, neg);
+                        break;
+
+                    case 's':
+                        if (syntax.op2OptionPerl()) {
+                            option = bsOnOff(option, Option.MULTILINE, neg);
+                        } else {
+                            newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                        }
+                        break;
+
+                    case 'm':
+                        if (syntax.op2OptionPerl()) {
+                            option = bsOnOff(option, Option.SINGLELINE, !neg);
+                        } else if (syntax.op2OptionRuby()) {
+                            option = bsOnOff(option, Option.MULTILINE, neg);
+                        } else {
+                            newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                        }
+                        break;
+
+                    // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
+                    // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
+                    // break;
+
+                    default:
+                        newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                    } // switch
+
+                    if (c == ')') {
+                        EncloseNode en = new EncloseNode(option, 0); // node_new_option
+                        node = en;
+                        returnCode = 2; /* option only */
+                        return node;
+                    } else if (c == ':') {
+                        // (?opts:...): parse the body under the new options, then restore
+                        int prev = env.option;
+                        env.option = option;
+                        fetchToken();
+                        Node target = parseSubExp(term);
+                        env.option = prev;
+                        EncloseNode en = new EncloseNode(option, 0); // node_new_option
+                        en.setTarget(target);
+                        node = en;
+                        returnCode = 0;
+                        return node;
+                    }
+                    if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+                    fetch();
+                } // while
+
+            default:
+                newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+            } // switch
+
+        } else {
+            if (isDontCaptureGroup(env.option)) {
+                // !goto group;!
+                fetchToken();
+                node = parseSubExp(term);
+                returnCode = 1; /* group */
+                return node;
+            }
+            EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
+            int num = env.addMemEntry();
+            en.regNum = num;
+            node = en;
+        }
+
+        fetchToken();
+        Node target = parseSubExp(term);
+
+        if (node.getType() == NodeType.ANCHOR) {
+            AnchorNode an = (AnchorNode) node;
+            an.setTarget(target);
+        } else {
+            EncloseNode en = (EncloseNode)node;
+            en.setTarget(target);
+            if (en.type == EncloseType.MEMORY) {
+                /* Don't move this to previous of parse_subexp() */
+                env.setMemNode(en.regNum, node);
+            }
+        }
+        returnCode = 0;
+        return node; // ??
+    }
+
+ private Node namedGroup2(boolean listCapture) {
+ int nm = p;
+ int num = fetchName(c, false);
+ int nameEnd = value;
+ num = env.addMemEntry();
+ if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
+
+ regex.nameAdd(bytes, nm, nameEnd, num, syntax);
+ EncloseNode en = new EncloseNode(env.option, true); // node_new_enclose_memory
+ en.regNum = num;
+
+ Node node = en;
+
+ if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, num);
+ env.numNamed++;
+ return node;
+ }
+
+    private int nextChar; // hidden var: second result of findStrPosition()
+
+    /**
+     * Searches {@code bytes[from..to)} for the code point sequence
+     * {@code s[0..n)}.
+     *
+     * On success {@code nextChar} is set to the offset just past the match.
+     * The port used to guard that assignment with
+     * {@code bytes[nextChar] != 0}, a mistranslation of a C out-pointer null
+     * check, which could leave {@code nextChar} stale.
+     *
+     * @return offset of the first match, or -1 when there is none (in which
+     *         case {@code nextChar} is left unchanged)
+     */
+    private int findStrPosition(int[]s, int n, int from, int to) {
+        int pos = from;
+        while (pos < to) {
+            int code = enc.mbcToCode(bytes, pos, to);
+            int q = pos + enc.length(bytes[pos]);
+            if (code == s[0]) {
+                int i;
+                for (i = 1; i < n && q < to; i++) {
+                    code = enc.mbcToCode(bytes, q, to);
+                    if (code != s[i]) break;
+                    q += enc.length(bytes[q]);
+                }
+                if (i >= n) {
+                    nextChar = q;
+                    return pos;
+                }
+            }
+            pos = q;
+        }
+        return -1;
+    }
+
+ // Parses one expression (an atom plus any repeat operators that follow it),
+ // dispatching on the current token type. Returns an empty StringNode when the
+ // current token is the terminator `term`, ALT or EOT. Most cases build `node`,
+ // fetch the next token and finish in parseExpRepeat(); the string-like cases
+ // return through parseExpTkByte()/parseExpTkRawByte() instead.
+ private Node parseExp(TokenType term) {
+ if (token.type == term) {
+ //!goto end_of_token;!
+ return new StringNode();
+ }
+
+ Node node = null;
+ // set when parseEnclose() reports a plain group (returnCode 1); passed on to the repeat handling
+ boolean group = false;
+
+ switch(token.type) {
+ case ALT:
+ case EOT:
+ // !end_of_token:!
+ return new StringNode(); // node_new_empty
+
+ case SUBEXP_OPEN:
+ node = parseEnclose(TokenType.SUBEXP_CLOSE);
+ if (returnCode == 1) {
+ group = true;
+ } else if (returnCode == 2) { /* option only */
+ // option-only group: the rest of the enclosing expression becomes its
+ // target, parsed with the group's options in effect and restored afterwards
+ int prev = env.option;
+ EncloseNode en = (EncloseNode)node;
+ env.option = en.option;
+ fetchToken();
+ Node target = parseSubExp(term);
+ env.option = prev;
+ en.setTarget(target);
+ return node;
+ }
+ break;
+
+ case SUBEXP_CLOSE:
+ if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
+
+ // an unmatched ')' that the syntax tolerates is handled like string data
+ if (token.escaped) {
+ // !goto tk_raw_byte;!
+ return parseExpTkRawByte(group);
+ } else {
+ // !goto tk_byte;!
+ return parseExpTkByte(group);
+ }
+
+ case STRING:
+ // !tk_byte:!
+ return parseExpTkByte(group);
+
+ case RAW_BYTE:
+ // !tk_raw_byte:!
+ return parseExpTkRawByte(group);
+
+ case CODE_POINT:
+ // encode the code point and use the resulting bytes as a string node
+ byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
+ int num = enc.codeToMbc(token.getCode(), buf, 0);
+ // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
+ node = new StringNode(buf, 0, num);
+ break;
+
+ case QUOTE_OPEN:
+ // quoted text runs up to the sequence <escape char>'E', or to the end of
+ // the pattern when that sequence is missing; p then continues at nextChar
+ int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'};
+ int qstart = p;
+ int qend = findStrPosition(endOp, endOp.length, qstart, stop); // will set nextChar!!!
+ if (qend == -1) {
+ nextChar = qend = stop;
+ }
+ node = new StringNode(bytes, qstart, qend);
+ p = nextChar;
+ break;
+
+ case CHAR_TYPE:
+ switch(token.getPropCType()) {
+ case CharacterType.WORD:
+ node = new CTypeNode(token.getPropCType(), token.getPropNot());
+ break;
+
+ case CharacterType.SPACE:
+ case CharacterType.DIGIT:
+ case CharacterType.XDIGIT:
+ // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
+ // these types are expanded into a character class, negated if requested
+ CClassNode ccn = new CClassNode();
+ ccn.addCType(token.getPropCType(), false, env, this);
+ if (token.getPropNot()) ccn.setNot();
+ node = ccn;
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } // inner switch
+ break;
+
+ case CHAR_PROPERTY:
+ node = parseCharProperty();
+ break;
+
+ case CC_CC_OPEN:
+ CClassNode cc = parseCharClass();
+ node = cc;
+ if (isIgnoreCase(env.option)) {
+ // case-insensitive: apply all case folds to the class; if that yields
+ // extra alternatives (arg.altRoot), wrap class and alternatives in an alt node
+ ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
+ enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
+
+ if (arg.altRoot != null) {
+ node = ConsAltNode.newAltNode(node, arg.altRoot);
+ }
+ }
+ break;
+
+ case ANYCHAR:
+ node = new AnyCharNode();
+ break;
+
+ case ANYCHAR_ANYTIME:
+ // any-char wrapped in a quantifier with bounds 0 .. REPEAT_INFINITE
+ node = new AnyCharNode();
+ QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+ qn.setTarget(node);
+ node = qn;
+ break;
+
+ case BACKREF:
+ // a single reference is stored separately in the token; wrap it in an array
+ int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
+ node = new BackRefNode(token.getBackrefNum(),
+ backRefs,
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+
+ break;
+
+ case CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ int gNum = token.getCallGNum();
+
+ // a negative group number is relative and is converted to an absolute one
+ if (gNum < 0) {
+ gNum = backrefRelToAbs(gNum);
+ if (gNum <= 0) newValueException(ERR_INVALID_BACKREF);
+ }
+ node = new CallNode(bytes, token.getCallNameP(), token.getCallNameEnd(), gNum);
+ env.numCall++;
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case ANCHOR:
+ node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
+ break;
+
+ case OP_REPEAT:
+ case INTERVAL:
+ // a repeat operator with nothing in front of it: an error, an empty
+ // target, or literal text, depending on the syntax options
+ if (syntax.contextIndepRepeatOps()) {
+ if (syntax.contextInvalidRepeatOps()) {
+ newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
+ } else {
+ node = new StringNode(); // node_new_empty
+ }
+ } else {
+ // !goto tk_byte;!
+ return parseExpTkByte(group);
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } //switch
+
+ //targetp = node;
+
+ // !re_entry:!
+ fetchToken();
+
+ // !repeat:!
+ return parseExpRepeat(node, group);
+ }
+
+ private Node parseExpTkByte(boolean group) {
+ // !tk_byte:!
+ StringNode node = new StringNode(bytes, token.backP, p);
+ while (true) {
+ fetchToken();
+ if (token.type != TokenType.STRING) break;
+
+ if (token.backP == node.end) {
+ node.end = p; // non escaped character, remain shared, just increase shared range
+ } else {
+ node.cat(bytes, token.backP, p); // non continuous string stream, need to COW
+ }
+ }
+ // !string_end:!
+ // targetp = node;
+ // !goto repeat;!
+ return parseExpRepeat(node, group);
+ }
+
+ private Node parseExpTkRawByte(boolean group) {
+ // !tk_raw_byte:!
+
+ // important: we don't use 0xff mask here neither in the compiler
+ // (in the template string) so we won't have to mask target
+ // strings when comparing against them in the matcher
+ StringNode node = new StringNode((byte)token.getC());
+ node.setRaw();
+
+ int len = 1;
+ while (true) {
+ if (len >= enc.minLength()) {
+ if (len == enc.length(node.bytes[node.p])) {
+ fetchToken();
+ node.clearRaw();
+ // !goto string_end;!
+ return parseExpRepeat(node, group);
+ }
+ }
+
+ fetchToken();
+ if (token.type != TokenType.RAW_BYTE) {
+ /* Don't use this, it is wrong for little endian encodings. */
+ // USE_PAD_TO_SHORT_BYTE_CHAR ...
+
+ newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
+ }
+
+ // important: we don't use 0xff mask here neither in the compiler
+ // (in the template string) so we won't have to mask target
+ // strings when comparing against them in the matcher
+ node.cat((byte)token.getC());
+ len++;
+ } // while
+ }
+
+ private Node parseExpRepeat(Node target, boolean group) {
+ // !repeat:!
+ while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) {
+ if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+
+ QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
+ token.getRepeatUpper(),
+ token.type == TokenType.INTERVAL);
+
+ qtfr.greedy = token.getRepeatGreedy();
+ int ret = qtfr.setQuantifier(target, group, env, bytes, getBegin(), getEnd());
+ Node qn = qtfr;
+
+ if (token.getRepeatPossessive()) {
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+ en.setTarget(qn);
+ qn = en;
+ }
+
+ if (ret == 0) {
+ target = qn;
+ } else if (ret == 2) { /* split case: /abc+/ */
+ target = ConsAltNode.newListNode(target, null);
+ ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
+
+ fetchToken();
+ return parseExpRepeatForCar(target, tmp, group);
+ }
+ // !goto re_entry;!
+ fetchToken();
+ }
+ return target;
+ }
+
+ private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
+ // !repeat:!
+ while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) {
+ if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+
+ QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
+ token.getRepeatUpper(),
+ token.type == TokenType.INTERVAL);
+
+ qtfr.greedy = token.getRepeatGreedy();
+ int ret = qtfr.setQuantifier(target.car, group, env, bytes, getBegin(), getEnd());
+ Node qn = qtfr;
+
+ if (token.getRepeatPossessive()) {
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+ en.setTarget(qn);
+ qn = en;
+ }
+
+ if (ret == 0) {
+ target.setCar(qn);
+ } else if (ret == 2) { /* split case: /abc+/ */ //!!! this shouldn't happen here, remove
+ assert false;
+ target.setCar(ConsAltNode.newListNode(target.car, null));
+
+ Node tmp = (((ConsAltNode)(target).car).setCdr(ConsAltNode.newListNode(qn, null)));
+ target = (ConsAltNode)tmp;
+ }
+ // !goto re_entry;!
+ fetchToken();
+ }
+ return top;
+ }
+
+ private Node parseBranch(TokenType term) {
+ Node node = parseExp(term);
+
+ if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
+ return node;
+ } else {
+ ConsAltNode top = ConsAltNode.newListNode(node, null);
+ ConsAltNode t = top;
+
+ while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
+ node = parseExp(term);
+ if (node.getType() == NodeType.LIST) {
+ t.setCdr((ConsAltNode)node);
+ while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr;
+
+ t = ((ConsAltNode)node);
+ } else {
+ t.setCdr(ConsAltNode.newListNode(node, null));
+ t = t.cdr;
+ }
+ }
+ return top;
+ }
+ }
+
+ /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
+ private Node parseSubExp(TokenType term) {
+ Node node = parseBranch(term);
+
+ if (token.type == term) {
+ return node;
+ } else if (token.type == TokenType.ALT) {
+ ConsAltNode top = ConsAltNode.newAltNode(node, null);
+ ConsAltNode t = top;
+ while (token.type == TokenType.ALT) {
+ fetchToken();
+ node = parseBranch(term);
+
+ t.setCdr(ConsAltNode.newAltNode(node, null));
+ t = t.cdr;
+ }
+
+ if (token.type != term) parseSubExpError(term);
+ return top;
+ } else {
+ parseSubExpError(term);
+ return null; //not reached
+ }
+ }
+
+    /**
+     * Reports a sub-expression that did not end on its terminator: an
+     * unmatched-parenthesis syntax error when a closing parenthesis was
+     * expected, a parser-bug internal error otherwise.
+     */
+    private void parseSubExpError(TokenType term) {
+        if (term != TokenType.SUBEXP_CLOSE) {
+            newInternalException(ERR_PARSER_BUG);
+        } else {
+            newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
+        }
+    }
+
+ private Node parseRegexp() {
+ fetchToken();
+ return parseSubExp(TokenType.EOT);
+ }
+}
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
new file mode 100644
index 0000000..8b82ae1
--- /dev/null
+++ b/src/org/joni/Regex.java
@@ -0,0 +1,480 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+import static org.joni.Option.isCaptureGroup;
+import static org.joni.Option.isDontCaptureGroup;
+
+import java.util.IllegalFormatConversionException;
+
+import org.joni.constants.AnchorType;
+import org.joni.constants.RegexState;
+import org.joni.encoding.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.ValueException;
+import org.joni.util.BytesHash;
+
+public final class Regex implements RegexState {
+
+ int[] code; /* compiled pattern */
+ int codeLength;
+ boolean stackNeeded;
+ Object[]operands; /* e.g. shared CClassNode */
+ int operandLength;
+
+ int state; /* normal, searching, compiling */ // remove
+ int numMem; /* used memory(...) num counted from 1 */
+ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
+ int numCombExpCheck; /* combination explosion check */
+ int numCall; /* number of subexp call */
+ int captureHistory; /* (?@...) flag (1-31) */
+ int btMemStart; /* need backtrack flag */
+ int btMemEnd; /* need backtrack flag */
+
+ int stackPopLevel;
+
+ int[]repeatRangeLo;
+ int[]repeatRangeHi;
+
+ public final WarnCallback warnings;
+
+ final Encoding enc;
+ int options;
+ //final Syntax syntax;
+ final int caseFoldFlag;
+
+ BytesHash<NameEntry> nameTable; // named entries
+
+ /* optimization info (string search, char-map and anchors) */
+ SearchAlgorithm searchAlgorithm; /* optimize flag */
+ int thresholdLength; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ int anchorDmin; /* (SEMI_)END_BUF anchor distance */
+ int anchorDmax; /* (SEMI_)END_BUF anchor distance */
+ int subAnchor; /* start-anchor for exact or map */
+
+ byte[]exact;
+ int exactP;
+ int exactEnd;
+
+ byte[]map; /* used as BM skip or char-map */
+ int[]intMap; /* BM skip for exact_len > 255 */
+ int[]intMapBackward; /* BM skip for backward search */
+ int dMin; /* min-distance of exact or map */
+ int dMax; /* max-distance of exact or map */
+
+ /** Compiles the pattern with {@code Syntax.RUBY} and {@code WarnCallback.DEFAULT}. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc) {
+     this(bytes, p, end, option, enc, Syntax.RUBY, WarnCallback.DEFAULT);
+ }
+
+ // onig_new
+ /** Compiles the pattern with the given syntax, the default case-fold flag and {@code WarnCallback.DEFAULT}. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax) {
+     this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, WarnCallback.DEFAULT);
+ }
+
+ /** Compiles the pattern with {@code Syntax.RUBY} and the given warning callback. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc, WarnCallback warnings) {
+     this(bytes, p, end, option, enc, Syntax.RUBY, warnings);
+ }
+
+ // onig_new
+ /** Compiles the pattern with the given syntax and warning callback and the default case-fold flag. */
+ public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax, WarnCallback warnings) {
+     this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, warnings);
+ }
+
+ // onig_alloc_init
+ /**
+  * Primary constructor: validates and normalises the option bits, stores
+  * the configuration and compiles the pattern immediately.
+  *
+  * @param bytes array holding the pattern
+  * @param p start offset of the pattern, handed to the {@code Compiler}
+  * @param end end offset of the pattern, handed to the {@code Compiler}
+  * @param option option bits; the syntax's own options are OR-ed in
+  * @param caseFoldFlag case folding flag stored on the regex
+  * @param enc encoding of the pattern
+  * @param syntax syntax used for compilation
+  * @param warnings callback stored in {@link #warnings}
+  * @throws ValueException if both DONT_CAPTURE_GROUP and CAPTURE_GROUP are set
+  */
+ public Regex(byte[]bytes, int p, int end, int option, int caseFoldFlag, Encoding enc, Syntax syntax, WarnCallback warnings) {
+     final int captureBits = Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP;
+     if ((option & captureBits) == captureBits) {
+         throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
+     }
+
+     // the negate flag is looked up in the caller's bits only, before the syntax options are merged
+     final boolean negateSingleLine = (option & Option.NEGATE_SINGLELINE) != 0;
+     option |= syntax.options;
+     if (negateSingleLine) option &= ~Option.SINGLELINE;
+
+     this.enc = enc;
+     this.options = option;
+     this.caseFoldFlag = caseFoldFlag;
+     this.warnings = warnings;
+
+     ScanEnvironment env = new ScanEnvironment(this, syntax);
+     new Compiler(env, bytes, p, end).compile();
+ }
+
+ /**
+  * Creates a matcher over the whole array, i.e. with start offset 0 and
+  * end offset {@code bytes.length}.
+  */
+ public Matcher matcher(byte[]bytes) {
+ return matcher(bytes, 0, bytes.length);
+ }
+
+ /**
+  * Creates a matcher for this regex; {@code bytes}, {@code p} and
+  * {@code end} are passed to the {@code Matcher} constructor unchanged.
+  */
+ public Matcher matcher(byte[]bytes, int p, int end) {
+ return new Matcher(this, bytes, p, end);
+ }
+
+ /** Returns {@code numMem}, the counter of used capture memories. */
+ public int numberOfCaptures() {
+ return numMem;
+ }
+
+ /**
+  * Counts the bits set in {@code captureHistory} for positions
+  * 0 to {@code Config.MAX_CAPTURE_HISTORY_GROUP} inclusive.
+  *
+  * @return the number of set bits, or 0 when capture history is disabled in {@code Config}
+  */
+ public int numberOfCaptureHistories() {
+     if (!Config.USE_CAPTURE_HISTORY) return 0;
+
+     int count = 0;
+     int group = 0;
+     while (group <= Config.MAX_CAPTURE_HISTORY_GROUP) {
+         if (bsAt(captureHistory, group)) count++;
+         group++;
+     }
+     return count;
+ }
+
+ /**
+  * Renders the name table for debugging: a header line, one line per entry
+  * and a trailing blank line.
+  *
+  * @return the dump, or an empty string when no name table exists
+  */
+ String nameTableToString() {
+     if (nameTable == null) return "";
+
+     StringBuilder out = new StringBuilder("name table\n");
+     for (NameEntry entry : nameTable) {
+         out.append(" ").append(entry).append("\n");
+     }
+     return out.append("\n").toString();
+ }
+
+ /**
+  * Looks the name given by {@code name[nameP..nameEnd]} up in the name table.
+  *
+  * @return the result of the table lookup, or null when there is no name table
+  */
+ NameEntry nameFind(byte[]name, int nameP, int nameEnd) {
+     return nameTable != null ? nameTable.get(name, nameP, nameEnd) : null;
+ }
+
+ /**
+  * Rewrites every group number stored in the name table through
+  * {@code map}, where {@code map[old]} is the new number.
+  *
+  * Entries with one reference keep it in {@code backRef1}; entries with more
+  * keep them in {@code backRefs}. Entries with no reference are left alone.
+  *
+  * @param map translation table indexed by the old group number
+  */
+ void renumberNameTable(int[]map) {
+     if (nameTable == null) return;
+
+     for (NameEntry entry : nameTable) {
+         if (entry.backNum == 1) {
+             entry.backRef1 = map[entry.backRef1];
+         } else if (entry.backNum > 1) {
+             for (int k = 0; k < entry.backNum; k++) {
+                 entry.backRefs[k] = map[entry.backRefs[k]];
+             }
+         }
+     }
+ }
+
+ /** Returns the size of the name table, or 0 when no table has been created. */
+ int numberOfNames() {
+     if (nameTable == null) return 0;
+     return nameTable.size();
+ }
+
+ /**
+  * Registers group number {@code backRef} under the name
+  * {@code name[nameP..nameEnd]}, creating the name table and the entry on
+  * first use.
+  *
+  * @param syntax consulted to decide whether a name may be defined twice
+  * @throws ValueException if the name is empty, or if it already has a
+  *         group and the syntax does not allow multiplex definitions
+  */
+ void nameAdd(byte[]name, int nameP, int nameEnd, int backRef, Syntax syntax) {
+     if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.ERR_EMPTY_GROUP_NAME);
+
+     NameEntry entry;
+     if (nameTable != null) {
+         entry = nameFind(name, nameP, nameEnd);
+     } else {
+         nameTable = new BytesHash<NameEntry>(); // 13, oni defaults to 5
+         entry = null;
+     }
+
+     if (entry != null) {
+         if (entry.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) { // env out!!!
+             throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP));
+         }
+     } else {
+         // dup the name here as oni does ?, what for ? (it has to manage it, we don't)
+         entry = new NameEntry(name, nameP, nameEnd);
+         nameTable.putDirect(name, nameP, nameEnd, entry);
+     }
+
+     entry.addBackref(backRef);
+ }
+
+ /**
+  * Returns the name entry for {@code name[nameP..nameEnd]}; a plain
+  * delegate to {@code nameFind}, so null when the name is unknown to it.
+  */
+ NameEntry nameToGroupNumbers(byte[]name, int nameP, int nameEnd) {
+     return nameFind(name, nameP, nameEnd);
+ }
+
+ /**
+  * Resolves a group name to a single group number.
+  *
+  * If the name is bound to exactly one group, that group is returned. If it
+  * is bound to several and a region is supplied, the groups are scanned from
+  * the last registered to the first and the first one whose start position
+  * in the region is set is returned; if none is set, or no region is given,
+  * the last registered group is returned.
+  *
+  * @param name array holding the name
+  * @param nameP start offset of the name
+  * @param nameEnd end offset of the name
+  * @param region match result used to choose among several groups; may be null
+  * @return the resolved group number
+  * @throws ValueException if the name is not defined
+  * @throws InternalException if the entry exists but holds no group
+  */
+ public int nameToBackrefNumber(byte[]name, int nameP, int nameEnd, Region region) {
+     NameEntry e = nameToGroupNumbers(name, nameP, nameEnd);
+     if (e == null) throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE,
+             new String(name, nameP, nameEnd - nameP));
+
+     switch(e.backNum) {
+     case 0:
+         throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+     case 1:
+         return e.backRef1;
+     default:
+         // The region can only be consulted when one was supplied. The guard used
+         // to be inverted, which dereferenced a null region and ignored a real one.
+         if (region != null) {
+             for (int i = e.backNum - 1; i>=0; i--) {
+                 if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i];
+             }
+         }
+         return e.backRefs[e.backNum - 1];
+     }
+ }
+
+ /**
+  * Tells whether unnamed groups capture for this regex.
+  *
+  * They do not when the DONT_CAPTURE_GROUP option is set. With named-group
+  * support enabled they also do not when at least one name is defined, the
+  * syntax captures only named groups and CAPTURE_GROUP is not set.
+  */
+ boolean noNameGroupIsActive(Syntax syntax) {
+     if (isDontCaptureGroup(options)) return false;
+
+     final boolean onlyNamedGroupsCapture = Config.USE_NAMED_GROUP
+             && numberOfNames() > 0
+             && syntax.captureOnlyNamedGroup()
+             && !isCaptureGroup(options);
+     return !onlyNamedGroupsCapture;
+ }
+
+ /**
+  * Sets the skip map for Boyer-Moore search from the exact string
+  * {@code exact[exactP..exactEnd]}.
+  *
+  * Every byte value gets a default skip equal to the pattern length; each
+  * pattern byte except the last gets its distance to the pattern end.
+  * Patterns shorter than {@code Config.CHAR_TABLE_SIZE} use the byte table
+  * {@code map}, longer ones the int table {@code intMap}.
+  *
+  * NOTE: {@code map} holds skips as Java bytes, so a skip above 127 is
+  * stored as a negative value and has to be masked with 0xff when read.
+  */
+ void setupBMSkipMap() {
+     byte[]bytes = exact;
+     int p = exactP;
+     int end = exactEnd;
+     int len = end - p;
+     if (map == null) map = new byte[Config.CHAR_TABLE_SIZE]; // ?? but seems to be safe
+
+     // map/skip
+
+     if (len < Config.CHAR_TABLE_SIZE) {
+         for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len;
+         for (int i=0; i<len-1; i++) map[bytes[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
+     } else {
+         if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE];
+
+         // Bytes that do not occur in the pattern must skip the whole pattern.
+         // Without this default they keep a skip of 0 (or a stale value) and
+         // the forward search would not advance.
+         for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) intMap[i] = len;
+         for (int i=0; i<len-1; i++) intMap[bytes[p + i] & 0xff] = len - 1 - i; // oxff ??
+     }
+ }
+
+ /**
+  * Installs an exact-string optimisation: records the string, picks the
+  * search algorithm and copies the distance information.
+  *
+  * Does nothing when the exact string is empty. Case-insensitive strings use
+  * the table-driven single-byte search when the encoding offers a lower-case
+  * table, the generic case-folding search otherwise. Case-sensitive strings
+  * use Boyer-Moore when they are at least 3 bytes long (2 if the encoding
+  * allows reverse matching), a plain scan otherwise.
+  *
+  * @param e exact-string information; its byte array is shared, not copied
+  */
+ void setExactInfo(OptExactInfo e) {
+     if (e.length == 0) return;
+
+     // shall we copy that ?
+     exact = e.s;
+     exactP = 0;
+     exactEnd = e.length;
+
+     if (e.ignoreCase) {
+         // encodings won't return toLowerTable for case insensitive search if it's not safe to use it directly
+         if (enc.toLowerCaseTable() != null) {
+             searchAlgorithm = SearchAlgorithm.SLOW_IC_SB;
+         } else {
+             searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
+         }
+     } else {
+         final boolean allowReverse = enc.isReverseMatchAllowed(exact, exactP, exactEnd);
+         final boolean longEnoughForBM = e.length >= 3 || (e.length >= 2 && allowReverse);
+
+         if (!longEnoughForBM) {
+             searchAlgorithm = enc.isSingleByte() ? SearchAlgorithm.SLOW_SB : SearchAlgorithm.SLOW;
+         } else {
+             setupBMSkipMap();
+             searchAlgorithm = allowReverse ? SearchAlgorithm.BM : SearchAlgorithm.BM_NOT_REV;
+         }
+     }
+
+     dMin = e.mmd.min;
+     dMax = e.mmd.max;
+
+     if (dMin != MinMaxLen.INFINITE_DISTANCE) {
+         thresholdLength = dMin + (exactEnd - exactP);
+     }
+ }
+
+ /**
+  * Installs a char-map optimisation: adopts the map, selects the map search
+  * matching the encoding width and copies the distance information.
+  *
+  * @param m map information; its table is shared by reference, not copied
+  */
+ void setOptimizeMapInfo(OptMapInfo m) {
+     // the table is adopted as is; do we really have to copy that ???
+     map = m.map;
+
+     if (enc.isSingleByte()) {
+         searchAlgorithm = SearchAlgorithm.MAP_SB;
+     } else {
+         searchAlgorithm = SearchAlgorithm.MAP;
+     }
+
+     dMin = m.mmd.min;
+     dMax = m.mmd.max;
+
+     if (dMin != MinMaxLen.INFINITE_DISTANCE) {
+         thresholdLength = dMin + 1;
+     }
+ }
+
+ /**
+  * Adds to {@code subAnchor} the BEGIN_LINE bit of the left anchor and the
+  * END_LINE bit of the right anchor; all other anchor bits are ignored.
+  */
+ void setSubAnchor(OptAnchorInfo anc) {
+     final int beginLine = anc.leftAnchor & AnchorType.BEGIN_LINE;
+     final int endLine = anc.rightAnchor & AnchorType.END_LINE;
+     subAnchor |= beginLine | endLine;
+ }
+
+ /**
+  * Resets the search algorithm, the anchor fields and the exact-string
+  * fields. The skip maps, {@code dMin}/{@code dMax} and
+  * {@code thresholdLength} are not touched here.
+  */
+ void clearOptimizeInfo() {
+     searchAlgorithm = SearchAlgorithm.NONE;
+
+     anchor = 0;
+     anchorDmin = 0;
+     anchorDmax = 0;
+     subAnchor = 0;
+
+     exact = null;
+     exactP = 0;
+     exactEnd = 0;
+ }
+
+ /**
+  * Renders {@code bytes[p..end]} as a debug string of the form
+  * "PATTERN: /.../" preceded by a newline.
+  *
+  * For encodings whose minimum character length exceeds one byte, each
+  * character is decoded; code points from 0x80 up are printed as a padded
+  * hex literal, lower ones as the character itself. For all other encodings
+  * every byte is converted on its own with the platform default charset.
+  *
+  * @param bytes array holding the text
+  * @param p start offset
+  * @param end end offset (exclusive)
+  * @return the formatted string
+  */
+ public String encStringToString(byte[]bytes, int p, int end) {
+     StringBuilder sb = new StringBuilder("\nPATTERN: /");
+
+     if (enc.minLength() > 1) {
+         for (int q = p; q < end; q += enc.length(bytes[q])) {
+             int codePoint = enc.mbcToCode(bytes, q, end);
+             if (codePoint >= 0x80) {
+                 // %x applied to an int cannot raise IllegalFormatConversionException,
+                 // so the former catch block was dead code
+                 sb.append(String.format(" 0x%04x ", codePoint));
+             } else {
+                 sb.append((char)codePoint);
+             }
+         }
+     } else {
+         for (int q = p; q < end; q++) {
+             sb.append(new String(new byte[]{bytes[q]}));
+         }
+     }
+     return sb.append("/").toString();
+ }
+
+ /**
+  * Renders the optimisation state for debugging: search algorithm name,
+  * anchor (with its distance range when an end-of-buffer anchor bit is set),
+  * sub anchor (when a search algorithm is in use), threshold length and
+  * then either the exact string or, for the map searches, the byte values
+  * with a non-zero map entry.
+  */
+ public String optimizeInfoToString() {
+     StringBuilder out = new StringBuilder();
+     out.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
+     out.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
+
+     if ((anchor & AnchorType.END_BUF_MASK) != 0) {
+         out.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
+     }
+
+     out.append("\n");
+
+     if (searchAlgorithm != SearchAlgorithm.NONE) {
+         out.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
+     }
+
+     out.append("threshold length: ").append(thresholdLength);
+     out.append("\n");
+
+     if (exact != null) {
+         out.append("exact: [").append(new String(exact, exactP, exactEnd - exactP));
+         out.append("]: length: ").append(exactEnd - exactP).append("\n");
+     } else if (searchAlgorithm == SearchAlgorithm.MAP || searchAlgorithm == SearchAlgorithm.MAP_SB) {
+         int marked = 0;
+         for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
+             if (map[i] != 0) marked++;
+         }
+
+         out.append("map: n = ").append(marked).append("\n");
+         if (marked > 0) {
+             boolean first = true;
+             out.append("[");
+             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
+                 if (map[i] == 0) continue;
+                 if (!first) out.append(", ");
+                 first = false;
+                 if (enc.maxLength() == 1 && enc.isPrint(i)) {
+                     out.append((char)i);
+                 } else {
+                     out.append(i);
+                 }
+             }
+             out.append("]\n");
+         }
+     }
+     return out.toString();
+ }
+
+ /**
+  * Grows {@code code} by repeated doubling until its length is strictly
+  * greater than {@code size}; existing contents are kept. Does nothing
+  * when the array is already longer than {@code size}.
+  */
+ private void ensure(int size) {
+     if (size < code.length) return;
+
+     int capacity = code.length << 1;
+     while (capacity <= size) capacity <<= 1;
+
+     int[]grown = new int[capacity];
+     System.arraycopy(code, 0, grown, 0, code.length);
+     code = grown;
+ }
+
+ /**
+  * Appends one int to the compiled code, doubling the code array first
+  * when it is full.
+  */
+ void addInt(int i) {
+     final int[]current = code;
+     if (codeLength >= current.length) {
+         int[]grown = new int[current.length << 1];
+         System.arraycopy(current, 0, grown, 0, current.length);
+         code = grown;
+     }
+     code[codeLength++] = i;
+ }
+
+ /**
+  * Stores {@code i} at position {@code offset} of the compiled code, growing
+  * the array first if needed. {@code codeLength} is not changed.
+  */
+ void setInt(int i, int offset) {
+ ensure(offset);
+ code[offset] = i;
+ }
+
+ /**
+  * Stores an operand object and appends its index in the operand table to
+  * the compiled code. The table starts with 4 slots and doubles when full.
+  */
+ void addObject(Object o) {
+     if (operands == null) {
+         operands = new Object[4];
+     } else if (operandLength >= operands.length) {
+         final Object[]previous = operands;
+         operands = new Object[previous.length << 1];
+         System.arraycopy(previous, 0, operands, 0, previous.length);
+     }
+     // the code stream refers to the operand by its table index
+     addInt(operandLength);
+     operands[operandLength++] = o;
+ }
+
+ /**
+  * Appends {@code length} bytes starting at {@code bytes[p]} to the compiled
+  * code, one int per byte. Each byte is widened as a signed value.
+  */
+ void addBytes(byte[]bytes, int p ,int length) {
+     ensure(codeLength + length);
+
+     for (int i = p, stop = p + length; i < stop; i++) {
+         code[codeLength++] = bytes[i];
+     }
+ }
+
+ /**
+  * Appends the first {@code length} elements of {@code ints} to the compiled
+  * code, growing the code array first if needed.
+  */
+ void addInts(int[]ints, int length) {
+ ensure(codeLength + length);
+ System.arraycopy(ints, 0, code, codeLength, length);
+ codeLength += length;
+ }
+
+ /** Returns the option bits stored on this regex. */
+ public int getOptions() {
+ return options;
+ }
+
+ /** Returns the encoding this regex was created with. */
+ public Encoding getEncoding() {
+ return enc;
+ }
+
+ /**
+  * rb_reg_adjust_startpos
+  *
+  * Moves a start position onto a character head. Nothing is adjusted when
+  * {@code pos} is not strictly between 0 and {@code len} or when the
+  * encoding's maximum character length is 1. Otherwise the position is
+  * adjusted with the encoding's right adjustment when the remaining range
+  * is positive, with its left adjustment otherwise.
+  *
+  * @param str text bytes
+  * @param start offset of the text inside {@code str}
+  * @param len length of the text
+  * @param pos start position relative to {@code start}
+  * @param reverse true for a backward search
+  * @return the adjusted position, relative to {@code start}
+  */
+ public int adjustStartPosition(byte[] str, int start, int len, int pos, boolean reverse) {
+     if (pos <= 0 || enc.maxLength() == 1 || pos >= len) return pos;
+
+     final int range = reverse ? -pos : len - pos;
+     final int head = range > 0
+             ? enc.rightAdjustCharHead(str, start, start + pos)
+             : enc.leftAdjustCharHead(str, start, start + pos);
+     return head - start;
+ }
+}
diff --git a/src/org/joni/Region.java b/src/org/joni/Region.java
new file mode 100644
index 0000000..08b90f9
--- /dev/null
+++ b/src/org/joni/Region.java
@@ -0,0 +1,66 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+ /**
+  * Start and end positions of the registers of a match, plus an optional
+  * capture history tree. A register holding {@code REGION_NOTPOS} is unset.
+  */
+ public final class Region {
+     static final int REGION_NOTPOS = -1;
+
+     public final int numRegs;
+     public final int[]beg;
+     public final int[]end;
+     public CaptureTreeNode historyRoot;
+
+     /** Creates a region with {@code num} registers, all positions initially 0. */
+     public Region(int num) {
+         numRegs = num;
+         beg = new int[num];
+         end = new int[num];
+     }
+
+     /** Creates a one-register region holding the given start and end. */
+     public Region(int begin, int end) {
+         this.numRegs = 1;
+         this.beg = new int[]{begin};
+         this.end = new int[]{end};
+     }
+
+     /** Returns a copy with its own position arrays and a cloned history tree. */
+     @Override
+     public Region clone() {
+         Region copy = new Region(numRegs);
+         System.arraycopy(beg, 0, copy.beg, 0, beg.length);
+         System.arraycopy(end, 0, copy.end, 0, end.length);
+         if (historyRoot != null) {
+             copy.historyRoot = historyRoot.cloneTree();
+         }
+         return copy;
+     }
+
+     /** Lists every register as "index: (begin-end)" after a header line. */
+     @Override
+     public String toString() {
+         StringBuilder out = new StringBuilder("Region: \n");
+         for (int reg = 0; reg < beg.length; reg++) {
+             out.append(" ").append(reg).append(": (").append(beg[reg]).append("-").append(end[reg]).append(")");
+         }
+         return out.toString();
+     }
+
+     CaptureTreeNode getCaptureTree() {
+         return historyRoot;
+     }
+
+     /** Marks every register as unset. */
+     void clear() {
+         // both arrays always have the same length, so filling them separately is equivalent
+         java.util.Arrays.fill(beg, REGION_NOTPOS);
+         java.util.Arrays.fill(end, REGION_NOTPOS);
+     }
+ }
diff --git a/src/org/joni/ScanEnvironment.java b/src/org/joni/ScanEnvironment.java
new file mode 100644
index 0000000..152479d
--- /dev/null
+++ b/src/org/joni/ScanEnvironment.java
@@ -0,0 +1,143 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsClear;
+
+import org.joni.ast.Node;
+import org.joni.encoding.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+ /**
+  * Mutable state shared while one pattern is scanned and compiled: options,
+  * encoding, syntax, the group (memory) nodes and assorted counters.
+  */
+ public final class ScanEnvironment {
+
+     private static final int SCANENV_MEMNODES_SIZE = 8;
+
+     int option;
+     final int caseFoldFlag;
+     final public Encoding enc;
+     final public Syntax syntax;
+     int captureHistory;
+     int btMemStart;
+     int btMemEnd;
+     int backrefedMem;
+
+     final public Regex reg;
+
+     int numCall;
+     UnsetAddrList unsetAddrList; // USE_SUBEXP_CALL
+     public int numMem;
+
+     int numNamed; // USE_NAMED_GROUP
+
+     public Node memNodes[] = new Node[SCANENV_MEMNODES_SIZE]; // should be EncloseNode[] ???
+
+     // USE_COMBINATION_EXPLOSION_CHECK
+     int numCombExpCheck;
+     int combExpMaxRegNum;
+     int currMaxRegNum;
+     boolean hasRecursion;
+
+     /** Takes options, case-fold flag and encoding from the regex; keeps the syntax. */
+     public ScanEnvironment(Regex regex, Syntax syntax) {
+         this.reg = regex;
+         this.syntax = syntax;
+         this.option = regex.options;
+         this.caseFoldFlag = regex.caseFoldFlag;
+         this.enc = regex.enc;
+     }
+
+     /**
+      * Resets all bit sets, counters and flags and empties the group node
+      * table, shrinking it back to its initial size if it had grown.
+      */
+     public void clear() {
+         captureHistory = bsClear();
+         btMemStart = bsClear();
+         btMemEnd = bsClear();
+         backrefedMem = bsClear();
+
+         numCall = 0;
+         numMem = 0;
+
+         numNamed = 0;
+
+         if (memNodes.length > SCANENV_MEMNODES_SIZE) {
+             memNodes = new Node[SCANENV_MEMNODES_SIZE];
+         } else {
+             java.util.Arrays.fill(memNodes, 0, SCANENV_MEMNODES_SIZE, null);
+         }
+
+         numCombExpCheck = 0;
+         combExpMaxRegNum = 0;
+         currMaxRegNum = 0;
+         hasRecursion = false;
+     }
+
+     /**
+      * Allocates the next group number, doubling the node table when the new
+      * number no longer fits as an index.
+      *
+      * @return the new group number, i.e. the incremented {@code numMem}
+      */
+     public int addMemEntry() {
+         final int entry = ++numMem;
+         if (entry >= memNodes.length) {
+             Node[]grown = new Node[memNodes.length << 1];
+             System.arraycopy(memNodes, 0, grown, 0, memNodes.length);
+             memNodes = grown;
+         }
+         return entry;
+     }
+
+     /**
+      * Stores the node of group {@code num}.
+      *
+      * @throws InternalException if {@code num} exceeds the number of allocated groups
+      */
+     public void setMemNode(int num, Node node) {
+         if (numMem < num) {
+             throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+         }
+         memNodes[num] = node;
+     }
+
+     /**
+      * Maps the character following a backslash to the control character it
+      * denotes, when the syntax enables control-character escapes; any other
+      * character is returned unchanged. 'v' is mapped to 11 only when the
+      * syntax enables that escape as well.
+      */
+     public int convertBackslashValue(int c) {
+         if (!syntax.opEscControlChars()) return c;
+
+         switch (c) {
+         case 'n': return '\n';
+         case 't': return '\t';
+         case 'r': return '\r';
+         case 'f': return '\f';
+         case 'a': return '\007';
+         case 'b': return '\010';
+         case 'e': return '\033';
+         case 'v': return syntax.op2EscVVtab() ? 11 : c; // ???
+         default: return c;
+         }
+     }
+
+     /** Warns about an unescaped character inside a character class, if the syntax asks for it. */
+     void ccEscWarn(String s) {
+         if (Config.USE_WARN && syntax.warnCCOpNotEscaped() && syntax.backSlashEscapeInCC()) {
+             reg.warnings.warn("character class has '" + s + "' without escape");
+         }
+     }
+
+     /** Warns about an unescaped character in the pattern, if the syntax asks for it. */
+     void closeBracketWithoutEscapeWarn(String s) {
+         if (Config.USE_WARN && syntax.warnCCOpNotEscaped()) {
+             reg.warnings.warn("regular expression has '" + s + "' without escape");
+         }
+     }
+ }
diff --git a/src/org/joni/ScannerSupport.java b/src/org/joni/ScannerSupport.java
new file mode 100644
index 0000000..696ac64
--- /dev/null
+++ b/src/org/joni/ScannerSupport.java
@@ -0,0 +1,178 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.encoding.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.SyntaxException;
+import org.joni.exception.ValueException;
+
+ /**
+  * Low-level cursor over the bytes of a pattern: fetch/unfetch of code
+  * points, a one-slot position mark, number scanning and helpers that throw
+  * the scanner's exceptions.
+  */
+ abstract class ScannerSupport extends IntHolder implements ErrorMessages {
+     protected final Encoding enc; // fast access to encoding
+
+     protected final byte[]bytes; // pattern
+     protected int p; // current scanner position
+     protected int stop; // pattern end (mutable)
+     private int lastFetched; // last fetched value for unfetch support
+     protected int c; // current code point
+
+     private final int begin; // pattern begin position for reset() support
+     private final int end; // pattern end position for reset() support
+     protected int _p; // used by mark()/restore() to mark positions
+
+     /** Creates a scanner over {@code bytes[p..end]} positioned at {@code p}. */
+     protected ScannerSupport(Encoding enc, byte[]bytes, int p, int end) {
+         this.enc = enc;
+
+         this.bytes = bytes;
+         this.begin = p;
+         this.end = end;
+
+         reset();
+     }
+
+     protected int getBegin() {
+         return begin;
+     }
+
+     protected int getEnd() {
+         return end;
+     }
+
+     /**
+      * Scans a run of decimal digits at the current position and stops in
+      * front of the first non-digit.
+      *
+      * @return the value (0 if there is no digit), or -1 if it does not fit
+      *         into a non-negative int
+      */
+     protected final int scanUnsignedNumber() {
+         int num = 0; // long ???
+         while(left()) {
+             fetch();
+             if (enc.isDigit(c)) {
+                 int val = enc.digitVal(c);
+                 // Check before multiplying: a sign-flip test after the fact misses
+                 // wrap-arounds that end up non-negative again.
+                 if ((Integer.MAX_VALUE - val) / 10 < num) return -1;
+                 num = num * 10 + val;
+             } else {
+                 unfetch();
+                 break;
+             }
+         }
+         return num;
+     }
+
+     /**
+      * Scans up to {@code maxLength} hexadecimal digits at the current
+      * position (no limit if {@code maxLength} is negative) and stops in
+      * front of the first character that is not a hex digit.
+      *
+      * @return the value (0 if there is no digit), or -1 if it does not fit
+      *         into a non-negative int
+      */
+     protected final int scanUnsignedHexadecimalNumber(int maxLength) {
+         int num = 0;
+         while(left() && maxLength-- != 0) {
+             fetch();
+             if (enc.isXDigit(c)) {
+                 int val = enc.xdigitVal(c);
+                 // bound check instead of a sign test, which misses bits shifted out entirely
+                 if ((Integer.MAX_VALUE - val) / 16 < num) return -1;
+                 num = (num << 4) + val;
+             } else {
+                 unfetch();
+                 break;
+             }
+         }
+         return num;
+     }
+
+     /**
+      * Scans up to {@code maxLength} octal digits at the current position (no
+      * limit if {@code maxLength} is negative) and stops in front of the
+      * first character that is not an octal digit.
+      *
+      * @return the value (0 if there is no digit), or -1 if it does not fit
+      *         into a non-negative int
+      */
+     protected final int scanUnsignedOctalNumber(int maxLength) {
+         int num = 0;
+         while(left() && maxLength-- != 0) {
+             fetch();
+             if (enc.isDigit(c) && c < '8') {
+                 int val = enc.odigitVal(c);
+                 // bound check instead of a sign test, which misses bits shifted out entirely
+                 if ((Integer.MAX_VALUE - val) / 8 < num) return -1;
+                 num = (num << 3) + val;
+             } else {
+                 unfetch();
+                 break;
+             }
+         }
+         return num;
+     }
+
+     /** Rewinds to the pattern begin and restores the original pattern end. */
+     protected final void reset() {
+         p = begin;
+         stop = end;
+     }
+
+     /** Remembers the current position; there is a single slot, so marks do not nest. */
+     protected final void mark() {
+         _p = p;
+     }
+
+     /** Returns to the position remembered by the last {@link #mark()}. */
+     protected final void restore() {
+         p = _p;
+     }
+
+     /** Skips one character without decoding it; the skip can be undone with {@link #unfetch()}. */
+     protected final void inc() {
+         lastFetched = p;
+         p += enc.length(bytes[p]);
+     }
+
+     /** Decodes the character at the current position into {@link #c} and moves past it. */
+     protected final void fetch() {
+         c = enc.mbcToCode(bytes, p, stop);
+         lastFetched = p;
+         p += enc.length(bytes[p]);
+     }
+
+     /** Like {@link #fetch()}, but returns the code point and leaves {@link #c} untouched. */
+     protected int fetchTo() {
+         int to = enc.mbcToCode(bytes, p, stop);
+         lastFetched = p;
+         p += enc.length(bytes[p]);
+         return to;
+     }
+
+     /** Moves back to where the last fetch or inc started; only one step can be undone. */
+     protected final void unfetch() {
+         p = lastFetched;
+     }
+
+     /** Returns the code point at the current position without moving, or 0 at the end. */
+     protected final int peek() {
+         return p < stop ? enc.mbcToCode(bytes, p, stop) : 0;
+     }
+
+     protected final boolean peekIs(int c) {
+         return peek() == c;
+     }
+
+     /** Tells whether any input is left before {@link #stop}. */
+     protected final boolean left() {
+         return p < stop;
+     }
+
+     /** Always throws a {@code SyntaxException} with the given message. */
+     protected void newSyntaxException(String message) {
+         throw new SyntaxException(message);
+     }
+
+     /** Always throws a {@code ValueException} with the given message. */
+     protected void newValueException(String message) {
+         throw new ValueException(message);
+     }
+
+     /** Always throws a {@code ValueException} with the given message and detail string. */
+     protected void newValueException(String message, String str) {
+         throw new ValueException(message, str);
+     }
+
+     /** Always throws a {@code ValueException} whose detail is the pattern slice {@code bytes[p..end]}. */
+     protected void newValueException(String message, int p, int end) {
+         throw new ValueException(message, new String(bytes, p, end - p));
+     }
+
+     /** Always throws an {@code InternalException} with the given message. */
+     protected void newInternalException(String message) {
+         throw new InternalException(message);
+     }
+
+ }
diff --git a/src/org/joni/SearchAlgorithm.java b/src/org/joni/SearchAlgorithm.java
new file mode 100644
index 0000000..ab9712f
--- /dev/null
+++ b/src/org/joni/SearchAlgorithm.java
@@ -0,0 +1,526 @@
+package org.joni;
+
+import org.joni.encoding.Encoding;
+
+public abstract class SearchAlgorithm {
+
+ public abstract String getName();
+ public abstract int search(Regex regex, byte[]text, int textP, int textEnd, int textRange);
+ public abstract int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_);
+
+
+ /**
+  * Search that does no pre-filtering: both directions return the start
+  * position {@code textP} they were given.
+  */
+ public static final SearchAlgorithm NONE = new SearchAlgorithm() {
+
+ public final String getName() {
+ return "NONE";
+ }
+
+ public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+ return textP;
+ }
+
+ public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+ return textP;
+ }
+
+ };
+
+ /**
+  * Plain byte-wise search for the regex's exact string that steps through
+  * the text one encoded character at a time.
+  */
+ public static final SearchAlgorithm SLOW = new SearchAlgorithm() {
+
+     public final String getName() {
+         return "EXACT";
+     }
+
+     /**
+      * Scans forward from {@code textP}. Candidates lie below
+      * {@code textRange} and leave room for the whole string before
+      * {@code textEnd}.
+      *
+      * @return the index of the first occurrence, or -1
+      */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final Encoding enc = regex.enc;
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int limit = textEnd - (patEnd - patStart - 1);
+         if (limit > textRange) limit = textRange;
+
+         for (int pos = textP; pos < limit; pos += enc.length(text[pos])) {
+             if (matchesAt(text, pos, pattern, patStart, patEnd)) return pos;
+         }
+         return -1;
+     }
+
+     /**
+      * Scans backward, starting at {@code textStart} or at the last position
+      * where the string still fits before {@code textEnd}, whichever is
+      * lower, down to {@code textP}.
+      *
+      * @return the index of the first occurrence found, or -1
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         final Encoding enc = regex.enc;
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int pos = textEnd - (patEnd - patStart);
+         if (pos > textStart) {
+             pos = textStart;
+         } else {
+             pos = enc.leftAdjustCharHead(text, adjustText, pos);
+         }
+
+         while (pos >= textP) {
+             if (matchesAt(text, pos, pattern, patStart, patEnd)) return pos;
+             pos = enc.prevCharHead(text, adjustText, pos);
+         }
+         return -1;
+     }
+
+     /** Compares the pattern with the text at {@code pos}, first byte first. */
+     private boolean matchesAt(byte[]text, int pos, byte[]pattern, int patStart, int patEnd) {
+         if (text[pos] != pattern[patStart]) return false;
+
+         int t = patStart + 1;
+         int q = pos + 1;
+         while (t < patEnd && pattern[t] == text[q]) {
+             t++;
+             q++;
+         }
+         return t == patEnd;
+     }
+ };
+
+ /**
+  * Plain byte-wise search for the regex's exact string that steps through
+  * the text one byte at a time.
+  */
+ public static final SearchAlgorithm SLOW_SB = new SearchAlgorithm() {
+
+     public final String getName() {
+         return "EXACT_SB";
+     }
+
+     /**
+      * Scans forward from {@code textP}. Candidates lie below
+      * {@code textRange} and leave room for the whole string before
+      * {@code textEnd}.
+      *
+      * @return the index of the first occurrence, or -1
+      */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int limit = textEnd - (patEnd - patStart - 1);
+         if (limit > textRange) limit = textRange;
+
+         for (int pos = textP; pos < limit; pos++) {
+             if (matchesAt(text, pos, pattern, patStart, patEnd)) return pos;
+         }
+         return -1;
+     }
+
+     /**
+      * Scans backward, starting at {@code textStart} or at the last position
+      * where the string still fits before {@code textEnd}, whichever is
+      * lower, down to {@code textP}.
+      *
+      * @return the index of the first occurrence found, or -1
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int pos = textEnd - (patEnd - patStart);
+         if (pos > textStart) pos = textStart;
+
+         for (; pos >= textP; pos--) {
+             if (matchesAt(text, pos, pattern, patStart, patEnd)) return pos;
+         }
+         return -1;
+     }
+
+     /** Compares the pattern with the text at {@code pos}, first byte first. */
+     private boolean matchesAt(byte[]text, int pos, byte[]pattern, int patStart, int patEnd) {
+         if (text[pos] != pattern[patStart]) return false;
+
+         int t = patStart + 1;
+         int q = pos + 1;
+         while (t < patEnd && pattern[t] == text[q]) {
+             t++;
+             q++;
+         }
+         return t == patEnd;
+     }
+ };
+
+
+ /**
+  * Case-insensitive search for the regex's exact string: the text is
+  * case-folded character by character through the encoding and compared
+  * with the stored string.
+  *
+  * NOTE: the fold buffer and position holder are instance fields reused by
+  * every call on this object.
+  */
+ public static final class SLOW_IC extends SearchAlgorithm {
+     private final byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN];
+     private final IntHolder holder = new IntHolder();
+     private final int caseFoldFlag;
+     private final Encoding enc;
+
+     /** Takes the case-fold flag and the encoding from the regex. */
+     public SLOW_IC(Regex regex) {
+         this.caseFoldFlag = regex.caseFoldFlag;
+         this.enc = regex.enc;
+     }
+
+     public final String getName() {
+         return "EXACT_IC";
+     }
+
+     /**
+      * Scans forward from {@code textP}, one encoded character at a time.
+      *
+      * @return the index of the first case-insensitive occurrence, or -1
+      */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int limit = textEnd - (patEnd - patStart - 1);
+         if (limit > textRange) limit = textRange;
+
+         for (int pos = textP; pos < limit; pos += enc.length(text[pos])) {
+             if (lowerCaseMatch(pattern, patStart, patEnd, text, pos, textEnd)) return pos;
+         }
+         return -1;
+     }
+
+     /**
+      * Scans backward from {@code textStart}, or from the last position
+      * where the string still fits if that is lower, down to {@code textP}.
+      *
+      * @return the index of the first case-insensitive occurrence found, or -1
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int pos = textEnd - (patEnd - patStart);
+         if (pos > textStart) {
+             pos = textStart;
+         } else {
+             pos = enc.leftAdjustCharHead(text, adjustText, pos);
+         }
+
+         while (pos >= textP) {
+             if (lowerCaseMatch(pattern, patStart, patEnd, text, pos, textEnd)) return pos;
+             pos = enc.prevCharHead(text, adjustText, pos);
+         }
+         return -1;
+     }
+
+     /**
+      * Folds the text from {@code p} on, one character per step, and compares
+      * the folded bytes with {@code t[tP..tEnd]}.
+      */
+     private boolean lowerCaseMatch(byte[]t, int tP, int tEnd,
+                                    byte[]bytes, int p, int end) {
+
+         holder.value = p;
+         while (tP < tEnd) {
+             final int folded = enc.mbcCaseFold(caseFoldFlag, bytes, holder, end, buf);
+             for (int q = 0; q < folded; q++) {
+                 if (t[tP++] != buf[q]) return false;
+             }
+         }
+         return true;
+     }
+ }
+
+ /**
+  * Case-insensitive search for the regex's exact string that maps every
+  * text byte through the encoding's lower-case table and steps through the
+  * text one byte at a time.
+  */
+ public static final SearchAlgorithm SLOW_IC_SB = new SearchAlgorithm() {
+
+     public final String getName() {
+         return "EXACT_IC_SB";
+     }
+
+     /**
+      * Scans forward from {@code textP}.
+      *
+      * @return the index of the first case-insensitive occurrence, or -1
+      */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final byte[]lower = regex.enc.toLowerCaseTable();
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int limit = textEnd - (patEnd - patStart - 1);
+         if (limit > textRange) limit = textRange;
+
+         for (int pos = textP; pos < limit; pos++) {
+             if (matchesAt(lower, text, pos, pattern, patStart, patEnd)) return pos;
+         }
+         return -1;
+     }
+
+     /**
+      * Scans backward from {@code textStart}, or from the last position
+      * where the string still fits if that is lower, down to {@code textP}.
+      *
+      * @return the index of the first case-insensitive occurrence found, or -1
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         final byte[]lower = regex.enc.toLowerCaseTable();
+         final byte[]pattern = regex.exact;
+         final int patStart = regex.exactP;
+         final int patEnd = regex.exactEnd;
+
+         int pos = textEnd - (patEnd - patStart);
+         if (pos > textStart) pos = textStart;
+
+         for (; pos >= textP; pos--) {
+             if (matchesAt(lower, text, pos, pattern, patStart, patEnd)) return pos;
+         }
+         return -1;
+     }
+
+     /** Compares the pattern with the lower-cased text at {@code pos}, first byte first. */
+     private boolean matchesAt(byte[]lower, byte[]text, int pos, byte[]pattern, int patStart, int patEnd) {
+         if (pattern[patStart] != lower[text[pos] & 0xff]) return false;
+
+         int t = patStart + 1;
+         int q = pos + 1;
+         while (t < patEnd && pattern[t] == lower[text[q] & 0xff]) {
+             t++;
+             q++;
+         }
+         return t == patEnd;
+     }
+
+ };
+
+ /**
+  * Boyer-Moore search for the regex's exact string. The forward search
+  * compares from the pattern tail and advances by the skip stored for the
+  * text byte under the tail; it uses {@code regex.map} unless
+  * {@code regex.intMap} has been allocated. The backward search builds
+  * {@code regex.intMapBackward} on first use.
+  */
+ public static final SearchAlgorithm BM = new SearchAlgorithm() {
+
+     public final String getName() {
+         return "EXACT_BM";
+     }
+
+     /**
+      * Scans forward from {@code textP}.
+      *
+      * @return the index of the first occurrence, or -1
+      */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         byte[]target = regex.exact;
+         int targetP = regex.exactP;
+         int targetEnd = regex.exactEnd;
+
+         int end = textRange + (targetEnd - targetP) - 1;
+         if (end > textEnd) end = textEnd;
+
+         int tail = targetEnd - 1;
+         int s = textP + (targetEnd - targetP) - 1;
+
+         if (regex.intMap == null) {
+             while (s < end) {
+                 int p = s;
+                 int t = tail;
+                 while (t >= targetP && text[p] == target[t]) {
+                     p--; t--;
+                 }
+                 if (t < targetP) return p + 1;
+                 // The skip is stored in a signed byte; mask it so that skips above
+                 // 127 are not read back as negative distances.
+                 s += regex.map[text[s] & 0xff] & 0xff;
+             }
+         } else { /* see int_map[] */
+             while (s < end) {
+                 int p = s;
+                 int t = tail;
+                 while (t >= targetP && text[p] == target[t]) {
+                     p--; t--;
+                 }
+                 if (t < targetP) return p + 1;
+                 s += regex.intMap[text[s] & 0xff];
+             }
+         }
+         return -1;
+     }
+
+     private static final int BM_BACKWARD_SEARCH_LENGTH_THRESHOLD = 100;
+
+     /**
+      * Scans backward. While no backward skip table exists and the distance
+      * {@code s_ - range_} is below the threshold, the work is delegated to
+      * {@code SLOW.searchBackward}; otherwise the table is built and used.
+      *
+      * @return the index of the first occurrence found, or -1
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         Encoding enc = regex.enc;
+         byte[]target = regex.exact;
+         int targetP = regex.exactP;
+         int targetEnd = regex.exactEnd;
+
+         if (regex.intMapBackward == null) {
+             if (s_ - range_ < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) {
+                 // goto exact_method;
+                 return SLOW.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_);
+             }
+             setBmBackwardSkip(regex, target, targetP, targetEnd);
+         }
+
+         int s = textEnd - (targetEnd - targetP);
+
+         if (textStart < s) {
+             s = textStart;
+         } else {
+             s = enc.leftAdjustCharHead(text, adjustText, s);
+         }
+
+         while (s >= textP) {
+             int p = s;
+             int t = targetP;
+             while (t < targetEnd && text[p] == target[t]) {
+                 p++; t++;
+             }
+             if (t == targetEnd) return s;
+
+             s -= regex.intMapBackward[text[s] & 0xff];
+             s = enc.leftAdjustCharHead(text, adjustText, s);
+         }
+         return -1;
+     }
+
+     /**
+      * Fills {@code regex.intMapBackward} for the pattern {@code bytes[p..end]}:
+      * the pattern length by default, and for each pattern byte except the
+      * first its smallest offset from the pattern start.
+      */
+     private void setBmBackwardSkip(Regex regex, byte[]bytes, int p, int end) {
+         int[] skip;
+         if (regex.intMapBackward == null) {
+             skip = new int[Config.CHAR_TABLE_SIZE];
+             regex.intMapBackward = skip;
+         } else {
+             skip = regex.intMapBackward;
+         }
+
+         int len = end - p;
+
+         for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) skip[i] = len;
+         // i is an offset into the pattern, which starts at p rather than at index 0
+         for (int i=len-1; i>0; i--) skip[bytes[p + i] & 0xff] = i;
+     }
+ };
+
+ /**
+  * Skip-table search variant that reports the match start directly and advances
+  * in whole characters (via {@code enc.length}) rather than raw byte skips.
+  */
+ public static final SearchAlgorithm BM_NOT_REV = new SearchAlgorithm() {
+
+     /** Name reported for this algorithm. */
+     public final String getName() {
+         return "EXACT_BM_NOT_REV";
+     }
+
+     /**
+      * Forward search. For each candidate start {@code s} the target is compared
+      * right to left; on a mismatch {@code s} moves forward character by character
+      * until it has covered at least the skip distance looked up for the byte
+      * under the target's last position, or reaches the end of the range.
+      * Returns the match start, or -1.
+      */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final Encoding enc = regex.enc;
+         final byte[]target = regex.exact;
+         final int targetP = regex.exactP;
+         final int targetEnd = regex.exactEnd;
+
+         final int tail = targetEnd - 1;
+         final int tlen1 = tail - targetP;
+
+         if (Config.DEBUG_SEARCH) {
+             Config.log.println("bm_search_notrev: "+
+                     "text: " + textP +
+                     ", text_end: " + textEnd +
+                     ", text_range: " + textRange);
+         }
+
+         int end = textRange;
+         if (end + tlen1 > textEnd) end = textEnd - tlen1;
+
+         // the byte-sized table is used only when no int table is present
+         final boolean useByteMap = regex.intMap == null;
+
+         int s = textP;
+         while (s < end) {
+             final int se = s + tlen1;
+             int p = se;
+             int t = tail;
+             while (t >= targetP && text[p] == target[t]) {
+                 p--; t--;
+             }
+
+             if (t < targetP) return s;
+
+             final int idx = text[se] & 0xff;
+             final int skip = useByteMap ? regex.map[idx] : regex.intMap[idx];
+             final int from = s;
+             do {
+                 s += enc.length(text[s]);
+             } while ((s - from) < skip && s < end);
+         }
+         return -1;
+     }
+
+     /** Backward search is delegated unchanged to {@code BM.searchBackward}. */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         return BM.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_);
+     }
+ };
+
+
+ /**
+  * Finds the first character whose leading byte has a non-zero entry in
+  * {@code regex.map}, stepping through the text character by character.
+  */
+ public static final SearchAlgorithm MAP = new SearchAlgorithm() {
+
+     /** Name reported for this algorithm. */
+     public final String getName() {
+         return "MAP";
+     }
+
+     /** Forward scan over {@code [textP, textRange)}; returns the hit index or -1. */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final Encoding enc = regex.enc;
+         final byte[]map = regex.map;
+
+         for (int s = textP; s < textRange; s += enc.length(text[s])) {
+             if (map[text[s] & 0xff] != 0) return s;
+         }
+         return -1;
+     }
+
+     /**
+      * Backward scan starting at {@code textStart} (clamped to {@code textEnd - 1})
+      * and moving to the previous character head until the position drops below
+      * {@code textP}; returns the hit index or -1.
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         final Encoding enc = regex.enc;
+         final byte[]map = regex.map;
+
+         // multibyte safe ?
+         int s = textStart >= textEnd ? textEnd - 1 : textStart;
+         for (; s >= textP; s = enc.prevCharHead(text, adjustText, s)) {
+             if (map[text[s] & 0xff] != 0) return s;
+         }
+         return -1;
+     }
+ };
+
+ /**
+  * Finds the first byte with a non-zero entry in {@code regex.map}, stepping one
+  * byte at a time.
+  */
+ public static final SearchAlgorithm MAP_SB = new SearchAlgorithm() {
+
+     /** Name reported for this algorithm. */
+     public final String getName() {
+         return "MAP_SB";
+     }
+
+     /** Forward scan over {@code [textP, textRange)}; returns the hit index or -1. */
+     public final int search(Regex regex, byte[]text, int textP, int textEnd, int textRange) {
+         final byte[]map = regex.map;
+
+         for (int s = textP; s < textRange; s++) {
+             if (map[text[s] & 0xff] != 0) return s;
+         }
+         return -1;
+     }
+
+     /**
+      * Backward scan from {@code textStart} (clamped to {@code textEnd - 1}) down
+      * to {@code textP}; returns the hit index or -1.
+      */
+     public final int searchBackward(Regex regex, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+         final byte[]map = regex.map;
+
+         int s = textStart >= textEnd ? textEnd - 1 : textStart;
+         for (; s >= textP; s--) {
+             if (map[text[s] & 0xff] != 0) return s;
+         }
+         return -1;
+     }
+ };
+
+}
diff --git a/src/org/joni/StackEntry.java b/src/org/joni/StackEntry.java
new file mode 100644
index 0000000..001c98d
--- /dev/null
+++ b/src/org/joni/StackEntry.java
@@ -0,0 +1,164 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * One slot of the matcher's backtrack stack.
+ *
+ * The class emulates a C union: four private int slots are shared by several
+ * accessor groups ("union members"), so only the group that corresponds to the
+ * entry's {@link #type} holds meaningful values at any time.
+ */
+final class StackEntry {
+    /** Entry kind; selects which accessor group below is meaningful. */
+    int type;
+
+    /** Shared storage behind every accessor group. */
+    private int slot1, slot2, slot3, slot4;
+
+    // ---- first union member: state ----
+
+    /** Stores the byte code position (slot 1). */
+    void setStatePCode(int pcode) {
+        slot1 = pcode;
+    }
+    /** Byte code position (slot 1). */
+    int getStatePCode() {
+        return slot1;
+    }
+    /** Stores the string position (slot 2). */
+    void setStatePStr(int pstr) {
+        slot2 = pstr;
+    }
+    /** String position (slot 2). */
+    int getStatePStr() {
+        return slot2;
+    }
+    /** Stores the previous char position of pstr (slot 3). */
+    void setStatePStrPrev(int pstrPrev) {
+        slot3 = pstrPrev;
+    }
+    /** Previous char position of pstr (slot 3). */
+    int getStatePStrPrev() {
+        return slot3;
+    }
+
+    /** Stores the state-check value (slot 4). */
+    void setStateCheck(int check) {
+        slot4 = check;
+    }
+    /** State-check value (slot 4). */
+    int getStateCheck() {
+        return slot4;
+    }
+
+    // ---- second union member: repeat (for OP_REPEAT_INC, OP_REPEAT_INC_NG) ----
+
+    /** Stores the repeat counter (slot 1). */
+    void setRepeatCount(int count) {
+        slot1 = count;
+    }
+    /** Repeat counter (slot 1). */
+    int getRepeatCount() {
+        return slot1;
+    }
+    /** Decrements the repeat counter by one. */
+    void decreaseRepeatCount() {
+        slot1 -= 1;
+    }
+    /** Increments the repeat counter by one. */
+    void increaseRepeatCount() {
+        slot1 += 1;
+    }
+    /** Stores the byte code position of the head of the repeated target (slot 2). */
+    void setRepeatPCode(int pcode) {
+        slot2 = pcode;
+    }
+    /** Byte code position of the head of the repeated target (slot 2). */
+    int getRepeatPCode() {
+        return slot2;
+    }
+    /** Stores the repeat id (slot 3). */
+    void setRepeatNum(int num) {
+        slot3 = num;
+    }
+    /** Repeat id (slot 3). */
+    int getRepeatNum() {
+        return slot3;
+    }
+
+    // ---- third union member: repeat_inc ----
+
+    /** Stores a stack index (slot 1). */
+    void setSi(int si) {
+        slot1 = si;
+    }
+    /** Stack index (slot 1). */
+    int getSi() {
+        return slot1;
+    }
+
+    // ---- fourth union member: mem ----
+
+    /** Stores the memory (capture) number (slot 1). */
+    void setMemNum(int num) {
+        slot1 = num;
+    }
+    /** Memory (capture) number (slot 1). */
+    int getMemNum() {
+        return slot1;
+    }
+    /** Stores the start/end string position (slot 2). */
+    void setMemPstr(int pstr) {
+        slot2 = pstr;
+    }
+    /** Start/end string position (slot 2). */
+    int getMemPStr() {
+        return slot2;
+    }
+
+    /* Following information is set, if this stack type is MEM-START */
+
+    /** Stores the previous start info, used when backtracking "(...)*" (slot 3). */
+    void setMemStart(int start) {
+        slot3 = start;
+    }
+    /** Previous start info (slot 3). */
+    int getMemStart() {
+        return slot3;
+    }
+    /** Stores the previous end info, used when backtracking "(...)*" (slot 4). */
+    void setMemEnd(int end) {
+        slot4 = end;
+    }
+    /** Previous end info (slot 4). */
+    int getMemEnd() {
+        return slot4;
+    }
+
+    // ---- fifth union member: null check ----
+
+    /** Stores the null check id (slot 1). */
+    void setNullCheckNum(int num) {
+        slot1 = num;
+    }
+    /** Null check id (slot 1). */
+    int getNullCheckNum() {
+        return slot1;
+    }
+    /** Stores the start position (slot 2). */
+    void setNullCheckPStr(int pstr) {
+        slot2 = pstr;
+    }
+    /** Start position (slot 2). */
+    int getNullCheckPStr() {
+        return slot2;
+    }
+
+    // ---- sixth union member: call frame ----
+
+    /** Stores the return byte code position (slot 1). */
+    void setCallFrameRetAddr(int addr) {
+        slot1 = addr;
+    }
+    /** Return byte code position (slot 1). */
+    int getCallFrameRetAddr() {
+        return slot1;
+    }
+    /** Stores the call frame number (slot 2). */
+    void setCallFrameNum(int num) {
+        slot2 = num;
+    }
+    /** Call frame number (slot 2). */
+    int getCallFrameNum() {
+        return slot2;
+    }
+    /** Stores the string position (slot 3). */
+    void setCallFramePStr(int pstr) {
+        slot3 = pstr;
+    }
+    /** String position (slot 3). */
+    int getCallFramePStr() {
+        return slot3;
+    }
+}
diff --git a/src/org/joni/StackMachine.java b/src/org/joni/StackMachine.java
new file mode 100644
index 0000000..11aa350
--- /dev/null
+++ b/src/org/joni/StackMachine.java
@@ -0,0 +1,621 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.BitStatus.bsAt;
+
+import java.util.Arrays;
+
+import org.joni.constants.StackPopLevel;
+import org.joni.constants.StackType;
+
+abstract class StackMachine extends IntHolder implements StackType {
+ protected static final int INVALID_INDEX = -1;
+
+ protected StackEntry[]stack;
+ protected int stk; // stkEnd
+
+ protected final int[]repeatStk;
+ protected final int memStartStk, memEndStk;
+
+ protected final Regex regex;
+
+ // CEC
+ protected byte[] stateCheckBuff; // move to int[] ?
+ int stateCheckBuffSize;
+
+ public StackMachine(Regex regex) {
+ this.regex = regex;
+ this.stack = regex.stackNeeded ? fetchStack() : null;
+ int n = regex.numRepeat + (regex.numMem << 1);
+ this.repeatStk = n > 0 ? new int[n] : null;
+
+ memStartStk = regex.numRepeat - 1;
+ memEndStk = memStartStk + regex.numMem;
+ /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */
+ /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */
+ }
+
+ private static StackEntry[] allocateStack() {
+ StackEntry[] stack = new StackEntry[Config.INIT_MATCH_STACK_SIZE];
+ for (int i=0; i<Config.INIT_MATCH_STACK_SIZE; i++) stack[i] = new StackEntry();
+ return stack;
+ }
+
+ private void doubleStack() {
+ StackEntry[] newStack = new StackEntry[stack.length << 1];
+ System.arraycopy(stack, 0, newStack, 0, stack.length);
+ for (int i=stack.length; i<newStack.length; i++) newStack[i] = new StackEntry();
+ stack = newStack;
+ }
+
+ /** Per-thread stack handed out by {@link #fetchStack()}. */
+ static final ThreadLocal<StackEntry[]> stacks = new ThreadLocal<StackEntry[]>();
+
+ /** Returns the calling thread's stack, allocating and registering it on first use. */
+ private static StackEntry[] fetchStack() {
+     StackEntry[] cached = stacks.get();
+     if (cached == null) {
+         cached = allocateStack();
+         stacks.set(cached);
+     }
+     return cached;
+ }
+
+ /**
+  * Pushes the bottom ALT entry (when a stack exists) and marks the start/end
+  * slots of every capture group as invalid (when {@code repeatStk} exists).
+  */
+ protected final void init() {
+     if (stack != null) {
+         pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */
+     }
+     if (repeatStk == null) return;
+
+     final int groups = regex.numMem;
+     for (int mem = 1; mem <= groups; mem++) {
+         repeatStk[mem + memEndStk] = INVALID_INDEX;
+         repeatStk[mem + memStartStk] = INVALID_INDEX;
+     }
+ }
+
+ /** Grows the stack when there is no free entry at {@code stk}. */
+ protected final void ensure1() {
+     if (stk < stack.length) return;
+     doubleStack();
+ }
+
+ /** Pushes an entry that carries only a type. */
+ protected final void pushType(int type) {
+     ensure1();
+     final StackEntry entry = stack[stk];
+     stk++;
+     entry.type = type;
+ }
+
+ // ELSE_IF_STATE_CHECK_MARK
+ protected abstract void stateCheckMark();
+ // STATE_CHECK_POS and STATE_CHECK_VAL implemented in byteCodeMachine, CEC only
+
+ // STATE_CHECK_BUFF_INIT
+ /**
+  * Allocates the state-check bit buffer for a string of {@code strLength} bytes
+  * and {@code stateNum} states, or clears it when the check does not apply.
+  *
+  * A buffer of {@code ((strLength + 1) * stateNum + 7) / 8} bytes is created only
+  * when {@code stateNum > 0}, the string is at least
+  * {@code Config.CHECK_STRING_THRESHOLD_LEN} long, the scaled offset lies inside
+  * the buffer and the size is below {@code Config.CHECK_BUFF_MAX_SIZE}.
+  * Otherwise {@code stateCheckBuff} becomes null and {@code stateCheckBuffSize} 0.
+  *
+  * The former range-clear was dropped: it passed a length where
+  * {@code Arrays.fill} expects an end index (throwing once
+  * {@code offset > size - offset}) and a freshly created array is already zeroed.
+  */
+ void stateCheckBuffInit(int strLength, int offset, int stateNum) {
+     byte[] buff = null;
+
+     if (stateNum > 0 && strLength >= Config.CHECK_STRING_THRESHOLD_LEN) {
+         final int size = ((strLength + 1) * stateNum + 7) >>> 3;
+         final int byteOffset = (offset * stateNum) >>> 3;
+
+         if (size > 0 && byteOffset < size && size < Config.CHECK_BUFF_MAX_SIZE) {
+             buff = new byte[size];
+         }
+     }
+
+     stateCheckBuff = buff;
+     stateCheckBuffSize = buff == null ? 0 : buff.length;
+ }
+
+ private void push(int type, int pat, int s, int prev) {
+ ensure1();
+ StackEntry e = stack[stk];
+
+ e.type = type;
+ e.setStatePCode(pat);
+ e.setStatePStr(s);
+ e.setStatePStrPrev(prev);
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+
+ stk++;
+ }
+
+ /**
+  * Pushes a type/code-position entry without checking capacity first; the
+  * string position fields are left as they were.
+  */
+ protected final void pushEnsured(int type, int pat) {
+     final StackEntry entry = stack[stk++];
+
+     entry.type = type;
+     entry.setStatePCode(pat);
+     if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+         entry.setStateCheck(0);
+     }
+ }
+
+ /**
+  * Pushes an ALT entry; its state-check field receives {@code snum} when a
+  * state-check buffer is active and 0 otherwise.
+  */
+ protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = ALT;
+     entry.setStatePCode(pat);
+     entry.setStatePStr(s);
+     entry.setStatePStrPrev(sprev);
+     if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+         final int check = stateCheckBuff != null ? snum : 0;
+         entry.setStateCheck(check);
+     }
+ }
+
+ /** Pushes a STATE_CHECK_MARK entry; does nothing when no state-check buffer is active. */
+ protected final void pushStateCheck(int s, int snum) {
+     if (stateCheckBuff == null) return;
+
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = STATE_CHECK_MARK;
+     entry.setStatePStr(s);
+     entry.setStateCheck(snum);
+ }
+
+ protected final void pushAlt(int pat, int s, int prev) {
+ push(ALT, pat, s, prev);
+ }
+
+ protected final void pushPos(int s, int prev) {
+ push(POS, -1 /*NULL_UCHARP*/, s, prev);
+ }
+
+ protected final void pushPosNot(int pat, int s, int prev) {
+ push(POS_NOT, pat, s, prev);
+ }
+
+ protected final void pushStopBT() {
+ pushType(STOP_BT);
+ }
+
+ protected final void pushLookBehindNot(int pat, int s, int sprev) {
+ push(LOOK_BEHIND_NOT, pat, s, sprev);
+ }
+
+ /** Pushes a REPEAT entry for repeat {@code id} at code position {@code pat} with a count of zero. */
+ protected final void pushRepeat(int id, int pat) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = REPEAT;
+     entry.setRepeatCount(0);
+     entry.setRepeatPCode(pat);
+     entry.setRepeatNum(id);
+ }
+
+ /** Pushes a REPEAT_INC entry that refers to stack index {@code sindex}. */
+ protected final void pushRepeatInc(int sindex) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = REPEAT_INC;
+     entry.setSi(sindex);
+ }
+
+ /**
+  * Pushes a MEM_START entry for group {@code mnum} at string position {@code s}.
+  * The group's current start/end slots are saved in the entry; afterwards the
+  * start slot points at this entry and the end slot is invalidated.
+  */
+ protected final void pushMemStart(int mnum, int s) {
+     ensure1();
+     final int startSlot = memStartStk + mnum;
+     final int endSlot = memEndStk + mnum;
+     final StackEntry entry = stack[stk];
+
+     entry.type = MEM_START;
+     entry.setMemNum(mnum);
+     entry.setMemPstr(s);
+     entry.setMemStart(repeatStk[startSlot]);
+     entry.setMemEnd(repeatStk[endSlot]);
+
+     repeatStk[startSlot] = stk;
+     repeatStk[endSlot] = INVALID_INDEX;
+
+     stk++;
+ }
+
+ /**
+  * Pushes a MEM_END entry for group {@code mnum} at string position {@code s}.
+  * The group's current start/end slots are saved in the entry; afterwards the
+  * end slot points at this entry.
+  */
+ protected final void pushMemEnd(int mnum, int s) {
+     ensure1();
+     final int endSlot = memEndStk + mnum;
+     final StackEntry entry = stack[stk];
+
+     entry.type = MEM_END;
+     entry.setMemNum(mnum);
+     entry.setMemPstr(s);
+     entry.setMemStart(repeatStk[memStartStk + mnum]);
+     entry.setMemEnd(repeatStk[endSlot]);
+
+     repeatStk[endSlot] = stk;
+
+     stk++;
+ }
+
+ /** Pushes a MEM_END_MARK entry carrying only the group number. */
+ protected final void pushMemEndMark(int mnum) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = MEM_END_MARK;
+     entry.setMemNum(mnum);
+ }
+
+ /**
+  * Walks down from the top of the stack and returns the index of the MEM_START
+  * entry for group {@code mnum} that is not paired with a later end/mark entry
+  * of the same group. Returns the index reached (0) when none is found.
+  */
+ protected final int getMemStart(int mnum) {
+     int depth = 0;
+     int idx = stk;
+
+     while (idx > 0) {
+         final StackEntry e = stack[--idx];
+         final boolean isEndOrMark = (e.type & MASK_MEM_END_OR_MARK) != 0;
+
+         if (isEndOrMark && e.getMemNum() == mnum) {
+             depth++;
+         } else if (e.type == MEM_START && e.getMemNum() == mnum) {
+             if (depth == 0) break;
+             depth--;
+         }
+     }
+     return idx;
+ }
+
+ /** Pushes a NULL_CHECK_START entry with check id {@code cnum} and string position {@code s}. */
+ protected final void pushNullCheckStart(int cnum, int s) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = NULL_CHECK_START;
+     entry.setNullCheckNum(cnum);
+     entry.setNullCheckPStr(s);
+ }
+
+ /** Pushes a NULL_CHECK_END entry with check id {@code cnum}. */
+ protected final void pushNullCheckEnd(int cnum) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = NULL_CHECK_END;
+     entry.setNullCheckNum(cnum);
+ }
+
+ /** Pushes a CALL_FRAME entry whose return address is {@code pat}. */
+ protected final void pushCallFrame(int pat) {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = CALL_FRAME;
+     entry.setCallFrameRetAddr(pat);
+ }
+
+ /** Pushes a RETURN entry. */
+ protected final void pushReturn() {
+     ensure1();
+     final StackEntry entry = stack[stk++];
+
+     entry.type = RETURN;
+ }
+
+ // stack debug routines here
+ // ...
+
+ protected final void popOne() {
+ stk--;
+ }
+
+ /**
+  * Pops entries until one whose type has a MASK_POP_USED bit is reached and
+  * returns it. How much bookkeeping is undone on the way depends on
+  * {@code regex.stackPopLevel}:
+  * FREE undoes nothing, MEM_START restores capture slots saved in MEM_START
+  * entries, and every other level additionally undoes REPEAT_INC counts and
+  * restores slots saved in MEM_END entries. STATE_CHECK_MARK entries trigger
+  * {@code stateCheckMark()} when the combination-explosion check is compiled in.
+  */
+ protected final StackEntry pop() {
+     final int level = regex.stackPopLevel;
+
+     if (level == StackPopLevel.FREE) {
+         while (true) {
+             final StackEntry e = stack[--stk];
+
+             if ((e.type & MASK_POP_USED) != 0) return e;
+             if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     if (level == StackPopLevel.MEM_START) {
+         while (true) {
+             final StackEntry e = stack[--stk];
+
+             if ((e.type & MASK_POP_USED) != 0) return e;
+             if (e.type == MEM_START) {
+                 restoreMemSlots(e);
+             } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+                 if (e.type == STATE_CHECK_MARK) stateCheckMark();
+             }
+         }
+     }
+
+     while (true) {
+         final StackEntry e = stack[--stk];
+
+         if ((e.type & MASK_POP_USED) != 0) return e;
+         if (e.type == MEM_START) {
+             restoreMemSlots(e);
+         } else if (e.type == REPEAT_INC) {
+             // undo the increment recorded on the referenced REPEAT entry
+             stack[e.getSi()].decreaseRepeatCount();
+         } else if (e.type == MEM_END) {
+             restoreMemSlots(e);
+         } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+             if (e.type == STATE_CHECK_MARK) stateCheckMark();
+         }
+     }
+ }
+
+ /** Writes the start/end values saved in {@code e} back into the slots of its capture group. */
+ private void restoreMemSlots(StackEntry e) {
+     repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+     repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+ }
+
+ /**
+  * Pops entries until a POS_NOT entry has been removed, undoing on the way the
+  * capture slots saved in MEM_START/MEM_END entries and the counts recorded by
+  * REPEAT_INC entries.
+  *
+  * The end slot is restored from the saved end value (it used to be overwritten
+  * with the saved start value), matching pop() and popTilLookBehindNot().
+  */
+ protected final void popTilPosNot() {
+     while (true) {
+         stk--;
+         StackEntry e = stack[stk];
+
+         if (e.type == POS_NOT) {
+             break;
+         } else if (e.type == MEM_START) {
+             repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+             repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+         } else if (e.type == REPEAT_INC) {
+             //int si = stack[stk + IREPEAT_INC_SI];
+             //stack[si + IREPEAT_COUNT]--;
+             stack[e.getSi()].decreaseRepeatCount();
+         } else if (e.type == MEM_END){
+             repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+             repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+         } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+             if (e.type == STATE_CHECK_MARK) stateCheckMark();
+         }
+     }
+ }
+
+ /**
+  * Pops entries until a LOOK_BEHIND_NOT entry has been removed, undoing on the
+  * way the capture slots saved in MEM_START/MEM_END entries and the counts
+  * recorded by REPEAT_INC entries.
+  */
+ protected final void popTilLookBehindNot() {
+     for (;;) {
+         final StackEntry e = stack[--stk];
+         final int kind = e.type;
+
+         if (kind == LOOK_BEHIND_NOT) return;
+
+         if (kind == MEM_START) {
+             final int num = e.getMemNum();
+             repeatStk[memStartStk + num] = e.getMemStart();
+             repeatStk[memEndStk + num] = e.getMemEnd();
+         } else if (kind == REPEAT_INC) {
+             // undo the increment recorded on the referenced REPEAT entry
+             stack[e.getSi()].decreaseRepeatCount();
+         } else if (kind == MEM_END) {
+             final int num = e.getMemNum();
+             repeatStk[memStartStk + num] = e.getMemStart();
+             repeatStk[memEndStk + num] = e.getMemEnd();
+         } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+             if (kind == STATE_CHECK_MARK) stateCheckMark();
+         }
+     }
+ }
+
+ /**
+  * Walks down from the top of the stack turning every entry with a
+  * MASK_TO_VOID_TARGET bit into VOID, stops at the first POS entry (also
+  * voided) and returns its index. {@code stk} itself is not changed.
+  */
+ protected final int posEnd() {
+     for (int k = stk - 1; ; k--) {
+         final StackEntry e = stack[k];
+
+         if ((e.type & MASK_TO_VOID_TARGET) != 0) {
+             e.type = VOID;
+         } else if (e.type == POS) {
+             e.type = VOID;
+             return k;
+         }
+     }
+ }
+
+ /**
+  * Walks down from the top of the stack turning every entry with a
+  * MASK_TO_VOID_TARGET bit into VOID and stops at the first STOP_BT entry
+  * (also voided). {@code stk} itself is not changed.
+  */
+ protected final void stopBtEnd() {
+     for (int k = stk - 1; ; k--) {
+         final StackEntry e = stack[k];
+
+         if ((e.type & MASK_TO_VOID_TARGET) != 0) {
+             e.type = VOID;
+         } else if (e.type == STOP_BT) {
+             e.type = VOID;
+             return;
+         }
+     }
+ }
+
+ // int for consistency with other null check routines
+ /**
+  * Finds the nearest NULL_CHECK_START entry with the given id and returns 1 when
+  * its recorded string position equals {@code s}, 0 otherwise.
+  * (int for consistency with other null check routines)
+  */
+ protected final int nullCheck(int id, int s) {
+     for (int k = stk - 1; ; k--) {
+         final StackEntry e = stack[k];
+
+         if (e.type == NULL_CHECK_START && e.getNullCheckNum() == id) {
+             return e.getNullCheckPStr() == s ? 1 : 0;
+         }
+     }
+ }
+
+ /**
+  * Nesting-aware variant of {@link #nullCheck(int, int)}: every NULL_CHECK_END
+  * entry met on the way down raises the nesting level, and a NULL_CHECK_START
+  * with the given id is only compared once the level is back to zero.
+  */
+ protected final int nullCheckRec(int id, int s) {
+     int depth = 0;
+     for (int k = stk - 1; ; k--) {
+         final StackEntry e = stack[k];
+
+         if (e.type == NULL_CHECK_START) {
+             if (e.getNullCheckNum() != id) continue;
+             if (depth == 0) {
+                 return e.getNullCheckPStr() == s ? 1 : 0;
+             }
+             depth--;
+         } else if (e.type == NULL_CHECK_END) {
+             depth++;
+         }
+     }
+ }
+
+ /**
+  * Null check that also inspects capture state.
+  *
+  * Finds the nearest NULL_CHECK_START entry with the given id. Returns 0 when
+  * its string position differs from {@code s}. Otherwise every MEM_START entry
+  * between that entry and the top of the stack is examined:
+  * 0 is returned when one has no saved end or its saved start and end positions
+  * differ, -1 when all are empty but at least one end position differs from
+  * {@code s}, and 1 otherwise.
+  *
+  * The upward scan only reads entries below {@code stk}; it previously fetched
+  * {@code stack[stk]} after the last step, which is out of bounds when the
+  * stack is exactly full.
+  */
+ protected final int nullCheckMemSt(int id, int s) {
+     int k = stk;
+     while (true) {
+         k--;
+         StackEntry e = stack[k];
+
+         if (e.type != NULL_CHECK_START || e.getNullCheckNum() != id) continue;
+         if (e.getNullCheckPStr() != s) return 0;
+
+         int isNull = 1;
+         for (; k < stk; k++) {
+             e = stack[k];
+             if (e.type != MEM_START) continue;
+
+             if (e.getMemEnd() == INVALID_INDEX) return 0;
+
+             final int endp;
+             if (bsAt(regex.btMemEnd, e.getMemNum())) {
+                 endp = stack[e.getMemEnd()].getMemPStr();
+             } else {
+                 endp = e.getMemEnd();
+             }
+
+             if (stack[e.getMemStart()].getMemPStr() != endp) return 0;
+             if (endp != s) isNull = -1; /* empty, but position changed */
+         }
+         return isNull;
+     }
+ }
+
+ /**
+  * Nesting-aware null check that also inspects capture state.
+  *
+  * Walks down the stack; NULL_CHECK_END entries with the given id raise the
+  * nesting level and NULL_CHECK_START entries with that id lower it. At the
+  * first such start entry met at level zero the result is 0 when its string
+  * position differs from {@code s}. Otherwise every MEM_START entry between it
+  * and the top of the stack is examined: 0 is returned when one has no saved
+  * end or its saved start and end positions differ, -1 when all are empty but
+  * at least one end position differs from {@code s}, and 1 otherwise.
+  *
+  * The upward scan only reads entries below {@code stk}; it previously fetched
+  * {@code stack[stk]} after the last step, which is out of bounds when the
+  * stack is exactly full.
+  */
+ protected final int nullCheckMemStRec(int id, int s) {
+     int level = 0;
+     int k = stk;
+     while (true) {
+         k--;
+         StackEntry e = stack[k];
+
+         if (e.type == NULL_CHECK_START) {
+             if (e.getNullCheckNum() != id) continue;
+             if (level != 0) {
+                 level--;
+                 continue;
+             }
+             if (e.getNullCheckPStr() != s) return 0;
+
+             int isNull = 1;
+             for (; k < stk; k++) {
+                 e = stack[k];
+                 if (e.type != MEM_START) continue;
+
+                 if (e.getMemEnd() == INVALID_INDEX) return 0;
+
+                 final int endp;
+                 if (bsAt(regex.btMemEnd, e.getMemNum())) {
+                     endp = stack[e.getMemEnd()].getMemPStr();
+                 } else {
+                     endp = e.getMemEnd();
+                 }
+
+                 if (stack[e.getMemStart()].getMemPStr() != endp) return 0;
+                 if (endp != s) isNull = -1; /* empty, but position changed */
+             }
+             return isNull;
+         } else if (e.type == NULL_CHECK_END) {
+             if (e.getNullCheckNum() == id) level++;
+         }
+     }
+ }
+
+ /**
+  * Walks down from the top of the stack and returns the index of the first
+  * REPEAT entry with the given id that is met while the call nesting level is
+  * zero. RETURN entries raise the level and CALL_FRAME entries lower it.
+  */
+ protected final int getRepeat(int id) {
+     int depth = 0;
+     for (int k = stk - 1; ; k--) {
+         final StackEntry e = stack[k];
+
+         if (e.type == REPEAT) {
+             if (depth == 0 && e.getRepeatNum() == id) return k;
+         } else if (e.type == CALL_FRAME) {
+             depth--;
+         } else if (e.type == RETURN) {
+             depth++;
+         }
+     }
+ }
+
+ /**
+  * Walks down from the top of the stack and returns the return address of the
+  * first CALL_FRAME entry met at nesting level zero. RETURN entries raise the
+  * level and CALL_FRAME entries met at a higher level lower it.
+  */
+ protected final int sreturn() {
+     int depth = 0;
+     for (int k = stk - 1; ; k--) {
+         final StackEntry e = stack[k];
+
+         if (e.type == CALL_FRAME) {
+             if (depth == 0) return e.getCallFrameRetAddr();
+             depth--;
+         } else if (e.type == RETURN) {
+             depth++;
+         }
+     }
+ }
+}
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
new file mode 100644
index 0000000..b89abe9
--- /dev/null
+++ b/src/org/joni/Syntax.java
@@ -0,0 +1,606 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import static org.joni.constants.MetaChar.INEFFECTIVE_META_CHAR;
+
+import org.joni.constants.SyntaxProperties;
+
+public final class Syntax implements SyntaxProperties{
+ private final int op;
+ private final int op2;
+ private final int behavior;
+ public final int options;
+ public final MetaCharTable metaCharTable;
+
+ public Syntax(int op, int op2, int behavior, int options, MetaCharTable metaCharTable) {
+ this.op = op;
+ this.op2 = op2;
+ this.behavior = behavior;
+ this.options = options;
+ this.metaCharTable = metaCharTable;
+ }
+
+ public static class MetaCharTable {
+ public final int esc;
+ public final int anyChar;
+ public final int anyTime;
+ public final int zeroOrOneTime;
+ public final int oneOrMoreTime;
+ public final int anyCharAnyTime;
+
+ public MetaCharTable(int esc, int anyChar, int anyTime,
+ int zeroOrOneTime, int oneOrMoreTime, int anyCharAnyTime) {
+ this.esc = esc;
+ this.anyChar = anyChar;
+ this.anyTime = anyTime;
+ this.zeroOrOneTime = zeroOrOneTime;
+ this.oneOrMoreTime = oneOrMoreTime;
+ this.anyCharAnyTime = anyCharAnyTime;
+ }
+ }
+
+ /**
+ * OP
+ *
+ */
+ protected boolean isOp(int opm) {
+ return (op & opm) != 0;
+ }
+
+ public boolean opVariableMetaCharacters() {
+ return isOp(OP_VARIABLE_META_CHARACTERS);
+ }
+
+ public boolean opDotAnyChar() {
+ return isOp(OP_DOT_ANYCHAR);
+ }
+
+ public boolean opAsteriskZeroInf() {
+ return isOp(OP_ASTERISK_ZERO_INF);
+ }
+
+ public boolean opEscAsteriskZeroInf() {
+ return isOp(OP_ESC_ASTERISK_ZERO_INF);
+ }
+
+ public boolean opPlusOneInf() {
+ return isOp(OP_PLUS_ONE_INF);
+ }
+
+ public boolean opEscPlusOneInf() {
+ return isOp(OP_ESC_PLUS_ONE_INF);
+ }
+
+ public boolean opQMarkZeroOne() {
+ return isOp(OP_QMARK_ZERO_ONE);
+ }
+
+ public boolean opEscQMarkZeroOne() {
+ return isOp(OP_ESC_QMARK_ZERO_ONE);
+ }
+
+ public boolean opBraceInterval() {
+ return isOp(OP_BRACE_INTERVAL);
+ }
+
+ public boolean opEscBraceInterval() {
+ return isOp(OP_ESC_BRACE_INTERVAL);
+ }
+
+ public boolean opVBarAlt() {
+ return isOp(OP_VBAR_ALT);
+ }
+
+ public boolean opEscVBarAlt() {
+ return isOp(OP_ESC_VBAR_ALT);
+ }
+
+ public boolean opLParenSubexp() {
+ return isOp(OP_LPAREN_SUBEXP);
+ }
+
+ public boolean opEscLParenSubexp() {
+ return isOp(OP_ESC_LPAREN_SUBEXP);
+ }
+
+ public boolean opEscAZBufAnchor() {
+ return isOp(OP_ESC_AZ_BUF_ANCHOR);
+ }
+
+ public boolean opEscCapitalGBeginAnchor() {
+ return isOp(OP_ESC_CAPITAL_G_BEGIN_ANCHOR);
+ }
+
+ public boolean opDecimalBackref() {
+ return isOp(OP_DECIMAL_BACKREF);
+ }
+
+ public boolean opBracketCC() {
+ return isOp(OP_BRACKET_CC);
+ }
+
+ public boolean opEscWWord() {
+ return isOp(OP_ESC_W_WORD);
+ }
+
+ public boolean opEscLtGtWordBeginEnd() {
+ return isOp(OP_ESC_LTGT_WORD_BEGIN_END);
+ }
+
+ public boolean opEscBWordBound() {
+ return isOp(OP_ESC_B_WORD_BOUND);
+ }
+
+ public boolean opEscSWhiteSpace() {
+ return isOp(OP_ESC_S_WHITE_SPACE);
+ }
+
+ public boolean opEscDDigit() {
+ return isOp(OP_ESC_D_DIGIT);
+ }
+
+ public boolean opLineAnchor() {
+ return isOp(OP_LINE_ANCHOR);
+ }
+
+ public boolean opPosixBracket() {
+ return isOp(OP_POSIX_BRACKET);
+ }
+
+ public boolean opQMarkNonGreedy() {
+ return isOp(OP_QMARK_NON_GREEDY);
+ }
+
+ public boolean opEscControlChars() {
+ return isOp(OP_ESC_CONTROL_CHARS);
+ }
+
+ public boolean opEscCControl() {
+ return isOp(OP_ESC_C_CONTROL);
+ }
+
+ public boolean opEscOctal3() {
+ return isOp(OP_ESC_OCTAL3);
+ }
+
+ public boolean opEscXHex2() {
+ return isOp(OP_ESC_X_HEX2);
+ }
+
+ public boolean opEscXBraceHex8() {
+ return isOp(OP_ESC_X_BRACE_HEX8);
+ }
+
+
+ /**
+ * OP2
+ *
+ */
+ protected boolean isOp2(int opm) {
+ return (op2 & opm) != 0;
+ }
+
+ public boolean op2EscCapitalQQuote() {
+ return isOp2(OP2_ESC_CAPITAL_Q_QUOTE);
+ }
+
+ public boolean op2QMarkGroupEffect() {
+ return isOp2(OP2_QMARK_GROUP_EFFECT);
+ }
+
+ public boolean op2OptionPerl() {
+ return isOp2(OP2_OPTION_PERL);
+ }
+
+ public boolean op2OptionRuby() {
+ return isOp2(OP2_OPTION_RUBY);
+ }
+
+ public boolean op2PlusPossessiveRepeat() {
+ return isOp2(OP2_PLUS_POSSESSIVE_REPEAT);
+ }
+
+ public boolean op2PlusPossessiveInterval() {
+ return isOp2(OP2_PLUS_POSSESSIVE_INTERVAL);
+ }
+
+ public boolean op2CClassSetOp() {
+ return isOp2(OP2_CCLASS_SET_OP);
+ }
+
+ public boolean op2QMarkLtNamedGroup() {
+ return isOp2(OP2_QMARK_LT_NAMED_GROUP);
+ }
+
+ public boolean op2EscKNamedBackref() {
+ return isOp2(OP2_ESC_K_NAMED_BACKREF);
+ }
+
+ public boolean op2EscGSubexpCall() {
+ return isOp2(OP2_ESC_G_SUBEXP_CALL);
+ }
+
+ public boolean op2AtMarkCaptureHistory() {
+ return isOp2(OP2_ATMARK_CAPTURE_HISTORY);
+ }
+
+ public boolean op2EscCapitalCBarControl() {
+ return isOp2(OP2_ESC_CAPITAL_C_BAR_CONTROL);
+ }
+
+ public boolean op2EscCapitalMBarMeta() {
+ return isOp2(OP2_ESC_CAPITAL_M_BAR_META);
+ }
+
+ public boolean op2EscVVtab() {
+ return isOp2(OP2_ESC_V_VTAB);
+ }
+
+ public boolean op2EscUHex4() {
+ return isOp2(OP2_ESC_U_HEX4);
+ }
+
+ public boolean op2EscGnuBufAnchor() {
+ return isOp2(OP2_ESC_GNU_BUF_ANCHOR);
+ }
+
+ public boolean op2EscPBraceCharProperty() {
+ return isOp2(OP2_ESC_P_BRACE_CHAR_PROPERTY);
+ }
+
+ public boolean op2EscPBraceCircumflexNot() {
+ return isOp2(OP2_ESC_P_BRACE_CIRCUMFLEX_NOT);
+ }
+
+ public boolean op2EscHXDigit() {
+ return isOp2(OP2_ESC_H_XDIGIT);
+ }
+
+ public boolean op2IneffectiveEscape() {
+ return isOp2(OP2_INEFFECTIVE_ESCAPE);
+ }
+
+ /**
+ * BEHAVIOR
+ *
+ */
+ protected boolean isBehavior(int bvm) {
+ return (behavior & bvm) != 0;
+ }
+
+ public boolean contextIndepRepeatOps() {
+ return isBehavior(CONTEXT_INDEP_REPEAT_OPS);
+ }
+
+ public boolean contextInvalidRepeatOps() {
+ return isBehavior(CONTEXT_INVALID_REPEAT_OPS);
+ }
+
+ public boolean allowUnmatchedCloseSubexp() {
+ return isBehavior(ALLOW_UNMATCHED_CLOSE_SUBEXP);
+ }
+
+ public boolean allowInvalidInterval() {
+ return isBehavior(ALLOW_INVALID_INTERVAL);
+ }
+
+ public boolean allowIntervalLowAbbrev() {
+ return isBehavior(ALLOW_INTERVAL_LOW_ABBREV);
+ }
+
+ public boolean strictCheckBackref() {
+ return isBehavior(STRICT_CHECK_BACKREF);
+ }
+
+ public boolean differentLengthAltLookBehind() {
+ return isBehavior(DIFFERENT_LEN_ALT_LOOK_BEHIND);
+ }
+
+ public boolean captureOnlyNamedGroup() {
+ return isBehavior(CAPTURE_ONLY_NAMED_GROUP);
+ }
+
+ public boolean allowMultiplexDefinitionName() {
+ return isBehavior(ALLOW_MULTIPLEX_DEFINITION_NAME);
+ }
+
+ public boolean fixedIntervalIsGreedyOnly() {
+ return isBehavior(FIXED_INTERVAL_IS_GREEDY_ONLY);
+ }
+
+
+ public boolean notNewlineInNegativeCC() {
+ return isBehavior(NOT_NEWLINE_IN_NEGATIVE_CC);
+ }
+
+ public boolean backSlashEscapeInCC() {
+ return isBehavior(BACKSLASH_ESCAPE_IN_CC);
+ }
+
+ public boolean allowEmptyRangeInCC() {
+ return isBehavior(ALLOW_EMPTY_RANGE_IN_CC);
+ }
+
+ public boolean allowDoubleRangeOpInCC() {
+ return isBehavior(ALLOW_DOUBLE_RANGE_OP_IN_CC);
+ }
+
+ public boolean warnCCOpNotEscaped() {
+ return isBehavior(WARN_CC_OP_NOT_ESCAPED);
+ }
+
+ /**
+  * Tests the WARN_REDUNDANT_NESTED_REPEAT behavior bit.
+  * The method name is misspelled; it is kept so existing callers still compile.
+  */
+ public boolean warnReduntantNestedRepeat() {
+     return isBehavior(WARN_REDUNDANT_NESTED_REPEAT);
+ }
+
+ /** Correctly spelled alias of {@link #warnReduntantNestedRepeat()}. */
+ public boolean warnRedundantNestedRepeat() {
+     return warnReduntantNestedRepeat();
+ }
+
+ /**
+ * Syntax preset RUBY.
+ * Arguments as written: OP_ flags (GNU_REGEX_OP plus extras, with
+ * OP_ESC_LTGT_WORD_BEGIN_END masked out), OP2_ flags, behavior flags,
+ * Option.NONE and a MetaCharTable whose only effective entry is the escape '\\'.
+ * NOTE(review): the Syntax constructor is not visible here; the argument
+ * roles are presumed from the constant prefixes - confirm.
+ */
+ public static final Syntax RUBY = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_QMARK_GROUP_EFFECT |
+ OP2_OPTION_RUBY |
+ OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF |
+ OP2_ESC_G_SUBEXP_CALL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_PLUS_POSSESSIVE_REPEAT |
+ OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB |
+ OP2_ESC_H_XDIGIT ),
+
+ ( GNU_REGEX_BV |
+ ALLOW_INTERVAL_LOW_ABBREV |
+ DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ CAPTURE_ONLY_NAMED_GROUP |
+ ALLOW_MULTIPLEX_DEFINITION_NAME |
+ FIXED_INTERVAL_IS_GREEDY_ONLY |
+ WARN_CC_OP_NOT_ESCAPED |
+ WARN_REDUNDANT_NESTED_REPEAT ),
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /** DEFAULT is an alias for the RUBY instance. */
+ public static final Syntax DEFAULT = RUBY;
+
+ /**
+ * Syntax preset ASIS: no OP_ flags, only OP2_INEFFECTIVE_ESCAPE among the OP2_ flags,
+ * no behavior flags, Option.NONE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax ASIS = new Syntax(
+ 0,
+
+ OP2_INEFFECTIVE_ESCAPE,
+
+ 0,
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset PosixBasic: POSIX_COMMON_OP plus escaped-parenthesis sub-expressions
+ * and escaped-brace intervals; no OP2_ or behavior flags; options
+ * Option.SINGLELINE | Option.MULTILINE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax PosixBasic = new Syntax(
+ (POSIX_COMMON_OP | OP_ESC_LPAREN_SUBEXP |
+ OP_ESC_BRACE_INTERVAL ),
+
+ 0,
+
+ 0,
+
+ ( Option.SINGLELINE | Option.MULTILINE ),
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset PosixExtended: POSIX_COMMON_OP plus unescaped parentheses, braces,
+ * '+', '?' and '|' operators; no OP2_ flags; the listed context/close-paren/range
+ * behavior flags; options Option.SINGLELINE | Option.MULTILINE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax PosixExtended = new Syntax(
+ ( POSIX_COMMON_OP | OP_LPAREN_SUBEXP |
+ OP_BRACE_INTERVAL |
+ OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE |OP_VBAR_ALT ),
+
+ 0,
+
+ ( CONTEXT_INDEP_ANCHORS |
+ CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS |
+ ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ALLOW_DOUBLE_RANGE_OP_IN_CC ),
+
+ ( Option.SINGLELINE | Option.MULTILINE ),
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset Emacs: the listed OP_ flags, OP2_ESC_GNU_BUF_ANCHOR,
+ * the ALLOW_EMPTY_RANGE_IN_CC behavior flag and Option.NONE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax Emacs = new Syntax(
+ ( OP_DOT_ANYCHAR | OP_BRACKET_CC |
+ OP_ESC_BRACE_INTERVAL |
+ OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF |
+ OP_QMARK_ZERO_ONE | OP_DECIMAL_BACKREF |
+ OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS ),
+
+ OP2_ESC_GNU_BUF_ANCHOR,
+
+ ALLOW_EMPTY_RANGE_IN_CC,
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset Grep: the listed OP_ flags (escaped forms of '+', '?', '|', braces
+ * and parentheses), no OP2_ flags, behavior flags
+ * ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC, and Option.NONE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax Grep = new Syntax(
+ ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET |
+ OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP |
+ OP_ESC_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_ESC_PLUS_ONE_INF |
+ OP_ESC_QMARK_ZERO_ONE | OP_LINE_ANCHOR |
+ OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND |
+ OP_ESC_LTGT_WORD_BEGIN_END | OP_DECIMAL_BACKREF ),
+
+ 0,
+
+ ( ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC ),
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset GnuRegex: GNU_REGEX_OP, no OP2_ flags, GNU_REGEX_BV and Option.NONE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax GnuRegex = new Syntax(
+ GNU_REGEX_OP,
+ 0,
+ GNU_REGEX_BV,
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset Java: GNU_REGEX_OP plus extras with OP_ESC_LTGT_WORD_BEGIN_END
+ * masked out, the listed OP2_ flags (using OP2_OPTION_PERL), behavior flags
+ * GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND, and Option.SINGLELINE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax Java = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT |
+ OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT |
+ OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP |
+ OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY ),
+
+ ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset Perl: the same OP_ flag expression as RUBY, the listed OP2_ flags
+ * (using OP2_OPTION_PERL), GNU_REGEX_BV and Option.SINGLELINE.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax Perl = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE |
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ),
+
+ GNU_REGEX_BV,
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ /**
+ * Syntax preset PerlNG: the Perl flag sets extended with the named-group OP2_ flags
+ * (OP2_QMARK_LT_NAMED_GROUP, OP2_ESC_K_NAMED_BACKREF, OP2_ESC_G_SUBEXP_CALL) and the
+ * behavior flags CAPTURE_ONLY_NAMED_GROUP | ALLOW_MULTIPLEX_DEFINITION_NAME.
+ * NOTE(review): argument roles are presumed from the constant prefixes; the
+ * Syntax constructor is not visible here - confirm.
+ */
+ public static final Syntax PerlNG = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ),
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE |
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_QMARK_LT_NAMED_GROUP |
+ OP2_ESC_K_NAMED_BACKREF |
+ OP2_ESC_G_SUBEXP_CALL ),
+
+ ( GNU_REGEX_BV |
+ CAPTURE_ONLY_NAMED_GROUP |
+ ALLOW_MULTIPLEX_DEFINITION_NAME ),
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+}
diff --git a/src/org/joni/Token.java b/src/org/joni/Token.java
new file mode 100644
index 0000000..16e2b1a
--- /dev/null
+++ b/src/org/joni/Token.java
@@ -0,0 +1,172 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.constants.TokenType;
+
+final class Token {
+    TokenType type;
+    boolean escaped;
+    int base; /* is number: 8, 16 (used in [....]) */
+    int backP;
+
+    /*
+     * Storage shared by all accessor groups below (the "union" of the original
+     * layout). Each group reads and writes the same slots under its own names,
+     * so a value written through one group is visible through every other
+     * group that maps to the same slot.
+     */
+    private int slot1, slot2, slot3, slot4, slot5;
+    private int[] refArray;
+
+    /** Encodes a boolean into an int slot: 1 for true, 0 for false. */
+    private static int asSlot(boolean flag) {
+        return flag ? 1 : 0;
+    }
+
+    // ---- single-value views (all stored in slot 1) ----
+    int getC() {
+        return slot1;
+    }
+    void setC(int c) {
+        slot1 = c;
+    }
+
+    int getCode() {
+        return slot1;
+    }
+    void setCode(int code) {
+        slot1 = code;
+    }
+
+    int getAnchor() {
+        return slot1;
+    }
+    void setAnchor(int anchor) {
+        slot1 = anchor;
+    }
+
+    int getSubtype() {
+        return slot1;
+    }
+    void setSubtype(int subtype) {
+        slot1 = subtype;
+    }
+
+    // ---- repeat view: lower = slot 1, upper = slot 2, greedy = slot 3, possessive = slot 4 ----
+    int getRepeatLower() {
+        return slot1;
+    }
+    void setRepeatLower(int lower) {
+        slot1 = lower;
+    }
+
+    int getRepeatUpper() {
+        return slot2;
+    }
+    void setRepeatUpper(int upper) {
+        slot2 = upper;
+    }
+
+    boolean getRepeatGreedy() {
+        return slot3 != 0;
+    }
+    void setRepeatGreedy(boolean greedy) {
+        slot3 = asSlot(greedy);
+    }
+
+    boolean getRepeatPossessive() {
+        return slot4 != 0;
+    }
+    void setRepeatPossessive(boolean possessive) {
+        slot4 = asSlot(possessive);
+    }
+
+    // ---- backref view: num = slot 1, ref1 = slot 2, byName = slot 3, refs = array slot ----
+    int getBackrefNum() {
+        return slot1;
+    }
+    void setBackrefNum(int num) {
+        slot1 = num;
+    }
+
+    int getBackrefRef1() {
+        return slot2;
+    }
+    void setBackrefRef1(int ref1) {
+        slot2 = ref1;
+    }
+
+    int[]getBackrefRefs() {
+        return refArray;
+    }
+    void setBackrefRefs(int[]refs) {
+        refArray = refs;
+    }
+
+    boolean getBackrefByName() {
+        return slot3 != 0;
+    }
+    void setBackrefByName(boolean byName) {
+        slot3 = asSlot(byName);
+    }
+
+    // USE_BACKREF_AT_LEVEL: existLevel = slot 4, level = slot 5
+    boolean getBackrefExistLevel() {
+        return slot4 != 0;
+    }
+    void setBackrefExistLevel(boolean existLevel) {
+        slot4 = asSlot(existLevel);
+    }
+
+    int getBackrefLevel() {
+        return slot5;
+    }
+    void setBackrefLevel(int level) {
+        slot5 = level;
+    }
+
+    // ---- call view: nameP = slot 1, nameEnd = slot 2, gnum = slot 3 ----
+    int getCallNameP() {
+        return slot1;
+    }
+    void setCallNameP(int nameP) {
+        slot1 = nameP;
+    }
+
+    int getCallNameEnd() {
+        return slot2;
+    }
+    void setCallNameEnd(int nameEnd) {
+        slot2 = nameEnd;
+    }
+
+    int getCallGNum() {
+        return slot3;
+    }
+    void setCallGNum(int gnum) {
+        slot3 = gnum;
+    }
+
+    // ---- prop view: ctype = slot 1, not = slot 2 ----
+    int getPropCType() {
+        return slot1;
+    }
+    void setPropCType(int ctype) {
+        slot1 = ctype;
+    }
+
+    boolean getPropNot() {
+        return slot2 != 0;
+    }
+    void setPropNot(boolean not) {
+        slot2 = asSlot(not);
+    }
+}
diff --git a/src/org/joni/UnsetAddrList.java b/src/org/joni/UnsetAddrList.java
new file mode 100644
index 0000000..d0648df
--- /dev/null
+++ b/src/org/joni/UnsetAddrList.java
@@ -0,0 +1,69 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+import org.joni.ast.EncloseNode;
+import org.joni.ast.Node;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+/**
+ * Growable list of (offset, target node) pairs. Entries are recorded with
+ * {@link #add(int, Node)} and later resolved in one pass by {@link #fix(Regex)}.
+ */
+public final class UnsetAddrList {
+    /** Number of entries currently stored. */
+    int num;
+    /** Target node of each entry; parallel to {@link #offsets}. */
+    Node[]targets;
+    /** Offset of each entry; parallel to {@link #targets}. */
+    int[]offsets;
+
+    /**
+     * @param size initial capacity; may be 0, the list grows on demand
+     */
+    public UnsetAddrList(int size) {
+        targets = new Node[size];
+        offsets = new int[size];
+    }
+
+    /** Doubles a capacity, never returning less than 1 so an empty array can still grow. */
+    private static int grown(int capacity) {
+        return Math.max(1, capacity << 1);
+    }
+
+    /**
+     * Appends an entry, doubling both backing arrays when they are full.
+     *
+     * @param offset offset to record
+     * @param node target node to record
+     */
+    public void add(int offset, Node node) {
+        if (num >= offsets.length) {
+            Node[]ttmp = new Node[grown(targets.length)];
+            System.arraycopy(targets, 0, ttmp, 0, num);
+            targets = ttmp;
+            int[]otmp = new int[grown(offsets.length)];
+            System.arraycopy(offsets, 0, otmp, 0, num);
+            offsets = otmp;
+        }
+        targets[num] = node;
+        offsets[num] = offset;
+
+        num++;
+    }
+
+    /**
+     * Passes every recorded entry to {@code regex.setInt(callAddr, offset)}.
+     *
+     * @param regex the regex receiving the resolved addresses
+     * @throws InternalException (ERR_PARSER_BUG) if a target's address is not fixed yet
+     */
+    public void fix(Regex regex) {
+        for (int i=0; i<num; i++) {
+            EncloseNode en = (EncloseNode)targets[i];
+            // The exception used to be constructed but never thrown, which silently
+            // disabled this consistency check.
+            if (!en.isAddrFixed()) throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+            regex.setInt(en.callAddr, offsets[i]);
+        }
+    }
+
+    /** Concatenates one "offset + N target: NAME" fragment per entry; empty when the list is empty. */
+    public String toString() {
+        StringBuilder value = new StringBuilder();
+        for (int i=0; i<num; i++) {
+            value.append("offset + " + offsets[i] + " target: " + targets[i].getAddressName());
+        }
+        return value.toString();
+    }
+}
diff --git a/src/org/joni/WarnCallback.java b/src/org/joni/WarnCallback.java
new file mode 100644
index 0000000..351146b
--- /dev/null
+++ b/src/org/joni/WarnCallback.java
@@ -0,0 +1,32 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni;
+
+/**
+ * Callback that receives warning messages.
+ *
+ * @author <a href="mailto:ola.bini at gmail.com">Ola Bini</a>
+ */
+public interface WarnCallback {
+    /**
+     * Handles a single warning.
+     *
+     * @param message the warning text
+     */
+    void warn(String message);
+
+    /** Default callback: prints each message on its own line to System.err. */
+    WarnCallback DEFAULT = new WarnCallback(){
+        public void warn(String message) {
+            System.err.println(message);
+        }
+    };
+}// WarnCallback
diff --git a/src/org/joni/ast/AnchorNode.java b/src/org/joni/ast/AnchorNode.java
new file mode 100644
index 0000000..cccbc49
--- /dev/null
+++ b/src/org/joni/ast/AnchorNode.java
@@ -0,0 +1,92 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.constants.AnchorType;
+
+/**
+ * AST node of type ANCHOR. Holds a bit mask of AnchorType flags, an optional
+ * target sub-tree and a character length that starts out as -1.
+ */
+public final class AnchorNode extends Node implements AnchorType {
+    public int type;
+    public Node target;
+    public int charLength;
+
+    /**
+     * @param type anchor flag bits (AnchorType constants)
+     */
+    public AnchorNode(int type) {
+        this.type = type;
+        charLength = -1;
+    }
+
+    @Override
+    public int getType() {
+        return ANCHOR;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Installs tgt as the target and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    @Override
+    public String getName() {
+        return "Anchor";
+    }
+
+    /** Debug dump: the decoded flag names followed by the padded target sub-tree. */
+    @Override
+    public String toString(int level) {
+        return "\n type: " + typeToString()
+                + "\n target: " + pad(target, level + 1);
+    }
+
+    /** Lists the name of every flag set in {@link #type}, each followed by one space. */
+    public String typeToString() {
+        StringBuilder names = new StringBuilder();
+        appendIfSet(names, BEGIN_BUF, "BEGIN_BUF ");
+        appendIfSet(names, BEGIN_LINE, "BEGIN_LINE ");
+        appendIfSet(names, BEGIN_POSITION, "BEGIN_POSITION ");
+        appendIfSet(names, END_BUF, "END_BUF ");
+        appendIfSet(names, SEMI_END_BUF, "SEMI_END_BUF ");
+        appendIfSet(names, END_LINE, "END_LINE ");
+        appendIfSet(names, WORD_BOUND, "WORD_BOUND ");
+        appendIfSet(names, NOT_WORD_BOUND, "NOT_WORD_BOUND ");
+        appendIfSet(names, WORD_BEGIN, "WORD_BEGIN ");
+        appendIfSet(names, WORD_END, "WORD_END ");
+        appendIfSet(names, PREC_READ, "PREC_READ ");
+        appendIfSet(names, PREC_READ_NOT, "PREC_READ_NOT ");
+        appendIfSet(names, LOOK_BEHIND, "LOOK_BEHIND ");
+        appendIfSet(names, LOOK_BEHIND_NOT, "LOOK_BEHIND_NOT ");
+        appendIfSet(names, ANYCHAR_STAR, "ANYCHAR_STAR ");
+        appendIfSet(names, ANYCHAR_STAR_ML, "ANYCHAR_STAR_ML ");
+        return names.toString();
+    }
+
+    /** Appends label to out when any bit of flag is set in this node's type. */
+    private void appendIfSet(StringBuilder out, int flag, String label) {
+        if (isType(flag)) out.append(label);
+    }
+
+    private boolean isType(int type) {
+        return (this.type & type) != 0;
+    }
+
+}
diff --git a/src/org/joni/ast/AnyCharNode.java b/src/org/joni/ast/AnyCharNode.java
new file mode 100644
index 0000000..d349d8c
--- /dev/null
+++ b/src/org/joni/ast/AnyCharNode.java
@@ -0,0 +1,40 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+/** AST node of type CANY; it carries no state of its own. */
+public final class AnyCharNode extends Node {
+    public AnyCharNode(){}
+
+    @Override
+    public int getType() {
+        return CANY;
+    }
+
+    @Override
+    public String getName() {
+        return "Any Char";
+    }
+
+    /** There is nothing to dump for this node, so the result is always empty. */
+    @Override
+    public String toString(int level) {
+        return "";
+    }
+}
diff --git a/src/org/joni/ast/BackRefNode.java b/src/org/joni/ast/BackRefNode.java
new file mode 100644
index 0000000..040fb81
--- /dev/null
+++ b/src/org/joni/ast/BackRefNode.java
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.ScanEnvironment;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+/** AST node of type BREF: a back-reference to one or more memory (group) numbers. */
+public final class BackRefNode extends StateNode {
+    /** Number of live entries at the start of {@link #back}. */
+    public int backNum;
+    /** Referenced numbers; the constructor uses them as indexes into env.memNodes. */
+    public int back[];
+
+    /** Assigned only by the level-aware constructor when existLevel is true. */
+    public int nestLevel;
+
+    /**
+     * @param backNum number of entries of backRefs to use
+     * @param backRefs referenced numbers; the first backNum entries are copied
+     * @param byName when true the node is marked as a name reference
+     * @param env scan environment used to detect references to groups that have no node yet
+     */
+    public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) {
+        this.backNum = backNum;
+        if (byName) setNameRef();
+
+        for (int i=0; i<backNum; i++) {
+            if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) {
+                setRecursion(); /* /...(\1).../ */
+                break;
+            }
+        }
+
+        back = new int[backNum];
+        System.arraycopy(backRefs, 0, back, 0, backNum); // shall we really dup it ???
+    }
+
+    // #ifdef USE_BACKREF_AT_LEVEL
+    /**
+     * Same as the four-argument constructor; additionally records nestLevel and
+     * marks the node via setNestLevel() when existLevel is true.
+     */
+    public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) {
+        this(backNum, backRefs, byName, env);
+
+        if (existLevel) {
+            setNestLevel();
+            this.nestLevel = nestLevel;
+        }
+    }
+
+    @Override
+    public int getType() {
+        return BREF;
+    }
+
+    @Override
+    public String getName() {
+        return "Back Ref";
+    }
+
+    /** Debug dump of backNum, the live back entries and nestLevel. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n backNum: ").append(backNum);
+        value.append("\n back: ");
+        // Only the first backNum entries are live: renumber() compacts the array in
+        // place and leaves stale values behind them. The bound also guards against a
+        // backNum that was set past the array from outside.
+        int live = Math.min(backNum, back.length);
+        for (int i=0; i<live; i++) value.append(back[i]).append(", ");
+        // The label used to read "nextLevel" although the field is nestLevel.
+        value.append("\n nestLevel: ").append(nestLevel);
+        return value.toString();
+    }
+
+    /**
+     * Rewrites the referenced numbers through map, dropping entries whose mapped
+     * value is not positive, and shrinks backNum accordingly.
+     *
+     * @param map new number for each old number
+     * @throws ValueException if this node is not a name reference
+     */
+    public void renumber(int[]map) {
+        if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+
+        int oldNum = backNum;
+
+        int pos = 0;
+        for (int i=0; i<oldNum; i++) {
+            int n = map[back[i]];
+            if (n > 0) {
+                back[pos] = n;
+                pos++;
+            }
+        }
+        backNum = pos;
+    }
+
+}
diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java
new file mode 100644
index 0000000..4ae2264
--- /dev/null
+++ b/src/org/joni/ast/CClassNode.java
@@ -0,0 +1,529 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.BitSet;
+import org.joni.CodeRangeBuffer;
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.ScanEnvironment;
+import org.joni.constants.CCSTATE;
+import org.joni.constants.CCVALTYPE;
+import org.joni.constants.CharacterType;
+import org.joni.encoding.Encoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.SyntaxException;
+import org.joni.exception.ValueException;
+
+public final class CClassNode extends Node {
+ // Bit in flags: the class is negated (see setNot/isNot).
+ private static final int FLAG_NCCLASS_NOT = 1<<0;
+ // Bit in flags: the class is shared (see setShare/isShare).
+ private static final int FLAG_NCCLASS_SHARE = 1<<1;
+
+ // Bit mask built from the FLAG_NCCLASS_* constants.
+ int flags;
+ // Membership bits, indexed by code; queried for codes below BitSet.SINGLE_BYTE_SIZE.
+ public final BitSet bs = new BitSet(); // conditional creation ?
+ public CodeRangeBuffer mbuf; /* multi-byte info or NULL */
+
+ // ctype and enc are set only by the five-argument constructor and read by equals/hashCode.
+ private int ctype; // for hashing purposes
+ private Encoding enc; // ...
+
+
+ // node_new_cclass
+ /** Creates an empty, non-negated class with no multi-byte buffer. */
+ public CClassNode() {}
+
+ /**
+ * Builds the class via the (not, sbOut, ranges) constructor and additionally
+ * records ctype and enc, which equals/hashCode use.
+ */
+ public CClassNode(int ctype, Encoding enc, boolean not, int sbOut, int[]ranges) {
+ this(not, sbOut, ranges);
+ this.ctype = ctype;
+ this.enc = enc;
+ }
+
+ // node_new_cclass_by_codepoint_range, only used by shared Char Classes
+ /**
+  * Builds a class from a range table laid out as {count, from0, to0, from1, to1, ...}.
+  * Codes below sbOut are copied into the bit set until the first code at or above
+  * sbOut is met; the table itself is then handed to setupBuffer.
+  *
+  * @param not whether the class is negated
+  * @param sbOut first code that is not stored in the bit set
+  * @param ranges range table, or null
+  */
+ public CClassNode(boolean not, int sbOut, int[]ranges) {
+     if (not) setNot();
+
+     if (sbOut > 0 && ranges != null) {
+         final int count = ranges[0];
+         singleByte:
+         for (int r = 0; r < count; r++) {
+             final int first = ranges[r * 2 + 1];
+             final int last = ranges[r * 2 + 2];
+             for (int code = first; code <= last; code++) {
+                 // Stop filling the bit set at the first code outside it.
+                 if (code >= sbOut) break singleByte;
+                 bs.set(code);
+             }
+         }
+     }
+     setupBuffer(ranges);
+ }
+
+ /** Node type tag: always CCLASS. */
+ @Override
+ public int getType() {
+ return CCLASS;
+ }
+
+ /** Display name of this node kind. */
+ @Override
+ public String getName() {
+ return "Character Class";
+ }
+
+ /**
+ * Two class nodes are equal when their ctype, NOT flag and encoding instance match;
+ * the bit set and the multi-byte buffer are not compared.
+ */
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof CClassNode)) return false;
+ CClassNode cc = (CClassNode)other;
+ return ctype == cc.ctype && isNot() == cc.isNot() && enc == cc.enc;
+ }
+
+ /**
+ * With Config.USE_SHARED_CCLASS_TABLE the hash is derived from ctype, enc and the
+ * NOT flag; otherwise the inherited hashCode is used.
+ * NOTE(review): equals() compares by value regardless of that switch, so with the
+ * switch off two equal nodes can hash differently. Also, enc is only assigned by the
+ * five-argument constructor, so enc.hashCode() throws NullPointerException for nodes
+ * built any other way - confirm such nodes are never hashed.
+ */
+ @Override
+ public int hashCode() {
+ if (Config.USE_SHARED_CCLASS_TABLE) {
+ int hash = 0;
+ hash += ctype;
+ hash += enc.hashCode();
+ if (isNot()) hash++;
+ return hash + (hash >> 5);
+ } else {
+ return super.hashCode();
+ }
+ }
+
+ /** Debug dump: decoded flags, then the padded bit set and multi-byte buffer. */
+ @Override
+ public String toString(int level) {
+     final int childLevel = level + 1;
+     return "\n flags: " + flagsToString()
+             + "\n bs: " + pad(bs, childLevel)
+             + "\n mbuf: " + pad(mbuf, childLevel);
+ }
+
+ /** Names of the flags that are set ("NOT ", "SHARE "), in that order. */
+ public String flagsToString() {
+     String text = "";
+     if (isNot()) text += "NOT ";
+     if (isShare()) text += "SHARE ";
+     return text;
+ }
+
+ /** Wraps a non-null range table with at least one range into mbuf; otherwise leaves mbuf untouched. */
+ private void setupBuffer(int[]ranges) {
+     if (ranges == null || ranges[0] == 0) return;
+     mbuf = new CodeRangeBuffer(ranges);
+ }
+
+ /** True when there is no multi-byte buffer and the bit set reports itself empty. */
+ public boolean isEmpty() {
+ return mbuf == null && bs.isEmpty();
+ }
+
+ /** Replaces mbuf with the result of CodeRangeBuffer.addCodeRangeToBuff for [from, to]. */
+ public void addCodeRangeToBuf(int from, int to) {
+ mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
+ }
+
+ /** Replaces mbuf with the result of CodeRangeBuffer.addCodeRange for [from, to] under env. */
+ public void addCodeRange(ScanEnvironment env, int from, int to) {
+ mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
+ }
+
+ /** Replaces mbuf with the result of CodeRangeBuffer.addAllMultiByteRange for enc. */
+ public void addAllMultiByteRange(Encoding enc) {
+ mbuf = CodeRangeBuffer.addAllMultiByteRange(enc, mbuf);
+ }
+
+ /**
+  * Turns a negated class into the equivalent non-negated one: the bit set is
+  * inverted, the multi-byte buffer is complemented unless enc is single-byte,
+  * and the NOT flag is cleared. Does nothing when the class is not negated.
+  */
+ public void clearNotFlag(Encoding enc) {
+     if (!isNot()) return;
+
+     bs.invert();
+     if (!enc.isSingleByte()) {
+         mbuf = CodeRangeBuffer.notCodeRangeBuff(enc, mbuf);
+     }
+     clearNot();
+ }
+
+ // and_cclass
+ /**
+ * Intersects this class with other, in place. The NOT flags of both operands are
+ * honoured but neither flag is modified: negated operands are combined through
+ * inverted copies of their bit sets, and the result is stored back into this.bs,
+ * re-inverted when this class is negated. The multi-byte buffers are combined
+ * only when enc is not single-byte.
+ */
+ public void and(CClassNode other, Encoding enc) {
+ boolean not1 = isNot();
+ BitSet bsr1 = bs;
+ CodeRangeBuffer buf1 = mbuf;
+ boolean not2 = other.isNot();
+ BitSet bsr2 = other.bs;
+ CodeRangeBuffer buf2 = other.mbuf;
+
+ // Work on an inverted copy so the operand's own bit set is not disturbed.
+ if (not1) {
+ BitSet bs1 = new BitSet();
+ bsr1.invertTo(bs1);
+ bsr1 = bs1;
+ }
+
+ if (not2) {
+ BitSet bs2 = new BitSet();
+ bsr2.invertTo(bs2);
+ bsr2 = bs2;
+ }
+
+ bsr1.and(bsr2);
+
+ // When a temporary was used, copy the result back into this node's bit set.
+ if (bsr1 != bs) {
+ bs.copy(bsr1);
+ bsr1 = bs;
+ }
+
+ // Store the result in negated form again because the NOT flag stays set.
+ if (not1) {
+ bs.invert();
+ }
+
+ CodeRangeBuffer pbuf = null;
+
+ if (!enc.isSingleByte()) {
+ if (not1 && not2) {
+ // Both negated: OR the raw buffers; the result stays in negated form.
+ pbuf = CodeRangeBuffer.orCodeRangeBuff(enc, buf1, false, buf2, false);
+ } else {
+ pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
+
+ if (not1) {
+ pbuf = CodeRangeBuffer.notCodeRangeBuff(enc, pbuf);
+ }
+ }
+ mbuf = pbuf;
+ }
+
+ }
+
+ // or_cclass
+ /**
+ * Unites this class with other, in place. The NOT flags of both operands are
+ * honoured but neither flag is modified: negated operands are combined through
+ * inverted copies of their bit sets, and the result is stored back into this.bs,
+ * re-inverted when this class is negated. The multi-byte buffers are combined
+ * only when enc is not single-byte.
+ */
+ public void or(CClassNode other, Encoding enc) {
+ boolean not1 = isNot();
+ BitSet bsr1 = bs;
+ CodeRangeBuffer buf1 = mbuf;
+ boolean not2 = other.isNot();
+ BitSet bsr2 = other.bs;
+ CodeRangeBuffer buf2 = other.mbuf;
+
+ // Work on an inverted copy so the operand's own bit set is not disturbed.
+ if (not1) {
+ BitSet bs1 = new BitSet();
+ bsr1.invertTo(bs1);
+ bsr1 = bs1;
+ }
+
+ if (not2) {
+ BitSet bs2 = new BitSet();
+ bsr2.invertTo(bs2);
+ bsr2 = bs2;
+ }
+
+ bsr1.or(bsr2);
+
+ // When a temporary was used, copy the result back into this node's bit set.
+ if (bsr1 != bs) {
+ bs.copy(bsr1);
+ bsr1 = bs;
+ }
+
+ // Store the result in negated form again because the NOT flag stays set.
+ if (not1) {
+ bs.invert();
+ }
+
+ if (!enc.isSingleByte()) {
+ CodeRangeBuffer pbuf = null;
+ if (not1 && not2) {
+ // Both negated: AND the raw buffers; the result stays in negated form.
+ pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
+ } else {
+ pbuf = CodeRangeBuffer.orCodeRangeBuff(enc, buf1, not1, buf2, not2);
+ if (not1) {
+ pbuf = CodeRangeBuffer.notCodeRangeBuff(enc, pbuf);
+ }
+ }
+ mbuf = pbuf;
+ }
+ }
+
+ // add_ctype_to_cc_by_range // Encoding out!
+ /**
+  * Adds the code points described by a range table to this class, or their
+  * complement when not is true. Codes below sbOut go into the bit set, the
+  * rest into the multi-byte buffer.
+  *
+  * @param ctype unused here; kept for signature compatibility
+  * @param not when true, the complement of the ranges is added
+  * @param enc unused here; kept for signature compatibility
+  * @param sbOut first code that is not stored in the bit set
+  * @param mbr range table laid out as {count, from0, to0, from1, to1, ...}
+  */
+ public void addCTypeByRange(int ctype, boolean not, Encoding enc, int sbOut, int mbr[]) {
+     final int n = mbr[0];
+
+     if (!not) {
+         for (int i = 0; i < n; i++) {
+             final int from = mbr[i * 2 + 1];
+             final int to = mbr[i * 2 + 2];
+             for (int j = from; j <= to; j++) {
+                 if (j >= sbOut) {
+                     // [from, j) is already in the bit set; the rest of this range,
+                     // including j itself, belongs in the buffer. The previous code
+                     // skipped the range when j was its last code, so that code
+                     // point ended up in neither structure.
+                     addCodeRangeToBuf(j, to);
+                     addRangesFrom(mbr, i + 1, n);
+                     return;
+                 }
+                 bs.set(j);
+             }
+         }
+         // Every range lay below sbOut and is fully represented in the bit set,
+         // so nothing is added to the buffer. The previous fall-through loop
+         // restarted at range 0 and copied all of them into the buffer again.
+     } else {
+         int prev = 0;
+
+         for (int i = 0; i < n; i++) {
+             // Set the gap between the previous range and this one.
+             for (int j = prev; j < mbr[2 * i + 1]; j++) {
+                 if (j >= sbOut) {
+                     addComplementFrom(sbOut, mbr, n);
+                     return;
+                 }
+                 bs.set(j);
+             }
+             prev = mbr[2 * i + 2] + 1;
+         }
+
+         // Gap after the last range, up to the end of the bit-set region.
+         for (int j = prev; j < sbOut; j++) {
+             bs.set(j);
+         }
+
+         addComplementFrom(sbOut, mbr, n);
+     }
+ }
+
+ /** Adds ranges start..n-1 of the table to the multi-byte buffer unchanged. */
+ private void addRangesFrom(int[] mbr, int start, int n) {
+     for (int i = start; i < n; i++) {
+         addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
+     }
+ }
+
+ /** Adds the gaps between the table's ranges, scanning from sbOut up to 0x7fffffff, to the multi-byte buffer. */
+ private void addComplementFrom(int sbOut, int[] mbr, int n) {
+     int prev = sbOut;
+     for (int i = 0; i < n; i++) {
+         if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
+         prev = mbr[i * 2 + 2] + 1;
+     }
+     if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
+ }
+
+ /**
+ * Adds a character type (or its complement when not is true) to this class.
+ * When the encoding supplies a range table for ctype, the work is delegated to
+ * addCTypeByRange; otherwise every code below BitSet.SINGLE_BYTE_SIZE is tested
+ * individually and, depending on the type, all multi-byte ranges are added.
+ *
+ * @param ctype one of the CharacterType constants
+ * @param not whether to add the complement
+ * @param env supplies the encoding
+ * @param sbOut passed to enc.ctypeCodeRange; its value is then used as the bit-set limit
+ * @throws InternalException (ERR_PARSER_BUG) for a ctype not handled below
+ */
+ public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
+ Encoding enc = env.enc;
+
+ int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
+
+ if (ranges != null) {
+ addCTypeByRange(ctype, not, enc, sbOut.value, ranges);
+ return;
+ }
+
+ switch(ctype) {
+ // For these types the multi-byte ranges are added only in the negated case.
+ case CharacterType.ALPHA:
+ case CharacterType.BLANK:
+ case CharacterType.CNTRL:
+ case CharacterType.DIGIT:
+ case CharacterType.LOWER:
+ case CharacterType.PUNCT:
+ case CharacterType.SPACE:
+ case CharacterType.UPPER:
+ case CharacterType.XDIGIT:
+ case CharacterType.ASCII:
+ case CharacterType.ALNUM:
+ if (not) {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (!enc.isCodeCType(c, ctype)) bs.set(c);
+ }
+ addAllMultiByteRange(enc);
+ } else {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (enc.isCodeCType(c, ctype)) bs.set(c);
+ }
+ }
+ break;
+
+ // For these types the multi-byte ranges are added only in the non-negated case.
+ case CharacterType.GRAPH:
+ case CharacterType.PRINT:
+ if (not) {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (!enc.isCodeCType(c, ctype)) bs.set(c);
+ }
+ } else {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (enc.isCodeCType(c, ctype)) bs.set(c);
+ }
+ addAllMultiByteRange(enc);
+ }
+ break;
+
+ case CharacterType.WORD:
+ if (!not) {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (enc.isSbWord(c)) bs.set(c);
+ }
+
+ addAllMultiByteRange(enc);
+ } else {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ // A ValueException raised for code c is swallowed on purpose:
+ // that code is simply not added.
+ try {
+ if (enc.codeToMbcLength(c) > 0 && /* check invalid code point */
+ !enc.isWord(c)) bs.set(c);
+ } catch (ValueException ve) {};
+ }
+ }
+ break;
+
+ default:
+ throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+ } // switch
+ }
+
+ /**
+ * Mutable argument bundle for nextStateClass and nextStateValue. Both methods
+ * read and update these fields in place.
+ */
+ public static final class CCStateArg {
+ // Incoming value; copied into vs at the end of nextStateValue.
+ public int v;
+ // Pending value: added to the class as a single value or used as the start of a range.
+ public int vs;
+ // Raw marker belonging to vs; receives vIsRaw at the end of nextStateValue.
+ public boolean vsIsRaw;
+ // Raw marker belonging to v.
+ public boolean vIsRaw;
+ // Kind of the incoming value v.
+ public CCVALTYPE inType;
+ // Kind of the pending value vs.
+ public CCVALTYPE type;
+ // Current state of the character-class state machine.
+ public CCSTATE state;
+ }
+
+ /**
+  * State transition taken when a class element is encountered: a pending
+  * single value is flushed into this class, then the state becomes VALUE with
+  * value type CLASS.
+  *
+  * @throws SyntaxException if the state machine is in the middle of a range
+  */
+ public void nextStateClass(CCStateArg arg, ScanEnvironment env) {
+     if (arg.state == CCSTATE.RANGE) {
+         throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
+     }
+
+     final boolean pendingValue = arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS;
+     if (pendingValue) {
+         if (arg.type == CCVALTYPE.SB) {
+             bs.set(arg.vs);
+         } else if (arg.type == CCVALTYPE.CODE_POINT) {
+             addCodeRange(env, arg.vs, arg.vs);
+         }
+     }
+
+     arg.state = CCSTATE.VALUE;
+     arg.type = CCVALTYPE.CLASS;
+ }
+
+ /**
+ * State transition taken when a value (arg.v of kind arg.inType) is encountered.
+ * In state VALUE the pending value arg.vs is added to the class; in state RANGE
+ * the range arg.vs..arg.v is added and the state becomes COMPLETE; in states
+ * COMPLETE and START the state becomes VALUE. In every case the incoming value
+ * is then stored as the new pending value.
+ *
+ * @throws ValueException for a single-byte value above 0xff, or for an empty
+ * range when the syntax does not allow empty ranges
+ */
+ public void nextStateValue(CCStateArg arg, ScanEnvironment env) {
+
+ switch(arg.state) {
+ case VALUE:
+ // Flush the pending single value.
+ if (arg.type == CCVALTYPE.SB) {
+ if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+ bs.set(arg.vs);
+ } else if (arg.type == CCVALTYPE.CODE_POINT) {
+ addCodeRange(env, arg.vs, arg.vs);
+ }
+ break;
+
+ case RANGE:
+ if (arg.inType == arg.type) {
+ // Both ends have the same kind.
+ if (arg.inType == CCVALTYPE.SB) {
+ if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+
+ if (arg.vs > arg.v) {
+ if (env.syntax.allowEmptyRangeInCC()) {
+ // goto ccs_range_end
+ arg.state = CCSTATE.COMPLETE;
+ break;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+ }
+ }
+ bs.setRange(arg.vs, arg.v);
+ } else {
+ addCodeRange(env, arg.vs, arg.v);
+ }
+ } else {
+ // Mixed kinds: fill the bit set up to 0xff and also record the range in the buffer.
+ if (arg.vs > arg.v) {
+ if (env.syntax.allowEmptyRangeInCC()) {
+ // goto ccs_range_end
+ arg.state = CCSTATE.COMPLETE;
+ break;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+ }
+ }
+ bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
+ addCodeRange(env, arg.vs, arg.v);
+ }
+ // ccs_range_end:
+ arg.state = CCSTATE.COMPLETE;
+ break;
+
+ case COMPLETE:
+ case START:
+ arg.state = CCSTATE.VALUE;
+ break;
+
+ default:
+ break;
+
+ } // switch
+
+ // The incoming value becomes the pending value for the next transition.
+ arg.vsIsRaw = arg.vIsRaw;
+ arg.vs = arg.v;
+ arg.type = arg.inType;
+ }
+
+ // onig_is_code_in_cc_len
+ /**
+  * Tests membership of code. Codes with an encoded length above one, or at or
+  * above BitSet.SINGLE_BYTE_SIZE, are looked up in the multi-byte buffer; all
+  * others in the bit set. The answer is flipped when the class is negated.
+  *
+  * @param encLength encoded length of the code
+  * @param code the code to test
+  */
+ public boolean isCodeInCCLength(int encLength, int code) {
+     final boolean useBuffer = encLength > 1 || code >= BitSet.SINGLE_BYTE_SIZE;
+
+     final boolean found;
+     if (!useBuffer) {
+         found = bs.at(code);
+     } else if (mbuf != null) {
+         found = CodeRangeBuffer.isInCodeRange(mbuf.getCodeRange(), code);
+     } else {
+         found = false;
+     }
+
+     return isNot() ? !found : found;
+ }
+
+ // onig_is_code_in_cc
+ /**
+  * Tests membership of code under enc. Encodings whose minimum length exceeds
+  * one are always treated as multi-byte (length 2); otherwise the length
+  * reported by the encoding for this code is used.
+  */
+ public boolean isCodeInCC(Encoding enc, int code) {
+     final int encLength = enc.minLength() > 1 ? 2 : enc.codeToMbcLength(code);
+     return isCodeInCCLength(encLength, code);
+ }
+
+ /** Sets the NOT bit in flags. */
+ public void setNot() {
+ flags |= FLAG_NCCLASS_NOT;
+ }
+
+ /** Clears the NOT bit in flags; the bit set and buffer are left as they are. */
+ public void clearNot() {
+ flags &= ~FLAG_NCCLASS_NOT;
+ }
+
+ /** True when the NOT bit is set in flags. */
+ public boolean isNot() {
+ return (flags & FLAG_NCCLASS_NOT) != 0;
+ }
+
+ /** Sets the SHARE bit in flags. */
+ public void setShare() {
+ flags |= FLAG_NCCLASS_SHARE;
+ }
+
+ /** Clears the SHARE bit in flags. */
+ public void clearShare() {
+ flags &= ~FLAG_NCCLASS_SHARE;
+ }
+
+ /** True when the SHARE bit is set in flags. */
+ public boolean isShare() {
+ return (flags & FLAG_NCCLASS_SHARE) != 0;
+ }
+
+}
diff --git a/src/org/joni/ast/CTypeNode.java b/src/org/joni/ast/CTypeNode.java
new file mode 100644
index 0000000..093216a
--- /dev/null
+++ b/src/org/joni/ast/CTypeNode.java
@@ -0,0 +1,50 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+/**
+ * Leaf AST node that records a character type code together with a negation
+ * flag. It has no children.
+ */
+public final class CTypeNode extends Node {
+    /** Character type code given to the constructor. */
+    public int ctype;
+    /** Negation flag given to the constructor. */
+    public boolean not;
+
+    /**
+     * @param type character type code stored in {@link #ctype}
+     * @param not  negation flag stored in {@link #not}
+     */
+    public CTypeNode(int type, boolean not) {
+        this.not = not;
+        this.ctype = type;
+    }
+
+    /** @return the node type constant {@code CTYPE} */
+    @Override
+    public int getType() {
+        return CTYPE;
+    }
+
+    /** @return the display name used in tree dumps */
+    @Override
+    public String getName() {
+        return "Character Type";
+    }
+
+    /**
+     * Renders the node's fields for a tree dump.
+     *
+     * @param level nesting level; not used by this node
+     */
+    @Override
+    public String toString(int level) {
+        return "\n ctype: " + ctype
+             + "\n not: " + not;
+    }
+
+}
diff --git a/src/org/joni/ast/CallNode.java b/src/org/joni/ast/CallNode.java
new file mode 100644
index 0000000..8261f75
--- /dev/null
+++ b/src/org/joni/ast/CallNode.java
@@ -0,0 +1,86 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import java.util.Set;
+
+import org.joni.UnsetAddrList;
+import org.joni.WarnCallback;
+
+/**
+ * AST node for a subexpression call. The call is identified either by the name
+ * stored in {@code name[nameP..nameEnd)} or by {@link #groupNum}, and
+ * {@link #target} refers to the called node once it has been set.
+ */
+public final class CallNode extends StateNode {
+    /** Buffer holding the called name; the name occupies {@code [nameP, nameEnd)}. */
+    public byte[] name;
+    public int nameP;
+    public int nameEnd;
+
+    public int groupNum;
+    public Node target;             // is it an EncloseNode always ?
+    public UnsetAddrList unsetAddrList;
+
+    /**
+     * @param name    buffer containing the called name
+     * @param nameP   start offset of the name inside {@code name}
+     * @param nameEnd end offset (exclusive) of the name inside {@code name}
+     * @param gnum    group number; call by number if non-zero
+     */
+    public CallNode(byte[] name, int nameP, int nameEnd, int gnum) {
+        this.name = name;
+        this.nameP = nameP;
+        this.nameEnd = nameEnd;
+        this.groupNum = gnum; /* call by number if gnum != 0 */
+    }
+
+    @Override
+    public int getType() {
+        return CALL;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Stores {@code tgt} as the call target and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    @Override
+    public String getName() {
+        return "Call";
+    }
+
+    /**
+     * Emits a warning when the target is missing or when the target's parent
+     * is this node. Unlike the default implementation it does not descend
+     * into the target.
+     */
+    @Override
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (target == null || target.parent == this) {
+            warnings.warn(this.getAddressName() + " doesn't point to a target or the target has been stolen");
+        }
+        // do not recurse here
+    }
+
+    /**
+     * Renders the node's fields for a tree dump. A missing name or target is
+     * printed as {@code NULL} instead of raising a NullPointerException, since
+     * an unset target is a state {@link #verifyTree} explicitly anticipates.
+     *
+     * @param level nesting level used to pad nested values
+     */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n name: " + (name == null ? "NULL" : new String(name, nameP, nameEnd - nameP)));
+        value.append("\n groupNum: " + groupNum);
+        value.append("\n target: " + (target == null ? "NULL" : pad(target.getAddressName(), level + 1)));
+        value.append("\n unsetAddrList: " + pad(unsetAddrList, level + 1));
+
+        return value.toString();
+    }
+
+}
diff --git a/src/org/joni/ast/ConsAltNode.java b/src/org/joni/ast/ConsAltNode.java
new file mode 100644
index 0000000..3d7f784
--- /dev/null
+++ b/src/org/joni/ast/ConsAltNode.java
@@ -0,0 +1,154 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import java.util.Set;
+
+import org.joni.Config;
+import org.joni.WarnCallback;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+
+/**
+ * Cons cell used for both sequence (LIST) and alternation (ALT) chains:
+ * {@link #car} holds the element and {@link #cdr} the next cell, or null at
+ * the end of the chain.
+ */
+public final class ConsAltNode extends Node {
+    public Node car;
+    public ConsAltNode cdr;
+    private int type;               // List or Alt
+
+    private ConsAltNode(Node car, ConsAltNode cdr, int type) {
+        this.car = car;
+        if (car != null) car.parent = this;
+        this.cdr = cdr;
+        if (cdr != null) cdr.parent = this;
+
+        this.type = type;
+    }
+
+    /** Creates an ALT cell; either argument may be null. */
+    public static ConsAltNode newAltNode(Node left, ConsAltNode right) {
+        return new ConsAltNode(left, right, ALT);
+    }
+
+    /** Creates a LIST cell; either argument may be null. */
+    public static ConsAltNode newListNode(Node left, ConsAltNode right) {
+        return new ConsAltNode(left, right, LIST);
+    }
+
+    /**
+     * Wraps {@code x} in a new LIST cell and, when {@code list} is not null,
+     * links it after the last cell of {@code list}.
+     *
+     * @return the newly created cell (not the head of the list)
+     */
+    public static ConsAltNode listAdd(ConsAltNode list, Node x) {
+        ConsAltNode n = newListNode(x, null);
+
+        if (list != null) {
+            while (list.cdr != null) {
+                list = list.cdr;
+            }
+            list.setCdr(n);
+        }
+        return n;
+    }
+
+    public void toListNode() {
+        type = LIST;
+    }
+
+    public void toAltNode() {
+        type = ALT;
+    }
+
+    @Override
+    public int getType() {
+        return type;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        car = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return car;
+    }
+
+    /**
+     * Exchanges this cell with {@code with} in the tree. When this cell has a
+     * tail, the tail is re-parented to {@code with}; if {@code with} is also a
+     * cons cell the two tails are exchanged. The parent links are then swapped
+     * by {@link Node#swap(Node)}.
+     */
+    @Override
+    public void swap(Node with) {
+        if (cdr != null) {
+            cdr.parent = with;
+            if (with instanceof ConsAltNode) {
+                ConsAltNode withCan = (ConsAltNode)with;
+                // the other cell may be the last one of its chain and have no tail to re-parent
+                if (withCan.cdr != null) withCan.cdr.parent = this;
+                ConsAltNode tmp = cdr;
+                cdr = withCan.cdr;
+                withCan.cdr = tmp;
+            }
+        }
+
+        super.swap(with);
+    }
+
+    /**
+     * Checks that {@code car} and {@code cdr} both point back to this cell,
+     * reporting a warning for each broken link, and recurses into both.
+     * {@code set} records visited cells so that each is checked only once.
+     */
+    @Override
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (!set.contains(this)) {
+            set.add(this);
+            if (car != null) {
+                if (car.parent != this) {
+                    warnings.warn("broken list car: " + this.getAddressName() + " -> " + car.getAddressName());
+                }
+                car.verifyTree(set,warnings);
+            }
+            if (cdr != null) {
+                if (cdr.parent != this) {
+                    warnings.warn("broken list cdr: " + this.getAddressName() + " -> " + cdr.getAddressName());
+                }
+                cdr.verifyTree(set,warnings);
+            }
+        }
+    }
+
+    /**
+     * Replaces the element of this cell. A null element is accepted, matching
+     * what the constructor allows.
+     *
+     * @return the new element
+     */
+    public Node setCar(Node ca) {
+        car = ca;
+        if (ca != null) ca.parent = this;
+        return car;
+    }
+
+    /**
+     * Replaces the tail of this cell. A null tail (end of chain) is accepted,
+     * matching what the constructor allows.
+     *
+     * @return the new tail
+     */
+    public ConsAltNode setCdr(ConsAltNode cd) {
+        cdr = cd;
+        if (cd != null) cd.parent = this;
+        return cdr;
+    }
+
+    @Override
+    public String getName() {
+        switch (type) {
+        case ALT:
+            return "Alt";
+        case LIST:
+            return "List";
+        default:
+            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+        }
+    }
+
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder();
+        value.append("\n left: " + pad(car, level + 1));
+        value.append("\n right: " + (cdr == null ? "NULL" : cdr.toString()));
+
+        return value.toString();
+    }
+
+}
diff --git a/src/org/joni/ast/EncloseNode.java b/src/org/joni/ast/EncloseNode.java
new file mode 100644
index 0000000..02b2391
--- /dev/null
+++ b/src/org/joni/ast/EncloseNode.java
@@ -0,0 +1,151 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.Config;
+import org.joni.Option;
+import org.joni.constants.EncloseType;
+
+/**
+ * AST node that wraps a target node. {@link #type} is a bit mask built from
+ * the {@code MEMORY}, {@code OPTION} and {@code STOP_BACKTRACK} constants.
+ */
+public final class EncloseNode extends StateNode implements EncloseType {
+
+    public int type;                // enclose type
+    public int regNum;
+    public int option;
+    public Node target;             /* EncloseNode : ENCLOSE_MEMORY */
+    public int callAddr;            // AbsAddrType
+    public int minLength;           // OnigDistance
+    public int maxLength;           // OnigDistance
+    public int charLength;
+    public int optCount;            // referenced count in optimize_node_left()
+
+    // node_new_enclose / onig_node_new_enclose
+    public EncloseNode(int type) {
+        this.type = type;
+        callAddr = -1;
+    }
+
+    // node_new_enclose_memory
+    public EncloseNode(int option, boolean isNamed) {
+        this(MEMORY);
+        if (isNamed) setNamedGroup();
+        if (Config.USE_SUBEXP_CALL) this.option = option;
+    }
+
+    // node_new_option
+    /**
+     * Creates an OPTION enclose node.
+     *
+     * @param option option bits stored in {@link #option}
+     * @param unused ignored; present only to distinguish this overload from
+     *               {@link #EncloseNode(int)}. It was formerly named {@code _},
+     *               which is a reserved keyword from Java 9 on.
+     */
+    public EncloseNode(int option, int unused) {
+        this(OPTION);
+        this.option = option;
+    }
+
+    @Override
+    public int getType() {
+        return ENCLOSE;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Stores {@code tgt} as the wrapped node and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    @Override
+    public String getName() {
+        return "Enclose";
+    }
+
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n type: " + typeToString());
+        value.append("\n regNum: " + regNum);
+        value.append("\n option: " + Option.toString(option));
+        value.append("\n target: " + pad(target, level + 1));
+        value.append("\n callAddr: " + callAddr);
+        value.append("\n minLength: " + minLength);
+        value.append("\n maxLength: " + maxLength);
+        value.append("\n charLength: " + charLength);
+        value.append("\n optCount: " + optCount);
+
+        return value.toString();
+    }
+
+    /** Lists the names of the type bits currently set, each followed by a space. */
+    public String typeToString() {
+        StringBuilder types = new StringBuilder();
+        if (isStopBacktrack()) types.append("STOP_BACKTRACK ");
+        if (isMemory()) types.append("MEMORY ");
+        if (isOption()) types.append("OPTION ");
+
+        return types.toString();
+    }
+
+    /** Sets the given bits in the inherited {@code state} mask. */
+    public void setEncloseStatus(int flag) {
+        state |= flag;
+    }
+
+    /** Clears the given bits in the inherited {@code state} mask. */
+    public void clearEncloseStatus(int flag) {
+        state &= ~flag;
+    }
+
+    public void clearMemory() {
+        type &= ~MEMORY;
+    }
+
+    public void setMemory() {
+        type |= MEMORY;
+    }
+
+    public boolean isMemory() {
+        return (type & MEMORY) != 0;
+    }
+
+    public void clearOption() {
+        type &= ~OPTION;
+    }
+
+    public void setOption() {
+        type |= OPTION;
+    }
+
+    public boolean isOption() {
+        return (type & OPTION) != 0;
+    }
+
+    public void clearStopBacktrack() {
+        type &= ~STOP_BACKTRACK;
+    }
+
+    public void setStopBacktrack() {
+        type |= STOP_BACKTRACK;
+    }
+
+    public boolean isStopBacktrack() {
+        return (type & STOP_BACKTRACK) != 0;
+    }
+
+}
diff --git a/src/org/joni/ast/Node.java b/src/org/joni/ast/Node.java
new file mode 100644
index 0000000..73bedd5
--- /dev/null
+++ b/src/org/joni/ast/Node.java
@@ -0,0 +1,134 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import java.util.Set;
+
+import org.joni.Config;
+import org.joni.WarnCallback;
+import org.joni.constants.NodeType;
+
+/**
+ * Base class of all regex AST nodes. Each node keeps a link to its parent;
+ * subclasses that own a single child expose it through getChild/setChild.
+ */
+public abstract class Node implements NodeType {
+ /** Parent of this node in the tree, or null when the node has none. */
+ public Node parent;
+
+ /** Returns the node type constant of the concrete node. */
+ public abstract int getType();
+
+ /** Returns the node type as a single-bit mask (1 shifted left by getType()). */
+ public final int getType2Bit() {
+ return 1 << getType();
+ }
+
+ protected void setChild(Node tgt){} // default definition
+ protected Node getChild(){return null;}; // default definition
+
+ /**
+ * Exchanges the tree positions of this node and {@code with}: each node's
+ * parent is told (via setChild) to point at the other node, then the two
+ * parent fields are exchanged. The children of the two nodes are left
+ * untouched (the code that swapped them is commented out below).
+ */
+ public void swap(Node with) {
+ Node tmp;
+
+ //if (getChild() != null) getChild().parent = with;
+ //if (with.getChild() != null) with.getChild().parent = this;
+
+ //tmp = getChild();
+ //setChild(with.getChild());
+ //with.setChild(tmp);
+
+ if (parent != null) parent.setChild(with);
+
+ if (with.parent != null) with.parent.setChild(this);
+
+ tmp = parent;
+ parent = with.parent;
+ with.parent = tmp;
+ }
+
+ // overridden by ConsAltNode and CallNode
+ /**
+ * Checks that the child (if any) points back to this node, reporting a
+ * warning otherwise, and then recurses into the child. {@code set} holds
+ * the nodes already visited so each node is processed once.
+ */
+ public void verifyTree(Set<Node> set, WarnCallback warnings) {
+ if (!set.contains(this) && getChild() != null) {
+ set.add(this);
+ if (getChild().parent != this) {
+ warnings.warn("broken link to child: " + this.getAddressName() + " -> " + getChild().getAddressName());
+ }
+ getChild().verifyTree(set, warnings);
+ }
+ }
+
+ /** Returns the display name of the node kind. */
+ public abstract String getName();
+ /** Renders the subclass-specific fields for a tree dump. */
+ protected abstract String toString(int level);
+
+ /** Returns the node name followed by the identity hash code in hex, used to tell instances apart in dumps. */
+ public String getAddressName() {
+ return getName() + ":0x" + Integer.toHexString(System.identityHashCode(this));
+ }
+
+ /** Renders the node header and parent, followed by toString(0). */
+ public final String toString() {
+ StringBuilder s = new StringBuilder();
+ s.append("<" + getAddressName() + ">");
+ s.append("\n parent: " + (parent == null ? "NULL" : parent.getAddressName()));
+ return s + toString(0);
+ }
+
+ /**
+ * Returns "NULL" for a null value; otherwise the value's string form with
+ * padding inserted after every newline.
+ */
+ protected static String pad(Object value, int level) {
+ if (value == null) return "NULL";
+
+ StringBuilder pad = new StringBuilder(" ");
+ // NOTE(review): appending the builder to itself doubles the padding on every
+ // iteration, so its length grows as 2^level rather than linearly - confirm intended
+ for (int i=0; i<level; i++) pad.append(pad);
+
+ return value.toString().replace("\n", "\n" + pad);
+ }
+
+ /**
+ * Reports whether this node is an invalid quantifier target: true for an
+ * ANCHOR and for an ALT chain containing an invalid element, false
+ * otherwise.
+ */
+ public final boolean isInvalidQuantifier() {
+ ConsAltNode node;
+
+ switch(getType()) {
+
+ case ANCHOR:
+ return true;
+
+ case ENCLOSE:
+ /* allow enclosed elements */
+ /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
+ break;
+
+ case LIST:
+ node = (ConsAltNode)this;
+ // NOTE(review): both the early exit and the fall-out of this loop return false,
+ // so a LIST is never reported as invalid - confirm against the C original
+ do {
+ if (!node.car.isInvalidQuantifier()) return false;
+ } while ((node = node.cdr) != null);
+ return false;
+
+ case ALT:
+ node = (ConsAltNode)this;
+ do {
+ if (node.car.isInvalidQuantifier()) return true;
+ } while ((node = node.cdr) != null);
+ break;
+
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ /** Tests this node's type bit against the ALLOWED_IN_LB mask. */
+ public final boolean isAllowedInLookBehind() {
+ return (getType2Bit() & ALLOWED_IN_LB) != 0;
+ }
+
+ /** Tests this node's type bit against the SIMPLE mask. */
+ public final boolean isSimple() {
+ return (getType2Bit() & SIMPLE) != 0;
+ }
+}
diff --git a/src/org/joni/ast/QuantifierNode.java b/src/org/joni/ast/QuantifierNode.java
new file mode 100644
index 0000000..280f9bf
--- /dev/null
+++ b/src/org/joni/ast/QuantifierNode.java
@@ -0,0 +1,272 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.Config;
+import org.joni.ScanEnvironment;
+import org.joni.constants.Reduce;
+import org.joni.constants.TargetInfo;
+
+/**
+ * AST node for a repetition of {@link #target} between {@link #lower} and
+ * {@link #upper} times; {@link #REPEAT_INFINITE} as upper bound means
+ * unbounded.
+ */
+public final class QuantifierNode extends StateNode {
+
+    public Node target;
+    public int lower;
+    public int upper;
+    public boolean greedy;
+
+    public int targetEmptyInfo;
+
+    public Node headExact;
+    public Node nextHeadExact;
+    public boolean isRefered;           /* include called node. don't eliminate even if {0} */
+
+    // USE_COMBINATION_EXPLOSION_CHECK
+    public int combExpCheckNum;         /* 1,2,3...: check, 0: no check */
+
+    /**
+     * @param lower    minimum repeat count
+     * @param upper    maximum repeat count, or {@link #REPEAT_INFINITE}
+     * @param byNumber when true the BY_NUMBER state flag is set
+     */
+    public QuantifierNode(int lower, int upper, boolean byNumber) {
+        this.lower = lower;
+        this.upper = upper;
+        greedy = true;
+        targetEmptyInfo = TargetInfo.ISNOT_EMPTY;
+
+        if (byNumber) setByNumber();
+    }
+
+    @Override
+    public int getType() {
+        return QTFR;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Stores {@code tgt} as the repeated node and makes this node its parent. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    /**
+     * Creates an empty string node carrying the flag of the (string) target
+     * and swaps it into this node's position in the tree.
+     *
+     * @return the new string node
+     */
+    public StringNode convertToString() {
+        StringNode sn = new StringNode();
+        sn.flag = ((StringNode)target).flag;
+        sn.swap(this);
+        return sn;
+    }
+
+    @Override
+    public String getName() {
+        return "Quantifier";
+    }
+
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n target: " + pad(target, level + 1));
+        value.append("\n lower: " + lower);
+        value.append("\n upper: " + upper);
+        value.append("\n greedy: " + greedy);
+        value.append("\n targetEmptyInfo: " + targetEmptyInfo);
+        value.append("\n headExact: " + pad(headExact, level + 1));
+        value.append("\n nextHeadExact: " + pad(nextHeadExact, level + 1));
+        value.append("\n isRefered: " + isRefered);
+        value.append("\n combExpCheckNum: " + combExpCheckNum);
+
+        return value.toString();
+    }
+
+    /** True for a greedy, unbounded repetition whose target is of type CANY. */
+    public boolean isAnyCharStar() {
+        return greedy && isRepeatInfinite(upper) && target.getType() == CANY;
+    }
+
+    /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
+    /** Returns the index of this quantifier among the six popular forms, or -1 if it is none of them. */
+    protected int popularNum() {
+        if (greedy) {
+            if (lower == 0) {
+                if (upper == 1) return 0;
+                else if (isRepeatInfinite(upper)) return 1;
+            } else if (lower == 1) {
+                if (isRepeatInfinite(upper)) return 2;
+            }
+        } else {
+            if (lower == 0) {
+                if (upper == 1) return 3;
+                else if (isRepeatInfinite(upper)) return 4;
+            } else if (lower == 1) {
+                if (isRepeatInfinite(upper)) return 5;
+            }
+        }
+        return -1;
+    }
+
+    /** Takes over the target and all quantifier fields of {@code other}; {@code other} loses its target. */
+    protected void set(QuantifierNode other) {
+        setTarget(other.target);
+        other.target = null;
+        lower = other.lower;
+        upper = other.upper;
+        greedy = other.greedy;
+        targetEmptyInfo = other.targetEmptyInfo;
+
+        //setHeadExact(other.headExact);
+        //setNextHeadExact(other.nextHeadExact);
+        headExact = other.headExact;
+        nextHeadExact = other.nextHeadExact;
+        isRefered = other.isRefered;
+        combExpCheckNum = other.combExpCheckNum;
+    }
+
+    /**
+     * Merges this (outer) quantifier with the nested quantifier {@code other}
+     * according to {@code Reduce.REDUCE_TABLE}. Nothing happens unless both
+     * are popular forms.
+     */
+    public void reduceNestedQuantifier(QuantifierNode other) {
+        int pnum = popularNum();
+        int cnum = other.popularNum();
+
+        if (pnum < 0 || cnum < 0) return;
+
+        switch(Reduce.REDUCE_TABLE[cnum][pnum]) {
+        case DEL:
+            // no need to set the parent here...
+            // swap ?
+            set(other); // *pnode = *cnode; ???
+            break;
+
+        case A:
+            setTarget(other.target);
+            lower = 0;
+            upper = REPEAT_INFINITE;
+            greedy = true;
+            break;
+
+        case AQ:
+            setTarget(other.target);
+            lower = 0;
+            upper = REPEAT_INFINITE;
+            greedy = false;
+            break;
+
+        case QQ:
+            setTarget(other.target);
+            lower = 0;
+            upper = 1;
+            greedy = false;
+            break;
+
+        case P_QQ:
+            setTarget(other);
+            lower = 0;
+            upper = 1;
+            greedy = false;
+            other.lower = 1;
+            other.upper = REPEAT_INFINITE;
+            other.greedy = true;
+            return;
+
+        case PQ_Q:
+            setTarget(other);
+            lower = 0;
+            upper = 1;
+            greedy = true;
+            other.lower = 1;
+            other.upper = REPEAT_INFINITE;
+            other.greedy = false;
+            return;
+
+        case ASIS:
+            setTarget(other);
+            return;
+        }
+        // ??? remove the parent from target ???
+        other.target = null; // remove target from reduced quantifier
+    }
+
+    /**
+     * Attaches {@code tgt} as the target of this quantifier, applying
+     * simplifications on the way.
+     *
+     * @param tgt   node to be repeated
+     * @param group passed by the caller; when false a string target may be split
+     *              so that only its last character is repeated
+     * @param env   scan environment supplying encoding, syntax and warning sink
+     * @param bytes buffer holding the pattern text used in warning messages
+     * @param p     start offset of the pattern text inside {@code bytes}
+     * @param end   end offset (exclusive) of the pattern text inside {@code bytes}
+     * @return 1 when the quantifier is {1,1} and nothing was attached, 2 when a
+     *         string target was split and only its last character attached,
+     *         0 otherwise
+     */
+    public int setQuantifier(Node tgt, boolean group, ScanEnvironment env, byte[]bytes, int p, int end) {
+        if (lower == 1 && upper == 1) return 1;
+
+        switch(tgt.getType()) {
+
+        case STR:
+            if (!group) {
+                StringNode sn = (StringNode)tgt;
+                if (sn.canBeSplit(env.enc)) {
+                    StringNode n = sn.splitLastChar(env.enc);
+                    if (n != null) {
+                        setTarget(n);
+                        return 2;
+                    }
+                }
+            }
+            break;
+
+        case QTFR:
+            /* check redundant double repeat. */
+            /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+            QuantifierNode qnt = (QuantifierNode)tgt;
+            int nestQNum = popularNum();
+            int targetQNum = qnt.popularNum();
+
+            if (Config.USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR) {
+                // the reduce table only covers the six popular forms; popularNum() yields -1 for
+                // anything else, which must not be used as an index
+                if (nestQNum >= 0 && targetQNum >= 0 &&
+                    !isByNumber() && !qnt.isByNumber() && env.syntax.warnReduntantNestedRepeat()) {
+                    switch(Reduce.REDUCE_TABLE[targetQNum][nestQNum]) {
+                    case ASIS:
+                        break;
+
+                    case DEL:
+                        // String(byte[], offset, length): the third argument is a length, not an end index
+                        env.reg.warnings.warn(new String(bytes, p, end - p) +
+                                " redundant nested repeat operator");
+                        break;
+
+                    default:
+                        env.reg.warnings.warn(new String(bytes, p, end - p) +
+                                " nested repeat operator " + Reduce.PopularQStr[targetQNum] +
+                                " and " + Reduce.PopularQStr[nestQNum] + " was replaced with '" +
+                                Reduce.ReduceQStr[Reduce.REDUCE_TABLE[targetQNum][nestQNum].ordinal()] + "'");
+                    }
+                }
+            } // USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+
+            if (targetQNum >= 0) {
+                if (nestQNum >= 0) {
+                    reduceNestedQuantifier(qnt);
+                    return 0;
+                } else if (targetQNum == 1 || targetQNum == 2) { /* * or + */
+                    /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+                    if (!isRepeatInfinite(upper) && upper > 1 && greedy) {
+                        upper = lower == 0 ? 1 : lower;
+                    }
+                }
+            }
+            // intentional fall through: the target is attached below
+
+        default:
+            break;
+        }
+
+        setTarget(tgt);
+        return 0;
+    }
+
+    /** Upper bound value meaning "no upper limit". */
+    public static final int REPEAT_INFINITE = -1;
+
+    /** True when {@code n} equals {@link #REPEAT_INFINITE}. */
+    public static boolean isRepeatInfinite(int n) {
+        return n == REPEAT_INFINITE;
+    }
+
+}
diff --git a/src/org/joni/ast/StateNode.java b/src/org/joni/ast/StateNode.java
new file mode 100644
index 0000000..117d3df
--- /dev/null
+++ b/src/org/joni/ast/StateNode.java
@@ -0,0 +1,232 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.constants.NodeStatus;
+
+/**
+ * Base class for nodes that carry a bit mask of {@code NST_*} status flags.
+ * Every flag has an is/set/clear accessor triple.
+ */
+public abstract class StateNode extends Node implements NodeStatus {
+    /** Bit mask of {@code NST_*} flags. */
+    protected int state;
+
+    @Override
+    public String toString(int level) {
+        return "\n state: " + stateToString();
+    }
+
+    /** Lists the names of all status flags currently set, each followed by a space. */
+    public String stateToString() {
+        StringBuilder states = new StringBuilder();
+        if (isMinFixed()) states.append("MIN_FIXED ");
+        if (isMaxFixed()) states.append("MAX_FIXED ");
+        // CLEN_FIXED has accessors like every other flag and belongs in the dump as well
+        if (isCLenFixed()) states.append("CLEN_FIXED ");
+        if (isMark1()) states.append("MARK1 ");
+        if (isMark2()) states.append("MARK2 ");
+        if (isMemBackrefed()) states.append("MEM_BACKREFED ");
+        if (isStopBtSimpleRepeat()) states.append("STOP_BT_SIMPLE_REPEAT ");
+        if (isRecursion()) states.append("RECURSION ");
+        if (isCalled()) states.append("CALLED ");
+        if (isAddrFixed()) states.append("ADDR_FIXED ");
+        if (isNamedGroup()) states.append("NAMED_GROUP ");
+        if (isNameRef()) states.append("NAME_REF ");
+        if (isInRepeat()) states.append("IN_REPEAT ");
+        if (isNestLevel()) states.append("NEST_LEVEL ");
+        if (isByNumber()) states.append("BY_NUMBER ");
+
+        return states.toString();
+    }
+
+    // NST_MIN_FIXED
+    public boolean isMinFixed() {
+        return (state & NST_MIN_FIXED) != 0;
+    }
+
+    public void setMinFixed() {
+        state |= NST_MIN_FIXED;
+    }
+
+    public void clearMinFixed() {
+        state &= ~NST_MIN_FIXED;
+    }
+
+    // NST_MAX_FIXED
+    public boolean isMaxFixed() {
+        return (state & NST_MAX_FIXED) != 0;
+    }
+
+    public void setMaxFixed() {
+        state |= NST_MAX_FIXED;
+    }
+
+    public void clearMaxFixed() {
+        state &= ~NST_MAX_FIXED;
+    }
+
+    // NST_CLEN_FIXED
+    public boolean isCLenFixed() {
+        return (state & NST_CLEN_FIXED) != 0;
+    }
+
+    public void setCLenFixed() {
+        state |= NST_CLEN_FIXED;
+    }
+
+    public void clearCLenFixed() {
+        state &= ~NST_CLEN_FIXED;
+    }
+
+    // NST_MARK1
+    public boolean isMark1() {
+        return (state & NST_MARK1) != 0;
+    }
+
+    public void setMark1() {
+        state |= NST_MARK1;
+    }
+
+    public void clearMark1() {
+        state &= ~NST_MARK1;
+    }
+
+    // NST_MARK2
+    public boolean isMark2() {
+        return (state & NST_MARK2) != 0;
+    }
+
+    public void setMark2() {
+        state |= NST_MARK2;
+    }
+
+    public void clearMark2() {
+        state &= ~NST_MARK2;
+    }
+
+    // NST_MEM_BACKREFED
+    public boolean isMemBackrefed() {
+        return (state & NST_MEM_BACKREFED) != 0;
+    }
+
+    public void setMemBackrefed() {
+        state |= NST_MEM_BACKREFED;
+    }
+
+    public void clearMemBackrefed() {
+        state &= ~NST_MEM_BACKREFED;
+    }
+
+    // NST_STOP_BT_SIMPLE_REPEAT
+    public boolean isStopBtSimpleRepeat() {
+        return (state & NST_STOP_BT_SIMPLE_REPEAT) != 0;
+    }
+
+    public void setStopBtSimpleRepeat() {
+        state |= NST_STOP_BT_SIMPLE_REPEAT;
+    }
+
+    public void clearStopBtSimpleRepeat() {
+        state &= ~NST_STOP_BT_SIMPLE_REPEAT;
+    }
+
+    // NST_RECURSION
+    public boolean isRecursion() {
+        return (state & NST_RECURSION) != 0;
+    }
+
+    public void setRecursion() {
+        state |= NST_RECURSION;
+    }
+
+    public void clearRecursion() {
+        state &= ~NST_RECURSION;
+    }
+
+    // NST_CALLED
+    public boolean isCalled() {
+        return (state & NST_CALLED) != 0;
+    }
+
+    public void setCalled() {
+        state |= NST_CALLED;
+    }
+
+    /** Clears the NST_CALLED flag. */
+    public void clearCalled() {
+        state &= ~NST_CALLED;
+    }
+
+    /** Misspelled name of {@link #clearCalled()}, retained so existing callers keep compiling. */
+    public void clearCAlled() {
+        clearCalled();
+    }
+
+    // NST_ADDR_FIXED
+    public boolean isAddrFixed() {
+        return (state & NST_ADDR_FIXED) != 0;
+    }
+
+    public void setAddrFixed() {
+        state |= NST_ADDR_FIXED;
+    }
+
+    public void clearAddrFixed() {
+        state &= ~NST_ADDR_FIXED;
+    }
+
+    // NST_NAMED_GROUP
+    public boolean isNamedGroup() {
+        return (state & NST_NAMED_GROUP) != 0;
+    }
+
+    public void setNamedGroup() {
+        state |= NST_NAMED_GROUP;
+    }
+
+    public void clearNamedGroup() {
+        state &= ~NST_NAMED_GROUP;
+    }
+
+    // NST_NAME_REF
+    public boolean isNameRef() {
+        return (state & NST_NAME_REF) != 0;
+    }
+
+    public void setNameRef() {
+        state |= NST_NAME_REF;
+    }
+
+    public void clearNameRef() {
+        state &= ~NST_NAME_REF;
+    }
+
+    // NST_IN_REPEAT
+    public boolean isInRepeat() {
+        return (state & NST_IN_REPEAT) != 0;
+    }
+
+    public void setInRepeat() {
+        state |= NST_IN_REPEAT;
+    }
+
+    public void clearInRepeat() {
+        state &= ~NST_IN_REPEAT;
+    }
+
+    // NST_NEST_LEVEL
+    public boolean isNestLevel() {
+        return (state & NST_NEST_LEVEL) != 0;
+    }
+
+    public void setNestLevel() {
+        state |= NST_NEST_LEVEL;
+    }
+
+    public void clearNestLevel() {
+        state &= ~NST_NEST_LEVEL;
+    }
+
+    // NST_BY_NUMBER
+    public boolean isByNumber() {
+        return (state & NST_BY_NUMBER) != 0;
+    }
+
+    public void setByNumber() {
+        state |= NST_BY_NUMBER;
+    }
+
+    public void clearByNumber() {
+        state &= ~NST_BY_NUMBER;
+    }
+
+}
diff --git a/src/org/joni/ast/StringNode.java b/src/org/joni/ast/StringNode.java
new file mode 100644
index 0000000..b5b0d69
--- /dev/null
+++ b/src/org/joni/ast/StringNode.java
@@ -0,0 +1,224 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.ast;
+
+import org.joni.constants.StringType;
+import org.joni.encoding.Encoding;
+
+ /**
+  * AST node for a byte string: the bytes in {@code [p, end)} of {@link #bytes},
+  * together with a word of NSTR_* flag bits.
+  *
+  * A buffer handed in through {@link #StringNode(byte[], int, int)} or
+  * {@link #set(byte[], int, int)} is flagged as shared; {@code cat} copies a
+  * shared buffer into a private one before writing to it.
+  */
+ public final class StringNode extends Node implements StringType {
+
+     /** Spare room added on top of the required size whenever the buffer is reallocated. */
+     private static final int NODE_STR_MARGIN = 16;
+     /** Size of the private buffer allocated by the no-arg constructor and by {@link #clear()}. */
+     private static final int NODE_STR_BUF_SIZE = 24;
+
+     public byte[] bytes;
+     public int p;
+     public int end;
+
+     int flag;
+
+     /** Creates an empty node backed by a private buffer of NODE_STR_BUF_SIZE bytes. */
+     public StringNode() {
+         this.bytes = new byte[NODE_STR_BUF_SIZE];
+     }
+
+     /**
+      * Creates a node viewing {@code bytes[p, end)} without copying; the node is
+      * flagged as shared.
+      */
+     public StringNode(byte[] bytes, int p, int end) {
+         this.bytes = bytes;
+         this.p = p;
+         this.end = end;
+         setShared();
+     }
+
+     /** Creates a node with a private buffer holding the single byte {@code c}. */
+     public StringNode(byte c) {
+         this();
+         bytes[end++] = c;
+     }
+
+     /**
+      * Moves the live bytes {@code [p, end)} to the start of a fresh buffer of the
+      * given capacity and rebases {@code p}/{@code end} onto it, so the two
+      * offsets always describe the same content as before.
+      */
+     private void reallocate(int capacity) {
+         int used = end - p;
+         byte[] tmp = new byte[capacity];
+         System.arraycopy(bytes, p, tmp, 0, used);
+         bytes = tmp;
+         p = 0;
+         end = used;
+     }
+
+     /* Ensure there is ahead bytes available in node's buffer
+      * (assumes that the node is not shared)
+      */
+     public void ensure(int ahead) {
+         int len = (end - p) + ahead;
+         // Room is needed after 'end', not after 'p'; for p == 0 this is the
+         // same test as comparing len against the buffer length.
+         if (end + ahead >= bytes.length) {
+             reallocate(len + NODE_STR_MARGIN);
+         }
+     }
+
+     /* COW and/or ensure there is ahead bytes available in node's buffer
+      */
+     private void modifyEnsure(int ahead) {
+         int len = (end - p) + ahead;
+         if (isShared()) {
+             // never write into a buffer this node does not own
+             reallocate(len + NODE_STR_MARGIN);
+             clearShared();
+         } else if (end + ahead >= bytes.length) {
+             reallocate(len + NODE_STR_MARGIN);
+         }
+     }
+
+     @Override
+     public int getType() {
+         return STR;
+     }
+
+     @Override
+     public String getName() {
+         return "String";
+     }
+
+     /**
+      * Renders the node's bytes: bytes in 0x20..0x7e are appended as characters,
+      * all others as a two-digit hex escape. {@code level} is not used here.
+      */
+     @Override
+     public String toString(int level) {
+         StringBuilder value = new StringBuilder();
+         value.append("\n bytes: ");
+         for (int i = p; i < end; i++) {
+             int b = bytes[i] & 0xff;
+             if (b >= 0x20 && b < 0x7f) {
+                 value.append((char)bytes[i]);
+             } else {
+                 value.append(String.format(" 0x%02x", bytes[i]));
+             }
+         }
+         return value.toString();
+     }
+
+     /** @return the number of bytes in {@code [p, end)} */
+     public int length() {
+         return end - p;
+     }
+
+     /** @return {@code enc.strLength(bytes, p, end)} */
+     public int length(Encoding enc) {
+         return enc.strLength(bytes, p, end);
+     }
+
+     /**
+      * Splits off the last character, as located by {@code enc.prevCharHead}.
+      *
+      * @return a new node viewing the last character, or null when the node is
+      *         empty or the last character starts at {@code p} (nothing would
+      *         remain in this node)
+      */
+     public StringNode splitLastChar(Encoding enc) {
+         StringNode n = null;
+
+         if (end > p) {
+             int prev = enc.prevCharHead(bytes, p, end);
+             if (prev != -1 && prev > p) { /* can be split. */
+                 n = new StringNode(bytes, prev, end);
+                 if (isRaw()) n.setRaw();
+                 end = prev;
+                 // Both nodes now alias the same array; flag this one as shared
+                 // too so a later cat() copies instead of overwriting n's bytes.
+                 setShared();
+             }
+         }
+         return n;
+     }
+
+     /**
+      * @return true when the node is non-empty and {@code enc.length} of its
+      *         first byte is smaller than the node's byte length
+      */
+     public boolean canBeSplit(Encoding enc) {
+         return end > p && enc.length(bytes[p]) < (end - p);
+     }
+
+     /** Re-points the node at {@code bytes[p, end)} without copying and flags it as shared. */
+     public void set(byte[] bytes, int p, int end) {
+         this.bytes = bytes;
+         this.p = p;
+         this.end = end;
+         setShared();
+     }
+
+     /** Appends {@code cat[catP, catEnd)} to the node, copying/growing the buffer first if needed. */
+     public void cat(byte[] cat, int catP, int catEnd) {
+         int len = catEnd - catP;
+         modifyEnsure(len);
+         System.arraycopy(cat, catP, bytes, end, len);
+         end += len;
+     }
+
+     /** Appends the single byte {@code c}, copying/growing the buffer first if needed. */
+     public void cat(byte c) {
+         modifyEnsure(1);
+         bytes[end++] = c;
+     }
+
+     /**
+      * Empties the node and resets all flags. The buffer is replaced by a fresh
+      * private one when it is oversized or when it is shared: once the shared
+      * flag is cleared, a later cat() must not write into a foreign array.
+      */
+     public void clear() {
+         if (isShared() || bytes.length > NODE_STR_BUF_SIZE) bytes = new byte[NODE_STR_BUF_SIZE];
+         flag = 0;
+         p = end = 0;
+     }
+
+     /** Same value as {@link #length()}. */
+     public int getLength() {
+         return length();
+     }
+
+     /**
+      * Counts characters by stepping from {@code p} to {@code end} in increments
+      * of {@code enc.length} of the byte at the current position.
+      */
+     public int getLength(Encoding enc) {
+         int len = 0;
+         int p_ = p;
+
+         while (p_ < end) {
+             p_ += enc.length(bytes[p_]);
+             len++;
+         }
+         return len;
+     }
+
+     public void setRaw() {
+         flag |= NSTR_RAW;
+     }
+
+     public void clearRaw() {
+         flag &= ~NSTR_RAW;
+     }
+
+     public boolean isRaw() {
+         return (flag & NSTR_RAW) != 0;
+     }
+
+     public void setAmbig() {
+         flag |= NSTR_AMBIG;
+     }
+
+     public void clearAmbig() {
+         flag &= ~NSTR_AMBIG;
+     }
+
+     public boolean isAmbig() {
+         return (flag & NSTR_AMBIG) != 0;
+     }
+
+     public void setDontGetOptInfo() {
+         flag |= NSTR_DONT_GET_OPT_INFO;
+     }
+
+     public void clearDontGetOptInfo() {
+         flag &= ~NSTR_DONT_GET_OPT_INFO;
+     }
+
+     public boolean isDontGetOptInfo() {
+         return (flag & NSTR_DONT_GET_OPT_INFO) != 0;
+     }
+
+     public void setShared() {
+         flag |= NSTR_SHARED;
+     }
+
+     public void clearShared() {
+         flag &= ~NSTR_SHARED;
+     }
+
+     public boolean isShared() {
+         return (flag & NSTR_SHARED) != 0;
+     }
+ }
diff --git a/src/org/joni/bench/AbstractBench.java b/src/org/joni/bench/AbstractBench.java
new file mode 100644
index 0000000..93ea085
--- /dev/null
+++ b/src/org/joni/bench/AbstractBench.java
@@ -0,0 +1,52 @@
+package org.joni.bench;
+
+import org.joni.Regex;
+import org.joni.Matcher;
+import org.joni.Region;
+import org.joni.Option;
+import org.joni.Syntax;
+import org.joni.encoding.specific.ASCIIEncoding;
+
+ /**
+  * Shared driver for the micro-benchmarks in this package: compiles a pattern,
+  * runs repeated searches over a subject string and reports wall-clock times
+  * on standard error.
+  */
+ public abstract class AbstractBench {
+
+     /** Compiles {@code _reg} with the default options and syntax, using the ASCII encoding. */
+     private static Regex compile(String _reg) throws Exception {
+         byte[] reg = _reg.getBytes();
+         return new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT);
+     }
+
+     /** Prints the header line that introduces one benchmark. */
+     private static void announce(String _reg, String _str, int warmup, int times) {
+         System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times");
+     }
+
+     /**
+      * Runs {@code times} searches of {@code p} over all of {@code str}, each
+      * with a fresh matcher.
+      *
+      * @return elapsed wall-clock time in milliseconds
+      */
+     private static long timeRound(Regex p, byte[] str, int times) throws Exception {
+         long before = System.currentTimeMillis();
+         for (int i = 0; i < times; i++) {
+             p.matcher(str, 0, str.length).search(0, str.length, Option.NONE);
+         }
+         return System.currentTimeMillis() - before;
+     }
+
+     /**
+      * Runs {@code warmup} rounds of {@code times} searches and prints the
+      * elapsed time of every round.
+      *
+      * @param _reg   pattern source
+      * @param _str   subject string
+      * @param warmup number of rounds; every round is timed and reported
+      * @param times  searches per round
+      */
+     protected void bench(String _reg, String _str, int warmup, int times) throws Exception {
+         byte[] str = _str.getBytes();
+         Regex p = compile(_reg);
+
+         announce(_reg, _str, warmup, times);
+
+         for (int j = 0; j < warmup; j++) {
+             System.err.println(": " + timeRound(p, str, times) + "ms");
+         }
+     }
+
+     /**
+      * Runs {@code warmup} rounds of {@code times} searches, prints a dot per
+      * round and finally the shortest round time.
+      *
+      * @param _reg   pattern source
+      * @param _str   subject string
+      * @param warmup number of rounds
+      * @param times  searches per round
+      */
+     protected void benchBestOf(String _reg, String _str, int warmup, int times) throws Exception {
+         byte[] str = _str.getBytes();
+         Regex p = compile(_reg);
+
+         announce(_reg, _str, warmup, times);
+
+         long best = Long.MAX_VALUE;
+
+         for (int j = 0; j < warmup; j++) {
+             long time = timeRound(p, str, times);
+             if (time < best) {
+                 best = time;
+             }
+             System.err.print(".");
+         }
+         System.err.println(": " + best + "ms");
+     }
+ }
diff --git a/src/org/joni/bench/BenchGreedyBacktrack.java b/src/org/joni/bench/BenchGreedyBacktrack.java
new file mode 100644
index 0000000..dcda986
--- /dev/null
+++ b/src/org/joni/bench/BenchGreedyBacktrack.java
@@ -0,0 +1,7 @@
+package org.joni.bench;
+
+ /** Benchmark entry point for a single greedy pattern. */
+ public class BenchGreedyBacktrack extends AbstractBench {
+     /** Times 10 rounds of 1000000 searches of ".*_p" against a fixed sample string. */
+     public static void main(String[] args) throws Exception {
+         final String pattern = ".*_p";
+         final String subject = "_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/";
+         final BenchGreedyBacktrack runner = new BenchGreedyBacktrack();
+         runner.bench(pattern, subject, 10, 1000000);
+     }
+ }
diff --git a/src/org/joni/bench/BenchRailsRegs.java b/src/org/joni/bench/BenchRailsRegs.java
new file mode 100644
index 0000000..c11d208
--- /dev/null
+++ b/src/org/joni/bench/BenchRailsRegs.java
@@ -0,0 +1,31 @@
+package org.joni.bench;
+
+ /** Benchmark entry point that runs a table of (pattern, subject) pairs one after another. */
+ public class BenchRailsRegs extends AbstractBench {
+     /** Reports the best of 10 rounds of 1000000 searches for every table row. */
+     public static void main(String[] args) throws Exception {
+         // each row is { pattern, subject }
+         final String[][] cases = {
+             {"a.*?[b-z]{2,4}aaaaaa", "afdgdsgderaabxxaaaaaaaaaaaaaaaaaaaaaaaa"},
+             {"://", "/shop/viewCategory.shtml?category=DOGS"},
+             {"^\\w+\\://[^/]+(/.*|$)$", "/shop/viewCategory.shtml?category=DOGS"},
+             {"\\A/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A/shop/signonForm\\.shtml/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A/shop/newAccountForm\\.shtml/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A/shop/newAccount\\.shtml/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A/shop/viewCart\\.shtml/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A/shop/index\\.shtml/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A/shop/viewCategory\\.shtml/?\\Z", "/shop/viewCategory.shtml"},
+             {"\\A(?:::)?([A-Z]\\w*(?:::[A-Z]\\w*)*)\\z", "CategoriesController"},
+             {"\\Ainsert", "SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"},
+             {"\\A\\(?\\s*(select|show)", "SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"},
+             {".*?\n", "1b341ffe23b5298676d535fcabd3d0d7"},
+             {"^find_(all_by|by)_([_a-zA-Z]\\w*)$", "find_by_string_id"},
+             {"\\.rjs$", "categories/show.rhtml"},
+             {"^[-a-z]+://", "petstore.css"},
+             {"^get$", ""},
+             {"^post$", ""},
+             {"^[^:]+", "www.example.com"},
+             {"(=|\\?|_before_type_cast)$", "updated_on"},
+             {"^(.*?)=(.*?);", "_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/"}
+         };
+         final BenchRailsRegs runner = new BenchRailsRegs();
+         for (int row = 0; row < cases.length; row++) {
+             final String pattern = cases[row][0];
+             final String subject = cases[row][1];
+             runner.benchBestOf(pattern, subject, 10, 1000000);
+         }
+     }
+ }
diff --git a/src/org/joni/bench/BenchSeveralRegexps.java b/src/org/joni/bench/BenchSeveralRegexps.java
new file mode 100644
index 0000000..d393feb
--- /dev/null
+++ b/src/org/joni/bench/BenchSeveralRegexps.java
@@ -0,0 +1,17 @@
+package org.joni.bench;
+
+ /** Benchmark entry point that times five patterns, each reported as best of 10 rounds. */
+ public class BenchSeveralRegexps extends AbstractBench {
+     public static void main(String[] args) throws Exception {
+         final int base = 1000000;
+         final int heavy = 4 * base;
+         // subject shared by four of the five runs
+         final String cookie = "_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/";
+         final BenchSeveralRegexps runner = new BenchSeveralRegexps();
+
+         runner.benchBestOf("a", " a", 10, heavy);
+
+         runner.benchBestOf(".*?=", cookie, 10, base);
+
+         runner.benchBestOf("^(.*?)=(.*?);", cookie, 10, base);
+
+         runner.benchBestOf(".*_p", cookie, 10, heavy);
+
+         runner.benchBestOf(".*=", cookie, 10, heavy);
+     }
+ }
diff --git a/src/org/joni/constants/AnchorType.java b/src/org/joni/constants/AnchorType.java
new file mode 100644
index 0000000..144dd1d
--- /dev/null
+++ b/src/org/joni/constants/AnchorType.java
@@ -0,0 +1,58 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface AnchorType {
+ final int BEGIN_BUF = (1<<0);
+ final int BEGIN_LINE = (1<<1);
+ final int BEGIN_POSITION = (1<<2);
+ final int END_BUF = (1<<3);
+ final int SEMI_END_BUF = (1<<4);
+ final int END_LINE = (1<<5);
+
+ final int WORD_BOUND = (1<<6);
+ final int NOT_WORD_BOUND = (1<<7);
+ final int WORD_BEGIN = (1<<8);
+ final int WORD_END = (1<<9);
+ final int PREC_READ = (1<<10);
+ final int PREC_READ_NOT = (1<<11);
+ final int LOOK_BEHIND = (1<<12);
+ final int LOOK_BEHIND_NOT = (1<<13);
+
+ final int ANYCHAR_STAR = (1<<14); /* ".*" optimize info */
+ final int ANYCHAR_STAR_ML = (1<<15); /* ".*" optimize info (multi-line) */
+
+ final int ANYCHAR_STAR_MASK = (ANYCHAR_STAR | ANYCHAR_STAR_ML);
+ final int END_BUF_MASK = (END_BUF | SEMI_END_BUF);
+
+ final int ALLOWED_IN_LB = ( LOOK_BEHIND |
+ BEGIN_LINE |
+ END_LINE |
+ BEGIN_BUF |
+ BEGIN_POSITION );
+
+ final int ALLOWED_IN_LB_NOT = ( LOOK_BEHIND |
+ LOOK_BEHIND_NOT |
+ BEGIN_LINE |
+ END_LINE |
+ BEGIN_BUF |
+ BEGIN_POSITION );
+
+}
diff --git a/src/org/joni/constants/Arguments.java b/src/org/joni/constants/Arguments.java
new file mode 100644
index 0000000..1aacfdd
--- /dev/null
+++ b/src/org/joni/constants/Arguments.java
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Integer codes for argument kinds, numbered -1 through 6.
+  * Interface fields are implicitly {@code public static final}.
+  */
+ public interface Arguments {
+     int SPECIAL     = -1;
+     int NON         = 0;
+     int RELADDR     = 1;
+     int ABSADDR     = 2;
+     int LENGTH      = 3;
+     int MEMNUM      = 4;
+     int OPTION      = 5;
+     int STATE_CHECK = 6;
+ }
diff --git a/src/org/joni/constants/CCSTATE.java b/src/org/joni/constants/CCSTATE.java
new file mode 100644
index 0000000..23baa87
--- /dev/null
+++ b/src/org/joni/constants/CCSTATE.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Four-valued state enum; declaration order is VALUE, RANGE, COMPLETE, START.
+  * NOTE(review): presumably the character-class parsing states - confirm against the parser.
+  */
+ public enum CCSTATE {
+     VALUE, RANGE, COMPLETE, START
+ }
diff --git a/src/org/joni/constants/CCVALTYPE.java b/src/org/joni/constants/CCVALTYPE.java
new file mode 100644
index 0000000..b531e30
--- /dev/null
+++ b/src/org/joni/constants/CCVALTYPE.java
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Three-valued type enum; declaration order is SB, CODE_POINT, CLASS.
+  * NOTE(review): presumably the kind of value seen inside a character class - confirm against the parser.
+  */
+ public enum CCVALTYPE {
+     SB, CODE_POINT, CLASS
+ }
diff --git a/src/org/joni/constants/CharacterType.java b/src/org/joni/constants/CharacterType.java
new file mode 100644
index 0000000..1be7a1c
--- /dev/null
+++ b/src/org/joni/constants/CharacterType.java
@@ -0,0 +1,57 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Character type indices (0..14) and, for each index, the single-bit mask
+  * {@code 1 << index}. Interface fields are implicitly {@code public static final}.
+  */
+ public interface CharacterType {
+     /* type indices */
+     int NEWLINE     = 0;
+     int ALPHA       = 1;
+     int BLANK       = 2;
+     int CNTRL       = 3;
+     int DIGIT       = 4;
+     int GRAPH       = 5;
+     int LOWER       = 6;
+     int PRINT       = 7;
+     int PUNCT       = 8;
+     int SPACE       = 9;
+     int UPPER       = 10;
+     int XDIGIT      = 11;
+     int WORD        = 12;
+     int ALNUM       = 13;     /* alpha || digit */
+     int ASCII       = 14;
+
+     int MAX_STD_CTYPE = 14;
+
+     /* one bit per type index */
+     int BIT_NEWLINE = 1 << NEWLINE;
+     int BIT_ALPHA   = 1 << ALPHA;
+     int BIT_BLANK   = 1 << BLANK;
+     int BIT_CNTRL   = 1 << CNTRL;
+     int BIT_DIGIT   = 1 << DIGIT;
+     int BIT_GRAPH   = 1 << GRAPH;
+     int BIT_LOWER   = 1 << LOWER;
+     int BIT_PRINT   = 1 << PRINT;
+     int BIT_PUNCT   = 1 << PUNCT;
+     int BIT_SPACE   = 1 << SPACE;
+     int BIT_UPPER   = 1 << UPPER;
+     int BIT_XDIGIT  = 1 << XDIGIT;
+     int BIT_WORD    = 1 << WORD;
+     int BIT_ALNUM   = 1 << ALNUM;
+     int BIT_ASCII   = 1 << ASCII;
+
+ }
diff --git a/src/org/joni/constants/EncloseType.java b/src/org/joni/constants/EncloseType.java
new file mode 100644
index 0000000..553b5dc
--- /dev/null
+++ b/src/org/joni/constants/EncloseType.java
@@ -0,0 +1,29 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Bit flags for enclose kinds, plus the masks named for look-behind use.
+  * Interface fields are implicitly {@code public static final}.
+  */
+ public interface EncloseType {
+     int MEMORY            = 1 << 0;
+     int OPTION            = 1 << 1;
+     int STOP_BACKTRACK    = 1 << 2;
+
+     /** Only MEMORY is included in this mask. */
+     int ALLOWED_IN_LB     = MEMORY;
+     /** Empty mask. */
+     int ALLOWED_IN_LB_NOT = 0;
+ }
diff --git a/src/org/joni/constants/MetaChar.java b/src/org/joni/constants/MetaChar.java
new file mode 100644
index 0000000..3589aff
--- /dev/null
+++ b/src/org/joni/constants/MetaChar.java
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Integer codes for meta characters, numbered 0 through 5.
+  * Interface fields are implicitly {@code public static final}.
+  */
+ public interface MetaChar {
+     int ESCAPE                = 0;
+     int ANYCHAR               = 1;
+     int ANYTIME               = 2;
+     int ZERO_OR_ONE_TIME      = 3;
+     int ONE_OR_MORE_TIME      = 4;
+     int ANYCHAR_ANYTIME       = 5;
+
+     /** Has the value 0, the same number as ESCAPE. */
+     int INEFFECTIVE_META_CHAR = 0;
+ }
diff --git a/src/org/joni/constants/NodeStatus.java b/src/org/joni/constants/NodeStatus.java
new file mode 100644
index 0000000..901d47d
--- /dev/null
+++ b/src/org/joni/constants/NodeStatus.java
@@ -0,0 +1,39 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+ /**
+  * Single-bit status flags (bits 0 through 14) for node state words.
+  * Interface fields are implicitly {@code public static final}.
+  */
+ public interface NodeStatus {
+     /* status bits */
+     int NST_MIN_FIXED             = 1 << 0;
+     int NST_MAX_FIXED             = 1 << 1;
+     int NST_CLEN_FIXED            = 1 << 2;
+     int NST_MARK1                 = 1 << 3;
+     int NST_MARK2                 = 1 << 4;
+     int NST_MEM_BACKREFED         = 1 << 5;
+     int NST_STOP_BT_SIMPLE_REPEAT = 1 << 6;
+     int NST_RECURSION             = 1 << 7;
+     int NST_CALLED                = 1 << 8;
+     int NST_ADDR_FIXED            = 1 << 9;
+     int NST_NAMED_GROUP           = 1 << 10;
+     int NST_NAME_REF              = 1 << 11;
+     int NST_IN_REPEAT             = 1 << 12;   /* STK_REPEAT is nested in stack. */
+     int NST_NEST_LEVEL            = 1 << 13;
+     int NST_BY_NUMBER             = 1 << 14;   /* {n,m} */
+ }
diff --git a/src/org/joni/constants/NodeType.java b/src/org/joni/constants/NodeType.java
new file mode 100644
index 0000000..dccece2
--- /dev/null
+++ b/src/org/joni/constants/NodeType.java
@@ -0,0 +1,66 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface NodeType {
+ /* node type */
+ final int STR = 0;
+ final int CCLASS = 1;
+ final int CTYPE = 2;
+ final int CANY = 3;
+ final int BREF = 4;
+ final int QTFR = 5;
+ final int ENCLOSE = 6;
+ final int ANCHOR = 7;
+ final int LIST = 8;
+ final int ALT = 9;
+ final int CALL = 10;
+
+ final int BIT_STR = 1 << STR;
+ final int BIT_CCLASS = 1 << CCLASS;
+ final int BIT_CTYPE = 1 << CTYPE;
+ final int BIT_CANY = 1 << CANY;
+ final int BIT_BREF = 1 << BREF;
+ final int BIT_QTFR = 1 << QTFR;
+ final int BIT_ENCLOSE = 1 << ENCLOSE;
+ final int BIT_ANCHOR = 1 << ANCHOR;
+ final int BIT_LIST = 1 << LIST;
+ final int BIT_ALT = 1 << ALT;
+ final int BIT_CALL = 1 << CALL;
+
+ /* allowed node types in look-behind */
+ final int ALLOWED_IN_LB = ( BIT_LIST |
+ BIT_ALT |
+ BIT_STR |
+ BIT_CCLASS |
+ BIT_CTYPE |
+ BIT_CANY |
+ BIT_ANCHOR |
+ BIT_ENCLOSE |
+ BIT_QTFR |
+ BIT_CALL );
+
+ final int SIMPLE = ( BIT_STR |
+ BIT_CCLASS |
+ BIT_CTYPE |
+ BIT_CANY |
+ BIT_BREF);
+
+}
diff --git a/src/org/joni/constants/OPCode.java b/src/org/joni/constants/OPCode.java
new file mode 100644
index 0000000..8e06f88
--- /dev/null
+++ b/src/org/joni/constants/OPCode.java
@@ -0,0 +1,387 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+import org.joni.Config;
+
+public interface OPCode { /* opcode numbers 0..106: FINISH..SET_OPTION (0-87), then the single-byte (_SB) variants (88-106) */
+ final int FINISH = 0; /* matching process terminator (no more alternative) */
+ final int END = 1; /* pattern code terminator (success end) */
+
+ final int EXACT1 = 2; /* single byte, N = 1 */
+ final int EXACT2 = 3; /* single byte, N = 2 */
+ final int EXACT3 = 4; /* single byte, N = 3 */
+ final int EXACT4 = 5; /* single byte, N = 4 */
+ final int EXACT5 = 6; /* single byte, N = 5 */
+ final int EXACTN = 7; /* single byte */
+ final int EXACTMB2N1 = 8; /* mb-length = 2 N = 1 */
+ final int EXACTMB2N2 = 9; /* mb-length = 2 N = 2 */
+ final int EXACTMB2N3 = 10; /* mb-length = 2 N = 3 */
+ final int EXACTMB2N = 11; /* mb-length = 2 */
+ final int EXACTMB3N = 12; /* mb-length = 3 */
+ final int EXACTMBN = 13; /* other length */
+
+ final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC = 15; /* single byte, ignore case */
+
+ final int CCLASS = 16;
+ final int CCLASS_MB = 17;
+ final int CCLASS_MIX = 18;
+ final int CCLASS_NOT = 19;
+ final int CCLASS_MB_NOT = 20;
+ final int CCLASS_MIX_NOT = 21;
+ final int CCLASS_NODE = 22; /* pointer to CClassNode node */
+
+ final int ANYCHAR = 23; /* "." */
+ final int ANYCHAR_ML = 24; /* "." multi-line */
+ final int ANYCHAR_STAR = 25; /* ".*" */
+ final int ANYCHAR_ML_STAR = 26; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT = 27;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT = 28;
+
+ final int WORD = 29;
+ final int NOT_WORD = 30;
+ final int WORD_BOUND = 31;
+ final int NOT_WORD_BOUND = 32;
+ final int WORD_BEGIN = 33;
+ final int WORD_END = 34;
+
+ final int BEGIN_BUF = 35;
+ final int END_BUF = 36;
+ final int BEGIN_LINE = 37;
+ final int END_LINE = 38;
+ final int SEMI_END_BUF = 39;
+ final int BEGIN_POSITION = 40;
+
+ final int BACKREF1 = 41;
+ final int BACKREF2 = 42;
+ final int BACKREFN = 43;
+ final int BACKREFN_IC = 44;
+ final int BACKREF_MULTI = 45;
+ final int BACKREF_MULTI_IC = 46;
+ final int BACKREF_WITH_LEVEL = 47; /* \k<xxx+n>, \k<xxx-n> */
+
+ final int MEMORY_START = 48;
+ final int MEMORY_START_PUSH = 49; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH = 50; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH_REC = 51; /* push back-tracker to stack */
+ final int MEMORY_END = 52;
+ final int MEMORY_END_REC = 53; /* push marker to stack */
+
+ final int FAIL = 54; /* pop stack and move */
+ final int JUMP = 55;
+ final int PUSH = 56;
+ final int POP = 57;
+ final int PUSH_OR_JUMP_EXACT1 = 58; /* if match exact then push, else jump. */
+ final int PUSH_IF_PEEK_NEXT = 59; /* if match exact then push, else none. */
+
+ final int REPEAT = 60; /* {n,m} */
+ final int REPEAT_NG = 61; /* {n,m}? (non greedy) */
+ final int REPEAT_INC = 62;
+ final int REPEAT_INC_NG = 63; /* non greedy */
+ final int REPEAT_INC_SG = 64; /* search and get in stack */
+ final int REPEAT_INC_NG_SG = 65; /* search and get in stack (non greedy) */
+
+ final int NULL_CHECK_START = 66; /* null loop checker start */
+ final int NULL_CHECK_END = 67; /* null loop checker end */
+ final int NULL_CHECK_END_MEMST = 68; /* null loop checker end (with capture status) */
+ final int NULL_CHECK_END_MEMST_PUSH = 69; /* with capture status and push check-end */
+
+ final int PUSH_POS = 70; /* (?=...) start */
+ final int POP_POS = 71; /* (?=...) end */
+ final int PUSH_POS_NOT = 72; /* (?!...) start */
+ final int FAIL_POS = 73; /* (?!...) end */
+ final int PUSH_STOP_BT = 74; /* (?>...) start */
+ final int POP_STOP_BT = 75; /* (?>...) end */
+ final int LOOK_BEHIND = 76; /* (?<=...) start (needs no end opcode) */
+ final int PUSH_LOOK_BEHIND_NOT = 77; /* (?<!...) start */
+ final int FAIL_LOOK_BEHIND_NOT = 78; /* (?<!...) end */
+
+ final int CALL = 79; /* \g<name> */
+ final int RETURN = 80;
+
+ final int STATE_CHECK_PUSH = 81; /* combination explosion check and push */
+ final int STATE_CHECK_PUSH_OR_JUMP = 82; /* check ok -> push, else jump */
+ final int STATE_CHECK = 83; /* check only */
+ final int STATE_CHECK_ANYCHAR_STAR = 84;
+ final int STATE_CHECK_ANYCHAR_ML_STAR = 85;
+
+ /* no need: IS_DYNAMIC_OPTION() == 0 */
+ final int SET_OPTION_PUSH = 86; /* set option and push recover option */
+ final int SET_OPTION = 87; /* set option */
+
+ // single byte versions
+ final int ANYCHAR_SB = 88; /* "." */
+ final int ANYCHAR_ML_SB = 89; /* "." multi-line */
+ final int ANYCHAR_STAR_SB = 90; /* ".*" */
+ final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
+ final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
+ final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
+
+ final int CCLASS_SB = 96;
+ final int CCLASS_NOT_SB = 97;
+ final int WORD_SB = 98;
+ final int NOT_WORD_SB = 99;
+ final int WORD_BOUND_SB = 100;
+ final int NOT_WORD_BOUND_SB = 101;
+ final int WORD_BEGIN_SB = 102;
+ final int WORD_END_SB = 103;
+
+ final int LOOK_BEHIND_SB = 104;
+
+ final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC_SB = 106; /* single byte, ignore case */
+
+ /* Debug-only table: entries are listed in opcode order, so entry i is the printable name of opcode i; null unless Config.DEBUG_COMPILE is set */
+ public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] {
+ "finish", /*OP_FINISH*/
+ "end", /*OP_END*/
+ "exact1", /*OP_EXACT1*/
+ "exact2", /*OP_EXACT2*/
+ "exact3", /*OP_EXACT3*/
+ "exact4", /*OP_EXACT4*/
+ "exact5", /*OP_EXACT5*/
+ "exactn", /*OP_EXACTN*/
+ "exactmb2-n1", /*OP_EXACTMB2N1*/
+ "exactmb2-n2", /*OP_EXACTMB2N2*/
+ "exactmb2-n3", /*OP_EXACTMB2N3*/
+ "exactmb2-n", /*OP_EXACTMB2N*/
+ "exactmb3n", /*OP_EXACTMB3N*/
+ "exactmbn", /*OP_EXACTMBN*/
+ "exact1-ic", /*OP_EXACT1_IC*/
+ "exactn-ic", /*OP_EXACTN_IC*/
+ "cclass", /*OP_CCLASS*/
+ "cclass-mb", /*OP_CCLASS_MB*/
+ "cclass-mix", /*OP_CCLASS_MIX*/
+ "cclass-not", /*OP_CCLASS_NOT*/
+ "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
+ "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
+ "cclass-node", /*OP_CCLASS_NODE*/
+ "anychar", /*OP_ANYCHAR*/
+ "anychar-ml", /*OP_ANYCHAR_ML*/
+ "anychar*", /*OP_ANYCHAR_STAR*/
+ "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
+ "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
+ "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
+ "word", /*OP_WORD*/
+ "not-word", /*OP_NOT_WORD*/
+ "word-bound", /*OP_WORD_BOUND*/
+ "not-word-bound", /*OP_NOT_WORD_BOUND*/
+ "word-begin", /*OP_WORD_BEGIN*/
+ "word-end", /*OP_WORD_END*/
+ "begin-buf", /*OP_BEGIN_BUF*/
+ "end-buf", /*OP_END_BUF*/
+ "begin-line", /*OP_BEGIN_LINE*/
+ "end-line", /*OP_END_LINE*/
+ "semi-end-buf", /*OP_SEMI_END_BUF*/
+ "begin-position", /*OP_BEGIN_POSITION*/
+ "backref1", /*OP_BACKREF1*/
+ "backref2", /*OP_BACKREF2*/
+ "backrefn", /*OP_BACKREFN*/
+ "backrefn-ic", /*OP_BACKREFN_IC*/
+ "backref_multi", /*OP_BACKREF_MULTI*/
+ "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
+ "backref_at_level", /*OP_BACKREF_WITH_LEVEL*/
+ "mem-start", /*OP_MEMORY_START*/
+ "mem-start-push", /*OP_MEMORY_START_PUSH*/
+ "mem-end-push", /*OP_MEMORY_END_PUSH*/
+ "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
+ "mem-end", /*OP_MEMORY_END*/
+ "mem-end-rec", /*OP_MEMORY_END_REC*/
+ "fail", /*OP_FAIL*/
+ "jump", /*OP_JUMP*/
+ "push", /*OP_PUSH*/
+ "pop", /*OP_POP*/
+ "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
+ "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
+ "repeat", /*OP_REPEAT*/
+ "repeat-ng", /*OP_REPEAT_NG*/
+ "repeat-inc", /*OP_REPEAT_INC*/
+ "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
+ "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
+ "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
+ "null-check-start", /*OP_NULL_CHECK_START*/
+ "null-check-end", /*OP_NULL_CHECK_END*/
+ "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
+ "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
+ "push-pos", /*OP_PUSH_POS*/
+ "pop-pos", /*OP_POP_POS*/
+ "push-pos-not", /*OP_PUSH_POS_NOT*/
+ "fail-pos", /*OP_FAIL_POS*/
+ "push-stop-bt", /*OP_PUSH_STOP_BT*/
+ "pop-stop-bt", /*OP_POP_STOP_BT*/
+ "look-behind", /*OP_LOOK_BEHIND*/
+ "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
+ "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
+ "call", /*OP_CALL*/
+ "return", /*OP_RETURN*/
+ "state-check-push", /*OP_STATE_CHECK_PUSH*/
+ "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
+ "state-check", /*OP_STATE_CHECK*/
+ "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
+ "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
+ "set-option-push", /*OP_SET_OPTION_PUSH*/
+ "set-option", /*OP_SET_OPTION*/
+
+ // single byte versions
+ "anychar-sb", /*OP_ANYCHAR_SB*/
+ "anychar-ml-sb", /*OP_ANYCHAR_ML_SB*/
+ "anychar*-sb", /*OP_ANYCHAR_STAR_SB*/
+ "anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR_SB*/
+ "anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
+ "anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
+ "state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
+ "state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/
+
+ "cclass-sb", /*OP_CCLASS_SB*/
+ "cclass-not-sb", /*OP_CCLASS_NOT_SB*/
+
+ "word-sb", /*OP_WORD_SB*/
+ "not-word-sb", /*OP_NOT_WORD_SB*/
+ "word-bound-sb", /*OP_WORD_BOUND_SB*/
+ "not-word-bound-sb", /*OP_NOT_WORD_BOUND_SB*/
+ "word-begin-sb", /*OP_WORD_BEGIN_SB*/
+ "word-end-sb", /*OP_WORD_END_SB*/
+
+ "look-behind-sb", /*OP_LOOK_BEHIND_SB*/
+
+ "exact1-ic-sb", /*OP_EXACT1_IC_SB*/
+ "exactn-ic-sb", /*OP_EXACTN_IC_SB*/
+
+ } : null;
+ /* Debug-only table in the same opcode order: entry i is the Arguments.* argument type listed for opcode i; null unless Config.DEBUG_COMPILE is set */
+ public final int OpCodeArgTypes[] = Config.DEBUG_COMPILE ? new int[] {
+ Arguments.NON, /*OP_FINISH*/
+ Arguments.NON, /*OP_END*/
+ Arguments.SPECIAL, /*OP_EXACT1*/
+ Arguments.SPECIAL, /*OP_EXACT2*/
+ Arguments.SPECIAL, /*OP_EXACT3*/
+ Arguments.SPECIAL, /*OP_EXACT4*/
+ Arguments.SPECIAL, /*OP_EXACT5*/
+ Arguments.SPECIAL, /*OP_EXACTN*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N1*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N2*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N3*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N*/
+ Arguments.SPECIAL, /*OP_EXACTMB3N*/
+ Arguments.SPECIAL, /*OP_EXACTMBN*/
+ Arguments.SPECIAL, /*OP_EXACT1_IC*/
+ Arguments.SPECIAL, /*OP_EXACTN_IC*/
+ Arguments.SPECIAL, /*OP_CCLASS*/
+ Arguments.SPECIAL, /*OP_CCLASS_MB*/
+ Arguments.SPECIAL, /*OP_CCLASS_MIX*/
+ Arguments.SPECIAL, /*OP_CCLASS_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_NODE*/
+ Arguments.NON, /*OP_ANYCHAR*/
+ Arguments.NON, /*OP_ANYCHAR_ML*/
+ Arguments.NON, /*OP_ANYCHAR_STAR*/
+ Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
+ Arguments.NON, /*OP_WORD*/
+ Arguments.NON, /*OP_NOT_WORD*/
+ Arguments.NON, /*OP_WORD_BOUND*/
+ Arguments.NON, /*OP_NOT_WORD_BOUND*/
+ Arguments.NON, /*OP_WORD_BEGIN*/
+ Arguments.NON, /*OP_WORD_END*/
+ Arguments.NON, /*OP_BEGIN_BUF*/
+ Arguments.NON, /*OP_END_BUF*/
+ Arguments.NON, /*OP_BEGIN_LINE*/
+ Arguments.NON, /*OP_END_LINE*/
+ Arguments.NON, /*OP_SEMI_END_BUF*/
+ Arguments.NON, /*OP_BEGIN_POSITION*/
+ Arguments.NON, /*OP_BACKREF1*/
+ Arguments.NON, /*OP_BACKREF2*/
+ Arguments.MEMNUM, /*OP_BACKREFN*/
+ Arguments.SPECIAL, /*OP_BACKREFN_IC*/
+ Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
+ Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
+ Arguments.SPECIAL, /*OP_BACKREF_WITH_LEVEL*/
+ Arguments.MEMNUM, /*OP_MEMORY_START*/
+ Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
+ Arguments.MEMNUM, /*OP_MEMORY_END*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
+ Arguments.NON, /*OP_FAIL*/
+ Arguments.RELADDR, /*OP_JUMP*/
+ Arguments.RELADDR, /*OP_PUSH*/
+ Arguments.NON, /*OP_POP*/
+ Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
+ Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
+ Arguments.SPECIAL, /*OP_REPEAT*/
+ Arguments.SPECIAL, /*OP_REPEAT_NG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
+ Arguments.NON, /*OP_PUSH_POS*/
+ Arguments.NON, /*OP_POP_POS*/
+ Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
+ Arguments.NON, /*OP_FAIL_POS*/
+ Arguments.NON, /*OP_PUSH_STOP_BT*/
+ Arguments.NON, /*OP_POP_STOP_BT*/
+ Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
+ Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
+ Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
+ Arguments.ABSADDR, /*OP_CALL*/
+ Arguments.NON, /*OP_RETURN*/
+ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
+ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
+ Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
+ Arguments.OPTION, /*OP_SET_OPTION*/
+
+ // single byte versions
+ Arguments.NON, /*OP_ANYCHAR_SB*/
+ Arguments.NON, /*OP_ANYCHAR_ML_SB*/
+ Arguments.NON, /*OP_ANYCHAR_STAR_SB*/
+ Arguments.NON, /*OP_ANYCHAR_ML_STAR_SB*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/
+
+ Arguments.SPECIAL, /*OP_CCLASS_SB*/
+ Arguments.SPECIAL, /*OP_CCLASS_NOT_SB*/
+
+ Arguments.NON, /*OP_WORD_SB*/
+ Arguments.NON, /*OP_NOT_WORD_SB*/
+ Arguments.NON, /*OP_WORD_BOUND_SB*/
+ Arguments.NON, /*OP_NOT_WORD_BOUND_SB*/
+ Arguments.NON, /*OP_WORD_BEGIN_SB*/
+ Arguments.NON, /*OP_WORD_END_SB*/
+
+ Arguments.SPECIAL, /*OP_LOOK_BEHIND_SB*/
+
+ Arguments.SPECIAL, /*OP_EXACT1_IC_SB*/
+ Arguments.SPECIAL, /*OP_EXACTN_IC_SB*/
+ } : null;
+}
diff --git a/src/org/joni/constants/OPSize.java b/src/org/joni/constants/OPSize.java
new file mode 100644
index 0000000..dcd419b
--- /dev/null
+++ b/src/org/joni/constants/OPSize.java
@@ -0,0 +1,75 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface OPSize {
+ /* size of an opcode and of each operand kind; every one of them is currently 1 */
+ // this might be helpful for potential byte[] migration
+ final int OPCODE = 1;
+ final int RELADDR = 1;
+ final int ABSADDR = 1;
+ final int LENGTH = 1;
+ final int MEMNUM = 1;
+ final int STATE_CHECK_NUM = 1;
+ final int REPEATNUM = 1;
+ final int OPTION = 1;
+ final int CODE_POINT = 1;
+ final int POINTER = 1;
+
+ /* op-code + arg size: total size of each instruction, i.e. OPCODE plus the size of every operand it carries */
+ /* NOTE(review): the bare "+ 1" terms presumably stand for a one-unit operand that has no named size constant - confirm in the compiler */
+ final int ANYCHAR_STAR = OPCODE;
+ final int ANYCHAR_STAR_PEEK_NEXT = (OPCODE + 1);
+ final int JUMP = (OPCODE + RELADDR);
+ final int PUSH = (OPCODE + RELADDR);
+ final int POP = OPCODE;
+ final int PUSH_OR_JUMP_EXACT1 = (OPCODE + RELADDR + 1);
+ final int PUSH_IF_PEEK_NEXT = (OPCODE + RELADDR + 1);
+ final int REPEAT_INC = (OPCODE + MEMNUM);
+ final int REPEAT_INC_NG = (OPCODE + MEMNUM);
+ final int PUSH_POS = OPCODE;
+ final int PUSH_POS_NOT = (OPCODE + RELADDR);
+ final int POP_POS = OPCODE;
+ final int FAIL_POS = OPCODE;
+ final int SET_OPTION = (OPCODE + OPTION);
+ final int SET_OPTION_PUSH = (OPCODE + OPTION);
+ final int FAIL = OPCODE;
+ final int MEMORY_START = (OPCODE + MEMNUM);
+ final int MEMORY_START_PUSH = (OPCODE + MEMNUM);
+ final int MEMORY_END_PUSH = (OPCODE + MEMNUM);
+ final int MEMORY_END_PUSH_REC = (OPCODE + MEMNUM);
+ final int MEMORY_END = (OPCODE + MEMNUM);
+ final int MEMORY_END_REC = (OPCODE + MEMNUM);
+ final int PUSH_STOP_BT = OPCODE;
+ final int POP_STOP_BT = OPCODE;
+ final int NULL_CHECK_START = (OPCODE + MEMNUM);
+ final int NULL_CHECK_END = (OPCODE + MEMNUM);
+ final int LOOK_BEHIND = (OPCODE + LENGTH);
+ final int PUSH_LOOK_BEHIND_NOT = (OPCODE + RELADDR + LENGTH);
+ final int FAIL_LOOK_BEHIND_NOT = OPCODE;
+ final int CALL = (OPCODE + ABSADDR);
+ final int RETURN = OPCODE;
+
+ // #ifdef USE_COMBINATION_EXPLOSION_CHECK (kept as a marker only; the constants below are declared unconditionally)
+ final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM);
+ final int STATE_CHECK_PUSH = (OPCODE + STATE_CHECK_NUM + RELADDR);
+ final int STATE_CHECK_PUSH_OR_JUMP = (OPCODE + STATE_CHECK_NUM + RELADDR);
+ final int STATE_CHECK_ANYCHAR_STAR = (OPCODE + STATE_CHECK_NUM);
+}
diff --git a/src/org/joni/constants/PosixBracket.java b/src/org/joni/constants/PosixBracket.java
new file mode 100644
index 0000000..6f66ba5
--- /dev/null
+++ b/src/org/joni/constants/PosixBracket.java
@@ -0,0 +1,83 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+import org.joni.util.BytesHash;
+
+public class PosixBracket{
+ /* Capitalised POSIX bracket names as byte arrays, in the same order as PBSValues; String.getBytes() without an argument encodes with the platform default charset */
+ public static final byte[][] PBSNamesUpper = {
+ "Alnum".getBytes(),
+ "Alpha".getBytes(),
+ "Blank".getBytes(),
+ "Cntrl".getBytes(),
+ "Digit".getBytes(),
+ "Graph".getBytes(),
+ "Lower".getBytes(),
+ "Print".getBytes(),
+ "Punct".getBytes(),
+ "Space".getBytes(),
+ "Upper".getBytes(),
+ "XDigit".getBytes(),
+ "ASCII".getBytes(),
+ "Word".getBytes()
+ };
+ /* Lower-case spellings of the same names in the same order; not used by the static initialiser of this class */
+ public static final byte[][] PBSNamesLower = {
+ "alnum".getBytes(),
+ "alpha".getBytes(),
+ "blank".getBytes(),
+ "cntrl".getBytes(),
+ "digit".getBytes(),
+ "graph".getBytes(),
+ "lower".getBytes(),
+ "print".getBytes(),
+ "punct".getBytes(),
+ "space".getBytes(),
+ "upper".getBytes(),
+ "xdigit".getBytes(),
+ "ascii".getBytes(),
+ "word".getBytes()
+ };
+ /* CharacterType constant for the name stored at the same index of the two name tables above */
+ public static final int PBSValues[] = {
+ CharacterType.ALNUM,
+ CharacterType.ALPHA,
+ CharacterType.BLANK,
+ CharacterType.CNTRL,
+ CharacterType.DIGIT,
+ CharacterType.GRAPH,
+ CharacterType.LOWER,
+ CharacterType.PRINT,
+ CharacterType.PUNCT,
+ CharacterType.SPACE,
+ CharacterType.UPPER,
+ CharacterType.XDIGIT,
+ CharacterType.ASCII,
+ CharacterType.WORD,
+ };
+ /* Lookup from capitalised name bytes to CharacterType value; NOTE(review): 15 is presumably an initial size hint - confirm against BytesHash */
+ public static final BytesHash<Integer> PBSTableUpper = new BytesHash<Integer>(15);
+ /* Fills PBSTableUpper by pairing PBSNamesUpper[i] with PBSValues[i] for every index */
+ static {
+ for (int i=0; i<PBSValues.length; i++) PBSTableUpper.put(PBSNamesUpper[i], PBSValues[i]);
+ }
+
+}
diff --git a/src/org/joni/constants/Reduce.java b/src/org/joni/constants/Reduce.java
new file mode 100644
index 0000000..e62de7f
--- /dev/null
+++ b/src/org/joni/constants/Reduce.java
@@ -0,0 +1,60 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+import static org.joni.constants.Reduce.ReduceType.A;
+import static org.joni.constants.Reduce.ReduceType.AQ;
+import static org.joni.constants.Reduce.ReduceType.ASIS;
+import static org.joni.constants.Reduce.ReduceType.DEL;
+import static org.joni.constants.Reduce.ReduceType.PQ_Q;
+import static org.joni.constants.Reduce.ReduceType.P_QQ;
+import static org.joni.constants.Reduce.ReduceType.QQ;
+
+public interface Reduce {
+ /* The possible outcomes stored in REDUCE_TABLE; each comment shows the quantifier form that outcome stands for */
+ enum ReduceType {
+ ASIS, /* as is */
+ DEL, /* delete parent */
+ A, /* to '*' */
+ AQ, /* to '*?' */
+ QQ, /* to '??' */
+ P_QQ, /* to '+)??' */
+ PQ_Q, /* to '+?)?' */
+ }
+ /* 6x6 table of outcomes; each row is labelled with a quantifier. NOTE(review): columns presumably follow the same ?, *, +, ??, *?, +? order - confirm which index is the parent and which the child at the use site */
+ final ReduceType[][]REDUCE_TABLE = {
+ {DEL, A, A, QQ, AQ, ASIS}, /* '?' */
+ {DEL, DEL, DEL, P_QQ, P_QQ, DEL}, /* '*' */
+ {A, A, DEL, ASIS, P_QQ, DEL}, /* '+' */
+ {DEL, AQ, AQ, DEL, AQ, AQ}, /* '??' */
+ {DEL, DEL, DEL, DEL, DEL, DEL}, /* '*?' */
+ {ASIS, PQ_Q, DEL, AQ, AQ, DEL} /* '+?' */
+ };
+
+ /* Quantifier spellings, in the same order as the row labels of REDUCE_TABLE */
+ final String PopularQStr[] = new String[] {
+ "?", "*", "+", "??", "*?", "+?"
+ };
+ /* Seven strings, one per ReduceType constant in declaration order; NOTE(review): presumably indexed by ReduceType.ordinal() - confirm. Implicitly final, like every interface field */
+ String ReduceQStr[]= new String[] {
+ "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+ };
+
+}
+
diff --git a/src/org/joni/constants/RegexState.java b/src/org/joni/constants/RegexState.java
new file mode 100644
index 0000000..acc6d84
--- /dev/null
+++ b/src/org/joni/constants/RegexState.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+// we don't need this at the moment
+public interface RegexState { // integer state codes: NORMAL and SEARCHING are non-negative, COMPILING and MODIFY are negative
+ final int NORMAL = 0;
+ final int SEARCHING = 1;
+ final int COMPILING = -1;
+ final int MODIFY = -2;
+}
diff --git a/src/org/joni/constants/ReturnCodes.java b/src/org/joni/constants/ReturnCodes.java
new file mode 100644
index 0000000..7afcde6
--- /dev/null
+++ b/src/org/joni/constants/ReturnCodes.java
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface ReturnCodes { // integer result codes: NORMAL is 0, the other two are negative
+ final int NORMAL = 0;
+ final int MISMATCH = -1; // NOTE(review): presumably "no match found" - confirm in the matcher
+ final int NO_SUPPORT_CONFIG = -2; // NOTE(review): presumably "configuration not supported" - confirm at the use sites
+}
diff --git a/src/org/joni/constants/StackPopLevel.java b/src/org/joni/constants/StackPopLevel.java
new file mode 100644
index 0000000..f1f93bd
--- /dev/null
+++ b/src/org/joni/constants/StackPopLevel.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface StackPopLevel { // three level codes, 0..2; NOTE(review): presumably select how much state a stack pop restores - confirm in StackMachine
+ final int FREE = 0;
+ final int MEM_START = 1;
+ final int ALL = 2;
+
+}
diff --git a/src/org/joni/constants/StackType.java b/src/org/joni/constants/StackType.java
new file mode 100644
index 0000000..34ea41f
--- /dev/null
+++ b/src/org/joni/constants/StackType.java
@@ -0,0 +1,51 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface StackType { // stack type codes and the bit masks used to classify them
+ /** stack **/
+ final int INVALID_STACK_INDEX = -1;
+
+ /* stack type */
+ /* used by normal-POP: the only types with bits set in the low byte */
+ final int ALT = 0x0001;
+ final int LOOK_BEHIND_NOT = 0x0002;
+ final int POS_NOT = 0x0003;
+ /* handled by normal-POP */
+ final int MEM_START = 0x0100;
+ final int MEM_END = 0x8200;
+ final int REPEAT_INC = 0x0300;
+ final int STATE_CHECK_MARK = 0x1000;
+ /* avoided by normal-POP */
+ final int NULL_CHECK_START = 0x3000;
+ final int NULL_CHECK_END = 0x5000; /* for recursive call */
+ final int MEM_END_MARK = 0x8400;
+ final int POS = 0x0500; /* used when POP-POS */
+ final int STOP_BT = 0x0600; /* mark for "(?>...)" */
+ final int REPEAT = 0x0700;
+ final int CALL_FRAME = 0x0800;
+ final int RETURN = 0x0900;
+ final int VOID = 0x0a00; /* used to fill a blank */
+
+ /* stack type check mask */
+ final int MASK_POP_USED = 0x00ff; /* low byte: non-zero only for ALT, LOOK_BEHIND_NOT and POS_NOT */
+ final int MASK_TO_VOID_TARGET = 0x10ff; /* low byte plus bit 0x1000 */
+ final int MASK_MEM_END_OR_MARK = 0x8000; /* bit 0x8000: set only in MEM_END and MEM_END_MARK */
+}
diff --git a/src/org/joni/constants/StringType.java b/src/org/joni/constants/StringType.java
new file mode 100644
index 0000000..46972e4
--- /dev/null
+++ b/src/org/joni/constants/StringType.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface StringType { // four distinct single-bit flags (bits 0-3), so any combination can be OR-ed into one int
+ final int NSTR_RAW = 1<<0;
+ final int NSTR_AMBIG = 1<<1;
+ final int NSTR_DONT_GET_OPT_INFO = 1<<2;
+ final int NSTR_SHARED = 1<<3;
+}
diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java
new file mode 100644
index 0000000..3fd5b4c
--- /dev/null
+++ b/src/org/joni/constants/SyntaxProperties.java
@@ -0,0 +1,124 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface SyntaxProperties { /* Bit flags describing a regex syntax. Three groups (OP_*, OP2_*, behavior) reuse the same bit positions, so a flag from one group must never be OR-ed into, or tested against, a word that holds another group. */
+ /* syntax (operators): OP_* flags, one bit each, bits 0-30 */
+ final int OP_VARIABLE_META_CHARACTERS = (1<<0);
+ final int OP_DOT_ANYCHAR = (1<<1); /* . */
+ final int OP_ASTERISK_ZERO_INF = (1<<2); /* * */
+ final int OP_ESC_ASTERISK_ZERO_INF = (1<<3); /* presumably \* (escaped form, by analogy with OP_ESC_BRACE_INTERVAL) */
+ final int OP_PLUS_ONE_INF = (1<<4); /* + */
+ final int OP_ESC_PLUS_ONE_INF = (1<<5); /* presumably \+ (escaped form) */
+ final int OP_QMARK_ZERO_ONE = (1<<6); /* ? */
+ final int OP_ESC_QMARK_ZERO_ONE = (1<<7); /* presumably \? (escaped form) */
+ final int OP_BRACE_INTERVAL = (1<<8); /* {lower,upper} */
+ final int OP_ESC_BRACE_INTERVAL = (1<<9); /* \{lower,upper\} */
+ final int OP_VBAR_ALT = (1<<10); /* | */
+ final int OP_ESC_VBAR_ALT = (1<<11); /* \| */
+ final int OP_LPAREN_SUBEXP = (1<<12); /* (...) */
+ final int OP_ESC_LPAREN_SUBEXP = (1<<13); /* \(...\) */
+ final int OP_ESC_AZ_BUF_ANCHOR = (1<<14); /* \A, \Z, \z */
+ final int OP_ESC_CAPITAL_G_BEGIN_ANCHOR = (1<<15); /* \G */
+ final int OP_DECIMAL_BACKREF = (1<<16); /* \num */
+ final int OP_BRACKET_CC = (1<<17); /* [...] */
+ final int OP_ESC_W_WORD = (1<<18); /* \w, \W */
+ final int OP_ESC_LTGT_WORD_BEGIN_END = (1<<19); /* \<, \> */
+ final int OP_ESC_B_WORD_BOUND = (1<<20); /* \b, \B */
+ final int OP_ESC_S_WHITE_SPACE = (1<<21); /* \s, \S */
+ final int OP_ESC_D_DIGIT = (1<<22); /* \d, \D */
+ final int OP_LINE_ANCHOR = (1<<23); /* ^, $ */
+ final int OP_POSIX_BRACKET = (1<<24); /* [:xxxx:] */
+ final int OP_QMARK_NON_GREEDY = (1<<25); /* ??,*?,+?,{n,m}? */
+ final int OP_ESC_CONTROL_CHARS = (1<<26); /* \n,\r,\t,\a ... */
+ final int OP_ESC_C_CONTROL = (1<<27); /* \cx */
+ final int OP_ESC_OCTAL3 = (1<<28); /* \OOO */
+ final int OP_ESC_X_HEX2 = (1<<29); /* \xHH */
+ final int OP_ESC_X_BRACE_HEX8 = (1<<30); /* \x{7HHHHHHH} */
+ /* second operator group: OP2_* flags, bits 0-20; bit 18 is unassigned because its constant is commented out */
+ final int OP2_ESC_CAPITAL_Q_QUOTE = (1<<0); /* \Q...\E */
+ final int OP2_QMARK_GROUP_EFFECT = (1<<1); /* (?...) */
+ final int OP2_OPTION_PERL = (1<<2); /* (?imsx), (?-imsx) */
+ final int OP2_OPTION_RUBY = (1<<3); /* (?imx), (?-imx) */
+ final int OP2_PLUS_POSSESSIVE_REPEAT = (1<<4); /* ?+,*+,++ */
+ final int OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5); /* {n,m}+ */
+ final int OP2_CCLASS_SET_OP = (1<<6); /* [...&&..[..]..] */
+ final int OP2_QMARK_LT_NAMED_GROUP = (1<<7); /* (?<name>...) */
+ final int OP2_ESC_K_NAMED_BACKREF = (1<<8); /* \k<name> */
+ final int OP2_ESC_G_SUBEXP_CALL = (1<<9); /* \g<name>, \g<n> */
+ final int OP2_ATMARK_CAPTURE_HISTORY = (1<<10); /* (?@..), (?@<x>..) */
+ final int OP2_ESC_CAPITAL_C_BAR_CONTROL = (1<<11); /* \C-x */
+ final int OP2_ESC_CAPITAL_M_BAR_META = (1<<12); /* \M-x */
+ final int OP2_ESC_V_VTAB = (1<<13); /* \v as VTAB */
+ final int OP2_ESC_U_HEX4 = (1<<14); /* \\uHHHH */
+ final int OP2_ESC_GNU_BUF_ANCHOR = (1<<15); /* \`, \' */
+ final int OP2_ESC_P_BRACE_CHAR_PROPERTY = (1<<16); /* \p{...}, \P{...} */
+ final int OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = (1<<17); /* \p{^..}, \P{^..} */
+ /* final int OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18); */
+ final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */
+ final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */
+
+ /* syntax (behavior): third flag group, bits 0-9 plus bit 31 */
+ final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented; 1<<31 is the int sign bit (Integer.MIN_VALUE), so test it with (x & FLAG) != 0, never with > 0 */
+ final int CONTEXT_INDEP_REPEAT_OPS = (1<<0); /* ?, *, +, {n,m} */
+ final int CONTEXT_INVALID_REPEAT_OPS = (1<<1); /* error or ignore */
+ final int ALLOW_UNMATCHED_CLOSE_SUBEXP = (1<<2); /* ...)... */
+ final int ALLOW_INVALID_INTERVAL = (1<<3); /* {??? */
+ final int ALLOW_INTERVAL_LOW_ABBREV = (1<<4); /* {,n} => {0,n} */
+ final int STRICT_CHECK_BACKREF = (1<<5); /* /(\1)/,/\1()/ ..*/
+ final int DIFFERENT_LEN_ALT_LOOK_BEHIND = (1<<6); /* (?<=a|bc) */
+ final int CAPTURE_ONLY_NAMED_GROUP = (1<<7); /* see doc/RE */
+ final int ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8); /* (?<x>)(?<x>) */
+ final int FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9); /* a{n}?=(?:a{n})? */
+
+ /* syntax (behavior) in char class [...]: bits 20-23, combined with the behavior flags above in GNU_REGEX_BV */
+ final int NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20); /* [^...] */
+ final int BACKSLASH_ESCAPE_IN_CC = (1<<21); /* [..\w..] etc.. */
+ final int ALLOW_EMPTY_RANGE_IN_CC = (1<<22);
+ final int ALLOW_DOUBLE_RANGE_OP_IN_CC = (1<<23); /* [0-9-a]=[0-9\-a] */
+ /* syntax (behavior) warning: bits 24-25 */
+ final int WARN_CC_OP_NOT_ESCAPED = (1<<24); /* [,-,] */
+ final int WARN_REDUNDANT_NESTED_REPEAT = (1<<25); /* (?:a*)+ */
+
+ final int POSIX_COMMON_OP = /* composite mask built from OP_* flags only */
+ OP_DOT_ANYCHAR | OP_POSIX_BRACKET |
+ OP_DECIMAL_BACKREF |
+ OP_BRACKET_CC | OP_ASTERISK_ZERO_INF |
+ OP_LINE_ANCHOR |
+ OP_ESC_CONTROL_CHARS;
+
+ final int GNU_REGEX_OP = /* composite mask built from OP_* flags only */
+ OP_DOT_ANYCHAR | OP_BRACKET_CC |
+ OP_POSIX_BRACKET | OP_DECIMAL_BACKREF |
+ OP_BRACE_INTERVAL | OP_LPAREN_SUBEXP |
+ OP_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF |
+ OP_QMARK_ZERO_ONE |
+ OP_ESC_AZ_BUF_ANCHOR | OP_ESC_CAPITAL_G_BEGIN_ANCHOR |
+ OP_ESC_W_WORD |
+ OP_ESC_B_WORD_BOUND | OP_ESC_LTGT_WORD_BEGIN_END |
+ OP_ESC_S_WHITE_SPACE | OP_ESC_D_DIGIT |
+ OP_LINE_ANCHOR;
+
+ final int GNU_REGEX_BV = /* composite mask built from behavior flags (general and char-class) only */
+ CONTEXT_INDEP_ANCHORS | CONTEXT_INDEP_REPEAT_OPS |
+ CONTEXT_INVALID_REPEAT_OPS | ALLOW_INVALID_INTERVAL |
+ BACKSLASH_ESCAPE_IN_CC | ALLOW_DOUBLE_RANGE_OP_IN_CC;
+}
diff --git a/src/org/joni/constants/TargetInfo.java b/src/org/joni/constants/TargetInfo.java
new file mode 100644
index 0000000..3fdbe5a
--- /dev/null
+++ b/src/org/joni/constants/TargetInfo.java
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface TargetInfo { /* Sequential codes 0..3, not bit flags: IS_EMPTY_REC (3) equals IS_EMPTY | IS_EMPTY_MEM numerically, so compare with == rather than masking with &. */
+ final int ISNOT_EMPTY = 0; /* the only zero value */
+ final int IS_EMPTY = 1;
+ final int IS_EMPTY_MEM = 2;
+ final int IS_EMPTY_REC = 3;
+}
diff --git a/src/org/joni/constants/TokenType.java b/src/org/joni/constants/TokenType.java
new file mode 100644
index 0000000..9ea159d
--- /dev/null
+++ b/src/org/joni/constants/TokenType.java
@@ -0,0 +1,48 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public enum TokenType { /* Token kinds; declaration order fixes each constant's ordinal(), so inserting or reordering entries changes ordinals. */
+ EOT, /* end of token */
+ RAW_BYTE,
+ CHAR,
+ STRING,
+ CODE_POINT,
+ ANYCHAR,
+ CHAR_TYPE,
+ BACKREF,
+ CALL,
+ ANCHOR,
+ OP_REPEAT,
+ INTERVAL,
+ ANYCHAR_ANYTIME, /* SQL '%' == .* */
+ ALT,
+ SUBEXP_OPEN,
+ SUBEXP_CLOSE,
+ CC_OPEN,
+ QUOTE_OPEN,
+ CHAR_PROPERTY, /* \p{...}, \P{...} */
+ /* in cc: the remaining constants are the ones listed for use inside a character class */
+ CC_CLOSE,
+ CC_RANGE,
+ POSIX_BRACKET_OPEN,
+ CC_AND, /* && */
+ CC_CC_OPEN /* [ */
+}
diff --git a/src/org/joni/constants/Traverse.java b/src/org/joni/constants/Traverse.java
new file mode 100644
index 0000000..1c08ea5
--- /dev/null
+++ b/src/org/joni/constants/Traverse.java
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.constants;
+
+public interface Traverse { /* Two single-bit callback flags plus their union. */
+ final int TRAVERSE_CALLBACK_AT_FIRST = 1; /* bit 0 */
+ final int TRAVERSE_CALLBACK_AT_LAST = 2; /* bit 1 */
+ final int TRAVERSE_CALLBACK_AT_BOTH = TRAVERSE_CALLBACK_AT_FIRST | TRAVERSE_CALLBACK_AT_LAST; /* both bits set, value 3 */
+}
diff --git a/src/org/joni/encoding/AbstractEncoding.java b/src/org/joni/encoding/AbstractEncoding.java
new file mode 100644
index 0000000..8f407cd
--- /dev/null
+++ b/src/org/joni/encoding/AbstractEncoding.java
@@ -0,0 +1,217 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.IntHolder;
+import org.joni.constants.PosixBracket;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+abstract class AbstractEncoding extends Encoding { // Package-private base class supplying ASCII-only defaults: ctype table lookup, LF newline test, case folding and POSIX property-name lookup.
+
+ private final short CTypeTable[]; // indexed by code; bit n of an entry is set when that code has ctype n (see isCodeCTypeInternal)
+
+ protected AbstractEncoding(short[]CTypeTable) { // keeps the caller's array by reference, no defensive copy
+ this.CTypeTable = CTypeTable;
+ }
+
+ /** CTYPE_TO_BIT: returns the int mask with only bit number ctype set (Java uses the shift distance modulo 32).
+ */
+ private static int CTypeToBit(int ctype) {
+ return 1 << ctype;
+ }
+
+ /** ONIGENC_IS_XXXXXX_CODE_CTYPE: true when bit ctype is set in CTypeTable[code]. No bounds check is made, so a code outside the table throws ArrayIndexOutOfBoundsException. Entries are shorts that are sign-extended to int before the AND, so only ctype 0..15 address stored bits.
+ */
+ protected final boolean isCodeCTypeInternal(int code, int ctype) {
+ return (CTypeTable[code] & CTypeToBit(ctype)) != 0;
+ }
+
+ /** onigenc_is_mbc_newline_0x0a / used also by multibyte encodings
+ * Returns true only when p < end and bytes[p] is 0x0a (LF); when p is at or past end it returns false without reading the array.
+ */
+ @Override
+ public boolean isNewLine(byte[]bytes, int p, int end) {
+ return p < end ? bytes[p] == (byte)0x0a : false;
+ }
+ // Folds one byte: stores the AsciiToLowerCaseTable entry for bytes[pp.value] (read as unsigned) in lower[0], advances pp.value by one and returns 1 (presumably the folded length). flag and end are not consulted.
+ protected final int asciiMbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+ lower[0] = AsciiToLowerCaseTable[bytes[pp.value] & 0xff];
+ pp.value++;
+ return 1;
+ }
+
+ /** onigenc_ascii_mbc_case_fold: delegates to asciiMbcCaseFold with the arguments unchanged.
+ */
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+ return asciiMbcCaseFold(flag, bytes, pp, end, lower);
+ }
+
+ public static final byte AsciiToLowerCaseTable[] = new byte[]{ // 256 entries indexed by unsigned byte value: 0x41-0x5a ('A'-'Z') map to 0x61-0x7a, every other index maps to itself; the octal char literals are narrowed to byte, so entries from '\200' upward are stored as negative bytes
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377',
+ };
+
+ public static final byte AsciiToUpperCaseTable[] = new byte[]{ // 256 entries indexed by unsigned byte value: 0x61-0x7a ('a'-'z') map to 0x41-0x5a, every other index maps to itself
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107',
+ (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117',
+ (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127',
+ (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107',
+ (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117',
+ (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127',
+ (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377',
+ };
+ // Reports every AsciiLowerMap pair to fun in both directions: apply(upper, {lower}, 1, arg) then apply(lower, {upper}, 1, arg). The single one-element code array is reused and overwritten between calls; flag is not consulted.
+ protected final void asciiApplyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+ int[]code = new int[]{0};
+
+ for (int i=0; i<AsciiLowerMap.length; i++) {
+ code[0] = AsciiLowerMap[i][1];
+ fun.apply(AsciiLowerMap[i][0], code, 1, arg);
+
+ code[0] = AsciiLowerMap[i][0];
+ fun.apply(AsciiLowerMap[i][1], code, 1, arg);
+ }
+ }
+
+ /** onigenc_ascii_apply_all_case_fold / used also by multibyte encodings
+ * Delegates to asciiApplyAllCaseFold with the arguments unchanged. */
+ @Override
+ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+ asciiApplyAllCaseFold(flag, fun, arg);
+ }
+
+ protected static final int AsciiLowerMap[][] = { // 26 rows of {upper, lower} code pairs covering 'A'..'Z' (0x41..0x5a) and 'a'..'z' (0x61..0x7a)
+ {0x41, 0x61},
+ {0x42, 0x62},
+ {0x43, 0x63},
+ {0x44, 0x64},
+ {0x45, 0x65},
+ {0x46, 0x66},
+ {0x47, 0x67},
+ {0x48, 0x68},
+ {0x49, 0x69},
+ {0x4a, 0x6a},
+ {0x4b, 0x6b},
+ {0x4c, 0x6c},
+ {0x4d, 0x6d},
+ {0x4e, 0x6e},
+ {0x4f, 0x6f},
+ {0x50, 0x70},
+ {0x51, 0x71},
+ {0x52, 0x72},
+ {0x53, 0x73},
+ {0x54, 0x74},
+ {0x55, 0x75},
+ {0x56, 0x76},
+ {0x57, 0x77},
+ {0x58, 0x78},
+ {0x59, 0x79},
+ {0x5a, 0x7a}
+ };
+
+ protected static final CaseFoldCodeItem[] EMPTY_FOLD_CODES = new CaseFoldCodeItem[]{}; // shared zero-length result returned when the byte has no case counterpart
+ protected final CaseFoldCodeItem[]asciiCaseFoldCodesByString(int flag, byte[]bytes, int p, int end) { // one item holding the opposite-case code when bytes[p] is an ASCII letter, else EMPTY_FOLD_CODES; reads bytes[p] without checking p < end; flag and end are not consulted; the item arguments are presumably (byteLen, codeLen, codes) - confirm against CaseFoldCodeItem
+ int b = bytes[p] & 0xff;
+
+ if (0x41 <= b && b <= 0x5a) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{b + 0x20})};
+ } else if (0x61 <= b && b <= 0x7a) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{b - 0x20})};
+ } else {
+ return EMPTY_FOLD_CODES;
+ }
+ }
+
+ /** onigenc_ascii_get_case_fold_codes_by_str / used also by multibyte encodings
+ * Delegates to asciiCaseFoldCodesByString with the arguments unchanged. */
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
+ return asciiCaseFoldCodesByString(flag, bytes, p, end);
+ }
+
+ /** onigenc_minimum_property_name_to_ctype
+ * notably overridden by unicode encodings. Looks the name bytes[p..end) up in PosixBracket.PBSTableUpper and returns the mapped ctype (unboxed);
+ * when the name is absent it throws ValueException with ERR_INVALID_CHAR_PROPERTY_NAME and the name decoded using the platform default charset. */
+ @Override
+ public int propertyNameToCType(byte[]bytes, int p, int end) {
+ Integer ctype = PosixBracket.PBSTableUpper.get(bytes, p, end);
+ if (ctype != null) return ctype;
+ throw new ValueException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, new String(bytes, p, end - p));
+ }
+}
diff --git a/src/org/joni/encoding/CaseFoldMapEncoding.java b/src/org/joni/encoding/CaseFoldMapEncoding.java
new file mode 100644
index 0000000..7bb1bd9
--- /dev/null
+++ b/src/org/joni/encoding/CaseFoldMapEncoding.java
@@ -0,0 +1,129 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+
+public abstract class CaseFoldMapEncoding extends SingleByteEncoding { // Single-byte encoding whose non-ASCII case pairs come from a table of code pairs, with optional handling of 0xdf (presumably sharp s) as equivalent to "ss".
+
+ protected final int[][]CaseFoldMap; // rows are two-element pairs of codes that fold to each other; kept by reference
+ protected final boolean foldFlag; // passed as essTsettFlag to the helpers below, where it enables the 0xdf / "ss" equivalence
+
+ protected CaseFoldMapEncoding(short[]CTypeTable, byte[]LowerCaseTable, int[][]CaseFoldMap) { // convenience form: foldFlag defaults to true
+ this(CTypeTable, LowerCaseTable, CaseFoldMap, true);
+ }
+
+ protected CaseFoldMapEncoding(short[]CTypeTable, byte[]LowerCaseTable, int[][]CaseFoldMap, boolean foldFlag) { // forwards both tables to the superclass and records the map and flag
+ super(CTypeTable, LowerCaseTable);
+ this.CaseFoldMap = CaseFoldMap;
+ this.foldFlag = foldFlag;
+ }
+
+ /** onigenc_apply_all_case_fold_with_map: reports the ASCII letter pairs first, then the first mapSize rows of map in both directions, then (only if essTsettFlag) 0xdf with the two-code sequence "ss"; always returns 0.
+ */
+ protected final int applyAllCaseFoldWithMap(int mapSize, int[][]map, boolean essTsettFlag, int flag,
+ ApplyAllCaseFoldFunction fun, Object arg) {
+
+ asciiApplyAllCaseFold(flag, fun, arg);
+ int[]code = new int[]{0};
+
+ for (int i=0; i<mapSize; i++) {
+ code[0] = map[i][1];
+
+ fun.apply(map[i][0], code, 1, arg);
+
+ code[0] = map[i][0];
+ fun.apply(map[i][1], code, 1, arg);
+ }
+
+ if (essTsettFlag) ssApplyAllCaseFold(flag, fun, arg);
+ return 0;
+ }
+
+ static final int[] SS = new int []{0x73, 0x73}; // the codes of "ss"; this one shared array instance is handed to the callback uncopied
+ /** ss_apply_all_case_fold: reports 0xdf as folding to the two codes in SS; flag is not consulted.
+ */
+ private void ssApplyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+ fun.apply(0xdf, SS, 2, arg);
+ }
+
+ /** onigenc_get_case_fold_codes_by_str_with_map: case-fold alternatives for the text starting at bytes[p]. An ASCII letter yields its opposite case, plus (when essTsettFlag is set and a next byte exists before end) a two-byte item mapping to 0xdf if the letter is s/S and the next byte is s or S; 0xdf with essTsettFlag yields the four two-code spellings of "ss"; otherwise the first of the mapSize map rows containing the byte yields its partner; no match returns EMPTY_FOLD_CODES. bytes[p] is read without checking p < end, and flag is not consulted.
+ */
+ protected final CaseFoldCodeItem[]getCaseFoldCodesByStringWithMap(int mapSize, int[][]map,
+ boolean essTsettFlag, int flag,
+ byte[]bytes, int p, int end) {
+ int b = bytes[p] & 0xff;
+
+ if (0x41 <= b && b <= 0x5a) {
+ CaseFoldCodeItem item0 = new CaseFoldCodeItem(1, 1, new int[]{b + 0x20});
+
+ if (b == 0x53 && essTsettFlag && end > p + 1 &&
+ (bytes[p+1] == (byte)0x53 || bytes[p+1] == (byte)0x73)) { /* SS */
+ CaseFoldCodeItem item1 = new CaseFoldCodeItem(2, 1, new int[]{0xdf});
+ return new CaseFoldCodeItem[]{item0, item1};
+ } else {
+ return new CaseFoldCodeItem[]{item0};
+ }
+ } else if (0x61 <= b && b <= 0x7a) {
+ CaseFoldCodeItem item0 = new CaseFoldCodeItem(1, 1, new int[]{b - 0x20});
+
+ if (b == 0x73 && essTsettFlag && end >p + 1 &&
+ (bytes[p+1] == (byte)0x73 || bytes[p+1] == (byte)0x53)) { /* ss */
+ CaseFoldCodeItem item1 = new CaseFoldCodeItem(2, 1, new int[]{0xdf});
+
+ return new CaseFoldCodeItem[]{item0, item1};
+ } else {
+ return new CaseFoldCodeItem[]{item0};
+ }
+ } else if (b == 0xdf && essTsettFlag) {
+ CaseFoldCodeItem item0 = new CaseFoldCodeItem(1, 2, new int[]{'s', 's'});
+ CaseFoldCodeItem item1 = new CaseFoldCodeItem(1, 2, new int[]{'S', 'S'});
+ CaseFoldCodeItem item2 = new CaseFoldCodeItem(1, 2, new int[]{'s', 'S'});
+ CaseFoldCodeItem item3 = new CaseFoldCodeItem(1, 2, new int[]{'S', 's'});
+
+ return new CaseFoldCodeItem[]{item0, item1, item2, item3};
+ } else {
+ for (int i=0; i<mapSize; i++) {
+ if (b == map[i][0]) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{map[i][1]})};
+ } else if (b == map[i][1]) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{map[i][0]})};
+ }
+ }
+ }
+ return EMPTY_FOLD_CODES;
+ }
+ // Applies the callback over the ASCII pairs and this encoding's whole CaseFoldMap, with foldFlag as essTsettFlag; the int result of the helper is discarded.
+ @Override
+ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+ applyAllCaseFoldWithMap(CaseFoldMap.length, CaseFoldMap, foldFlag, flag, fun, arg);
+ }
+ // Looks up fold alternatives using this encoding's whole CaseFoldMap, with foldFlag as essTsettFlag.
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
+ return getCaseFoldCodesByStringWithMap(CaseFoldMap.length, CaseFoldMap, foldFlag, flag, bytes, p, end);
+ }
+ // Codes of 256 and above never have any ctype; there is no lower-bound check, so a negative code still reaches the table lookup.
+ @Override
+ public boolean isCodeCType(int code, int ctype) {
+ return code < 256 ? isCodeCTypeInternal(code, ctype) : false;
+ }
+}
diff --git a/src/org/joni/encoding/Encoding.java b/src/org/joni/encoding/Encoding.java
new file mode 100644
index 0000000..91cbaca
--- /dev/null
+++ b/src/org/joni/encoding/Encoding.java
@@ -0,0 +1,427 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.IntHolder;
+import org.joni.constants.CharacterType;
+import org.joni.util.BytesHash;
+
+public abstract class Encoding {
+ protected byte[]name;
+ protected int hashCode;
+
+ @Override
+ public abstract String toString();
+
+ /**
+ * Identity comparison: an encoding is equal only to itself.
+ */
+ @Override
+ public final boolean equals(Object other) {
+ return this == other;
+ }
+
+ /**
+  * Returns the hash of the encoding name. The value is cached in <code>hashCode</code>
+  * and is computed by {@link #getName()} the first time it is needed.
+  */
+ @Override
+ public final int hashCode() {
+     final boolean initialized = name != null;
+     if (!initialized) {
+         getName(); // populates both name and hashCode
+     }
+     return hashCode;
+ }
+
+ /**
+ * Returns the encoding name as bytes, derived from <code>toString()</code>.
+ * The array and its hash (<code>BytesHash.hashCode</code>) are computed on first use and cached.
+ * NOTE(review): <code>getBytes()</code> without a charset uses the platform default charset,
+ * and the lazy initialization is not synchronized - confirm both are acceptable.
+ */
+ public final byte[]getName() {
+ if (name == null) {
+ name = toString().getBytes();
+ hashCode = BytesHash.hashCode(name, 0, name.length);
+ }
+ return name;
+ }
+
+ /**
+ * Returns character length given the character head
+ * returns <code>1</code> for singlebyte encodings or performs direct length table lookup for multibyte ones.
+ *
+ * @param c
+ * Character head
+ * Oniguruma equivalent: <code>mbc_enc_len</code>
+ */
+ public abstract int length(byte c);
+
+ /**
+ * Returns maximum character byte length that can appear in an encoding
+ *
+ * Oniguruma equivalent: <code>max_enc_len</code>
+ */
+ public abstract int maxLength();
+
+ /* ONIGENC_MBC_MAXLEN_DIST */
+ /**
+ * Returns the same value as {@link #maxLength()}.
+ */
+ public final int maxLengthDistance() {
+ return maxLength();
+ }
+
+ /**
+ * Returns minimum character byte length that can appear in an encoding
+ *
+ * Oniguruma equivalent: <code>min_enc_len</code>
+ */
+ public abstract int minLength();
+
+ /**
+ * Returns true if <code>bytes[p]</code> is a head of a new line character
+ *
+ * Oniguruma equivalent: <code>is_mbc_newline</code>
+ */
+ public abstract boolean isNewLine(byte[]bytes, int p, int end);
+
+ /**
+ * Returns code point for a character
+ *
+ * Oniguruma equivalent: <code>mbc_to_code</code>
+ */
+ public abstract int mbcToCode(byte[]bytes, int p, int end);
+
+ /**
+ * Returns character length given a code point
+ *
+ * Oniguruma equivalent: <code>code_to_mbclen</code>
+ */
+ public abstract int codeToMbcLength(int code);
+
+ /**
+ * Extracts code point into its multibyte representation
+ *
+ * @return character length for the given code point
+ *
+ * Oniguruma equivalent: <code>code_to_mbc</code>
+ */
+ public abstract int codeToMbc(int code, byte[]bytes, int p);
+
+ /**
+ * Performs case folding for a character at <code>bytes[pp.value]</code>
+ *
+ * @param flag case fold flag
+ * @param pp an <code>IntHolder</code> that points at character head
+ * @param to a buffer where to extract case folded character
+ *
+ * Oniguruma equivalent: <code>mbc_case_fold</code>
+ */
+ public abstract int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]to);
+
+ /**
+ * Returns lower case table if it's safe to use it directly, otherwise <code>null</code>
+ * Used for fast case insensitive matching for some singlebyte encodings
+ *
+ * @return lower case table
+ */
+ public byte[] toLowerCaseTable() {return null;}
+
+ /**
+ * Expand case folds given a character class (used for case insensitive matching)
+ *
+ * @param flag case fold flag
+ * @param fun case folding functor (look at: <code>ApplyCaseFold</code>)
+ * @param arg case folding functor argument (look at: <code>ApplyCaseFoldArg</code>)
+ *
+ * Oniguruma equivalent: <code>apply_all_case_fold</code>
+ */
+ public abstract void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg);
+
+ /**
+ * Expand AST string nodes into their folded alternatives (look at: <code>Analyser.expandCaseFoldString</code>)
+ *
+ * Oniguruma equivalent: <code>get_case_fold_codes_by_str</code>
+ */
+ public abstract CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end);
+
+ /**
+ * Returns character type given character type name (used when e.g. \p{Alpha})
+ *
+ * Oniguruma equivalent: <code>property_name_to_ctype</code>
+ */
+ public abstract int propertyNameToCType(byte[]bytes, int p, int end);
+
+ /**
+ * Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)
+ *
+ * @param code a code point of a character
+ * @param ctype a character type to check against
+ *
+ * Oniguruma equivalent: <code>is_code_ctype</code>
+ */
+ public abstract boolean isCodeCType(int code, int ctype);
+
+ /**
+ * Returns code range for a given character type
+ *
+ * Oniguruma equivalent: <code>get_ctype_code_range</code>
+ */
+ public abstract int[]ctypeCodeRange(int ctype, IntHolder sbOut);
+
+ /**
+ * Seeks the previous character head in a stream
+ *
+ * Oniguruma equivalent: <code>left_adjust_char_head</code>
+ */
+ public abstract int leftAdjustCharHead(byte[]bytes, int p, int end);
+
+ /**
+ * Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm
+ *
+ * Oniguruma equivalent: <code>is_allowed_reverse_match</code>
+ */
+ public abstract boolean isReverseMatchAllowed(byte[]bytes, int p, int end);
+
+ /**
+  * Aligns to the character head via <code>leftAdjustCharHead(bytes, p, end)</code> and, when that
+  * head lies before <code>end</code>, moves forward by the length of the character found there.
+  *
+  * Oniguruma equivalent: <code>onigenc_get_right_adjust_char_head</code> / <code>ONIGENC_LEFT_ADJUST_CHAR_HEAD</code>
+  */
+ public final int rightAdjustCharHead(byte[]bytes, int p, int end) {
+     final int head = leftAdjustCharHead(bytes, p, end);
+     return head < end ? head + length(bytes[head]) : head;
+ }
+
+ /**
+  * Same as {@link #rightAdjustCharHead(byte[], int, int)}, but additionally reports the
+  * left-adjusted head through <code>prev</code> (when non-null). <code>prev.value</code> is set to
+  * <code>-1</code> when the head is not before <code>end</code>.
+  *
+  * Oniguruma equivalent: <code>onigenc_get_right_adjust_char_head_with_prev</code>
+  */
+ public final int rightAdjustCharHeadWithPrev(byte[]bytes, int p, int end, IntHolder prev) {
+     final int head = leftAdjustCharHead(bytes, p, end);
+     if (head >= end) {
+         if (prev != null) prev.value = -1; /* no previous head to report */
+         return head;
+     }
+     if (prev != null) prev.value = head;
+     return head + length(bytes[head]);
+ }
+
+ /**
+  * Returns the head of the character that ends just before <code>end</code>, or <code>-1</code>
+  * when <code>end</code> is not past <code>p</code>.
+  *
+  * Oniguruma equivalent: <code>onigenc_get_prev_char_head</code>
+  */
+ public final int prevCharHead(byte[]bytes, int p, int end) {
+     return end <= p ? -1 : leftAdjustCharHead(bytes, p, end - 1);
+ }
+
+ /* onigenc_step_back */
+ public final int stepBack(byte[]bytes, int p, int end, int n) {
+ while (end != -1 && n-- > 0) {
+ if (end <= p) return -1;
+ end = leftAdjustCharHead(bytes, p, end - 1);
+ }
+ return end;
+ }
+
+ /**
+  * Advances <code>n</code> characters starting at <code>p</code>.
+  *
+  * @return the resulting position, or <code>-1</code> when it would lie past <code>end</code>
+  *
+  * Oniguruma equivalent: <code>onigenc_step</code>
+  */
+ public final int step(byte[]bytes, int p, int end, int n) {
+     int q = p;
+     for (int i = 0; i < n; i++) {
+         q += length(bytes[q]);
+     }
+     if (q > end) return -1;
+     return q;
+ }
+
+ /**
+  * Counts the characters in <code>bytes[p..end)</code> by hopping from head to head
+  * using <code>length</code>.
+  *
+  * Oniguruma equivalent: <code>onigenc_strlen</code>
+  */
+ public int strLength(byte[]bytes, int p, int end) {
+     int count = 0;
+     for (int q = p; q < end; q += length(bytes[q])) {
+         count++;
+     }
+     return count;
+ }
+
+ /* onigenc_strlen_null */
+ /**
+ * Counts characters starting at <code>bytes[p]</code> until a terminator is found.
+ * The terminator is <code>minLength()</code> consecutive zero bytes, so a single zero byte
+ * ends the string only when the minimum character length is 1.
+ * NOTE(review): unlike <code>strByteLengthNull</code>, the look-ahead index <code>q</code> is not
+ * checked against <code>bytes.length</code>; an unterminated buffer ends in an
+ * ArrayIndexOutOfBoundsException.
+ *
+ * @return number of characters before the terminator
+ */
+ public final int strLengthNull(byte[]bytes, int p) {
+ int n = 0;
+
+ while(true) {
+ if (bytes[p] == 0) {
+ int len = minLength();
+
+ // single zero byte is a complete terminator for encodings with 1-byte minimum
+ if (len == 1) return n;
+ int q = p + 1;
+
+ // require the remaining (len - 1) bytes of the terminator to be zero as well
+ while (len > 1) {
+ if (bytes[q] != 0) break;
+ q++;
+ len--;
+ }
+ if (len == 1) return n;
+ }
+ // not a terminator: skip the whole character at p
+ p += length(bytes[p]);
+ n++;
+ }
+ }
+
+ /**
+  * Returns the byte length of the null-terminated string that starts at <code>bytes[p]</code>.
+  * The terminator is <code>minLength()</code> consecutive zero bytes; running off the end of the
+  * array while checking the terminator is treated as termination.
+  *
+  * @param bytes buffer holding the string
+  * @param p index of the first byte of the string
+  * @return number of bytes before the terminator, counted from <code>p</code>
+  *
+  * Oniguruma equivalent: <code>onigenc_str_bytelen_null</code>
+  */
+ public final int strByteLengthNull(byte[]bytes, int p) {
+     // Scan from the caller's offset. This used to be hard-coded to 0, which ignored p.
+     final int start = p;
+     int p_ = start;
+
+     while (true) {
+         if (bytes[p_] == 0) {
+             int len = minLength();
+             if (len == 1) return p_ - start;
+             int q = p_ + 1;
+             // require the remaining (len - 1) terminator bytes to be zero as well
+             while (len > 1) {
+                 if (q >= bytes.length) return p_ - start;
+                 if (bytes[q] != 0) break;
+                 q++;
+                 len--;
+             }
+             if (len == 1) return p_ - start;
+         }
+         p_ += length(bytes[p_]);
+     }
+ }
+
+ /**
+  * Compares up to <code>n</code> characters of <code>bytes[p..end)</code> with the ASCII string
+  * starting at <code>ascii[asciiP]</code>.
+  *
+  * @return <code>0</code> when the first <code>n</code> characters match; otherwise the ASCII byte
+  *         minus the decoded code point at the first difference, or the current ASCII byte when
+  *         the input is exhausted first
+  *
+  * Oniguruma equivalent: <code>onigenc_with_ascii_strncmp</code>
+  */
+ public final int strNCmp(byte[]bytes, int p, int end, byte[]ascii, int asciiP, int n) {
+     int src = p;
+     int ref = asciiP;
+     for (int i = 0; i < n; i++) {
+         if (src >= end) return ascii[ref];
+         final int c = mbcToCode(bytes, src, end);
+         final int diff = ascii[ref] - c;
+         if (diff != 0) return diff;
+
+         ref++;
+         src += length(bytes[src]);
+     }
+     return 0;
+ }
+
+ // Character type predicates: each one forwards to isCodeCType with the matching CharacterType constant.
+
+ /** Tests <code>code</code> against <code>CharacterType.NEWLINE</code>. */
+ public final boolean isNewLine(int code) {
+ return isCodeCType(code, CharacterType.NEWLINE);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.GRAPH</code>. */
+ public final boolean isGraph(int code) {
+ return isCodeCType(code, CharacterType.GRAPH);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.PRINT</code>. */
+ public final boolean isPrint(int code) {
+ return isCodeCType(code, CharacterType.PRINT);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.ALNUM</code>. */
+ public final boolean isAlnum(int code) {
+ return isCodeCType(code, CharacterType.ALNUM);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.ALPHA</code>. */
+ public final boolean isAlpha(int code) {
+ return isCodeCType(code, CharacterType.ALPHA);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.LOWER</code>. */
+ public final boolean isLower(int code) {
+ return isCodeCType(code, CharacterType.LOWER);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.UPPER</code>. */
+ public final boolean isUpper(int code) {
+ return isCodeCType(code, CharacterType.UPPER);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.CNTRL</code>. */
+ public final boolean isCntrl(int code) {
+ return isCodeCType(code, CharacterType.CNTRL);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.PUNCT</code>. */
+ public final boolean isPunct(int code) {
+ return isCodeCType(code, CharacterType.PUNCT);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.SPACE</code>. */
+ public final boolean isSpace(int code) {
+ return isCodeCType(code, CharacterType.SPACE);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.BLANK</code>. */
+ public final boolean isBlank(int code) {
+ return isCodeCType(code, CharacterType.BLANK);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.DIGIT</code>. */
+ public final boolean isDigit(int code) {
+ return isCodeCType(code, CharacterType.DIGIT);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.XDIGIT</code>. */
+ public final boolean isXDigit(int code) {
+ return isCodeCType(code, CharacterType.XDIGIT);
+ }
+
+ /** Tests <code>code</code> against <code>CharacterType.WORD</code>. */
+ public final boolean isWord(int code) {
+ return isCodeCType(code, CharacterType.WORD);
+ }
+
+ // ONIGENC_IS_MBC_WORD
+ /** Decodes the character at <code>bytes[p]</code> with <code>mbcToCode</code> and tests it with <code>isWord</code>. */
+ public final boolean isMbcWord(byte[]bytes, int p, int end) {
+ return isWord(mbcToCode(bytes, p, end));
+ }
+
+ // IS_CODE_SB_WORD
+ /** True when <code>code</code> passes both <code>isAscii</code> and <code>isWord</code>. */
+ public final boolean isSbWord(int code) {
+ return isAscii(code) && isWord(code);
+ }
+
+ // ONIGENC_IS_MBC_HEAD
+ /** True when <code>length(b)</code> reports anything other than 1 for the byte <code>b</code>. */
+ public final boolean isMbcHead(byte b) {
+ return length(b) != 1;
+ }
+
+ /**
+ * True when the character at <code>p</code> decodes to 13 (carriage return) and
+ * <code>isNewLine(byte[], int, int)</code> accepts the position right after that character.
+ */
+ public boolean isMbcCrnl(byte[]bytes, int p, int end) {
+ return mbcToCode(bytes, p, end) == 13 && isNewLine(bytes, p + length(bytes[p]), end);
+ }
+
+ // ============================================================
+ // helpers
+ // ============================================================
+ /** Returns <code>code - '0'</code>; no check is made that <code>code</code> is actually a digit. */
+ public static int digitVal(int code) {
+ return code - '0';
+ }
+
+ /** Octal digit value; computed exactly like {@link #digitVal(int)}. */
+ public static int odigitVal(int code) {
+ return digitVal(code);
+ }
+
+ /**
+  * Returns the numeric value of a hexadecimal digit: decimal digits map through
+  * <code>digitVal</code>, anything else is measured from <code>'A'</code> (when upper case)
+  * or <code>'a'</code>, plus 10. No check is made that <code>code</code> is a hex digit.
+  */
+ public final int xdigitVal(int code) {
+     if (isDigit(code)) return digitVal(code);
+     final int base = isUpper(code) ? 'A' : 'a';
+     return code - base + 10;
+ }
+
+ // ONIGENC_IS_MBC_ASCII
+ /** True when the byte, read as unsigned, is below 128 - i.e. the signed byte is non-negative. */
+ public static boolean isMbcAscii(byte b) {
+     return b >= 0;
+ }
+
+ // ONIGENC_IS_CODE_ASCII
+ /**
+  * True when <code>code</code> is in the ASCII range 0..127.
+  * Oniguruma code points are unsigned, so an <code>int</code> with the sign bit set
+  * (a code point of 0x80000000 or above) is not ASCII; masking rejects negative values
+  * that a plain <code>code &lt; 128</code> comparison would accept.
+  */
+ public static boolean isAscii(int code) {
+     return (code & ~0x7f) == 0;
+ }
+
+ /**
+ * Looks <code>c</code> up in <code>AbstractEncoding.AsciiToLowerCaseTable</code>.
+ * <code>c</code> is used directly as the array index, so it must be a valid index into that table.
+ */
+ public static int asciiToLower(int c) {
+ return AbstractEncoding.AsciiToLowerCaseTable[c];
+ }
+
+ /**
+ * Looks <code>c</code> up in <code>AbstractEncoding.AsciiToUpperCaseTable</code>.
+ * <code>c</code> is used directly as the array index, so it must be a valid index into that table.
+ */
+ public static int asciiToUpper(int c) {
+ return AbstractEncoding.AsciiToUpperCaseTable[c];
+ }
+
+ /** True when <code>ctype</code> is one of <code>CharacterType.WORD</code>, <code>GRAPH</code> or <code>PRINT</code>. */
+ public static boolean isWordGraphPrint(int ctype) {
+     if (ctype == CharacterType.WORD) return true;
+     if (ctype == CharacterType.GRAPH) return true;
+     return ctype == CharacterType.PRINT;
+ }
+
+ /** Returns <code>0</code> when every character takes more than one byte, otherwise <code>0x80</code>. */
+ public final int mbcodeStartPosition() {
+     if (minLength() > 1) return 0;
+     return 0x80;
+ }
+
+ public abstract boolean isSingleByte();
+ public abstract boolean isFixedWidth();
+
+ public static final byte NEW_LINE = (byte)0x0a;
+}
diff --git a/src/org/joni/encoding/EucEncoding.java b/src/org/joni/encoding/EucEncoding.java
new file mode 100644
index 0000000..2cb26cd
--- /dev/null
+++ b/src/org/joni/encoding/EucEncoding.java
@@ -0,0 +1,42 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+ /**
+  * Shared base for EUC-style multibyte encodings. Subclasses say which bytes are lead
+  * bytes; this class uses that to locate character heads.
+  */
+ public abstract class EucEncoding extends MultiByteEncoding {
+
+     protected EucEncoding(int[]EncLen, short[]CTypeTable) {
+         super(EncLen, CTypeTable);
+     }
+
+     /** Tells whether the unsigned byte value <code>c</code> is a lead byte in this encoding. */
+     protected abstract boolean isLead(int c);
+
+     /**
+      * Finds the head of the character containing position <code>end</code>, scanning no further
+      * back than <code>p</code>.
+      */
+     @Override
+     public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+         /* In this encoding mb-trail bytes don't mix with single bytes. */
+         if (end <= p) return end;
+
+         // walk back to the nearest lead byte (or to p, whichever comes first)
+         int head = end;
+         while (true) {
+             if (isLead(bytes[head] & 0xff) || head <= p) break;
+             head--;
+         }
+
+         final int next = head + length(bytes[head]);
+         if (next > end) return head; // the character at head covers end
+
+         // otherwise step over it and keep an even distance to end
+         return next + ((end - next) & ~1);
+     }
+ }
diff --git a/src/org/joni/encoding/ISOEncoding.java b/src/org/joni/encoding/ISOEncoding.java
new file mode 100644
index 0000000..7b113ee
--- /dev/null
+++ b/src/org/joni/encoding/ISOEncoding.java
@@ -0,0 +1,58 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+
+ /**
+  * Shared base for the ISO single-byte encodings built on a case fold map.
+  */
+ public abstract class ISOEncoding extends CaseFoldMapEncoding {
+
+     /** Same as the four-argument constructor with <code>foldFlag</code> set to <code>true</code>. */
+     protected ISOEncoding(short[]CTypeTable, byte[]LowerCaseTable, int[][]CaseFoldMap) {
+         this(CTypeTable, LowerCaseTable, CaseFoldMap, true);
+     }
+
+     protected ISOEncoding(short[]CTypeTable, byte[]LowerCaseTable, int[][]CaseFoldMap, boolean foldFlag) {
+         super(CTypeTable, LowerCaseTable, CaseFoldMap, foldFlag);
+     }
+
+     /**
+      * Folds the byte at <code>bytes[pp.value]</code> into <code>lower</code> and advances
+      * <code>pp</code> by one. The byte <code>0xdf</code> expands to <code>"ss"</code> when
+      * <code>Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR</code> is set in <code>flag</code>.
+      *
+      * @return number of bytes written to <code>lower</code> (1 or 2)
+      *
+      * Oniguruma equivalent: <code>iso_*_mbc_case_fold</code>
+      */
+     @Override
+     public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+         final int p = pp.value;
+
+         if (bytes[p] == (byte)0xdf && (flag & Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR) != 0) {
+             lower[0] = 's';
+             lower[1] = 's';
+             pp.value = p + 1;
+             return 2;
+         }
+
+         lower[0] = LowerCaseTable[bytes[p] & 0xff];
+         pp.value = p + 1;
+         return 1;
+     }
+
+     /** Checks <code>code</code> against <code>ctype</code>; codes of 256 and above never match. */
+     @Override
+     public boolean isCodeCType(int code, int ctype) {
+         return code < 256 && isCodeCTypeInternal(code, ctype);
+     }
+ }
diff --git a/src/org/joni/encoding/MultiByteEncoding.java b/src/org/joni/encoding/MultiByteEncoding.java
new file mode 100644
index 0000000..1ab58a8
--- /dev/null
+++ b/src/org/joni/encoding/MultiByteEncoding.java
@@ -0,0 +1,131 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+import org.joni.IntHolder;
+import org.joni.encoding.specific.ASCIIEncoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+public abstract class MultiByteEncoding extends AbstractEncoding {
+
+ protected final int EncLen[];
+
+ protected MultiByteEncoding(int[]EncLen, short[]CTypeTable) {
+ super(CTypeTable);
+ this.EncLen = EncLen;
+ }
+
+ @Override
+ public int length(byte c) {
+ return EncLen[c & 0xff];
+ }
+
+ @Override
+ public boolean isSingleByte() {
+ return false;
+ }
+
+ /**
+  * Decodes the character at <code>bytes[p]</code> by concatenating its bytes big-endian
+  * into one <code>int</code>. Decoding stops early when <code>end</code> is reached before
+  * all bytes announced by <code>length</code> were read.
+  */
+ protected final int mbnMbcToCode(byte[]bytes, int p, int end) {
+     final int len = length(bytes[p]);
+     int code = bytes[p] & 0xff;
+     if (len == 1) return code;
+
+     int q = p + 1;
+     for (int i = 1; i < len && q < end; i++, q++) {
+         code = (code << 8) + (bytes[q] & 0xff);
+     }
+     return code;
+ }
+
+ /**
+  * Case folds the character at <code>bytes[pp.value]</code> into <code>lower</code> and advances
+  * <code>pp</code> past it. ASCII bytes are lower-cased through
+  * <code>ASCIIEncoding.AsciiToLowerCaseTable</code>; any other character is copied unchanged.
+  *
+  * @return number of bytes written to <code>lower</code>
+  */
+ protected final int mbnMbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+     final int start = pp.value;
+     final int head = bytes[start] & 0xff;
+
+     if (isAscii(head)) {
+         lower[0] = ASCIIEncoding.AsciiToLowerCaseTable[head];
+         pp.value++;
+         return 1;
+     }
+
+     final int len = length(bytes[start]);
+     for (int i = 0; i < len; i++) {
+         lower[i] = bytes[start + i];
+     }
+     pp.value += len;
+     return len; /* byte length of the (unchanged) multibyte character */
+ }
+
+ /** Returns 2 when any bit of the second-lowest byte of <code>code</code> is set, otherwise 1. */
+ protected final int mb2CodeToMbcLength(int code) {
+     if ((code & 0xff00) == 0) return 1;
+     return 2;
+ }
+
+ /** Returns the position (1..4) of the highest non-zero byte of <code>code</code>; 1 when the upper three bytes are zero. */
+ protected final int mb4CodeToMbcLength(int code) {
+     if ((code & 0xff000000) != 0) return 4;
+     if ((code & 0x00ff0000) != 0) return 3;
+     if ((code & 0x0000ff00) != 0) return 2;
+     return 1;
+ }
+
+ /**
+  * Writes <code>code</code> at <code>bytes[p]</code> as one or two bytes, high byte first.
+  *
+  * @return number of bytes written
+  * @throws ValueException when the byte count does not match what <code>length</code>
+  *         reports for the first byte written
+  */
+ protected final int mb2CodeToMbc(int code, byte[]bytes, int p) {
+     int q = p;
+     final boolean hasHighByte = (code & 0xff00) != 0;
+     if (hasHighByte) {
+         bytes[q++] = (byte)(code >>> 8);
+     }
+     bytes[q++] = (byte)code;
+
+     final int written = q - p;
+     if (length(bytes[p]) != written) {
+         throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+     }
+     return written;
+ }
+
+ /**
+ * Writes <code>code</code> at <code>bytes[p]</code> as one to four bytes, most significant byte first.
+ *
+ * @return number of bytes written
+ * @throws ValueException when the byte count does not match what <code>length</code>
+ * reports for the first byte written
+ */
+ protected final int mb4CodeToMbc(int code, byte[]bytes, int p) {
+ int p_ = p;
+ // once a higher byte has been emitted (p_ != p) every lower byte is emitted too, even when zero
+ if ((code & 0xff000000) != 0) bytes[p_++] = (byte)((code >>> 24) & 0xff);
+ if ((code & 0xff0000) != 0 || p_ != p) bytes[p_++] = (byte)((code >>> 16) & 0xff);
+ if ((code & 0xff00) != 0 || p_ != p) bytes[p_++] = (byte)((code >>> 8) & 0xff);
+ bytes[p_++] = (byte)(code & 0xff);
+
+ // the written length must agree with the encoding's length table for the lead byte
+ if (length(bytes[p]) != (p_ - p)) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+ return p_ - p;
+ }
+
+ /**
+  * Character type test for multibyte encodings. Codes below 128 are looked up through
+  * <code>isCodeCTypeInternal</code>; any other code matches only the WORD, GRAPH and PRINT
+  * types, and only when it encodes to more than one byte.
+  */
+ protected final boolean mb2IsCodeCType(int code, int ctype) {
+     if (code < 128) {
+         return isCodeCTypeInternal(code, ctype); // configured with ascii
+     }
+     return isWordGraphPrint(ctype) && codeToMbcLength(code) > 1;
+ }
+
+ /** Four-byte variant; currently identical to {@link #mb2IsCodeCType(int, int)}. */
+ protected final boolean mb4IsCodeCType(int code, int ctype) {
+ return mb2IsCodeCType(code, ctype);
+ }
+
+}
diff --git a/src/org/joni/encoding/SingleByteEncoding.java b/src/org/joni/encoding/SingleByteEncoding.java
new file mode 100644
index 0000000..a0c0da5
--- /dev/null
+++ b/src/org/joni/encoding/SingleByteEncoding.java
@@ -0,0 +1,112 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding;
+
+import org.joni.IntHolder;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+public abstract class SingleByteEncoding extends AbstractEncoding {
+
+ protected final byte[]LowerCaseTable;
+
+ protected SingleByteEncoding(short[]CTypeTable, byte[]LowerCaseTable) {
+ super(CTypeTable);
+ this.LowerCaseTable = LowerCaseTable;
+ }
+
+ /** onigenc_single_byte_mbc_enc_len
+ */
+ @Override
+ public final int length(byte c) {
+ return 1;
+ }
+
+ @Override
+ public final int strLength(byte[]bytes, int p, int end) {
+ return end - p;
+ }
+
+ @Override
+ public final int maxLength() {
+ return 1;
+ }
+
+ @Override
+ public final int minLength() {
+ return 1;
+ }
+
+ // onigenc_is_mbc_newline_0x0a here
+
+ /** onigenc_single_byte_mbc_to_code
+ */
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end){
+ return bytes[p] & 0xff;
+ }
+
+ /**
+  * Returns 1 for every code below <code>0x100</code>.
+  *
+  * @throws ValueException for codes of <code>0x100</code> and above
+  *
+  * Oniguruma equivalent: <code>onigenc_single_byte_code_to_mbclen</code>
+  */
+ @Override
+ public final int codeToMbcLength(int code) {
+     if (code >= 0x100) {
+         throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+     }
+     return 1;
+ }
+
+ /** onigenc_single_byte_code_to_mbc
+ */
+ @Override
+ public final int codeToMbc(int code, byte[]bytes, int p) {
+ bytes[p] = (byte)(code & 0xff); // c implementation also uses mask here
+ return 1;
+ }
+
+ /** onigenc_not_support_get_ctype_code_range
+ */
+ @Override
+ public final int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ return null;
+ }
+
+ /** onigenc_single_byte_left_adjust_char_head
+ *
+ * Every byte is a character head in a single-byte encoding, so the position is returned unchanged.
+ * Note the parameter names: here the second argument is <code>start</code> and the third is
+ * <code>p</code>, whereas <code>Encoding.leftAdjustCharHead</code> names them <code>p</code> and
+ * <code>end</code>; the value returned is the third argument.
+ */
+ @Override
+ public final int leftAdjustCharHead(byte[]bytes, int start, int p) {
+ return p;
+ }
+
+ /** onigenc_always_true_is_allowed_reverse_match
+ */
+ @Override
+ public final boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+ return true;
+ }
+
+ @Override
+ public final boolean isSingleByte() {
+ return true;
+ }
+
+ @Override
+ public final boolean isFixedWidth() {
+ return true;
+ }
+}
diff --git a/src/org/joni/encoding/specific/ASCIIEncoding.java b/src/org/joni/encoding/specific/ASCIIEncoding.java
new file mode 100644
index 0000000..69e4d89
--- /dev/null
+++ b/src/org/joni/encoding/specific/ASCIIEncoding.java
@@ -0,0 +1,83 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.SingleByteEncoding;
+
+public final class ASCIIEncoding extends SingleByteEncoding {
+
+ protected ASCIIEncoding() {
+ super(AsciiCtypeTable, AsciiToLowerCaseTable);
+ }
+
+ @Override
+ public String toString() {
+ return "US-ASCII";
+ }
+
+ @Override
+ public final byte[]toLowerCaseTable() {
+ return LowerCaseTable;
+ }
+
+ /**
+  * Checks <code>code</code> against <code>ctype</code>; codes of 128 and above never match.
+  *
+  * Oniguruma equivalent: <code>ascii_is_code_ctype</code> / <code>ONIGENC_IS_ASCII_CODE_CTYPE</code>
+  */
+ @Override
+ public boolean isCodeCType(int code, int ctype) {
+     if (code >= 128) return false;
+     return isCodeCTypeInternal(code, ctype);
+ }
+
+ static final short AsciiCtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+ };
+
+ public static final ASCIIEncoding INSTANCE = new ASCIIEncoding();
+}
diff --git a/src/org/joni/encoding/specific/BIG5Encoding.java b/src/org/joni/encoding/specific/BIG5Encoding.java
new file mode 100644
index 0000000..cf7a832
--- /dev/null
+++ b/src/org/joni/encoding/specific/BIG5Encoding.java
@@ -0,0 +1,154 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.MultiByteEncoding;
+
+public final class BIG5Encoding extends MultiByteEncoding {
+
+ protected BIG5Encoding() {
+ super(Big5EncLen, ASCIIEncoding.AsciiCtypeTable);
+ }
+
+ @Override
+ public String toString() {
+ return "Big5";
+ }
+
+ @Override
+ public int maxLength() {
+ return 2;
+ }
+
+ @Override
+ public int minLength() {
+ return 1;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return false;
+ }
+
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+ return mbnMbcToCode(bytes, p, end);
+ }
+
+ @Override
+ public int codeToMbcLength(int code) {
+ return mb2CodeToMbcLength(code);
+ }
+
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+ return mb2CodeToMbc(code, bytes, p);
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+ return mbnMbcCaseFold(flag, bytes, pp, end, lower);
+ }
+
+ @Override
+ public boolean isCodeCType(int code, int ctype) {
+ return mb2IsCodeCType(code, ctype);
+ }
+
+ static final boolean BIG5_CAN_BE_TRAIL_TABLE[] = {
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false
+ };
+
+ /** True when <code>Big5EncLen</code> gives the unsigned byte value <code>b</code> a length greater than 1, i.e. it starts a two-byte character. */
+ private static boolean isBig5MbFirst(int b) {
+ return Big5EncLen[b] > 1;
+ }
+
+ /** True when <code>BIG5_CAN_BE_TRAIL_TABLE</code> marks the unsigned byte value <code>b</code> as a possible second byte. */
+ private static boolean isBig5MbTrail(int b) {
+ return BIG5_CAN_BE_TRAIL_TABLE[b];
+ }
+
+ /**
+ * Finds the head of the character containing position <code>end</code>, scanning no further
+ * back than <code>p</code>.
+ */
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+
+ int p_ = end;
+
+ // only a byte that may be a trail byte can belong to a character starting earlier
+ if (isBig5MbTrail(bytes[p_] & 0xff)) {
+ // walk back over the run of possible first bytes; stop just after the first byte that is not one
+ while (p_ > p) {
+ if (!isBig5MbFirst(bytes[--p_] & 0xff)) {
+ p_++;
+ break;
+ }
+ }
+ }
+ int len = length(bytes[p_]);
+ // the character starting at p_ covers end
+ if (p_ + len > end) return p_;
+ p_ += len;
+ // skip forward an even number of bytes (whole two-byte characters) towards end
+ return p_ + ((end - p_) & ~1);
+ }
+
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ return null;
+ }
+
+ /**
+  * Tells whether a reverse (backward) match may start at <code>bytes[p]</code>.
+  * A byte that can be the trail byte of a two-byte character is ambiguous when reached
+  * from the right, so reverse matching is allowed only when the byte is NOT a possible
+  * trail byte. The previous implementation returned the opposite value.
+  *
+  * Oniguruma equivalent: <code>big5_is_allowed_reverse_match</code>
+  */
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+     int c = bytes[p] & 0xff;
+     return !isBig5MbTrail(c);
+ }
+
+ static final int Big5EncLen[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+ };
+
+ public static final BIG5Encoding INSTANCE = new BIG5Encoding();
+}
diff --git a/src/org/joni/encoding/specific/CP1251Encoding.java b/src/org/joni/encoding/specific/CP1251Encoding.java
new file mode 100644
index 0000000..b3a9f2d
--- /dev/null
+++ b/src/org/joni/encoding/specific/CP1251Encoding.java
@@ -0,0 +1,159 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.CaseFoldMapEncoding;
+
+/**
+ * Single-byte encoding that names itself "CP1251". It supplies its own
+ * character-type, lower-case and case-fold tables to
+ * {@link CaseFoldMapEncoding} and is used through {@link #INSTANCE}.
+ */
+public final class CP1251Encoding extends CaseFoldMapEncoding {
+
+    /**
+     * Hands the three CP1251 tables to the base class.
+     * NOTE(review): the meaning of the trailing {@code false} flag is defined
+     * by {@code CaseFoldMapEncoding} and is not visible here.
+     */
+    protected CP1251Encoding() {
+        super(CP1251_CtypeTable, CP1251_ToLowerCaseTable, CP1251_CaseFoldMap, false);
+    }
+
+    /** @return the encoding name, {@code "CP1251"} */
+    @Override
+    public String toString() {
+        return "CP1251";
+    }
+
+    /**
+     * Case-folds the single byte at {@code pp.value}.
+     *
+     * @param flag  case-fold flags; not consulted
+     * @param bytes source buffer
+     * @param pp    in/out cursor: on entry the index of the byte to fold, on
+     *              return advanced by one
+     * @param end   end of the source range; not consulted
+     * @param lower receives the folded byte at index 0
+     * @return the number of bytes written to {@code lower}, always 1
+     */
+    @Override
+    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+        lower[0] = LowerCaseTable[bytes[pp.value] & 0xff];
+        // Consume the byte. Without this the caller's cursor never moves,
+        // unlike every other mbcCaseFold implementation, which advances pp.
+        pp.value++;
+        return 1;
+    }
+
+    /**
+     * Checks a code point against a character type.
+     *
+     * @return the table-driven answer for codes below 256, {@code false} for
+     *         anything larger
+     */
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        return code < 256 && isCodeCTypeInternal(code, ctype);
+    }
+
+    /** Character-type bit masks, one entry per byte value (256 entries). */
+    static final short CP1251_CtypeTable[] = {
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+        0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+        0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+        0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
+        0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
+        0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0,
+        0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2,
+        0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0,
+        0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+    };
+
+    /** Lower-case mapping, one entry per byte value (256 entries). */
+    static final byte CP1251_ToLowerCaseTable[] = new byte[]{
+        (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+        (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+        (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+        (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+        (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+        (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+        (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+        (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+        (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+        (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+        (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+        (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+        (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+        (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+        (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+        (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+        (byte)'\220', (byte)'\203', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+        (byte)'\210', (byte)'\211', (byte)'\232', (byte)'\213', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+        (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+        (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+        (byte)'\240', (byte)'\242', (byte)'\242', (byte)'\274', (byte)'\244', (byte)'\264', (byte)'\246', (byte)'\247',
+        (byte)'\270', (byte)'\251', (byte)'\272', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\277',
+        (byte)'\260', (byte)'\261', (byte)'\263', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+        (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\276', (byte)'\276', (byte)'\277',
+        (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+        (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+        (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+        (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377',
+        (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+        (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+        (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+        (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+    };
+
+    /** Case-fold pairs handed to the base class, listed as {lower-case byte, upper-case byte}. */
+    static final int CP1251_CaseFoldMap[][] = {
+        { 0xb8, 0xa8 },
+
+        { 0xe0, 0xc0 },
+        { 0xe1, 0xc1 },
+        { 0xe2, 0xc2 },
+        { 0xe3, 0xc3 },
+        { 0xe4, 0xc4 },
+        { 0xe5, 0xc5 },
+        { 0xe6, 0xc6 },
+        { 0xe7, 0xc7 },
+        { 0xe8, 0xc8 },
+        { 0xe9, 0xc9 },
+        { 0xea, 0xca },
+        { 0xeb, 0xcb },
+        { 0xec, 0xcc },
+        { 0xed, 0xcd },
+        { 0xee, 0xce },
+        { 0xef, 0xcf },
+
+        { 0xf0, 0xd0 },
+        { 0xf1, 0xd1 },
+        { 0xf2, 0xd2 },
+        { 0xf3, 0xd3 },
+        { 0xf4, 0xd4 },
+        { 0xf5, 0xd5 },
+        { 0xf6, 0xd6 },
+        { 0xf7, 0xd7 },
+        { 0xf8, 0xd8 },
+        { 0xf9, 0xd9 },
+        { 0xfa, 0xda },
+        { 0xfb, 0xdb },
+        { 0xfc, 0xdc },
+        { 0xfd, 0xdd },
+        { 0xfe, 0xde },
+        { 0xff, 0xdf }
+    };
+
+    /** Shared instance; must stay below the tables so they are initialised before the constructor runs. */
+    public static final CP1251Encoding INSTANCE = new CP1251Encoding();
+}
diff --git a/src/org/joni/encoding/specific/EUCCNEncoding.java b/src/org/joni/encoding/specific/EUCCNEncoding.java
new file mode 100644
index 0000000..66c65c5
--- /dev/null
+++ b/src/org/joni/encoding/specific/EUCCNEncoding.java
@@ -0,0 +1,93 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.EucEncoding;
+
+/**
+ * EUC-CN encoding. It reuses the byte-length table of {@link EUCKREncoding}
+ * and the ASCII character-type table, and delegates the conversion work to
+ * the generic multi-byte helpers inherited from {@link EucEncoding}.
+ */
+public final class EUCCNEncoding extends EucEncoding {
+
+    /** Configures the base class with the EUC-KR length table and the ASCII ctype table. */
+    protected EUCCNEncoding() {
+        super(EUCKREncoding.EUCKREncLen, ASCIIEncoding.AsciiCtypeTable);
+    }
+
+    /** @return the encoding name, {@code "EUC-CN"} */
+    @Override
+    public String toString() {
+        return "EUC-CN";
+    }
+
+    /** @return 2, the longest encoded character in bytes */
+    @Override
+    public int maxLength() {
+        return 2;
+    }
+
+    /** @return 1, the shortest encoded character in bytes */
+    @Override
+    public int minLength() {
+        return 1;
+    }
+
+    /** @return {@code false}: characters take one or two bytes */
+    @Override
+    public boolean isFixedWidth() {
+        return false;
+    }
+
+    /** Decodes the character at {@code p} via {@code mbnMbcToCode}. */
+    @Override
+    public int mbcToCode(byte[]bytes, int p, int end) {
+        return mbnMbcToCode(bytes, p, end);
+    }
+
+    /** Returns the encoded length of {@code code} via {@code mb2CodeToMbcLength}. */
+    @Override
+    public int codeToMbcLength(int code) {
+        return mb2CodeToMbcLength(code);
+    }
+
+    /** Encodes {@code code} into {@code bytes} at {@code p} via {@code mb2CodeToMbc}. */
+    @Override
+    public int codeToMbc(int code, byte[]bytes, int p) {
+        return mb2CodeToMbc(code, bytes, p);
+    }
+
+    /** Case-folds the character at {@code pp.value} via {@code mbnMbcCaseFold}. */
+    @Override
+    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+        return mbnMbcCaseFold(flag, bytes, pp, end, lower);
+    }
+
+    /** Tests {@code code} against {@code ctype} via {@code mb2IsCodeCType}. */
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        return mb2IsCodeCType(code, ctype);
+    }
+
+    /** Always returns {@code null}; {@code sbOut} is left untouched. */
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
+    /**
+     * Same byte test as EUC-KR (euckr_islead upstream).
+     *
+     * @param c unsigned byte value
+     * @return {@code true} when {@code c} is below 0xa1 or equals 0xff
+     */
+    protected boolean isLead(int c) {
+        return (c < 0xa1 || c == 0xff);
+    }
+
+    /**
+     * @return {@code true} when the unsigned byte at {@code p} is at most
+     *         0x7e; {@code end} is not consulted
+     */
+    @Override
+    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+        int c = bytes[p] & 0xff;
+        return c <= 0x7e;
+    }
+
+    // The singleton must be an EUCCNEncoding. It was previously built as an
+    // EUCKREncoding (copy-paste slip), which made EUCCNEncoding.INSTANCE
+    // report its name as "EUC-KR".
+    /** Shared instance of this encoding. */
+    public static final EUCCNEncoding INSTANCE = new EUCCNEncoding();
+}
diff --git a/src/org/joni/encoding/specific/EUCJPEncoding.java b/src/org/joni/encoding/specific/EUCJPEncoding.java
new file mode 100644
index 0000000..9fcca25
--- /dev/null
+++ b/src/org/joni/encoding/specific/EUCJPEncoding.java
@@ -0,0 +1,205 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.CodeRangeBuffer;
+import org.joni.IntHolder;
+import org.joni.constants.CharacterType;
+import org.joni.encoding.EucEncoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.exception.ValueException;
+import org.joni.util.BytesHash;
+
+/**
+ * EUC-JP encoding: characters take one to three bytes, the length being
+ * decided by the first byte through {@link #EUCJPEncLen}. Besides the standard
+ * character types it defines the {@code Hiragana} and {@code Katakana}
+ * properties as explicit code ranges.
+ */
+public final class EUCJPEncoding extends EucEncoding {
+
+    /** Configures the base class with the EUC-JP length table and the ASCII ctype table. */
+    protected EUCJPEncoding() {
+        super(EUCJPEncLen, ASCIIEncoding.AsciiCtypeTable);
+    }
+
+    /** @return the encoding name, {@code "EUC-JP"} */
+    @Override
+    public String toString() {
+        return "EUC-JP";
+    }
+
+    /** @return 3, the longest encoded character in bytes */
+    @Override
+    public int maxLength() {
+        return 3;
+    }
+
+    /** @return 1, the shortest encoded character in bytes */
+    @Override
+    public int minLength() {
+        return 1;
+    }
+
+    /** @return {@code false}: characters take one to three bytes */
+    @Override
+    public boolean isFixedWidth() {
+        return false;
+    }
+
+    /**
+     * Decodes the character starting at {@code p} by concatenating its bytes,
+     * most significant first.
+     *
+     * @param bytes source buffer
+     * @param p     index of the first byte of the character
+     * @param end   exclusive limit; a character cut short by it yields the
+     *              bytes that were available
+     * @return the code point built from the bytes read
+     */
+    @Override
+    public int mbcToCode(byte[]bytes, int p, int end) {
+        int len = length(bytes[p]);
+        int n = bytes[p++] & 0xff;
+        if (len == 1) return n;
+
+        for (int i=1; i<len; i++) {
+            if (p >= end) break;
+            int c = bytes[p++] & 0xff;
+            n <<= 8;
+            n += c;
+        }
+        return n;
+    }
+
+    /**
+     * @return 1 for ASCII codes, 3 when any bit of byte 2 is set, 2 when any
+     *         bit of byte 1 is set
+     * @throws ValueException for a non-ASCII code with no bits in bytes 1-2
+     */
+    @Override
+    public int codeToMbcLength(int code) {
+        if (isAscii(code)) return 1;
+        if ((code & 0xff0000) != 0) return 3;
+        if ((code & 0xff00) != 0) return 2;
+        throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+    }
+
+    /**
+     * Writes {@code code} into {@code bytes} starting at {@code p}, most
+     * significant non-zero byte first.
+     *
+     * @return the number of bytes written
+     * @throws InternalException when the byte count written disagrees with
+     *         the length the first written byte announces
+     */
+    @Override
+    public int codeToMbc(int code, byte[]bytes, int p) {
+        int p_ = p;
+        if ((code & 0xff0000) != 0) bytes[p_++] = (byte)((code >> 16) & 0xff);
+        if ((code & 0xff00) != 0) bytes[p_++] = (byte)((code >> 8) & 0xff);
+        bytes[p_++] = (byte)(code & 0xff);
+
+        // Validate against the lead byte just written (bytes[p]). The old check
+        // read bytes[p_], the slot after the character, which holds unrelated
+        // data and may lie beyond the end of the array.
+        if (length(bytes[p]) != p_ - p) throw new InternalException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+        return p_ - p;
+    }
+
+    /**
+     * Case-folds the character at {@code pp.value}: ASCII bytes go through the
+     * ASCII lower-case table, multi-byte characters are copied unchanged.
+     *
+     * @param pp    in/out cursor, advanced past the character consumed
+     * @param lower receives the folded bytes starting at index 0
+     * @return the number of bytes written to {@code lower}
+     */
+    @Override
+    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+        int p = pp.value;
+        int lowerP = 0;
+
+        if (isMbcAscii(bytes[p])) {
+            lower[lowerP] = ASCIIEncoding.AsciiToLowerCaseTable[bytes[p] & 0xff];
+            pp.value++;
+            return 1;
+        } else {
+            int len = length(bytes[p]);
+            for (int i=0; i<len; i++) {
+                lower[lowerP++] = bytes[p++];
+            }
+            pp.value += len;
+            return len; /* return byte length of converted char to lower */
+        }
+    }
+
+    /**
+     * @param c byte value
+     * @return {@code true} when the low eight bits of {@code c} fall outside
+     *         0xa1-0xfe
+     */
+    protected boolean isLead(int c) {
+        return ((c - 0xa1) & 0xff) > 0xfe - 0xa1;
+    }
+
+    /**
+     * @return {@code true} when the unsigned byte at {@code p} is at most
+     *         0x7e, or is 0x8e or 0x8f; {@code end} is not consulted
+     */
+    @Override
+    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+        int c = bytes[p] & 0xff;
+        return c <= 0x7e || c == 0x8e || c == 0x8f;
+    }
+
+    /** Code ranges of the Hiragana property: range count, then from/to pairs. */
+    private static final int CR_Hiragana[] = {
+        1,
+        0xa4a1, 0xa4f3
+    }; /* CR_Hiragana */
+
+    /** Code ranges of the Katakana property: range count, then from/to pairs. */
+    private static final int CR_Katakana[] = {
+        3,
+        0xa5a1, 0xa5f6,
+        0xaaa6, 0xaaaf,
+        0xaab1, 0xaadd
+    }; /* CR_Katakana */
+
+    /** Extra properties, indexed by {@code ctype - (MAX_STD_CTYPE + 1)}. */
+    private static final int PropertyList[][] = new int[][] {
+        CR_Hiragana,
+        CR_Katakana
+    };
+
+    /** Maps a property name to its ctype number. */
+    private static final BytesHash<Integer> CTypeNameHash = new BytesHash<Integer>();
+
+    static {
+        CTypeNameHash.put("Hiragana".getBytes(), 1 + CharacterType.MAX_STD_CTYPE);
+        CTypeNameHash.put("Katakana".getBytes(), 2 + CharacterType.MAX_STD_CTYPE);
+    }
+
+    /**
+     * Resolves a property name, trying the EUC-JP specific names first and
+     * falling back to the inherited lookup.
+     */
+    @Override
+    public int propertyNameToCType(byte[]bytes, int p, int end) {
+        Integer ctype;
+        if ((ctype = CTypeNameHash.get(bytes, p, end)) == null) {
+            return super.propertyNameToCType(bytes, p, end);
+        }
+        return ctype;
+    }
+
+    /**
+     * Tests {@code code} against {@code ctype}.
+     * <p>
+     * Standard types: codes below 128 are answered from the ASCII ctype table;
+     * any other code matches only the word/graph/print types, and only when
+     * it occupies more than one byte. Extended types are answered from the
+     * matching code-range list.
+     *
+     * @throws InternalException when an extended {@code ctype} has no entry
+     *         in the property list
+     */
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        if (ctype <= CharacterType.MAX_STD_CTYPE) {
+            if (code < 128) {
+                // ctype table is configured with ASCII
+                return isCodeCTypeInternal(code, ctype);
+            }
+            // "Longer than one byte" is tested on the bits directly. Going
+            // through codeToMbcLength would throw for codes such as
+            // 0x80-0xff, where the reference implementation answers false.
+            return isWordGraphPrint(ctype) && (code & 0xffff00) != 0;
+        } else {
+            ctype -= (CharacterType.MAX_STD_CTYPE + 1);
+            if (ctype >= PropertyList.length) throw new InternalException(ErrorMessages.ERR_TYPE_BUG);
+            return CodeRangeBuffer.isInCodeRange(PropertyList[ctype], code);
+        }
+    }
+
+    /**
+     * @return {@code null} for standard types; for extended types the
+     *         matching code-range list, with {@code sbOut.value} set to 0x80
+     * @throws InternalException when an extended {@code ctype} has no entry
+     *         in the property list
+     */
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        if (ctype <= CharacterType.MAX_STD_CTYPE) {
+            return null;
+        } else {
+            sbOut.value = 0x80;
+
+            ctype -= (CharacterType.MAX_STD_CTYPE + 1);
+            if (ctype >= PropertyList.length) throw new InternalException(ErrorMessages.ERR_TYPE_BUG);
+            return PropertyList[ctype];
+        }
+    }
+
+    /**
+     * Encoded character length keyed by the unsigned value of the first byte:
+     * 2 for 0x8e and 0xa1-0xfe, 3 for 0x8f, 1 otherwise.
+     */
+    static final int EUCJPEncLen[] = {
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+    };
+
+    /** Shared instance; must stay below the tables so they are initialised before the constructor runs. */
+    public static final EUCJPEncoding INSTANCE = new EUCJPEncoding();
+}
diff --git a/src/org/joni/encoding/specific/EUCKREncoding.java b/src/org/joni/encoding/specific/EUCKREncoding.java
new file mode 100644
index 0000000..4d5583f
--- /dev/null
+++ b/src/org/joni/encoding/specific/EUCKREncoding.java
@@ -0,0 +1,112 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.EucEncoding;
+
+/**
+ * EUC-KR encoding: one- and two-byte characters, with length taken from
+ * {@link #EUCKREncLen}. Conversion and classification are delegated to the
+ * generic multi-byte helpers inherited from {@link EucEncoding}.
+ */
+public final class EUCKREncoding extends EucEncoding {
+
+    /**
+     * Encoded character length keyed by the unsigned value of the first byte:
+     * 2 for 0xa1-0xfe, 1 otherwise.
+     */
+    static final int[] EUCKREncLen = {
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+    };
+
+    /** Configures the base class with the EUC-KR length table and the ASCII ctype table. */
+    protected EUCKREncoding() {
+        super(EUCKREncLen, ASCIIEncoding.AsciiCtypeTable);
+    }
+
+    /** @return the encoding name, {@code "EUC-KR"} */
+    @Override
+    public String toString() {
+        return "EUC-KR";
+    }
+
+    /** @return 2, the longest encoded character in bytes */
+    @Override
+    public int maxLength() {
+        return 2;
+    }
+
+    /** @return 1, the shortest encoded character in bytes */
+    @Override
+    public int minLength() {
+        return 1;
+    }
+
+    /** @return {@code false}: characters take one or two bytes */
+    @Override
+    public boolean isFixedWidth() {
+        return false;
+    }
+
+    /** Decodes the character at {@code p} via {@code mbnMbcToCode}. */
+    @Override
+    public int mbcToCode(byte[]bytes, int p, int end) {
+        return mbnMbcToCode(bytes, p, end);
+    }
+
+    /** Returns the encoded length of {@code code} via {@code mb2CodeToMbcLength}. */
+    @Override
+    public int codeToMbcLength(int code) {
+        return mb2CodeToMbcLength(code);
+    }
+
+    /** Encodes {@code code} into {@code bytes} at {@code p} via {@code mb2CodeToMbc}. */
+    @Override
+    public int codeToMbc(int code, byte[]bytes, int p) {
+        return mb2CodeToMbc(code, bytes, p);
+    }
+
+    /** Case-folds the character at {@code pp.value} via {@code mbnMbcCaseFold}. */
+    @Override
+    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+        return mbnMbcCaseFold(flag, bytes, pp, end, lower);
+    }
+
+    /** Tests {@code code} against {@code ctype} via {@code mb2IsCodeCType}. */
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        return mb2IsCodeCType(code, ctype);
+    }
+
+    /** Always returns {@code null}; {@code sbOut} is left untouched. */
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
+    /**
+     * euckr_islead.
+     *
+     * @param c unsigned byte value
+     * @return {@code true} when {@code c} is below 0xa1 or equals 0xff
+     */
+    protected boolean isLead(int c) {
+        return c < 0xa1 || c == 0xff;
+    }
+
+    /**
+     * @return {@code true} when the unsigned byte at {@code p} is at most
+     *         0x7e; {@code end} is not consulted
+     */
+    @Override
+    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+        final int unsignedByte = bytes[p] & 0xff;
+        return unsignedByte <= 0x7e;
+    }
+
+    /** Shared instance; declared after the length table so the table exists when the constructor runs. */
+    public static final EUCKREncoding INSTANCE = new EUCKREncoding();
+}
diff --git a/src/org/joni/encoding/specific/EUCTWEncoding.java b/src/org/joni/encoding/specific/EUCTWEncoding.java
new file mode 100644
index 0000000..6210ede
--- /dev/null
+++ b/src/org/joni/encoding/specific/EUCTWEncoding.java
@@ -0,0 +1,112 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.EucEncoding;
+
+/**
+ * EUC-TW encoding: characters take one, two or four bytes, with length taken
+ * from {@link #EUCTWEncLen}. Conversion and classification are delegated to
+ * the generic multi-byte helpers inherited from {@link EucEncoding}.
+ */
+public final class EUCTWEncoding extends EucEncoding {
+
+    /**
+     * Encoded character length keyed by the unsigned value of the first byte:
+     * 4 for 0x8e, 2 for 0xa1-0xfe, 1 otherwise.
+     */
+    static final int[] EUCTWEncLen = {
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+    };
+
+    /** Configures the base class with the EUC-TW length table and the ASCII ctype table. */
+    protected EUCTWEncoding() {
+        super(EUCTWEncLen, ASCIIEncoding.AsciiCtypeTable);
+    }
+
+    /** @return the encoding name, {@code "EUC-TW"} */
+    @Override
+    public String toString() {
+        return "EUC-TW";
+    }
+
+    /** @return 4, the longest encoded character in bytes */
+    @Override
+    public int maxLength() {
+        return 4;
+    }
+
+    /** @return 1, the shortest encoded character in bytes */
+    @Override
+    public int minLength() {
+        return 1;
+    }
+
+    /** @return {@code false}: character width varies */
+    @Override
+    public boolean isFixedWidth() {
+        return false;
+    }
+
+    /** Decodes the character at {@code p} via {@code mbnMbcToCode}. */
+    @Override
+    public int mbcToCode(byte[]bytes, int p, int end) {
+        return mbnMbcToCode(bytes, p, end);
+    }
+
+    /** Returns the encoded length of {@code code} via {@code mb4CodeToMbcLength}. */
+    @Override
+    public int codeToMbcLength(int code) {
+        return mb4CodeToMbcLength(code);
+    }
+
+    /** Encodes {@code code} into {@code bytes} at {@code p} via {@code mb4CodeToMbc}. */
+    @Override
+    public int codeToMbc(int code, byte[]bytes, int p) {
+        return mb4CodeToMbc(code, bytes, p);
+    }
+
+    /** Case-folds the character at {@code pp.value} via {@code mbnMbcCaseFold}. */
+    @Override
+    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+        return mbnMbcCaseFold(flag, bytes, pp, end, lower);
+    }
+
+    /** Tests {@code code} against {@code ctype} via {@code mb4IsCodeCType}. */
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        return mb4IsCodeCType(code, ctype);
+    }
+
+    /** Always returns {@code null}; {@code sbOut} is left untouched. */
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
+    /**
+     * EUC-TW variant of the lead test: like the EUC-KR one, except that 0x8e
+     * is excluded.
+     *
+     * @param c unsigned byte value
+     * @return {@code true} when {@code c} equals 0xff, or is below 0xa1 and
+     *         is not 0x8e
+     */
+    protected boolean isLead(int c) {
+        if (c == 0xff) {
+            return true;
+        }
+        return c < 0xa1 && c != 0x8e;
+    }
+
+    /**
+     * @return {@code true} when the unsigned byte at {@code p} is at most
+     *         0x7e; {@code end} is not consulted
+     */
+    @Override
+    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+        final int unsignedByte = bytes[p] & 0xff;
+        return unsignedByte <= 0x7e;
+    }
+
+    /** Shared instance; declared after the length table so the table exists when the constructor runs. */
+    public static final EUCTWEncoding INSTANCE = new EUCTWEncoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_10Encoding.java b/src/org/joni/encoding/specific/ISO8859_10Encoding.java
new file mode 100644
index 0000000..4ddfc8f
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_10Encoding.java
@@ -0,0 +1,155 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+/**
+ * Single-byte encoding that names itself "ISO-8859-10". All behaviour comes
+ * from {@link ISOEncoding}; this class only contributes the three lookup
+ * tables and the shared {@link #INSTANCE}.
+ */
+public final class ISO8859_10Encoding extends ISOEncoding {
+
+    /** Hands the ctype, lower-case and case-fold tables to the base class. */
+    protected ISO8859_10Encoding() {
+        super(ISO8859_10CtypeTable, ISO8859_10ToLowerCaseTable, ISO8859_10CaseFoldMap);
+    }
+
+    /** @return the encoding name, {@code "ISO-8859-10"} */
+    @Override
+    public String toString() {
+        return "ISO-8859-10";
+    }
+
+    /** Character-type bit masks, one entry per byte value (256 entries). */
+    static final short[] ISO8859_10CtypeTable = {
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+        0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+        0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
+        0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+    };
+
+    /** Lower-case mapping, one entry per byte value (256 entries). */
+    static final byte[] ISO8859_10ToLowerCaseTable = {
+        (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+        (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+        (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+        (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+        (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+        (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+        (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+        (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+        (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+        (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+        (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+        (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+        (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+        (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+        (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+        (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+        (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+        (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+        (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+        (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+        (byte)'\240', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\247',
+        (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\255', (byte)'\276', (byte)'\277',
+        (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+        (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+        (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+        (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+        (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+        (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+        (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+        (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+        (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+        (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+    };
+
+    /** Case-fold pairs handed to the base class, listed as {upper-case byte, lower-case byte}. */
+    static final int[][] ISO8859_10CaseFoldMap = {
+        { 0xa1, 0xb1 },
+        { 0xa2, 0xb2 },
+        { 0xa3, 0xb3 },
+        { 0xa4, 0xb4 },
+        { 0xa5, 0xb5 },
+        { 0xa6, 0xb6 },
+        { 0xa8, 0xb8 },
+        { 0xa9, 0xb9 },
+        { 0xaa, 0xba },
+        { 0xab, 0xbb },
+        { 0xac, 0xbc },
+        { 0xae, 0xbe },
+        { 0xaf, 0xbf },
+
+        { 0xc0, 0xe0 },
+        { 0xc1, 0xe1 },
+        { 0xc2, 0xe2 },
+        { 0xc3, 0xe3 },
+        { 0xc4, 0xe4 },
+        { 0xc5, 0xe5 },
+        { 0xc6, 0xe6 },
+        { 0xc7, 0xe7 },
+        { 0xc8, 0xe8 },
+        { 0xc9, 0xe9 },
+        { 0xca, 0xea },
+        { 0xcb, 0xeb },
+        { 0xcc, 0xec },
+        { 0xcd, 0xed },
+        { 0xce, 0xee },
+        { 0xcf, 0xef },
+
+        { 0xd0, 0xf0 },
+        { 0xd1, 0xf1 },
+        { 0xd2, 0xf2 },
+        { 0xd3, 0xf3 },
+        { 0xd4, 0xf4 },
+        { 0xd5, 0xf5 },
+        { 0xd6, 0xf6 },
+        { 0xd7, 0xf7 },
+        { 0xd8, 0xf8 },
+        { 0xd9, 0xf9 },
+        { 0xda, 0xfa },
+        { 0xdb, 0xfb },
+        { 0xdc, 0xfc },
+        { 0xdd, 0xfd },
+        { 0xde, 0xfe }
+    };
+
+    /** Shared instance; must stay below the tables so they are initialised before the constructor runs. */
+    public static final ISO8859_10Encoding INSTANCE = new ISO8859_10Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_11Encoding.java b/src/org/joni/encoding/specific/ISO8859_11Encoding.java
new file mode 100644
index 0000000..9c3a605
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_11Encoding.java
@@ -0,0 +1,94 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.IntHolder;
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_11Encoding extends ISOEncoding {
+ // Class overview: encoding descriptor reported as "ISO-8859-11"; it supplies its own ctype table, reuses the ASCII lower-case table, and its case-fold methods delegate to the ascii* helpers.
+ protected ISO8859_11Encoding() {
+ super(ISO8859_11CtypeTable, ASCIIEncoding.AsciiToLowerCaseTable, null); // ctype table, ASCII lower-case table, and null where the sibling ISO-8859 classes pass a case-fold map
+ }
+ /** Returns the fixed display name of this encoding. */
+ @Override
+ public String toString() {
+ return "ISO-8859-11";
+ }
+ /** Delegates to asciiMbcCaseFold with the same arguments and returns its result. */
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+ return asciiMbcCaseFold(flag, bytes, pp, end, lower);
+ }
+ /** Returns LowerCaseTable, which is not declared in this class - presumably the inherited field holding the table given to the constructor; confirm in the base class. */
+ @Override
+ public final byte[]toLowerCaseTable() {
+ return LowerCaseTable;
+ }
+ /** Delegates to asciiApplyAllCaseFold with the same arguments. */
+ @Override
+ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+ asciiApplyAllCaseFold(flag, fun, arg);
+ }
+ /** Delegates to asciiCaseFoldCodesByString with the same arguments and returns its result. */
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
+ return asciiCaseFoldCodesByString(flag, bytes, p, end);
+ }
+ /** Character-type table: 256 entries (32 rows of 8), presumably one flag word per byte value - flag meanings are defined outside this file. Entries 0xdb-0xde and 0xfc-0xff are 0x0000. */
+ static final short ISO8859_11CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000
+ };
+ /** Shared pre-built instance of this encoding. */
+ public static final ISO8859_11Encoding INSTANCE = new ISO8859_11Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_13Encoding.java b/src/org/joni/encoding/specific/ISO8859_13Encoding.java
new file mode 100644
index 0000000..a629d5f
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_13Encoding.java
@@ -0,0 +1,140 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_13Encoding extends ISOEncoding {
+ // Class overview: encoding descriptor reported as "ISO-8859-13"; apart from toString() everything is inherited from ISOEncoding, parameterised by the three tables below.
+ protected ISO8859_13Encoding() {
+ super(ISO8859_13CtypeTable, ISO8859_13ToLowerCaseTable, ISO8859_13CaseFoldMap); // hand the ctype, lower-case and case-fold tables to the base class
+ }
+ /** Returns the fixed display name of this encoding. */
+ @Override
+ public String toString() {
+ return "ISO-8859-13";
+ }
+ /** Character-type table: 256 entries (32 rows of 8), presumably one flag word per byte value - flag meanings are defined outside this file. */
+ static final short ISO8859_13CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0,
+ 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0
+ };
+ /** Lower-case table: 256 bytes written as octal escapes; entry i holds the lower-case form of byte i (e.g. index 0101 holds 0141), or i itself where no mapping is defined. */
+ static final byte ISO8859_13ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\270', (byte)'\251', (byte)'\272', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\277',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+ // Case-fold pairs: each {a, b} agrees with the lower-case table (index a holds b). NOTE(review): that table also maps 0xa8->0xb8, 0xaa->0xba and 0xaf->0xbf, yet no such pairs are listed here - confirm whether the omission is intentional.
+ static final int ISO8859_13CaseFoldMap[][] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+ /** Shared pre-built instance of this encoding. */
+ public static final ISO8859_13Encoding INSTANCE = new ISO8859_13Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_14Encoding.java b/src/org/joni/encoding/specific/ISO8859_14Encoding.java
new file mode 100644
index 0000000..2d5a36b
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_14Encoding.java
@@ -0,0 +1,156 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_14Encoding extends ISOEncoding {
+ // Class overview: encoding descriptor reported as "ISO-8859-14"; apart from toString() everything is inherited from ISOEncoding, parameterised by the three tables below.
+ protected ISO8859_14Encoding() {
+ super(ISO8859_14CtypeTable, ISO8859_14ToLowerCaseTable, ISO8859_14CaseFoldMap); // hand the ctype, lower-case and case-fold tables to the base class
+ }
+ /** Returns the fixed display name of this encoding. */
+ @Override
+ public String toString() {
+ return "ISO-8859-14";
+ }
+ /** Character-type table: 256 entries (32 rows of 8), presumably one flag word per byte value - flag meanings are defined outside this file. */
+ static final short ISO8859_14CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0,
+ 0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2,
+ 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2,
+ 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+ };
+ /** Lower-case table: 256 bytes written as octal escapes; entry i holds the lower-case form of byte i (e.g. index 0101 holds 0141), or i itself where no mapping is defined. */
+ static final byte ISO8859_14ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\242', (byte)'\242', (byte)'\243', (byte)'\245', (byte)'\245', (byte)'\253', (byte)'\247',
+ (byte)'\270', (byte)'\251', (byte)'\272', (byte)'\253', (byte)'\274', (byte)'\255', (byte)'\256', (byte)'\377',
+ (byte)'\261', (byte)'\261', (byte)'\263', (byte)'\263', (byte)'\265', (byte)'\265', (byte)'\266', (byte)'\271',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\277', (byte)'\274', (byte)'\276', (byte)'\276', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+ /** Case-fold pairs: each {a, b} agrees with the lower-case table (index a holds b), and every non-identity entry of that table has a pair here. */
+ static final int ISO8859_14CaseFoldMap[][] = {
+ { 0xa1, 0xa2 },
+ { 0xa4, 0xa5 },
+ { 0xa6, 0xab },
+ { 0xa8, 0xb8 },
+ { 0xaa, 0xba },
+ { 0xac, 0xbc },
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xb1 },
+ { 0xb2, 0xb3 },
+ { 0xb4, 0xb5 },
+ { 0xb7, 0xb9 },
+ { 0xbb, 0xbf },
+ { 0xbd, 0xbe },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+ /** Shared pre-built instance of this encoding. */
+ public static final ISO8859_14Encoding INSTANCE = new ISO8859_14Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_15Encoding.java b/src/org/joni/encoding/specific/ISO8859_15Encoding.java
new file mode 100644
index 0000000..a612e3b
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_15Encoding.java
@@ -0,0 +1,146 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_15Encoding extends ISOEncoding {
+ // Class overview: encoding descriptor reported as "ISO-8859-15"; apart from toString() everything is inherited from ISOEncoding, parameterised by the three tables below.
+ protected ISO8859_15Encoding() {
+ super(ISO8859_15CtypeTable, ISO8859_15ToLowerCaseTable, ISO8859_15CaseFoldMap); // hand the ctype, lower-case and case-fold tables to the base class
+ }
+ /** Returns the fixed display name of this encoding. */
+ @Override
+ public String toString() {
+ return "ISO-8859-15";
+ }
+ /** Character-type table: 256 entries (32 rows of 8), presumably one flag word per byte value - flag meanings are defined outside this file. */
+ static final short ISO8859_15CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0,
+ 0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0,
+ 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+ };
+ /** Lower-case table: 256 bytes written as octal escapes; entry i holds the lower-case form of byte i (e.g. index 0101 holds 0141), or i itself where no mapping is defined. */
+ static final byte ISO8859_15ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\250', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\270', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\275', (byte)'\275', (byte)'\377', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+ /** Case-fold pairs: each {a, b} agrees with the lower-case table (index a holds b), and every non-identity entry of that table has a pair here. */
+ static final int ISO8859_15CaseFoldMap[][] = {
+ { 0xa6, 0xa8 },
+
+ { 0xb4, 0xb8 },
+ { 0xbc, 0xbd },
+ { 0xbe, 0xff },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+ /** Shared pre-built instance of this encoding. */
+ public static final ISO8859_15Encoding INSTANCE = new ISO8859_15Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_16Encoding.java b/src/org/joni/encoding/specific/ISO8859_16Encoding.java
new file mode 100644
index 0000000..8670766
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_16Encoding.java
@@ -0,0 +1,153 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+/**
+ * ISO-8859-16 encoding definition: holds the character-type, lower-case and case-fold
+ * tables for this charset and hands them to {@link ISOEncoding}.
+ * Obtain the encoding through {@link #INSTANCE}.
+ */
+public final class ISO8859_16Encoding extends ISOEncoding {
+
+    /** Passes this charset's three static tables to the base class constructor. */
+    protected ISO8859_16Encoding() {
+        super(ISO8859_16CtypeTable, ISO8859_16ToLowerCaseTable, ISO8859_16CaseFoldMap);
+    }
+
+    /** @return the charset name, {@code "ISO-8859-16"} */
+    @Override
+    public String toString() {
+        return "ISO-8859-16";
+    }
+
+    /**
+     * Character-type flags, one 16-bit entry per byte value 0x00-0xff (8 entries per row).
+     * NOTE(review): the meaning of the individual bits is defined by the base encoding
+     * classes, which are not part of this file.
+     */
+    static final short ISO8859_16CtypeTable[] = {
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+        0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+        0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+        0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+        0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+        0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+        0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+        0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0,
+        0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2,
+        0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0,
+        0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+        0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+        0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+    };
+
+    /**
+     * Lower-case mapping indexed by unsigned byte value (8 entries per row, written as
+     * octal character escapes): entry {@code i} is the byte that {@code i} lower-cases to.
+     * Every first element of {@link #ISO8859_16CaseFoldMap} maps to its second element here;
+     * all other bytes map to themselves. That includes 0xa4 (EURO SIGN), which is not a
+     * letter (see its CtypeTable entry) and therefore must not fold to 0xa5.
+     */
+    static final byte ISO8859_16ToLowerCaseTable[] = new byte[]{
+        (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+        (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+        (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+        (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+        (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+        (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+        (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+        (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+        (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+        (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+        (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+        (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+        (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+        (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+        (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+        (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+        (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+        (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+        (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+        (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+        (byte)'\240', (byte)'\242', (byte)'\242', (byte)'\263', (byte)'\244', (byte)'\245', (byte)'\250', (byte)'\247',
+        (byte)'\250', (byte)'\251', (byte)'\272', (byte)'\253', (byte)'\256', (byte)'\255', (byte)'\256', (byte)'\277',
+        (byte)'\260', (byte)'\261', (byte)'\271', (byte)'\263', (byte)'\270', (byte)'\265', (byte)'\266', (byte)'\267',
+        (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\275', (byte)'\275', (byte)'\377', (byte)'\277',
+        (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+        (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+        (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+        (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+        (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+        (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+        (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+        (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+    };
+
+    /**
+     * Case-fold pairs, one {upper-case code, lower-case code} row per letter pair; the
+     * second element of each row equals the ToLowerCaseTable entry of the first.
+     * Rows 0xc0-0xde follow the fixed +0x20 offset; the rows before them do not.
+     */
+    static final int ISO8859_16CaseFoldMap[][] = {
+        { 0xa1, 0xa2 },
+        { 0xa3, 0xb3 },
+        { 0xa6, 0xa8 },
+        { 0xaa, 0xba },
+        { 0xac, 0xae },
+        { 0xaf, 0xbf },
+
+        { 0xb2, 0xb9 },
+        { 0xb4, 0xb8 },
+        { 0xbc, 0xbd },
+        { 0xbe, 0xff },
+
+        { 0xc0, 0xe0 },
+        { 0xc1, 0xe1 },
+        { 0xc2, 0xe2 },
+        { 0xc3, 0xe3 },
+        { 0xc4, 0xe4 },
+        { 0xc5, 0xe5 },
+        { 0xc6, 0xe6 },
+        { 0xc7, 0xe7 },
+        { 0xc8, 0xe8 },
+        { 0xc9, 0xe9 },
+        { 0xca, 0xea },
+        { 0xcb, 0xeb },
+        { 0xcc, 0xec },
+        { 0xcd, 0xed },
+        { 0xce, 0xee },
+        { 0xcf, 0xef },
+
+        { 0xd0, 0xf0 },
+        { 0xd1, 0xf1 },
+        { 0xd2, 0xf2 },
+        { 0xd3, 0xf3 },
+        { 0xd4, 0xf4 },
+        { 0xd5, 0xf5 },
+        { 0xd6, 0xf6 },
+        { 0xd7, 0xf7 },
+        { 0xd8, 0xf8 },
+        { 0xd9, 0xf9 },
+        { 0xda, 0xfa },
+        { 0xdb, 0xfb },
+        { 0xdc, 0xfc },
+        { 0xdd, 0xfd },
+        { 0xde, 0xfe }
+    };
+
+    /**
+     * The single shared instance. Static initializers run in textual order, so this
+     * declaration must stay below the tables that the constructor passes to super().
+     */
+    public static final ISO8859_16Encoding INSTANCE = new ISO8859_16Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_1Encoding.java b/src/org/joni/encoding/specific/ISO8859_1Encoding.java
new file mode 100644
index 0000000..ac0f3a4
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_1Encoding.java
@@ -0,0 +1,233 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_1Encoding extends ISOEncoding {
+
+ protected ISO8859_1Encoding() {
+ super(ISO8859_1CtypeTable, ISO8859_1ToLowerCaseTable, ISO8859_1CaseFoldMap);
+ }
+
+ @Override
+ public String toString() {
+ return "ISO-8859-1";
+ }
+
+ @Override
+ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+ applyAllCaseFoldWithMap(CaseFoldMap.length, CaseFoldMap, true, flag, fun, arg);
+ }
+
+ /** get_case_fold_codes_by_str
+ */
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
+ int b = bytes[p] & 0xff;
+
+ if (0x41 <= b && b <= 0x5a) {
+ CaseFoldCodeItem item0 = new CaseFoldCodeItem(1, 1, new int[]{b + 0x20});
+
+ if (b == 0x53 && end > p + 1 &&
+ (bytes[p+1] == (byte)0x53 || bytes[p+1] == (byte)0x73)) { /* ss */
+ CaseFoldCodeItem item1 = new CaseFoldCodeItem(2, 1, new int[]{0xdf});
+
+ return new CaseFoldCodeItem[]{item0, item1};
+ } else {
+ return new CaseFoldCodeItem[]{item0};
+ }
+ } else if (0x61 <= b && b <= 0x7a) {
+ CaseFoldCodeItem item0 = new CaseFoldCodeItem(1, 1, new int[]{b - 0x20});
+
+ if (b == 0x73 && end > p + 1 &&
+ (bytes[p+1] == (byte)0x73 || bytes[p+1] == (byte)0x53)) { /* ss */
+ CaseFoldCodeItem item1 = new CaseFoldCodeItem(2, 1, new int[]{0xdf});
+ return new CaseFoldCodeItem[]{item0, item1};
+ } else {
+ return new CaseFoldCodeItem[]{item0};
+ }
+
+ } else if (0xc0 <= b && b <= 0xcf) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{b + 0x20})};
+ } else if (0xd0 <= b && b <= 0xdf) {
+ if (b == 0xdf) {
+ CaseFoldCodeItem item0 = new CaseFoldCodeItem(1, 2, new int[]{'s', 's'});
+ CaseFoldCodeItem item1 = new CaseFoldCodeItem(1, 2, new int[]{'S', 'S'});
+ CaseFoldCodeItem item2 = new CaseFoldCodeItem(1, 2, new int[]{'s', 'S'});
+ CaseFoldCodeItem item3 = new CaseFoldCodeItem(1, 2, new int[]{'S', 's'});
+
+ return new CaseFoldCodeItem[]{item0, item1, item2, item3};
+ } else if (b != 0xd7) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{b + 0x20})};
+ }
+ } else if (0xe0 <= b && b <= 0xef) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{b - 0x20})};
+ } else if (0xf0 <= b && b <= 0xfe) {
+ if (b != 0xf7) {
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(1, 1, new int[]{b - 0x20})};
+ }
+ }
+ return EMPTY_FOLD_CODES;
+ }
+
+ static final short ISO8859_1CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+ };
+
+ static final byte ISO8859_1ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ static final byte ISO8859_1ToUpperCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107',
+ (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117',
+ (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127',
+ (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107',
+ (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117',
+ (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127',
+ (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\367',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\377',
+ };
+
+ static final int ISO8859_1CaseFoldMap[][] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+
+ public static final ISO8859_1Encoding INSTANCE = new ISO8859_1Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_2Encoding.java b/src/org/joni/encoding/specific/ISO8859_2Encoding.java
new file mode 100644
index 0000000..f08061b
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_2Encoding.java
@@ -0,0 +1,151 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+/**
+ * ISO-8859-2 encoding definition: holds the character-type, lower-case and case-fold
+ * tables for this charset and hands them to ISOEncoding.
+ * Obtain the encoding through INSTANCE.
+ */
+public final class ISO8859_2Encoding extends ISOEncoding {
+
+ /** Passes this charset's three static tables to the base class constructor. */
+ protected ISO8859_2Encoding() {
+ super(ISO8859_2CtypeTable, ISO8859_2ToLowerCaseTable, ISO8859_2CaseFoldMap);
+ }
+
+ /** @return the charset name, "ISO-8859-2" */
+ @Override
+ public String toString() {
+ return "ISO-8859-2";
+ }
+
+ /**
+ * Character-type flags, one 16-bit entry per byte value 0x00-0xff (8 entries per row).
+ * NOTE(review): the meaning of the individual bits is defined by the base encoding
+ * classes, which are not part of this file.
+ */
+ static final short ISO8859_2CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
+ 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
+ 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
+ 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
+ };
+
+ /**
+ * Lower-case mapping indexed by unsigned byte value (8 entries per row, written as
+ * octal character escapes): entry i is the byte that i lower-cases to.
+ * Every first element of ISO8859_2CaseFoldMap maps to its second element here;
+ * all other bytes map to themselves.
+ */
+ static final byte ISO8859_2ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\261', (byte)'\242', (byte)'\263', (byte)'\244', (byte)'\265', (byte)'\266', (byte)'\247',
+ (byte)'\250', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\255', (byte)'\276', (byte)'\277',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ /**
+ * Case-fold pairs, one {upper-case code, lower-case code} row per letter pair; the
+ * second element of each row equals the ToLowerCaseTable entry of the first.
+ * The 0xa1-0xaf rows pair a code with code + 0x10, the 0xc0-0xde rows with
+ * code + 0x20; 0xd7 has no row.
+ */
+ static final int ISO8859_2CaseFoldMap[][] = {
+ { 0xa1, 0xb1 },
+ { 0xa3, 0xb3 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+
+ /**
+ * The single shared instance. Static initializers run in textual order, so this
+ * declaration must stay below the tables that the constructor passes to super().
+ */
+ public static final ISO8859_2Encoding INSTANCE = new ISO8859_2Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_3Encoding.java b/src/org/joni/encoding/specific/ISO8859_3Encoding.java
new file mode 100644
index 0000000..34927d9
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_3Encoding.java
@@ -0,0 +1,147 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_3Encoding extends ISOEncoding {
+
+ protected ISO8859_3Encoding() { // the only call in this file is the INSTANCE initializer at the bottom of the class
+ super(ISO8859_3CtypeTable, ISO8859_3ToLowerCaseTable, ISO8859_3CaseFoldMap); // hands this class's three static tables to ISOEncoding
+ }
+
+ @Override
+ public String toString() { // returns the fixed label "ISO-8859-3"
+ return "ISO-8859-3";
+ }
+
+ static final short ISO8859_3CtypeTable[] = { // 256 entries (32 rows of 8), one per byte value; passed first to super(...). NOTE(review): the bit-flag meanings are defined outside this file
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0, // index 0xa0: rows from here on differ between the ISO-8859 tables in this package
+ 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2,
+ 0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0,
+ 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
+ };
+
+ static final byte ISO8859_3ToLowerCaseTable[] = new byte[]{ // 256 octal-escaped entries (32 rows of 8); entry i is the replacement for byte value i, e.g. index 0x41 holds \141
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\261', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\266', (byte)'\247', // index 0xa0: the non-identity entries below match the pairs in ISO8859_3CaseFoldMap
+ (byte)'\250', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\255', (byte)'\256', (byte)'\277',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\303', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\320', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ static final int ISO8859_3CaseFoldMap[][] = { // {from, to} byte-value pairs; each 'from' is mapped to its 'to' by ISO8859_3ToLowerCaseTable
+ { 0xa1, 0xb1 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xaf, 0xbf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc4, 0xe4 }, // 0xc3 has no pair; its ctype entry is 0x0000 and the lower-case table keeps it as \303
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd1, 0xf1 }, // 0xd0 has no pair; its ctype entry is 0x0000 and the lower-case table keeps it as \320
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+
+ };
+
+ public static final ISO8859_3Encoding INSTANCE = new ISO8859_3Encoding(); // declared last: static initializers run in textual order, so the three tables passed to super(...) are already built
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_4Encoding.java b/src/org/joni/encoding/specific/ISO8859_4Encoding.java
new file mode 100644
index 0000000..3cd7a08
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_4Encoding.java
@@ -0,0 +1,150 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_4Encoding extends ISOEncoding {
+
+ protected ISO8859_4Encoding() { // the only call in this file is the INSTANCE initializer at the bottom of the class
+ super(ISO8859_4CtypeTable, ISO8859_4ToLowerCaseTable, ISO8859_4CaseFoldMap); // hands this class's three static tables to ISOEncoding
+ }
+
+ @Override
+ public String toString() { // returns the fixed label "ISO-8859-4"
+ return "ISO-8859-4";
+ }
+
+ static final short ISO8859_4CtypeTable[] = { // 256 entries (32 rows of 8), one per byte value; passed first to super(...). NOTE(review): the bit-flag meanings are defined outside this file
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, // index 0xa0: rows from here on differ between the ISO-8859 tables in this package
+ 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0,
+ 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
+ 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
+ };
+
+ static final byte ISO8859_4ToLowerCaseTable[] = new byte[]{ // 256 octal-escaped entries (32 rows of 8); entry i is the replacement for byte value i, e.g. index 0x41 holds \141
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\261', (byte)'\242', (byte)'\263', (byte)'\244', (byte)'\265', (byte)'\266', (byte)'\247', // index 0xa0: first row with non-identity entries above the ASCII range
+ (byte)'\250', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\255', (byte)'\276', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\277', (byte)'\276', (byte)'\277', // index 0xbd maps to \277 (0xbf): the one pair whose two bytes sit in the same row
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ static final int ISO8859_4CaseFoldMap[][] = { // {from, to} byte-value pairs; each 'from' is mapped to its 'to' by ISO8859_4ToLowerCaseTable
+ { 0xa1, 0xb1 },
+ { 0xa3, 0xb3 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xbd, 0xbf }, // capital/small Eng: ISO8859_4ToLowerCaseTable maps 0xbd to \277, so the pair must be listed here as well
+ { 0xc0, 0xe0 }, // pairs from here on differ by 0x20
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 }, // 0xd7 has no pair; the lower-case table keeps it as \327
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+
+ public static final ISO8859_4Encoding INSTANCE = new ISO8859_4Encoding(); // declared last: static initializers run in textual order, so the three tables passed to super(...) are already built
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_5Encoding.java b/src/org/joni/encoding/specific/ISO8859_5Encoding.java
new file mode 100644
index 0000000..b890b61
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_5Encoding.java
@@ -0,0 +1,171 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_5Encoding extends ISOEncoding {
+
+ protected ISO8859_5Encoding() { // the only call in this file is the INSTANCE initializer at the bottom of the class
+ super(ISO8859_5CtypeTable, ISO8859_5ToLowerCaseTable, ISO8859_5CaseFoldMap, false); // NOTE(review): the meaning of the trailing 'false' is defined by ISOEncoding, not shown here - confirm
+ }
+
+ @Override
+ public String toString() { // returns the fixed label "ISO-8859-5"
+ return "ISO-8859-5";
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) { // flag and end are not consulted
+ final int tableIndex = bytes[pp.value] & 0xff; // mask so that bytes >= 0x80 index the table as 128..255
+ lower[0] = LowerCaseTable[tableIndex]; // store the table's replacement for the current byte
+ pp.value += 1; // advance the caller's position past the byte just handled
+ return 1; // matches the single byte stored in lower[0]
+ }
+
+ @Override
+ public final byte[]toLowerCaseTable() { // returns the array itself, not a copy
+ return LowerCaseTable; // field not declared in this class; NOTE(review): presumably inherited and set from the table handed to super(...) - confirm
+ }
+
+ static final short ISO8859_5CtypeTable[] = { // 256 entries (32 rows of 8), one per byte value; passed first to super(...). NOTE(review): the bit-flag meanings are defined outside this file
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, // index 0xa0: rows from here on differ between the ISO-8859 tables in this package
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2
+ };
+
+ static final byte ISO8859_5ToLowerCaseTable[] = new byte[]{ // 256 octal-escaped entries (32 rows of 8); entry i is the replacement for byte value i, e.g. index 0x41 holds \141
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367', // indexes 0xa1-0xaf map to +0x50, except 0xad which maps to itself
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\255', (byte)'\376', (byte)'\377',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327', // indexes 0xb0-0xcf map to +0x20
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327', // indexes 0xd0-0xff map to themselves
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ static final int ISO8859_5CaseFoldMap[][] = { // {from, to} byte-value pairs; each 'from' is mapped to its 'to' by ISO8859_5ToLowerCaseTable
+ { 0xa1, 0xf1 }, // pairs in this group differ by 0x50
+ { 0xa2, 0xf2 },
+ { 0xa3, 0xf3 },
+ { 0xa4, 0xf4 },
+ { 0xa5, 0xf5 },
+ { 0xa6, 0xf6 },
+ { 0xa7, 0xf7 },
+ { 0xa8, 0xf8 },
+ { 0xa9, 0xf9 },
+ { 0xaa, 0xfa },
+ { 0xab, 0xfb },
+ { 0xac, 0xfc },
+ { 0xae, 0xfe }, // 0xad has no pair; the lower-case table keeps it as \255
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xd0 }, // pairs from here on differ by 0x20
+ { 0xb1, 0xd1 },
+ { 0xb2, 0xd2 },
+ { 0xb3, 0xd3 },
+ { 0xb4, 0xd4 },
+ { 0xb5, 0xd5 },
+ { 0xb6, 0xd6 },
+ { 0xb7, 0xd7 },
+ { 0xb8, 0xd8 },
+ { 0xb9, 0xd9 },
+ { 0xba, 0xda },
+ { 0xbb, 0xdb },
+ { 0xbc, 0xdc },
+ { 0xbd, 0xdd },
+ { 0xbe, 0xde }, // 0xde, as in ISO8859_5ToLowerCaseTable (\336); 0xdf is the partner of 0xbf only
+ { 0xbf, 0xdf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef }
+ };
+
+ public static final ISO8859_5Encoding INSTANCE = new ISO8859_5Encoding(); // declared last: static initializers run in textual order, so the three tables passed to super(...) are already built
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_6Encoding.java b/src/org/joni/encoding/specific/ISO8859_6Encoding.java
new file mode 100644
index 0000000..5725b8f
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_6Encoding.java
@@ -0,0 +1,94 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.IntHolder;
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_6Encoding extends ISOEncoding {
+
+ protected ISO8859_6Encoding() { // the only call in this file is the INSTANCE initializer at the bottom of the class
+ super(ISO8859_6CtypeTable, ASCIIEncoding.AsciiToLowerCaseTable, null); // reuses the ASCII lower-case table and supplies no case-fold map (null)
+ }
+
+ @Override
+ public String toString() { // returns the fixed label "ISO-8859-6"
+ return "ISO-8859-6";
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) { // forwards every argument unchanged
+ return asciiMbcCaseFold(flag, bytes, pp, end, lower); // helper declared outside this file; NOTE(review): presumably folds ASCII letters only - confirm
+ }
+
+ @Override
+ public final byte[]toLowerCaseTable() { // returns the array itself, not a copy
+ return LowerCaseTable; // field not declared in this class; NOTE(review): presumably inherited and set from the ASCII table handed to super(...) - confirm
+ }
+
+ @Override
+ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) { // forwards every argument unchanged
+ asciiApplyAllCaseFold(flag, fun, arg); // helper declared outside this file; NOTE(review): presumably enumerates ASCII case pairs only - confirm
+ }
+
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) { // forwards every argument unchanged
+ return asciiCaseFoldCodesByString(flag, bytes, p, end); // helper declared outside this file; NOTE(review): presumably ASCII-only - confirm
+ }
+
+ static final short ISO8859_6CtypeTable[] = { // 256 entries (32 rows of 8), one per byte value; passed first to super(...). NOTE(review): the bit-flag meanings are defined outside this file
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, // index 0xa0: rows from here on differ between the ISO-8859 tables in this package
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+ };
+
+ public static final ISO8859_6Encoding INSTANCE = new ISO8859_6Encoding(); // declared last: static initializers run in textual order, so ISO8859_6CtypeTable is already built when the constructor passes it to super(...)
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_7Encoding.java b/src/org/joni/encoding/specific/ISO8859_7Encoding.java
new file mode 100644
index 0000000..fefd90e
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_7Encoding.java
@@ -0,0 +1,159 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_7Encoding extends ISOEncoding {
+
+ protected ISO8859_7Encoding() { // hands the three ISO-8859-7 tables declared in this class to ISOEncoding
+ super(ISO8859_7CtypeTable, ISO8859_7ToLowerCaseTable, ISO8859_7CaseFoldMap, false); // NOTE(review): the meaning of the trailing 'false' is defined by ISOEncoding - confirm there
+ }
+
+ @Override
+ public String toString() { // textual name reported for this encoding
+ return "ISO-8859-7";
+ }
+
+    /** Maps the byte at {@code pp.value} through {@code LowerCaseTable} into {@code lower[0]}, then steps {@code pp} forward by one. */
+    @Override
+    public int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] lower) {
+        lower[0] = LowerCaseTable[bytes[pp.value] & 0xff]; // mask: Java bytes are signed, the table index must be 0..255
+        pp.value += 1;
+        return 1; // exactly one byte is written to 'lower'
+    }
+
+ @Override
+ public final byte[]toLowerCaseTable() { // returns the LowerCaseTable array itself, not a copy
+ return LowerCaseTable;
+ }
+
+ static final short ISO8859_7CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2,
+ 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000
+ };
+
+ static final byte ISO8859_7ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\334', (byte)'\267',
+ (byte)'\335', (byte)'\336', (byte)'\337', (byte)'\273', (byte)'\374', (byte)'\275', (byte)'\375', (byte)'\376',
+ (byte)'\300', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\322', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ static final int ISO8859_7CaseFoldMap[][] = {
+ { 0xb6, 0xdc },
+ { 0xb8, 0xdd },
+ { 0xb9, 0xde },
+ { 0xba, 0xdf },
+ { 0xbc, 0xfc },
+ { 0xbe, 0xfd },
+ { 0xbf, 0xfe },
+
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb }
+ };
+
+ public static final ISO8859_7Encoding INSTANCE = new ISO8859_7Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_8Encoding.java b/src/org/joni/encoding/specific/ISO8859_8Encoding.java
new file mode 100644
index 0000000..ed69078
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_8Encoding.java
@@ -0,0 +1,94 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.IntHolder;
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_8Encoding extends ISOEncoding {
+
+ protected ISO8859_8Encoding() { // ctype data is specific to ISO-8859-8; lower-casing reuses the ASCII table
+ super(ISO8859_8CtypeTable, ASCIIEncoding.AsciiToLowerCaseTable, null); // third argument (a case-fold map in ISO8859_9Encoding) is null here
+ }
+
+ @Override
+ public String toString() { // textual name reported for this encoding
+ return "ISO-8859-8";
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) { // forwards every argument to the inherited asciiMbcCaseFold
+ return asciiMbcCaseFold(flag, bytes, pp, end, lower);
+ }
+
+ @Override
+ public final byte[]toLowerCaseTable() { // returns the LowerCaseTable array itself, not a copy
+ return LowerCaseTable;
+ }
+
+ @Override
+ public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) { // forwards every argument to the inherited asciiApplyAllCaseFold
+ asciiApplyAllCaseFold(flag, fun, arg);
+ }
+
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) { // forwards every argument to the inherited asciiCaseFoldCodesByString
+ return asciiCaseFoldCodesByString(flag, bytes, p, end);
+ }
+
+ static final short ISO8859_8CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+ };
+
+ public static final ISO8859_8Encoding INSTANCE = new ISO8859_8Encoding();
+}
diff --git a/src/org/joni/encoding/specific/ISO8859_9Encoding.java b/src/org/joni/encoding/specific/ISO8859_9Encoding.java
new file mode 100644
index 0000000..a715a4f
--- /dev/null
+++ b/src/org/joni/encoding/specific/ISO8859_9Encoding.java
@@ -0,0 +1,140 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.encoding.ISOEncoding;
+
+public final class ISO8859_9Encoding extends ISOEncoding {
+
+ protected ISO8859_9Encoding() { // hands the three ISO-8859-9 tables declared in this class to ISOEncoding
+ super(ISO8859_9CtypeTable, ISO8859_9ToLowerCaseTable, ISO8859_9CaseFoldMap);
+ }
+
+ @Override
+ public String toString() { // textual name reported for this encoding
+ return "ISO-8859-9";
+ }
+
+ static final short ISO8859_9CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+ };
+
+ static final byte ISO8859_9ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\327',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\335', (byte)'\376', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377'
+ };
+
+ static final int ISO8859_9CaseFoldMap[][] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+ };
+
+ public static final ISO8859_9Encoding INSTANCE = new ISO8859_9Encoding();
+}
diff --git a/src/org/joni/encoding/specific/KOI8Encoding.java b/src/org/joni/encoding/specific/KOI8Encoding.java
new file mode 100644
index 0000000..fbcb19a
--- /dev/null
+++ b/src/org/joni/encoding/specific/KOI8Encoding.java
@@ -0,0 +1,200 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.IntHolder;
+import org.joni.encoding.CaseFoldMapEncoding;
+
+final public class KOI8Encoding extends CaseFoldMapEncoding {
+
+ protected KOI8Encoding() { // hands the three KOI8 tables declared in this class to CaseFoldMapEncoding
+ super(KOI8_CtypeTable, KOI8_ToLowerCaseTable, KOI8_CaseFoldMap);
+ }
+
+ @Override
+ public String toString() { // textual name reported for this encoding
+ return "KOI8";
+ }
+
+    /**
+     * Case-folds the single byte at {@code pp.value} into {@code lower[0]} via {@code LowerCaseTable},
+     * unconditionally, as {@code ISO8859_7Encoding.mbcCaseFold} does. The earlier code tested
+     * {@code flag} against two masks that were both 0, so the lookup branch was unreachable and
+     * every byte was copied through unfolded.
+     * @param flag  case-fold option bits (not consulted)
+     * @param bytes buffer the byte is read from
+     * @param pp    in: index of the byte to fold; out: that index plus one
+     * @param end   not consulted
+     * @param lower destination; index 0 receives the folded byte
+     * @return always 1, the number of bytes written to {@code lower}
+     */
+    @Override
+    public int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] lower) {
+        lower[0] = LowerCaseTable[bytes[pp.value] & 0xff]; // mask: Java bytes are signed, the table index must be 0..255
+        pp.value++;
+        return 1;
+    }
+
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        return code < 256 && isCodeCTypeInternal(code, ctype); // codes of 256 and above never match any ctype
+    }
+
+ static final short KOI8_CtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
+ };
+
+ static final byte KOI8_ToLowerCaseTable[] = new byte[]{
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337'
+ };
+
+ static final int KOI8_CaseFoldMap[][] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+ { 0xdf, 0xff },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfe, 0xde },
+ { 0xff, 0xdf }
+ };
+
+ public static final KOI8Encoding INSTANCE = new KOI8Encoding();
+}
diff --git a/src/org/joni/encoding/specific/SJISEncoding.java b/src/org/joni/encoding/specific/SJISEncoding.java
new file mode 100644
index 0000000..bd1adb9
--- /dev/null
+++ b/src/org/joni/encoding/specific/SJISEncoding.java
@@ -0,0 +1,225 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.CodeRangeBuffer;
+import org.joni.IntHolder;
+import org.joni.constants.CharacterType;
+import org.joni.encoding.MultiByteEncoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.InternalException;
+import org.joni.util.BytesHash;
+
+public final class SJISEncoding extends MultiByteEncoding {
+
+ protected SJISEncoding() {
+ super(SjisEncLen, ASCIIEncoding.AsciiCtypeTable);
+ }
+
+ @Override
+ public String toString() {
+ return "Shift_JIS";
+ }
+
+ @Override
+ public int maxLength() {
+ return 2;
+ }
+
+ @Override
+ public int minLength() {
+ return 1;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return false;
+ }
+
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+ return mbnMbcToCode(bytes, p, end);
+ }
+
+ @Override
+ public int codeToMbcLength(int code) {
+ if (code < 256) {
+ return SjisEncLen[code] == 1 ? 1 : 0;
+ } else if (code <= 0xffff) {
+ return 2;
+ } else {
+ throw new InternalException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+ }
+ }
+
+ /**
+  * Writes {@code code} into {@code bytes} starting at {@code p}: one byte when the
+  * second-lowest byte of the code is zero, otherwise that byte followed by the low byte.
+  *
+  * @return the number of bytes written (1 or 2)
+  */
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+     final byte low = (byte)(code & 0xff);
+     if ((code & 0xff00) == 0) {
+         bytes[p] = low;
+         return 1;
+     }
+     bytes[p] = (byte)((code >> 8) & 0xff);
+     bytes[p + 1] = low;
+     return 2;
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+ return mbnMbcCaseFold(flag, bytes, pp, end, lower);
+ }
+
+ static final boolean SJIS_CAN_BE_TRAIL_TABLE[] = {
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false
+ };
+
+ private static boolean isSjisMbFirst(int b) {
+ return SjisEncLen[b] > 1;
+ }
+
+ private static boolean isSjisMbTrail(int b) {
+ return SJIS_CAN_BE_TRAIL_TABLE[b];
+ }
+
+ /**
+  * Adjusts the position {@code end} leftwards and returns the adjusted position.
+  * Note the parameter roles as used below: {@code p} is the lower bound of the
+  * backward scan and {@code end} is the position being adjusted (it is read as an
+  * index into {@code bytes}, not as an exclusive limit).
+  *
+  * @return {@code end} itself when {@code end <= p}, otherwise a position in
+  *         {@code [p, end]} computed by the scan below
+  */
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+
+ int p_ = end;
+
+ // Only a byte that may be a trail byte needs the backward scan: walk left
+ // while the preceding bytes are lead-capable, stopping one past the first
+ // byte that is not (or at the lower bound p).
+ if (isSjisMbTrail(bytes[p_] & 0xff)) {
+ while (p_ > p) {
+ if (!isSjisMbFirst(bytes[--p_] & 0xff)) {
+ p_++;
+ break;
+ }
+ }
+ }
+ // If the character starting where the scan stopped extends past 'end',
+ // that start is the answer.
+ int len = length(bytes[p_]);
+ if (p_ + len > end) return p_;
+ p_ += len;
+ // Otherwise skip the remaining distance to 'end' in whole two-byte steps
+ // (the & ~1 rounds the distance down to an even number).
+ return p_ + ((end - p_) & ~1);
+ }
+
+ /**
+  * Reports whether a reverse match may start at {@code p}.
+  * A byte that can also occur as the trail byte of a two-byte character is
+  * ambiguous when approached from the right, so reverse matching is allowed
+  * only when the byte at {@code p} can NOT be a trail byte. (The previous
+  * implementation returned the opposite.)
+  *
+  * @return {@code true} if the byte at {@code p} cannot be a Shift_JIS trail byte
+  */
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+     int c = bytes[p] & 0xff;
+     return !isSjisMbTrail(c);
+ }
+
+ private static final int CR_Hiragana[] = {
+ 1,
+ 0x829f, 0x82f1
+ }; /* CR_Hiragana */
+
+ private static final int CR_Katakana[] = {
+ 4,
+ 0x00a6, 0x00af,
+ 0x00b1, 0x00dd,
+ 0x8340, 0x837e,
+ 0x8380, 0x8396,
+ }; /* CR_Katakana */
+
+ private static final int PropertyList[][] = new int[][] {
+ CR_Hiragana,
+ CR_Katakana
+ };
+
+ private static final BytesHash<Integer> CTypeNameHash = new BytesHash<Integer>();
+
+ static {
+ CTypeNameHash.put("Hiragana".getBytes(), 1 + CharacterType.MAX_STD_CTYPE);
+ CTypeNameHash.put("Katakana".getBytes(), 2 + CharacterType.MAX_STD_CTYPE);
+ }
+
+ @Override
+ public int propertyNameToCType(byte[]bytes, int p, int end) {
+ Integer ctype;
+ if ((ctype = CTypeNameHash.get(bytes, p, end)) == null) {
+ return super.propertyNameToCType(bytes, p, end);
+ }
+ return ctype;
+ }
+
+
+ @Override
+ public boolean isCodeCType(int code, int ctype) {
+ if (ctype <= CharacterType.MAX_STD_CTYPE) {
+ if (code < 128) {
+ // ctype table is configured with ASCII
+ return isCodeCTypeInternal(code, ctype);
+ } else {
+ if (isWordGraphPrint(ctype)) {
+ return codeToMbcLength(code) > 1;
+ }
+ }
+ } else {
+ ctype -= (CharacterType.MAX_STD_CTYPE + 1);
+ if (ctype >= PropertyList.length) throw new InternalException(ErrorMessages.ERR_TYPE_BUG);
+ return CodeRangeBuffer.isInCodeRange(PropertyList[ctype], code);
+ }
+ return false;
+ }
+
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ if (ctype <= CharacterType.MAX_STD_CTYPE) {
+ return null;
+ } else {
+ sbOut.value = 0x80;
+
+ ctype -= (CharacterType.MAX_STD_CTYPE + 1);
+ if (ctype >= PropertyList.length) throw new InternalException(ErrorMessages.ERR_TYPE_BUG);
+ return PropertyList[ctype];
+ }
+ }
+
+ static final int SjisEncLen[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
+ };
+
+ public static final SJISEncoding INSTANCE = new SJISEncoding();
+}
diff --git a/src/org/joni/encoding/specific/UTF16BEEncoding.java b/src/org/joni/encoding/specific/UTF16BEEncoding.java
new file mode 100644
index 0000000..951e5a8
--- /dev/null
+++ b/src/org/joni/encoding/specific/UTF16BEEncoding.java
@@ -0,0 +1,185 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.encoding.unicode.UnicodeEncoding;
+
+public final class UTF16BEEncoding extends UnicodeEncoding {
+
+ protected UTF16BEEncoding() {
+ super(UTF16EncLen);
+ }
+
+ @Override
+ public String toString() {
+ return "UTF-16BE";
+ }
+
+ @Override
+ public int maxLength() {
+ return 4;
+ }
+
+ @Override
+ public int minLength() {
+ return 2;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return false;
+ }
+
+ /**
+  * Tests whether the 16-bit big-endian unit at {@code p} is a line terminator.
+  * U+000A always counts; when {@code Config.USE_UNICODE_ALL_LINE_TERMINATORS} is
+  * set, U+0085, U+2028 and U+2029 count as well, plus U+000D unless
+  * {@code Config.USE_CRNL_AS_LINE_TERMINATOR} is set.
+  *
+  * @return {@code false} when fewer than two bytes are available before {@code end}
+  */
+ @Override
+ public boolean isNewLine(byte[]bytes, int p, int end) {
+     if (p + 1 >= end) return false;
+
+     final byte hi = bytes[p];
+     final byte lo = bytes[p + 1];
+
+     if (lo == (byte)0x0a && hi == (byte)0x00) return true;
+
+     if (Config.USE_UNICODE_ALL_LINE_TERMINATORS) {
+         // The high-byte test has to guard BOTH alternatives; without the outer
+         // parentheses && bound only to the 0x85 test and every U+xx0D matched.
+         if (((!Config.USE_CRNL_AS_LINE_TERMINATOR && lo == (byte)0x0d) || lo == (byte)0x85)
+                 && hi == (byte)0x00) return true;
+
+         // U+2028 / U+2029
+         if (hi == (byte)0x20 && (lo == (byte)0x29 || lo == (byte)0x28)) return true;
+     }
+     return false;
+ }
+
+ private static boolean isSurrogateFirst(int c) {
+ return c >= 0xd8 && c <= 0xdb;
+ }
+
+ /**
+  * Decodes the UTF-16BE character starting at {@code p} into a code point.
+  * A unit whose first byte is 0xd8-0xdb is treated as the high half of a
+  * surrogate pair and combined with the following unit (four bytes are read);
+  * anything else is returned as the plain 16-bit value. {@code end} is not consulted.
+  */
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+     final int b0 = bytes[p] & 0xff;
+     final int b1 = bytes[p + 1] & 0xff;
+
+     if (!isSurrogateFirst(b0)) return b0 * 256 + b1;
+
+     final int b2 = bytes[p + 2] & 0xff;
+     final int b3 = bytes[p + 3] & 0xff;
+
+     // Every byte is masked to 0..255 before any arithmetic. The former one-line
+     // expression relied on '&' binding tighter than '+'/'-', which it does not,
+     // so the final "& 0xff" truncated the whole result to eight bits.
+     return ((((b0 - 0xd8) << 2) + ((b1 & 0xc0) >> 6) + 1) << 16)
+          + ((((b1 & 0x3f) << 2) + (b2 - 0xdc)) << 8)
+          + b3;
+ }
+
+ @Override
+ public int codeToMbcLength(int code) {
+ return code > 0xffff ? 4 : 2;
+ }
+
+ /**
+  * Encodes {@code code} as UTF-16BE into {@code bytes} starting at {@code p}.
+  * Code points above 0xffff are written as a surrogate pair.
+  *
+  * @return the number of bytes written (4 for a surrogate pair, otherwise 2)
+  */
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+     int p_ = p;
+     if (code > 0xffff) {
+         // Surrogates encode (code - 0x10000), hence the "- 1" on the plane;
+         // this mirrors the "+ 1" applied when decoding in mbcToCode.
+         int plane = (code >>> 16) - 1;
+         int high = (code & 0xff00) >>> 8;
+         bytes[p_++] = (byte)((plane >>> 2) + 0xd8);
+         bytes[p_++] = (byte)(((plane & 0x03) << 6) + (high >>> 2));
+         // the low surrogate's high byte carries TWO payload bits (mask 0x03, not 0x02)
+         bytes[p_++] = (byte)((high & 0x03) + 0xdc);
+         bytes[p_] = (byte)(code & 0xff);
+         return 4;
+     } else {
+         bytes[p_++] = (byte)((code & 0xff00) >>> 8);
+         bytes[p_++] = (byte)(code & 0xff);
+         return 2;
+     }
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
+ int p = pp.value;
+ int foldP = 0;
+
+ if (isAscii(bytes[p+1] & 0xff) && bytes[p] == 0) {
+ p++;
+
+ if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
+ if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (bytes[p] == (byte)0x49) {
+ fold[foldP++] = (byte)0x01;
+ fold[foldP] = (byte)0x31;
+ pp.value += 2;
+ return 2;
+ }
+ }
+ } // USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+
+ fold[foldP++] = 0;
+ fold[foldP] = ASCIIEncoding.AsciiToLowerCaseTable[bytes[p] & 0xff];
+ pp.value += 2;
+ return 2;
+ } else {
+ return super.mbcCaseFold(flag, bytes, pp, end, fold);
+ }
+ }
+
+ /** onigenc_utf16_32_get_ctype_code_range
+ */
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ sbOut.value = 0x00;
+ return super.ctypeCodeRange(ctype);
+ }
+
+ private static boolean isSurrogateSecond(int c) {
+ return c >= 0xdc && c <= 0xdf;
+ }
+
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+
+ if ((end - p) % 2 == 1) end--;
+
+ if (isSurrogateSecond(bytes[end] & 0xff) && end > p + 1) end -= 2;
+
+ return end;
+ }
+
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+ return false;
+ }
+
+ static final int UTF16EncLen[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ };
+
+ public static final UTF16BEEncoding INSTANCE = new UTF16BEEncoding();
+}
diff --git a/src/org/joni/encoding/specific/UTF16LEEncoding.java b/src/org/joni/encoding/specific/UTF16LEEncoding.java
new file mode 100644
index 0000000..d369802
--- /dev/null
+++ b/src/org/joni/encoding/specific/UTF16LEEncoding.java
@@ -0,0 +1,170 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.encoding.unicode.UnicodeEncoding;
+
+public final class UTF16LEEncoding extends UnicodeEncoding {
+
+ protected UTF16LEEncoding() {
+ super(UTF16BEEncoding.UTF16EncLen);
+ }
+
+ /**
+  * Looks up a character length in {@code EncLen} using the given byte value plus one
+  * as the index.
+  * NOTE(review): the {@code + 1} offsets the table index rather than selecting the
+  * following byte of the input; for UTF-16LE the surrogate-marking byte is the second
+  * byte of a unit, which a single-byte signature cannot reach - presumably a
+  * mistranslated pointer offset, confirm against callers.
+  * NOTE(review): for {@code c == (byte)0xff} the index is 256; if {@code EncLen} is the
+  * 256-entry table passed to the super constructor this throws
+  * ArrayIndexOutOfBoundsException - verify.
+  */
+ @Override
+ public int length(byte c) {
+ return EncLen[(c & 0xff) + 1];
+ }
+
+ @Override
+ public String toString() {
+ return "UTF-16LE";
+ }
+
+ @Override
+ public int maxLength() {
+ return 4;
+ }
+
+ @Override
+ public int minLength() {
+ return 2;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return false;
+ }
+
+ /**
+  * Tests whether the 16-bit little-endian unit at {@code p} is a line terminator.
+  * U+000A always counts; when {@code Config.USE_UNICODE_ALL_LINE_TERMINATORS} is
+  * set, U+0085, U+2028 and U+2029 count as well, plus U+000D unless
+  * {@code Config.USE_CRNL_AS_LINE_TERMINATOR} is set.
+  *
+  * @return {@code false} when fewer than two bytes are available before {@code end}
+  */
+ @Override
+ public boolean isNewLine(byte[]bytes, int p, int end) {
+     if (p + 1 >= end) return false;
+
+     final byte lo = bytes[p];
+     final byte hi = bytes[p + 1];
+
+     if (lo == (byte)0x0a && hi == (byte)0x00) return true;
+
+     if (Config.USE_UNICODE_ALL_LINE_TERMINATORS) {
+         // The high-byte test has to guard BOTH alternatives; without the outer
+         // parentheses && bound only to the 0x85 test and every U+xx0D matched.
+         if (((!Config.USE_CRNL_AS_LINE_TERMINATOR && lo == (byte)0x0d) || lo == (byte)0x85)
+                 && hi == (byte)0x00) return true;
+
+         // U+2028 / U+2029
+         if (hi == (byte)0x20 && (lo == (byte)0x29 || lo == (byte)0x28)) return true;
+     }
+     return false;
+ }
+
+ private static boolean isSurrogateFirst(int c) {
+ return c >= 0xd8 && c <= 0xdb;
+ }
+
+ /**
+  * Decodes the UTF-16LE character starting at {@code p} into a code point.
+  * A unit whose high byte (the second byte in little-endian order) is 0xd8-0xdb
+  * is treated as the high half of a surrogate pair and combined with the
+  * following unit (four bytes are read); anything else is returned as the plain
+  * 16-bit value. {@code end} is not consulted.
+  */
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+     final int lo0 = bytes[p] & 0xff;
+     final int hi0 = bytes[p + 1] & 0xff;
+
+     if (!isSurrogateFirst(hi0)) return hi0 * 256 + lo0;
+
+     // second unit, also little-endian: low byte first, then the 0xdc-0xdf byte
+     final int lo1 = bytes[p + 2] & 0xff;
+     final int hi1 = bytes[p + 3] & 0xff;
+
+     // Every byte is masked before any arithmetic; the former expression ended in
+     // "+ bytes[p + 3] & 0xff", which truncated the whole sum to eight bits, and
+     // read the second unit in big-endian order.
+     return ((((hi0 - 0xd8) << 2) + ((lo0 & 0xc0) >> 6) + 1) << 16)
+          + ((((lo0 & 0x3f) << 2) + (hi1 - 0xdc)) << 8)
+          + lo1;
+ }
+
+ @Override
+ public int codeToMbcLength(int code) {
+ return code > 0xffff ? 4 : 2;
+ }
+
+ /**
+  * Encodes {@code code} as UTF-16LE into {@code bytes} starting at {@code p}.
+  * Code points above 0xffff are written as a surrogate pair, each unit with its
+  * low byte first.
+  *
+  * @return the number of bytes written (4 for a surrogate pair, otherwise 2)
+  */
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+     int p_ = p;
+     if (code > 0xffff) {
+         // Surrogates encode (code - 0x10000), hence the "- 1" on the plane;
+         // this mirrors the "+ 1" applied when decoding in mbcToCode.
+         int plane = (code >>> 16) - 1;
+         int high = (code & 0xff00) >>> 8;
+         bytes[p_++] = (byte)(((plane & 0x03) << 6) + (high >>> 2));
+         bytes[p_++] = (byte)((plane >>> 2) + 0xd8);
+         bytes[p_++] = (byte)(code & 0xff);
+         // the low surrogate's high byte carries TWO payload bits (mask 0x03, not 0x02)
+         bytes[p_ ] = (byte)((high & 0x03) + 0xdc);
+         return 4;
+     } else {
+         bytes[p_++] = (byte)(code & 0xff);
+         bytes[p_++] = (byte)((code & 0xff00) >>> 8);
+         return 2;
+     }
+ }
+
+ /**
+  * Case-folds the character at {@code pp.value} into {@code fold}.
+  * ASCII characters (low byte ASCII, high byte zero) are handled here: the
+  * lower-cased byte followed by a zero byte is written, {@code pp.value} is
+  * advanced by 2 and 2 is returned. With Turkish/Azeri folding enabled and
+  * requested through {@code flag}, 'I' (0x49) folds to U+0131, written
+  * little-endian as {@code 31 01}. Everything else is delegated to the superclass.
+  *
+  * @return the number of bytes written to {@code fold}
+  */
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
+     int p = pp.value;
+     int foldP = 0;
+
+     if (isAscii(bytes[p] & 0xff) && bytes[p + 1] == 0) {
+
+         if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
+             if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+                 if (bytes[p] == (byte)0x49) {
+                     // U+0131 in little-endian order: low byte first
+                     fold[foldP++] = (byte)0x31;
+                     fold[foldP] = (byte)0x01;
+                     pp.value += 2;
+                     return 2;
+                 }
+             }
+         } // USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+
+         fold[foldP++] = ASCIIEncoding.AsciiToLowerCaseTable[bytes[p] & 0xff];
+         fold[foldP] = 0;
+         pp.value += 2;
+         return 2;
+     } else {
+         return super.mbcCaseFold(flag, bytes, pp, end, fold);
+     }
+ }
+
+ /** onigenc_utf16_32_get_ctype_code_range
+ */
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ sbOut.value = 0x00;
+ return super.ctypeCodeRange(ctype);
+ }
+
+ private static boolean isSurrogateSecond(int c) {
+ return c >= 0xdc && c <= 0xdf;
+ }
+
+ /**
+  * Adjusts the position {@code end} leftwards and returns the adjusted position.
+  * As used below, {@code p} is the lower bound and {@code end} is the position
+  * being adjusted (it is used as an index into {@code bytes}).
+  */
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+
+ // an odd distance from p means 'end' is on the second byte of a 16-bit unit
+ if ((end - p) % 2 == 1) end--;
+
+ // In little-endian order the high byte of the unit is at end + 1; if it is
+ // in 0xdc-0xdf (second half of a surrogate pair) and a unit precedes it,
+ // step back one more unit.
+ // NOTE(review): bytes[end + 1] is read before the bound test - confirm callers
+ // never pass the last byte index.
+ if (isSurrogateSecond(bytes[end + 1] & 0xff) && end > p + 1) end -= 2;
+
+ return end;
+ }
+
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+ return false;
+ }
+
+ public static final UTF16LEEncoding INSTANCE = new UTF16LEEncoding();
+}
diff --git a/src/org/joni/encoding/specific/UTF32BEEncoding.java b/src/org/joni/encoding/specific/UTF32BEEncoding.java
new file mode 100644
index 0000000..2907fca
--- /dev/null
+++ b/src/org/joni/encoding/specific/UTF32BEEncoding.java
@@ -0,0 +1,148 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.encoding.unicode.UnicodeEncoding;
+
+public final class UTF32BEEncoding extends UnicodeEncoding {
+
+ protected UTF32BEEncoding() {
+ super(null);
+ }
+
+ @Override
+ public int length(byte c) {
+ return 4;
+ }
+
+ @Override
+ public int strLength(byte[]bytes, int p, int end) {
+ return (end - p) >>> 2;
+ }
+
+ @Override
+ public String toString() {
+ return "UTF-32BE";
+ }
+
+ @Override
+ public int maxLength() {
+ return 4;
+ }
+
+ @Override
+ public int minLength() {
+ return 4;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return true;
+ }
+
+ /**
+  * Tests whether the 32-bit big-endian unit at {@code p} is a line terminator.
+  * U+000A always counts; when {@code Config.USE_UNICODE_ALL_LINE_TERMINATORS} is
+  * set, U+0085, U+2028 and U+2029 count as well, plus U+000D unless
+  * {@code Config.USE_CRNL_AS_LINE_TERMINATOR} is set (the same rule the UTF-8 and
+  * UTF-16 encodings in this package apply).
+  *
+  * @return {@code false} when fewer than four bytes are available before {@code end}
+  */
+ @Override
+ public boolean isNewLine(byte[]bytes, int p, int end) {
+     if (p + 3 >= end) return false;
+
+     // every terminator tested below is <= 0xffff, so the two leading bytes must be zero
+     if (bytes[p] != 0 || bytes[p + 1] != 0) return false;
+
+     final byte hi = bytes[p + 2];
+     final byte lo = bytes[p + 3];
+
+     if (lo == (byte)0x0a && hi == 0) return true;
+
+     if (Config.USE_UNICODE_ALL_LINE_TERMINATORS) {
+         // CR counts only when CR-NL is NOT the configured terminator (the test was
+         // inverted), and the zero-byte checks must guard CR as well as U+0085.
+         if (((!Config.USE_CRNL_AS_LINE_TERMINATOR && lo == (byte)0x0d) || lo == (byte)0x85)
+                 && hi == 0) return true;
+
+         // U+2028 / U+2029
+         if (hi == (byte)0x20 && (lo == (byte)0x29 || lo == (byte)0x28)) return true;
+     } // USE_UNICODE_ALL_LINE_TERMINATORS
+     return false;
+ }
+
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+ return (((bytes[p] & 0xff) * 256 + (bytes[p + 1] & 0xff)) * 256 + (bytes[p + 2] & 0xff)) * 256 + (bytes[p + 3] & 0xff);
+ }
+
+ @Override
+ public int codeToMbcLength(int code) {
+ return 4;
+ }
+
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+ int p_ = p;
+ bytes[p_++] = (byte)((code & 0xff000000) >>> 24);
+ bytes[p_++] = (byte)((code & 0xff0000) >>> 16);
+ bytes[p_++] = (byte)((code & 0xff00) >>> 8);
+ bytes[p_++] = (byte) (code & 0xff);
+ return 4;
+ }
+
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
+ int p = pp.value;
+ int foldP = 0;
+ if (isAscii(bytes[p + 3] & 0xff) && bytes[p + 2] == 0 && bytes[p + 1] == 0 && bytes[p] == 0) {
+ fold[foldP++] = 0;
+ fold[foldP++] = 0;
+
+ if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
+ if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (bytes[p + 3] == (byte)0x49) {
+ fold[foldP++] = (byte)0x01;
+ fold[foldP] = (byte)0x31;
+ pp.value += 4;
+ return 4;
+ }
+ }
+ } // USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+
+ fold[foldP++] = 0;
+ fold[foldP] = ASCIIEncoding.AsciiToLowerCaseTable[bytes[p + 3] & 0xff];
+ pp.value += 4;
+ return 4;
+ } else {
+ return super.mbcCaseFold(flag, bytes, pp, end, fold);
+ }
+ }
+
+ /** onigenc_utf16_32_get_ctype_code_range
+ */
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ sbOut.value = 0x00;
+ return super.ctypeCodeRange(ctype);
+ }
+
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+
+ return end - ((end - p) % 4);
+ }
+
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+ return false;
+ }
+
+ /** Shared instance; declared final like the INSTANCE fields of the other encodings in this package. */
+ public static final UTF32BEEncoding INSTANCE = new UTF32BEEncoding();
+}
diff --git a/src/org/joni/encoding/specific/UTF32LEEncoding.java b/src/org/joni/encoding/specific/UTF32LEEncoding.java
new file mode 100644
index 0000000..66f4ee7
--- /dev/null
+++ b/src/org/joni/encoding/specific/UTF32LEEncoding.java
@@ -0,0 +1,146 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.encoding.unicode.UnicodeEncoding;
+
+public final class UTF32LEEncoding extends UnicodeEncoding {
+
+ protected UTF32LEEncoding() {
+ super(null);
+ }
+
+ @Override
+ public int length(byte c) {
+ return 4;
+ }
+
+ @Override
+ public int strLength(byte[]bytes, int p, int end) {
+ return (end - p) >>> 2;
+ }
+
+ @Override
+ public String toString() {
+ return "UTF-32LE";
+ }
+
+ @Override
+ public int maxLength() {
+ return 4;
+ }
+
+ @Override
+ public int minLength() {
+ return 4;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return true;
+ }
+
+ /**
+  * Tests whether the 32-bit little-endian unit at {@code p} is a line terminator.
+  * U+000A always counts; when {@code Config.USE_UNICODE_ALL_LINE_TERMINATORS} is
+  * set, U+0085, U+2028 and U+2029 count as well, plus U+000D unless
+  * {@code Config.USE_CRNL_AS_LINE_TERMINATOR} is set (the same rule the UTF-8 and
+  * UTF-16 encodings in this package apply).
+  *
+  * @return {@code false} when fewer than four bytes are available before {@code end}
+  */
+ @Override
+ public boolean isNewLine(byte[]bytes, int p, int end) {
+     if (p + 3 >= end) return false;
+
+     // every terminator tested below is <= 0xffff, so the two trailing bytes must be
+     // zero (the old code tested bytes[3] instead of bytes[p + 3] in one place)
+     if (bytes[p + 2] != 0 || bytes[p + 3] != 0) return false;
+
+     final byte lo = bytes[p];
+     final byte hi = bytes[p + 1];
+
+     if (lo == (byte)0x0a && hi == 0) return true;
+
+     if (Config.USE_UNICODE_ALL_LINE_TERMINATORS) {
+         // CR counts only when CR-NL is NOT the configured terminator (the test was
+         // inverted), and the zero-byte checks must guard CR as well as U+0085.
+         if (((!Config.USE_CRNL_AS_LINE_TERMINATOR && lo == (byte)0x0d) || lo == (byte)0x85)
+                 && hi == 0) return true;
+
+         // U+2028 / U+2029
+         if (hi == (byte)0x20 && (lo == (byte)0x29 || lo == (byte)0x28)) return true;
+     } // USE_UNICODE_ALL_LINE_TERMINATORS
+     return false;
+ }
+
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+ return (((bytes[p + 3] & 0xff) * 256 + (bytes[p + 2] & 0xff)) * 256 + (bytes[p + 1] & 0xff)) * 256 + (bytes[p] & 0xff);
+ }
+
+ @Override
+ public int codeToMbcLength(int code) {
+ return 4;
+ }
+
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+ int p_ = p;
+ bytes[p_++] = (byte) (code & 0xff);
+ bytes[p_++] = (byte)((code & 0xff00) >>> 8);
+ bytes[p_++] = (byte)((code & 0xff0000) >>> 16);
+ bytes[p_++] = (byte)((code & 0xff000000) >>> 24);
+ return 4;
+ }
+
+ /**
+  * Case-folds the character at {@code pp.value} into {@code fold}.
+  * ASCII characters (first byte ASCII, remaining three bytes zero) are handled
+  * here: four bytes are written, {@code pp.value} is advanced by 4 and 4 is
+  * returned. With Turkish/Azeri folding enabled and requested through
+  * {@code flag}, 'I' (0x49) folds to U+0131 ({@code 31 01 00 00}); every other
+  * ASCII character folds to its lower-case form. Everything else is delegated
+  * to the superclass.
+  *
+  * @return the number of bytes written to {@code fold}
+  */
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
+     int p = pp.value;
+     if (isAscii(bytes[p] & 0xff) && bytes[p + 1] == 0 && bytes[p + 2] == 0 && bytes[p + 3] == 0) {
+
+         // The special case applies to 'I' only. Previously any character seen with
+         // the Turkish flag set skipped the lower-case write, and the 'I' branch
+         // had its second byte overwritten and left fold[3] unset.
+         if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+                 && (flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0
+                 && bytes[p] == (byte)0x49) {
+             fold[0] = (byte)0x31;
+             fold[1] = (byte)0x01;
+         } else {
+             fold[0] = ASCIIEncoding.AsciiToLowerCaseTable[bytes[p] & 0xff];
+             fold[1] = 0;
+         } // USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+
+         fold[2] = 0;
+         fold[3] = 0;
+
+         pp.value += 4;
+         return 4;
+     } else {
+         return super.mbcCaseFold(flag, bytes, pp, end, fold);
+     }
+ }
+
+ /** onigenc_utf16_32_get_ctype_code_range
+ */
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ sbOut.value = 0x00;
+ return super.ctypeCodeRange(ctype);
+ }
+
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+
+ return end - ((end - p) % 4);
+ }
+
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+ return false;
+ }
+
+ /** Shared instance; declared final like the INSTANCE fields of the other encodings in this package. */
+ public static final UTF32LEEncoding INSTANCE = new UTF32LEEncoding();
+}
diff --git a/src/org/joni/encoding/specific/UTF8Encoding.java b/src/org/joni/encoding/specific/UTF8Encoding.java
new file mode 100644
index 0000000..ac1fd2a
--- /dev/null
+++ b/src/org/joni/encoding/specific/UTF8Encoding.java
@@ -0,0 +1,250 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.encoding.unicode.UnicodeEncoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+public final class UTF8Encoding extends UnicodeEncoding {
+ static final boolean USE_INVALID_CODE_SCHEME = true;
+
+ protected UTF8Encoding() {
+ super(UTF8EncLen);
+ }
+
+ @Override
+ public String toString() {
+ return "UTF-8";
+ }
+
+ @Override
+ public int maxLength() {
+ return 6;
+ }
+
+ @Override
+ public int minLength() {
+ return 1;
+ }
+
+ @Override
+ public boolean isFixedWidth() {
+ return false;
+ }
+
+ @Override
+ public boolean isNewLine(byte[]bytes, int p, int end) {
+ if (p < end) {
+ if (bytes[p] == (byte)0x0a) return true;
+
+ if (Config.USE_UNICODE_ALL_LINE_TERMINATORS) {
+ if (!Config.USE_CRNL_AS_LINE_TERMINATOR) {
+ if (bytes[p] == (byte)0x0d) return true;
+ }
+
+ if (p + 1 < end) { // & 0xff...
+ if (bytes[p+1] == (byte)0x85 && bytes[p] == (byte)0xc2) return true; /* U+0085 */
+ if (p + 2 < end) {
+ if ((bytes[p+2] == (byte)0xa8 || bytes[p+2] == (byte)0xa9) &&
+ bytes[p+1] == (byte)0x80 && bytes[p] == (byte)0xe2) return true; /* U+2028, U+2029 */
+ }
+ }
+ } // USE_UNICODE_ALL_LINE_TERMINATORS
+ }
+ return false;
+ }
+
+ private static final int INVALID_CODE_FE = 0xfffffffe;
+ private static final int INVALID_CODE_FF = 0xffffffff;
+ // private static final int VALID_CODE_LIMIT = 0x7fffffff;
+ @Override
+ public int codeToMbcLength(int code) {
+ if ((code & 0xffffff80) == 0) {
+ return 1;
+ } else if ((code & 0xfffff800) == 0) {
+ return 2;
+ } else if ((code & 0xffff0000) == 0) {
+ return 3;
+ } else if ((code & 0xffe00000) == 0) {
+ return 4;
+ } else if ((code & 0xfc000000) == 0) {
+ return 5;
+ } else if ((code & 0x80000000) == 0) {
+ return 6;
+ } else if (USE_INVALID_CODE_SCHEME && code == INVALID_CODE_FE) {
+ return 1;
+ } else if (USE_INVALID_CODE_SCHEME && code == INVALID_CODE_FF) {
+ return 1;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+ }
+ }
+
+ @Override
+ public int mbcToCode(byte[]bytes, int p, int end) {
+ int len = length(bytes[p]);
+
+ int c = bytes[p++] & 0xff;
+
+ if (len > 1) {
+ len--;
+ int n = c & ((1 << (6 - len)) - 1);
+ while (len-- != 0) {
+ c = bytes[p++] & 0xff;
+ n = (n << 6) | (c & ((1 << 6) - 1));
+ }
+ return n;
+ } else {
+ if (USE_INVALID_CODE_SCHEME) {
+ if (c > 0xfd) return c == 0xfe ? INVALID_CODE_FE : INVALID_CODE_FF;
+ }
+ return c;
+ }
+ }
+
+ static byte trailS(int code, int shift) {
+ return (byte)((((code) >>> (shift)) & 0x3f) | 0x80);
+ }
+
+ static byte trail0(int code) {
+ return (byte)(((code) & 0x3f) | 0x80);
+ }
+
+ @Override
+ public int codeToMbc(int code, byte[]bytes, int p) {
+ int p_ = p;
+ if ((code & 0xffffff80) == 0) {
+ bytes[p_] = (byte)code;
+ return 1;
+ } else {
+ if ((code & 0xfffff800) == 0) {
+ bytes[p_++] = (byte)(((code >>> 6) & 0x1f) | 0xc0);
+ } else if ((code & 0xffff0000) == 0) {
+ bytes[p_++] = (byte)(((code >>> 12) & 0x0f) | 0xe0);
+ bytes[p_++] = trailS(code, 6);
+ } else if ((code & 0xffe00000) == 0) {
+ bytes[p_++] = (byte)(((code >>> 18) & 0x07) | 0xf0);
+ bytes[p_++] = trailS(code, 12);
+ bytes[p_++] = trailS(code, 6);
+ } else if ((code & 0xfc000000) == 0) {
+ bytes[p_++] = (byte)(((code >>> 24) & 0x03) | 0xf8);
+ bytes[p_++] = trailS(code, 18);
+ bytes[p_++] = trailS(code, 12);
+ bytes[p_++] = trailS(code, 6);
+ } else if ((code & 0x80000000) == 0) {
+ bytes[p_++] = (byte)(((code >>> 30) & 0x01) | 0xfc);
+ bytes[p_++] = trailS(code, 24);
+ bytes[p_++] = trailS(code, 18);
+ bytes[p_++] = trailS(code, 12);
+ bytes[p_++] = trailS(code, 6);
+ } else if (USE_INVALID_CODE_SCHEME && code == INVALID_CODE_FE) {
+ bytes[p_] = (byte)0xfe;
+ return 1;
+ } else if (USE_INVALID_CODE_SCHEME && code == INVALID_CODE_FF) {
+ bytes[p_] = (byte)0xff;
+ return 1;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_TOO_BIG_WIDE_CHAR_VALUE);
+ }
+ bytes[p_++] = trail0(code);
+ return p_ - p;
+ }
+ }
+
+ /** utf8_mbc_case_fold
+  * <p>
+  * Case-folds the character at {@code pp.value} into {@code fold}.
+  * A single-byte (ASCII) character is lower-cased here, {@code pp.value} is
+  * advanced by one and 1 is returned. With Turkish/Azeri folding enabled and
+  * requested through {@code flag}, 'I' (0x49) folds to the two bytes
+  * {@code c4 b1}; one input byte is consumed and 2 is returned. Everything
+  * else is delegated to the superclass.
+  *
+  * @return the number of bytes written to {@code fold}
+  */
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
+     int p = pp.value;
+     int foldP = 0;
+
+     if (isMbcAscii(bytes[p])) {
+
+         if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
+             if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+                 if (bytes[p] == (byte)0x49) {
+                     // was "0xc4l": a stray 'l' suffix that made the literal a long
+                     fold[foldP++] = (byte)0xc4;
+                     fold[foldP] = (byte)0xb1;
+                     pp.value++;
+                     return 2;
+                 }
+             }
+         } // USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+
+         fold[foldP] = ASCIIEncoding.AsciiToLowerCaseTable[bytes[p] & 0xff];
+         pp.value++;
+         return 1; /* return byte length of converted char to lower */
+     } else {
+         return super.mbcCaseFold(flag, bytes, pp, end, fold);
+     }
+ }
+
+ /** utf8_get_ctype_code_range
+ */
+ @Override
+ public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+ sbOut.value = 0x80;
+ return super.ctypeCodeRange(ctype); // onigenc_unicode_ctype_code_range
+ }
+
+ private static boolean utf8IsLead(int c) {
+ return ((c & 0xc0) & 0xff) != 0x80;
+ }
+
+ /** utf8_left_adjust_char_head
+ */
+ @Override
+ public int leftAdjustCharHead(byte[]bytes, int p, int end) {
+ if (end <= p) return end;
+ int p_ = end;
+ while (!utf8IsLead(bytes[p_] & 0xff) && p_ > p) p_--;
+ return p_;
+ }
+
+ /** onigenc_always_true_is_allowed_reverse_match
+ */
+ @Override
+ public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+ return true;
+ }
+
+ static final int UTF8EncLen[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+ };
+
+ public static final UTF8Encoding INSTANCE = new UTF8Encoding();
+}
diff --git a/src/org/joni/encoding/unicode/UnicodeCTypeNames.java b/src/org/joni/encoding/unicode/UnicodeCTypeNames.java
new file mode 100644
index 0000000..d69f044
--- /dev/null
+++ b/src/org/joni/encoding/unicode/UnicodeCTypeNames.java
@@ -0,0 +1,160 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.unicode;
+
+import org.joni.Config;
+import org.joni.util.BytesHash;
+
+public class UnicodeCTypeNames {
+ /** Fills CTypeNameHash so that each name's ASCII bytes map to its index in CTypeNameTable. */
+ static void initializeCTypeNameTable() {
+ BytesHash<Integer> table = new BytesHash<Integer>();
+ // With unicode properties disabled, only the first 15 entries (NEWLINE .. ASCII) are registered.
+ int limit = Config.USE_UNICODE_PROPERTIES ? CTypeNameTable.length : 15;
+ for (int i=0; i<limit; i++)
+ table.putDirect(CTypeNameTable[i], i);
+ CTypeNameHash = table;
+ }
+ static BytesHash<Integer> CTypeNameHash; // assigned by initializeCTypeNameTable()
+ /** Encodes an ASCII-only name as one byte per char, so the result never depends on the platform default charset. */
+ private static byte[] ascii(String name) {
+ byte[] encoded = new byte[name.length()];
+ for (int i=0; i<encoded.length; i++) encoded[i] = (byte)name.charAt(i);
+ return encoded;
+ }
+ private static final byte CTypeNameTable[][] = new byte[][] {
+ ascii("NEWLINE"),
+ ascii("Alpha"),
+ ascii("Blank"),
+ ascii("Cntrl"),
+ ascii("Digit"),
+ ascii("Graph"),
+ ascii("Lower"),
+ ascii("Print"),
+ ascii("Punct"),
+ ascii("Space"),
+ ascii("Upper"),
+ ascii("XDigit"),
+ ascii("Word"),
+ ascii("Alnum"),
+ ascii("ASCII"),
+ // unicode properties
+ ascii("Any"),
+ ascii("Assigned"),
+ ascii("C"),
+ ascii("Cc"),
+ ascii("Cf"),
+ ascii("Cn"),
+ ascii("Co"),
+ ascii("Cs"),
+ ascii("L"),
+ ascii("Ll"),
+ ascii("Lm"),
+ ascii("Lo"),
+ ascii("Lt"),
+ ascii("Lu"),
+ ascii("M"),
+ ascii("Mc"),
+ ascii("Me"),
+ ascii("Mn"),
+ ascii("N"),
+ ascii("Nd"),
+ ascii("Nl"),
+ ascii("No"),
+ ascii("P"),
+ ascii("Pc"),
+ ascii("Pd"),
+ ascii("Pe"),
+ ascii("Pf"),
+ ascii("Pi"),
+ ascii("Po"),
+ ascii("Ps"),
+ ascii("S"),
+ ascii("Sc"),
+ ascii("Sk"),
+ ascii("Sm"),
+ ascii("So"),
+ ascii("Z"),
+ ascii("Zl"),
+ ascii("Zp"),
+ ascii("Zs"),
+ ascii("Arabic"),
+ ascii("Armenian"),
+ ascii("Bengali"),
+ ascii("Bopomofo"),
+ ascii("Braille"),
+ ascii("Buginese"),
+ ascii("Buhid"),
+ ascii("Canadian_Aboriginal"),
+ ascii("Cherokee"),
+ ascii("Common"),
+ ascii("Coptic"),
+ ascii("Cypriot"),
+ ascii("Cyrillic"),
+ ascii("Deseret"),
+ ascii("Devanagari"),
+ ascii("Ethiopic"),
+ ascii("Georgian"),
+ ascii("Glagolitic"),
+ ascii("Gothic"),
+ ascii("Greek"),
+ ascii("Gujarati"),
+ ascii("Gurmukhi"),
+ ascii("Han"),
+ ascii("Hangul"),
+ ascii("Hanunoo"),
+ ascii("Hebrew"),
+ ascii("Hiragana"),
+ ascii("Inherited"),
+ ascii("Kannada"),
+ ascii("Katakana"),
+ ascii("Kharoshthi"),
+ ascii("Khmer"),
+ ascii("Lao"),
+ ascii("Latin"),
+ ascii("Limbu"),
+ ascii("Linear_B"),
+ ascii("Malayalam"),
+ ascii("Mongolian"),
+ ascii("Myanmar"),
+ ascii("New_Tai_Lue"),
+ ascii("Ogham"),
+ ascii("Old_Italic"),
+ ascii("Old_Persian"),
+ ascii("Oriya"),
+ ascii("Osmanya"),
+ ascii("Runic"),
+ ascii("Shavian"),
+ ascii("Sinhala"),
+ ascii("Syloti_Nagri"),
+ ascii("Syriac"),
+ ascii("Tagalog"),
+ ascii("Tagbanwa"),
+ ascii("Tai_Le"),
+ ascii("Tamil"),
+ ascii("Telugu"),
+ ascii("Thaana"),
+ ascii("Thai"),
+ ascii("Tibetan"),
+ ascii("Tifinagh"),
+ ascii("Ugaritic"),
+ ascii("Yi")
+ };
+}
diff --git a/src/org/joni/encoding/unicode/UnicodeCaseFolds.java b/src/org/joni/encoding/unicode/UnicodeCaseFolds.java
new file mode 100644
index 0000000..94b0324
--- /dev/null
+++ b/src/org/joni/encoding/unicode/UnicodeCaseFolds.java
@@ -0,0 +1,3841 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.unicode;
+
+import org.joni.util.IntArrayHash;
+import org.joni.util.IntHash;
+
+public class UnicodeCaseFolds {
+
+ static void initializeCaseFoldTables() {
+ IntHash<int[]> fold = new IntHash<int[]>(1200);
+ for (int i=0; i<CaseFold1_From.length; i++)
+ fold.putDirect(CaseFold1_From[i], CaseFold1_To[i]);
+ for (int i=0; i<CaseFold_Locale_From.length; i++)
+ fold.putDirect(CaseFold_Locale_From[i], CaseFold_Locale_To[i]);
+ FoldHash = fold;
+
+ IntHash<int[]> unfold1 = new IntHash<int[]>(1000);
+ for (int i=0; i<CaseUnfold_11_From.length; i++)
+ unfold1.putDirect(CaseUnfold_11_From[i], CaseUnfold_11_To[i]);
+ for (int i=0; i<CaseUnfold_11_Locale_From.length; i++)
+ unfold1.putDirect(CaseUnfold_11_Locale_From[i], CaseUnfold_11_Locale_To[i]);
+ Unfold1Hash = unfold1;
+
+ IntArrayHash<int[]> unfold2 = new IntArrayHash<int[]>(200);
+ for (int i=0; i<CaseUnfold_12.length; i+=2)
+ unfold2.putDirect(CaseUnfold_12[i], CaseUnfold_12[i + 1]);
+ for (int i=0; i<CaseUnfold_12_Locale.length; i+=2)
+ unfold2.putDirect(CaseUnfold_12_Locale[i], CaseUnfold_12_Locale[i + 1]);
+ Unfold2Hash = unfold2;
+
+ IntArrayHash<int[]> unfold3 = new IntArrayHash<int[]>(30);
+ for (int i=0; i<CaseUnfold_13.length; i+=2)
+ unfold3.putDirect(CaseUnfold_13[i], CaseUnfold_13[i + 1]);
+ Unfold3Hash = unfold3;
+ }
+
+ static IntHash<int[]> FoldHash; // CaseFold1_* plus CaseFold_Locale_* entries; assigned by initializeCaseFoldTables()
+ static IntHash<int[]> Unfold1Hash; // CaseUnfold_11_* plus CaseUnfold_11_Locale_* entries; assigned by initializeCaseFoldTables()
+ static IntArrayHash<int[]> Unfold2Hash; // CaseUnfold_12 plus CaseUnfold_12_Locale entries; assigned by initializeCaseFoldTables()
+ static IntArrayHash<int[]> Unfold3Hash; // CaseUnfold_13 entries; assigned by initializeCaseFoldTables()
+
+ private static final int CaseFold1_From[] = new int[] {
+ 0x0041,
+ 0x0042,
+ 0x0043,
+ 0x0044,
+ 0x0045,
+ 0x0046,
+ 0x0047,
+ 0x0048,
+ 0x004a,
+ 0x004b,
+ 0x004c,
+ 0x004d,
+ 0x004e,
+ 0x004f,
+ 0x0050,
+ 0x0051,
+ 0x0052,
+ 0x0053,
+ 0x0054,
+ 0x0055,
+ 0x0056,
+ 0x0057,
+ 0x0058,
+ 0x0059,
+ 0x005a,
+ 0x00b5,
+ 0x00c0,
+ 0x00c1,
+ 0x00c2,
+ 0x00c3,
+ 0x00c4,
+ 0x00c5,
+ 0x00c6,
+ 0x00c7,
+ 0x00c8,
+ 0x00c9,
+ 0x00ca,
+ 0x00cb,
+ 0x00cc,
+ 0x00cd,
+ 0x00ce,
+ 0x00cf,
+ 0x00d0,
+ 0x00d1,
+ 0x00d2,
+ 0x00d3,
+ 0x00d4,
+ 0x00d5,
+ 0x00d6,
+ 0x00d8,
+ 0x00d9,
+ 0x00da,
+ 0x00db,
+ 0x00dc,
+ 0x00dd,
+ 0x00de,
+ 0x00df,
+ 0x0100,
+ 0x0102,
+ 0x0104,
+ 0x0106,
+ 0x0108,
+ 0x010a,
+ 0x010c,
+ 0x010e,
+ 0x0110,
+ 0x0112,
+ 0x0114,
+ 0x0116,
+ 0x0118,
+ 0x011a,
+ 0x011c,
+ 0x011e,
+ 0x0120,
+ 0x0122,
+ 0x0124,
+ 0x0126,
+ 0x0128,
+ 0x012a,
+ 0x012c,
+ 0x012e,
+ 0x0132,
+ 0x0134,
+ 0x0136,
+ 0x0139,
+ 0x013b,
+ 0x013d,
+ 0x013f,
+ 0x0141,
+ 0x0143,
+ 0x0145,
+ 0x0147,
+ 0x0149,
+ 0x014a,
+ 0x014c,
+ 0x014e,
+ 0x0150,
+ 0x0152,
+ 0x0154,
+ 0x0156,
+ 0x0158,
+ 0x015a,
+ 0x015c,
+ 0x015e,
+ 0x0160,
+ 0x0162,
+ 0x0164,
+ 0x0166,
+ 0x0168,
+ 0x016a,
+ 0x016c,
+ 0x016e,
+ 0x0170,
+ 0x0172,
+ 0x0174,
+ 0x0176,
+ 0x0178,
+ 0x0179,
+ 0x017b,
+ 0x017d,
+ 0x017f,
+ 0x0181,
+ 0x0182,
+ 0x0184,
+ 0x0186,
+ 0x0187,
+ 0x0189,
+ 0x018a,
+ 0x018b,
+ 0x018e,
+ 0x018f,
+ 0x0190,
+ 0x0191,
+ 0x0193,
+ 0x0194,
+ 0x0196,
+ 0x0197,
+ 0x0198,
+ 0x019c,
+ 0x019d,
+ 0x019f,
+ 0x01a0,
+ 0x01a2,
+ 0x01a4,
+ 0x01a6,
+ 0x01a7,
+ 0x01a9,
+ 0x01ac,
+ 0x01ae,
+ 0x01af,
+ 0x01b1,
+ 0x01b2,
+ 0x01b3,
+ 0x01b5,
+ 0x01b7,
+ 0x01b8,
+ 0x01bc,
+ 0x01c4,
+ 0x01c5,
+ 0x01c7,
+ 0x01c8,
+ 0x01ca,
+ 0x01cb,
+ 0x01cd,
+ 0x01cf,
+ 0x01d1,
+ 0x01d3,
+ 0x01d5,
+ 0x01d7,
+ 0x01d9,
+ 0x01db,
+ 0x01de,
+ 0x01e0,
+ 0x01e2,
+ 0x01e4,
+ 0x01e6,
+ 0x01e8,
+ 0x01ea,
+ 0x01ec,
+ 0x01ee,
+ 0x01f0,
+ 0x01f1,
+ 0x01f2,
+ 0x01f4,
+ 0x01f6,
+ 0x01f7,
+ 0x01f8,
+ 0x01fa,
+ 0x01fc,
+ 0x01fe,
+ 0x0200,
+ 0x0202,
+ 0x0204,
+ 0x0206,
+ 0x0208,
+ 0x020a,
+ 0x020c,
+ 0x020e,
+ 0x0210,
+ 0x0212,
+ 0x0214,
+ 0x0216,
+ 0x0218,
+ 0x021a,
+ 0x021c,
+ 0x021e,
+ 0x0220,
+ 0x0222,
+ 0x0224,
+ 0x0226,
+ 0x0228,
+ 0x022a,
+ 0x022c,
+ 0x022e,
+ 0x0230,
+ 0x0232,
+ 0x023b,
+ 0x023d,
+ 0x0241,
+ 0x0345,
+ 0x0386,
+ 0x0388,
+ 0x0389,
+ 0x038a,
+ 0x038c,
+ 0x038e,
+ 0x038f,
+ 0x0390,
+ 0x0391,
+ 0x0392,
+ 0x0393,
+ 0x0394,
+ 0x0395,
+ 0x0396,
+ 0x0397,
+ 0x0398,
+ 0x0399,
+ 0x039a,
+ 0x039b,
+ 0x039c,
+ 0x039d,
+ 0x039e,
+ 0x039f,
+ 0x03a0,
+ 0x03a1,
+ 0x03a3,
+ 0x03a4,
+ 0x03a5,
+ 0x03a6,
+ 0x03a7,
+ 0x03a8,
+ 0x03a9,
+ 0x03aa,
+ 0x03ab,
+ 0x03b0,
+ 0x03c2,
+ 0x03d0,
+ 0x03d1,
+ 0x03d5,
+ 0x03d6,
+ 0x03d8,
+ 0x03da,
+ 0x03dc,
+ 0x03de,
+ 0x03e0,
+ 0x03e2,
+ 0x03e4,
+ 0x03e6,
+ 0x03e8,
+ 0x03ea,
+ 0x03ec,
+ 0x03ee,
+ 0x03f0,
+ 0x03f1,
+ 0x03f4,
+ 0x03f5,
+ 0x03f7,
+ 0x03f9,
+ 0x03fa,
+ 0x0400,
+ 0x0401,
+ 0x0402,
+ 0x0403,
+ 0x0404,
+ 0x0405,
+ 0x0406,
+ 0x0407,
+ 0x0408,
+ 0x0409,
+ 0x040a,
+ 0x040b,
+ 0x040c,
+ 0x040d,
+ 0x040e,
+ 0x040f,
+ 0x0410,
+ 0x0411,
+ 0x0412,
+ 0x0413,
+ 0x0414,
+ 0x0415,
+ 0x0416,
+ 0x0417,
+ 0x0418,
+ 0x0419,
+ 0x041a,
+ 0x041b,
+ 0x041c,
+ 0x041d,
+ 0x041e,
+ 0x041f,
+ 0x0420,
+ 0x0421,
+ 0x0422,
+ 0x0423,
+ 0x0424,
+ 0x0425,
+ 0x0426,
+ 0x0427,
+ 0x0428,
+ 0x0429,
+ 0x042a,
+ 0x042b,
+ 0x042c,
+ 0x042d,
+ 0x042e,
+ 0x042f,
+ 0x0460,
+ 0x0462,
+ 0x0464,
+ 0x0466,
+ 0x0468,
+ 0x046a,
+ 0x046c,
+ 0x046e,
+ 0x0470,
+ 0x0472,
+ 0x0474,
+ 0x0476,
+ 0x0478,
+ 0x047a,
+ 0x047c,
+ 0x047e,
+ 0x0480,
+ 0x048a,
+ 0x048c,
+ 0x048e,
+ 0x0490,
+ 0x0492,
+ 0x0494,
+ 0x0496,
+ 0x0498,
+ 0x049a,
+ 0x049c,
+ 0x049e,
+ 0x04a0,
+ 0x04a2,
+ 0x04a4,
+ 0x04a6,
+ 0x04a8,
+ 0x04aa,
+ 0x04ac,
+ 0x04ae,
+ 0x04b0,
+ 0x04b2,
+ 0x04b4,
+ 0x04b6,
+ 0x04b8,
+ 0x04ba,
+ 0x04bc,
+ 0x04be,
+ 0x04c1,
+ 0x04c3,
+ 0x04c5,
+ 0x04c7,
+ 0x04c9,
+ 0x04cb,
+ 0x04cd,
+ 0x04d0,
+ 0x04d2,
+ 0x04d4,
+ 0x04d6,
+ 0x04d8,
+ 0x04da,
+ 0x04dc,
+ 0x04de,
+ 0x04e0,
+ 0x04e2,
+ 0x04e4,
+ 0x04e6,
+ 0x04e8,
+ 0x04ea,
+ 0x04ec,
+ 0x04ee,
+ 0x04f0,
+ 0x04f2,
+ 0x04f4,
+ 0x04f6,
+ 0x04f8,
+ 0x0500,
+ 0x0502,
+ 0x0504,
+ 0x0506,
+ 0x0508,
+ 0x050a,
+ 0x050c,
+ 0x050e,
+ 0x0531,
+ 0x0532,
+ 0x0533,
+ 0x0534,
+ 0x0535,
+ 0x0536,
+ 0x0537,
+ 0x0538,
+ 0x0539,
+ 0x053a,
+ 0x053b,
+ 0x053c,
+ 0x053d,
+ 0x053e,
+ 0x053f,
+ 0x0540,
+ 0x0541,
+ 0x0542,
+ 0x0543,
+ 0x0544,
+ 0x0545,
+ 0x0546,
+ 0x0547,
+ 0x0548,
+ 0x0549,
+ 0x054a,
+ 0x054b,
+ 0x054c,
+ 0x054d,
+ 0x054e,
+ 0x054f,
+ 0x0550,
+ 0x0551,
+ 0x0552,
+ 0x0553,
+ 0x0554,
+ 0x0555,
+ 0x0556,
+ 0x0587,
+ 0x10a0,
+ 0x10a1,
+ 0x10a2,
+ 0x10a3,
+ 0x10a4,
+ 0x10a5,
+ 0x10a6,
+ 0x10a7,
+ 0x10a8,
+ 0x10a9,
+ 0x10aa,
+ 0x10ab,
+ 0x10ac,
+ 0x10ad,
+ 0x10ae,
+ 0x10af,
+ 0x10b0,
+ 0x10b1,
+ 0x10b2,
+ 0x10b3,
+ 0x10b4,
+ 0x10b5,
+ 0x10b6,
+ 0x10b7,
+ 0x10b8,
+ 0x10b9,
+ 0x10ba,
+ 0x10bb,
+ 0x10bc,
+ 0x10bd,
+ 0x10be,
+ 0x10bf,
+ 0x10c0,
+ 0x10c1,
+ 0x10c2,
+ 0x10c3,
+ 0x10c4,
+ 0x10c5,
+ 0x1e00,
+ 0x1e02,
+ 0x1e04,
+ 0x1e06,
+ 0x1e08,
+ 0x1e0a,
+ 0x1e0c,
+ 0x1e0e,
+ 0x1e10,
+ 0x1e12,
+ 0x1e14,
+ 0x1e16,
+ 0x1e18,
+ 0x1e1a,
+ 0x1e1c,
+ 0x1e1e,
+ 0x1e20,
+ 0x1e22,
+ 0x1e24,
+ 0x1e26,
+ 0x1e28,
+ 0x1e2a,
+ 0x1e2c,
+ 0x1e2e,
+ 0x1e30,
+ 0x1e32,
+ 0x1e34,
+ 0x1e36,
+ 0x1e38,
+ 0x1e3a,
+ 0x1e3c,
+ 0x1e3e,
+ 0x1e40,
+ 0x1e42,
+ 0x1e44,
+ 0x1e46,
+ 0x1e48,
+ 0x1e4a,
+ 0x1e4c,
+ 0x1e4e,
+ 0x1e50,
+ 0x1e52,
+ 0x1e54,
+ 0x1e56,
+ 0x1e58,
+ 0x1e5a,
+ 0x1e5c,
+ 0x1e5e,
+ 0x1e60,
+ 0x1e62,
+ 0x1e64,
+ 0x1e66,
+ 0x1e68,
+ 0x1e6a,
+ 0x1e6c,
+ 0x1e6e,
+ 0x1e70,
+ 0x1e72,
+ 0x1e74,
+ 0x1e76,
+ 0x1e78,
+ 0x1e7a,
+ 0x1e7c,
+ 0x1e7e,
+ 0x1e80,
+ 0x1e82,
+ 0x1e84,
+ 0x1e86,
+ 0x1e88,
+ 0x1e8a,
+ 0x1e8c,
+ 0x1e8e,
+ 0x1e90,
+ 0x1e92,
+ 0x1e94,
+ 0x1e96,
+ 0x1e97,
+ 0x1e98,
+ 0x1e99,
+ 0x1e9a,
+ 0x1e9b,
+ 0x1ea0,
+ 0x1ea2,
+ 0x1ea4,
+ 0x1ea6,
+ 0x1ea8,
+ 0x1eaa,
+ 0x1eac,
+ 0x1eae,
+ 0x1eb0,
+ 0x1eb2,
+ 0x1eb4,
+ 0x1eb6,
+ 0x1eb8,
+ 0x1eba,
+ 0x1ebc,
+ 0x1ebe,
+ 0x1ec0,
+ 0x1ec2,
+ 0x1ec4,
+ 0x1ec6,
+ 0x1ec8,
+ 0x1eca,
+ 0x1ecc,
+ 0x1ece,
+ 0x1ed0,
+ 0x1ed2,
+ 0x1ed4,
+ 0x1ed6,
+ 0x1ed8,
+ 0x1eda,
+ 0x1edc,
+ 0x1ede,
+ 0x1ee0,
+ 0x1ee2,
+ 0x1ee4,
+ 0x1ee6,
+ 0x1ee8,
+ 0x1eea,
+ 0x1eec,
+ 0x1eee,
+ 0x1ef0,
+ 0x1ef2,
+ 0x1ef4,
+ 0x1ef6,
+ 0x1ef8,
+ 0x1f08,
+ 0x1f09,
+ 0x1f0a,
+ 0x1f0b,
+ 0x1f0c,
+ 0x1f0d,
+ 0x1f0e,
+ 0x1f0f,
+ 0x1f18,
+ 0x1f19,
+ 0x1f1a,
+ 0x1f1b,
+ 0x1f1c,
+ 0x1f1d,
+ 0x1f28,
+ 0x1f29,
+ 0x1f2a,
+ 0x1f2b,
+ 0x1f2c,
+ 0x1f2d,
+ 0x1f2e,
+ 0x1f2f,
+ 0x1f38,
+ 0x1f39,
+ 0x1f3a,
+ 0x1f3b,
+ 0x1f3c,
+ 0x1f3d,
+ 0x1f3e,
+ 0x1f3f,
+ 0x1f48,
+ 0x1f49,
+ 0x1f4a,
+ 0x1f4b,
+ 0x1f4c,
+ 0x1f4d,
+ 0x1f50,
+ 0x1f52,
+ 0x1f54,
+ 0x1f56,
+ 0x1f59,
+ 0x1f5b,
+ 0x1f5d,
+ 0x1f5f,
+ 0x1f68,
+ 0x1f69,
+ 0x1f6a,
+ 0x1f6b,
+ 0x1f6c,
+ 0x1f6d,
+ 0x1f6e,
+ 0x1f6f,
+ 0x1f80,
+ 0x1f81,
+ 0x1f82,
+ 0x1f83,
+ 0x1f84,
+ 0x1f85,
+ 0x1f86,
+ 0x1f87,
+ 0x1f88,
+ 0x1f89,
+ 0x1f8a,
+ 0x1f8b,
+ 0x1f8c,
+ 0x1f8d,
+ 0x1f8e,
+ 0x1f8f,
+ 0x1f90,
+ 0x1f91,
+ 0x1f92,
+ 0x1f93,
+ 0x1f94,
+ 0x1f95,
+ 0x1f96,
+ 0x1f97,
+ 0x1f98,
+ 0x1f99,
+ 0x1f9a,
+ 0x1f9b,
+ 0x1f9c,
+ 0x1f9d,
+ 0x1f9e,
+ 0x1f9f,
+ 0x1fa0,
+ 0x1fa1,
+ 0x1fa2,
+ 0x1fa3,
+ 0x1fa4,
+ 0x1fa5,
+ 0x1fa6,
+ 0x1fa7,
+ 0x1fa8,
+ 0x1fa9,
+ 0x1faa,
+ 0x1fab,
+ 0x1fac,
+ 0x1fad,
+ 0x1fae,
+ 0x1faf,
+ 0x1fb2,
+ 0x1fb3,
+ 0x1fb4,
+ 0x1fb6,
+ 0x1fb7,
+ 0x1fb8,
+ 0x1fb9,
+ 0x1fba,
+ 0x1fbb,
+ 0x1fbc,
+ 0x1fbe,
+ 0x1fc2,
+ 0x1fc3,
+ 0x1fc4,
+ 0x1fc6,
+ 0x1fc7,
+ 0x1fc8,
+ 0x1fc9,
+ 0x1fca,
+ 0x1fcb,
+ 0x1fcc,
+ 0x1fd2,
+ 0x1fd3,
+ 0x1fd6,
+ 0x1fd7,
+ 0x1fd8,
+ 0x1fd9,
+ 0x1fda,
+ 0x1fdb,
+ 0x1fe2,
+ 0x1fe3,
+ 0x1fe4,
+ 0x1fe6,
+ 0x1fe7,
+ 0x1fe8,
+ 0x1fe9,
+ 0x1fea,
+ 0x1feb,
+ 0x1fec,
+ 0x1ff2,
+ 0x1ff3,
+ 0x1ff4,
+ 0x1ff6,
+ 0x1ff7,
+ 0x1ff8,
+ 0x1ff9,
+ 0x1ffa,
+ 0x1ffb,
+ 0x1ffc,
+ 0x2126,
+ 0x212a,
+ 0x212b,
+ 0x2160,
+ 0x2161,
+ 0x2162,
+ 0x2163,
+ 0x2164,
+ 0x2165,
+ 0x2166,
+ 0x2167,
+ 0x2168,
+ 0x2169,
+ 0x216a,
+ 0x216b,
+ 0x216c,
+ 0x216d,
+ 0x216e,
+ 0x216f,
+ 0x24b6,
+ 0x24b7,
+ 0x24b8,
+ 0x24b9,
+ 0x24ba,
+ 0x24bb,
+ 0x24bc,
+ 0x24bd,
+ 0x24be,
+ 0x24bf,
+ 0x24c0,
+ 0x24c1,
+ 0x24c2,
+ 0x24c3,
+ 0x24c4,
+ 0x24c5,
+ 0x24c6,
+ 0x24c7,
+ 0x24c8,
+ 0x24c9,
+ 0x24ca,
+ 0x24cb,
+ 0x24cc,
+ 0x24cd,
+ 0x24ce,
+ 0x24cf,
+ 0x2c00,
+ 0x2c01,
+ 0x2c02,
+ 0x2c03,
+ 0x2c04,
+ 0x2c05,
+ 0x2c06,
+ 0x2c07,
+ 0x2c08,
+ 0x2c09,
+ 0x2c0a,
+ 0x2c0b,
+ 0x2c0c,
+ 0x2c0d,
+ 0x2c0e,
+ 0x2c0f,
+ 0x2c10,
+ 0x2c11,
+ 0x2c12,
+ 0x2c13,
+ 0x2c14,
+ 0x2c15,
+ 0x2c16,
+ 0x2c17,
+ 0x2c18,
+ 0x2c19,
+ 0x2c1a,
+ 0x2c1b,
+ 0x2c1c,
+ 0x2c1d,
+ 0x2c1e,
+ 0x2c1f,
+ 0x2c20,
+ 0x2c21,
+ 0x2c22,
+ 0x2c23,
+ 0x2c24,
+ 0x2c25,
+ 0x2c26,
+ 0x2c27,
+ 0x2c28,
+ 0x2c29,
+ 0x2c2a,
+ 0x2c2b,
+ 0x2c2c,
+ 0x2c2d,
+ 0x2c2e,
+ 0x2c80,
+ 0x2c82,
+ 0x2c84,
+ 0x2c86,
+ 0x2c88,
+ 0x2c8a,
+ 0x2c8c,
+ 0x2c8e,
+ 0x2c90,
+ 0x2c92,
+ 0x2c94,
+ 0x2c96,
+ 0x2c98,
+ 0x2c9a,
+ 0x2c9c,
+ 0x2c9e,
+ 0x2ca0,
+ 0x2ca2,
+ 0x2ca4,
+ 0x2ca6,
+ 0x2ca8,
+ 0x2caa,
+ 0x2cac,
+ 0x2cae,
+ 0x2cb0,
+ 0x2cb2,
+ 0x2cb4,
+ 0x2cb6,
+ 0x2cb8,
+ 0x2cba,
+ 0x2cbc,
+ 0x2cbe,
+ 0x2cc0,
+ 0x2cc2,
+ 0x2cc4,
+ 0x2cc6,
+ 0x2cc8,
+ 0x2cca,
+ 0x2ccc,
+ 0x2cce,
+ 0x2cd0,
+ 0x2cd2,
+ 0x2cd4,
+ 0x2cd6,
+ 0x2cd8,
+ 0x2cda,
+ 0x2cdc,
+ 0x2cde,
+ 0x2ce0,
+ 0x2ce2,
+ 0xfb00,
+ 0xfb01,
+ 0xfb02,
+ 0xfb03,
+ 0xfb04,
+ 0xfb05,
+ 0xfb06,
+ 0xfb13,
+ 0xfb14,
+ 0xfb15,
+ 0xfb16,
+ 0xfb17,
+ 0xff21,
+ 0xff22,
+ 0xff23,
+ 0xff24,
+ 0xff25,
+ 0xff26,
+ 0xff27,
+ 0xff28,
+ 0xff29,
+ 0xff2a,
+ 0xff2b,
+ 0xff2c,
+ 0xff2d,
+ 0xff2e,
+ 0xff2f,
+ 0xff30,
+ 0xff31,
+ 0xff32,
+ 0xff33,
+ 0xff34,
+ 0xff35,
+ 0xff36,
+ 0xff37,
+ 0xff38,
+ 0xff39,
+ 0xff3a,
+ 0x10400,
+ 0x10401,
+ 0x10402,
+ 0x10403,
+ 0x10404,
+ 0x10405,
+ 0x10406,
+ 0x10407,
+ 0x10408,
+ 0x10409,
+ 0x1040a,
+ 0x1040b,
+ 0x1040c,
+ 0x1040d,
+ 0x1040e,
+ 0x1040f,
+ 0x10410,
+ 0x10411,
+ 0x10412,
+ 0x10413,
+ 0x10414,
+ 0x10415,
+ 0x10416,
+ 0x10417,
+ 0x10418,
+ 0x10419,
+ 0x1041a,
+ 0x1041b,
+ 0x1041c,
+ 0x1041d,
+ 0x1041e,
+ 0x1041f,
+ 0x10420,
+ 0x10421,
+ 0x10422,
+ 0x10423,
+ 0x10424,
+ 0x10425,
+ 0x10426,
+ 0x10427,
+ };
+
+ private static final int CaseFold1_To[][] = new int[][] {
+ {0x0061},
+ {0x0062},
+ {0x0063},
+ {0x0064},
+ {0x0065},
+ {0x0066},
+ {0x0067},
+ {0x0068},
+ {0x006a},
+ {0x006b},
+ {0x006c},
+ {0x006d},
+ {0x006e},
+ {0x006f},
+ {0x0070},
+ {0x0071},
+ {0x0072},
+ {0x0073},
+ {0x0074},
+ {0x0075},
+ {0x0076},
+ {0x0077},
+ {0x0078},
+ {0x0079},
+ {0x007a},
+ {0x03bc},
+ {0x00e0},
+ {0x00e1},
+ {0x00e2},
+ {0x00e3},
+ {0x00e4},
+ {0x00e5},
+ {0x00e6},
+ {0x00e7},
+ {0x00e8},
+ {0x00e9},
+ {0x00ea},
+ {0x00eb},
+ {0x00ec},
+ {0x00ed},
+ {0x00ee},
+ {0x00ef},
+ {0x00f0},
+ {0x00f1},
+ {0x00f2},
+ {0x00f3},
+ {0x00f4},
+ {0x00f5},
+ {0x00f6},
+ {0x00f8},
+ {0x00f9},
+ {0x00fa},
+ {0x00fb},
+ {0x00fc},
+ {0x00fd},
+ {0x00fe},
+ {0x0073, 0x0073},
+ {0x0101},
+ {0x0103},
+ {0x0105},
+ {0x0107},
+ {0x0109},
+ {0x010b},
+ {0x010d},
+ {0x010f},
+ {0x0111},
+ {0x0113},
+ {0x0115},
+ {0x0117},
+ {0x0119},
+ {0x011b},
+ {0x011d},
+ {0x011f},
+ {0x0121},
+ {0x0123},
+ {0x0125},
+ {0x0127},
+ {0x0129},
+ {0x012b},
+ {0x012d},
+ {0x012f},
+ {0x0133},
+ {0x0135},
+ {0x0137},
+ {0x013a},
+ {0x013c},
+ {0x013e},
+ {0x0140},
+ {0x0142},
+ {0x0144},
+ {0x0146},
+ {0x0148},
+ {0x02bc, 0x006e},
+ {0x014b},
+ {0x014d},
+ {0x014f},
+ {0x0151},
+ {0x0153},
+ {0x0155},
+ {0x0157},
+ {0x0159},
+ {0x015b},
+ {0x015d},
+ {0x015f},
+ {0x0161},
+ {0x0163},
+ {0x0165},
+ {0x0167},
+ {0x0169},
+ {0x016b},
+ {0x016d},
+ {0x016f},
+ {0x0171},
+ {0x0173},
+ {0x0175},
+ {0x0177},
+ {0x00ff},
+ {0x017a},
+ {0x017c},
+ {0x017e},
+ {0x0073},
+ {0x0253},
+ {0x0183},
+ {0x0185},
+ {0x0254},
+ {0x0188},
+ {0x0256},
+ {0x0257},
+ {0x018c},
+ {0x01dd},
+ {0x0259},
+ {0x025b},
+ {0x0192},
+ {0x0260},
+ {0x0263},
+ {0x0269},
+ {0x0268},
+ {0x0199},
+ {0x026f},
+ {0x0272},
+ {0x0275},
+ {0x01a1},
+ {0x01a3},
+ {0x01a5},
+ {0x0280},
+ {0x01a8},
+ {0x0283},
+ {0x01ad},
+ {0x0288},
+ {0x01b0},
+ {0x028a},
+ {0x028b},
+ {0x01b4},
+ {0x01b6},
+ {0x0292},
+ {0x01b9},
+ {0x01bd},
+ {0x01c6},
+ {0x01c6},
+ {0x01c9},
+ {0x01c9},
+ {0x01cc},
+ {0x01cc},
+ {0x01ce},
+ {0x01d0},
+ {0x01d2},
+ {0x01d4},
+ {0x01d6},
+ {0x01d8},
+ {0x01da},
+ {0x01dc},
+ {0x01df},
+ {0x01e1},
+ {0x01e3},
+ {0x01e5},
+ {0x01e7},
+ {0x01e9},
+ {0x01eb},
+ {0x01ed},
+ {0x01ef},
+ {0x006a, 0x030c},
+ {0x01f3},
+ {0x01f3},
+ {0x01f5},
+ {0x0195},
+ {0x01bf},
+ {0x01f9},
+ {0x01fb},
+ {0x01fd},
+ {0x01ff},
+ {0x0201},
+ {0x0203},
+ {0x0205},
+ {0x0207},
+ {0x0209},
+ {0x020b},
+ {0x020d},
+ {0x020f},
+ {0x0211},
+ {0x0213},
+ {0x0215},
+ {0x0217},
+ {0x0219},
+ {0x021b},
+ {0x021d},
+ {0x021f},
+ {0x019e},
+ {0x0223},
+ {0x0225},
+ {0x0227},
+ {0x0229},
+ {0x022b},
+ {0x022d},
+ {0x022f},
+ {0x0231},
+ {0x0233},
+ {0x023c},
+ {0x019a},
+ {0x0294},
+ {0x03b9},
+ {0x03ac},
+ {0x03ad},
+ {0x03ae},
+ {0x03af},
+ {0x03cc},
+ {0x03cd},
+ {0x03ce},
+ {0x03b9, 0x0308, 0x0301},
+ {0x03b1},
+ {0x03b2},
+ {0x03b3},
+ {0x03b4},
+ {0x03b5},
+ {0x03b6},
+ {0x03b7},
+ {0x03b8},
+ {0x03b9},
+ {0x03ba},
+ {0x03bb},
+ {0x03bc},
+ {0x03bd},
+ {0x03be},
+ {0x03bf},
+ {0x03c0},
+ {0x03c1},
+ {0x03c3},
+ {0x03c4},
+ {0x03c5},
+ {0x03c6},
+ {0x03c7},
+ {0x03c8},
+ {0x03c9},
+ {0x03ca},
+ {0x03cb},
+ {0x03c5, 0x0308, 0x0301},
+ {0x03c3},
+ {0x03b2},
+ {0x03b8},
+ {0x03c6},
+ {0x03c0},
+ {0x03d9},
+ {0x03db},
+ {0x03dd},
+ {0x03df},
+ {0x03e1},
+ {0x03e3},
+ {0x03e5},
+ {0x03e7},
+ {0x03e9},
+ {0x03eb},
+ {0x03ed},
+ {0x03ef},
+ {0x03ba},
+ {0x03c1},
+ {0x03b8},
+ {0x03b5},
+ {0x03f8},
+ {0x03f2},
+ {0x03fb},
+ {0x0450},
+ {0x0451},
+ {0x0452},
+ {0x0453},
+ {0x0454},
+ {0x0455},
+ {0x0456},
+ {0x0457},
+ {0x0458},
+ {0x0459},
+ {0x045a},
+ {0x045b},
+ {0x045c},
+ {0x045d},
+ {0x045e},
+ {0x045f},
+ {0x0430},
+ {0x0431},
+ {0x0432},
+ {0x0433},
+ {0x0434},
+ {0x0435},
+ {0x0436},
+ {0x0437},
+ {0x0438},
+ {0x0439},
+ {0x043a},
+ {0x043b},
+ {0x043c},
+ {0x043d},
+ {0x043e},
+ {0x043f},
+ {0x0440},
+ {0x0441},
+ {0x0442},
+ {0x0443},
+ {0x0444},
+ {0x0445},
+ {0x0446},
+ {0x0447},
+ {0x0448},
+ {0x0449},
+ {0x044a},
+ {0x044b},
+ {0x044c},
+ {0x044d},
+ {0x044e},
+ {0x044f},
+ {0x0461},
+ {0x0463},
+ {0x0465},
+ {0x0467},
+ {0x0469},
+ {0x046b},
+ {0x046d},
+ {0x046f},
+ {0x0471},
+ {0x0473},
+ {0x0475},
+ {0x0477},
+ {0x0479},
+ {0x047b},
+ {0x047d},
+ {0x047f},
+ {0x0481},
+ {0x048b},
+ {0x048d},
+ {0x048f},
+ {0x0491},
+ {0x0493},
+ {0x0495},
+ {0x0497},
+ {0x0499},
+ {0x049b},
+ {0x049d},
+ {0x049f},
+ {0x04a1},
+ {0x04a3},
+ {0x04a5},
+ {0x04a7},
+ {0x04a9},
+ {0x04ab},
+ {0x04ad},
+ {0x04af},
+ {0x04b1},
+ {0x04b3},
+ {0x04b5},
+ {0x04b7},
+ {0x04b9},
+ {0x04bb},
+ {0x04bd},
+ {0x04bf},
+ {0x04c2},
+ {0x04c4},
+ {0x04c6},
+ {0x04c8},
+ {0x04ca},
+ {0x04cc},
+ {0x04ce},
+ {0x04d1},
+ {0x04d3},
+ {0x04d5},
+ {0x04d7},
+ {0x04d9},
+ {0x04db},
+ {0x04dd},
+ {0x04df},
+ {0x04e1},
+ {0x04e3},
+ {0x04e5},
+ {0x04e7},
+ {0x04e9},
+ {0x04eb},
+ {0x04ed},
+ {0x04ef},
+ {0x04f1},
+ {0x04f3},
+ {0x04f5},
+ {0x04f7},
+ {0x04f9},
+ {0x0501},
+ {0x0503},
+ {0x0505},
+ {0x0507},
+ {0x0509},
+ {0x050b},
+ {0x050d},
+ {0x050f},
+ {0x0561},
+ {0x0562},
+ {0x0563},
+ {0x0564},
+ {0x0565},
+ {0x0566},
+ {0x0567},
+ {0x0568},
+ {0x0569},
+ {0x056a},
+ {0x056b},
+ {0x056c},
+ {0x056d},
+ {0x056e},
+ {0x056f},
+ {0x0570},
+ {0x0571},
+ {0x0572},
+ {0x0573},
+ {0x0574},
+ {0x0575},
+ {0x0576},
+ {0x0577},
+ {0x0578},
+ {0x0579},
+ {0x057a},
+ {0x057b},
+ {0x057c},
+ {0x057d},
+ {0x057e},
+ {0x057f},
+ {0x0580},
+ {0x0581},
+ {0x0582},
+ {0x0583},
+ {0x0584},
+ {0x0585},
+ {0x0586},
+ {0x0565, 0x0582},
+ {0x2d00},
+ {0x2d01},
+ {0x2d02},
+ {0x2d03},
+ {0x2d04},
+ {0x2d05},
+ {0x2d06},
+ {0x2d07},
+ {0x2d08},
+ {0x2d09},
+ {0x2d0a},
+ {0x2d0b},
+ {0x2d0c},
+ {0x2d0d},
+ {0x2d0e},
+ {0x2d0f},
+ {0x2d10},
+ {0x2d11},
+ {0x2d12},
+ {0x2d13},
+ {0x2d14},
+ {0x2d15},
+ {0x2d16},
+ {0x2d17},
+ {0x2d18},
+ {0x2d19},
+ {0x2d1a},
+ {0x2d1b},
+ {0x2d1c},
+ {0x2d1d},
+ {0x2d1e},
+ {0x2d1f},
+ {0x2d20},
+ {0x2d21},
+ {0x2d22},
+ {0x2d23},
+ {0x2d24},
+ {0x2d25},
+ {0x1e01},
+ {0x1e03},
+ {0x1e05},
+ {0x1e07},
+ {0x1e09},
+ {0x1e0b},
+ {0x1e0d},
+ {0x1e0f},
+ {0x1e11},
+ {0x1e13},
+ {0x1e15},
+ {0x1e17},
+ {0x1e19},
+ {0x1e1b},
+ {0x1e1d},
+ {0x1e1f},
+ {0x1e21},
+ {0x1e23},
+ {0x1e25},
+ {0x1e27},
+ {0x1e29},
+ {0x1e2b},
+ {0x1e2d},
+ {0x1e2f},
+ {0x1e31},
+ {0x1e33},
+ {0x1e35},
+ {0x1e37},
+ {0x1e39},
+ {0x1e3b},
+ {0x1e3d},
+ {0x1e3f},
+ {0x1e41},
+ {0x1e43},
+ {0x1e45},
+ {0x1e47},
+ {0x1e49},
+ {0x1e4b},
+ {0x1e4d},
+ {0x1e4f},
+ {0x1e51},
+ {0x1e53},
+ {0x1e55},
+ {0x1e57},
+ {0x1e59},
+ {0x1e5b},
+ {0x1e5d},
+ {0x1e5f},
+ {0x1e61},
+ {0x1e63},
+ {0x1e65},
+ {0x1e67},
+ {0x1e69},
+ {0x1e6b},
+ {0x1e6d},
+ {0x1e6f},
+ {0x1e71},
+ {0x1e73},
+ {0x1e75},
+ {0x1e77},
+ {0x1e79},
+ {0x1e7b},
+ {0x1e7d},
+ {0x1e7f},
+ {0x1e81},
+ {0x1e83},
+ {0x1e85},
+ {0x1e87},
+ {0x1e89},
+ {0x1e8b},
+ {0x1e8d},
+ {0x1e8f},
+ {0x1e91},
+ {0x1e93},
+ {0x1e95},
+ {0x0068, 0x0331},
+ {0x0074, 0x0308},
+ {0x0077, 0x030a},
+ {0x0079, 0x030a},
+ {0x0061, 0x02be},
+ {0x1e61},
+ {0x1ea1},
+ {0x1ea3},
+ {0x1ea5},
+ {0x1ea7},
+ {0x1ea9},
+ {0x1eab},
+ {0x1ead},
+ {0x1eaf},
+ {0x1eb1},
+ {0x1eb3},
+ {0x1eb5},
+ {0x1eb7},
+ {0x1eb9},
+ {0x1ebb},
+ {0x1ebd},
+ {0x1ebf},
+ {0x1ec1},
+ {0x1ec3},
+ {0x1ec5},
+ {0x1ec7},
+ {0x1ec9},
+ {0x1ecb},
+ {0x1ecd},
+ {0x1ecf},
+ {0x1ed1},
+ {0x1ed3},
+ {0x1ed5},
+ {0x1ed7},
+ {0x1ed9},
+ {0x1edb},
+ {0x1edd},
+ {0x1edf},
+ {0x1ee1},
+ {0x1ee3},
+ {0x1ee5},
+ {0x1ee7},
+ {0x1ee9},
+ {0x1eeb},
+ {0x1eed},
+ {0x1eef},
+ {0x1ef1},
+ {0x1ef3},
+ {0x1ef5},
+ {0x1ef7},
+ {0x1ef9},
+ {0x1f00},
+ {0x1f01},
+ {0x1f02},
+ {0x1f03},
+ {0x1f04},
+ {0x1f05},
+ {0x1f06},
+ {0x1f07},
+ {0x1f10},
+ {0x1f11},
+ {0x1f12},
+ {0x1f13},
+ {0x1f14},
+ {0x1f15},
+ {0x1f20},
+ {0x1f21},
+ {0x1f22},
+ {0x1f23},
+ {0x1f24},
+ {0x1f25},
+ {0x1f26},
+ {0x1f27},
+ {0x1f30},
+ {0x1f31},
+ {0x1f32},
+ {0x1f33},
+ {0x1f34},
+ {0x1f35},
+ {0x1f36},
+ {0x1f37},
+ {0x1f40},
+ {0x1f41},
+ {0x1f42},
+ {0x1f43},
+ {0x1f44},
+ {0x1f45},
+ {0x03c5, 0x0313},
+ {0x03c5, 0x0313, 0x0300},
+ {0x03c5, 0x0313, 0x0301},
+ {0x03c5, 0x0313, 0x0342},
+ {0x1f51},
+ {0x1f53},
+ {0x1f55},
+ {0x1f57},
+ {0x1f60},
+ {0x1f61},
+ {0x1f62},
+ {0x1f63},
+ {0x1f64},
+ {0x1f65},
+ {0x1f66},
+ {0x1f67},
+ {0x1f00, 0x03b9},
+ {0x1f01, 0x03b9},
+ {0x1f02, 0x03b9},
+ {0x1f03, 0x03b9},
+ {0x1f04, 0x03b9},
+ {0x1f05, 0x03b9},
+ {0x1f06, 0x03b9},
+ {0x1f07, 0x03b9},
+ {0x1f00, 0x03b9},
+ {0x1f01, 0x03b9},
+ {0x1f02, 0x03b9},
+ {0x1f03, 0x03b9},
+ {0x1f04, 0x03b9},
+ {0x1f05, 0x03b9},
+ {0x1f06, 0x03b9},
+ {0x1f07, 0x03b9},
+ {0x1f20, 0x03b9},
+ {0x1f21, 0x03b9},
+ {0x1f22, 0x03b9},
+ {0x1f23, 0x03b9},
+ {0x1f24, 0x03b9},
+ {0x1f25, 0x03b9},
+ {0x1f26, 0x03b9},
+ {0x1f27, 0x03b9},
+ {0x1f20, 0x03b9},
+ {0x1f21, 0x03b9},
+ {0x1f22, 0x03b9},
+ {0x1f23, 0x03b9},
+ {0x1f24, 0x03b9},
+ {0x1f25, 0x03b9},
+ {0x1f26, 0x03b9},
+ {0x1f27, 0x03b9},
+ {0x1f60, 0x03b9},
+ {0x1f61, 0x03b9},
+ {0x1f62, 0x03b9},
+ {0x1f63, 0x03b9},
+ {0x1f64, 0x03b9},
+ {0x1f65, 0x03b9},
+ {0x1f66, 0x03b9},
+ {0x1f67, 0x03b9},
+ {0x1f60, 0x03b9},
+ {0x1f61, 0x03b9},
+ {0x1f62, 0x03b9},
+ {0x1f63, 0x03b9},
+ {0x1f64, 0x03b9},
+ {0x1f65, 0x03b9},
+ {0x1f66, 0x03b9},
+ {0x1f67, 0x03b9},
+ {0x1f70, 0x03b9},
+ {0x03b1, 0x03b9},
+ {0x03ac, 0x03b9},
+ {0x03b1, 0x0342},
+ {0x03b1, 0x0342, 0x03b9},
+ {0x1fb0},
+ {0x1fb1},
+ {0x1f70},
+ {0x1f71},
+ {0x03b1, 0x03b9},
+ {0x03b9},
+ {0x1f74, 0x03b9},
+ {0x03b7, 0x03b9},
+ {0x03ae, 0x03b9},
+ {0x03b7, 0x0342},
+ {0x03b7, 0x0342, 0x03b9},
+ {0x1f72},
+ {0x1f73},
+ {0x1f74},
+ {0x1f75},
+ {0x03b7, 0x03b9},
+ {0x03b9, 0x0308, 0x0300},
+ {0x03b9, 0x0308, 0x0301},
+ {0x03b9, 0x0342},
+ {0x03b9, 0x0308, 0x0342},
+ {0x1fd0},
+ {0x1fd1},
+ {0x1f76},
+ {0x1f77},
+ {0x03c5, 0x0308, 0x0300},
+ {0x03c5, 0x0308, 0x0301},
+ {0x03c1, 0x0313},
+ {0x03c5, 0x0342},
+ {0x03c5, 0x0308, 0x0342},
+ {0x1fe0},
+ {0x1fe1},
+ {0x1f7a},
+ {0x1f7b},
+ {0x1fe5},
+ {0x1f7c, 0x03b9},
+ {0x03c9, 0x03b9},
+ {0x03ce, 0x03b9},
+ {0x03c9, 0x0342},
+ {0x03c9, 0x0342, 0x03b9},
+ {0x1f78},
+ {0x1f79},
+ {0x1f7c},
+ {0x1f7d},
+ {0x03c9, 0x03b9},
+ {0x03c9},
+ {0x006b},
+ {0x00e5},
+ {0x2170},
+ {0x2171},
+ {0x2172},
+ {0x2173},
+ {0x2174},
+ {0x2175},
+ {0x2176},
+ {0x2177},
+ {0x2178},
+ {0x2179},
+ {0x217a},
+ {0x217b},
+ {0x217c},
+ {0x217d},
+ {0x217e},
+ {0x217f},
+ {0x24d0},
+ {0x24d1},
+ {0x24d2},
+ {0x24d3},
+ {0x24d4},
+ {0x24d5},
+ {0x24d6},
+ {0x24d7},
+ {0x24d8},
+ {0x24d9},
+ {0x24da},
+ {0x24db},
+ {0x24dc},
+ {0x24dd},
+ {0x24de},
+ {0x24df},
+ {0x24e0},
+ {0x24e1},
+ {0x24e2},
+ {0x24e3},
+ {0x24e4},
+ {0x24e5},
+ {0x24e6},
+ {0x24e7},
+ {0x24e8},
+ {0x24e9},
+ {0x2c30},
+ {0x2c31},
+ {0x2c32},
+ {0x2c33},
+ {0x2c34},
+ {0x2c35},
+ {0x2c36},
+ {0x2c37},
+ {0x2c38},
+ {0x2c39},
+ {0x2c3a},
+ {0x2c3b},
+ {0x2c3c},
+ {0x2c3d},
+ {0x2c3e},
+ {0x2c3f},
+ {0x2c40},
+ {0x2c41},
+ {0x2c42},
+ {0x2c43},
+ {0x2c44},
+ {0x2c45},
+ {0x2c46},
+ {0x2c47},
+ {0x2c48},
+ {0x2c49},
+ {0x2c4a},
+ {0x2c4b},
+ {0x2c4c},
+ {0x2c4d},
+ {0x2c4e},
+ {0x2c4f},
+ {0x2c50},
+ {0x2c51},
+ {0x2c52},
+ {0x2c53},
+ {0x2c54},
+ {0x2c55},
+ {0x2c56},
+ {0x2c57},
+ {0x2c58},
+ {0x2c59},
+ {0x2c5a},
+ {0x2c5b},
+ {0x2c5c},
+ {0x2c5d},
+ {0x2c5e},
+ {0x2c81},
+ {0x2c83},
+ {0x2c85},
+ {0x2c87},
+ {0x2c89},
+ {0x2c8b},
+ {0x2c8d},
+ {0x2c8f},
+ {0x2c91},
+ {0x2c93},
+ {0x2c95},
+ {0x2c97},
+ {0x2c99},
+ {0x2c9b},
+ {0x2c9d},
+ {0x2c9f},
+ {0x2ca1},
+ {0x2ca3},
+ {0x2ca5},
+ {0x2ca7},
+ {0x2ca9},
+ {0x2cab},
+ {0x2cad},
+ {0x2caf},
+ {0x2cb1},
+ {0x2cb3},
+ {0x2cb5},
+ {0x2cb7},
+ {0x2cb9},
+ {0x2cbb},
+ {0x2cbd},
+ {0x2cbf},
+ {0x2cc1},
+ {0x2cc3},
+ {0x2cc5},
+ {0x2cc7},
+ {0x2cc9},
+ {0x2ccb},
+ {0x2ccd},
+ {0x2ccf},
+ {0x2cd1},
+ {0x2cd3},
+ {0x2cd5},
+ {0x2cd7},
+ {0x2cd9},
+ {0x2cdb},
+ {0x2cdd},
+ {0x2cdf},
+ {0x2ce1},
+ {0x2ce3},
+ {0x0066, 0x0066},
+ {0x0066, 0x0069},
+ {0x0066, 0x006c},
+ {0x0066, 0x0066, 0x0069},
+ {0x0066, 0x0066, 0x006c},
+ {0x0073, 0x0074},
+ {0x0073, 0x0074},
+ {0x0574, 0x0576},
+ {0x0574, 0x0565},
+ {0x0574, 0x056b},
+ {0x057e, 0x0576},
+ {0x0574, 0x056d},
+ {0xff41},
+ {0xff42},
+ {0xff43},
+ {0xff44},
+ {0xff45},
+ {0xff46},
+ {0xff47},
+ {0xff48},
+ {0xff49},
+ {0xff4a},
+ {0xff4b},
+ {0xff4c},
+ {0xff4d},
+ {0xff4e},
+ {0xff4f},
+ {0xff50},
+ {0xff51},
+ {0xff52},
+ {0xff53},
+ {0xff54},
+ {0xff55},
+ {0xff56},
+ {0xff57},
+ {0xff58},
+ {0xff59},
+ {0xff5a},
+ {0x10428},
+ {0x10429},
+ {0x1042a},
+ {0x1042b},
+ {0x1042c},
+ {0x1042d},
+ {0x1042e},
+ {0x1042f},
+ {0x10430},
+ {0x10431},
+ {0x10432},
+ {0x10433},
+ {0x10434},
+ {0x10435},
+ {0x10436},
+ {0x10437},
+ {0x10438},
+ {0x10439},
+ {0x1043a},
+ {0x1043b},
+ {0x1043c},
+ {0x1043d},
+ {0x1043e},
+ {0x1043f},
+ {0x10440},
+ {0x10441},
+ {0x10442},
+ {0x10443},
+ {0x10444},
+ {0x10445},
+ {0x10446},
+ {0x10447},
+ {0x10448},
+ {0x10449},
+ {0x1044a},
+ {0x1044b},
+ {0x1044c},
+ {0x1044d},
+ {0x1044e},
+ {0x1044f}
+ };
+
+ private static final int[] CaseFold_Locale_From = new int[] { // keys put into FoldHash after the CaseFold1_From keys
+ 0x0049, // U+0049 LATIN CAPITAL LETTER I
+ 0x0130 // U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+ };
+
+ private static final int[][] CaseFold_Locale_To = new int[][] { // values paired by index with CaseFold_Locale_From
+ {0x0069}, // U+0069 LATIN SMALL LETTER I
+ {0x0069, 0x0307} // U+0069 followed by U+0307 COMBINING DOT ABOVE
+ };
+
+ static final int[] CaseUnfold_11_From = new int[] {
+ 0x0061,
+ 0x0062,
+ 0x0063,
+ 0x0064,
+ 0x0065,
+ 0x0066,
+ 0x0067,
+ 0x0068,
+ 0x006a,
+ 0x006b,
+ 0x006c,
+ 0x006d,
+ 0x006e,
+ 0x006f,
+ 0x0070,
+ 0x0071,
+ 0x0072,
+ 0x0073,
+ 0x0074,
+ 0x0075,
+ 0x0076,
+ 0x0077,
+ 0x0078,
+ 0x0079,
+ 0x007a,
+ 0x00e0,
+ 0x00e1,
+ 0x00e2,
+ 0x00e3,
+ 0x00e4,
+ 0x00e5,
+ 0x00e6,
+ 0x00e7,
+ 0x00e8,
+ 0x00e9,
+ 0x00ea,
+ 0x00eb,
+ 0x00ec,
+ 0x00ed,
+ 0x00ee,
+ 0x00ef,
+ 0x00f0,
+ 0x00f1,
+ 0x00f2,
+ 0x00f3,
+ 0x00f4,
+ 0x00f5,
+ 0x00f6,
+ 0x00f8,
+ 0x00f9,
+ 0x00fa,
+ 0x00fb,
+ 0x00fc,
+ 0x00fd,
+ 0x00fe,
+ 0x00ff,
+ 0x0101,
+ 0x0103,
+ 0x0105,
+ 0x0107,
+ 0x0109,
+ 0x010b,
+ 0x010d,
+ 0x010f,
+ 0x0111,
+ 0x0113,
+ 0x0115,
+ 0x0117,
+ 0x0119,
+ 0x011b,
+ 0x011d,
+ 0x011f,
+ 0x0121,
+ 0x0123,
+ 0x0125,
+ 0x0127,
+ 0x0129,
+ 0x012b,
+ 0x012d,
+ 0x012f,
+ 0x0133,
+ 0x0135,
+ 0x0137,
+ 0x013a,
+ 0x013c,
+ 0x013e,
+ 0x0140,
+ 0x0142,
+ 0x0144,
+ 0x0146,
+ 0x0148,
+ 0x014b,
+ 0x014d,
+ 0x014f,
+ 0x0151,
+ 0x0153,
+ 0x0155,
+ 0x0157,
+ 0x0159,
+ 0x015b,
+ 0x015d,
+ 0x015f,
+ 0x0161,
+ 0x0163,
+ 0x0165,
+ 0x0167,
+ 0x0169,
+ 0x016b,
+ 0x016d,
+ 0x016f,
+ 0x0171,
+ 0x0173,
+ 0x0175,
+ 0x0177,
+ 0x017a,
+ 0x017c,
+ 0x017e,
+ 0x0183,
+ 0x0185,
+ 0x0188,
+ 0x018c,
+ 0x0192,
+ 0x0195,
+ 0x0199,
+ 0x019a,
+ 0x019e,
+ 0x01a1,
+ 0x01a3,
+ 0x01a5,
+ 0x01a8,
+ 0x01ad,
+ 0x01b0,
+ 0x01b4,
+ 0x01b6,
+ 0x01b9,
+ 0x01bd,
+ 0x01bf,
+ 0x01c6,
+ 0x01c9,
+ 0x01cc,
+ 0x01ce,
+ 0x01d0,
+ 0x01d2,
+ 0x01d4,
+ 0x01d6,
+ 0x01d8,
+ 0x01da,
+ 0x01dc,
+ 0x01dd,
+ 0x01df,
+ 0x01e1,
+ 0x01e3,
+ 0x01e5,
+ 0x01e7,
+ 0x01e9,
+ 0x01eb,
+ 0x01ed,
+ 0x01ef,
+ 0x01f3,
+ 0x01f5,
+ 0x01f9,
+ 0x01fb,
+ 0x01fd,
+ 0x01ff,
+ 0x0201,
+ 0x0203,
+ 0x0205,
+ 0x0207,
+ 0x0209,
+ 0x020b,
+ 0x020d,
+ 0x020f,
+ 0x0211,
+ 0x0213,
+ 0x0215,
+ 0x0217,
+ 0x0219,
+ 0x021b,
+ 0x021d,
+ 0x021f,
+ 0x0223,
+ 0x0225,
+ 0x0227,
+ 0x0229,
+ 0x022b,
+ 0x022d,
+ 0x022f,
+ 0x0231,
+ 0x0233,
+ 0x023c,
+ 0x0253,
+ 0x0254,
+ 0x0256,
+ 0x0257,
+ 0x0259,
+ 0x025b,
+ 0x0260,
+ 0x0263,
+ 0x0268,
+ 0x0269,
+ 0x026f,
+ 0x0272,
+ 0x0275,
+ 0x0280,
+ 0x0283,
+ 0x0288,
+ 0x028a,
+ 0x028b,
+ 0x0292,
+ 0x0294,
+ 0x03ac,
+ 0x03ad,
+ 0x03ae,
+ 0x03af,
+ 0x03b1,
+ 0x03b2,
+ 0x03b3,
+ 0x03b4,
+ 0x03b5,
+ 0x03b6,
+ 0x03b7,
+ 0x03b8,
+ 0x03b9,
+ 0x03ba,
+ 0x03bb,
+ 0x03bc,
+ 0x03bd,
+ 0x03be,
+ 0x03bf,
+ 0x03c0,
+ 0x03c1,
+ 0x03c3,
+ 0x03c4,
+ 0x03c5,
+ 0x03c6,
+ 0x03c7,
+ 0x03c8,
+ 0x03c9,
+ 0x03ca,
+ 0x03cb,
+ 0x03cc,
+ 0x03cd,
+ 0x03ce,
+ 0x03d9,
+ 0x03db,
+ 0x03dd,
+ 0x03df,
+ 0x03e1,
+ 0x03e3,
+ 0x03e5,
+ 0x03e7,
+ 0x03e9,
+ 0x03eb,
+ 0x03ed,
+ 0x03ef,
+ 0x03f2,
+ 0x03f8,
+ 0x03fb,
+ 0x0430,
+ 0x0431,
+ 0x0432,
+ 0x0433,
+ 0x0434,
+ 0x0435,
+ 0x0436,
+ 0x0437,
+ 0x0438,
+ 0x0439,
+ 0x043a,
+ 0x043b,
+ 0x043c,
+ 0x043d,
+ 0x043e,
+ 0x043f,
+ 0x0440,
+ 0x0441,
+ 0x0442,
+ 0x0443,
+ 0x0444,
+ 0x0445,
+ 0x0446,
+ 0x0447,
+ 0x0448,
+ 0x0449,
+ 0x044a,
+ 0x044b,
+ 0x044c,
+ 0x044d,
+ 0x044e,
+ 0x044f,
+ 0x0450,
+ 0x0451,
+ 0x0452,
+ 0x0453,
+ 0x0454,
+ 0x0455,
+ 0x0456,
+ 0x0457,
+ 0x0458,
+ 0x0459,
+ 0x045a,
+ 0x045b,
+ 0x045c,
+ 0x045d,
+ 0x045e,
+ 0x045f,
+ 0x0461,
+ 0x0463,
+ 0x0465,
+ 0x0467,
+ 0x0469,
+ 0x046b,
+ 0x046d,
+ 0x046f,
+ 0x0471,
+ 0x0473,
+ 0x0475,
+ 0x0477,
+ 0x0479,
+ 0x047b,
+ 0x047d,
+ 0x047f,
+ 0x0481,
+ 0x048b,
+ 0x048d,
+ 0x048f,
+ 0x0491,
+ 0x0493,
+ 0x0495,
+ 0x0497,
+ 0x0499,
+ 0x049b,
+ 0x049d,
+ 0x049f,
+ 0x04a1,
+ 0x04a3,
+ 0x04a5,
+ 0x04a7,
+ 0x04a9,
+ 0x04ab,
+ 0x04ad,
+ 0x04af,
+ 0x04b1,
+ 0x04b3,
+ 0x04b5,
+ 0x04b7,
+ 0x04b9,
+ 0x04bb,
+ 0x04bd,
+ 0x04bf,
+ 0x04c2,
+ 0x04c4,
+ 0x04c6,
+ 0x04c8,
+ 0x04ca,
+ 0x04cc,
+ 0x04ce,
+ 0x04d1,
+ 0x04d3,
+ 0x04d5,
+ 0x04d7,
+ 0x04d9,
+ 0x04db,
+ 0x04dd,
+ 0x04df,
+ 0x04e1,
+ 0x04e3,
+ 0x04e5,
+ 0x04e7,
+ 0x04e9,
+ 0x04eb,
+ 0x04ed,
+ 0x04ef,
+ 0x04f1,
+ 0x04f3,
+ 0x04f5,
+ 0x04f7,
+ 0x04f9,
+ 0x0501,
+ 0x0503,
+ 0x0505,
+ 0x0507,
+ 0x0509,
+ 0x050b,
+ 0x050d,
+ 0x050f,
+ 0x0561,
+ 0x0562,
+ 0x0563,
+ 0x0564,
+ 0x0565,
+ 0x0566,
+ 0x0567,
+ 0x0568,
+ 0x0569,
+ 0x056a,
+ 0x056b,
+ 0x056c,
+ 0x056d,
+ 0x056e,
+ 0x056f,
+ 0x0570,
+ 0x0571,
+ 0x0572,
+ 0x0573,
+ 0x0574,
+ 0x0575,
+ 0x0576,
+ 0x0577,
+ 0x0578,
+ 0x0579,
+ 0x057a,
+ 0x057b,
+ 0x057c,
+ 0x057d,
+ 0x057e,
+ 0x057f,
+ 0x0580,
+ 0x0581,
+ 0x0582,
+ 0x0583,
+ 0x0584,
+ 0x0585,
+ 0x0586,
+ 0x1e01,
+ 0x1e03,
+ 0x1e05,
+ 0x1e07,
+ 0x1e09,
+ 0x1e0b,
+ 0x1e0d,
+ 0x1e0f,
+ 0x1e11,
+ 0x1e13,
+ 0x1e15,
+ 0x1e17,
+ 0x1e19,
+ 0x1e1b,
+ 0x1e1d,
+ 0x1e1f,
+ 0x1e21,
+ 0x1e23,
+ 0x1e25,
+ 0x1e27,
+ 0x1e29,
+ 0x1e2b,
+ 0x1e2d,
+ 0x1e2f,
+ 0x1e31,
+ 0x1e33,
+ 0x1e35,
+ 0x1e37,
+ 0x1e39,
+ 0x1e3b,
+ 0x1e3d,
+ 0x1e3f,
+ 0x1e41,
+ 0x1e43,
+ 0x1e45,
+ 0x1e47,
+ 0x1e49,
+ 0x1e4b,
+ 0x1e4d,
+ 0x1e4f,
+ 0x1e51,
+ 0x1e53,
+ 0x1e55,
+ 0x1e57,
+ 0x1e59,
+ 0x1e5b,
+ 0x1e5d,
+ 0x1e5f,
+ 0x1e61,
+ 0x1e63,
+ 0x1e65,
+ 0x1e67,
+ 0x1e69,
+ 0x1e6b,
+ 0x1e6d,
+ 0x1e6f,
+ 0x1e71,
+ 0x1e73,
+ 0x1e75,
+ 0x1e77,
+ 0x1e79,
+ 0x1e7b,
+ 0x1e7d,
+ 0x1e7f,
+ 0x1e81,
+ 0x1e83,
+ 0x1e85,
+ 0x1e87,
+ 0x1e89,
+ 0x1e8b,
+ 0x1e8d,
+ 0x1e8f,
+ 0x1e91,
+ 0x1e93,
+ 0x1e95,
+ 0x1ea1,
+ 0x1ea3,
+ 0x1ea5,
+ 0x1ea7,
+ 0x1ea9,
+ 0x1eab,
+ 0x1ead,
+ 0x1eaf,
+ 0x1eb1,
+ 0x1eb3,
+ 0x1eb5,
+ 0x1eb7,
+ 0x1eb9,
+ 0x1ebb,
+ 0x1ebd,
+ 0x1ebf,
+ 0x1ec1,
+ 0x1ec3,
+ 0x1ec5,
+ 0x1ec7,
+ 0x1ec9,
+ 0x1ecb,
+ 0x1ecd,
+ 0x1ecf,
+ 0x1ed1,
+ 0x1ed3,
+ 0x1ed5,
+ 0x1ed7,
+ 0x1ed9,
+ 0x1edb,
+ 0x1edd,
+ 0x1edf,
+ 0x1ee1,
+ 0x1ee3,
+ 0x1ee5,
+ 0x1ee7,
+ 0x1ee9,
+ 0x1eeb,
+ 0x1eed,
+ 0x1eef,
+ 0x1ef1,
+ 0x1ef3,
+ 0x1ef5,
+ 0x1ef7,
+ 0x1ef9,
+ 0x1f00,
+ 0x1f01,
+ 0x1f02,
+ 0x1f03,
+ 0x1f04,
+ 0x1f05,
+ 0x1f06,
+ 0x1f07,
+ 0x1f10,
+ 0x1f11,
+ 0x1f12,
+ 0x1f13,
+ 0x1f14,
+ 0x1f15,
+ 0x1f20,
+ 0x1f21,
+ 0x1f22,
+ 0x1f23,
+ 0x1f24,
+ 0x1f25,
+ 0x1f26,
+ 0x1f27,
+ 0x1f30,
+ 0x1f31,
+ 0x1f32,
+ 0x1f33,
+ 0x1f34,
+ 0x1f35,
+ 0x1f36,
+ 0x1f37,
+ 0x1f40,
+ 0x1f41,
+ 0x1f42,
+ 0x1f43,
+ 0x1f44,
+ 0x1f45,
+ 0x1f51,
+ 0x1f53,
+ 0x1f55,
+ 0x1f57,
+ 0x1f60,
+ 0x1f61,
+ 0x1f62,
+ 0x1f63,
+ 0x1f64,
+ 0x1f65,
+ 0x1f66,
+ 0x1f67,
+ 0x1f70,
+ 0x1f71,
+ 0x1f72,
+ 0x1f73,
+ 0x1f74,
+ 0x1f75,
+ 0x1f76,
+ 0x1f77,
+ 0x1f78,
+ 0x1f79,
+ 0x1f7a,
+ 0x1f7b,
+ 0x1f7c,
+ 0x1f7d,
+ 0x1fb0,
+ 0x1fb1,
+ 0x1fd0,
+ 0x1fd1,
+ 0x1fe0,
+ 0x1fe1,
+ 0x1fe5,
+ 0x2170,
+ 0x2171,
+ 0x2172,
+ 0x2173,
+ 0x2174,
+ 0x2175,
+ 0x2176,
+ 0x2177,
+ 0x2178,
+ 0x2179,
+ 0x217a,
+ 0x217b,
+ 0x217c,
+ 0x217d,
+ 0x217e,
+ 0x217f,
+ 0x24d0,
+ 0x24d1,
+ 0x24d2,
+ 0x24d3,
+ 0x24d4,
+ 0x24d5,
+ 0x24d6,
+ 0x24d7,
+ 0x24d8,
+ 0x24d9,
+ 0x24da,
+ 0x24db,
+ 0x24dc,
+ 0x24dd,
+ 0x24de,
+ 0x24df,
+ 0x24e0,
+ 0x24e1,
+ 0x24e2,
+ 0x24e3,
+ 0x24e4,
+ 0x24e5,
+ 0x24e6,
+ 0x24e7,
+ 0x24e8,
+ 0x24e9,
+ 0x2c30,
+ 0x2c31,
+ 0x2c32,
+ 0x2c33,
+ 0x2c34,
+ 0x2c35,
+ 0x2c36,
+ 0x2c37,
+ 0x2c38,
+ 0x2c39,
+ 0x2c3a,
+ 0x2c3b,
+ 0x2c3c,
+ 0x2c3d,
+ 0x2c3e,
+ 0x2c3f,
+ 0x2c40,
+ 0x2c41,
+ 0x2c42,
+ 0x2c43,
+ 0x2c44,
+ 0x2c45,
+ 0x2c46,
+ 0x2c47,
+ 0x2c48,
+ 0x2c49,
+ 0x2c4a,
+ 0x2c4b,
+ 0x2c4c,
+ 0x2c4d,
+ 0x2c4e,
+ 0x2c4f,
+ 0x2c50,
+ 0x2c51,
+ 0x2c52,
+ 0x2c53,
+ 0x2c54,
+ 0x2c55,
+ 0x2c56,
+ 0x2c57,
+ 0x2c58,
+ 0x2c59,
+ 0x2c5a,
+ 0x2c5b,
+ 0x2c5c,
+ 0x2c5d,
+ 0x2c5e,
+ 0x2c81,
+ 0x2c83,
+ 0x2c85,
+ 0x2c87,
+ 0x2c89,
+ 0x2c8b,
+ 0x2c8d,
+ 0x2c8f,
+ 0x2c91,
+ 0x2c93,
+ 0x2c95,
+ 0x2c97,
+ 0x2c99,
+ 0x2c9b,
+ 0x2c9d,
+ 0x2c9f,
+ 0x2ca1,
+ 0x2ca3,
+ 0x2ca5,
+ 0x2ca7,
+ 0x2ca9,
+ 0x2cab,
+ 0x2cad,
+ 0x2caf,
+ 0x2cb1,
+ 0x2cb3,
+ 0x2cb5,
+ 0x2cb7,
+ 0x2cb9,
+ 0x2cbb,
+ 0x2cbd,
+ 0x2cbf,
+ 0x2cc1,
+ 0x2cc3,
+ 0x2cc5,
+ 0x2cc7,
+ 0x2cc9,
+ 0x2ccb,
+ 0x2ccd,
+ 0x2ccf,
+ 0x2cd1,
+ 0x2cd3,
+ 0x2cd5,
+ 0x2cd7,
+ 0x2cd9,
+ 0x2cdb,
+ 0x2cdd,
+ 0x2cdf,
+ 0x2ce1,
+ 0x2ce3,
+ 0x2d00,
+ 0x2d01,
+ 0x2d02,
+ 0x2d03,
+ 0x2d04,
+ 0x2d05,
+ 0x2d06,
+ 0x2d07,
+ 0x2d08,
+ 0x2d09,
+ 0x2d0a,
+ 0x2d0b,
+ 0x2d0c,
+ 0x2d0d,
+ 0x2d0e,
+ 0x2d0f,
+ 0x2d10,
+ 0x2d11,
+ 0x2d12,
+ 0x2d13,
+ 0x2d14,
+ 0x2d15,
+ 0x2d16,
+ 0x2d17,
+ 0x2d18,
+ 0x2d19,
+ 0x2d1a,
+ 0x2d1b,
+ 0x2d1c,
+ 0x2d1d,
+ 0x2d1e,
+ 0x2d1f,
+ 0x2d20,
+ 0x2d21,
+ 0x2d22,
+ 0x2d23,
+ 0x2d24,
+ 0x2d25,
+ 0xff41,
+ 0xff42,
+ 0xff43,
+ 0xff44,
+ 0xff45,
+ 0xff46,
+ 0xff47,
+ 0xff48,
+ 0xff49,
+ 0xff4a,
+ 0xff4b,
+ 0xff4c,
+ 0xff4d,
+ 0xff4e,
+ 0xff4f,
+ 0xff50,
+ 0xff51,
+ 0xff52,
+ 0xff53,
+ 0xff54,
+ 0xff55,
+ 0xff56,
+ 0xff57,
+ 0xff58,
+ 0xff59,
+ 0xff5a,
+ 0x10428,
+ 0x10429,
+ 0x1042a,
+ 0x1042b,
+ 0x1042c,
+ 0x1042d,
+ 0x1042e,
+ 0x1042f,
+ 0x10430,
+ 0x10431,
+ 0x10432,
+ 0x10433,
+ 0x10434,
+ 0x10435,
+ 0x10436,
+ 0x10437,
+ 0x10438,
+ 0x10439,
+ 0x1043a,
+ 0x1043b,
+ 0x1043c,
+ 0x1043d,
+ 0x1043e,
+ 0x1043f,
+ 0x10440,
+ 0x10441,
+ 0x10442,
+ 0x10443,
+ 0x10444,
+ 0x10445,
+ 0x10446,
+ 0x10447,
+ 0x10448,
+ 0x10449,
+ 0x1044a,
+ 0x1044b,
+ 0x1044c,
+ 0x1044d,
+ 0x1044e,
+ 0x1044f
+ };
+
+ static final int CaseUnfold_11_To[][] = new int[][] { /* 856 rows of 1-3 code points each. NOTE(review): row i presumably belongs to entry i of the matching From table declared just above (same order, e.g. 0x0061 <-> {0x0041}) - confirm against the code that loads these tables. */
+ {0x0041},
+ {0x0042},
+ {0x0043},
+ {0x0044},
+ {0x0045},
+ {0x0046},
+ {0x0047},
+ {0x0048},
+ {0x004a},
+ {0x212a, 0x004b},
+ {0x004c},
+ {0x004d},
+ {0x004e},
+ {0x004f},
+ {0x0050},
+ {0x0051},
+ {0x0052},
+ {0x0053, 0x017f},
+ {0x0054},
+ {0x0055},
+ {0x0056},
+ {0x0057},
+ {0x0058},
+ {0x0059},
+ {0x005a},
+ {0x00c0},
+ {0x00c1},
+ {0x00c2},
+ {0x00c3},
+ {0x00c4},
+ {0x212b, 0x00c5},
+ {0x00c6},
+ {0x00c7},
+ {0x00c8},
+ {0x00c9},
+ {0x00ca},
+ {0x00cb},
+ {0x00cc},
+ {0x00cd},
+ {0x00ce},
+ {0x00cf},
+ {0x00d0},
+ {0x00d1},
+ {0x00d2},
+ {0x00d3},
+ {0x00d4},
+ {0x00d5},
+ {0x00d6},
+ {0x00d8},
+ {0x00d9},
+ {0x00da},
+ {0x00db},
+ {0x00dc},
+ {0x00dd},
+ {0x00de},
+ {0x0178},
+ {0x0100},
+ {0x0102},
+ {0x0104},
+ {0x0106},
+ {0x0108},
+ {0x010a},
+ {0x010c},
+ {0x010e},
+ {0x0110},
+ {0x0112},
+ {0x0114},
+ {0x0116},
+ {0x0118},
+ {0x011a},
+ {0x011c},
+ {0x011e},
+ {0x0120},
+ {0x0122},
+ {0x0124},
+ {0x0126},
+ {0x0128},
+ {0x012a},
+ {0x012c},
+ {0x012e},
+ {0x0132},
+ {0x0134},
+ {0x0136},
+ {0x0139},
+ {0x013b},
+ {0x013d},
+ {0x013f},
+ {0x0141},
+ {0x0143},
+ {0x0145},
+ {0x0147},
+ {0x014a},
+ {0x014c},
+ {0x014e},
+ {0x0150},
+ {0x0152},
+ {0x0154},
+ {0x0156},
+ {0x0158},
+ {0x015a},
+ {0x015c},
+ {0x015e},
+ {0x0160},
+ {0x0162},
+ {0x0164},
+ {0x0166},
+ {0x0168},
+ {0x016a},
+ {0x016c},
+ {0x016e},
+ {0x0170},
+ {0x0172},
+ {0x0174},
+ {0x0176},
+ {0x0179},
+ {0x017b},
+ {0x017d},
+ {0x0182},
+ {0x0184},
+ {0x0187},
+ {0x018b},
+ {0x0191},
+ {0x01f6},
+ {0x0198},
+ {0x023d},
+ {0x0220},
+ {0x01a0},
+ {0x01a2},
+ {0x01a4},
+ {0x01a7},
+ {0x01ac},
+ {0x01af},
+ {0x01b3},
+ {0x01b5},
+ {0x01b8},
+ {0x01bc},
+ {0x01f7},
+ {0x01c4, 0x01c5},
+ {0x01c7, 0x01c8},
+ {0x01ca, 0x01cb},
+ {0x01cd},
+ {0x01cf},
+ {0x01d1},
+ {0x01d3},
+ {0x01d5},
+ {0x01d7},
+ {0x01d9},
+ {0x01db},
+ {0x018e},
+ {0x01de},
+ {0x01e0},
+ {0x01e2},
+ {0x01e4},
+ {0x01e6},
+ {0x01e8},
+ {0x01ea},
+ {0x01ec},
+ {0x01ee},
+ {0x01f1, 0x01f2},
+ {0x01f4},
+ {0x01f8},
+ {0x01fa},
+ {0x01fc},
+ {0x01fe},
+ {0x0200},
+ {0x0202},
+ {0x0204},
+ {0x0206},
+ {0x0208},
+ {0x020a},
+ {0x020c},
+ {0x020e},
+ {0x0210},
+ {0x0212},
+ {0x0214},
+ {0x0216},
+ {0x0218},
+ {0x021a},
+ {0x021c},
+ {0x021e},
+ {0x0222},
+ {0x0224},
+ {0x0226},
+ {0x0228},
+ {0x022a},
+ {0x022c},
+ {0x022e},
+ {0x0230},
+ {0x0232},
+ {0x023b},
+ {0x0181},
+ {0x0186},
+ {0x0189},
+ {0x018a},
+ {0x018f},
+ {0x0190},
+ {0x0193},
+ {0x0194},
+ {0x0197},
+ {0x0196},
+ {0x019c},
+ {0x019d},
+ {0x019f},
+ {0x01a6},
+ {0x01a9},
+ {0x01ae},
+ {0x01b1},
+ {0x01b2},
+ {0x01b7},
+ {0x0241},
+ {0x0386},
+ {0x0388},
+ {0x0389},
+ {0x038a},
+ {0x0391},
+ {0x0392, 0x03d0},
+ {0x0393},
+ {0x0394},
+ {0x03f5, 0x0395},
+ {0x0396},
+ {0x0397},
+ {0x03f4, 0x0398, 0x03d1},
+ {0x1fbe, 0x0399, 0x0345},
+ {0x03f0, 0x039a},
+ {0x039b},
+ {0x00b5, 0x039c},
+ {0x039d},
+ {0x039e},
+ {0x039f},
+ {0x03a0, 0x03d6},
+ {0x03f1, 0x03a1},
+ {0x03a3, 0x03c2},
+ {0x03a4},
+ {0x03a5},
+ {0x03a6, 0x03d5},
+ {0x03a7},
+ {0x03a8},
+ {0x03a9, 0x2126},
+ {0x03aa},
+ {0x03ab},
+ {0x038c},
+ {0x038e},
+ {0x038f},
+ {0x03d8},
+ {0x03da},
+ {0x03dc},
+ {0x03de},
+ {0x03e0},
+ {0x03e2},
+ {0x03e4},
+ {0x03e6},
+ {0x03e8},
+ {0x03ea},
+ {0x03ec},
+ {0x03ee},
+ {0x03f9},
+ {0x03f7},
+ {0x03fa},
+ {0x0410},
+ {0x0411},
+ {0x0412},
+ {0x0413},
+ {0x0414},
+ {0x0415},
+ {0x0416},
+ {0x0417},
+ {0x0418},
+ {0x0419},
+ {0x041a},
+ {0x041b},
+ {0x041c},
+ {0x041d},
+ {0x041e},
+ {0x041f},
+ {0x0420},
+ {0x0421},
+ {0x0422},
+ {0x0423},
+ {0x0424},
+ {0x0425},
+ {0x0426},
+ {0x0427},
+ {0x0428},
+ {0x0429},
+ {0x042a},
+ {0x042b},
+ {0x042c},
+ {0x042d},
+ {0x042e},
+ {0x042f},
+ {0x0400},
+ {0x0401},
+ {0x0402},
+ {0x0403},
+ {0x0404},
+ {0x0405},
+ {0x0406},
+ {0x0407},
+ {0x0408},
+ {0x0409},
+ {0x040a},
+ {0x040b},
+ {0x040c},
+ {0x040d},
+ {0x040e},
+ {0x040f},
+ {0x0460},
+ {0x0462},
+ {0x0464},
+ {0x0466},
+ {0x0468},
+ {0x046a},
+ {0x046c},
+ {0x046e},
+ {0x0470},
+ {0x0472},
+ {0x0474},
+ {0x0476},
+ {0x0478},
+ {0x047a},
+ {0x047c},
+ {0x047e},
+ {0x0480},
+ {0x048a},
+ {0x048c},
+ {0x048e},
+ {0x0490},
+ {0x0492},
+ {0x0494},
+ {0x0496},
+ {0x0498},
+ {0x049a},
+ {0x049c},
+ {0x049e},
+ {0x04a0},
+ {0x04a2},
+ {0x04a4},
+ {0x04a6},
+ {0x04a8},
+ {0x04aa},
+ {0x04ac},
+ {0x04ae},
+ {0x04b0},
+ {0x04b2},
+ {0x04b4},
+ {0x04b6},
+ {0x04b8},
+ {0x04ba},
+ {0x04bc},
+ {0x04be},
+ {0x04c1},
+ {0x04c3},
+ {0x04c5},
+ {0x04c7},
+ {0x04c9},
+ {0x04cb},
+ {0x04cd},
+ {0x04d0},
+ {0x04d2},
+ {0x04d4},
+ {0x04d6},
+ {0x04d8},
+ {0x04da},
+ {0x04dc},
+ {0x04de},
+ {0x04e0},
+ {0x04e2},
+ {0x04e4},
+ {0x04e6},
+ {0x04e8},
+ {0x04ea},
+ {0x04ec},
+ {0x04ee},
+ {0x04f0},
+ {0x04f2},
+ {0x04f4},
+ {0x04f6},
+ {0x04f8},
+ {0x0500},
+ {0x0502},
+ {0x0504},
+ {0x0506},
+ {0x0508},
+ {0x050a},
+ {0x050c},
+ {0x050e},
+ {0x0531},
+ {0x0532},
+ {0x0533},
+ {0x0534},
+ {0x0535},
+ {0x0536},
+ {0x0537},
+ {0x0538},
+ {0x0539},
+ {0x053a},
+ {0x053b},
+ {0x053c},
+ {0x053d},
+ {0x053e},
+ {0x053f},
+ {0x0540},
+ {0x0541},
+ {0x0542},
+ {0x0543},
+ {0x0544},
+ {0x0545},
+ {0x0546},
+ {0x0547},
+ {0x0548},
+ {0x0549},
+ {0x054a},
+ {0x054b},
+ {0x054c},
+ {0x054d},
+ {0x054e},
+ {0x054f},
+ {0x0550},
+ {0x0551},
+ {0x0552},
+ {0x0553},
+ {0x0554},
+ {0x0555},
+ {0x0556},
+ {0x1e00},
+ {0x1e02},
+ {0x1e04},
+ {0x1e06},
+ {0x1e08},
+ {0x1e0a},
+ {0x1e0c},
+ {0x1e0e},
+ {0x1e10},
+ {0x1e12},
+ {0x1e14},
+ {0x1e16},
+ {0x1e18},
+ {0x1e1a},
+ {0x1e1c},
+ {0x1e1e},
+ {0x1e20},
+ {0x1e22},
+ {0x1e24},
+ {0x1e26},
+ {0x1e28},
+ {0x1e2a},
+ {0x1e2c},
+ {0x1e2e},
+ {0x1e30},
+ {0x1e32},
+ {0x1e34},
+ {0x1e36},
+ {0x1e38},
+ {0x1e3a},
+ {0x1e3c},
+ {0x1e3e},
+ {0x1e40},
+ {0x1e42},
+ {0x1e44},
+ {0x1e46},
+ {0x1e48},
+ {0x1e4a},
+ {0x1e4c},
+ {0x1e4e},
+ {0x1e50},
+ {0x1e52},
+ {0x1e54},
+ {0x1e56},
+ {0x1e58},
+ {0x1e5a},
+ {0x1e5c},
+ {0x1e5e},
+ {0x1e9b, 0x1e60},
+ {0x1e62},
+ {0x1e64},
+ {0x1e66},
+ {0x1e68},
+ {0x1e6a},
+ {0x1e6c},
+ {0x1e6e},
+ {0x1e70},
+ {0x1e72},
+ {0x1e74},
+ {0x1e76},
+ {0x1e78},
+ {0x1e7a},
+ {0x1e7c},
+ {0x1e7e},
+ {0x1e80},
+ {0x1e82},
+ {0x1e84},
+ {0x1e86},
+ {0x1e88},
+ {0x1e8a},
+ {0x1e8c},
+ {0x1e8e},
+ {0x1e90},
+ {0x1e92},
+ {0x1e94},
+ {0x1ea0},
+ {0x1ea2},
+ {0x1ea4},
+ {0x1ea6},
+ {0x1ea8},
+ {0x1eaa},
+ {0x1eac},
+ {0x1eae},
+ {0x1eb0},
+ {0x1eb2},
+ {0x1eb4},
+ {0x1eb6},
+ {0x1eb8},
+ {0x1eba},
+ {0x1ebc},
+ {0x1ebe},
+ {0x1ec0},
+ {0x1ec2},
+ {0x1ec4},
+ {0x1ec6},
+ {0x1ec8},
+ {0x1eca},
+ {0x1ecc},
+ {0x1ece},
+ {0x1ed0},
+ {0x1ed2},
+ {0x1ed4},
+ {0x1ed6},
+ {0x1ed8},
+ {0x1eda},
+ {0x1edc},
+ {0x1ede},
+ {0x1ee0},
+ {0x1ee2},
+ {0x1ee4},
+ {0x1ee6},
+ {0x1ee8},
+ {0x1eea},
+ {0x1eec},
+ {0x1eee},
+ {0x1ef0},
+ {0x1ef2},
+ {0x1ef4},
+ {0x1ef6},
+ {0x1ef8},
+ {0x1f08},
+ {0x1f09},
+ {0x1f0a},
+ {0x1f0b},
+ {0x1f0c},
+ {0x1f0d},
+ {0x1f0e},
+ {0x1f0f},
+ {0x1f18},
+ {0x1f19},
+ {0x1f1a},
+ {0x1f1b},
+ {0x1f1c},
+ {0x1f1d},
+ {0x1f28},
+ {0x1f29},
+ {0x1f2a},
+ {0x1f2b},
+ {0x1f2c},
+ {0x1f2d},
+ {0x1f2e},
+ {0x1f2f},
+ {0x1f38},
+ {0x1f39},
+ {0x1f3a},
+ {0x1f3b},
+ {0x1f3c},
+ {0x1f3d},
+ {0x1f3e},
+ {0x1f3f},
+ {0x1f48},
+ {0x1f49},
+ {0x1f4a},
+ {0x1f4b},
+ {0x1f4c},
+ {0x1f4d},
+ {0x1f59},
+ {0x1f5b},
+ {0x1f5d},
+ {0x1f5f},
+ {0x1f68},
+ {0x1f69},
+ {0x1f6a},
+ {0x1f6b},
+ {0x1f6c},
+ {0x1f6d},
+ {0x1f6e},
+ {0x1f6f},
+ {0x1fba},
+ {0x1fbb},
+ {0x1fc8},
+ {0x1fc9},
+ {0x1fca},
+ {0x1fcb},
+ {0x1fda},
+ {0x1fdb},
+ {0x1ff8},
+ {0x1ff9},
+ {0x1fea},
+ {0x1feb},
+ {0x1ffa},
+ {0x1ffb},
+ {0x1fb8},
+ {0x1fb9},
+ {0x1fd8},
+ {0x1fd9},
+ {0x1fe8},
+ {0x1fe9},
+ {0x1fec},
+ {0x2160},
+ {0x2161},
+ {0x2162},
+ {0x2163},
+ {0x2164},
+ {0x2165},
+ {0x2166},
+ {0x2167},
+ {0x2168},
+ {0x2169},
+ {0x216a},
+ {0x216b},
+ {0x216c},
+ {0x216d},
+ {0x216e},
+ {0x216f},
+ {0x24b6},
+ {0x24b7},
+ {0x24b8},
+ {0x24b9},
+ {0x24ba},
+ {0x24bb},
+ {0x24bc},
+ {0x24bd},
+ {0x24be},
+ {0x24bf},
+ {0x24c0},
+ {0x24c1},
+ {0x24c2},
+ {0x24c3},
+ {0x24c4},
+ {0x24c5},
+ {0x24c6},
+ {0x24c7},
+ {0x24c8},
+ {0x24c9},
+ {0x24ca},
+ {0x24cb},
+ {0x24cc},
+ {0x24cd},
+ {0x24ce},
+ {0x24cf},
+ {0x2c00},
+ {0x2c01},
+ {0x2c02},
+ {0x2c03},
+ {0x2c04},
+ {0x2c05},
+ {0x2c06},
+ {0x2c07},
+ {0x2c08},
+ {0x2c09},
+ {0x2c0a},
+ {0x2c0b},
+ {0x2c0c},
+ {0x2c0d},
+ {0x2c0e},
+ {0x2c0f},
+ {0x2c10},
+ {0x2c11},
+ {0x2c12},
+ {0x2c13},
+ {0x2c14},
+ {0x2c15},
+ {0x2c16},
+ {0x2c17},
+ {0x2c18},
+ {0x2c19},
+ {0x2c1a},
+ {0x2c1b},
+ {0x2c1c},
+ {0x2c1d},
+ {0x2c1e},
+ {0x2c1f},
+ {0x2c20},
+ {0x2c21},
+ {0x2c22},
+ {0x2c23},
+ {0x2c24},
+ {0x2c25},
+ {0x2c26},
+ {0x2c27},
+ {0x2c28},
+ {0x2c29},
+ {0x2c2a},
+ {0x2c2b},
+ {0x2c2c},
+ {0x2c2d},
+ {0x2c2e},
+ {0x2c80},
+ {0x2c82},
+ {0x2c84},
+ {0x2c86},
+ {0x2c88},
+ {0x2c8a},
+ {0x2c8c},
+ {0x2c8e},
+ {0x2c90},
+ {0x2c92},
+ {0x2c94},
+ {0x2c96},
+ {0x2c98},
+ {0x2c9a},
+ {0x2c9c},
+ {0x2c9e},
+ {0x2ca0},
+ {0x2ca2},
+ {0x2ca4},
+ {0x2ca6},
+ {0x2ca8},
+ {0x2caa},
+ {0x2cac},
+ {0x2cae},
+ {0x2cb0},
+ {0x2cb2},
+ {0x2cb4},
+ {0x2cb6},
+ {0x2cb8},
+ {0x2cba},
+ {0x2cbc},
+ {0x2cbe},
+ {0x2cc0},
+ {0x2cc2},
+ {0x2cc4},
+ {0x2cc6},
+ {0x2cc8},
+ {0x2cca},
+ {0x2ccc},
+ {0x2cce},
+ {0x2cd0},
+ {0x2cd2},
+ {0x2cd4},
+ {0x2cd6},
+ {0x2cd8},
+ {0x2cda},
+ {0x2cdc},
+ {0x2cde},
+ {0x2ce0},
+ {0x2ce2},
+ {0x10a0},
+ {0x10a1},
+ {0x10a2},
+ {0x10a3},
+ {0x10a4},
+ {0x10a5},
+ {0x10a6},
+ {0x10a7},
+ {0x10a8},
+ {0x10a9},
+ {0x10aa},
+ {0x10ab},
+ {0x10ac},
+ {0x10ad},
+ {0x10ae},
+ {0x10af},
+ {0x10b0},
+ {0x10b1},
+ {0x10b2},
+ {0x10b3},
+ {0x10b4},
+ {0x10b5},
+ {0x10b6},
+ {0x10b7},
+ {0x10b8},
+ {0x10b9},
+ {0x10ba},
+ {0x10bb},
+ {0x10bc},
+ {0x10bd},
+ {0x10be},
+ {0x10bf},
+ {0x10c0},
+ {0x10c1},
+ {0x10c2},
+ {0x10c3},
+ {0x10c4},
+ {0x10c5},
+ {0xff21},
+ {0xff22},
+ {0xff23},
+ {0xff24},
+ {0xff25},
+ {0xff26},
+ {0xff27},
+ {0xff28},
+ {0xff29},
+ {0xff2a},
+ {0xff2b},
+ {0xff2c},
+ {0xff2d},
+ {0xff2e},
+ {0xff2f},
+ {0xff30},
+ {0xff31},
+ {0xff32},
+ {0xff33},
+ {0xff34},
+ {0xff35},
+ {0xff36},
+ {0xff37},
+ {0xff38},
+ {0xff39},
+ {0xff3a},
+ {0x10400},
+ {0x10401},
+ {0x10402},
+ {0x10403},
+ {0x10404},
+ {0x10405},
+ {0x10406},
+ {0x10407},
+ {0x10408},
+ {0x10409},
+ {0x1040a},
+ {0x1040b},
+ {0x1040c},
+ {0x1040d},
+ {0x1040e},
+ {0x1040f},
+ {0x10410},
+ {0x10411},
+ {0x10412},
+ {0x10413},
+ {0x10414},
+ {0x10415},
+ {0x10416},
+ {0x10417},
+ {0x10418},
+ {0x10419},
+ {0x1041a},
+ {0x1041b},
+ {0x1041c},
+ {0x1041d},
+ {0x1041e},
+ {0x1041f},
+ {0x10420},
+ {0x10421},
+ {0x10422},
+ {0x10423},
+ {0x10424},
+ {0x10425},
+ {0x10426},
+ {0x10427}
+ }; /* CaseUnfold_11_To */
+
+ static final int CaseUnfold_11_Locale_From[] = new int[] { /* single entry held apart from the main table: 0x0069 ('i'). NOTE(review): presumably because its mapping is locale-dependent - confirm with the code that consumes the *_Locale tables. */
+ 0x0069
+ };
+
+ static final int CaseUnfold_11_Locale_To[][]= new int[][] { /* one row for the single entry of CaseUnfold_11_Locale_From: 0x0049 ('I'); the main To table above has no row for 0x0049 */
+ {0x0049}
+ };
+
+ static final int CaseUnfold_12[][] = new int[][] { /* 58 row pairs, one pair per source line: a two-code-point sequence, then a row of 1-2 single code points. NOTE(review): the second row presumably lists the characters whose case folding is the sequence in the first - confirm in the reader of this table. */
+ {0x0061, 0x02be}, {0x1e9a},
+ {0x0066, 0x0066}, {0xfb00},
+ {0x0066, 0x0069}, {0xfb01},
+ {0x0066, 0x006c}, {0xfb02},
+ {0x0068, 0x0331}, {0x1e96},
+ {0x006a, 0x030c}, {0x01f0},
+ {0x0073, 0x0073}, {0x00df},
+ {0x0073, 0x0074}, {0xfb05, 0xfb06},
+ {0x0074, 0x0308}, {0x1e97},
+ {0x0077, 0x030a}, {0x1e98},
+ {0x0079, 0x030a}, {0x1e99},
+ {0x02bc, 0x006e}, {0x0149},
+ {0x03ac, 0x03b9}, {0x1fb4},
+ {0x03ae, 0x03b9}, {0x1fc4},
+ {0x03b1, 0x0342}, {0x1fb6},
+ {0x03b1, 0x03b9}, {0x1fb3, 0x1fbc},
+ {0x03b7, 0x0342}, {0x1fc6},
+ {0x03b7, 0x03b9}, {0x1fc3, 0x1fcc},
+ {0x03b9, 0x0342}, {0x1fd6},
+ {0x03c1, 0x0313}, {0x1fe4},
+ {0x03c5, 0x0313}, {0x1f50},
+ {0x03c5, 0x0342}, {0x1fe6},
+ {0x03c9, 0x0342}, {0x1ff6},
+ {0x03c9, 0x03b9}, {0x1ff3, 0x1ffc},
+ {0x03ce, 0x03b9}, {0x1ff4},
+ {0x0565, 0x0582}, {0x0587},
+ {0x0574, 0x0565}, {0xfb14},
+ {0x0574, 0x056b}, {0xfb15},
+ {0x0574, 0x056d}, {0xfb17},
+ {0x0574, 0x0576}, {0xfb13},
+ {0x057e, 0x0576}, {0xfb16},
+ {0x1f00, 0x03b9}, {0x1f88, 0x1f80},
+ {0x1f01, 0x03b9}, {0x1f81, 0x1f89},
+ {0x1f02, 0x03b9}, {0x1f82, 0x1f8a},
+ {0x1f03, 0x03b9}, {0x1f83, 0x1f8b},
+ {0x1f04, 0x03b9}, {0x1f84, 0x1f8c},
+ {0x1f05, 0x03b9}, {0x1f85, 0x1f8d},
+ {0x1f06, 0x03b9}, {0x1f86, 0x1f8e},
+ {0x1f07, 0x03b9}, {0x1f87, 0x1f8f},
+ {0x1f20, 0x03b9}, {0x1f90, 0x1f98},
+ {0x1f21, 0x03b9}, {0x1f91, 0x1f99},
+ {0x1f22, 0x03b9}, {0x1f92, 0x1f9a},
+ {0x1f23, 0x03b9}, {0x1f93, 0x1f9b},
+ {0x1f24, 0x03b9}, {0x1f94, 0x1f9c},
+ {0x1f25, 0x03b9}, {0x1f95, 0x1f9d},
+ {0x1f26, 0x03b9}, {0x1f96, 0x1f9e},
+ {0x1f27, 0x03b9}, {0x1f97, 0x1f9f},
+ {0x1f60, 0x03b9}, {0x1fa0, 0x1fa8},
+ {0x1f61, 0x03b9}, {0x1fa1, 0x1fa9},
+ {0x1f62, 0x03b9}, {0x1fa2, 0x1faa},
+ {0x1f63, 0x03b9}, {0x1fa3, 0x1fab},
+ {0x1f64, 0x03b9}, {0x1fa4, 0x1fac},
+ {0x1f65, 0x03b9}, {0x1fa5, 0x1fad},
+ {0x1f66, 0x03b9}, {0x1fa6, 0x1fae},
+ {0x1f67, 0x03b9}, {0x1fa7, 0x1faf},
+ {0x1f70, 0x03b9}, {0x1fb2},
+ {0x1f74, 0x03b9}, {0x1fc2},
+ {0x1f7c, 0x03b9}, {0x1ff2}
+ }; /* CaseUnfold_12 */
+
+ static final int CaseUnfold_12_Locale[][] = new int[][] { /* same pair layout as CaseUnfold_12, one pair only: sequence {0x0069, 0x0307} then {0x0130}. NOTE(review): presumably applied only for locale-specific folding - confirm at the use site. */
+ {0x0069, 0x0307}, {0x0130}
+ };
+
+ static final int CaseUnfold_13[][] = new int[][] { /* 14 row pairs, one pair per source line: a three-code-point sequence, then a row of 1-2 single code points. NOTE(review): presumably the characters that fold to that sequence - confirm in the reader of this table. */
+ {0x0066, 0x0066, 0x0069}, {0xfb03},
+ {0x0066, 0x0066, 0x006c}, {0xfb04},
+ {0x03b1, 0x0342, 0x03b9}, {0x1fb7},
+ {0x03b7, 0x0342, 0x03b9}, {0x1fc7},
+ {0x03b9, 0x0308, 0x0300}, {0x1fd2},
+ {0x03b9, 0x0308, 0x0301}, {0x0390, 0x1fd3},
+ {0x03b9, 0x0308, 0x0342}, {0x1fd7},
+ {0x03c5, 0x0308, 0x0300}, {0x1fe2},
+ {0x03c5, 0x0308, 0x0301}, {0x03b0, 0x1fe3},
+ {0x03c5, 0x0308, 0x0342}, {0x1fe7},
+ {0x03c5, 0x0313, 0x0300}, {0x1f52},
+ {0x03c5, 0x0313, 0x0301}, {0x1f54},
+ {0x03c5, 0x0313, 0x0342}, {0x1f56},
+ {0x03c9, 0x0342, 0x03b9}, {0x1ff7}
+ }; /* CaseUnfold_13 */
+}
diff --git a/src/org/joni/encoding/unicode/UnicodeCodeRanges.java b/src/org/joni/encoding/unicode/UnicodeCodeRanges.java
new file mode 100644
index 0000000..987199d
--- /dev/null
+++ b/src/org/joni/encoding/unicode/UnicodeCodeRanges.java
@@ -0,0 +1,3529 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.unicode;
+
+import org.joni.Config;
+
+public class UnicodeCodeRanges {
+
+    /**
+     * Populates {@link #CodeRangeTable}.
+     *
+     * The table always starts with the 15 range arrays declared in this class
+     * (CR_NEWLINE .. CR_ASCII). When Config.USE_UNICODE_PROPERTIES is true the
+     * arrays from UnicodeProperties and UnicodePropertiesScripts follow, in the
+     * order written below. NOTE(review): consumers presumably address the table
+     * by position, so keep the order unchanged - confirm against the callers.
+     */
+    public static void initializeCodeRanges() {
+        // Common prefix of both configurations; it used to be spelled out twice.
+        final int[][] ctypeRanges = {
+            CR_NEWLINE,
+            CR_Alpha,
+            CR_Blank,
+            CR_Cntrl,
+            CR_Digit,
+            CR_Graph,
+            CR_Lower,
+            CR_Print,
+            CR_Punct,
+            CR_Space,
+            CR_Upper,
+            CR_XDigit,
+            CR_Word,
+            CR_Alnum,
+            CR_ASCII,
+        };
+        if (!Config.USE_UNICODE_PROPERTIES) {
+            CodeRangeTable = ctypeRanges;
+            return;
+        }
+
+        // unicode properties
+        final int[][] propertyRanges = {
+            UnicodeProperties.CR_Any,
+            UnicodeProperties.CR_Assigned,
+            UnicodeProperties.CR_C,
+            UnicodeProperties.CR_Cc,
+            UnicodeProperties.CR_Cf,
+            UnicodeProperties.CR_Cn,
+            UnicodeProperties.CR_Co,
+            UnicodeProperties.CR_Cs,
+            UnicodeProperties.CR_L,
+            UnicodeProperties.CR_Ll,
+            UnicodeProperties.CR_Lm,
+            UnicodeProperties.CR_Lo,
+            UnicodeProperties.CR_Lt,
+            UnicodeProperties.CR_Lu,
+            UnicodeProperties.CR_M,
+            UnicodeProperties.CR_Mc,
+            UnicodeProperties.CR_Me,
+            UnicodeProperties.CR_Mn,
+            UnicodeProperties.CR_N,
+            UnicodeProperties.CR_Nd,
+            UnicodeProperties.CR_Nl,
+            UnicodeProperties.CR_No,
+            UnicodeProperties.CR_P,
+            UnicodeProperties.CR_Pc,
+            UnicodeProperties.CR_Pd,
+            UnicodeProperties.CR_Pe,
+            UnicodeProperties.CR_Pf,
+            UnicodeProperties.CR_Pi,
+            UnicodeProperties.CR_Po,
+            UnicodeProperties.CR_Ps,
+            UnicodeProperties.CR_S,
+            UnicodeProperties.CR_Sc,
+            UnicodeProperties.CR_Sk,
+            UnicodeProperties.CR_Sm,
+            UnicodeProperties.CR_So,
+            UnicodeProperties.CR_Z,
+            UnicodeProperties.CR_Zl,
+            UnicodeProperties.CR_Zp,
+            UnicodeProperties.CR_Zs,
+            UnicodePropertiesScripts.CR_Arabic,
+            UnicodePropertiesScripts.CR_Armenian,
+            UnicodePropertiesScripts.CR_Bengali,
+            UnicodePropertiesScripts.CR_Bopomofo,
+            UnicodePropertiesScripts.CR_Braille,
+            UnicodePropertiesScripts.CR_Buginese,
+            UnicodePropertiesScripts.CR_Buhid,
+            UnicodePropertiesScripts.CR_Canadian_Aboriginal,
+            UnicodePropertiesScripts.CR_Cherokee,
+            UnicodePropertiesScripts.CR_Common,
+            UnicodePropertiesScripts.CR_Coptic,
+            UnicodePropertiesScripts.CR_Cypriot,
+            UnicodePropertiesScripts.CR_Cyrillic,
+            UnicodePropertiesScripts.CR_Deseret,
+            UnicodePropertiesScripts.CR_Devanagari,
+            UnicodePropertiesScripts.CR_Ethiopic,
+            UnicodePropertiesScripts.CR_Georgian,
+            UnicodePropertiesScripts.CR_Glagolitic,
+            UnicodePropertiesScripts.CR_Gothic,
+            UnicodePropertiesScripts.CR_Greek,
+            UnicodePropertiesScripts.CR_Gujarati,
+            UnicodePropertiesScripts.CR_Gurmukhi,
+            UnicodePropertiesScripts.CR_Han,
+            UnicodePropertiesScripts.CR_Hangul,
+            UnicodePropertiesScripts.CR_Hanunoo,
+            UnicodePropertiesScripts.CR_Hebrew,
+            UnicodePropertiesScripts.CR_Hiragana,
+            UnicodePropertiesScripts.CR_Inherited,
+            UnicodePropertiesScripts.CR_Kannada,
+            UnicodePropertiesScripts.CR_Katakana,
+            UnicodePropertiesScripts.CR_Kharoshthi,
+            UnicodePropertiesScripts.CR_Khmer,
+            UnicodePropertiesScripts.CR_Lao,
+            UnicodePropertiesScripts.CR_Latin,
+            UnicodePropertiesScripts.CR_Limbu,
+            UnicodePropertiesScripts.CR_Linear_B,
+            UnicodePropertiesScripts.CR_Malayalam,
+            UnicodePropertiesScripts.CR_Mongolian,
+            UnicodePropertiesScripts.CR_Myanmar,
+            UnicodePropertiesScripts.CR_New_Tai_Lue,
+            UnicodePropertiesScripts.CR_Ogham,
+            UnicodePropertiesScripts.CR_Old_Italic,
+            UnicodePropertiesScripts.CR_Old_Persian,
+            UnicodePropertiesScripts.CR_Oriya,
+            UnicodePropertiesScripts.CR_Osmanya,
+            UnicodePropertiesScripts.CR_Runic,
+            UnicodePropertiesScripts.CR_Shavian,
+            UnicodePropertiesScripts.CR_Sinhala,
+            UnicodePropertiesScripts.CR_Syloti_Nagri,
+            UnicodePropertiesScripts.CR_Syriac,
+            UnicodePropertiesScripts.CR_Tagalog,
+            UnicodePropertiesScripts.CR_Tagbanwa,
+            UnicodePropertiesScripts.CR_Tai_Le,
+            UnicodePropertiesScripts.CR_Tamil,
+            UnicodePropertiesScripts.CR_Telugu,
+            UnicodePropertiesScripts.CR_Thaana,
+            UnicodePropertiesScripts.CR_Thai,
+            UnicodePropertiesScripts.CR_Tibetan,
+            UnicodePropertiesScripts.CR_Tifinagh,
+            UnicodePropertiesScripts.CR_Ugaritic,
+            UnicodeProp­ertiesScripts.CR_Yi,
+        };
+        final int[][] table = new int[ctypeRanges.length + propertyRanges.length][];
+        System.arraycopy(ctypeRanges, 0, table, 0, ctypeRanges.length);
+        System.arraycopy(propertyRanges, 0, table, ctypeRanges.length, propertyRanges.length);
+        CodeRangeTable = table;
+    }
+
+ static int[][]CodeRangeTable; /* assigned by initializeCodeRanges(); has no initializer, so it is null until that method has run */
+
+ /* 'NEWLINE': layout is { number of ranges, from, to, ... }; here a single range whose both bounds are 0x000a (bounds presumably inclusive - confirm in the range lookup) */
+ static final int CR_NEWLINE[] = {
+ 1,
+ 0x000a, 0x000a
+ }; /* CR_NEWLINE */
+
+ /* 'Alpha': [[:Alpha:]] - the leading 418 is the number of ranges; each following line is one from/to pair in ascending order (bounds presumably inclusive - confirm in the range lookup) */
+ static final int CR_Alpha[] = {
+ 418,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x1a00, 0x1a1b,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20eb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa827,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+ }; /* CR_Alpha */
+
+ /* 'Blank': [[:Blank:]] */
+ static final int CR_Blank[] = {
+ 9,
+ 0x0009, 0x0009,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+ }; /* CR_Blank */
+
+ /* 'Cntrl': [[:Cntrl:]] */
+ static final int CR_Cntrl[] = {
+ 19,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+ }; /* CR_Cntrl */
+
+ /* 'Digit': [[:Digit:]] */
+ static final int CR_Digit[] = {
+ 23,
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+ }; /* CR_Digit */
+
+ /* 'Graph': [[:Graph:]] */
+ static final int CR_Graph[] = {
+ 424,
+ 0x0021, 0x007e,
+ 0x00a1, 0x0241,
+ 0x0250, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060b, 0x0615,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x205e,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b5,
+ 0x20d0, 0x20eb,
+ 0x2100, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b13,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31cf,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa800, 0xa82b,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+ }; /* CR_Graph */
+
+ /* 'Lower': [[:Lower:]] */
+ static final int CR_Lower[] = {
+ 480,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0239,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2c30, 0x2c5e,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+ }; /* CR_Lower */
+
+ /* 'Print': [[:Print:]] */
+ static final int CR_Print[] = {
+ 423,
+ 0x0009, 0x000d,
+ 0x0020, 0x007e,
+ 0x0085, 0x0085,
+ 0x00a0, 0x0241,
+ 0x0250, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060b, 0x0615,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b5,
+ 0x20d0, 0x20eb,
+ 0x2100, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b13,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31cf,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa800, 0xa82b,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+ }; /* CR_Print */
+
+ /* 'Punct': [[:Punct:]] */
+ static final int CR_Punct[] = {
+ 96,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd1,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x205e,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x10a50, 0x10a58
+ }; /* CR_Punct */
+
+ /* 'Space': [[:Space:]] */
+ static final int CR_Space[] = {
+ 11,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+ }; /* CR_Space */
+
+ /* 'Upper': [[:Upper:]] */
+ static final int CR_Upper[] = {
+ 476,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2c00, 0x2c2e,
+ 0x2c80, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+ }; /* CR_Upper */
+
+ /* 'XDigit': [[:XDigit:]] */
+ /* Layout: the first element is the number of ranges (3); every following pair is a (from, to) code point range, here '0'-'9', 'A'-'F' and 'a'-'f'. */
+ static final int CR_XDigit[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+ }; /* CR_XDigit */
+
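+ /* 'Word': [[:Word:]] -- the first element is the number of ranges (464), followed by that many (from, to) code point pairs */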
+ static final int CR_Word[] = {
+ 464,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1369, 0x137c,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x1a00, 0x1a1b,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20eb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2cfd, 0x2cfd,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa827,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+ }; /* CR_Word */
+
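+ /* 'Alnum': [[:Alnum:]] -- the first element is the number of ranges (436), followed by that many (from, to) code point pairs */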
+ static final int CR_Alnum[] = {
+ 436,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x1a00, 0x1a1b,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20eb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa827,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+ }; /* CR_Alnum */
+
+ /* 'ASCII': [[:ASCII:]] */
+ /* Layout: the first element is the number of ranges (1), followed by a single (from, to) code point pair covering 0x00-0x7f. */
+ static final int CR_ASCII[] = {
+ 1,
+ 0x0000, 0x007f
+ }; /* CR_ASCII */
+
+}
\ No newline at end of file
diff --git a/src/org/joni/encoding/unicode/UnicodeEncoding.java b/src/org/joni/encoding/unicode/UnicodeEncoding.java
new file mode 100644
index 0000000..17d0c29
--- /dev/null
+++ b/src/org/joni/encoding/unicode/UnicodeEncoding.java
@@ -0,0 +1,454 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.unicode;
+
+import org.joni.ApplyAllCaseFoldFunction;
+import org.joni.CaseFoldCodeItem;
+import org.joni.CodeRangeBuffer;
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.constants.CharacterType;
+import org.joni.encoding.MultiByteEncoding;
+import org.joni.exception.ErrorMessages;
+import org.joni.exception.ValueException;
+
+
+public abstract class UnicodeEncoding extends MultiByteEncoding {
+ // Size of the scratch buffer filled by propertyNameToCType; a name that fills the buffer is rejected there.
+ private static final int PROPERTY_NAME_MAX_SIZE = 20;
+
+ /**
+  * Forwards the given table to the MultiByteEncoding constructor together with
+  * UNICODE_ISO_8859_1_CTypeTable as the character type table.
+  *
+  * @param EncLen table passed unchanged to the superclass (presumably the byte length
+  *               per lead byte of the concrete encoding -- TODO confirm against MultiByteEncoding)
+  */
+ protected UnicodeEncoding(int[]EncLen) {
+ // ASCII type tables for all Unicode encodings
+ super(EncLen, UNICODE_ISO_8859_1_CTypeTable);
+ }
+
+    /**
+     * Tells whether {@code code} belongs to the character type {@code ctype}
+     * (port of onigenc_unicode_is_code_ctype).
+     *
+     * <p>Code points below 256 are delegated to {@code isCodeCTypeInternal}; when
+     * {@code Config.USE_UNICODE_PROPERTIES} is enabled this shortcut is only taken for
+     * ctypes up to {@code CharacterType.MAX_STD_CTYPE}. All other queries are answered
+     * from {@code UnicodeCodeRanges.CodeRangeTable}, which is initialized here on first use.
+     *
+     * @param code  code point to test
+     * @param ctype character type, used as an index into the code range table
+     * @return whether {@code code} lies in one of the ranges registered for {@code ctype}
+     * @throws InternalError if {@code ctype} is at or past the end of the code range table
+     */
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        if (Config.USE_UNICODE_PROPERTIES) {
+            if (ctype <= CharacterType.MAX_STD_CTYPE && code < 256) {
+                return isCodeCTypeInternal(code, ctype);
+            }
+        } else {
+            if (code < 256) {
+                return isCodeCTypeInternal(code, ctype);
+            }
+        }
+
+        if (UnicodeCodeRanges.CodeRangeTable == null) {
+            synchronized (getClass()) {
+                UnicodeCodeRanges.initializeCodeRanges();
+            }
+        }
+
+        // '>=' rather than '>': ctype == length is already out of bounds and must be
+        // reported as a type bug (as ctypeCodeRange does), not as an array index error.
+        if (ctype >= UnicodeCodeRanges.CodeRangeTable.length) {
+            throw new InternalError(ErrorMessages.ERR_TYPE_BUG);
+        }
+
+        return CodeRangeBuffer.isInCodeRange(UnicodeCodeRanges.CodeRangeTable[ctype], code);
+    }
+
+    /**
+     * Returns the code range table entry registered for {@code ctype}
+     * (port of onigenc_unicode_ctype_code_range).
+     *
+     * <p>The code range table is initialized on first use.
+     *
+     * @param ctype character type, used as an index into the code range table
+     * @return the entry of {@code UnicodeCodeRanges.CodeRangeTable} at {@code ctype}
+     * @throws InternalError if {@code ctype} is at or past the end of the table
+     */
+    protected final int[] ctypeCodeRange(int ctype) {
+        if (UnicodeCodeRanges.CodeRangeTable == null) {
+            synchronized (getClass()) {
+                UnicodeCodeRanges.initializeCodeRanges();
+            }
+        }
+
+        final int[][] ranges = UnicodeCodeRanges.CodeRangeTable;
+        if (ctype < ranges.length) {
+            return ranges[ctype];
+        }
+        throw new InternalError(ErrorMessages.ERR_TYPE_BUG);
+    }
+
+    /**
+     * Resolves a character property name to its ctype number
+     * (port of onigenc_unicode_property_name_to_ctype).
+     *
+     * <p>The characters of {@code name[p..end)} are decoded one at a time and collected in
+     * a scratch buffer, which is then looked up in {@code UnicodeCTypeNames.CTypeNameHash}
+     * (initialized here on first use).
+     *
+     * @param name bytes containing the property name
+     * @param p    offset of the first byte of the name
+     * @param end  offset just past the last byte of the name
+     * @return the ctype registered for the name
+     * @throws ValueException if a decoded character is 0x80 or above, if the name fills
+     *         the {@code PROPERTY_NAME_MAX_SIZE} scratch buffer, or if the name is unknown
+     */
+    @Override
+    public int propertyNameToCType(byte[]name, int p, int end) {
+        byte[]buf = new byte[PROPERTY_NAME_MAX_SIZE];
+
+        int p_ = p;
+        int len = 0;
+
+        while (p_ < end) {
+            int code = mbcToCode(name, p_, end);
+            // Report the offending name here as well, like the two other failure paths below.
+            if (code >= 0x80) {
+                throw new ValueException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
+            }
+            buf[len++] = (byte)code;
+            if (len >= PROPERTY_NAME_MAX_SIZE) {
+                throw new ValueException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
+            }
+            p_ += length(name[p_]);
+        }
+
+        if (UnicodeCTypeNames.CTypeNameHash == null) {
+            synchronized (getClass()) {
+                UnicodeCTypeNames.initializeCTypeNameTable();
+            }
+        }
+
+        Integer ctype = UnicodeCTypeNames.CTypeNameHash.get(buf, 0, len);
+        if (ctype == null) {
+            throw new ValueException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
+        }
+        return ctype;
+    }
+
+ // onigenc_unicode_mbc_case_fold
+ @Override
+ public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]fold) {
+ if (UnicodeCaseFolds.FoldHash == null) synchronized (getClass()) {
+ UnicodeCaseFolds.initializeCaseFoldTables();
+ }
+
+ int p = pp.value;
+ int foldP = 0;
+
+ int code = mbcToCode(bytes, p, end);
+ int len = length(bytes[p]);
+ pp.value += len;
+
+ if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
+ if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (code == 0x0049) {
+ return codeToMbc(0x0131, fold, foldP);
+ } else if (code == 0x0130) {
+ return codeToMbc(0x0069, fold, foldP);
+ }
+ }
+ }
+
+ int to[] = UnicodeCaseFolds.FoldHash.get(code);
+ if (to != null) {
+ if (to.length == 1) {
+ return codeToMbc(to[0], fold, foldP);
+ } else {
+ int rlen = 0;
+ for (int i=0; i<to.length; i++) {
+ len = codeToMbc(to[i], fold, foldP);
+ foldP += len;
+ rlen += len;
+ }
+ return rlen;
+ }
+ }
+
+ for (int i=0; i<len; i++) {
+ fold[foldP++] = bytes[p++];
+ }
+ return len;
+ }
+
+    /**
+     * Feeds every case fold relation of the unfold tables to {@code fun}
+     * (port of onigenc_unicode_apply_all_case_fold).
+     *
+     * <p>Order of the callbacks: the 1:1 table, then either the four Turkish/Azeri pairs
+     * or the locale 1:1 table, and finally - only when
+     * {@code Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR} is set in {@code flag} - the
+     * two-code-point table, the locale two-code-point table (skipped in Turkish/Azeri
+     * mode) and the three-code-point table.
+     *
+     * @param flag case fold option bits
+     * @param fun  callback invoked as {@code apply(from, to, toLength, arg)}
+     * @param arg  opaque argument handed to every callback invocation
+     */
+    @Override
+    public void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) {
+        /* if (CaseFoldInited == 0) init_case_fold_table(); */
+
+        // One scratch slot is reused for every single-code-point callback.
+        final int[] code = new int[]{0};
+        final boolean turkishAzeri = Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+                && (flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0;
+
+        for (int i = 0; i < UnicodeCaseFolds.CaseUnfold_11_From.length; i++) {
+            applySingleCodeUnfold(UnicodeCaseFolds.CaseUnfold_11_From[i],
+                    UnicodeCaseFolds.CaseUnfold_11_To[i], fun, arg, code);
+        }
+
+        if (turkishAzeri) {
+            code[0] = 0x0131;
+            fun.apply(0x0049, code, 1, arg);
+            code[0] = 0x0049;
+            fun.apply(0x0131, code, 1, arg);
+            code[0] = 0x0130;
+            fun.apply(0x0069, code, 1, arg);
+            code[0] = 0x0069;
+            fun.apply(0x0130, code, 1, arg);
+        } else {
+            for (int i = 0; i < UnicodeCaseFolds.CaseUnfold_11_Locale_From.length; i++) {
+                applySingleCodeUnfold(UnicodeCaseFolds.CaseUnfold_11_Locale_From[i],
+                        UnicodeCaseFolds.CaseUnfold_11_Locale_To[i], fun, arg, code);
+            }
+        }
+
+        if ((flag & Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR) != 0) {
+            applyMultiCodeUnfolds(UnicodeCaseFolds.CaseUnfold_12, 2, fun, arg, code);
+
+            if (!turkishAzeri) {
+                applyMultiCodeUnfolds(UnicodeCaseFolds.CaseUnfold_12_Locale, 2, fun, arg, code);
+            }
+
+            // The callback is told that each 'from' sequence of this table has 3 code points.
+            applyMultiCodeUnfolds(UnicodeCaseFolds.CaseUnfold_13, 3, fun, arg, code);
+        }
+    }
+
+    // Reports one 1:1 entry: from <-> to[j] in both directions, then to[j] <-> to[k] for every k < j.
+    private static void applySingleCodeUnfold(int from, int[] to,
+            ApplyAllCaseFoldFunction fun, Object arg, int[] code) {
+        for (int j = 0; j < to.length; j++) {
+            code[0] = from;
+            fun.apply(to[j], code, 1, arg);
+
+            code[0] = to[j];
+            fun.apply(from, code, 1, arg);
+
+            for (int k = 0; k < j; k++) {
+                code[0] = to[k];
+                fun.apply(to[j], code, 1, arg);
+
+                code[0] = to[j];
+                fun.apply(to[k], code, 1, arg);
+            }
+        }
+    }
+
+    // Walks a table of alternating (from sequence, to codes) rows: reports to[j] -> from sequence, then to[j] -> every other to[k].
+    private static void applyMultiCodeUnfolds(int[][] table, int fromLength,
+            ApplyAllCaseFoldFunction fun, Object arg, int[] code) {
+        for (int i = 0; i < table.length; i += 2) {
+            final int[] from = table[i];
+            final int[] to = table[i + 1];
+
+            for (int j = 0; j < to.length; j++) {
+                fun.apply(to[j], from, fromLength, arg);
+
+                for (int k = 0; k < to.length; k++) {
+                    if (k == j) continue;
+                    code[0] = to[k];
+                    fun.apply(to[j], code, 1, arg);
+                }
+            }
+        }
+    }
+
+ /**
+  * Collects the case-fold alternatives for the character that starts at bytes[p].
+  * The original comment on this method names its C counterpart:
+  * onigenc_unicode_get_case_fold_codes_by_str.
+  *
+  * Processing order (later steps read values that earlier steps overwrite):
+  *   1. Initialise the UnicodeCaseFolds tables if FoldHash is still null.
+  *   2. Decode the code point at p (mbcToCode) and its byte length (length).
+  *   3. Turkish/Azeri shortcut: when Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI is on
+  *      and flag contains ENC_CASE_FOLD_TURKISH_AZERI, U+0049 and U+0131 map to each
+  *      other, as do U+0130 and U+0069; a one-element array is returned at once.
+  *   4. FoldHash lookup for the code point:
+  *      - single-code fold: emit the folded code, then every Unfold1Hash entry of the
+  *        folded code except the original code point;
+  *      - multi-code fold (only with INTERNAL_ENC_CASE_FOLD_MULTI_CHAR in flag): emit
+  *        every combination of each folded code and its Unfold1Hash alternatives,
+  *        then the Unfold2Hash/Unfold3Hash entries of the folded sequence;
+  *      - no fold entry: emit the Unfold1Hash entries of the code point itself.
+  *   5. Look-ahead (only if INTERNAL_ENC_CASE_FOLD_MULTI_CHAR is still set): combine
+  *      the current code with the next one and then two characters and emit the
+  *      Unfold2Hash/Unfold3Hash entries found for those sequences.
+  *   6. Return the filled prefix of the item buffer.
+  *
+  * Each CaseFoldCodeItem is constructed with (len, number of codes, codes); the
+  * second argument always equals the length of the array passed as the third.
+  *
+  * NOTE(review): items is sized by Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM and n is
+  * never checked against it; presumably the tables guarantee it cannot overflow.
+  *
+  * @param flag  case-fold option bits; this method tests
+  *              Config.ENC_CASE_FOLD_TURKISH_AZERI and
+  *              Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR
+  * @param bytes encoded input
+  * @param p     index in bytes of the first byte of the character to examine
+  * @param end   passed to mbcToCode as the limit; look-ahead only happens while p < end
+  * @return EMPTY_FOLD_CODES when nothing was collected, otherwise an array holding
+  *         exactly the collected items (no unused trailing slots)
+  */
+ @Override
+ public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
+ if (UnicodeCaseFolds.FoldHash == null) synchronized (getClass()) { // NOTE(review): the null check is outside the lock and is not repeated inside it
+ UnicodeCaseFolds.initializeCaseFoldTables();
+ }
+ int code = mbcToCode(bytes, p, end); // code point of the character at p
+ int len = length(bytes[p]); // byte length of that character; grows below when look-ahead characters are consumed
+ // Turkish/Azeri shortcut: handled before any table lookup and returns immediately.
+ if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
+ if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (code == 0x0049) { // 'I' -> U+0131 (dotless i)
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(len, 1, new int[]{0x0131})};
+ } else if(code == 0x0130) { // U+0130 (I with dot above) -> 'i'
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(len, 1, new int[]{0x0069})};
+ } else if(code == 0x0131) { // U+0131 (dotless i) -> 'I'
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(len, 1, new int[]{0x0049})};
+ } else if(code == 0x0069) { // 'i' -> U+0130 (I with dot above)
+ return new CaseFoldCodeItem[]{new CaseFoldCodeItem(len, 1, new int[]{0x0130})};
+ }
+ }
+ } // USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ // n: number of items filled so far; fn: number of codes in a multi-code fold.
+ int n = 0;
+ int fn = 0;
+ // FoldHash maps the code point to its folded code sequence; null means no fold entry.
+ int[]to = UnicodeCaseFolds.FoldHash.get(code);
+ CaseFoldCodeItem[]items = null;
+ if (to != null) {
+ items = new CaseFoldCodeItem[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ // Single-code fold: the folded code plus every other code that unfolds from it.
+ if (to.length == 1) {
+ int origCode = code;
+
+ items[0] = new CaseFoldCodeItem(len, 1, new int[]{to[0]});
+ n++;
+ // From here on code is the folded value; the look-ahead section below starts from it.
+ code = to[0];
+ to = UnicodeCaseFolds.Unfold1Hash.get(code);
+ if (to != null) {
+ for (int i=0; i<to.length; i++) {
+ if (to[i] != origCode) { // the input character itself is not reported as an alternative
+ items[n] = new CaseFoldCodeItem(len, 1, new int[]{to[i]});
+ n++;
+ }
+ }
+ }
+ } else if ((flag & Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR) != 0) { // multi-code fold; skipped entirely when multi-char folding is not requested
+ int[][]cs = new int[3][4]; // cs[f]: folded code f followed by its Unfold1Hash alternatives (room for at most 3 alternatives)
+ int[]ncs = new int[3]; // ncs[f]: number of valid entries in cs[f]
+ // Gather, for each code of the folded sequence, the code itself and its alternatives.
+ for (fn=0; fn<to.length; fn++) {
+ cs[fn][0] = to[fn];
+ int[]z3 = UnicodeCaseFolds.Unfold1Hash.get(cs[fn][0]);
+ if (z3 != null) {
+ for (int i=0; i<z3.length; i++) {
+ cs[fn][i+1] = z3[i];
+ }
+ ncs[fn] = z3.length + 1;
+ } else {
+ ncs[fn] = 1;
+ }
+ }
+ // After the loop fn == to.length: 2 selects the two-code path, anything else the three-code path.
+ if (fn == 2) {
+ for (int i=0; i<ncs[0]; i++) {
+ for (int j=0; j<ncs[1]; j++) {
+ items[n] = new CaseFoldCodeItem(len, 2, new int[]{cs[0][i], cs[1][j]});
+ n++;
+ }
+ }
+ // Other single code points registered for the same two-code sequence.
+ int[]z2 = UnicodeCaseFolds.Unfold2Hash.get(to);
+ if (z2 != null) {
+ for (int i=0; i<z2.length; i++) {
+ if (z2[i] == code) continue; // skip the input code point itself
+ items[n] = new CaseFoldCodeItem(len, 1, new int[]{z2[i]});
+ n++;
+ }
+ }
+ } else {
+ for (int i=0; i<ncs[0]; i++) {
+ for (int j=0; j<ncs[1]; j++) {
+ for (int k=0; k<ncs[2]; k++) {
+ items[n] = new CaseFoldCodeItem(len, 3, new int[]{cs[0][i], cs[1][j], cs[2][k]});
+ n++;
+ }
+ }
+ }
+ int[]z2 = UnicodeCaseFolds.Unfold3Hash.get(to); // other single code points registered for the same three-code sequence
+ if (z2 != null) {
+ for (int i=0; i<z2.length; i++) {
+ if (z2[i] == code) continue; // skip the input code point itself
+ items[n] = new CaseFoldCodeItem(len, 1, new int[]{z2[i]});
+ n++;
+ }
+ }
+ }
+ /* multi char folded code is not head of another folded multi char */
+ flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); clears every bit, so the look-ahead section below is skipped */
+ }
+ } else {
+ to = UnicodeCaseFolds.Unfold1Hash.get(code); // no fold entry: code may itself be a fold target
+ if (to != null) {
+ items = new CaseFoldCodeItem[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ for (int i=0; i<to.length; i++) {
+ items[n] = new CaseFoldCodeItem(len, 1, new int[]{to[i]});
+ n++;
+ }
+ }
+ }
+ // Look-ahead: does the current code plus the following character(s) form a sequence with single-code equivalents?
+ if ((flag & Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ if (items == null) items = new CaseFoldCodeItem[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM];
+
+ p += len;
+ if (p < end) {
+ int[]codes = new int[3];
+ codes[0] = code; // already the folded value if the single-code branch above ran
+ code = mbcToCode(bytes, p, end);
+ to = UnicodeCaseFolds.FoldHash.get(code);
+ if (to != null && to.length == 1) {
+ codes[1] = to[0]; // second character contributes its single-code fold ...
+ } else {
+ codes[1] = code; // ... or itself when it has no single-code fold
+ }
+ // len now covers both characters, so the items below report the longer byte length.
+ int clen = length(bytes[p]);
+ len += clen;
+ int[]z2 = UnicodeCaseFolds.Unfold2Hash.get(codes); // NOTE(review): codes has length 3 with codes[2] still 0 here; assumes the lookup only considers the first two entries - confirm
+ if (z2 != null) {
+ for (int i=0; i<z2.length; i++) {
+ items[n] = new CaseFoldCodeItem(len, 1, new int[]{z2[i]});
+ n++;
+ }
+ }
+ p += clen;
+ if (p < end) { // a third character is available: repeat the lookup for a three-code sequence
+ code = mbcToCode(bytes, p, end);
+ to = UnicodeCaseFolds.FoldHash.get(code);
+ if (to != null && to.length == 1) {
+ codes[2] = to[0];
+ } else {
+ codes[2] = code;
+ }
+ clen = length(bytes[p]);
+ len += clen;
+ z2 = UnicodeCaseFolds.Unfold3Hash.get(codes);
+ if (z2 != null) {
+ for (int i=0; i<z2.length; i++) {
+ items[n] = new CaseFoldCodeItem(len, 1, new int[]{z2[i]});
+ n++;
+ }
+ }
+ }
+ }
+ }
+ // Return only the filled prefix of the buffer.
+ if (items == null || n == 0) return EMPTY_FOLD_CODES;
+ if (n < items.length) {
+ CaseFoldCodeItem [] tmp = new CaseFoldCodeItem[n];
+ System.arraycopy(items, 0, tmp, 0, n);
+ return tmp;
+ } else {
+ return items;
+ }
+ }
+
+ static final short UNICODE_ISO_8859_1_CTypeTable[] = { // 256 entries (32 rows of 8); row comments give the index range. Presumably indexed by code point 0x00-0xff, each entry a bit mask of character-type flags - TODO confirm against the ctype constants
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, // 0x00-0x07
+ 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, // 0x08-0x0f
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, // 0x10-0x17
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, // 0x18-0x1f
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, // 0x20-0x27
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, // 0x28-0x2f
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, // 0x30-0x37
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, // 0x38-0x3f
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, // 0x40-0x47
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, // 0x48-0x4f
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, // 0x50-0x57
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, // 0x58-0x5f
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, // 0x60-0x67
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, // 0x68-0x6f
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, // 0x70-0x77
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, // 0x78-0x7f
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, // 0x80-0x87
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, // 0x88-0x8f
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, // 0x90-0x97
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, // 0x98-0x9f
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, // 0xa0-0xa7
+ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, // 0xa8-0xaf
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, // 0xb0-0xb7
+ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, // 0xb8-0xbf
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, // 0xc0-0xc7
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, // 0xc8-0xcf
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, // 0xd0-0xd7
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, // 0xd8-0xdf
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, // 0xe0-0xe7
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, // 0xe8-0xef
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, // 0xf0-0xf7
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 // 0xf8-0xff
+ };
+
+}
diff --git a/src/org/joni/encoding/unicode/UnicodeProperties.java b/src/org/joni/encoding/unicode/UnicodeProperties.java
new file mode 100644
index 0000000..8b6516a
--- /dev/null
+++ b/src/org/joni/encoding/unicode/UnicodeProperties.java
@@ -0,0 +1,4270 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.unicode;
+
+public interface UnicodeProperties {
+ // #ifdef USE_UNICODE_PROPERTIES
+
+ /* 'Any': every code point, 0x0000 through 0x10ffff. Layout: first element is the range count (1), followed by that many inclusive [from, to] pairs. */
+ static final int CR_Any[] = {
+ 1,
+ 0x0000, 0x10ffff
+ }; /* CR_Any */
+
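+ /* 'Assigned': code points that are not unassigned; the ranges appear to be the exact complement of CR_Cn. Layout: first element is the range count, followed by that many inclusive [from, to] pairs. */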
+ static final int CR_Assigned[] = {
+ 420,
+ 0x0000, 0x0241,
+ 0x0250, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060b, 0x0615,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x076d,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x097d, 0x097d,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+ 0x1d00, 0x1dc3,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b5,
+ 0x20d0, 0x20eb,
+ 0x2100, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b13,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31cf,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa800, 0xa82b,
+ 0xac00, 0xd7a3,
+ 0xd800, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+ }; /* CR_Assigned */
+
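+ /* 'C': Major Category "Other" (Cc, Cf, Cn, Co and Cs combined). Layout: first element is the range count, followed by that many inclusive [from, to] pairs. */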
+ static final int CR_C[] = {
+ 422,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad,
+ 0x0242, 0x024f,
+ 0x0370, 0x0373,
+ 0x0376, 0x0379,
+ 0x037b, 0x037d,
+ 0x037f, 0x0383,
+ 0x038b, 0x038b,
+ 0x038d, 0x038d,
+ 0x03a2, 0x03a2,
+ 0x03cf, 0x03cf,
+ 0x0487, 0x0487,
+ 0x04cf, 0x04cf,
+ 0x04fa, 0x04ff,
+ 0x0510, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058b, 0x0590,
+ 0x05ba, 0x05ba,
+ 0x05c8, 0x05cf,
+ 0x05eb, 0x05ef,
+ 0x05f5, 0x060a,
+ 0x0616, 0x061a,
+ 0x061c, 0x061d,
+ 0x0620, 0x0620,
+ 0x063b, 0x063f,
+ 0x065f, 0x065f,
+ 0x06dd, 0x06dd,
+ 0x070e, 0x070f,
+ 0x074b, 0x074c,
+ 0x076e, 0x077f,
+ 0x07b2, 0x0900,
+ 0x093a, 0x093b,
+ 0x094e, 0x094f,
+ 0x0955, 0x0957,
+ 0x0971, 0x097c,
+ 0x097e, 0x0980,
+ 0x0984, 0x0984,
+ 0x098d, 0x098e,
+ 0x0991, 0x0992,
+ 0x09a9, 0x09a9,
+ 0x09b1, 0x09b1,
+ 0x09b3, 0x09b5,
+ 0x09ba, 0x09bb,
+ 0x09c5, 0x09c6,
+ 0x09c9, 0x09ca,
+ 0x09cf, 0x09d6,
+ 0x09d8, 0x09db,
+ 0x09de, 0x09de,
+ 0x09e4, 0x09e5,
+ 0x09fb, 0x0a00,
+ 0x0a04, 0x0a04,
+ 0x0a0b, 0x0a0e,
+ 0x0a11, 0x0a12,
+ 0x0a29, 0x0a29,
+ 0x0a31, 0x0a31,
+ 0x0a34, 0x0a34,
+ 0x0a37, 0x0a37,
+ 0x0a3a, 0x0a3b,
+ 0x0a3d, 0x0a3d,
+ 0x0a43, 0x0a46,
+ 0x0a49, 0x0a4a,
+ 0x0a4e, 0x0a58,
+ 0x0a5d, 0x0a5d,
+ 0x0a5f, 0x0a65,
+ 0x0a75, 0x0a80,
+ 0x0a84, 0x0a84,
+ 0x0a8e, 0x0a8e,
+ 0x0a92, 0x0a92,
+ 0x0aa9, 0x0aa9,
+ 0x0ab1, 0x0ab1,
+ 0x0ab4, 0x0ab4,
+ 0x0aba, 0x0abb,
+ 0x0ac6, 0x0ac6,
+ 0x0aca, 0x0aca,
+ 0x0ace, 0x0acf,
+ 0x0ad1, 0x0adf,
+ 0x0ae4, 0x0ae5,
+ 0x0af0, 0x0af0,
+ 0x0af2, 0x0b00,
+ 0x0b04, 0x0b04,
+ 0x0b0d, 0x0b0e,
+ 0x0b11, 0x0b12,
+ 0x0b29, 0x0b29,
+ 0x0b31, 0x0b31,
+ 0x0b34, 0x0b34,
+ 0x0b3a, 0x0b3b,
+ 0x0b44, 0x0b46,
+ 0x0b49, 0x0b4a,
+ 0x0b4e, 0x0b55,
+ 0x0b58, 0x0b5b,
+ 0x0b5e, 0x0b5e,
+ 0x0b62, 0x0b65,
+ 0x0b72, 0x0b81,
+ 0x0b84, 0x0b84,
+ 0x0b8b, 0x0b8d,
+ 0x0b91, 0x0b91,
+ 0x0b96, 0x0b98,
+ 0x0b9b, 0x0b9b,
+ 0x0b9d, 0x0b9d,
+ 0x0ba0, 0x0ba2,
+ 0x0ba5, 0x0ba7,
+ 0x0bab, 0x0bad,
+ 0x0bba, 0x0bbd,
+ 0x0bc3, 0x0bc5,
+ 0x0bc9, 0x0bc9,
+ 0x0bce, 0x0bd6,
+ 0x0bd8, 0x0be5,
+ 0x0bfb, 0x0c00,
+ 0x0c04, 0x0c04,
+ 0x0c0d, 0x0c0d,
+ 0x0c11, 0x0c11,
+ 0x0c29, 0x0c29,
+ 0x0c34, 0x0c34,
+ 0x0c3a, 0x0c3d,
+ 0x0c45, 0x0c45,
+ 0x0c49, 0x0c49,
+ 0x0c4e, 0x0c54,
+ 0x0c57, 0x0c5f,
+ 0x0c62, 0x0c65,
+ 0x0c70, 0x0c81,
+ 0x0c84, 0x0c84,
+ 0x0c8d, 0x0c8d,
+ 0x0c91, 0x0c91,
+ 0x0ca9, 0x0ca9,
+ 0x0cb4, 0x0cb4,
+ 0x0cba, 0x0cbb,
+ 0x0cc5, 0x0cc5,
+ 0x0cc9, 0x0cc9,
+ 0x0cce, 0x0cd4,
+ 0x0cd7, 0x0cdd,
+ 0x0cdf, 0x0cdf,
+ 0x0ce2, 0x0ce5,
+ 0x0cf0, 0x0d01,
+ 0x0d04, 0x0d04,
+ 0x0d0d, 0x0d0d,
+ 0x0d11, 0x0d11,
+ 0x0d29, 0x0d29,
+ 0x0d3a, 0x0d3d,
+ 0x0d44, 0x0d45,
+ 0x0d49, 0x0d49,
+ 0x0d4e, 0x0d56,
+ 0x0d58, 0x0d5f,
+ 0x0d62, 0x0d65,
+ 0x0d70, 0x0d81,
+ 0x0d84, 0x0d84,
+ 0x0d97, 0x0d99,
+ 0x0db2, 0x0db2,
+ 0x0dbc, 0x0dbc,
+ 0x0dbe, 0x0dbf,
+ 0x0dc7, 0x0dc9,
+ 0x0dcb, 0x0dce,
+ 0x0dd5, 0x0dd5,
+ 0x0dd7, 0x0dd7,
+ 0x0de0, 0x0df1,
+ 0x0df5, 0x0e00,
+ 0x0e3b, 0x0e3e,
+ 0x0e5c, 0x0e80,
+ 0x0e83, 0x0e83,
+ 0x0e85, 0x0e86,
+ 0x0e89, 0x0e89,
+ 0x0e8b, 0x0e8c,
+ 0x0e8e, 0x0e93,
+ 0x0e98, 0x0e98,
+ 0x0ea0, 0x0ea0,
+ 0x0ea4, 0x0ea4,
+ 0x0ea6, 0x0ea6,
+ 0x0ea8, 0x0ea9,
+ 0x0eac, 0x0eac,
+ 0x0eba, 0x0eba,
+ 0x0ebe, 0x0ebf,
+ 0x0ec5, 0x0ec5,
+ 0x0ec7, 0x0ec7,
+ 0x0ece, 0x0ecf,
+ 0x0eda, 0x0edb,
+ 0x0ede, 0x0eff,
+ 0x0f48, 0x0f48,
+ 0x0f6b, 0x0f70,
+ 0x0f8c, 0x0f8f,
+ 0x0f98, 0x0f98,
+ 0x0fbd, 0x0fbd,
+ 0x0fcd, 0x0fce,
+ 0x0fd2, 0x0fff,
+ 0x1022, 0x1022,
+ 0x1028, 0x1028,
+ 0x102b, 0x102b,
+ 0x1033, 0x1035,
+ 0x103a, 0x103f,
+ 0x105a, 0x109f,
+ 0x10c6, 0x10cf,
+ 0x10fd, 0x10ff,
+ 0x115a, 0x115e,
+ 0x11a3, 0x11a7,
+ 0x11fa, 0x11ff,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135e,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x1400,
+ 0x1677, 0x167f,
+ 0x169d, 0x169f,
+ 0x16f1, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17b4, 0x17b5,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18aa, 0x18ff,
+ 0x191d, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19aa, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19da, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a20, 0x1cff,
+ 0x1dc4, 0x1dff,
+ 0x1e9c, 0x1e9f,
+ 0x1efa, 0x1eff,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x206f,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x2095, 0x209f,
+ 0x20b6, 0x20cf,
+ 0x20ec, 0x20ff,
+ 0x214d, 0x2152,
+ 0x2184, 0x218f,
+ 0x23dc, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x269d, 0x269f,
+ 0x26b2, 0x2700,
+ 0x2705, 0x2705,
+ 0x270a, 0x270b,
+ 0x2728, 0x2728,
+ 0x274c, 0x274c,
+ 0x274e, 0x274e,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x275f, 0x2760,
+ 0x2795, 0x2797,
+ 0x27b0, 0x27b0,
+ 0x27bf, 0x27bf,
+ 0x27c7, 0x27cf,
+ 0x27ec, 0x27ef,
+ 0x2b14, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c7f,
+ 0x2ceb, 0x2cf8,
+ 0x2d26, 0x2d2f,
+ 0x2d66, 0x2d6e,
+ 0x2d70, 0x2d7f,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2dff,
+ 0x2e18, 0x2e1b,
+ 0x2e1e, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312d, 0x3130,
+ 0x318f, 0x318f,
+ 0x31b8, 0x31bf,
+ 0x31d0, 0x31ef,
+ 0x321f, 0x321f,
+ 0x3244, 0x324f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fbc, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa6ff,
+ 0xa717, 0xa7ff,
+ 0xa82c, 0xabff,
+ 0xd7a4, 0xf8ff,
+ 0xfa2e, 0xfa2f,
+ 0xfa6b, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbb2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe24, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfffb,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018b, 0x102ff,
+ 0x1031f, 0x1031f,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10840, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d129,
+ 0x1d173, 0x1d17a,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7ca, 0x1d7cd,
+ 0x1d800, 0x1ffff,
+ 0x2a6d7, 0x2f7ff,
+ 0x2fa1e, 0xe00ff,
+ 0xe01f0, 0x10ffff
+ }; /* CR_C */
+
+ /* 'Cc': General Category "Control". Layout: first element is the range count (2), followed by that many inclusive [from, to] pairs. */
+ static final int CR_Cc[] = {
+ 2,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f
+ }; /* CR_Cc */
+
+ /* 'Cf': General Category "Format". Layout: first element is the range count (14), followed by that many inclusive [from, to] pairs; a pair with equal ends is a single code point. */
+ static final int CR_Cf[] = {
+ 14,
+ 0x00ad, 0x00ad,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f
+ }; /* CR_Cf */
+
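+ /* 'Cn': General Category "Unassigned". Layout: first element is the range count, followed by that many inclusive [from, to] pairs. */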
+ static final int CR_Cn[] = {
+ 420,
+ 0x0242, 0x024f,
+ 0x0370, 0x0373,
+ 0x0376, 0x0379,
+ 0x037b, 0x037d,
+ 0x037f, 0x0383,
+ 0x038b, 0x038b,
+ 0x038d, 0x038d,
+ 0x03a2, 0x03a2,
+ 0x03cf, 0x03cf,
+ 0x0487, 0x0487,
+ 0x04cf, 0x04cf,
+ 0x04fa, 0x04ff,
+ 0x0510, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058b, 0x0590,
+ 0x05ba, 0x05ba,
+ 0x05c8, 0x05cf,
+ 0x05eb, 0x05ef,
+ 0x05f5, 0x05ff,
+ 0x0604, 0x060a,
+ 0x0616, 0x061a,
+ 0x061c, 0x061d,
+ 0x0620, 0x0620,
+ 0x063b, 0x063f,
+ 0x065f, 0x065f,
+ 0x070e, 0x070e,
+ 0x074b, 0x074c,
+ 0x076e, 0x077f,
+ 0x07b2, 0x0900,
+ 0x093a, 0x093b,
+ 0x094e, 0x094f,
+ 0x0955, 0x0957,
+ 0x0971, 0x097c,
+ 0x097e, 0x0980,
+ 0x0984, 0x0984,
+ 0x098d, 0x098e,
+ 0x0991, 0x0992,
+ 0x09a9, 0x09a9,
+ 0x09b1, 0x09b1,
+ 0x09b3, 0x09b5,
+ 0x09ba, 0x09bb,
+ 0x09c5, 0x09c6,
+ 0x09c9, 0x09ca,
+ 0x09cf, 0x09d6,
+ 0x09d8, 0x09db,
+ 0x09de, 0x09de,
+ 0x09e4, 0x09e5,
+ 0x09fb, 0x0a00,
+ 0x0a04, 0x0a04,
+ 0x0a0b, 0x0a0e,
+ 0x0a11, 0x0a12,
+ 0x0a29, 0x0a29,
+ 0x0a31, 0x0a31,
+ 0x0a34, 0x0a34,
+ 0x0a37, 0x0a37,
+ 0x0a3a, 0x0a3b,
+ 0x0a3d, 0x0a3d,
+ 0x0a43, 0x0a46,
+ 0x0a49, 0x0a4a,
+ 0x0a4e, 0x0a58,
+ 0x0a5d, 0x0a5d,
+ 0x0a5f, 0x0a65,
+ 0x0a75, 0x0a80,
+ 0x0a84, 0x0a84,
+ 0x0a8e, 0x0a8e,
+ 0x0a92, 0x0a92,
+ 0x0aa9, 0x0aa9,
+ 0x0ab1, 0x0ab1,
+ 0x0ab4, 0x0ab4,
+ 0x0aba, 0x0abb,
+ 0x0ac6, 0x0ac6,
+ 0x0aca, 0x0aca,
+ 0x0ace, 0x0acf,
+ 0x0ad1, 0x0adf,
+ 0x0ae4, 0x0ae5,
+ 0x0af0, 0x0af0,
+ 0x0af2, 0x0b00,
+ 0x0b04, 0x0b04,
+ 0x0b0d, 0x0b0e,
+ 0x0b11, 0x0b12,
+ 0x0b29, 0x0b29,
+ 0x0b31, 0x0b31,
+ 0x0b34, 0x0b34,
+ 0x0b3a, 0x0b3b,
+ 0x0b44, 0x0b46,
+ 0x0b49, 0x0b4a,
+ 0x0b4e, 0x0b55,
+ 0x0b58, 0x0b5b,
+ 0x0b5e, 0x0b5e,
+ 0x0b62, 0x0b65,
+ 0x0b72, 0x0b81,
+ 0x0b84, 0x0b84,
+ 0x0b8b, 0x0b8d,
+ 0x0b91, 0x0b91,
+ 0x0b96, 0x0b98,
+ 0x0b9b, 0x0b9b,
+ 0x0b9d, 0x0b9d,
+ 0x0ba0, 0x0ba2,
+ 0x0ba5, 0x0ba7,
+ 0x0bab, 0x0bad,
+ 0x0bba, 0x0bbd,
+ 0x0bc3, 0x0bc5,
+ 0x0bc9, 0x0bc9,
+ 0x0bce, 0x0bd6,
+ 0x0bd8, 0x0be5,
+ 0x0bfb, 0x0c00,
+ 0x0c04, 0x0c04,
+ 0x0c0d, 0x0c0d,
+ 0x0c11, 0x0c11,
+ 0x0c29, 0x0c29,
+ 0x0c34, 0x0c34,
+ 0x0c3a, 0x0c3d,
+ 0x0c45, 0x0c45,
+ 0x0c49, 0x0c49,
+ 0x0c4e, 0x0c54,
+ 0x0c57, 0x0c5f,
+ 0x0c62, 0x0c65,
+ 0x0c70, 0x0c81,
+ 0x0c84, 0x0c84,
+ 0x0c8d, 0x0c8d,
+ 0x0c91, 0x0c91,
+ 0x0ca9, 0x0ca9,
+ 0x0cb4, 0x0cb4,
+ 0x0cba, 0x0cbb,
+ 0x0cc5, 0x0cc5,
+ 0x0cc9, 0x0cc9,
+ 0x0cce, 0x0cd4,
+ 0x0cd7, 0x0cdd,
+ 0x0cdf, 0x0cdf,
+ 0x0ce2, 0x0ce5,
+ 0x0cf0, 0x0d01,
+ 0x0d04, 0x0d04,
+ 0x0d0d, 0x0d0d,
+ 0x0d11, 0x0d11,
+ 0x0d29, 0x0d29,
+ 0x0d3a, 0x0d3d,
+ 0x0d44, 0x0d45,
+ 0x0d49, 0x0d49,
+ 0x0d4e, 0x0d56,
+ 0x0d58, 0x0d5f,
+ 0x0d62, 0x0d65,
+ 0x0d70, 0x0d81,
+ 0x0d84, 0x0d84,
+ 0x0d97, 0x0d99,
+ 0x0db2, 0x0db2,
+ 0x0dbc, 0x0dbc,
+ 0x0dbe, 0x0dbf,
+ 0x0dc7, 0x0dc9,
+ 0x0dcb, 0x0dce,
+ 0x0dd5, 0x0dd5,
+ 0x0dd7, 0x0dd7,
+ 0x0de0, 0x0df1,
+ 0x0df5, 0x0e00,
+ 0x0e3b, 0x0e3e,
+ 0x0e5c, 0x0e80,
+ 0x0e83, 0x0e83,
+ 0x0e85, 0x0e86,
+ 0x0e89, 0x0e89,
+ 0x0e8b, 0x0e8c,
+ 0x0e8e, 0x0e93,
+ 0x0e98, 0x0e98,
+ 0x0ea0, 0x0ea0,
+ 0x0ea4, 0x0ea4,
+ 0x0ea6, 0x0ea6,
+ 0x0ea8, 0x0ea9,
+ 0x0eac, 0x0eac,
+ 0x0eba, 0x0eba,
+ 0x0ebe, 0x0ebf,
+ 0x0ec5, 0x0ec5,
+ 0x0ec7, 0x0ec7,
+ 0x0ece, 0x0ecf,
+ 0x0eda, 0x0edb,
+ 0x0ede, 0x0eff,
+ 0x0f48, 0x0f48,
+ 0x0f6b, 0x0f70,
+ 0x0f8c, 0x0f8f,
+ 0x0f98, 0x0f98,
+ 0x0fbd, 0x0fbd,
+ 0x0fcd, 0x0fce,
+ 0x0fd2, 0x0fff,
+ 0x1022, 0x1022,
+ 0x1028, 0x1028,
+ 0x102b, 0x102b,
+ 0x1033, 0x1035,
+ 0x103a, 0x103f,
+ 0x105a, 0x109f,
+ 0x10c6, 0x10cf,
+ 0x10fd, 0x10ff,
+ 0x115a, 0x115e,
+ 0x11a3, 0x11a7,
+ 0x11fa, 0x11ff,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135e,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x1400,
+ 0x1677, 0x167f,
+ 0x169d, 0x169f,
+ 0x16f1, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18aa, 0x18ff,
+ 0x191d, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19aa, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19da, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a20, 0x1cff,
+ 0x1dc4, 0x1dff,
+ 0x1e9c, 0x1e9f,
+ 0x1efa, 0x1eff,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x2064, 0x2069,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x2095, 0x209f,
+ 0x20b6, 0x20cf,
+ 0x20ec, 0x20ff,
+ 0x214d, 0x2152,
+ 0x2184, 0x218f,
+ 0x23dc, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x269d, 0x269f,
+ 0x26b2, 0x2700,
+ 0x2705, 0x2705,
+ 0x270a, 0x270b,
+ 0x2728, 0x2728,
+ 0x274c, 0x274c,
+ 0x274e, 0x274e,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x275f, 0x2760,
+ 0x2795, 0x2797,
+ 0x27b0, 0x27b0,
+ 0x27bf, 0x27bf,
+ 0x27c7, 0x27cf,
+ 0x27ec, 0x27ef,
+ 0x2b14, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c7f,
+ 0x2ceb, 0x2cf8,
+ 0x2d26, 0x2d2f,
+ 0x2d66, 0x2d6e,
+ 0x2d70, 0x2d7f,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2dff,
+ 0x2e18, 0x2e1b,
+ 0x2e1e, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312d, 0x3130,
+ 0x318f, 0x318f,
+ 0x31b8, 0x31bf,
+ 0x31d0, 0x31ef,
+ 0x321f, 0x321f,
+ 0x3244, 0x324f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fbc, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa6ff,
+ 0xa717, 0xa7ff,
+ 0xa82c, 0xabff,
+ 0xd7a4, 0xd7ff,
+ 0xfa2e, 0xfa2f,
+ 0xfa6b, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbb2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe24, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xfefe,
+ 0xff00, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfff8,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018b, 0x102ff,
+ 0x1031f, 0x1031f,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10840, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d129,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7ca, 0x1d7cd,
+ 0x1d800, 0x1ffff,
+ 0x2a6d7, 0x2f7ff,
+ 0x2fa1e, 0xe0000,
+ 0xe0002, 0xe001f,
+ 0xe0080, 0xe00ff,
+ 0xe01f0, 0xeffff,
+ 0xffffe, 0xfffff,
+ 0x10fffe, 0x10ffff
+ }; /* CR_Cn */
+
+ /* 'Co': General Category "Private Use". Layout: first element is the range count (3), followed by that many inclusive [from, to] pairs. */
+ static final int CR_Co[] = {
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+ }; /* CR_Co */
+
+ /* 'Cs': General Category "Surrogate". Layout: first element is the range count (1), followed by that many inclusive [from, to] pairs. */
+ static final int CR_Cs[] = {
+ 1,
+ 0xd800, 0xdfff
+ }; /* CR_Cs */
+
+ /* 'L': Major Category */
+ static final int CR_L[] = {
+ 347,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063a,
+ 0x0640, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x076d,
+ 0x0780, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x097d, 0x097d,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x1050, 0x1055,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa800, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d
+ }; /* CR_L */
+
+ /* 'Ll': General Category */
+ static final int CR_Ll[] = {
+ 480,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0239,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2c30, 0x2c5e,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+ }; /* CR_Ll */
+
+ /* 'Lm': General Category */
+ static final int CR_Lm[] = {
+ 26,
+ 0x02b0, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x037a, 0x037a,
+ 0x0559, 0x0559,
+ 0x0640, 0x0640,
+ 0x06e5, 0x06e6,
+ 0x0e46, 0x0e46,
+ 0x0ec6, 0x0ec6,
+ 0x10fc, 0x10fc,
+ 0x17d7, 0x17d7,
+ 0x1843, 0x1843,
+ 0x1d2c, 0x1d61,
+ 0x1d78, 0x1d78,
+ 0x1d9b, 0x1dbf,
+ 0x2090, 0x2094,
+ 0x2d6f, 0x2d6f,
+ 0x3005, 0x3005,
+ 0x3031, 0x3035,
+ 0x303b, 0x303b,
+ 0x309d, 0x309e,
+ 0x30fc, 0x30fe,
+ 0xa015, 0xa015,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f
+ }; /* CR_Lm */
+
+ /* 'Lo': General Category */
+ static final int CR_Lo[] = {
+ 245,
+ 0x01bb, 0x01bb,
+ 0x01c0, 0x01c3,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063a,
+ 0x0641, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x076d,
+ 0x0780, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x097d, 0x097d,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e45,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x1050, 0x1055,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1842,
+ 0x1844, 0x1877,
+ 0x1880, 0x18a8,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x2135, 0x2138,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3006, 0x3006,
+ 0x303c, 0x303c,
+ 0x3041, 0x3096,
+ 0x309f, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30ff, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa014,
+ 0xa016, 0xa48c,
+ 0xa800, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10450, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d
+ }; /* CR_Lo */
+
+ /* 'Lt': General Category */
+ static final int CR_Lt[] = {
+ 10,
+ 0x01c5, 0x01c5,
+ 0x01c8, 0x01c8,
+ 0x01cb, 0x01cb,
+ 0x01f2, 0x01f2,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fbc, 0x1fbc,
+ 0x1fcc, 0x1fcc,
+ 0x1ffc, 0x1ffc
+ }; /* CR_Lt */
+
+ /* 'Lu': General Category */
+ static final int CR_Lu[] = {
+ 476,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2c00, 0x2c2e,
+ 0x2c80, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+ }; /* CR_Lu */
+
+ /* 'M': Major Category */
+ static final int CR_M[] = {
+ 133,
+ 0x0300, 0x036f,
+ 0x0483, 0x0486,
+ 0x0488, 0x0489,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x0615,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06de, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x0901, 0x0903,
+ 0x093c, 0x093c,
+ 0x093e, 0x094d,
+ 0x0951, 0x0954,
+ 0x0962, 0x0963,
+ 0x0981, 0x0983,
+ 0x09bc, 0x09bc,
+ 0x09be, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a03,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a70, 0x0a71,
+ 0x0a81, 0x0a83,
+ 0x0abc, 0x0abc,
+ 0x0abe, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b3c, 0x0b3c,
+ 0x0b3e, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b82, 0x0b82,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c82, 0x0c83,
+ 0x0cbc, 0x0cbc,
+ 0x0cbe, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d82, 0x0d83,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f3f,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1056, 0x1059,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b6, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a17, 0x1a1b,
+ 0x1dc0, 0x1dc3,
+ 0x20d0, 0x20eb,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa823, 0xa827,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef
+ }; /* CR_M */
+
+ /* 'Mc': General Category */
+ static final int CR_Mc[] = {
+ 63,
+ 0x0903, 0x0903,
+ 0x093e, 0x0940,
+ 0x0949, 0x094c,
+ 0x0982, 0x0983,
+ 0x09be, 0x09c0,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09d7, 0x09d7,
+ 0x0a03, 0x0a03,
+ 0x0a3e, 0x0a40,
+ 0x0a83, 0x0a83,
+ 0x0abe, 0x0ac0,
+ 0x0ac9, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0b02, 0x0b03,
+ 0x0b3e, 0x0b3e,
+ 0x0b40, 0x0b40,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b57, 0x0b57,
+ 0x0bbe, 0x0bbf,
+ 0x0bc1, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c41, 0x0c44,
+ 0x0c82, 0x0c83,
+ 0x0cbe, 0x0cbe,
+ 0x0cc0, 0x0cc4,
+ 0x0cc7, 0x0cc8,
+ 0x0cca, 0x0ccb,
+ 0x0cd5, 0x0cd6,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d40,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d57, 0x0d57,
+ 0x0d82, 0x0d83,
+ 0x0dcf, 0x0dd1,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0f3e, 0x0f3f,
+ 0x0f7f, 0x0f7f,
+ 0x102c, 0x102c,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x1056, 0x1057,
+ 0x17b6, 0x17b6,
+ 0x17be, 0x17c5,
+ 0x17c7, 0x17c8,
+ 0x1923, 0x1926,
+ 0x1929, 0x192b,
+ 0x1930, 0x1931,
+ 0x1933, 0x1938,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a19, 0x1a1b,
+ 0xa802, 0xa802,
+ 0xa823, 0xa824,
+ 0xa827, 0xa827,
+ 0x1d165, 0x1d166,
+ 0x1d16d, 0x1d172
+ }; /* CR_Mc */
+
+ /* 'Me': General Category */
+ static final int CR_Me[] = {
+ 4,
+ 0x0488, 0x0489,
+ 0x06de, 0x06de,
+ 0x20dd, 0x20e0,
+ 0x20e2, 0x20e4
+ }; /* CR_Me */
+
+ /* 'Mn': General Category */
+ static final int CR_Mn[] = {
+ 124,
+ 0x0300, 0x036f,
+ 0x0483, 0x0486,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x0615,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06df, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x0901, 0x0902,
+ 0x093c, 0x093c,
+ 0x0941, 0x0948,
+ 0x094d, 0x094d,
+ 0x0951, 0x0954,
+ 0x0962, 0x0963,
+ 0x0981, 0x0981,
+ 0x09bc, 0x09bc,
+ 0x09c1, 0x09c4,
+ 0x09cd, 0x09cd,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a02,
+ 0x0a3c, 0x0a3c,
+ 0x0a41, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a70, 0x0a71,
+ 0x0a81, 0x0a82,
+ 0x0abc, 0x0abc,
+ 0x0ac1, 0x0ac5,
+ 0x0ac7, 0x0ac8,
+ 0x0acd, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b01,
+ 0x0b3c, 0x0b3c,
+ 0x0b3f, 0x0b3f,
+ 0x0b41, 0x0b43,
+ 0x0b4d, 0x0b4d,
+ 0x0b56, 0x0b56,
+ 0x0b82, 0x0b82,
+ 0x0bc0, 0x0bc0,
+ 0x0bcd, 0x0bcd,
+ 0x0c3e, 0x0c40,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0cbc, 0x0cbc,
+ 0x0cbf, 0x0cbf,
+ 0x0cc6, 0x0cc6,
+ 0x0ccc, 0x0ccd,
+ 0x0d41, 0x0d43,
+ 0x0d4d, 0x0d4d,
+ 0x0dca, 0x0dca,
+ 0x0dd2, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f71, 0x0f7e,
+ 0x0f80, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102d, 0x1030,
+ 0x1032, 0x1032,
+ 0x1036, 0x1037,
+ 0x1039, 0x1039,
+ 0x1058, 0x1059,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b7, 0x17bd,
+ 0x17c6, 0x17c6,
+ 0x17c9, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193b,
+ 0x1a17, 0x1a18,
+ 0x1dc0, 0x1dc3,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20eb,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa825, 0xa826,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef
+ }; /* CR_Mn */
+
+ /* 'N': Major Category */
+ static final int CR_N[] = {
+ 53,
+ 0x0030, 0x0039,
+ 0x00b2, 0x00b3,
+ 0x00b9, 0x00b9,
+ 0x00bc, 0x00be,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x09f4, 0x09f9,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bf2,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f33,
+ 0x1040, 0x1049,
+ 0x1369, 0x137c,
+ 0x16ee, 0x16f0,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0xff10, 0xff19,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10320, 0x10323,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5,
+ 0x104a0, 0x104a9,
+ 0x10a40, 0x10a47,
+ 0x1d7ce, 0x1d7ff
+ }; /* CR_N */
+
+ /* 'Nd': General Category */
+ static final int CR_Nd[] = {
+ 23,
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+ }; /* CR_Nd */
+
+ /* 'Nl': General Category */
+ static final int CR_Nl[] = {
+ 8,
+ 0x16ee, 0x16f0,
+ 0x2160, 0x2183,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x10140, 0x10174,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5
+ }; /* CR_Nl */
+
+ /* 'No': General Category */
+ static final int CR_No[] = {
+ 26,
+ 0x00b2, 0x00b3,
+ 0x00b9, 0x00b9,
+ 0x00bc, 0x00be,
+ 0x09f4, 0x09f9,
+ 0x0bf0, 0x0bf2,
+ 0x0f2a, 0x0f33,
+ 0x1369, 0x137c,
+ 0x17f0, 0x17f9,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2153, 0x215f,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x10107, 0x10133,
+ 0x10175, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10320, 0x10323,
+ 0x10a40, 0x10a47
+ }; /* CR_No */
+
+ /* 'P': Major Category */
+ static final int CR_P[] = {
+ 96,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd1,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x205e,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x10a50, 0x10a58
+ }; /* CR_P */
+
+ /* 'Pc': General Category */
+ static final int CR_Pc[] = {
+ 6,
+ 0x005f, 0x005f,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xff3f, 0xff3f
+ }; /* CR_Pc */
+
+ /* 'Pd': General Category */
+ static final int CR_Pd[] = {
+ 12,
+ 0x002d, 0x002d,
+ 0x058a, 0x058a,
+ 0x1806, 0x1806,
+ 0x2010, 0x2015,
+ 0x2e17, 0x2e17,
+ 0x301c, 0x301c,
+ 0x3030, 0x3030,
+ 0x30a0, 0x30a0,
+ 0xfe31, 0xfe32,
+ 0xfe58, 0xfe58,
+ 0xfe63, 0xfe63,
+ 0xff0d, 0xff0d
+ }; /* CR_Pd */
+
+ /* 'Pe': General Category */
+ static final int CR_Pe[] = {
+ 65,
+ 0x0029, 0x0029,
+ 0x005d, 0x005d,
+ 0x007d, 0x007d,
+ 0x0f3b, 0x0f3b,
+ 0x0f3d, 0x0f3d,
+ 0x169c, 0x169c,
+ 0x2046, 0x2046,
+ 0x207e, 0x207e,
+ 0x208e, 0x208e,
+ 0x232a, 0x232a,
+ 0x23b5, 0x23b5,
+ 0x2769, 0x2769,
+ 0x276b, 0x276b,
+ 0x276d, 0x276d,
+ 0x276f, 0x276f,
+ 0x2771, 0x2771,
+ 0x2773, 0x2773,
+ 0x2775, 0x2775,
+ 0x27c6, 0x27c6,
+ 0x27e7, 0x27e7,
+ 0x27e9, 0x27e9,
+ 0x27eb, 0x27eb,
+ 0x2984, 0x2984,
+ 0x2986, 0x2986,
+ 0x2988, 0x2988,
+ 0x298a, 0x298a,
+ 0x298c, 0x298c,
+ 0x298e, 0x298e,
+ 0x2990, 0x2990,
+ 0x2992, 0x2992,
+ 0x2994, 0x2994,
+ 0x2996, 0x2996,
+ 0x2998, 0x2998,
+ 0x29d9, 0x29d9,
+ 0x29db, 0x29db,
+ 0x29fd, 0x29fd,
+ 0x3009, 0x3009,
+ 0x300b, 0x300b,
+ 0x300d, 0x300d,
+ 0x300f, 0x300f,
+ 0x3011, 0x3011,
+ 0x3015, 0x3015,
+ 0x3017, 0x3017,
+ 0x3019, 0x3019,
+ 0x301b, 0x301b,
+ 0x301e, 0x301f,
+ 0xfd3f, 0xfd3f,
+ 0xfe18, 0xfe18,
+ 0xfe36, 0xfe36,
+ 0xfe38, 0xfe38,
+ 0xfe3a, 0xfe3a,
+ 0xfe3c, 0xfe3c,
+ 0xfe3e, 0xfe3e,
+ 0xfe40, 0xfe40,
+ 0xfe42, 0xfe42,
+ 0xfe44, 0xfe44,
+ 0xfe48, 0xfe48,
+ 0xfe5a, 0xfe5a,
+ 0xfe5c, 0xfe5c,
+ 0xfe5e, 0xfe5e,
+ 0xff09, 0xff09,
+ 0xff3d, 0xff3d,
+ 0xff5d, 0xff5d,
+ 0xff60, 0xff60,
+ 0xff63, 0xff63
+ }; /* CR_Pe */
+
+ /* 'Pf': General Category */
+ static final int CR_Pf[] = {
+ 9,
+ 0x00bb, 0x00bb,
+ 0x2019, 0x2019,
+ 0x201d, 0x201d,
+ 0x203a, 0x203a,
+ 0x2e03, 0x2e03,
+ 0x2e05, 0x2e05,
+ 0x2e0a, 0x2e0a,
+ 0x2e0d, 0x2e0d,
+ 0x2e1d, 0x2e1d
+ }; /* CR_Pf */
+
+ /* 'Pi': General Category */
+ static final int CR_Pi[] = {
+ 10,
+ 0x00ab, 0x00ab,
+ 0x2018, 0x2018,
+ 0x201b, 0x201c,
+ 0x201f, 0x201f,
+ 0x2039, 0x2039,
+ 0x2e02, 0x2e02,
+ 0x2e04, 0x2e04,
+ 0x2e09, 0x2e09,
+ 0x2e0c, 0x2e0c,
+ 0x2e1c, 0x2e1c
+ }; /* CR_Pi */
+
+ /* 'Po': General Category */
+ static final int CR_Po[] = {
+ 88,
+ 0x0021, 0x0023,
+ 0x0025, 0x0027,
+ 0x002a, 0x002a,
+ 0x002c, 0x002c,
+ 0x002e, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005c, 0x005c,
+ 0x00a1, 0x00a1,
+ 0x00b7, 0x00b7,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x0589,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd1,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x1805,
+ 0x1807, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x2016, 0x2017,
+ 0x2020, 0x2027,
+ 0x2030, 0x2038,
+ 0x203b, 0x203e,
+ 0x2041, 0x2043,
+ 0x2047, 0x2051,
+ 0x2053, 0x2053,
+ 0x2055, 0x205e,
+ 0x23b6, 0x23b6,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e01,
+ 0x2e06, 0x2e08,
+ 0x2e0b, 0x2e0b,
+ 0x2e0e, 0x2e16,
+ 0x3001, 0x3003,
+ 0x303d, 0x303d,
+ 0x30fb, 0x30fb,
+ 0xfe10, 0xfe16,
+ 0xfe19, 0xfe19,
+ 0xfe30, 0xfe30,
+ 0xfe45, 0xfe46,
+ 0xfe49, 0xfe4c,
+ 0xfe50, 0xfe52,
+ 0xfe54, 0xfe57,
+ 0xfe5f, 0xfe61,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff07,
+ 0xff0a, 0xff0a,
+ 0xff0c, 0xff0c,
+ 0xff0e, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3c, 0xff3c,
+ 0xff61, 0xff61,
+ 0xff64, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x10a50, 0x10a58
+ }; /* CR_Po */
+
+ /* 'Ps': General Category */
+ static final int CR_Ps[] = {
+ 67,
+ 0x0028, 0x0028,
+ 0x005b, 0x005b,
+ 0x007b, 0x007b,
+ 0x0f3a, 0x0f3a,
+ 0x0f3c, 0x0f3c,
+ 0x169b, 0x169b,
+ 0x201a, 0x201a,
+ 0x201e, 0x201e,
+ 0x2045, 0x2045,
+ 0x207d, 0x207d,
+ 0x208d, 0x208d,
+ 0x2329, 0x2329,
+ 0x23b4, 0x23b4,
+ 0x2768, 0x2768,
+ 0x276a, 0x276a,
+ 0x276c, 0x276c,
+ 0x276e, 0x276e,
+ 0x2770, 0x2770,
+ 0x2772, 0x2772,
+ 0x2774, 0x2774,
+ 0x27c5, 0x27c5,
+ 0x27e6, 0x27e6,
+ 0x27e8, 0x27e8,
+ 0x27ea, 0x27ea,
+ 0x2983, 0x2983,
+ 0x2985, 0x2985,
+ 0x2987, 0x2987,
+ 0x2989, 0x2989,
+ 0x298b, 0x298b,
+ 0x298d, 0x298d,
+ 0x298f, 0x298f,
+ 0x2991, 0x2991,
+ 0x2993, 0x2993,
+ 0x2995, 0x2995,
+ 0x2997, 0x2997,
+ 0x29d8, 0x29d8,
+ 0x29da, 0x29da,
+ 0x29fc, 0x29fc,
+ 0x3008, 0x3008,
+ 0x300a, 0x300a,
+ 0x300c, 0x300c,
+ 0x300e, 0x300e,
+ 0x3010, 0x3010,
+ 0x3014, 0x3014,
+ 0x3016, 0x3016,
+ 0x3018, 0x3018,
+ 0x301a, 0x301a,
+ 0x301d, 0x301d,
+ 0xfd3e, 0xfd3e,
+ 0xfe17, 0xfe17,
+ 0xfe35, 0xfe35,
+ 0xfe37, 0xfe37,
+ 0xfe39, 0xfe39,
+ 0xfe3b, 0xfe3b,
+ 0xfe3d, 0xfe3d,
+ 0xfe3f, 0xfe3f,
+ 0xfe41, 0xfe41,
+ 0xfe43, 0xfe43,
+ 0xfe47, 0xfe47,
+ 0xfe59, 0xfe59,
+ 0xfe5b, 0xfe5b,
+ 0xfe5d, 0xfe5d,
+ 0xff08, 0xff08,
+ 0xff3b, 0xff3b,
+ 0xff5b, 0xff5b,
+ 0xff5f, 0xff5f,
+ 0xff62, 0xff62
+ }; /* CR_Ps */
+
+ /* 'S': Major Category */
+ static final int CR_S[] = {
+ 162,
+ 0x0024, 0x0024,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00a2, 0x00a9,
+ 0x00ac, 0x00ac,
+ 0x00ae, 0x00b1,
+ 0x00b4, 0x00b4,
+ 0x00b6, 0x00b6,
+ 0x00b8, 0x00b8,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x02c2, 0x02c5,
+ 0x02d2, 0x02df,
+ 0x02e5, 0x02ed,
+ 0x02ef, 0x02ff,
+ 0x0374, 0x0375,
+ 0x0384, 0x0385,
+ 0x03f6, 0x03f6,
+ 0x0482, 0x0482,
+ 0x060b, 0x060b,
+ 0x060e, 0x060f,
+ 0x06e9, 0x06e9,
+ 0x06fd, 0x06fe,
+ 0x09f2, 0x09f3,
+ 0x09fa, 0x09fa,
+ 0x0af1, 0x0af1,
+ 0x0b70, 0x0b70,
+ 0x0bf3, 0x0bfa,
+ 0x0e3f, 0x0e3f,
+ 0x0f01, 0x0f03,
+ 0x0f13, 0x0f17,
+ 0x0f1a, 0x0f1f,
+ 0x0f34, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1360, 0x1360,
+ 0x1390, 0x1399,
+ 0x17db, 0x17db,
+ 0x1940, 0x1940,
+ 0x19e0, 0x19ff,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x20a0, 0x20b5,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x2132, 0x2132,
+ 0x213a, 0x213b,
+ 0x2140, 0x2144,
+ 0x214a, 0x214c,
+ 0x2190, 0x2328,
+ 0x232b, 0x23b3,
+ 0x23b7, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2767,
+ 0x2794, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c4,
+ 0x27d0, 0x27e5,
+ 0x27f0, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2b13,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x309b, 0x309c,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31cf,
+ 0x3200, 0x321e,
+ 0x322a, 0x3243,
+ 0x3250, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa828, 0xa82b,
+ 0xfb29, 0xfb29,
+ 0xfdfc, 0xfdfd,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10102, 0x10102,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x103d0, 0x103d0,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3
+ }; /* CR_S */
+
+ /* 'Sc': General Category */
+ static final int CR_Sc[] = {
+ 14,
+ 0x0024, 0x0024,
+ 0x00a2, 0x00a5,
+ 0x060b, 0x060b,
+ 0x09f2, 0x09f3,
+ 0x0af1, 0x0af1,
+ 0x0bf9, 0x0bf9,
+ 0x0e3f, 0x0e3f,
+ 0x17db, 0x17db,
+ 0x20a0, 0x20b5,
+ 0xfdfc, 0xfdfc,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xffe0, 0xffe1,
+ 0xffe5, 0xffe6
+ }; /* CR_Sc */
+
+ /* 'Sk': General Category */
+ static final int CR_Sk[] = {
+ 23,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x00a8, 0x00a8,
+ 0x00af, 0x00af,
+ 0x00b4, 0x00b4,
+ 0x00b8, 0x00b8,
+ 0x02c2, 0x02c5,
+ 0x02d2, 0x02df,
+ 0x02e5, 0x02ed,
+ 0x02ef, 0x02ff,
+ 0x0374, 0x0375,
+ 0x0384, 0x0385,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x309b, 0x309c,
+ 0xa700, 0xa716,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xffe3, 0xffe3
+ }; /* CR_Sk */
+
+ /* 'Sm': General Category */
+ static final int CR_Sm[] = {
+ 59,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00ac, 0x00ac,
+ 0x00b1, 0x00b1,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x03f6, 0x03f6,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x2140, 0x2144,
+ 0x214b, 0x214b,
+ 0x2190, 0x2194,
+ 0x219a, 0x219b,
+ 0x21a0, 0x21a0,
+ 0x21a3, 0x21a3,
+ 0x21a6, 0x21a6,
+ 0x21ae, 0x21ae,
+ 0x21ce, 0x21cf,
+ 0x21d2, 0x21d2,
+ 0x21d4, 0x21d4,
+ 0x21f4, 0x22ff,
+ 0x2308, 0x230b,
+ 0x2320, 0x2321,
+ 0x237c, 0x237c,
+ 0x239b, 0x23b3,
+ 0x25b7, 0x25b7,
+ 0x25c1, 0x25c1,
+ 0x25f8, 0x25ff,
+ 0x266f, 0x266f,
+ 0x27c0, 0x27c4,
+ 0x27d0, 0x27e5,
+ 0x27f0, 0x27ff,
+ 0x2900, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2aff,
+ 0xfb29, 0xfb29,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe2, 0xffe2,
+ 0xffe9, 0xffec,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3
+ }; /* CR_Sm */
+
+ /* 'So': General Category */
+ static final int CR_So[] = {
+ 120,
+ 0x00a6, 0x00a7,
+ 0x00a9, 0x00a9,
+ 0x00ae, 0x00ae,
+ 0x00b0, 0x00b0,
+ 0x00b6, 0x00b6,
+ 0x0482, 0x0482,
+ 0x060e, 0x060f,
+ 0x06e9, 0x06e9,
+ 0x06fd, 0x06fe,
+ 0x09fa, 0x09fa,
+ 0x0b70, 0x0b70,
+ 0x0bf3, 0x0bf8,
+ 0x0bfa, 0x0bfa,
+ 0x0f01, 0x0f03,
+ 0x0f13, 0x0f17,
+ 0x0f1a, 0x0f1f,
+ 0x0f34, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1360, 0x1360,
+ 0x1390, 0x1399,
+ 0x1940, 0x1940,
+ 0x19e0, 0x19ff,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x2132, 0x2132,
+ 0x213a, 0x213b,
+ 0x214a, 0x214a,
+ 0x214c, 0x214c,
+ 0x2195, 0x2199,
+ 0x219c, 0x219f,
+ 0x21a1, 0x21a2,
+ 0x21a4, 0x21a5,
+ 0x21a7, 0x21ad,
+ 0x21af, 0x21cd,
+ 0x21d0, 0x21d1,
+ 0x21d3, 0x21d3,
+ 0x21d5, 0x21f3,
+ 0x2300, 0x2307,
+ 0x230c, 0x231f,
+ 0x2322, 0x2328,
+ 0x232b, 0x237b,
+ 0x237d, 0x239a,
+ 0x23b7, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x25b6,
+ 0x25b8, 0x25c0,
+ 0x25c2, 0x25f7,
+ 0x2600, 0x266e,
+ 0x2670, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2767,
+ 0x2794, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x2800, 0x28ff,
+ 0x2b00, 0x2b13,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31cf,
+ 0x3200, 0x321e,
+ 0x322a, 0x3243,
+ 0x3250, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa828, 0xa82b,
+ 0xfdfd, 0xfdfd,
+ 0xffe4, 0xffe4,
+ 0xffe8, 0xffe8,
+ 0xffed, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10102, 0x10102,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x103d0, 0x103d0,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356
+ }; /* CR_So */
+
+ /* 'Z': Major Category */
+ static final int CR_Z[] = {
+ 9,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+ }; /* CR_Z */
+
+ /* 'Zl': General Category */
+ static final int CR_Zl[] = {
+ 1,
+ 0x2028, 0x2028
+ }; /* CR_Zl */
+
+ /* 'Zp': General Category */
+ static final int CR_Zp[] = {
+ 1,
+ 0x2029, 0x2029
+ }; /* CR_Zp */
+
+ /* 'Zs': General Category */
+ static final int CR_Zs[] = {
+ 8,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+ }; /* CR_Zs */
+}
diff --git a/src/org/joni/encoding/unicode/UnicodePropertiesScripts.java b/src/org/joni/encoding/unicode/UnicodePropertiesScripts.java
new file mode 100644
index 0000000..f68b74d
--- /dev/null
+++ b/src/org/joni/encoding/unicode/UnicodePropertiesScripts.java
@@ -0,0 +1,866 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.encoding.unicode;
+
+public interface UnicodePropertiesScripts {
+ /* 'Arabic': Script */
+ static final int CR_Arabic[] = {
+ 17,
+ 0x060b, 0x060b,
+ 0x060d, 0x0615,
+ 0x061e, 0x061e,
+ 0x0621, 0x063a,
+ 0x0641, 0x064a,
+ 0x0656, 0x065e,
+ 0x066a, 0x066f,
+ 0x0671, 0x06dc,
+ 0x06de, 0x06ff,
+ 0x0750, 0x076d,
+ 0xfb50, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfc,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc
+ }; /* CR_Arabic */
+
+ /* 'Armenian': Script */
+ static final int CR_Armenian[] = {
+ 5,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x058a, 0x058a,
+ 0xfb13, 0xfb17
+ }; /* CR_Armenian */
+
+ /* 'Bengali': Script */
+ static final int CR_Bengali[] = {
+ 14,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa
+ }; /* CR_Bengali */
+
+ /* 'Bopomofo': Script */
+ static final int CR_Bopomofo[] = {
+ 2,
+ 0x3105, 0x312c,
+ 0x31a0, 0x31b7
+ }; /* CR_Bopomofo */
+
+ /* 'Braille': Script */
+ static final int CR_Braille[] = {
+ 1,
+ 0x2800, 0x28ff
+ }; /* CR_Braille */
+
+ /* 'Buginese': Script */
+ static final int CR_Buginese[] = {
+ 2,
+ 0x1a00, 0x1a1b,
+ 0x1a1e, 0x1a1f
+ }; /* CR_Buginese */
+
+ /* 'Buhid': Script */
+ static final int CR_Buhid[] = {
+ 1,
+ 0x1740, 0x1753
+ }; /* CR_Buhid */
+
+ /* 'Canadian_Aboriginal': Script */
+ static final int CR_Canadian_Aboriginal[] = {
+ 1,
+ 0x1401, 0x1676
+ }; /* CR_Canadian_Aboriginal */
+
+ /* 'Cherokee': Script */
+ static final int CR_Cherokee[] = {
+ 1,
+ 0x13a0, 0x13f4
+ }; /* CR_Cherokee */
+
+ /* 'Common': Script */
+ static final int CR_Common[] = {
+ 126,
+ 0x0000, 0x0040,
+ 0x005b, 0x0060,
+ 0x007b, 0x00a9,
+ 0x00ab, 0x00b9,
+ 0x00bb, 0x00bf,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x02b9, 0x02df,
+ 0x02e5, 0x02ff,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x0589, 0x0589,
+ 0x0600, 0x0603,
+ 0x060c, 0x060c,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0640, 0x0640,
+ 0x0660, 0x0669,
+ 0x06dd, 0x06dd,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0e3f, 0x0e3f,
+ 0x10fb, 0x10fb,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x2000, 0x200b,
+ 0x200e, 0x2063,
+ 0x206a, 0x2070,
+ 0x2074, 0x207e,
+ 0x2080, 0x208e,
+ 0x20a0, 0x20b5,
+ 0x2100, 0x2125,
+ 0x2127, 0x2129,
+ 0x212c, 0x214c,
+ 0x2153, 0x2183,
+ 0x2190, 0x23db,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x269c,
+ 0x26a0, 0x26b1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c6,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x27ff,
+ 0x2900, 0x2b13,
+ 0x2e00, 0x2e17,
+ 0x2e1c, 0x2e1d,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x3004,
+ 0x3006, 0x3006,
+ 0x3008, 0x3020,
+ 0x3030, 0x3037,
+ 0x303c, 0x303f,
+ 0x309b, 0x309c,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fc,
+ 0x3190, 0x319f,
+ 0x31c0, 0x31cf,
+ 0x3220, 0x3243,
+ 0x3250, 0x325f,
+ 0x327e, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa700, 0xa716,
+ 0xe000, 0xf8ff,
+ 0xfd3e, 0xfd3f,
+ 0xfdfd, 0xfdfd,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xff20,
+ 0xff3b, 0xff40,
+ 0xff5b, 0xff65,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d166,
+ 0x1d16a, 0x1d17a,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+ }; /* CR_Common */
+
+ /* 'Coptic': Script */
+ static final int CR_Coptic[] = {
+ 3,
+ 0x03e2, 0x03ef,
+ 0x2c80, 0x2cea,
+ 0x2cf9, 0x2cff
+ }; /* CR_Coptic */
+
+ /* 'Cypriot': Script */
+ static final int CR_Cypriot[] = {
+ 6,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f
+ }; /* CR_Cypriot */
+
+ /* 'Cyrillic': Script */
+ static final int CR_Cyrillic[] = {
+ 6,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f9,
+ 0x0500, 0x050f,
+ 0x1d2b, 0x1d2b,
+ 0x1d78, 0x1d78
+ }; /* CR_Cyrillic */
+
+ /* 'Deseret': Script */
+ static final int CR_Deseret[] = {
+ 1,
+ 0x10400, 0x1044f
+ }; /* CR_Deseret */
+
+ /* 'Devanagari': Script */
+ static final int CR_Devanagari[] = {
+ 6,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x097d, 0x097d
+ }; /* CR_Devanagari */
+
+ /* 'Ethiopic': Script */
+ static final int CR_Ethiopic[] = {
+ 27,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde
+ }; /* CR_Ethiopic */
+
+ /* 'Georgian': Script */
+ static final int CR_Georgian[] = {
+ 4,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x2d00, 0x2d25
+ }; /* CR_Georgian */
+
+ /* 'Glagolitic': Script */
+ static final int CR_Glagolitic[] = {
+ 2,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e
+ }; /* CR_Glagolitic */
+
+ /* 'Gothic': Script */
+ static final int CR_Gothic[] = {
+ 1,
+ 0x10330, 0x1034a
+ }; /* CR_Gothic */
+
+ /* 'Greek': Script */
+ static final int CR_Greek[] = {
+ 31,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x0384, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03e1,
+ 0x03f0, 0x03ff,
+ 0x1d26, 0x1d2a,
+ 0x1d5d, 0x1d61,
+ 0x1d66, 0x1d6a,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2126, 0x2126,
+ 0x10140, 0x1018a,
+ 0x1d200, 0x1d245
+ }; /* CR_Greek */
+
+ /* 'Gujarati': Script */
+ static final int CR_Gujarati[] = {
+ 14,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1
+ }; /* CR_Gujarati */
+
+ /* 'Gurmukhi': Script */
+ static final int CR_Gurmukhi[] = {
+ 15,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74
+ }; /* CR_Gurmukhi */
+
+ /* 'Han': Script */
+ static final int CR_Han[] = {
+ 14,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x3005, 0x3005,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303b,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d
+ }; /* CR_Han */
+
+ /* 'Hangul': Script */
+ static final int CR_Hangul[] = {
+ 12,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x3131, 0x318e,
+ 0x3200, 0x321e,
+ 0x3260, 0x327d,
+ 0xac00, 0xd7a3,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc
+ }; /* CR_Hangul */
+
+ /* 'Hanunoo': Script */
+ static final int CR_Hanunoo[] = {
+ 1,
+ 0x1720, 0x1734
+ }; /* CR_Hanunoo */
+
+ /* 'Hebrew': Script */
+ static final int CR_Hebrew[] = {
+ 10,
+ 0x0591, 0x05b9,
+ 0x05bb, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfb4f
+ }; /* CR_Hebrew */
+
+ /* 'Hiragana': Script */
+ static final int CR_Hiragana[] = {
+ 2,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f
+ }; /* CR_Hiragana */
+
+ /* 'Inherited': Script */
+ static final int CR_Inherited[] = {
+ 15,
+ 0x0300, 0x036f,
+ 0x064b, 0x0655,
+ 0x0670, 0x0670,
+ 0x1dc0, 0x1dc3,
+ 0x200c, 0x200d,
+ 0x20d0, 0x20eb,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0xe0100, 0xe01ef
+ }; /* CR_Inherited */
+
+ /* 'Kannada': Script */
+ static final int CR_Kannada[] = {
+ 13,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef
+ }; /* CR_Kannada */
+
+ /* 'Katakana': Script */
+ static final int CR_Katakana[] = {
+ 5,
+ 0x30a1, 0x30fa,
+ 0x30fd, 0x30ff,
+ 0x31f0, 0x31ff,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d
+ }; /* CR_Katakana */
+
+ /* 'Kharoshthi': Script */
+ static final int CR_Kharoshthi[] = {
+ 8,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58
+ }; /* CR_Kharoshthi */
+
+ /* 'Khmer': Script */
+ static final int CR_Khmer[] = {
+ 4,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x19e0, 0x19ff
+ }; /* CR_Khmer */
+
+ /* 'Lao': Script */
+ static final int CR_Lao[] = {
+ 18,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd
+ }; /* CR_Lao */
+
+ /* 'Latin': Script */
+ static final int CR_Latin[] = {
+ 23,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0241,
+ 0x0250, 0x02b8,
+ 0x02e0, 0x02e4,
+ 0x1d00, 0x1d25,
+ 0x1d2c, 0x1d5c,
+ 0x1d62, 0x1d65,
+ 0x1d6b, 0x1d77,
+ 0x1d79, 0x1dbf,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x212a, 0x212b,
+ 0xfb00, 0xfb06,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a
+ }; /* CR_Latin */
+
+ /* 'Limbu': Script */
+ static final int CR_Limbu[] = {
+ 5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x194f
+ }; /* CR_Limbu */
+
+ /* 'Linear_B': Script */
+ static final int CR_Linear_B[] = {
+ 7,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa
+ }; /* CR_Linear_B */
+
+ /* 'Malayalam': Script */
+ static final int CR_Malayalam[] = {
+ 11,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f
+ }; /* CR_Malayalam */
+
+ /* 'Mongolian': Script */
+ static final int CR_Mongolian[] = {
+ 4,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9
+ }; /* CR_Mongolian */
+
+ /* 'Myanmar': Script */
+ static final int CR_Myanmar[] = {
+ 6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059
+ }; /* CR_Myanmar */
+
+ /* 'New_Tai_Lue': Script */
+ static final int CR_New_Tai_Lue[] = {
+ 4,
+ 0x1980, 0x19a9,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19d9,
+ 0x19de, 0x19df
+ }; /* CR_New_Tai_Lue */
+
+ /* 'Ogham': Script */
+ static final int CR_Ogham[] = {
+ 1,
+ 0x1680, 0x169c
+ }; /* CR_Ogham */
+
+ /* 'Old_Italic': Script */
+ static final int CR_Old_Italic[] = {
+ 2,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323
+ }; /* CR_Old_Italic */
+
+ /* 'Old_Persian': Script */
+ static final int CR_Old_Persian[] = {
+ 2,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103d5
+ }; /* CR_Old_Persian */
+
+ /* 'Oriya': Script */
+ static final int CR_Oriya[] = {
+ 14,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71
+ }; /* CR_Oriya */
+
+ /* 'Osmanya': Script */
+ static final int CR_Osmanya[] = {
+ 2,
+ 0x10480, 0x1049d,
+ 0x104a0, 0x104a9
+ }; /* CR_Osmanya */
+
+ /* 'Runic': Script */
+ static final int CR_Runic[] = {
+ 2,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0
+ }; /* CR_Runic */
+
+ /* 'Shavian': Script */
+ static final int CR_Shavian[] = {
+ 1,
+ 0x10450, 0x1047f
+ }; /* CR_Shavian */
+
+ /* 'Sinhala': Script */
+ static final int CR_Sinhala[] = {
+ 11,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4
+ }; /* CR_Sinhala */
+
+ /* 'Syloti_Nagri': Script */
+ static final int CR_Syloti_Nagri[] = {
+ 1,
+ 0xa800, 0xa82b
+ }; /* CR_Syloti_Nagri */
+
+ /* 'Syriac': Script */
+ static final int CR_Syriac[] = {
+ 3,
+ 0x0700, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f
+ }; /* CR_Syriac */
+
+ /* 'Tagalog': Script */
+ static final int CR_Tagalog[] = {
+ 2,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714
+ }; /* CR_Tagalog */
+
+ /* 'Tagbanwa': Script */
+ static final int CR_Tagbanwa[] = {
+ 3,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773
+ }; /* CR_Tagbanwa */
+
+ /* 'Tai_Le': Script */
+ static final int CR_Tai_Le[] = {
+ 2,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974
+ }; /* CR_Tai_Le */
+
+ /* 'Tamil': Script */
+ static final int CR_Tamil[] = {
+ 15,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa
+ }; /* CR_Tamil */
+
+ /* 'Telugu': Script */
+ static final int CR_Telugu[] = {
+ 12,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f
+ }; /* CR_Telugu */
+
+ /* 'Thaana': Script */
+ static final int CR_Thaana[] = {
+ 1,
+ 0x0780, 0x07b1
+ }; /* CR_Thaana */
+
+ /* 'Thai': Script */
+ static final int CR_Thai[] = {
+ 2,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e5b
+ }; /* CR_Thai */
+
+ /* 'Tibetan': Script */
+ static final int CR_Tibetan[] = {
+ 7,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fd1
+ }; /* CR_Tibetan */
+
+ /* 'Tifinagh': Script */
+ static final int CR_Tifinagh[] = {
+ 2,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f
+ }; /* CR_Tifinagh */
+
+ /* 'Ugaritic': Script */
+ static final int CR_Ugaritic[] = {
+ 2,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f
+ }; /* CR_Ugaritic */
+
+ /* 'Yi': Script */
+ static final int CR_Yi[] = {
+ 2,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6
+ }; /* CR_Yi */
+
+ // #endif /* USE_UNICODE_PROPERTIES */
+}
diff --git a/src/org/joni/exception/ErrorMessages.java b/src/org/joni/exception/ErrorMessages.java
new file mode 100644
index 0000000..86d019e
--- /dev/null
+++ b/src/org/joni/exception/ErrorMessages.java
@@ -0,0 +1,95 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+import org.joni.Config;
+
+ /**
+  * Message texts used when reporting regular-expression errors.
+  *
+  * <p>Several messages contain a {@code %n} placeholder; {@link ValueException}
+  * substitutes the offending name or value for it when built with an extra
+  * string argument. Two messages depend on {@code Config.USE_NAMED_GROUP}.
+  *
+  * <p>Fields of an interface are implicitly {@code public static final}, so
+  * no modifiers are repeated on the individual constants.
+  */
+ public interface ErrorMessages {
+     String MISMATCH = "mismatch";
+     String NO_SUPPORT_CONFIG = "no support in this configuration";
+
+     /* ---- internal errors ---- */
+     String ERR_MEMORY = "fail to memory allocation";
+     String ERR_MATCH_STACK_LIMIT_OVER = "match-stack limit over";
+     String ERR_TYPE_BUG = "undefined type (bug)";
+     String ERR_PARSER_BUG = "internal parser error (bug)";
+     String ERR_STACK_BUG = "stack error (bug)";
+     String ERR_UNDEFINED_BYTECODE = "undefined bytecode (bug)";
+     String ERR_UNEXPECTED_BYTECODE = "unexpected bytecode (bug)";
+     String ERR_DEFAULT_ENCODING_IS_NOT_SETTED = "default multibyte-encoding is not setted";
+     String ERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR = "can't convert to wide-char on specified multibyte-encoding";
+
+     /* ---- general errors ---- */
+     String ERR_INVALID_ARGUMENT = "invalid argument";
+
+     /* ---- syntax errors ---- */
+     String ERR_END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace";
+     String ERR_END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket";
+     String ERR_EMPTY_CHAR_CLASS = "empty char-class";
+     String ERR_PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class";
+     String ERR_END_PATTERN_AT_ESCAPE = "end pattern at escape";
+     String ERR_END_PATTERN_AT_META = "end pattern at meta";
+     String ERR_END_PATTERN_AT_CONTROL = "end pattern at control";
+     String ERR_META_CODE_SYNTAX = "invalid meta-code syntax";
+     String ERR_CONTROL_CODE_SYNTAX = "invalid control-code syntax";
+     String ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range";
+     String ERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range";
+     String ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class";
+     String ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified";
+     String ERR_TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid";
+     String ERR_NESTED_REPEAT_OPERATOR = "nested repeat operator";
+     String ERR_UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis";
+     String ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis";
+     String ERR_END_PATTERN_IN_GROUP = "end pattern in group";
+     String ERR_UNDEFINED_GROUP_OPTION = "undefined group option";
+     String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type";
+     String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind";
+     String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}";
+
+     /* ---- value errors (reported as syntax errors) ---- */
+     String ERR_TOO_BIG_NUMBER = "too big number";
+     String ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range";
+     String ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range";
+     String ERR_EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class";
+     String ERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range";
+     String ERR_TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified";
+     String ERR_TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string";
+     String ERR_TOO_BIG_BACKREF_NUMBER = "too big backref number";
+     // Wording depends on whether named groups are compiled in.
+     String ERR_INVALID_BACKREF = Config.USE_NAMED_GROUP
+             ? "invalid backref number/name"
+             : "invalid backref number";
+     String ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)";
+     String ERR_TOO_BIG_WIDE_CHAR_VALUE = "too big wide-char value";
+     String ERR_TOO_LONG_WIDE_CHAR_VALUE = "too long wide-char value";
+     String ERR_INVALID_WIDE_CHAR_VALUE = "invalid wide-char value";
+     String ERR_EMPTY_GROUP_NAME = "group name is empty";
+     String ERR_INVALID_GROUP_NAME = "invalid group name <%n>";
+     // Wording depends on whether named groups are compiled in.
+     String ERR_INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP
+             ? "invalid char in group name <%n>"
+             : "invalid char in group number <%n>";
+     String ERR_UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference";
+     String ERR_UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference";
+     String ERR_MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>";
+     String ERR_MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call";
+     String ERR_NEVER_ENDING_RECURSION = "never ending recursion";
+     String ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history";
+     String ERR_INVALID_CHAR_PROPERTY_NAME = "invalid character property name {%n}";
+     String ERR_INVALID_CODE_POINT_VALUE = "invalid code point value";
+     String ERR_NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination";
+     String ERR_INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options";
+     String ERR_OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count";
+     String ERR_TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value";
+ }
diff --git a/src/org/joni/exception/InternalException.java b/src/org/joni/exception/InternalException.java
new file mode 100644
index 0000000..959f44f
--- /dev/null
+++ b/src/org/joni/exception/InternalException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+ /**
+  * Unchecked exception for internal failures of the library; a
+  * {@link JOniException} that carries only a message.
+  */
+ public class InternalException extends JOniException {
+     private static final long serialVersionUID = -3871816465397927992L;
+
+     /**
+      * @param message text describing the internal failure
+      */
+     public InternalException(final String message) {
+         super(message);
+     }
+ }
diff --git a/src/org/joni/exception/JOniException.java b/src/org/joni/exception/JOniException.java
new file mode 100644
index 0000000..f5d728c
--- /dev/null
+++ b/src/org/joni/exception/JOniException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+ /**
+  * Root of the exception types in this package. It is unchecked (extends
+  * {@link RuntimeException}) and carries only a message.
+  */
+ public class JOniException extends RuntimeException {
+     private static final long serialVersionUID = -6027192180014164667L;
+
+     /**
+      * @param message text describing the failure, later available through
+      *                {@link #getMessage()}
+      */
+     public JOniException(final String message) {
+         super(message);
+     }
+ }
diff --git a/src/org/joni/exception/StandardException.java b/src/org/joni/exception/StandardException.java
new file mode 100644
index 0000000..72a5344
--- /dev/null
+++ b/src/org/joni/exception/StandardException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+ /**
+  * A {@link JOniException} subtype that carries only a message.
+  */
+ public class StandardException extends JOniException {
+     private static final long serialVersionUID = 5024868876657136769L;
+
+     /**
+      * @param message text describing the failure
+      */
+     public StandardException(final String message) {
+         super(message);
+     }
+ }
diff --git a/src/org/joni/exception/SyntaxException.java b/src/org/joni/exception/SyntaxException.java
new file mode 100644
index 0000000..7b00d1c
--- /dev/null
+++ b/src/org/joni/exception/SyntaxException.java
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+ /**
+  * A {@link JOniException} subtype for syntax errors; it is the superclass
+  * of {@link ValueException} and carries only a message.
+  */
+ public class SyntaxException extends JOniException {
+     private static final long serialVersionUID = 7862720128961874288L;
+
+     /**
+      * @param message text describing the syntax error
+      */
+     public SyntaxException(final String message) {
+         super(message);
+     }
+ }
diff --git a/src/org/joni/exception/ValueException.java b/src/org/joni/exception/ValueException.java
new file mode 100644
index 0000000..b07ac42
--- /dev/null
+++ b/src/org/joni/exception/ValueException.java
@@ -0,0 +1,37 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+ /**
+  * A {@link SyntaxException} whose message may contain a {@code %n}
+  * placeholder that is filled with the offending name or value.
+  */
+ public class ValueException extends SyntaxException{
+     private static final long serialVersionUID = -196013852479929134L;
+
+     /**
+      * @param message the message text, used verbatim
+      */
+     public ValueException(String message) {
+         super(message);
+     }
+
+     /**
+      * Builds the message by replacing every {@code %n} in {@code message}
+      * with {@code str}.
+      *
+      * <p>The substitution is literal. {@code String.replaceAll} must not be
+      * used here: it interprets {@code $} and {@code \} in the replacement as
+      * group references and escapes, so a name such as {@code a$b} would throw
+      * or be silently altered.
+      *
+      * @param message message template containing {@code %n}
+      * @param str     text inserted in place of each {@code %n}
+      */
+     public ValueException(String message, String str) {
+         super(message.replace("%n", str));
+     }
+
+     /**
+      * Same as {@link #ValueException(String, String)}, taking the inserted
+      * text from the byte range {@code [p, end)} of {@code bytes}.
+      *
+      * <p>The bytes are decoded with the platform default charset.
+      * NOTE(review): the pattern's own encoding is not known here, so
+      * non-ASCII names may be shown incorrectly - confirm against callers.
+      *
+      * @param message message template containing {@code %n}
+      * @param bytes   buffer holding the text to insert
+      * @param p       start offset (inclusive)
+      * @param end     end offset (exclusive)
+      */
+     public ValueException(String message, byte[]bytes, int p, int end) {
+         this(message, new String(bytes, p, end - p));
+     }
+
+ }
diff --git a/src/org/joni/util/ArrayCopy.java b/src/org/joni/util/ArrayCopy.java
new file mode 100644
index 0000000..c03b75d
--- /dev/null
+++ b/src/org/joni/util/ArrayCopy.java
@@ -0,0 +1,53 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.util;
+
+ /**
+  * Conversions between {@code int[]} and {@code byte[]} in which each int
+  * occupies four consecutive bytes, least significant byte first.
+  */
+ public class ArrayCopy {
+     /**
+      * Expands every int of {@code src} into four bytes, lowest byte first.
+      *
+      * @param src the ints to serialize
+      * @return a new array of length {@code src.length * 4}
+      */
+     public static byte[] int2byte(int[]src) {
+         final byte[] out = new byte[src.length << 2];
+         int pos = 0;
+         for (final int word : src) {
+             // Narrowing to byte keeps exactly the low eight bits.
+             out[pos++] = (byte) word;
+             out[pos++] = (byte) (word >>> 8);
+             out[pos++] = (byte) (word >>> 16);
+             out[pos++] = (byte) (word >>> 24);
+         }
+         return out;
+     }
+
+     /**
+      * Packs each group of four bytes of {@code src} into one int, lowest
+      * byte first. Trailing bytes that do not fill a group of four are ignored.
+      *
+      * @param src the bytes to pack
+      * @return a new array of length {@code src.length / 4}
+      */
+     public static int[] byte2int(byte[]src) {
+         final int[] out = new int[src.length >>> 2];
+         for (int i = 0, pos = 0; i < out.length; i++, pos += 4) {
+             // The four masked bytes occupy disjoint bit ranges, so OR-ing
+             // them gives the same value as adding them.
+             out[i] = (src[pos] & 0xff)
+                     | ((src[pos + 1] & 0xff) << 8)
+                     | ((src[pos + 2] & 0xff) << 16)
+                     | ((src[pos + 3] & 0xff) << 24);
+         }
+         return out;
+     }
+ }
diff --git a/src/org/joni/util/BytesHash.java b/src/org/joni/util/BytesHash.java
new file mode 100644
index 0000000..03097aa
--- /dev/null
+++ b/src/org/joni/util/BytesHash.java
@@ -0,0 +1,134 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.util;
+
+ /**
+  * Hash table keyed by a byte range {@code [p, end)} of a byte array.
+  *
+  * <p>Keys are not copied: each entry keeps a reference to the array it was
+  * inserted with, together with its offsets.
+  */
+ public final class BytesHash<V> extends Hash<V>{
+
+     public BytesHash() {
+         super();
+     }
+
+     public BytesHash(int size) {
+         super(size);
+     }
+
+     /** Chain entry that stores its key as an array reference plus offsets. */
+     public final static class OniBytesHashEntry<V> extends OniHashEntry<V> {
+         public final byte[]bytes;
+         public final int p;
+         public final int end;
+
+         public OniBytesHashEntry(int hash, OniHashEntry<V> next, V value, byte[]bytes, int p, int end) {
+             super(hash, next, value);
+             this.bytes = bytes;
+             this.p = p;
+             this.end = end;
+         }
+
+         /**
+          * Tests whether this entry's key has the same content as
+          * {@code bytes[p, end)}.
+          *
+          * @return {@code true} when both ranges have equal length and bytes
+          */
+         public boolean equals(byte[]bytes, int p, int end) {
+             if (this.end - this.p != end - p) return false;
+             // Sharing the array is only a shortcut when the range also starts
+             // at the same offset; two equal-length slices of one buffer (e.g.
+             // two names inside the same pattern) may still differ.
+             if (this.bytes == bytes && this.p == p) return true;
+             int q = this.p;
+             while (q < this.end) if (this.bytes[q++] != bytes[p++]) return false;
+             return true;
+         }
+     }
+
+     /**
+      * Computes the hash of {@code bytes[p, end)}. Bytes are added with sign
+      * extension, as the original code does.
+      */
+     public static int hashCode(byte[]bytes, int p, int end) {
+         int key = 0;
+         while (p < end) key = ((key << 16) + (key << 6) - key) + (int)(bytes[p++]); // & 0xff ? we have to match jruby string hash
+         key = key + (key >> 5);
+         return key;
+     }
+
+     public V put(byte[]bytes, V value) {
+         return put(bytes, 0, bytes.length, value);
+     }
+
+     /**
+      * Associates {@code value} with the key {@code bytes[p, end)}.
+      *
+      * @return {@code value} itself when an existing mapping was overwritten
+      *         (not the previous value), or {@code null} when a new entry
+      *         was added
+      */
+     public V put(byte[]bytes, int p, int end, V value) {
+         checkResize();
+         int hash = hashValue(hashCode(bytes, p, end));
+         int i = bucketIndex(hash, table.length);
+
+         for (OniBytesHashEntry<V> entry = (OniBytesHashEntry<V>)table[i]; entry != null; entry = (OniBytesHashEntry<V>)entry.next) {
+             if (entry.hash == hash && entry.equals(bytes, p, end)) {
+                 entry.value = value;
+                 return value;
+             }
+         }
+
+         table[i] = new OniBytesHashEntry<V>(hash, table[i], value, bytes, p, end);
+         size++;
+         return null;
+     }
+
+     public void putDirect(byte[]bytes, V value) {
+         putDirect(bytes, 0, bytes.length, value);
+     }
+
+     /**
+      * Inserts a new entry at the head of its bucket without looking for an
+      * existing entry with the same key.
+      */
+     public void putDirect(byte[]bytes, int p, int end, V value) {
+         checkResize();
+         final int hash = hashValue(hashCode(bytes, p, end));
+         final int i = bucketIndex(hash, table.length);
+         table[i] = new OniBytesHashEntry<V>(hash, table[i], value, bytes, p, end);
+         size++;
+     }
+
+     public V get(byte[]bytes) {
+         return get(bytes, 0, bytes.length);
+     }
+
+     /**
+      * @return the value stored for {@code bytes[p, end)}, or {@code null}
+      *         when there is none
+      */
+     public V get(byte[]bytes, int p, int end) {
+         int hash = hashValue(hashCode(bytes, p, end));
+         for (OniBytesHashEntry<V> entry = (OniBytesHashEntry<V>)table[bucketIndex(hash, table.length)]; entry != null; entry = (OniBytesHashEntry<V>)entry.next) {
+             if (entry.hash == hash && entry.equals(bytes, p, end)) return entry.value;
+         }
+         return null;
+     }
+
+     public V delete(byte[]bytes) {
+         return delete(bytes, 0, bytes.length);
+     }
+
+     /**
+      * Removes the first entry in the bucket whose key equals
+      * {@code bytes[p, end)}.
+      *
+      * @return the removed entry's value, or {@code null} when no entry matched
+      */
+     public V delete(byte[]bytes, int p, int end) {
+         int hash = hashValue(hashCode(bytes, p, end));
+         int i = bucketIndex(hash, table.length);
+
+         OniBytesHashEntry<V> entry = (OniBytesHashEntry<V>)table[i];
+
+         if (entry == null) return null;
+
+         // The head of the chain is unlinked by repointing the bucket slot.
+         if (entry.hash == hash && entry.equals(bytes, p, end)) {
+             table[i] = entry.next;
+             size--;
+             return entry.value;
+         }
+
+         // For the rest of the chain, 'entry' is the predecessor and 'tmp' the
+         // candidate; the key comparison must be made on the candidate.
+         for (; entry.next != null; entry = (OniBytesHashEntry<V>)entry.next) {
+             OniBytesHashEntry<V> tmp = (OniBytesHashEntry<V>)entry.next;
+             if (tmp.hash == hash && tmp.equals(bytes, p, end)) {
+                 entry.next = tmp.next;
+                 size--;
+                 return tmp.value;
+             }
+         }
+         return null;
+     }
+
+ }
diff --git a/src/org/joni/util/Hash.java b/src/org/joni/util/Hash.java
new file mode 100644
index 0000000..c2b2f71
--- /dev/null
+++ b/src/org/joni/util/Hash.java
@@ -0,0 +1,180 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.util;
+
+import java.util.Iterator;
+
+import org.joni.exception.InternalException;
+
+ /**
+  * Base class for the package's chained hash tables.
+  *
+  * <p>Each bucket of {@link #table} heads a singly linked list of
+  * {@link OniHashEntry} objects. Subclasses supply the key type, the key
+  * hash and the lookup/insert methods; this class supplies storage, growth
+  * and iteration over values.
+  */
+ abstract class Hash<V> implements Iterable<V> {
+     /** Bucket array; each slot is the head of a chain or {@code null}. */
+     protected OniHashEntry<V>[]table;
+     /** Number of entries currently stored. */
+     protected int size;
+
+     // Table sizes: a prime slightly above each power of two starting at 8.
+     // The trailing 0 is never selected by the loops below.
+     private static final int PRIMES[] = {
+         8 + 3, 16 + 3, 32 + 5, 64 + 3, 128 + 3, 256 + 27, 512 + 9, 1024 + 9, 2048 + 5, 4096 + 3,
+         8192 + 27, 16384 + 43, 32768 + 3, 65536 + 45, 131072 + 29, 262144 + 3, 524288 + 21, 1048576 + 7,
+         2097152 + 17, 4194304 + 15, 8388608 + 9, 16777216 + 43, 33554432 + 35, 67108864 + 15,
+         134217728 + 29, 268435456 + 3, 536870912 + 11, 1073741824 + 85, 0
+     };
+
+     private static final int INITIAL_CAPACITY = PRIMES[0];
+     // NOTE(review): not referenced anywhere in this class.
+     private static final int MAXIMUM_CAPACITY = 1 << 30;
+
+
+     /** Creates a table with the smallest capacity from the prime list. */
+     @SuppressWarnings("unchecked") // generic array creation
+     public Hash() {
+         table = new OniHashEntry[INITIAL_CAPACITY];
+     }
+
+     /**
+      * Creates a table sized for roughly {@code size} entries: the prime that
+      * belongs to the first power of two (starting at 8) greater than
+      * {@code size}.
+      *
+      * @throws InternalException when no listed capacity exceeds {@code size}
+      */
+     @SuppressWarnings("unchecked") // generic array creation
+     public Hash(int size) {
+         for (int i=0, n=MIN_CAPA; i<PRIMES.length; i++, n <<=1) {
+             if (n > size) {
+                 table = new OniHashEntry[PRIMES[i]];
+                 return;
+             }
+         }
+         throw new InternalException("run out of polynomials");
+     }
+
+     /** @return the number of entries stored */
+     public final int size() {
+         return size;
+     }
+
+     /** One node of a bucket chain. */
+     static class OniHashEntry<V> {
+         final int hash;
+         OniHashEntry<V> next;
+         public V value;
+
+         OniHashEntry(int hash, OniHashEntry<V> next, V value) {
+             this.hash = hash;
+             this.next = next;
+             this.value = value;
+         }
+
+     }
+
+     private static final int MIN_CAPA = 8;
+     // private static final int DENSITY = 5;
+
+     /**
+      * Grows the table to the next listed prime once the entry count equals
+      * the bucket count. Does nothing when no larger capacity is listed.
+      */
+     protected final void checkResize() {
+         if (size == table.length) { // size / table.length > DENSITY
+             int forSize = table.length + 1; // size + 1;
+             for (int i=0, newCapacity = MIN_CAPA; i < PRIMES.length; i++, newCapacity <<= 1) {
+                 if (newCapacity > forSize) {
+                     resize(PRIMES[i]);
+                     return;
+                 }
+             }
+         }
+     }
+
+     /**
+      * Moves every entry into a new bucket array of {@code newCapacity}
+      * slots, relinking the existing entry objects.
+      */
+     protected final void resize(int newCapacity) {
+         final OniHashEntry<V>[] oldTable = table;
+         @SuppressWarnings("unchecked") // generic array creation
+         final OniHashEntry<V>[] newTable = new OniHashEntry[newCapacity];
+         for (int j = 0; j < oldTable.length; j++) {
+             OniHashEntry<V> entry = oldTable[j];
+             oldTable[j] = null;
+             while (entry != null) {
+                 OniHashEntry<V> next = entry.next;
+                 int i = bucketIndex(entry.hash, newCapacity);
+                 entry.next = newTable[i];
+                 newTable[i] = entry;
+                 entry = next;
+             }
+         }
+         table = newTable;
+     }
+
+     /**
+      * Maps a hash to a bucket index. {@code h} is expected to be
+      * non-negative, as produced by {@link #hashValue(int)}.
+      */
+     protected static int bucketIndex(final int h, final int length) {
+         return (h % length);
+     }
+
+     private static final int HASH_SIGN_BIT_MASK = ~(1 << 31);
+
+     /** Clears the sign bit so the remainder in {@link #bucketIndex} is non-negative. */
+     protected static int hashValue(int h) {
+         return h & HASH_SIGN_BIT_MASK;
+     }
+
+     /** @return an iterator over the stored values, in bucket order */
+     public Iterator<V> iterator() {
+         return new OniHashIterator();
+     }
+
+     /** Iterator over the values of all entries. */
+     public class OniHashIterator implements Iterator<V> {
+         OniHashEntry<V> next, current;
+         int index;
+
+         public OniHashIterator() {
+             if (size > 0) {
+                 OniHashEntry<V>[]t= table;
+                 // Skip ahead to the first non-empty bucket.
+                 while (index < t.length && (next = t[index++]) == null);
+             }
+         }
+
+         public boolean hasNext() {
+             return next != null;
+         }
+
+         /**
+          * @throws java.util.NoSuchElementException when the iteration has
+          *         no more elements
+          */
+         public V next() {
+             OniHashEntry<V> e = next;
+             if (e == null) throw new java.util.NoSuchElementException();
+             if ((next = e.next) == null) {
+                 OniHashEntry<V>[]t = table;
+                 // Chain exhausted: move on to the next non-empty bucket.
+                 while (index < t.length && (next = t[index++]) == null);
+             }
+             current = e;
+             return e.value;
+         }
+
+         /** Removal is not supported; always throws {@link InternalException}. */
+         public void remove() {
+             throw new InternalException("not supported operation exception");
+         }
+     }
+
+     // NOTE(review): despite its name this returns the value iterator, and
+     // OniHashEntryIterator below is never instantiated in this class. The
+     // return type is kept as-is because changing it would alter the signature
+     // callers compile against.
+     public OniHashIterator entryIterator() {
+         return new OniHashIterator();
+     }
+
+     /** Iterator over the entries themselves rather than their values. */
+     public class OniHashEntryIterator implements Iterator<OniHashEntry<V>> {
+         OniHashEntry<V> next, current;
+         int index;
+
+         public OniHashEntryIterator() {
+             if (size > 0) {
+                 OniHashEntry<V>[]t= table;
+                 // Skip ahead to the first non-empty bucket.
+                 while (index < t.length && (next = t[index++]) == null);
+             }
+         }
+
+         public boolean hasNext() {
+             return next != null;
+         }
+
+         /**
+          * @throws java.util.NoSuchElementException when the iteration has
+          *         no more elements
+          */
+         public OniHashEntry<V> next() {
+             OniHashEntry<V> e = next;
+             if (e == null) throw new java.util.NoSuchElementException();
+             if ((next = e.next) == null) {
+                 OniHashEntry<V>[]t = table;
+                 // Chain exhausted: move on to the next non-empty bucket.
+                 while (index < t.length && (next = t[index++]) == null);
+             }
+             current = e;
+             return e;
+         }
+
+         /** Removal is not supported; always throws {@link InternalException}. */
+         public void remove() {
+             throw new InternalException("not supported operation exception");
+         }
+     }
+ }
diff --git a/src/org/joni/util/IntArrayHash.java b/src/org/joni/util/IntArrayHash.java
new file mode 100644
index 0000000..65655be
--- /dev/null
+++ b/src/org/joni/util/IntArrayHash.java
@@ -0,0 +1,126 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.util;
+
+ /**
+  * Hash table whose keys are int arrays, compared element by element.
+  * Colliding entries are chained through {@code OniHashEntry.next}.
+  */
+ public final class IntArrayHash<V> extends Hash<V>{
+
+     public IntArrayHash() {
+         super();
+     }
+
+     public IntArrayHash(int size) {
+         super(size);
+     }
+
+     /** Chain entry that additionally stores the int-array key. */
+     public final static class OniIntArrayHashEntry<V> extends OniHashEntry<V> {
+         public final int[]key;
+
+         public OniIntArrayHashEntry(int hash, OniHashEntry<V> next, V value, int[]key) {
+             super(hash, next, value);
+             this.key = key;
+         }
+
+         /**
+          * Tells whether this entry's key has the same length and the same
+          * elements as {@code key}. Lengths 1 to 4 are compared without a loop.
+          */
+         public boolean equals(int[]key) {
+             if (this.key == key) return true;
+             if (this.key.length != key.length) return false;
+
+             switch(key.length) {
+             case 1: return this.key[0] == key[0];
+             case 2: return this.key[0] == key[0] && this.key[1] == key[1];
+             case 3: return this.key[0] == key[0] && this.key[1] == key[1] && this.key[2] == key[2];
+             case 4: return this.key[0] == key[0] && this.key[1] == key[1] && this.key[2] == key[2] && this.key[3] == key[3];
+             default: for (int i=0; i<key.length;i++) if (this.key[i] != key[i]) return false;
+             }
+             return true;
+         }
+     }
+
+     /** Raw hash of a key: the (wrapping) sum of its elements. */
+     private int hashCode(int[]key) {
+         switch(key.length) {
+         case 1: return key[0];
+         case 2: return key[0] + key[1];
+         case 3: return key[0] + key[1] + key[2];
+         case 4: return key[0] + key[1] + key[2] + key[3];
+         default:
+             int h = 0;
+             for (int i=0; i<key.length; i++) h += key[i];
+             return h;
+         }
+     }
+
+     /**
+      * Associates {@code value} with {@code key}, overwriting an existing mapping.
+      *
+      * @return {@code value} itself when an existing entry was overwritten
+      *         (not the previous value), {@code null} when a new entry was added
+      */
+     public V put(int[]key, V value) {
+         checkResize();
+         int hash = hashValue(hashCode(key));
+         int i = bucketIndex(hash, table.length);
+
+         for (OniIntArrayHashEntry<V> entry = (OniIntArrayHashEntry<V>)table[i]; entry != null; entry = (OniIntArrayHashEntry<V>)entry.next) {
+             if (entry.hash == hash && entry.equals(key)) {
+                 entry.value = value;
+                 return value;
+             }
+         }
+
+         table[i] = new OniIntArrayHashEntry<V>(hash, table[i], value, key);
+         size++;
+         return null;
+     }
+
+     /**
+      * Prepends a new entry to the key's bucket without looking for an
+      * existing entry with the same key.
+      */
+     public void putDirect(int[]key, V value) {
+         checkResize();
+         final int hash = hashValue(hashCode(key));
+         final int i = bucketIndex(hash, table.length);
+         table[i] = new OniIntArrayHashEntry<V>(hash, table[i], value, key);
+         size++;
+     }
+
+
+     /**
+      * Looks up the value stored for {@code key}.
+      *
+      * @return the stored value, or {@code null} when no entry matches
+      */
+     public V get(int ... key) {
+         int hash = hashValue(hashCode(key));
+         for (OniIntArrayHashEntry<V> entry = (OniIntArrayHashEntry<V>)table[bucketIndex(hash, table.length)]; entry != null; entry = (OniIntArrayHashEntry<V>)entry.next) {
+             if (entry.hash == hash && entry.equals(key)) return entry.value;
+         }
+         return null;
+     }
+
+     /**
+      * Removes the first entry in the key's bucket whose key equals {@code key}.
+      *
+      * @return the removed entry's value, or {@code null} when no entry matches
+      */
+     public V delete(int ... key) {
+         int hash = hashValue(hashCode(key));
+         int i = bucketIndex(hash, table.length);
+
+         OniIntArrayHashEntry<V> entry = (OniIntArrayHashEntry<V>)table[i];
+
+         if (entry == null) return null;
+
+         // Head of the chain: unlink by redirecting the bucket slot.
+         if (entry.hash == hash && entry.equals(key)) {
+             table[i] = entry.next;
+             size--;
+             return entry.value;
+         }
+
+         // Rest of the chain: 'entry' is the predecessor, 'tmp' the candidate.
+         // The key test must be made on the candidate, not on the predecessor.
+         for (; entry.next != null; entry = (OniIntArrayHashEntry<V>)entry.next) {
+             OniIntArrayHashEntry<V> tmp = (OniIntArrayHashEntry<V>)entry.next;
+             if (tmp.hash == hash && tmp.equals(key)) {
+                 entry.next = tmp.next;
+                 size--;
+                 return tmp.value;
+             }
+         }
+         return null;
+     }
+ }
diff --git a/src/org/joni/util/IntHash.java b/src/org/joni/util/IntHash.java
new file mode 100644
index 0000000..8314923
--- /dev/null
+++ b/src/org/joni/util/IntHash.java
@@ -0,0 +1,95 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.util;
+
+ /**
+  * Hash table keyed by int. Entries do not store the key separately: an
+  * entry is identified solely by {@code hashValue(key)}, so two keys that
+  * produce the same {@code hashValue} are treated as the same key.
+  */
+ public class IntHash<V> extends Hash<V> {
+
+     public IntHash() {
+         super();
+     }
+
+     public IntHash(int size) {
+         super(size);
+     }
+
+     /** Chain entry for an int key; the key is represented by the stored hash. */
+     public static final class OniIntHashEntry<V> extends OniHashEntry<V> {
+         public OniIntHashEntry(int hash, OniHashEntry<V> next, V value) {
+             super(hash, next, value);
+         }
+     }
+
+     /**
+      * Associates {@code value} with {@code key}, overwriting an existing mapping.
+      *
+      * @return {@code value} itself when an existing entry was overwritten
+      *         (not the previous value), {@code null} when a new entry was added
+      */
+     public V put(int key, V value) {
+         checkResize();
+         int hash = hashValue(key);
+         int i = bucketIndex(hash, table.length);
+
+         for (OniIntHashEntry<V> entry = (OniIntHashEntry<V>)table[i]; entry != null; entry = (OniIntHashEntry<V>)entry.next) {
+             if (entry.hash == hash) {
+                 entry.value = value;
+                 return value;
+             }
+         }
+
+         table[i] = new OniIntHashEntry<V>(hash, table[i], value);
+         size++;
+         return null;
+     }
+
+     /**
+      * Prepends a new entry to the key's bucket without looking for an
+      * existing entry with the same key.
+      */
+     public void putDirect(int key, V value) {
+         checkResize();
+         final int hash = hashValue(key);
+         final int i = bucketIndex(hash, table.length);
+         table[i] = new OniIntHashEntry<V>(hash, table[i], value);
+         size++;
+     }
+
+     /**
+      * Looks up the value stored for {@code key}.
+      *
+      * @return the stored value, or {@code null} when no entry matches
+      */
+     public V get(int key) {
+         int hash = hashValue(key);
+         for (OniIntHashEntry<V> entry = (OniIntHashEntry<V>)table[bucketIndex(hash, table.length)]; entry != null; entry = (OniIntHashEntry<V>)entry.next) {
+             if (entry.hash == hash) return entry.value;
+         }
+         return null;
+     }
+
+     /**
+      * Removes the first entry in the key's bucket whose hash matches {@code key}.
+      *
+      * @return the removed entry's value, or {@code null} when no entry matches
+      */
+     public V delete(int key) {
+         int hash = hashValue(key);
+         int i = bucketIndex(hash, table.length);
+
+         OniIntHashEntry<V> entry = (OniIntHashEntry<V>)table[i];
+
+         if (entry == null) return null;
+
+         // Head of the chain: unlink by redirecting the bucket slot.
+         if (entry.hash == hash) {
+             table[i] = entry.next;
+             size--;
+             return entry.value;
+         }
+
+         // Rest of the chain: 'entry' is the predecessor, 'tmp' the candidate.
+         // As in put/get, a matching hash alone identifies the entry.
+         for (; entry.next != null; entry = (OniIntHashEntry<V>)entry.next) {
+             OniHashEntry<V> tmp = entry.next;
+             if (tmp.hash == hash) {
+                 entry.next = tmp.next;
+                 size--;
+                 return tmp.value;
+             }
+         }
+         return null;
+     }
+ }
diff --git a/src/org/joni/util/ObjHash.java b/src/org/joni/util/ObjHash.java
new file mode 100644
index 0000000..13e8f2c
--- /dev/null
+++ b/src/org/joni/util/ObjHash.java
@@ -0,0 +1,99 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package org.joni.util;
+
+ /**
+  * Hash table keyed by arbitrary objects, using the key's own
+  * {@code hashCode()} and {@code equals()}. Keys must not be {@code null}:
+  * every operation calls {@code key.hashCode()}.
+  */
+ public final class ObjHash<K, V> extends Hash<V> {
+     /** Chain entry that additionally stores the object key. */
+     public final static class OniObjHashEntry<K, V> extends OniHashEntry<V> {
+         public final K key;
+
+         public OniObjHashEntry(int hash, OniHashEntry<V> next, V value, K key) {
+             super(hash, next, value);
+             this.key = key;
+         }
+
+         /**
+          * Compares this entry's key (not the entry itself) with {@code key},
+          * first by identity and then with {@code equals}.
+          */
+         public boolean equals(Object key) {
+             if (this.key == key) return true;
+             return this.key.equals(key);
+         }
+     }
+
+     /**
+      * Associates {@code value} with {@code key}, overwriting an existing mapping.
+      *
+      * @return {@code value} itself when an existing entry was overwritten
+      *         (not the previous value), {@code null} when a new entry was added
+      */
+     public V put(K key, V value) {
+         checkResize();
+         int hash = hashValue(key.hashCode());
+         int i = bucketIndex(hash, table.length);
+
+         K k;
+         for (OniObjHashEntry<K, V> entry = (OniObjHashEntry<K, V>)table[i]; entry != null; entry = (OniObjHashEntry<K, V>)entry.next) {
+             if (entry.hash == hash && ((k = entry.key) == key || key.equals(k))) {
+                 entry.value = value;
+                 return value;
+             }
+         }
+
+         table[i] = new OniObjHashEntry<K, V>(hash, table[i], value, key);
+         size++;
+         return null;
+     }
+
+     /**
+      * Prepends a new entry to the key's bucket without looking for an
+      * existing entry with the same key.
+      */
+     public void putDirect(K key, V value) {
+         checkResize();
+         final int hash = hashValue(key.hashCode());
+         final int i = bucketIndex(hash, table.length);
+         table[i] = new OniObjHashEntry<K, V>(hash, table[i], value, key);
+         size++;
+     }
+
+
+     /**
+      * Looks up the value stored for {@code key}.
+      *
+      * @return the stored value, or {@code null} when no entry matches
+      */
+     public V get(K key) {
+         int hash = hashValue(key.hashCode());
+         K k;
+         for (OniObjHashEntry<K, V> entry = (OniObjHashEntry<K, V>)table[bucketIndex(hash, table.length)]; entry != null; entry = (OniObjHashEntry<K, V>)entry.next) {
+             if (entry.hash == hash && ((k = entry.key) == key || key.equals(k))) return entry.value;
+         }
+         return null;
+     }
+
+     /**
+      * Removes the first entry in the key's bucket whose key equals {@code key}.
+      *
+      * @return the removed entry's value, or {@code null} when no entry matches
+      */
+     public V delete(K key) {
+         int hash = hashValue(key.hashCode());
+         int i = bucketIndex(hash, table.length);
+
+         OniObjHashEntry<K, V> entry = (OniObjHashEntry<K, V>)table[i];
+
+         if (entry == null) return null;
+
+         K k;
+         // Head of the chain: unlink by redirecting the bucket slot.
+         if (entry.hash == hash && ((k = entry.key) == key || key.equals(k))) {
+             table[i] = entry.next;
+             size--;
+             return entry.value;
+         }
+
+         // Rest of the chain: 'entry' is the predecessor, 'tmp' the candidate.
+         // The key test must be made on the candidate, not on the predecessor.
+         for (; entry.next != null; entry = (OniObjHashEntry<K, V>)entry.next) {
+             OniObjHashEntry<K, V> tmp = (OniObjHashEntry<K, V>)entry.next;
+             if (tmp.hash == hash && ((k = tmp.key) == key || key.equals(k))) {
+                 entry.next = tmp.next;
+                 size--;
+                 return tmp.value;
+             }
+         }
+         return null;
+     }
+
+ }
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits
mailing list