[jruby-joni] 38/194: first batch for ascii/unicode modifiers

Hideki Yamane henrich at moszumanska.debian.org
Thu Feb 1 12:04:19 UTC 2018


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit 416f5f9bcd8ce9482648702fbe428aa5ae0b51e6
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date:   Thu Dec 28 19:26:13 2017 +0100

    first batch for ascii/unicode modifiers
---
 src/org/joni/Analyser.java         |  43 +++++-
 src/org/joni/ApplyCaseFold.java    |  40 +++--
 src/org/joni/ApplyCaseFoldArg.java |   5 +-
 src/org/joni/CodeRangeBuffer.java  |   2 +-
 src/org/joni/Config.java           |   2 +-
 src/org/joni/Lexer.java            |  29 ++--
 src/org/joni/Option.java           |  12 ++
 src/org/joni/Parser.java           | 303 +++++++++++++++++++++----------------
 src/org/joni/ScanEnvironment.java  |   2 +-
 src/org/joni/ast/CClassNode.java   | 115 +++++++-------
 test/org/joni/test/TestU8.java     |  24 +--
 11 files changed, 329 insertions(+), 248 deletions(-)

diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 3458468..b3458b7 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -35,9 +35,11 @@ import static org.joni.ast.QuantifierNode.isRepeatInfinite;
 import java.util.HashSet;
 
 import org.jcodings.CaseFoldCodeItem;
+import org.jcodings.Encoding;
 import org.jcodings.ObjPtr;
 import org.jcodings.Ptr;
 import org.jcodings.constants.CharacterType;
+import org.jcodings.specific.ASCIIEncoding;
 import org.joni.ast.AnchorNode;
 import org.joni.ast.BackRefNode;
 import org.joni.ast.CClassNode;
@@ -740,6 +742,10 @@ final class Analyser extends Parser {
         return len;
     }
 
+    boolean isMbcAsciiWord(Encoding enc, byte[]bytes, int p, int end) { // ONIGENC_IS_MBC_ASCII_WORD
+        return ASCIIEncoding.INSTANCE.isCodeCType(enc.mbcToCode(bytes, p, end), CharacterType.WORD);
+    }
+
     /* x is not included y ==>  1 : 0 */
     private boolean isNotIncluded(Node x, Node y) {
         Node tmp;
@@ -755,7 +761,7 @@ final class Analyser extends Parser {
             case NodeType.CTYPE:
                 CTypeNode cny = (CTypeNode)y;
                 CTypeNode cnx = (CTypeNode)x;
-                return cny.ctype == cnx.ctype && cny.not != cnx.not;
+                return cny.ctype == cnx.ctype && cny.not != cnx.not && cny.asciiRange == cnx.asciiRange;
 
             case NodeType.CCLASS:
                 // !swap:!
@@ -788,15 +794,27 @@ final class Analyser extends Parser {
                         if (xc.mbuf == null && !xc.isNot()) {
                             for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
                                 if (xc.bs.at(i)) {
-                                    if (enc.isSbWord(i)) return false;
+                                    if (((CTypeNode)y).asciiRange) {
+                                        if (enc.isSbWord(i)) return false;
+                                    } else {
+                                        if (enc.isWord(i)) return false;
+                                    }
                                 }
                             }
                             return true;
                         }
                         return false;
                     } else {
+                        if (xc.mbuf != null) return false;
                         for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
-                            if (!enc.isSbWord(i)) {
+                            boolean isWord;
+                            if (((CTypeNode)y).asciiRange) {
+                                isWord = enc.isSbWord(i);
+                            } else {
+                                isWord = enc.isWord(i);
+                            }
+
+                            if (!isWord) {
                                 if (!xc.isNot()) {
                                     if (xc.bs.at(i)) return false;
                                 } else {
@@ -849,10 +867,18 @@ final class Analyser extends Parser {
                 CTypeNode cy = ((CTypeNode)y);
                 switch (cy.ctype) {
                 case CharacterType.WORD:
-                    if (enc.isMbcWord(xs.bytes, xs.p, xs.end)) {
-                        return cy.not;
+                    if (cy.asciiRange) {
+                        if (isMbcAsciiWord(enc, xs.bytes, xs.p, xs.end)) {
+                            return cy.not;
+                        } else {
+                            return !cy.not;
+                        }
                     } else {
-                        return !cy.not;
+                        if (enc.isMbcWord(xs.bytes, xs.p, xs.end)) {
+                            return cy.not;
+                        } else {
+                            return !cy.not;
+                        }
                     }
 
                 default:
@@ -2062,16 +2088,17 @@ final class Analyser extends Parser {
                 min = 1;
                 CTypeNode cn = (CTypeNode)node;
 
+                int maxCode = cn.asciiRange ? 0x80 : BitSet.SINGLE_BYTE_SIZE;
                 switch (cn.ctype) {
                 case CharacterType.WORD:
                     if (cn.not) {
                         for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
-                            if (!enc.isWord(i)) {
+                            if (!enc.isWord(i) || i >= maxCode) {
                                 opt.map.addChar((byte)i, enc);
                             }
                         }
                     } else {
-                        for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                        for (int i=0; i<maxCode; i++) {
                             if (enc.isWord(i)) {
                                 opt.map.addChar((byte)i, enc);
                             }
diff --git a/src/org/joni/ApplyCaseFold.java b/src/org/joni/ApplyCaseFold.java
index 7dd84ce..496ff71 100644
--- a/src/org/joni/ApplyCaseFold.java
+++ b/src/org/joni/ApplyCaseFold.java
@@ -34,30 +34,44 @@ final class ApplyCaseFold implements ApplyAllCaseFoldFunction {
         ScanEnvironment env = arg.env;
         Encoding enc = env.enc;
         CClassNode cc = arg.cc;
+        CClassNode ascCc = arg.ascCc;
         BitSet bs = cc.bs;
+        boolean addFlag;
+
+        if (ascCc == null) {
+            addFlag = false;
+        } else if (Encoding.isAscii(from) == Encoding.isAscii(to[0])) {
+            addFlag = true;
+        } else {
+            addFlag = ascCc.isCodeInCC(enc, from);
+            if (ascCc.isNot()) addFlag = !addFlag;
+        }
 
         if (length == 1) {
             boolean inCC = cc.isCodeInCC(enc, from);
-
             if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
                 if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
-                    if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
-                        cc.addCodeRange(env, to[0], to[0]);
-                    } else {
-                        /* /(?i:[^A-C])/.match("a") ==> fail. */
-                        bs.set(to[0]);
+                    if (addFlag) {
+                        if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
+                            cc.addCodeRange(env, to[0], to[0]);
+                        } else {
+                            /* /(?i:[^A-C])/.match("a") ==> fail. */
+                            bs.set(to[0]);
+                        }
                     }
                 }
             } else {
                 if (inCC) {
-                    if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
-                        if (cc.isNot()) cc.clearNotFlag(enc);
-                        cc.addCodeRange(env, to[0], to[0]);
-                    } else {
-                        if (cc.isNot()) {
-                            bs.clear(to[0]);
+                    if (addFlag) {
+                        if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
+                            if (cc.isNot()) cc.clearNotFlag(enc);
+                            cc.addCodeRange(env, to[0], to[0]);
                         } else {
-                            bs.set(to[0]);
+                            if (cc.isNot()) {
+                                bs.clear(to[0]);
+                            } else {
+                                bs.set(to[0]);
+                            }
                         }
                     }
                 }
diff --git a/src/org/joni/ApplyCaseFoldArg.java b/src/org/joni/ApplyCaseFoldArg.java
index 10b297f..ec7cbaa 100644
--- a/src/org/joni/ApplyCaseFoldArg.java
+++ b/src/org/joni/ApplyCaseFoldArg.java
@@ -24,12 +24,13 @@ import org.joni.ast.ConsAltNode;
 
 public final class ApplyCaseFoldArg {
     final ScanEnvironment env;
-    final CClassNode cc;
+    final CClassNode cc, ascCc;
     ConsAltNode altRoot;
     ConsAltNode tail;
 
-    public ApplyCaseFoldArg(ScanEnvironment env, CClassNode cc) {
+    public ApplyCaseFoldArg(ScanEnvironment env, CClassNode cc, CClassNode ascCc) {
         this.env = env;
         this.cc = cc;
+        this.ascCc = ascCc;
     }
 }
diff --git a/src/org/joni/CodeRangeBuffer.java b/src/org/joni/CodeRangeBuffer.java
index 137772a..5b4edea 100644
--- a/src/org/joni/CodeRangeBuffer.java
+++ b/src/org/joni/CodeRangeBuffer.java
@@ -25,7 +25,7 @@ import org.joni.exception.ValueException;
 
 public final class CodeRangeBuffer {
     private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
-    private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
+    public static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
 
     int[]p;
     int used;
diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java
index 6802817..42b007b 100644
--- a/src/org/joni/Config.java
+++ b/src/org/joni/Config.java
@@ -75,7 +75,7 @@ public interface Config extends org.jcodings.Config {
     final int CHECK_STRING_THRESHOLD_LEN            = 7;
     final int CHECK_BUFF_MAX_SIZE                   = 0x4000;
 
-    final boolean NON_UNICODE_SDW                   = true;
+    final boolean NON_UNICODE_SDW                   = false;
 
 
     final PrintStream log = System.out;
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 886f660..24b5a8d 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -21,6 +21,7 @@ package org.joni;
 
 import static org.joni.Option.isAsciiRange;
 import static org.joni.Option.isSingleline;
+import static org.joni.Option.isWordBoundAllRange;
 import static org.joni.ast.QuantifierNode.isRepeatInfinite;
 
 import org.jcodings.Ptr;
@@ -672,22 +673,22 @@ class Lexer extends ScannerSupport {
 
             switch (c) {
             case 'w':
-                fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+                fetchTokenInCCFor_charType(false, CharacterType.WORD);
                 break;
             case 'W':
-                fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+                fetchTokenInCCFor_charType(true, CharacterType.WORD);
                 break;
             case 'd':
-                fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+                fetchTokenInCCFor_charType(false, CharacterType.DIGIT);
                 break;
             case 'D':
-                fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+                fetchTokenInCCFor_charType(true, CharacterType.DIGIT);
                 break;
             case 's':
-                fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+                fetchTokenInCCFor_charType(false, CharacterType.SPACE);
                 break;
             case 'S':
-                fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+                fetchTokenInCCFor_charType(true, CharacterType.SPACE);
                 break;
             case 'h':
                 if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
@@ -1058,21 +1059,21 @@ class Lexer extends ScannerSupport {
                     if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE;
                     break;
                 case 'w':
-                    if (syntax.opEscWWord()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+                    if (syntax.opEscWWord()) fetchTokenInCCFor_charType(false, CharacterType.WORD);
                     break;
                 case 'W':
-                    if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+                    if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, CharacterType.WORD);
                     break;
                 case 'b':
                     if (syntax.opEscBWordBound()) {
                         fetchTokenFor_anchor(AnchorType.WORD_BOUND);
-                        token.setAnchorASCIIRange(isAsciiRange(env.option));
+                        token.setAnchorASCIIRange(isAsciiRange(env.option) && !isWordBoundAllRange(env.option));
                     }
                     break;
                 case 'B':
                     if (syntax.opEscBWordBound()) {
                         fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
-                        token.setAnchorASCIIRange(isAsciiRange(env.option));
+                        token.setAnchorASCIIRange(isAsciiRange(env.option) && !isWordBoundAllRange(env.option));
                     }
                     break;
                 case '<':
@@ -1088,16 +1089,16 @@ class Lexer extends ScannerSupport {
                     }
                     break;
                 case 's':
-                    if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+                    if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, CharacterType.SPACE);
                     break;
                 case 'S':
-                    if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+                    if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(true, CharacterType.SPACE);
                     break;
                 case 'd':
-                    if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+                    if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(false, CharacterType.DIGIT);
                     break;
                 case 'D':
-                    if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+                    if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(true, CharacterType.DIGIT);
                     break;
                 case 'h':
                     if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
diff --git a/src/org/joni/Option.java b/src/org/joni/Option.java
index ce086dd..93c7d6a 100644
--- a/src/org/joni/Option.java
+++ b/src/org/joni/Option.java
@@ -126,6 +126,18 @@ public class Option {
         return (option & ASCII_RANGE) != 0;
     }
 
+    public static boolean isPosixBracketAllRange(int option) {
+        return (option & POSIX_BRACKET_ALL_RANGE) != 0;
+    }
+
+    public static boolean isWordBoundAllRange(int option) {
+        return (option & WORD_BOUND_ALL_RANGE) != 0;
+    }
+
+    public static boolean isNewlineCRLF(int option) {
+        return (option & NEWLINE_CRLF) != 0;
+    }
+
     /* OP_SET_OPTION is required for these options.  ??? */
     //    public static boolean isDynamic(int option) {
     //        return (option & (MULTILINE | IGNORECASE)) != 0;
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index 47c3aa0..dfd21e2 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -21,10 +21,13 @@ package org.joni;
 
 import static org.joni.BitStatus.bsOnAtSimple;
 import static org.joni.BitStatus.bsOnOff;
+import static org.joni.Option.isAsciiRange;
 import static org.joni.Option.isDontCaptureGroup;
 import static org.joni.Option.isIgnoreCase;
+import static org.joni.Option.isPosixBracketAllRange;
 
 import org.jcodings.Encoding;
+import org.jcodings.ObjPtr;
 import org.jcodings.Ptr;
 import org.jcodings.constants.CharacterType;
 import org.jcodings.constants.PosixBracket;
@@ -70,7 +73,7 @@ class Parser extends Lexer {
     private static final int POSIX_BRACKET_NAME_MIN_LEN            = 4;
     private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH      = 20;
     private static final byte BRACKET_END[]                        = ":]".getBytes();
-    private boolean parsePosixBracket(CClassNode cc) {
+    private boolean parsePosixBracket(CClassNode cc, CClassNode ascCc) {
         mark();
 
         boolean not;
@@ -81,16 +84,23 @@ class Parser extends Lexer {
             not = false;
         }
         if (enc.strLength(bytes, p, stop) >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket
-            byte[][] pbs= PosixBracket.PBSNamesLower;
-            for (int i=0; i<pbs.length; i++) {
-                byte[]name = pbs[i];
+            boolean asciiRange = isAsciiRange(env.option) && !isPosixBracketAllRange(env.option);
+
+            for (int i=0; i<PosixBracket.PBSNamesLower.length; i++) {
+                byte[]name = PosixBracket.PBSNamesLower[i];
                 // hash lookup here ?
                 if (enc.strNCmp(bytes, p, stop, name, 0, name.length) == 0) {
                     p = enc.step(bytes, p, stop, name.length);
                     if (enc.strNCmp(bytes, p, stop, BRACKET_END, 0, BRACKET_END.length) != 0) {
                         newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
                     }
-                    cc.addCType(PosixBracket.PBSValues[i], not, env, this);
+                    int ctype = PosixBracket.PBSValues[i];
+                    cc.addCType(ctype, not, asciiRange, env, this);
+                    if (ascCc != null) {
+                        if (ctype != CharacterType.WORD && ctype != CharacterType.ASCII && !asciiRange) {
+                            ascCc.addCType(ctype, not, asciiRange, env, this);
+                        }
+                    }
                     inc();
                     inc();
                     return false;
@@ -139,10 +149,12 @@ class Parser extends Lexer {
         return false;
     }
 
-    private CClassNode parseCharClass() {
-        fetchTokenInCC();
-
+    private CClassNode parseCharClass(ObjPtr<CClassNode> ascNode) {
         final boolean neg;
+        CClassNode cc, prevCc = null, ascCc = null, ascPrevCc = null, workCc = null, ascWorkCc = null;
+        CCStateArg arg = new CCStateArg();
+
+        fetchTokenInCC();
         if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
             neg = true;
             fetchTokenInCC();
@@ -150,26 +162,21 @@ class Parser extends Lexer {
             neg = false;
         }
 
-        if (token.type == TokenType.CC_CLOSE && !syntax.op2OptionECMAScript()) {
+        if (token.type == TokenType.CC_CLOSE) {
             if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
             env.ccEscWarn("]");
             token.type = TokenType.CHAR; /* allow []...] */
         }
 
-        CClassNode cc = new CClassNode();
-        CClassNode prevCC = null;
-        CClassNode workCC = null;
-
-        CCStateArg arg = new CCStateArg();
+        cc = new CClassNode();
+        if (isIgnoreCase(env.option)) ascNode.p = new CClassNode();
 
         boolean andStart = false;
         arg.state = CCSTATE.START;
-
         while (token.type != TokenType.CC_CLOSE) {
             boolean fetched = false;
 
             switch (token.type) {
-
             case CHAR:
                 final int len;
                 if (token.getCode() >= BitSet.SINGLE_BYTE_SIZE || (len = enc.codeToMbcLength(token.getC())) > 1) {
@@ -177,9 +184,9 @@ class Parser extends Lexer {
                 } else {
                     arg.inType = CCVALTYPE.SB; // sb_char:
                 }
-                arg.v = token.getC();
-                arg.vIsRaw = false;
-                parseCharClassValEntry2(cc, arg); // goto val_entry2
+                arg.to = token.getC();
+                arg.toIsRaw = false;
+                parseCharClassValEntry2(cc, ascCc, arg); // goto val_entry2
                 break;
 
             case RAW_BYTE:
@@ -208,47 +215,57 @@ class Parser extends Lexer {
                         fetched = false;
                     }
                     if (i == 1) {
-                        arg.v = buf[0] & 0xff;
+                        arg.to = buf[0] & 0xff;
                         arg.inType = CCVALTYPE.SB; // goto raw_single
                     } else {
-                        arg.v = enc.mbcToCode(buf, 0, buf.length);
+                        arg.to = enc.mbcToCode(buf, 0, buf.length);
                         arg.inType = CCVALTYPE.CODE_POINT;
                     }
                 } else {
-                    arg.v = token.getC();
+                    arg.to = token.getC();
                     arg.inType = CCVALTYPE.SB; // raw_single:
                 }
-                arg.vIsRaw = true;
-                parseCharClassValEntry2(cc, arg); // goto val_entry2
+                arg.toIsRaw = true;
+                parseCharClassValEntry2(cc, ascCc, arg); // goto val_entry2
                 break;
 
             case CODE_POINT:
-                arg.v = token.getCode();
-                arg.vIsRaw = true;
-                parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
+                arg.to = token.getCode();
+                arg.toIsRaw = true;
+                parseCharClassValEntry(cc, ascCc, arg); // val_entry:, val_entry2
                 break;
 
             case POSIX_BRACKET_OPEN:
-                if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
+                if (parsePosixBracket(cc, ascCc)) { /* true: is not POSIX bracket */
                     env.ccEscWarn("[");
                     p = token.backP;
-                    arg.v = token.getC();
-                    arg.vIsRaw = false;
-                    parseCharClassValEntry(cc, arg); // goto val_entry
+                    arg.to = token.getC();
+                    arg.toIsRaw = false;
+                    parseCharClassValEntry(cc, ascCc, arg); // goto val_entry
                     break;
                 }
-                cc.nextStateClass(arg, env); // goto next_class
+                cc.nextStateClass(arg, ascCc, env); // goto next_class
                 break;
 
             case CHAR_TYPE:
-                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
-                cc.nextStateClass(arg, env); // next_class:
+                cc.addCType(token.getPropCType(), token.getPropNot(), isAsciiRange(env.option), env, this);
+                if (ascCc != null) {
+                    if (token.getPropCType() != CharacterType.WORD) {
+                        ascCc.addCType(token.getPropCType(), token.getPropNot(), isAsciiRange(env.option), env, this);
+                    }
+                }
+                cc.nextStateClass(arg, ascCc, env); // next_class:
                 break;
 
             case CHAR_PROPERTY:
                 int ctype = fetchCharPropertyToCType();
-                cc.addCType(ctype, token.getPropNot(), env, this);
-                cc.nextStateClass(arg, env); // goto next_class
+                cc.addCType(ctype, token.getPropNot(), false, env, this);
+                if (ascCc != null) {
+                    if (ctype != CharacterType.ASCII) {
+                        ascCc.addCType(ctype, token.getPropNot(), false, env, this);
+                    }
+                }
+                cc.nextStateClass(arg, ascCc, env); // goto next_class
                 break;
 
             case CC_RANGE:
@@ -256,41 +273,43 @@ class Parser extends Lexer {
                     fetchTokenInCC();
                     fetched = true;
                     if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
-                        parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
+                        parseCharClassRangeEndVal(cc, ascCc, arg); // range_end_val:, goto val_entry;
                         break;
                     } else if (token.type == TokenType.CC_AND) {
                         env.ccEscWarn("-");
-                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
+                        parseCharClassRangeEndVal(cc, ascCc, arg); // goto range_end_val
                         break;
                     }
+                    if (arg.type == CCVALTYPE.CLASS) newValueException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
                     arg.state = CCSTATE.RANGE;
                 } else if (arg.state == CCSTATE.START) {
-                    arg.v = token.getC(); /* [-xa] is allowed */
-                    arg.vIsRaw = false;
+                    arg.to = token.getC(); /* [-xa] is allowed */
+                    arg.toIsRaw = false;
                     fetchTokenInCC();
                     fetched = true;
                     if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
-                    parseCharClassValEntry(cc, arg); // goto val_entry
+                    parseCharClassValEntry(cc, ascCc, arg); // goto val_entry
                     break;
                 } else if (arg.state == CCSTATE.RANGE) {
                     env.ccEscWarn("-");
-                    parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
+                    parseCharClassSbChar(cc, ascCc, arg); // goto sb_char /* [!--x] is allowed */
                     break;
                 } else { /* CCS_COMPLETE */
                     fetchTokenInCC();
                     fetched = true;
                     if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
-                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
+                        parseCharClassRangeEndVal(cc, ascCc, arg); // goto range_end_val
                         break;
                     } else if (token.type == TokenType.CC_AND) {
                         env.ccEscWarn("-");
-                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
+                        parseCharClassRangeEndVal(cc, ascCc, arg); // goto range_end_val
                         break;
                     }
 
                     if (syntax.allowDoubleRangeOpInCC()) {
                         env.ccEscWarn("-");
-                        parseCharClassSbChar(cc, arg); // goto sb_char /* [0-9-a] is allowed as [0-9\-a] */
+                        // parseCharClassSbChar(cc, ascCc, arg); // goto sb_char /* [0-9-a] is allowed as [0-9\-a] */
+                        parseCharClassRangeEndVal(cc, ascCc, arg); // goto range_end_val
                         break;
                     }
                     newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
@@ -298,27 +317,40 @@ class Parser extends Lexer {
                 break;
 
             case CC_CC_OPEN: /* [ */
-                CClassNode acc = parseCharClass();
+                ObjPtr<CClassNode> ascPtr = new ObjPtr<CClassNode>();
+                CClassNode acc = parseCharClass(ascPtr);
                 cc.or(acc, enc);
+                if (ascPtr.p != null) {
+                    ascCc.or(ascPtr.p, enc);
+                }
                 break;
 
             case CC_AND:     /* && */
                 if (arg.state == CCSTATE.VALUE) {
-                    arg.v = 0; // ??? safe v ?
-                    arg.vIsRaw = false;
-                    cc.nextStateValue(arg, env);
+                    arg.to = 0;
+                    arg.toIsRaw = false;
+                    cc.nextStateValue(arg, ascCc, env);
                 }
                 /* initialize local variables */
                 andStart = true;
                 arg.state = CCSTATE.START;
-                if (prevCC != null) {
-                    prevCC.and(cc, enc);
+                if (prevCc != null) {
+                    prevCc.and(cc, enc);
+                    if (ascCc != null) {
+                        ascPrevCc.and(ascCc, enc);
+                    }
                 } else {
-                    prevCC = cc;
-                    if (workCC == null) workCC = new CClassNode();
-                    cc = workCC;
+                    prevCc = cc;
+                    if (workCc == null) workCc = new CClassNode();
+                    cc = workCc;
+                    if (ascCc != null) {
+                        ascPrevCc = ascCc;
+                        if (ascWorkCc == null) ascWorkCc = new CClassNode();
+                        ascCc = ascWorkCc;
+                    }
                 }
                 cc.clear();
+                if (ascCc != null) ascCc.clear();
                 break;
 
             case EOT:
@@ -333,24 +365,30 @@ class Parser extends Lexer {
         } // while
 
         if (arg.state == CCSTATE.VALUE) {
-            arg.v = 0; // ??? safe v ?
-            arg.vIsRaw = false;
-            cc.nextStateValue(arg, env);
+            arg.to = 0;
+            arg.toIsRaw = false;
+            cc.nextStateValue(arg, ascCc, env);
         }
 
-        if (prevCC != null) {
-            prevCC.and(cc, enc);
-            cc = prevCC;
+        if (prevCc != null) {
+            prevCc.and(cc, enc);
+            cc = prevCc;
+            if (ascCc != null) {
+                ascPrevCc.and(ascCc, enc);
+                ascCc = ascPrevCc;
+            }
         }
 
         if (neg) {
             cc.setNot();
+            if (ascCc != null) ascCc.setNot();
         } else {
             cc.clearNot();
+            if (ascCc != null) ascCc.clearNot();
         }
 
         if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
-            if (!cc.isEmpty()) {
+            if (!cc.isEmpty()) { // ???
                 final int NEW_LINE = 0x0a;
                 if (enc.isNewLine(NEW_LINE)) {
                     if (enc.codeToMbcLength(NEW_LINE) == 1) {
@@ -365,27 +403,27 @@ class Parser extends Lexer {
         return cc;
     }
 
-    private void parseCharClassSbChar(CClassNode cc, CCStateArg arg) {
+    private void parseCharClassSbChar(CClassNode cc, CClassNode ascCc, CCStateArg arg) {
         arg.inType = CCVALTYPE.SB;
-        arg.v = token.getC();
-        arg.vIsRaw = false;
-        parseCharClassValEntry2(cc, arg); // goto val_entry2
+        arg.to = token.getC();
+        arg.toIsRaw = false;
+        parseCharClassValEntry2(cc, ascCc, arg); // goto val_entry2
     }
 
-    private void parseCharClassRangeEndVal(CClassNode cc, CCStateArg arg) {
-        arg.v = '-';
-        arg.vIsRaw = false;
-        parseCharClassValEntry(cc, arg); // goto val_entry
+    private void parseCharClassRangeEndVal(CClassNode cc, CClassNode ascCc, CCStateArg arg) {
+        arg.to = '-';
+        arg.toIsRaw = false;
+        parseCharClassValEntry(cc, ascCc, arg); // goto val_entry
     }
 
-    private void parseCharClassValEntry(CClassNode cc, CCStateArg arg) {
-        int len = enc.codeToMbcLength(arg.v);
+    private void parseCharClassValEntry(CClassNode cc, CClassNode ascCc, CCStateArg arg) {
+        int len = enc.codeToMbcLength(arg.to);
         arg.inType = len == 1 ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
-        parseCharClassValEntry2(cc, arg); // val_entry2:
+        parseCharClassValEntry2(cc, ascCc, arg); // val_entry2:
     }
 
-    private void parseCharClassValEntry2(CClassNode cc, CCStateArg arg) {
-        cc.nextStateValue(arg, env);
+    private void parseCharClassValEntry2(CClassNode cc, CClassNode ascCc, CCStateArg arg) {
+        cc.nextStateValue(arg, ascCc, env);
     }
 
     private Node parseEnclose(TokenType term) {
@@ -728,6 +766,8 @@ class Parser extends Lexer {
         Node node = null;
         boolean group = false;
 
+        // if (tok->type == (enum TokenSyms )term) goto end_of_token; ???
+
         switch(token.type) {
         case ALT:
         case EOT:
@@ -742,6 +782,7 @@ class Parser extends Lexer {
                 EncloseNode en = (EncloseNode)node;
                 env.option = en.option;
                 fetchToken();
+                // env.option = prev; // ???
                 Node target = parseSubExp(term);
                 env.option = prev;
                 en.setTarget(target);
@@ -811,6 +852,7 @@ class Parser extends Lexer {
 
         case ANCHOR:
             node = new AnchorNode(token.getAnchorSubtype());
+            ((AnchorNode)node).asciiRange = token.getAnchorASCIIRange();
             break;
 
         case OP_REPEAT:
@@ -926,8 +968,8 @@ class Parser extends Lexer {
             int sbOut = enc.minLength() > 1 ? 0x00 : 0x80;
             int extend = GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Extend);
             CClassNode cc = new CClassNode();
-            cc.addCType(extend, false, env, this);
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_SpacingMark), false, env, this);
+            cc.addCType(extend, false, false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_SpacingMark), false, false, env, this);
             cc.addCodeRange(env, 0x200D, 0x200D);
             QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
@@ -937,7 +979,7 @@ class Parser extends Lexer {
 
             /* !Control */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Control), true, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Control), true, false, env, this);
             if (enc.minLength() > 1) {
                 CodeRangeBuffer buff = new CodeRangeBuffer();
                 buff = CodeRangeBuffer.addCodeRange(buff, env, 0x0a, 0x0a);
@@ -959,21 +1001,21 @@ class Parser extends Lexer {
 
             /* T+ */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, false, env, this);
             qn = new QuantifierNode(1, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             alt = ConsAltNode.newAltNode(qn, alt);
 
             /* L+ */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, false, env, this);
             qn = new QuantifierNode(1, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             alt = ConsAltNode.newAltNode(qn, alt);
 
             /* L* LVT T* */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
 
@@ -981,11 +1023,11 @@ class Parser extends Lexer {
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_LVT), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_LVT), false, false, env, this);
             list2 = ConsAltNode.newListNode(cc, list2);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
@@ -994,23 +1036,23 @@ class Parser extends Lexer {
 
             /* L* LV V* T* */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_V), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_V), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_LV), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_LV), false, false, env, this);
             list2 = ConsAltNode.newListNode(cc, list2);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
@@ -1019,19 +1061,19 @@ class Parser extends Lexer {
 
             /* L* V+ T* */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_T), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_V), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_V), false, false, env, this);
             qn = new QuantifierNode(1, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_L), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
@@ -1044,33 +1086,33 @@ class Parser extends Lexer {
             /* ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?) */
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Modifier), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Modifier), false, false, env, this);
             qn = new QuantifierNode(0, 1, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
-            cc.addCType(extend, false, env, this);
+            cc.addCType(extend, false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base_GAZ), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base_GAZ), false, false, env, this);
             list2 = ConsAltNode.newListNode(cc, list2);
 
             ConsAltNode alt2 = ConsAltNode.newAltNode(list2, null);
 
             /* Glue_After_Zwj */
             cc = new CClassNode();
-            cc.addCType(extend, false, env, this);
+            cc.addCType(extend, false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
             cc.addCTypeByRange(-1, false, enc, sbOut, GraphemeNames.Glue_After_Zwj_Ranges);
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Glue_After_Zwj), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Glue_After_Zwj), false, false, env, this);
             list2 = ConsAltNode.newListNode(cc, list2);
 
             alt2 = ConsAltNode.newAltNode(list2, alt2);
@@ -1106,14 +1148,14 @@ class Parser extends Lexer {
 
             /* E_Modifier? */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Modifier), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Modifier), false, false, env, this);
             qn = new QuantifierNode(0, 1, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
 
             /* Extend* */
             cc = new CClassNode();
-            cc.addCType(extend, false, env, this);
+            cc.addCType(extend, false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
@@ -1121,8 +1163,8 @@ class Parser extends Lexer {
             /* (E_Base | EBG) */
             cc = new CClassNode();
             cc.addCTypeByRange(-1, false, enc, sbOut, GraphemeNames.E_Base_Ranges);
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base), false, env, this);
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base_GAZ), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base), false, false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base_GAZ), false, false, env, this);
             list2 = ConsAltNode.newListNode(cc, list2);
 
             alt = ConsAltNode.newAltNode(list2, alt);
@@ -1133,14 +1175,14 @@ class Parser extends Lexer {
              * http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.html
              */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Modifier), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Modifier), false, false, env, this);
             qn = new QuantifierNode(0, 1, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Glue_After_Zwj), false, env, this);
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base_GAZ), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Glue_After_Zwj), false, false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_E_Base_GAZ), false, false, env, this);
             list2 = ConsAltNode.newListNode(cc, list2);
 
             str = new StringNode();
@@ -1163,7 +1205,7 @@ class Parser extends Lexer {
 
             /* Prepend* */
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Prepend), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Prepend), false, false, env, this);
             qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list = ConsAltNode.newListNode(qn, list);
@@ -1185,7 +1227,7 @@ class Parser extends Lexer {
             list2 = ConsAltNode.newListNode(qn, null);
 
             cc = new CClassNode();
-            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Prepend), false, env, this);
+            cc.addCType(GraphemeNames.nameToCtype(enc, GraphemeNames.Grapheme_Cluster_Break_Prepend), false, false, env, this);
             qn = new QuantifierNode(1, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(cc);
             list2 = ConsAltNode.newListNode(qn, list2);
@@ -1341,7 +1383,7 @@ class Parser extends Lexer {
     private Node parseCodePoint() {
         byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
         int num = enc.codeToMbc(token.getCode(), buf, 0);
-        // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
+        // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else // ???
         return new StringNode(buf, 0, num);
     }
 
@@ -1358,55 +1400,56 @@ class Parser extends Lexer {
 
     private Node parseCharType(Node node) {
         switch(token.getPropCType()) {
-        case CharacterType.D:
-        case CharacterType.S:
-        case CharacterType.W:
-            if (Config.NON_UNICODE_SDW) {
-                CClassNode cc = new CClassNode();
-                cc.addCType(token.getPropCType(), false, env, this);
-                if (token.getPropNot()) cc.setNot();
-                node = cc;
-            }
-            break;
-
         case CharacterType.WORD:
-            node = new CTypeNode(token.getPropCType(), token.getPropNot(), false);
+            node = new CTypeNode(token.getPropCType(), token.getPropNot(), isAsciiRange(env.option));
             break;
 
         case CharacterType.SPACE:
         case CharacterType.DIGIT:
         case CharacterType.XDIGIT:
             CClassNode ccn = new CClassNode();
-            ccn.addCType(token.getPropCType(), false, env, this);
+            ccn.addCType(token.getPropCType(), false, isAsciiRange(env.option), env, this);
             if (token.getPropNot()) ccn.setNot();
             node = ccn;
             break;
 
         default:
             newInternalException(ERR_PARSER_BUG);
-
         } // inner switch
         return node;
     }
 
-    private CClassNode parseCharProperty() {
+    private Node cClassCaseFold(Node node, CClassNode cc, CClassNode ascCc) {
+        ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc, ascCc);
+        enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
+        if (arg.altRoot != null) {
+            node = ConsAltNode.newAltNode(node, arg.altRoot);
+        }
+        return node;
+    }
+
+    private Node parseCharProperty() {
         int ctype = fetchCharPropertyToCType();
-        CClassNode n = new CClassNode();
-        n.addCType(ctype, false, env, this);
-        if (token.getPropNot()) n.setNot();
-        return n;
+        CClassNode cc = new CClassNode();
+        Node node = cc;
+        cc.addCType(ctype, false, false, env, this);
+        if (token.getPropNot()) cc.setNot();
+
+        if (isIgnoreCase(env.option)) {
+            if (ctype != CharacterType.ASCII) {
+                node = cClassCaseFold(node, cc, cc);
+            }
+        }
+        return node;
     }
 
     private Node parseCcCcOpen() {
-        CClassNode cc = parseCharClass();
+        ObjPtr<CClassNode> ascPtr = new ObjPtr<CClassNode>();
+        CClassNode cc = parseCharClass(ascPtr);
         Node node = cc;
-        if (isIgnoreCase(env.option)) {
-            ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
-            enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
 
-            if (arg.altRoot != null) {
-                node = ConsAltNode.newAltNode(node, arg.altRoot);
-            }
+        if (isIgnoreCase(env.option)) {
+            node = cClassCaseFold(node, cc, ascPtr.p);
         }
         return node;
     }
diff --git a/src/org/joni/ScanEnvironment.java b/src/org/joni/ScanEnvironment.java
index 4c68a04..9731881 100644
--- a/src/org/joni/ScanEnvironment.java
+++ b/src/org/joni/ScanEnvironment.java
@@ -27,7 +27,7 @@ import org.joni.exception.ErrorMessages;
 import org.joni.exception.InternalException;
 
 public final class ScanEnvironment {
-    int option;
+    public int option;
     final int caseFoldFlag;
     final public Encoding enc;
     final public Syntax syntax;
diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java
index 5bfe05a..3ee8fa4 100644
--- a/src/org/joni/ast/CClassNode.java
+++ b/src/org/joni/ast/CClassNode.java
@@ -256,41 +256,33 @@ public final class CClassNode extends Node {
         }
     }
 
-    public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
+    // add_ctype_to_cc
+    public void addCType(int ctype, boolean not, boolean asciiRange, ScanEnvironment env, IntHolder sbOut) {
         Encoding enc = env.enc;
-
-        if (Config.NON_UNICODE_SDW) {
-            switch(ctype) {
-            case CharacterType.S:
-                if (!not && env.syntax.op2OptionECMAScript()) {
-                    // treat \u2028 and \u2029 as whitespace
-                    addCodeRange(env, 8232, 8233);
-                }
-            case CharacterType.D:
-            case CharacterType.W:
-                ctype ^= CharacterType.SPECIAL_MASK;
+        int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
+        if (ranges != null) {
+            if (asciiRange) {
+                CClassNode ccWork = new CClassNode();
+                ccWork.addCTypeByRange(ctype, not, enc, sbOut.value, ranges);
                 if (not) {
-                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
-                        if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
-                        //if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) == 0) bs.set(c);
-                    }
-                    addAllMultiByteRange(enc);
+                    ccWork.addCodeRangeToBuf(0x80, CodeRangeBuffer.ALL_MULTI_BYTE_RANGE); // add_code_range_to_buf0
                 } else {
-                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
-                        if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
-                        //if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) != 0) bs.set(c);
+                    CClassNode ccAscii = new CClassNode();
+                    if (enc.minLength() > 1) {
+                        ccAscii.addCodeRangeToBuf(0x00, 0x7F);
+                    } else {
+                        ccAscii.bs.setRange(0x00, 0x7F);
                     }
+                    ccWork.and(ccAscii, enc);
                 }
-                return;
+                or(ccWork, enc);
+            } else {
+                addCTypeByRange(ctype, not, enc, sbOut.value, ranges);
             }
-        }
-
-        int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
-        if (ranges != null) {
-            addCTypeByRange(ctype, not, enc, sbOut.value, ranges);
             return;
         }
 
+        int maxCode = asciiRange ? 0x80 : BitSet.SINGLE_BYTE_SIZE;
         switch(ctype) {
         case CharacterType.ALPHA:
         case CharacterType.BLANK:
@@ -319,30 +311,29 @@ public final class CClassNode extends Node {
         case CharacterType.PRINT:
             if (not) {
                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
-                    if (!enc.isCodeCType(c, ctype)) bs.set(c);
+                    if (!enc.isCodeCType(c, ctype) || c >= maxCode) bs.set(c);
                 }
+                if (asciiRange) addAllMultiByteRange(enc);
             } else {
-                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                for (int c=0; c<maxCode; c++) {
                     if (enc.isCodeCType(c, ctype)) bs.set(c);
                 }
-                addAllMultiByteRange(enc);
+                if (!asciiRange) addAllMultiByteRange(enc);
             }
             break;
 
         case CharacterType.WORD:
             if (!not) {
-                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+                for (int c=0; c<maxCode; c++) {
                     if (enc.isSbWord(c)) bs.set(c);
                 }
-
-                addAllMultiByteRange(enc);
+                if (!asciiRange) addAllMultiByteRange(enc);
             } else {
                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
-                    try {
-                        if (enc.codeToMbcLength(c) > 0 && /* check invalid code point */
-                                !enc.isWord(c)) bs.set(c);
-                    } catch (EncodingException ve) {};
+                    if (enc.codeToMbcLength(c) > 0 && /* check invalid code point */
+                            !(enc.isWord(c) || c >= maxCode)) bs.set(c);
                 }
+                if (asciiRange) addAllMultiByteRange(enc);
             }
             break;
 
@@ -365,47 +356,49 @@ public final class CClassNode extends Node {
     }
 
     public static final class CCStateArg {
-        public int v;
-        public int vs;
-        public boolean vsIsRaw;
-        public boolean vIsRaw;
+        public int from;
+        public int to;
+        public boolean fromIsRaw;
+        public boolean toIsRaw;
         public CCVALTYPE inType;
         public CCVALTYPE type;
         public CCSTATE state;
     }
 
-    public void nextStateClass(CCStateArg arg, ScanEnvironment env) {
+    public void nextStateClass(CCStateArg arg, CClassNode ascCC, ScanEnvironment env) {
         if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
 
         if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
             if (arg.type == CCVALTYPE.SB) {
-                bs.set(arg.vs);
+                bs.set(arg.from);
+                if (ascCC != null) ascCC.bs.set(arg.from);
             } else if (arg.type == CCVALTYPE.CODE_POINT) {
-                addCodeRange(env, arg.vs, arg.vs);
+                addCodeRange(env, arg.from, arg.from);
+                if (ascCC != null) ascCC.addCodeRange(env, arg.from, arg.from); // add_code_range0
             }
         }
         arg.state = CCSTATE.VALUE;
         arg.type = CCVALTYPE.CLASS;
     }
 
-    public void nextStateValue(CCStateArg arg, ScanEnvironment env) {
-
+    public void nextStateValue(CCStateArg arg, CClassNode ascCc, ScanEnvironment env) {
         switch(arg.state) {
         case VALUE:
             if (arg.type == CCVALTYPE.SB) {
-                if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
-                bs.set(arg.vs);
+                bs.set(arg.from);
+                if (ascCc != null) ascCc.bs.set(arg.from);
             } else if (arg.type == CCVALTYPE.CODE_POINT) {
-                addCodeRange(env, arg.vs, arg.vs);
+                addCodeRange(env, arg.from, arg.from);
+                if (ascCc != null) ascCc.addCodeRange(env, arg.from, arg.from); // add_code_range0
             }
             break;
 
         case RANGE:
             if (arg.inType == arg.type) {
                 if (arg.inType == CCVALTYPE.SB) {
-                    if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+                    if (arg.from > 0xff || arg.to > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
 
-                    if (arg.vs > arg.v) {
+                    if (arg.from > arg.to) {
                         if (env.syntax.allowEmptyRangeInCC()) {
                             // goto ccs_range_end
                             arg.state = CCSTATE.COMPLETE;
@@ -414,12 +407,14 @@ public final class CClassNode extends Node {
                             throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
                         }
                     }
-                    bs.setRange(arg.vs, arg.v);
+                    bs.setRange(arg.from, arg.to);
+                    if (ascCc != null) ascCc.bs.setRange(arg.from, arg.to);
                 } else {
-                    addCodeRange(env, arg.vs, arg.v);
+                    addCodeRange(env, arg.from, arg.to);
+                    if (ascCc != null) ascCc.addCodeRange(env, arg.from, arg.to); // add_code_range0
                 }
             } else {
-                if (arg.vs > arg.v) {
+                if (arg.from > arg.to) {
                     if (env.syntax.allowEmptyRangeInCC()) {
                         // goto ccs_range_end
                         arg.state = CCSTATE.COMPLETE;
@@ -428,8 +423,12 @@ public final class CClassNode extends Node {
                         throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
                     }
                 }
-                bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
-                addCodeRange(env, arg.vs, arg.v);
+                bs.setRange(arg.from, arg.to < 0xff ? arg.to : 0xff);
+                addCodeRange(env, arg.from, arg.to);
+                if (ascCc != null) {
+                    ascCc.bs.setRange(arg.from, arg.to < 0xff ? arg.to : 0xff);
+                    ascCc.addCodeRange(env, arg.from, arg.to); // add_code_range0
+                }
             }
             // ccs_range_end:
             arg.state = CCSTATE.COMPLETE;
@@ -445,8 +444,8 @@ public final class CClassNode extends Node {
 
         } // switch
 
-        arg.vsIsRaw = arg.vIsRaw;
-        arg.vs = arg.v;
+        arg.fromIsRaw = arg.toIsRaw;
+        arg.from = arg.to;
         arg.type = arg.inType;
     }
 
@@ -497,7 +496,7 @@ public final class CClassNode extends Node {
     private static int CR_FROM(int[] range, int i) {
         return range[(i * 2) + 1];
     }
-    
+
     private static int CR_TO(int[] range, int i) {
         return range[(i * 2) + 2];
     }
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index 04ba715..319b22c 100755
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -203,26 +203,10 @@ public class TestU8 extends Test {
         x2s("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33 * 2);
         x2s("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33 * 2);
 
-        ns("(?ia)\\w+", "\u212a\u017f");
-        // ns("(?ia)[\\w]+", "\u212a\u017f");
-        ns("(?ia)[^\\W]+", "\u212a\u017f");
-        // x2s("(?ia)[^\\W]+", "ks", 0, 2);
-        ns("(?iu)\\p{ASCII}", "\u212a");
-        ns("(?iu)\\P{ASCII}", "s");
-        // ns("(?iu)[\\p{ASCII}]", "\u212a");
-        // ns("(?iu)[\\P{ASCII}]", "s");
-        ns("(?ia)\\p{ASCII}", "\u212a");
-        ns("(?ia)\\P{ASCII}", "s");
-        // ns("(?ia)[\\p{ASCII}]", "\u212a");
-        // ns("(?ia)[\\P{ASCII}]", "s");
-        x2s("(?iu)[s]+", "Ss\u017f ", 0, 4);
-        x2s("(?ia)[s]+", "Ss\u017f ", 0, 4);
-        x2s("(?iu)[^s]+", "Ss\u017f ", 4, 5);
-        x2s("(?ia)[^s]+", "Ss\u017f ", 4, 5);
-        x2s("(?iu)[[:lower:]]", "\u017f", 0, 2);
-        // ns("(?ia)[[:lower:]]", "\u017f");
-        x2s("(?u)[[:upper:]]", "\u212a", 0, 3);
-        // ns("(?a)[[:upper:]]", "\u212a");
+        x2s("(?iu)\\p{lower}\\p{upper}", "Ab", 0, 2);
+        x2s("(?ia)\\p{lower}\\p{upper}", "Ab", 0, 2);
+        x2s("(?iu)[[:lower:]][[:upper:]]", "Ab", 0, 2);
+        x2s("(?ia)[[:lower:]][[:upper:]]", "Ab", 0, 2);
         super.test();
     }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list