[jruby-joni] 90/279: Bump dependency version for jcodings and support nonunicode \s \d \w

Hideki Yamane henrich at moszumanska.debian.org
Mon Nov 16 11:26:57 UTC 2015


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit 7d7a5189d47bed7b5ec47e9f01b428706eceb249
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date:   Mon Feb 13 01:36:57 2012 +0100

    Bump dependency version for jcodings and support nonunicode \s \d \w
---
 pom.xml                          |   2 +-
 src/org/joni/Config.java         |  56 +++----
 src/org/joni/Lexer.java          | 336 +++++++++++++++++++--------------------
 src/org/joni/Parser.java         | 319 ++++++++++++++++++++-----------------
 src/org/joni/ast/CClassNode.java |  25 ++-
 5 files changed, 392 insertions(+), 346 deletions(-)

diff --git a/pom.xml b/pom.xml
index dff5b07..a8a3eba 100644
--- a/pom.xml
+++ b/pom.xml
@@ -75,7 +75,7 @@
     <dependency>
        <groupId>org.jruby.jcodings</groupId>
        <artifactId>jcodings</artifactId>
-       <version>1.0.4</version>
+       <version>1.0.6</version>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java
index 07762f0..f1f4947 100644
--- a/src/org/joni/Config.java
+++ b/src/org/joni/Config.java
@@ -1,20 +1,20 @@
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of 
- * this software and associated documentation files (the "Software"), to deal in 
- * the Software without restriction, including without limitation the rights to 
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 package org.joni;
@@ -23,25 +23,25 @@ import java.io.PrintStream;
 
 public interface Config extends org.jcodings.Config {
     final int CHAR_TABLE_SIZE = 256;
-    
+
     final boolean USE_NAMED_GROUP = true;
     final boolean USE_SUBEXP_CALL = true;
     final boolean USE_BACKREF_WITH_LEVEL = true;                            /* \k<name+n>, \k<name-n> */
-    
+
     final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
     final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true;     /* /\n$/ =~ "\n" */
     final boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = false;
 
     final boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true;
-    
+
     final boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false;
     final boolean USE_CAPTURE_HISTORY = false;
     final boolean USE_VARIABLE_META_CHARS = true;
     final boolean USE_WORD_BEGIN_END = true;                                /* "\<": word-begin, "\>": word-end */
-    final boolean USE_POSIX_API_REGION_OPTION = true;                           /* needed for POSIX API support */ 
+    final boolean USE_POSIX_API_REGION_OPTION = true;                           /* needed for POSIX API support */
     final boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true;
     final boolean USE_COMBINATION_EXPLOSION_CHECK = false;
-    
+
     final int NREGION                   = 10;
     final int MAX_BACKREF_NUM           = 1000;
     final int MAX_REPEAT_NUM            = 100000;
@@ -53,34 +53,36 @@ public interface Config extends org.jcodings.Config {
     // internal config
     final boolean USE_PARSE_TREE_NODE_RECYCLE       = true;
     final boolean USE_OP_PUSH_OR_JUMP_EXACT         = true;
-    final boolean USE_SHARED_CCLASS_TABLE			= false;    
-    final boolean USE_QTFR_PEEK_NEXT                = true; 
+    final boolean USE_SHARED_CCLASS_TABLE			= false;
+    final boolean USE_QTFR_PEEK_NEXT                = true;
 
     final int INIT_MATCH_STACK_SIZE                 = 64;
     final int DEFAULT_MATCH_STACK_LIMIT_SIZE        = 0;        /* unlimited */
     final int NUMBER_OF_POOLED_STACKS               = 4;
 
-    
-    
+
+
     final boolean DONT_OPTIMIZE                     = false;
-    
-    
+
+
     final int MAX_CAPTURE_HISTORY_GROUP             = 31;
-    
+
 
     final int CHECK_STRING_THRESHOLD_LEN            = 7;
     final int CHECK_BUFF_MAX_SIZE                   = 0x4000;
-    
-    
+
+    final boolean NON_UNICODE_SDW                   = false;
+
+
     final PrintStream log = System.out;
     final PrintStream err = System.err;
 
     final boolean DEBUG_ALL                         = false;
-    final boolean DEBUG                             = DEBUG_ALL;    
+    final boolean DEBUG                             = DEBUG_ALL;
     final boolean DEBUG_PARSE_TREE                  = DEBUG_ALL;
     final boolean DEBUG_COMPILE                     = DEBUG_ALL;
     final boolean DEBUG_COMPILE_BYTE_CODE_INFO      = DEBUG_ALL;
-    final boolean DEBUG_SEARCH                      = DEBUG_ALL;    
+    final boolean DEBUG_SEARCH                      = DEBUG_ALL;
     final boolean DEBUG_MATCH                       = DEBUG_ALL;
     final boolean DEBUG_ASM                         = true;
     final boolean DEBUG_ASM_EXEC                    = true;
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 172132f..9094757 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -1,20 +1,20 @@
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of 
- * this software and associated documentation files (the "Software"), to deal in 
- * the Software without restriction, including without limitation the rights to 
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 package org.joni;
@@ -31,7 +31,7 @@ import org.joni.constants.TokenType;
 import org.joni.exception.ErrorMessages;
 
 class Lexer extends ScannerSupport {
-    protected final ScanEnvironment env; 
+    protected final ScanEnvironment env;
     protected final Syntax syntax;              // fast access to syntax
     protected final Token token = new Token();  // current token
 
@@ -40,17 +40,17 @@ class Lexer extends ScannerSupport {
         this.env = env;
         this.syntax = env.syntax;
     }
-    
+
     /**
      * @return 0: normal {n,m}, 2: fixed {n}
-     * !introduce returnCode here 
+     * !introduce returnCode here
      */
     private int fetchRangeQuantifier() {
         mark();
         boolean synAllow = syntax.allowInvalidInterval();
-        
+
         if (!left()) {
-            if (synAllow) { 
+            if (synAllow) {
                 return 1; /* "....{" : OK! */
             } else {
                 newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
@@ -63,7 +63,7 @@ class Lexer extends ScannerSupport {
                 newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
             }
         }
-        
+
         int low = scanUnsignedNumber();
         if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
         if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
@@ -77,18 +77,18 @@ class Lexer extends ScannerSupport {
                 return invalidRangeQuantifier(synAllow);
             }
         }
-        
+
         if (!left()) return invalidRangeQuantifier(synAllow);
-        
+
         fetch();
         int up;
         int ret = 0;
         if (c == ',') {
-            int prev = p; // ??? last            
+            int prev = p; // ??? last
             up = scanUnsignedNumber();
             if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
             if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
-            
+
             if (p == prev) {
                 if (nonLow) return invalidRangeQuantifier(synAllow);
                 up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
@@ -99,28 +99,28 @@ class Lexer extends ScannerSupport {
             up = low; /* {n} : exact n times */
             ret = 2; /* fixed */
         }
-        
+
         if (!left()) return invalidRangeQuantifier(synAllow);
         fetch();
-        
+
         if (syntax.opEscBraceInterval()) {
             if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow);
             fetch();
         }
-        
+
         if (c != '}') return invalidRangeQuantifier(synAllow);
-        
+
         if (!isRepeatInfinite(up) && low > up) {
             newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
         }
-        
+
         token.type = TokenType.INTERVAL;
         token.setRepeatLower(low);
         token.setRepeatUpper(up);
-        
+
         return ret; /* 0: normal {n,m}, 2: fixed {n} */
     }
-    
+
     private int invalidRangeQuantifier(boolean synAllow) {
         if (synAllow) {
             restore();
@@ -130,7 +130,7 @@ class Lexer extends ScannerSupport {
             return 0; // not reached
         }
     }
-    
+
     /* \M-, \C-, \c, or \... */
     private int fetchEscapedValue() {
         if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
@@ -164,20 +164,20 @@ class Lexer extends ScannerSupport {
                 fetchEscapedValueBackSlash();
             }
             break;
-            
+
         case 'c':
             if (syntax.opEscCControl()) {
                 fetchEscapedValueControl();
             }
             /* fall through */
-            
+
         default:
             fetchEscapedValueBackSlash();
         } // switch
-        
+
         return c; // ???
     }
-    
+
     private void fetchEscapedValueBackSlash() {
         c = env.convertBackslashValue(c);
     }
@@ -194,7 +194,7 @@ class Lexer extends ScannerSupport {
             c &= 0x9f;
         }
     }
-    
+
     private int nameEndCodePoint(int start) {
         switch(start) {
         case '<':
@@ -212,16 +212,16 @@ class Lexer extends ScannerSupport {
         \k<num+n>,  \k<num-n>
         \k<-num+n>, \k<-num-n>
      */
-    
+
     // value implicit (rnameEnd)
     private boolean fetchNameWithLevel(int startCode, int[]rbackNum, int[]rlevel) {
         int src = p;
         boolean existLevel = false;
         int isNum = 0;
         int sign = 1;
-        
+
         int endCode = nameEndCodePoint(startCode);
-        int pnumHead = p; 
+        int pnumHead = p;
         int nameEnd = stop;
 
         String err = null;
@@ -232,15 +232,15 @@ class Lexer extends ScannerSupport {
             if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
             if (enc.isDigit(c)) {
                 isNum = 1;
-            } else if (c == '-') { 
+            } else if (c == '-') {
                 isNum = 2;
                 sign = -1;
                 pnumHead = p;
-            } else if (!enc.isWord(c)) { 
+            } else if (!enc.isWord(c)) {
                 err = ERR_INVALID_GROUP_NAME;
             }
         }
-        
+
         while (left()) {
             nameEnd = p;
             fetch();
@@ -248,7 +248,7 @@ class Lexer extends ScannerSupport {
                 if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
                 break;
             }
-            
+
             if (isNum != 0) {
                 if (enc.isDigit(c)) {
                     isNum = 1;
@@ -273,11 +273,11 @@ class Lexer extends ScannerSupport {
                 if (level < 0) newValueException(ERR_TOO_BIG_NUMBER);
                 rlevel[0] = level * flag;
                 existLevel = true;
-                
+
                 fetch();
                 isEndCode = c == endCode;
             }
-            
+
             if (!isEndCode) {
                 err = ERR_INVALID_GROUP_NAME;
                 nameEnd = stop;
@@ -295,7 +295,7 @@ class Lexer extends ScannerSupport {
                 } else if (backNum == 0) {
                     newValueException(ERR_INVALID_GROUP_NAME, src, stop);
                 }
-                rbackNum[0] = backNum * sign; 
+                rbackNum[0] = backNum * sign;
             }
             value = nameEnd;
             return existLevel;
@@ -304,14 +304,14 @@ class Lexer extends ScannerSupport {
             return false; // not reached
         }
     }
-    
+
     // USE_NAMED_GROUP
     // ref: 0 -> define name    (don't allow number name)
     //      1 -> reference name (allow number name)
     private int fetchNameForNamedGroup(int startCode, boolean ref) {
         int src = p;
         value = 0;
-        
+
         int isNum = 0;
         int sign = 1;
 
@@ -332,7 +332,7 @@ class Lexer extends ScannerSupport {
                     err = ERR_INVALID_GROUP_NAME;
                     // isNum = 0;
                 }
-            } else if (c == '-') { 
+            } else if (c == '-') {
                 if (ref) {
                     isNum = 2;
                     sign = -1;
@@ -342,10 +342,10 @@ class Lexer extends ScannerSupport {
                     // isNum = 0;
                 }
             } else if (!enc.isWord(c)) {
-                err = ERR_INVALID_CHAR_IN_GROUP_NAME; 
+                err = ERR_INVALID_CHAR_IN_GROUP_NAME;
             }
         }
-        
+
         if (err == null) {
             while (left()) {
                 nameEnd = p;
@@ -354,7 +354,7 @@ class Lexer extends ScannerSupport {
                     if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
                     break;
                 }
-                
+
                 if (isNum != 0) {
                     if (enc.isDigit(c)) {
                         isNum = 1;
@@ -372,7 +372,7 @@ class Lexer extends ScannerSupport {
                     }
                 }
             }
-            
+
             if (c != endCode) {
                 err = ERR_INVALID_GROUP_NAME;
                 nameEnd = stop;
@@ -410,12 +410,12 @@ class Lexer extends ScannerSupport {
     private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
         int src = p;
         value = 0;
-        
+
         int isNum = 0;
         int sign = 1;
-        
+
         int endCode = nameEndCodePoint(startCode);
-        int pnumHead = p; 
+        int pnumHead = p;
         int nameEnd = stop;
 
         String err = null;
@@ -424,7 +424,7 @@ class Lexer extends ScannerSupport {
         } else {
             fetch();
             if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
-            
+
             if (enc.isDigit(c)) {
                 isNum = 1;
             } else if (c == '-') {
@@ -438,17 +438,17 @@ class Lexer extends ScannerSupport {
 
         while(left()) {
             nameEnd = p;
-            
+
             fetch();
             if (c == endCode || c == ')') break;
             if (!enc.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME;
         }
-            
-        if (err == null && c != endCode) { 
+
+        if (err == null && c != endCode) {
             err = ERR_INVALID_GROUP_NAME;
             nameEnd = stop;
         }
-        
+
         if (err == null) {
             mark();
             p = pnumHead;
@@ -460,7 +460,7 @@ class Lexer extends ScannerSupport {
                 newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
             }
             backNum *= sign;
-            
+
             value = nameEnd;
             return backNum;
         } else {
@@ -468,7 +468,7 @@ class Lexer extends ScannerSupport {
             return 0; // not reached
         }
     }
-    
+
     protected final int fetchName(int startCode, boolean ref) {
         if (Config.USE_NAMED_GROUP) {
             return fetchNameForNamedGroup(startCode, ref);
@@ -476,11 +476,11 @@ class Lexer extends ScannerSupport {
             return fetchNameForNoNamedGroup(startCode, ref);
         }
     }
-    
+
     private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
         int p = this.p;
         int to = this.stop;
-        
+
         boolean inEsc = false;
         int i=0;
 
@@ -508,14 +508,14 @@ class Lexer extends ScannerSupport {
             }
         }
         return false;
-    }    
-    
-    private static final int send[] = new int[]{':', ']'}; 
-    
+    }
+
+    private static final int send[] = new int[]{':', ']'};
+
     protected final TokenType fetchTokenInCC() {
         int last;
         int c2;
-        
+
         if (!left()) {
             token.type = TokenType.EOT;
             return token.type;
@@ -526,7 +526,7 @@ class Lexer extends ScannerSupport {
         token.base = 0;
         token.setC(c);
         token.escaped = false;
-        
+
         if (c == ']') {
             token.type = TokenType.CC_CLOSE;
         } else if (c == '-') {
@@ -539,40 +539,40 @@ class Lexer extends ScannerSupport {
             token.setC(c);
 
             switch (c) {
-            
+
             case 'w':
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.WORD);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
                 token.setPropNot(false);
                 break;
-                
+
             case 'W':
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.WORD);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
                 token.setPropNot(true);
                 break;
-                
+
             case 'd':
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.DIGIT);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
                 token.setPropNot(false);
                 break;
 
             case 'D':
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.DIGIT);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
                 token.setPropNot(true);
                 break;
 
             case 's':
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.SPACE);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
                 token.setPropNot(false);
                 break;
-            
+
             case 'S':
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.SPACE);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
                 token.setPropNot(true);
                 break;
 
@@ -589,41 +589,41 @@ class Lexer extends ScannerSupport {
                 token.setPropCType(CharacterType.XDIGIT);
                 token.setPropNot(true);
                 break;
-            
+
             case 'p':
             case 'P':
-                c2 = peek(); // !!! migrate to peekIs 
+                c2 = peek(); // !!! migrate to peekIs
                 if (c2 == '{' && syntax.op2EscPBraceCharProperty()) {
                     inc();
                     token.type = TokenType.CHAR_PROPERTY;
                     token.setPropNot(c == 'P');
-                    
+
                     if (syntax.op2EscPBraceCircumflexNot()) {
                         c2 = fetchTo();
                         if (c2 == '^') {
-                            token.setPropNot(!token.getPropNot()); 
+                            token.setPropNot(!token.getPropNot());
                         } else {
                             unfetch();
                         }
                     }
                 }
                 break;
-                
+
             case 'x':
                 if (!left()) break;
                 last = p;
-                
+
                 if (peekIs('{') && syntax.opEscXBraceHex8()) {
                     inc();
                     int num = scanUnsignedHexadecimalNumber(8);
                     if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
                     if (left()) {
                         c2 = peek();
-                        if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); 
+                        if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
                     }
-                    
+
                     if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) {
-                        inc();                      
+                        inc();
                         token.type = TokenType.CODE_POINT;
                         token.base = 16;
                         token.setCode(num);
@@ -642,11 +642,11 @@ class Lexer extends ScannerSupport {
                     token.setC(num);
                 }
                 break;
-                
+
             case 'u':
                 if (!left()) break;
                 last = p;
-                
+
                 if (syntax.op2EscUHex4()) {
                     int num = scanUnsignedHexadecimalNumber(4);
                     if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
@@ -658,7 +658,7 @@ class Lexer extends ScannerSupport {
                     token.setCode(num);
                 }
                 break;
-                
+
             case '0':
             case '1':
             case '2':
@@ -680,7 +680,7 @@ class Lexer extends ScannerSupport {
                     token.setC(num);
                 }
                 break;
-                
+
             default:
                 unfetch();
                 int num = fetchEscapedValue();
@@ -690,7 +690,7 @@ class Lexer extends ScannerSupport {
                 }
                 break;
             } // switch
-            
+
         } else if (c == '[') {
             if (syntax.opPosixBracket() && peekIs(':')) {
                 token.backP = p; /* point at '[' is readed */
@@ -721,24 +721,24 @@ class Lexer extends ScannerSupport {
         }
         return token.type;
     }
-    
+
     protected final int backrefRelToAbs(int relNo) {
         return env.numMem + 1 + relNo;
     }
-    
+
     protected final TokenType fetchToken() {
         int last;
-        
+
         // mark(); // out
-        
+
         start:
         while(true) {
-            
+
         if (!left()) {
             token.type = TokenType.EOT;
             return token.type;
         }
-        
+
         token.type = TokenType.STRING;
         token.base = 0;
         token.backP = p;
@@ -814,14 +814,14 @@ class Lexer extends ScannerSupport {
             case 'w':
                 if (!syntax.opEscWWord()) break;
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.WORD);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
                 token.setPropNot(false);
                 break;
 
             case 'W':
                 if (!syntax.opEscWWord()) break;
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.WORD);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
                 token.setPropNot(true);
                 break;
 
@@ -845,7 +845,7 @@ class Lexer extends ScannerSupport {
                     break;
                 } // USE_WORD_BEGIN_END
                 break; // ?
-                
+
             case '>':
                 if (Config.USE_WORD_BEGIN_END) {
                     if (!syntax.opEscLtGtWordBeginEnd()) break;
@@ -858,28 +858,28 @@ class Lexer extends ScannerSupport {
             case 's':
                 if (!syntax.opEscSWhiteSpace()) break;
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.SPACE);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
                 token.setPropNot(false);
                 break;
 
             case 'S':
                 if (!syntax.opEscSWhiteSpace()) break;
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.SPACE);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
                 token.setPropNot(true);
                 break;
-                
+
             case 'd':
                 if (!syntax.opEscDDigit()) break;
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.DIGIT);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
                 token.setPropNot(false);
                 break;
-                
+
             case 'D':
                 if (!syntax.opEscDDigit()) break;
                 token.type = TokenType.CHAR_TYPE;
-                token.setPropCType(CharacterType.DIGIT);
+                token.setPropCType(Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
                 token.setPropNot(true);
                 break;
 
@@ -903,26 +903,26 @@ class Lexer extends ScannerSupport {
                 token.type = TokenType.ANCHOR;
                 token.setSubtype(AnchorType.BEGIN_BUF);
                 break;
-                
+
             case 'Z':
                 if (!syntax.opEscAZBufAnchor()) break;
                 token.type = TokenType.ANCHOR;
                 token.setSubtype(AnchorType.SEMI_END_BUF);
                 break;
-                
+
             case 'z':
                 if (!syntax.opEscAZBufAnchor()) break;
-                // end_buf label                
-                token.type = TokenType.ANCHOR;                
+                // end_buf label
+                token.type = TokenType.ANCHOR;
                 token.setSubtype(AnchorType.END_BUF);
                 break;
-                
+
             case 'G':
                 if (!syntax.opEscCapitalGBeginAnchor()) break;
                 token.type = TokenType.ANCHOR;
                 token.setSubtype(AnchorType.BEGIN_POSITION);
                 break;
-                
+
             case '`':
                 if (!syntax.op2EscGnuBufAnchor()) break;
                 // goto begin_buf
@@ -932,8 +932,8 @@ class Lexer extends ScannerSupport {
 
             case '\'':
                 if (!syntax.op2EscGnuBufAnchor()) break;
-                // goto end_buf                
-                token.type = TokenType.ANCHOR;                
+                // goto end_buf
+                token.type = TokenType.ANCHOR;
                 token.setSubtype(AnchorType.END_BUF);
                 break;
 
@@ -945,9 +945,9 @@ class Lexer extends ScannerSupport {
                     int num = scanUnsignedHexadecimalNumber(8);
                     if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
                     if (left()) {
-                        if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); 
+                        if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
                     }
-                    
+
                     if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) {
                         inc();
                         token.type = TokenType.CODE_POINT;
@@ -967,11 +967,11 @@ class Lexer extends ScannerSupport {
                     token.setC(num);
                 }
                 break;
-                
+
             case 'u': // extract to helper
                 if (!left()) break;
                 last = p;
-                
+
                 if (syntax.op2EscUHex4()) {
                     int num = scanUnsignedHexadecimalNumber(4);
                     if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
@@ -983,7 +983,7 @@ class Lexer extends ScannerSupport {
                     token.setCode(num);
                 }
                 break;
-                
+
             case '1':
             case '2':
             case '3':
@@ -992,11 +992,11 @@ class Lexer extends ScannerSupport {
             case '6':
             case '7':
             case '8':
-            case '9':               
+            case '9':
                 unfetch();
                 last = p;
                 int num = scanUnsignedNumber();
-                if (num < 0 || num > Config.MAX_BACKREF_NUM) { 
+                if (num < 0 || num > Config.MAX_BACKREF_NUM) {
                     // goto skip_backref
                 } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
                     if (syntax.strictCheckBackref()) {
@@ -1018,7 +1018,7 @@ class Lexer extends ScannerSupport {
                 }
                 p = last;
                 /* fall through */
-                
+
             case '0':
                 if (syntax.opEscOctal3()) {
                     last = p;
@@ -1034,7 +1034,7 @@ class Lexer extends ScannerSupport {
                     inc();
                 }
                 break;
-                
+
             case 'k':
                 if (Config.USE_NAMED_GROUP) {
                     if (syntax.op2EscKNamedBackref()) {
@@ -1052,13 +1052,13 @@ class Lexer extends ScannerSupport {
                                 backNum = fetchName(c, true);
                             } // USE_BACKREF_AT_LEVEL
                             int nameEnd = value; // set by fetchNameWithLevel/fetchName
-                            
+
                             if (backNum != 0) {
                                 if (backNum < 0) {
                                     backNum = backrefRelToAbs(backNum);
                                     if (backNum <= 0) newValueException(ERR_INVALID_BACKREF);
                                 }
-                                
+
                                 if (syntax.strictCheckBackref() && (backNum > env.numMem || env.memNodes == null)) {
                                     newValueException(ERR_INVALID_BACKREF);
                                 }
@@ -1099,11 +1099,11 @@ class Lexer extends ScannerSupport {
                             unfetch();
                         }
                     }
-                    
+
                     break;
                 } // USE_NAMED_GROUP
                 break;
-                
+
             case 'g':
                 if (Config.USE_SUBEXP_CALL) {
                     if (syntax.op2EscGSubexpCall()) {
@@ -1120,34 +1120,34 @@ class Lexer extends ScannerSupport {
                             unfetch();
                         }
                     }
-                    break;                    
+                    break;
                 } // USE_SUBEXP_CALL
                 break;
-                
+
             case 'Q':
                 if (syntax.op2EscCapitalQQuote()) {
                     token.type = TokenType.QUOTE_OPEN;
                 }
                 break;
-                
+
             case 'p':
             case 'P':
                 if (peekIs('{') && syntax.op2EscPBraceCharProperty()) {
                     inc();
                     token.type = TokenType.CHAR_PROPERTY;
                     token.setPropNot(c == 'P');
-                    
+
                     if (syntax.op2EscPBraceCircumflexNot()) {
                         fetch();
                         if (c == '^') {
-                            token.setPropNot(!token.getPropNot()); 
+                            token.setPropNot(!token.getPropNot());
                         } else {
                             unfetch();
                         }
                     }
                 }
                 break;
-                
+
             default:
                 unfetch();
                 num = fetchEscapedValue();
@@ -1160,13 +1160,13 @@ class Lexer extends ScannerSupport {
                     p = token.backP + enc.length(bytes, token.backP, stop);
                 }
                 break;
-                
+
             } // switch (c)
-            
+
         } else {
             token.setC(c);
             token.escaped = false;
-            
+
             // remove code duplication
             if (Config.USE_VARIABLE_META_CHARS) {
                 if (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters()) {
@@ -1198,16 +1198,16 @@ class Lexer extends ScannerSupport {
                     }
                 }
             } // USE_VARIABLE_META_CHARS
-            
-            { 
+
+            {
                 switch(c) {
-                
+
                 case '.':
                     if (!syntax.opDotAnyChar()) break;
                     // any_char:
                     token.type = TokenType.ANYCHAR;
                     break;
-                    
+
                 case '*':
                     if (!syntax.opAsteriskZeroInf()) break;
                     // anytime:
@@ -1225,8 +1225,8 @@ class Lexer extends ScannerSupport {
                     token.setRepeatUpper(QuantifierNode.REPEAT_INFINITE);
                     greedyCheck();
                     break;
-                    
-                case '?':                   
+
+                case '?':
                     if (!syntax.opQMarkZeroOne()) break;
                     // zero_or_one_time:
                     token.type = TokenType.OP_REPEAT;
@@ -1234,7 +1234,7 @@ class Lexer extends ScannerSupport {
                     token.setRepeatUpper(1);
                     greedyCheck();
                     break;
-                    
+
                 case '{':
                     if (!syntax.opBraceInterval()) break;
                     switch(fetchRangeQuantifier()) {
@@ -1251,12 +1251,12 @@ class Lexer extends ScannerSupport {
                     default: /* 1 : normal char */
                     } // inner switch
                     break;
-                    
+
                 case '|':
                     if (!syntax.opVBarAlt()) break;
                     token.type = TokenType.ALT;
                     break;
-                    
+
                 case '(':
                     if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
                         inc();
@@ -1275,49 +1275,49 @@ class Lexer extends ScannerSupport {
                         }
                         unfetch();
                     }
-                    
+
                     if (!syntax.opLParenSubexp()) break;
                     token.type = TokenType.SUBEXP_OPEN;
                     break;
-                    
+
                 case ')':
                     if (!syntax.opLParenSubexp()) break;
-                    token.type = TokenType.SUBEXP_CLOSE;                    
+                    token.type = TokenType.SUBEXP_CLOSE;
                     break;
-                    
+
                 case '^':
                     if (!syntax.opLineAnchor()) break;
                     token.type = TokenType.ANCHOR;
                     token.setSubtype(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
                     break;
-                    
+
                 case '$':
                     if (!syntax.opLineAnchor()) break;
                     token.type = TokenType.ANCHOR;
                     token.setSubtype(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
                     break;
-                    
+
                 case '[':
                     if (!syntax.opBracketCC()) break;
                     token.type = TokenType.CC_CC_OPEN;
                     break;
-                    
+
                 case ']':
                     //if (*src > env->pattern)   /* /].../ is allowed. */
                     //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
                     break;
-                    
+
                 case '#':
                     if (Option.isExtend(env.option)) {
                         while (left()) {
                             fetch();
                             if (enc.isNewLine(c)) break;
                         }
-                        continue start; // goto start 
-                        
+                        continue start; // goto start
+
                     }
                     break;
-                    
+
                 case ' ':
                 case '\t':
                 case '\n':
@@ -1327,22 +1327,22 @@ class Lexer extends ScannerSupport {
                         continue start; // goto start
                     }
                     break;
-                    
+
                 default: // string
                     break;
-                    
+
                 } // switch
             }
         }
-        
+
         break;
         } // while
-        return token.type;   
+        return token.type;
     }
-    
+
     private void greedyCheck() {
         if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) {
-            
+
             fetch();
 
             token.setRepeatGreedy(false);
@@ -1351,14 +1351,14 @@ class Lexer extends ScannerSupport {
             possessiveCheck();
         }
     }
-    
+
     private void possessiveCheck() {
-        if (left() && peekIs('+') && 
+        if (left() && peekIs('+') &&
             (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL ||
              syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) {
-            
+
             fetch();
-            
+
             token.setRepeatGreedy(true);
             token.setRepeatPossessive(true);
         } else {
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index a787d16..71d29fd 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -1,20 +1,20 @@
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of 
- * this software and associated documentation files (the "Software"), to deal in 
- * the Software without restriction, including without limitation the rights to 
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is furnished to do
  * so, subject to the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 package org.joni;
@@ -49,28 +49,28 @@ class Parser extends Lexer {
 
     protected final Regex regex;
     protected Node root;
-    
+
     protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
-                              // this approach will not affect recursive calls 
-    
+                              // this approach will not affect recursive calls
+
     protected Parser(ScanEnvironment env, byte[]bytes, int p, int end) {
         super(env, bytes, p, end);
         regex = env.reg;
     }
-    
+
     // onig_parse_make_tree
     protected final Node parse() {
         root = parseRegexp();
         regex.numMem = env.numMem;
         return root;
     }
-    
+
     private static final int POSIX_BRACKET_NAME_MIN_LEN            = 4;
     private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH      = 20;
     private static final byte BRACKET_END[]                        = ":]".getBytes();
     private boolean parsePosixBracket(CClassNode cc) {
         mark();
-        
+
         boolean not;
         if (peekIs('^')) {
             inc();
@@ -94,7 +94,7 @@ class Parser extends Lexer {
                     return false;
                 }
             }
-            
+
         }
 
         // not_posix_bracket:
@@ -104,7 +104,7 @@ class Parser extends Lexer {
             inc();
             if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
         }
-        
+
         if (c == ':' && left()) {
             inc();
             if (left()) {
@@ -115,7 +115,7 @@ class Parser extends Lexer {
         restore();
         return true; /* 1: is not POSIX bracket, but no error. */
     }
-    
+
     private CClassNode parseCharProperty() {
         int ctype = fetchCharPropertyToCType();
         CClassNode n = new CClassNode();
@@ -123,28 +123,28 @@ class Parser extends Lexer {
         if (token.getPropNot()) n.setNot();
         return n;
     }
-    
+
     private boolean codeExistCheck(int code, boolean ignoreEscaped) {
         mark();
-        
+
         boolean inEsc = false;
         while(left()) {
-            if (ignoreEscaped && inEsc) { 
+            if (ignoreEscaped && inEsc) {
                 inEsc = false;
             } else {
                 fetch();
                 if (c == code) {
                     restore();
-                    return true; 
+                    return true;
                 }
                 if (c == syntax.metaCharTable.esc) inEsc = true;
             }
         }
-        
+
         restore();
         return false;
     }
-    
+
     private CClassNode parseCharClass() {
         fetchTokenInCC();
 
@@ -155,35 +155,45 @@ class Parser extends Lexer {
         } else {
             neg = false;
         }
-        
+
         if (token.type == TokenType.CC_CLOSE) {
             if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
             env.ccEscWarn("]");
             token.type = TokenType.CHAR; /* allow []...] */
         }
-        
+
         CClassNode cc = new CClassNode();
         CClassNode prevCC = null;
         CClassNode workCC = null;
 
         CCStateArg arg = new CCStateArg();
-        
+
         boolean andStart = false;
         arg.state = CCSTATE.START;
 
         while(token.type != TokenType.CC_CLOSE) {
             boolean fetched = false;
-            
+
             switch (token.type) {
-            
+
             case CHAR:
-                    int len = enc.codeToMbcLength(token.getC());
-                    if (len > 1) { 
-                        arg.inType = CCVALTYPE.CODE_POINT;
-                    } else {
-                        // !sb_char:!
-                        arg.inType = CCVALTYPE.SB;
-                    }
+                    int len;
+//                    if (Config.VANILLA) {
+                        len = enc.codeToMbcLength(token.getC());
+                        if (len > 1) {
+                            arg.inType = CCVALTYPE.CODE_POINT;
+                        } else {
+                            // !sb_char:!
+                            arg.inType = CCVALTYPE.SB;
+                        }
+//                    } else {
+//                        if (token.getCode() >= BitSet.SINGLE_BYTE_SIZE || (len = enc.codeToMbcLength(token.getC())) > 1) {
+//                            arg.inType = CCVALTYPE.CODE_POINT;
+//                        } else {
+//                            // !sb_char:!
+//                            arg.inType = CCVALTYPE.SB;
+//                        }
+//                    }
                     arg.v = token.getC();
                     arg.vIsRaw = false;
                     // !goto val_entry2;!
@@ -207,9 +217,9 @@ class Parser extends Lexer {
                         buf[i] = (byte)token.getC();
                     }
                     if (i < enc.minLength()) newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
-                    
+
                     len = enc.length(buf, 0, i);
-                    if (i < len) { 
+                    if (i < len) {
                         newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
                     } else if (i > len) { /* fetch back */
                         p = psave;
@@ -233,7 +243,7 @@ class Parser extends Lexer {
                 // !goto val_entry2;!
                 valEntry2(cc, arg);
                 break;
-                
+
             case CODE_POINT:
                 arg.v = token.getCode();
                 arg.vIsRaw = true;
@@ -241,7 +251,7 @@ class Parser extends Lexer {
                 // !val_entry2:!
                 valEntry(cc, arg);
                 break;
-                
+
             case POSIX_BRACKET_OPEN:
                 if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
                     env.ccEscWarn("[");
@@ -255,20 +265,20 @@ class Parser extends Lexer {
                 // !goto next_class;!
                 cc.nextStateClass(arg, env);
                 break;
-                
+
             case CHAR_TYPE:
                 cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
                 // !next_class:!
                 cc.nextStateClass(arg, env);
                 break;
-                
+
             case CHAR_PROPERTY:
                 int ctype = fetchCharPropertyToCType();
                 cc.addCType(ctype, token.getPropNot(), env, this);
                 // !goto next_class;!
                 cc.nextStateClass(arg, env);
                 break;
-                
+
             case CC_RANGE:
                 if (arg.state == CCSTATE.VALUE) {
                     fetchTokenInCC();
@@ -315,7 +325,7 @@ class Parser extends Lexer {
                         rangeEndVal(cc, arg);
                         break;
                     }
-                    
+
                     if (syntax.allowDoubleRangeOpInCC()) {
                         env.ccEscWarn("-");
                         /* [0-9-a] is allowed as [0-9\-a] */
@@ -326,12 +336,12 @@ class Parser extends Lexer {
                     newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
                 }
                 break;
-                
+
             case CC_CC_OPEN: /* [ */
                 CClassNode acc = parseCharClass();
                 cc.or(acc, enc);
                 break;
-                
+
             case CC_AND:     /* && */
                 if (arg.state == CCSTATE.VALUE) {
                     arg.v = 0; // ??? safe v ?
@@ -349,36 +359,36 @@ class Parser extends Lexer {
                     cc = workCC;
                 }
                 // initialize_cclass(cc); // clear it ??
-                break;              
-                
+                break;
+
             case EOT:
                 newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
-                
-            default:                
-                newInternalException(ERR_PARSER_BUG);           
+
+            default:
+                newInternalException(ERR_PARSER_BUG);
             } // switch
-            
+
             if (!fetched) fetchTokenInCC();
-            
+
         } // while
-        
+
         if (arg.state == CCSTATE.VALUE) {
             arg.v = 0; // ??? safe v ?
             arg.vIsRaw = false;
             cc.nextStateValue(arg, env);
         }
-        
+
         if (prevCC != null) {
             prevCC.and(cc, enc);
             cc = prevCC;
         }
-        
+
         if (neg) {
             cc.setNot();
         } else {
             cc.clearNot();
         }
-        
+
         if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
             if (!cc.isEmpty()) {
                 final int NEW_LINE = 0x0a;
@@ -391,21 +401,21 @@ class Parser extends Lexer {
                 }
             }
         }
-        
+
         return cc;
     }
-    
+
     private void valEntry2(CClassNode cc, CCStateArg arg) {
         cc.nextStateValue(arg, env);
     }
-    
+
     private void valEntry(CClassNode cc, CCStateArg arg) {
         int len = enc.codeToMbcLength(arg.v);
         arg.inType = len == 1 ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
         // !val_entry2:!
         valEntry2(cc, arg);
     }
-    
+
     private void sbChar(CClassNode cc, CCStateArg arg) {
         arg.inType = CCVALTYPE.SB;
         arg.v = token.getC();
@@ -420,20 +430,20 @@ class Parser extends Lexer {
         // !goto val_entry;!
         valEntry(cc, arg);
     }
-    
+
     private Node parseEnclose(TokenType term) {
         Node node = null;
-        
+
         if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
-        
+
         int option = env.option;
-        
-        if (peekIs('?') && syntax.op2QMarkGroupEffect()) { 
+
+        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
             inc();
             if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
-            
+
             boolean listCapture = false;
-            
+
             fetch();
             switch(c) {
             case ':':  /* (?:...) grouping only */
@@ -442,19 +452,19 @@ class Parser extends Lexer {
                 node = parseSubExp(term);
                 returnCode = 1; /* group */
                 return node;
-                
+
             case '=':
                 node = new AnchorNode(AnchorType.PREC_READ);
                 break;
-                
+
             case '!':  /*         preceding read */
                 node = new AnchorNode(AnchorType.PREC_READ_NOT);
                 break;
-                
+
             case '>':  /* (?>...) stop backtrack */
                 node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                 break;
-                
+
             case '\'':
                 if (Config.USE_NAMED_GROUP) {
                     if (syntax.op2QMarkLtNamedGroup()) {
@@ -479,7 +489,7 @@ class Parser extends Lexer {
                         if (syntax.op2QMarkLtNamedGroup()) {
                             unfetch();
                             c = '<';
-                            
+
                             // !named_group1:!
                             listCapture = false;
                             // !named_group2:!
@@ -488,17 +498,17 @@ class Parser extends Lexer {
                         } else {
                             newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                         }
-                        
+
                     } else { // USE_NAMED_GROUP
                         newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                     } // USE_NAMED_GROUP
                 }
                 break;
-                
+
             case '@':
-                if (syntax.op2AtMarkCaptureHistory()) {                 
+                if (syntax.op2AtMarkCaptureHistory()) {
                     if (Config.USE_NAMED_GROUP) {
-                        if (syntax.op2QMarkLtNamedGroup()) {                        
+                        if (syntax.op2QMarkLtNamedGroup()) {
                             fetch();
                             if (c == '<' || c == '\'') {
                                 listCapture = true;
@@ -518,7 +528,7 @@ class Parser extends Lexer {
                     newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                 }
                 break;
-                
+
             // case 'p': #ifdef USE_POSIXLINE_OPTION
             case '-':
             case 'i':
@@ -531,19 +541,19 @@ class Parser extends Lexer {
                     case ':':
                     case ')':
                         break;
-                        
+
                     case '-':
                         neg = true;
                         break;
-                        
+
                     case 'x':
                         option = bsOnOff(option, Option.EXTEND, neg);
                         break;
-                        
+
                     case 'i':
                         option = bsOnOff(option, Option.IGNORECASE, neg);
                         break;
-                        
+
                     case 's':
                         if (syntax.op2OptionPerl()) {
                             option = bsOnOff(option, Option.MULTILINE, neg);
@@ -551,7 +561,7 @@ class Parser extends Lexer {
                             newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                         }
                         break;
-                        
+
                     case 'm':
                         if (syntax.op2OptionPerl()) {
                             option = bsOnOff(option, Option.SINGLELINE, !neg);
@@ -561,15 +571,15 @@ class Parser extends Lexer {
                             newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                         }
                         break;
-                        
+
                     // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
                     // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
                     // break;
-                    
+
                     default:
                         newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                     } // switch
-                    
+
                     if (c == ')') {
                         EncloseNode en = new EncloseNode(option, 0); // node_new_option
                         node = en;
@@ -590,11 +600,11 @@ class Parser extends Lexer {
                     if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
                     fetch();
                 } // while
-            
+
             default:
                 newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
             } // switch
-            
+
         } else {
             if (isDontCaptureGroup(env.option)) {
                 // !goto group;!
@@ -608,7 +618,7 @@ class Parser extends Lexer {
             en.regNum = num;
             node = en;
         }
-        
+
         fetchToken();
         Node target = parseSubExp(term);
 
@@ -626,25 +636,25 @@ class Parser extends Lexer {
         returnCode = 0;
         return node; // ??
     }
-    
+
     private Node namedGroup2(boolean listCapture) {
         int nm = p;
         int num = fetchName(c, false);
         int nameEnd = value;
         num = env.addMemEntry();
         if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
-        
+
         regex.nameAdd(bytes, nm, nameEnd, num, syntax);
         EncloseNode en = new EncloseNode(env.option, true); // node_new_enclose_memory
         en.regNum = num;
 
         Node node = en;
-        
+
         if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, num);
         env.numNamed++;
         return node;
     }
-    
+
     private int nextChar; // hidden var
     private int findStrPosition(int[]s, int n, int from, int to) {
         int x;
@@ -661,7 +671,7 @@ class Parser extends Lexer {
                     q += enc.length(bytes, q, to);
                 }
                 if (i >= n) {
-                    if (bytes[nextChar] != 0) nextChar = q; // we may need zero term semantics... 
+                    if (bytes[nextChar] != 0) nextChar = q; // we may need zero term semantics...
                     return p;
                 }
             }
@@ -669,13 +679,13 @@ class Parser extends Lexer {
         }
         return -1;
     }
-    
+
     private Node parseExp(TokenType term) {
         if (token.type == term) {
             //!goto end_of_token;!
             return new StringNode();
         }
-        
+
         Node node = null;
         boolean group = false;
 
@@ -684,7 +694,7 @@ class Parser extends Lexer {
         case EOT:
             // !end_of_token:!
             return new StringNode(); // node_new_empty
-            
+
         case SUBEXP_OPEN:
             node = parseEnclose(TokenType.SUBEXP_CLOSE);
             if (returnCode == 1) {
@@ -697,13 +707,13 @@ class Parser extends Lexer {
                 Node target = parseSubExp(term);
                 env.option = prev;
                 en.setTarget(target);
-                return node;                
+                return node;
             }
             break;
-            
+
         case SUBEXP_CLOSE:
             if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
-            
+
             if (token.escaped) {
                 // !goto tk_raw_byte;!
                 return parseExpTkRawByte(group);
@@ -711,22 +721,22 @@ class Parser extends Lexer {
                 // !goto tk_byte;!
                 return parseExpTkByte(group);
             }
-            
+
         case STRING:
             // !tk_byte:!
             return parseExpTkByte(group);
-            
+
         case RAW_BYTE:
             // !tk_raw_byte:!
             return parseExpTkRawByte(group);
-            
+
         case CODE_POINT:
             byte[]buf = new byte[Config.ENC_CODE_TO_MBC_MAXLEN];
             int num = enc.codeToMbc(token.getCode(), buf, 0);
-            // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else 
+            // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
             node = new StringNode(buf, 0, num);
             break;
-            
+
         case QUOTE_OPEN:
             int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'};
             int qstart = p;
@@ -740,10 +750,21 @@ class Parser extends Lexer {
 
         case CHAR_TYPE:
             switch(token.getPropCType()) {
+            case CharacterType.D:
+            case CharacterType.S:
+            case CharacterType.W:
+                if (Config.NON_UNICODE_SDW) {
+                    CClassNode cc = new CClassNode();
+                    cc.addCType(token.getPropCType(), false, env, this);
+                    if (token.getPropNot()) cc.setNot();
+                    node = cc;
+                }
+                break;
+
             case CharacterType.WORD:
                 node = new CTypeNode(token.getPropCType(), token.getPropNot());
                 break;
-            
+
             case CharacterType.SPACE:
             case CharacterType.DIGIT:
             case CharacterType.XDIGIT:
@@ -753,41 +774,41 @@ class Parser extends Lexer {
                 if (token.getPropNot()) ccn.setNot();
                 node = ccn;
                 break;
-                
+
             default:
                 newInternalException(ERR_PARSER_BUG);
-                
+
             } // inner switch
             break;
-            
+
         case CHAR_PROPERTY:
             node = parseCharProperty();
             break;
-            
+
         case CC_CC_OPEN:
             CClassNode cc = parseCharClass();
             node = cc;
             if (isIgnoreCase(env.option)) {
                 ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
                 enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
-            
+
                 if (arg.altRoot != null) {
                     node = ConsAltNode.newAltNode(node, arg.altRoot);
                 }
             }
             break;
-            
+
         case ANYCHAR:
             node = new AnyCharNode();
             break;
-            
+
         case ANYCHAR_ANYTIME:
             node = new AnyCharNode();
             QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
             qn.setTarget(node);
             node = qn;
             break;
-            
+
         case BACKREF:
             int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
             node = new BackRefNode(token.getBackrefNum(),
@@ -796,9 +817,9 @@ class Parser extends Lexer {
                             token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
                             token.getBackrefLevel(),      // ...
                             env);
-            
+
             break;
-            
+
         case CALL:
             if (Config.USE_SUBEXP_CALL) {
                 int gNum = token.getCallGNum();
@@ -816,7 +837,7 @@ class Parser extends Lexer {
         case ANCHOR:
             node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
             break;
-            
+
         case OP_REPEAT:
         case INTERVAL:
             if (syntax.contextIndepRepeatOps()) {
@@ -830,75 +851,75 @@ class Parser extends Lexer {
                 return parseExpTkByte(group);
             }
             break;
-            
+
         default:
             newInternalException(ERR_PARSER_BUG);
         } //switch
-        
+
         //targetp = node;
-        
+
         // !re_entry:!
         fetchToken();
-        
+
         // !repeat:!
         return parseExpRepeat(node, group);
     }
-    
+
     private Node parseExpTkByte(boolean group) {
         // !tk_byte:!
         StringNode node = new StringNode(bytes, token.backP, p);
         while (true) {
             fetchToken();
             if (token.type != TokenType.STRING) break;
-            
+
             if (token.backP == node.end) {
                 node.end = p; // non escaped character, remain shared, just increase shared range
             } else {
-                node.cat(bytes, token.backP, p); // non continuous string stream, need to COW 
+                node.cat(bytes, token.backP, p); // non continuous string stream, need to COW
             }
-        }       
+        }
         // !string_end:!
         // targetp = node;
         // !goto repeat;!
         return parseExpRepeat(node, group);
     }
-    
+
     private Node parseExpTkRawByte(boolean group) {
         // !tk_raw_byte:!
 
         // important: we don't use 0xff mask here neither in the compiler
         // (in the template string) so we won't have to mask target
-        // strings when comparing against them in the matcher 
+        // strings when comparing against them in the matcher
         StringNode node = new StringNode((byte)token.getC());
         node.setRaw();
 
-        int len = 1;            
+        int len = 1;
         while (true) {
-            if (len >= enc.minLength()) {               
-                if (len == enc.length(node.bytes, node.p, node.end)) {                  
+            if (len >= enc.minLength()) {
+                if (len == enc.length(node.bytes, node.p, node.end)) {
                     fetchToken();
                     node.clearRaw();
                     // !goto string_end;!
                     return parseExpRepeat(node, group);
                 }
             }
-            
+
             fetchToken();
             if (token.type != TokenType.RAW_BYTE) {
                 /* Don't use this, it is wrong for little endian encodings. */
                 // USE_PAD_TO_SHORT_BYTE_CHAR ...
-                
+
                 newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
             }
 
             // important: we don't use 0xff mask here neither in the compiler
             // (in the template string) so we won't have to mask target
-            // strings when comparing against them in the matcher 
+            // strings when comparing against them in the matcher
             node.cat((byte)token.getC());
             len++;
         } // while
     }
-    
+
     private Node parseExpRepeat(Node target, boolean group) {
         // !repeat:!
         while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) {
@@ -907,11 +928,11 @@ class Parser extends Lexer {
             QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
                                                      token.getRepeatUpper(),
                                                      token.type == TokenType.INTERVAL);
-            
+
             qtfr.greedy = token.getRepeatGreedy();
             int ret = qtfr.setQuantifier(target, group, env, bytes, getBegin(), getEnd());
             Node qn = qtfr;
-            
+
             if (token.getRepeatPossessive()) {
                 EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                 en.setTarget(qn);
@@ -923,7 +944,7 @@ class Parser extends Lexer {
             } else if (ret == 2) { /* split case: /abc+/ */
                 target = ConsAltNode.newListNode(target, null);
                 ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
-                
+
                 fetchToken();
                 return parseExpRepeatForCar(target, tmp, group);
             }
@@ -941,11 +962,11 @@ class Parser extends Lexer {
             QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
                                                      token.getRepeatUpper(),
                                                      token.type == TokenType.INTERVAL);
-            
+
             qtfr.greedy = token.getRepeatGreedy();
             int ret = qtfr.setQuantifier(target.car, group, env, bytes, getBegin(), getEnd());
             Node qn = qtfr;
-            
+
             if (token.getRepeatPossessive()) {
                 EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                 en.setTarget(qn);
@@ -961,7 +982,7 @@ class Parser extends Lexer {
             fetchToken();
         }
         return top;
-    }   
+    }
 
     private Node parseBranch(TokenType term) {
         Node node = parseExp(term);
@@ -971,13 +992,13 @@ class Parser extends Lexer {
         } else {
             ConsAltNode top = ConsAltNode.newListNode(node, null);
             ConsAltNode t = top;
-            
+
             while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
                 node = parseExp(term);
                 if (node.getType() == NodeType.LIST) {
                     t.setCdr((ConsAltNode)node);
                     while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr;
-                    
+
                     t = ((ConsAltNode)node);
                 } else {
                     t.setCdr(ConsAltNode.newListNode(node, null));
@@ -987,7 +1008,7 @@ class Parser extends Lexer {
             return top;
         }
     }
-    
+
     /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
     private Node parseSubExp(TokenType term) {
         Node node = parseBranch(term);
@@ -1000,11 +1021,11 @@ class Parser extends Lexer {
             while (token.type == TokenType.ALT) {
                 fetchToken();
                 node = parseBranch(term);
-                
+
                 t.setCdr(ConsAltNode.newAltNode(node, null));
                 t = t.cdr;
             }
-            
+
             if (token.type != term) parseSubExpError(term);
             return top;
         } else {
@@ -1012,7 +1033,7 @@ class Parser extends Lexer {
             return null; //not reached
         }
     }
-    
+
     private void parseSubExpError(TokenType term) {
         if (term == TokenType.SUBEXP_CLOSE) {
             newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
@@ -1020,7 +1041,7 @@ class Parser extends Lexer {
             newInternalException(ERR_PARSER_BUG);
         }
     }
-    
+
     private Node parseRegexp() {
         fetchToken();
         return parseSubExp(TokenType.EOT);
diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java
index c05c9f3..86c82fb 100644
--- a/src/org/joni/ast/CClassNode.java
+++ b/src/org/joni/ast/CClassNode.java
@@ -22,8 +22,10 @@ package org.joni.ast;
 import org.jcodings.CodeRange;
 import org.jcodings.Encoding;
 import org.jcodings.IntHolder;
+import org.jcodings.ascii.AsciiTables;
 import org.jcodings.constants.CharacterType;
 import org.jcodings.exception.EncodingException;
+import org.jcodings.specific.ASCIIEncoding;
 import org.joni.BitSet;
 import org.joni.CodeRangeBuffer;
 import org.joni.Config;
@@ -326,8 +328,29 @@ public final class CClassNode extends Node {
     public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
         Encoding enc = env.enc;
 
-        int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
+        if (Config.NON_UNICODE_SDW) {
+            switch(ctype) {
+            case CharacterType.D:
+            case CharacterType.S:
+            case CharacterType.W:
+                ctype ^= CharacterType.SPECIAL_MASK;
+                if (not) {
+                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
+                        if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
+                        //if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) == 0) bs.set(c);
+                    }
+                    addAllMultiByteRange(enc);
+                } else {
+                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
+                        if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
+                        //if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) != 0) bs.set(c);
+                    }
+                }
+                return;
+            }
+        }
 
+        int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
         if (ranges != null) {
             addCTypeByRange(ctype, not, enc, sbOut.value, ranges);
             return;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list