[jruby-joni] 28/279: Big step in chasing the oniguruma/1.9 changes. Added length sanity checks for most encodings (all joni internals are now synced to length(byte[] bytes, int p, int end)) and a couple of additional encodings. Removed many megamorphic call sites. Introduced the VANILLA flag, which indicates whether this is pure oniguruma or the variant with the 1.9 modifications.

Hideki Yamane henrich at moszumanska.debian.org
Mon Nov 16 11:26:34 UTC 2015


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit a0bd128689d95a77830a6db01e02f17a69e5fe83
Author: Marcin Mielżyński <lopx at gazeta.pl>
Date:   Mon Aug 18 23:10:17 2008 +0000

    Big step in chasing the oniguruma/1.9 changes. Added length sanity checks for most encodings (all joni internals are now synced to length(byte[] bytes, int p, int end)) and a couple of additional encodings. Removed many megamorphic call sites. Introduced the VANILLA flag, which indicates whether this is pure oniguruma or the variant with the 1.9 modifications.
    
    git-svn-id: http://svn.codehaus.org/jruby/joni/trunk@7495 961051c9-f516-0410-bf72-c9f7e237a7b7
---
 src/org/joni/Analyser.java                         |  16 +-
 src/org/joni/ArrayCompiler.java                    |   4 +-
 src/org/joni/ByteCodeMachine.java                  |  83 ++--
 src/org/joni/ByteCodePrinter.java                  |   5 +-
 src/org/joni/Compiler.java                         |   4 +-
 src/org/joni/Config.java                           |   2 +
 src/org/joni/Lexer.java                            |  14 +-
 src/org/joni/Matcher.java                          |  20 +-
 src/org/joni/OptAnchorInfo.java                    |   4 +-
 src/org/joni/OptExactInfo.java                     |  13 +-
 src/org/joni/Parser.java                           |  16 +-
 src/org/joni/Regex.java                            |   2 +-
 src/org/joni/ScannerSupport.java                   |   6 +-
 src/org/joni/SearchAlgorithm.java                  |  10 +-
 src/org/joni/ast/StringNode.java                   |   2 +-
 src/org/joni/encoding/AbstractEncoding.java        |   3 +-
 ...cEncoding.java => CanBeTrailTableEncoding.java} |  39 +-
 src/org/joni/encoding/Encoding.java                |  80 +++-
 src/org/joni/encoding/EucEncoding.java             |  19 +-
 src/org/joni/encoding/MultiByteEncoding.java       | 121 ++++-
 src/org/joni/encoding/SingleByteEncoding.java      |  37 +-
 src/org/joni/encoding/specific/BIG5Encoding.java   | 118 ++---
 src/org/joni/encoding/specific/CP1251Encoding.java |   3 +-
 .../{BIG5Encoding.java => CP949Encoding.java}      | 132 +++---
 src/org/joni/encoding/specific/EUCCNEncoding.java  |  93 ----
 src/org/joni/encoding/specific/EUCJPEncoding.java  | 110 +++--
 src/org/joni/encoding/specific/EUCKREncoding.java  |  64 ++-
 src/org/joni/encoding/specific/EUCTWEncoding.java  | 112 ++++-
 .../joni/encoding/specific/GB18030Encoding.java    | 516 +++++++++++++++++++++
 .../{BIG5Encoding.java => GBKEncoding.java}        | 136 +++---
 src/org/joni/encoding/specific/KOI8Encoding.java   |   2 +-
 .../{KOI8Encoding.java => KOI8REncoding.java}      |  88 +---
 .../{KOI8Encoding.java => KOI8UEncoding.java}      |  94 ++--
 src/org/joni/encoding/specific/SJISEncoding.java   | 148 +++---
 .../joni/encoding/specific/UTF16BEEncoding.java    |  91 ++--
 .../joni/encoding/specific/UTF16LEEncoding.java    |  93 ++--
 .../joni/encoding/specific/UTF32BEEncoding.java    |  28 +-
 .../joni/encoding/specific/UTF32LEEncoding.java    |  32 +-
 src/org/joni/encoding/specific/UTF8Encoding.java   | 197 +++++++-
 src/org/joni/encoding/unicode/UnicodeEncoding.java |  25 +-
 src/org/joni/exception/ErrorMessages.java          |   2 +
 .../IllegalCharacterException.java}                |  69 ++-
 42 files changed, 1736 insertions(+), 917 deletions(-)

diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 9cfe0dd..051c932 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -1482,24 +1482,22 @@ final class Analyser extends Parser {
     private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8;
     private void expandCaseFoldString(Node node) {
         StringNode sn = (StringNode)node;
-        
-        if (sn.isAmbig()) return;
-        if (sn.length() <= 0) return;
-        
-        
+
+        if (sn.isAmbig() || sn.length() <= 0) return;
+
         byte[]bytes = sn.bytes;
         int p = sn.p;
         int end = sn.end;
         int altNum = 1;
-        
+
         ConsAltNode topRoot = null, root = null;
         Node[]prevNode = new Node[]{null};
         StringNode snode = null;
-        
+
         while (p < end) {
             CaseFoldCodeItem[]items = enc.caseFoldCodesByString(regex.caseFoldFlag, bytes, p, end);
-            int len = enc.length(bytes[p]);
-            
+            int len = enc.length(bytes, p, end);
+
             if (items.length == 0) {
                 if (snode == null) {
                     if (root == null && prevNode[0] != null) {
diff --git a/src/org/joni/ArrayCompiler.java b/src/org/joni/ArrayCompiler.java
index 430aa11..b9332bc 100644
--- a/src/org/joni/ArrayCompiler.java
+++ b/src/org/joni/ArrayCompiler.java
@@ -208,14 +208,14 @@ final class ArrayCompiler extends Compiler {
         p = prev = sn.p;
         int end = sn.end;
         byte[]bytes = sn.bytes;
-        int prevLen = enc.length(bytes[p]);
+        int prevLen = enc.length(bytes, p, end);
         p += prevLen;
         
         int slen = 1;
         int rlen = 0;
         
         while (p < end) {
-            int len = enc.length(bytes[p]);
+            int len = enc.length(bytes, p, end);
             if (len == prevLen) {
                 slen++;
             } else {
diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
index 0aa1bf8..ce03ba9 100644
--- a/src/org/joni/ByteCodeMachine.java
+++ b/src/org/joni/ByteCodeMachine.java
@@ -155,7 +155,7 @@ class ByteCodeMachine extends StackMachine {
             Config.log.printf("%4d", (s - str)).print("> \"");
             int q, i;
             for (i=0, q=s; i<7 && q<end && s>=0; i++) {
-                int len = enc.length(bytes[q]);
+                int len = enc.length(bytes, q, end);
                 while (len-- > 0) if (q < end) Config.log.print(new String(new byte[]{bytes[q++]}));
             }
             String str = q < end ? "...\"" : "\"";
@@ -621,7 +621,7 @@ class ByteCodeMachine extends StackMachine {
     private void opCClass() {
         if (s >= range || !isInBitSet()) {opFail(); return;}
         ip += BitSet.BITSET_SIZE;
-        s += enc.length(bytes[s]); /* OP_CCLASS can match mb-code. \D, \S */
+        s += enc.length(bytes, s, end); /* OP_CCLASS can match mb-code. \D, \S */
         sprev = sbegin; // break;
     }
     
@@ -635,7 +635,7 @@ class ByteCodeMachine extends StackMachine {
     private boolean isInClassMB() {
         int tlen = code[ip++];        
         if (s >= range) return false;
-        int mbLen = enc.length(bytes[s]);
+        int mbLen = enc.length(bytes, s, end);
         if (s + mbLen > range) return false;
         int ss = s;
         s += mbLen;
@@ -647,14 +647,14 @@ class ByteCodeMachine extends StackMachine {
     
     private void opCClassMB() {
         // beyond string check 
-        if (s >= range || !enc.isMbcHead(bytes[s])) {opFail(); return;}
+        if (s >= range || !enc.isMbcHead(bytes, s, end)) {opFail(); return;}
         if (!isInClassMB()) {opFail(); return;} // not!!!
         sprev = sbegin; // break;
     }
     
     private void opCClassMIX() {
         if (s >= range) {opFail(); return;}
-        if (enc.isMbcHead(bytes[s])) {
+        if (enc.isMbcHead(bytes, s, end)) {
             ip += BitSet.BITSET_SIZE;
             if (!isInClassMB()) {opFail(); return;}
         } else {
@@ -670,7 +670,7 @@ class ByteCodeMachine extends StackMachine {
     private void opCClassNot() {
         if (s >= range || isInBitSet()) {opFail(); return;}
         ip += BitSet.BITSET_SIZE;
-        s += enc.length(bytes[s]);
+        s += enc.length(bytes, s, end);
         sprev = sbegin; // break;
     }
     
@@ -683,7 +683,7 @@ class ByteCodeMachine extends StackMachine {
     
     private boolean isNotInClassMB() {
         int tlen = code[ip++];
-        int mbLen = enc.length(bytes[s]);
+        int mbLen = enc.length(bytes, s, end);
         
         if (!(s + mbLen <= range)) {
             if (s >= range) return false;
@@ -703,7 +703,7 @@ class ByteCodeMachine extends StackMachine {
     
     private void opCClassMBNot() {
         if (s >= range) {opFail(); return;}
-        if (!enc.isMbcHead(bytes[s])) {
+        if (!enc.isMbcHead(bytes, s, end)) {
             s++;
             int tlen = code[ip++];
             ip += tlen;
@@ -716,7 +716,7 @@ class ByteCodeMachine extends StackMachine {
     
     private void opCClassMIXNot() {
         if (s >= range) {opFail(); return;}
-        if (enc.isMbcHead(bytes[s])) {
+        if (enc.isMbcHead(bytes, s, end)) {
             ip += BitSet.BITSET_SIZE;
             if (!isNotInClassMB()) {opFail(); return;}
         } else {
@@ -732,7 +732,7 @@ class ByteCodeMachine extends StackMachine {
     private void opCClassNode() {
         if (s >= range) {opFail(); return;}
         CClassNode cc = (CClassNode)regex.operands[code[ip++]];
-        int mbLen = enc.length(bytes[s]);
+        int mbLen = enc.length(bytes, s, end);
         int ss = s;
         s += mbLen;
         if (s > range) {opFail(); return;}
@@ -743,7 +743,7 @@ class ByteCodeMachine extends StackMachine {
     
     private void opAnyChar() {
         if (s >= range) {opFail(); return;}
-        int n = enc.length(bytes[s]);
+        int n = enc.length(bytes, s, end);
         if (s + n > range) {opFail(); return;}
         if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
         s += n;
@@ -759,7 +759,7 @@ class ByteCodeMachine extends StackMachine {
     
     private void opAnyCharML() {
         if (s >= range) {opFail(); return;}
-        int n = enc.length(bytes[s]);
+        int n = enc.length(bytes, s, end);
         if (s + n > range) {opFail(); return;}
         s += n;        
         sprev = sbegin; // break;        
@@ -775,7 +775,7 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
         while (s < range) {
             pushAlt(ip, s, sprev);
-            int n = enc.length(bytes[s]);
+            int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
             sprev = s;
@@ -799,7 +799,7 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
         while (s < range) {
             pushAlt(ip, s, sprev);
-            int n = enc.length(bytes[s]);
+            int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             sprev = s;
             s += n;
@@ -821,11 +821,9 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
         
         while (s < range) {
-            byte b = bytes[s];
-            if (c == b) pushAlt(ip + 1, s, sprev);
-            int n = enc.length(b);
-            if (s + n > range) {opFail(); return;}
-            if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
+            if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+            int n = enc.length(bytes, s, end);
+            if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}
             sprev = s;
             s += n;
         }
@@ -854,7 +852,7 @@ class ByteCodeMachine extends StackMachine {
         
         while (s < range) {
             if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
-            int n = enc.length(bytes[s]);
+            int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             sprev = s;
             s += n;
@@ -884,9 +882,8 @@ class ByteCodeMachine extends StackMachine {
         while (s < range) {
             if (stateCheckVal(s, mem)) {opFail(); return;}
             pushAltWithStateCheck(ip, s, sprev, mem);
-            int n = enc.length(bytes[s]);
-            if (s + n > range) {opFail(); return;}
-            if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
+            int n = enc.length(bytes, s, end);
+            if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}
             sprev = s;
             s += n;
         }
@@ -915,7 +912,7 @@ class ByteCodeMachine extends StackMachine {
         while (s < range) {
             if (stateCheckVal(s, mem)) {opFail(); return;}
             pushAltWithStateCheck(ip, s, sprev, mem);
-            int n = enc.length(bytes[s]);
+            int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             sprev = s;
             s += n;
@@ -936,37 +933,32 @@ class ByteCodeMachine extends StackMachine {
     }
     
     private void opWord() {
-        if (s >= range) {opFail(); return;}
-        if (!enc.isMbcWord(bytes, s, end)) {opFail(); return;}
-        s += enc.length(bytes[s]);
+        if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+        s += enc.length(bytes, s, end);
         sprev = sbegin; // break;
     }
 
     private void opWordSb() {
-        if (s >= range) {opFail(); return;}
-        if (!enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+        if (s >= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
         s++;        
         sprev = sbegin; // break;
     }
     
     private void opNotWord() {
-        if (s >= range) {opFail(); return;}
-        if (enc.isMbcWord(bytes, s, end)) {opFail(); return;}
-        s += enc.length(bytes[s]);
+        if (s >= range || enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+        s += enc.length(bytes, s, end);
         sprev = sbegin; // break;
     }
     
     private void opNotWordSb() {
-        if (s >= range) {opFail(); return;}
-        if (enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+        if (s >= range || enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
         s++;
         sprev = sbegin; // break;
     }
     
     private void opWordBound() {
         if (s == str) {
-            if (s >= range) {opFail(); return;}
-            if (!enc.isMbcWord(bytes, s, end)) {opFail(); return;}
+            if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;}
         } else if (s == end) {
             if (!enc.isMbcWord(bytes, sprev, end)) {opFail(); return;}
         } else {
@@ -976,8 +968,7 @@ class ByteCodeMachine extends StackMachine {
     
     private void opWordBoundSb() {
         if (s == str) {
-            if (s >= range) {opFail(); return;}
-            if (!enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
+            if (s >= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;}
         } else if (s == end) {
             if (sprev >= end || !enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;}
         } else {
@@ -1079,11 +1070,11 @@ class ByteCodeMachine extends StackMachine {
                 if (isNotEol(msaOptions)) opFail();
                 return;
             }
-        } else if (enc.isNewLine(bytes, s, end) && (s + enc.length(bytes[s])) == end) {
+        } else if (enc.isNewLine(bytes, s, end) && (s + enc.length(bytes, s, end)) == end) {
             return;
         } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end)) {
-            int ss = s + enc.length(bytes[s]);
-            ss += enc.length(bytes[ss]);
+            int ss = s + enc.length(bytes, s, end);
+            ss += enc.length(bytes, ss, end);
             if (ss == end) return;
         }
         opFail();
@@ -1165,7 +1156,7 @@ class ByteCodeMachine extends StackMachine {
         
         // beyond string check
         if (sprev < range) {
-            while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+            while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
         }
     }
     
@@ -1200,7 +1191,7 @@ class ByteCodeMachine extends StackMachine {
         
         int len;
         // if (sprev < bytes.length)
-        while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+        while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
     }
     
     private void opBackRefMulti() {
@@ -1230,7 +1221,7 @@ class ByteCodeMachine extends StackMachine {
 
             // beyond string check
             if (sprev < range) {
-                while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+                while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
             }
             
             ip += tlen - i  - 1; // * SIZE_MEMNUM (1)
@@ -1261,7 +1252,7 @@ class ByteCodeMachine extends StackMachine {
 
             int len;
             // if (sprev < bytes.length)
-            while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+            while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
             
             ip += tlen - i  - 1; // * SIZE_MEMNUM (1)
             break;  /* success */
@@ -1333,7 +1324,7 @@ class ByteCodeMachine extends StackMachine {
         sprev = s;
         if (backrefMatchAtNestedLevel(ic != 0, caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
             int len;
-            while (sprev + (len = enc.length(bytes[sprev])) < s) sprev += len;
+            while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len;
             ip += tlen; // * SIZE_MEMNUM
         } else {
             {opFail(); return;}
diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java
index 708e76d..1606752 100644
--- a/src/org/joni/ByteCodePrinter.java
+++ b/src/org/joni/ByteCodePrinter.java
@@ -182,7 +182,10 @@ class ByteCodePrinter {
                 
             case OPCode.EXACT1_IC:
             case OPCode.EXACT1_IC_SB:
-                len = enc.length((byte)code[bp]);
+                final int MAX_CHAR_LENGTH = 6;
+                byte[]bytes = new byte[MAX_CHAR_LENGTH];
+                for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i]; 
+                len = enc.length(bytes, 0, MAX_CHAR_LENGTH);
                 pString(sb, len, bp);
                 bp += len;
                 break;
diff --git a/src/org/joni/Compiler.java b/src/org/joni/Compiler.java
index 8227ba6..b31b205 100644
--- a/src/org/joni/Compiler.java
+++ b/src/org/joni/Compiler.java
@@ -72,12 +72,12 @@ abstract class Compiler implements ErrorMessages {
         p = prev = sn.p;
         int end = sn.end;
         byte[]bytes = sn.bytes;
-        int prevLen = enc.length(bytes[p]);
+        int prevLen = enc.length(bytes, p, end);
         p += prevLen;
         int slen = 1;
         
         while (p < end) {            
-            int len = enc.length(bytes[p]);
+            int len = enc.length(bytes, p, end);
             if (len == prevLen) {
                 slen++;
             } else {
diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java
index 9adad03..10e0c44 100644
--- a/src/org/joni/Config.java
+++ b/src/org/joni/Config.java
@@ -22,6 +22,8 @@ package org.joni;
 import java.io.PrintStream;
 
 public interface Config {
+    final boolean VANILLA = true;
+
     final int CHAR_TABLE_SIZE = 256;
     
     
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 4fc169d..3de56df 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -486,18 +486,18 @@ class Lexer extends ScannerSupport {
         while(p < to) {
             if (inEsc) {
                 inEsc = false;
-                p += enc.length(bytes[p]);
+                p += enc.length(bytes, p, to);
             } else {
                 int x = enc.mbcToCode(bytes, p, to);
-                int q = p + enc.length(bytes[p]);
+                int q = p + enc.length(bytes, p, to);
                 if (x == s[0]) {
                     for (i=1; i<n && q < to; i++) {
                         x = enc.mbcToCode(bytes, q, to);
                         if (x != s[i]) break;
-                        q += enc.length(bytes[q]);
+                        q += enc.length(bytes, q, to);
                     }
                     if (i >= n) return true;
-                    p += enc.length(bytes[p]);
+                    p += enc.length(bytes, p, to);
                 } else {
                     x = enc.mbcToCode(bytes, p, to);
                     if (x == bad) return false;
@@ -621,7 +621,7 @@ class Lexer extends ScannerSupport {
                         if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); 
                     }
                     
-                    if (p > last + enc.length(bytes[last]) && left() && peekIs('}')) {
+                    if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) {
                         inc();                      
                         token.type = TokenType.CODE_POINT;
                         token.base = 16;
@@ -947,7 +947,7 @@ class Lexer extends ScannerSupport {
                         if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); 
                     }
                     
-                    if (p > last + enc.length(bytes[last]) && left() && peekIs('}')) {
+                    if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) {
                         inc();
                         token.type = TokenType.CODE_POINT;
                         token.setCode(num);
@@ -1157,7 +1157,7 @@ class Lexer extends ScannerSupport {
                     token.type = TokenType.CODE_POINT;
                     token.setCode(num);
                 } else { /* string */
-                    p = token.backP + enc.length(bytes[token.backP]); // backP ???
+                    p = token.backP + enc.length(bytes, token.backP, stop);
                 }
                 break;
                 
diff --git a/src/org/joni/Matcher.java b/src/org/joni/Matcher.java
index cf27171..913842b 100644
--- a/src/org/joni/Matcher.java
+++ b/src/org/joni/Matcher.java
@@ -127,7 +127,7 @@ public abstract class Matcher extends IntHolder {
                 p += regex.dMin;
             } else {
                 int q = p + regex.dMin;
-                while (p < q) p += enc.length(bytes[p]);
+                while (p < q) p += enc.length(bytes, p, end);
             }
         }
 
@@ -138,7 +138,7 @@ public abstract class Matcher extends IntHolder {
                 if (p - regex.dMin < s) {
                     // retry_gate:
                     pprev = p;
-                    p += enc.length(bytes[p]);
+                    p += enc.length(bytes, p, end);
                     continue retry;
                 }
                 
@@ -150,7 +150,7 @@ public abstract class Matcher extends IntHolder {
                             if (!enc.isNewLine(bytes, prev, end)) {
                                 // goto retry_gate;
                                 pprev = p;
-                                p += enc.length(bytes[p]);
+                                p += enc.length(bytes, p, end);
                                 continue retry;
                             }
                         }
@@ -163,14 +163,14 @@ public abstract class Matcher extends IntHolder {
                                 if (prev != -1 && enc.isNewLine(bytes, prev, end)) {
                                     // goto retry_gate;
                                     pprev = p;
-                                    p += enc.length(bytes[p]);
+                                    p += enc.length(bytes, p, end);
                                     continue retry;
                                 }
                             } else if (!enc.isNewLine(bytes, p, end)) {
                                 if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, p, end)) break;
                                 // goto retry_gate;
                                 pprev = p;
-                                p += enc.length(bytes[p]);
+                                p += enc.length(bytes, p, end);
                                 continue retry;
                             }
                         }
@@ -444,7 +444,7 @@ public abstract class Matcher extends IntHolder {
                         while (s <= high) {
                             if (matchCheck(origRange, s, prev)) return match(s); // ???
                             prev = s;
-                            s += enc.length(bytes[s]);
+                            s += enc.length(bytes, s, end);
                         }
                     } while (s < range);
                     return mismatch();
@@ -456,11 +456,11 @@ public abstract class Matcher extends IntHolder {
                         do {
                             if (matchCheck(origRange, s, prev)) return match(s);
                             prev = s;
-                            s += enc.length(bytes[s]);
+                            s += enc.length(bytes, s, end);
                             
                             while (!enc.isNewLine(bytes, prev, end) && s < range) {
                                 prev = s;
-                                s += enc.length(bytes[s]);
+                                s += enc.length(bytes, s, end);
                             }
                         } while (s < range);
                         return mismatch();
@@ -472,7 +472,7 @@ public abstract class Matcher extends IntHolder {
             do {
                 if (matchCheck(origRange, s, prev)) return match(s);
                 prev = s;
-                s += enc.length(bytes[s]);
+                s += enc.length(bytes, s, end);
             } while (s < range);
             
             if (s == range) { /* because empty match with /$/. */
@@ -481,7 +481,7 @@ public abstract class Matcher extends IntHolder {
         } else { /* backward search */
             if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {            
                 if (origStart < end) {
-                    origStart += enc.length(bytes[origStart]); // /* is upper range */ 
+                    origStart += enc.length(bytes, origStart, end); // /* is upper range */ 
                 }
             }
             
diff --git a/src/org/joni/OptAnchorInfo.java b/src/org/joni/OptAnchorInfo.java
index 8b9ff97..9084728 100644
--- a/src/org/joni/OptAnchorInfo.java
+++ b/src/org/joni/OptAnchorInfo.java
@@ -35,11 +35,9 @@ final class OptAnchorInfo implements AnchorType {
     }
     
     void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) {
-        clear(); // ??? remove
-        
         leftAnchor = left.leftAnchor;
         if (leftLength == 0) leftAnchor |= right.leftAnchor;
-        
+
         rightAnchor = right.rightAnchor;
         if (rightLength == 0) rightAnchor |= left.rightAnchor;
     }
diff --git a/src/org/joni/OptExactInfo.java b/src/org/joni/OptExactInfo.java
index 33123f4..b880de0 100644
--- a/src/org/joni/OptExactInfo.java
+++ b/src/org/joni/OptExactInfo.java
@@ -68,17 +68,16 @@ final class OptExactInfo {
         
         int i;
         for (i=length; p < end;) {
-            int len = enc.length(other.s[p]);
+            int len = enc.length(other.s, p, end);
             if (i + len > OPT_EXACT_MAXLEN) break;
             for (int j=0; j<len && p < end; j++) {
                 s[i++] = other.s[p++]; // arraycopy or even don't copy anything ??
             }
         }
-        
+
         length = i;
         reachEnd = (p == end ? other.reachEnd : false);
-        
-        // !!! remove this temporary when we know it's safe
+
         OptAnchorInfo tmp = new OptAnchorInfo();
         tmp.concat(anchor, other.anchor, 1, 1);
         if (!other.reachEnd) tmp.rightAnchor = 0;
@@ -89,7 +88,7 @@ final class OptExactInfo {
     void concatStr(byte[]bytes, int p, int end, boolean raw, Encoding enc) {
         int i;
         for (i = length; p < end && i < OPT_EXACT_MAXLEN;) {
-            int len = enc.length(bytes[p]);
+            int len = enc.length(bytes, p, end);
             if (i + len > OPT_EXACT_MAXLEN) break;
             for (int j=0; j<len && p < end; j++) {
                 s[i++] = bytes[p++];
@@ -113,8 +112,8 @@ final class OptExactInfo {
         int i;
         for (i=0; i<length && i<other.length;) {
             if (s[i] != other.s[i]) break;
-            int len = env.enc.length(s[i]);
-            
+            int len = env.enc.length(s, i, length);
+
             int j;
             for (j=1; j<len; j++) {
                 if (s[i+j] != other.s[i+j]) break;
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index 6b1d696..a53426c 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -208,7 +208,7 @@ class Parser extends Lexer {
                     }
                     if (i < enc.minLength()) newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
                     
-                    len = enc.length(buf[0]);
+                    len = enc.length(buf, 0, i);
                     if (i < len) { 
                         newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
                     } else if (i > len) { /* fetch back */
@@ -653,15 +653,15 @@ class Parser extends Lexer {
         int i = 0;
         while(p < to) {
             x = enc.mbcToCode(bytes, p, to);
-            q = p + enc.length(bytes[p]);
+            q = p + enc.length(bytes, p, to);
             if (x == s[0]) {
                 for (i=1; i<n && q<to; i++) {
                     x = enc.mbcToCode(bytes, q, to);
                     if (x != s[i]) break;
-                    q += enc.length(bytes[q]);
+                    q += enc.length(bytes, q, to);
                 }
                 if (i >= n) {
-                    if (bytes[nextChar] != 0) nextChar = q; // ??????
+                    if (bytes[nextChar] != 0) nextChar = q; // we may need zero term semantics... 
                     return p;
                 }
             }
@@ -875,7 +875,7 @@ class Parser extends Lexer {
         int len = 1;            
         while (true) {
             if (len >= enc.minLength()) {               
-                if (len == enc.length(node.bytes[node.p])) {                  
+                if (len == enc.length(node.bytes, node.p, node.end)) {                  
                     fetchToken();
                     node.clearRaw();
                     // !goto string_end;!
@@ -954,12 +954,8 @@ class Parser extends Lexer {
 
             if (ret == 0) {
                 target.setCar(qn);
-            } else if (ret == 2) { /* split case: /abc+/ */ //!!! this shouldn't happen here, remove
+            } else if (ret == 2) { /* split case: /abc+/ */
                 assert false;
-                target.setCar(ConsAltNode.newListNode(target.car, null));
-                
-                Node tmp = (((ConsAltNode)(target).car).setCdr(ConsAltNode.newListNode(qn, null)));
-                target = (ConsAltNode)tmp;
             }
             // !goto re_entry;!
             fetchToken();
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
index 06dc823..29034a4 100644
--- a/src/org/joni/Regex.java
+++ b/src/org/joni/Regex.java
@@ -350,7 +350,7 @@ public final class Regex implements RegexState {
                 } else {
                     sb.append((char)code);
                 }
-                p_ += enc.length(bytes[p_]);
+                p_ += enc.length(bytes, p_, end);
             }
         } else {
             while (p < end) {
diff --git a/src/org/joni/ScannerSupport.java b/src/org/joni/ScannerSupport.java
index 39a1f74..e4583ef 100644
--- a/src/org/joni/ScannerSupport.java
+++ b/src/org/joni/ScannerSupport.java
@@ -123,19 +123,19 @@ abstract class ScannerSupport extends IntHolder implements ErrorMessages {
     
     protected final void inc() {
         lastFetched = p;
-        p += enc.length(bytes[p]);
+        p += enc.length(bytes, p, stop);
     }
     
     protected final void fetch() {
         c = enc.mbcToCode(bytes, p, stop);
         lastFetched = p;
-        p += enc.length(bytes[p]);
+        p += enc.length(bytes, p, stop);
     }
     
     protected int fetchTo() {
         int to = enc.mbcToCode(bytes, p, stop);
         lastFetched = p;
-        p += enc.length(bytes[p]);
+        p += enc.length(bytes, p, stop);
         return to;
     }
     
diff --git a/src/org/joni/SearchAlgorithm.java b/src/org/joni/SearchAlgorithm.java
index ab9712f..f4e09ac 100644
--- a/src/org/joni/SearchAlgorithm.java
+++ b/src/org/joni/SearchAlgorithm.java
@@ -56,7 +56,7 @@ public abstract class SearchAlgorithm {
                     
                     if (t == targetEnd) return s;
                 }
-                s += enc.length(text[s]);
+                s += enc.length(text, s, textEnd);
             }
             
             return -1;            
@@ -184,7 +184,7 @@ public abstract class SearchAlgorithm {
             
             while (s < end) {
                 if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s;
-                s += enc.length(text[s]);
+                s += enc.length(text, s, textEnd);
             }
             return -1;
         }
@@ -430,7 +430,7 @@ public abstract class SearchAlgorithm {
                     int skip = regex.map[text[se] & 0xff];
                     t = s;
                     do {
-                        s += enc.length(text[s]);
+                        s += enc.length(text, s, textEnd);
                     } while ((s - t) < skip && s < end);
                 }
             } else {
@@ -447,7 +447,7 @@ public abstract class SearchAlgorithm {
                     int skip = regex.intMap[text[se] & 0xff];
                     t = s;
                     do {
-                        s += enc.length(text[s]);
+                        s += enc.length(text, s, textEnd);
                     } while ((s - t) < skip && s < end);
                     
                 }
@@ -474,7 +474,7 @@ public abstract class SearchAlgorithm {
 
             while (s < textRange) {
                 if (map[text[s] & 0xff] != 0) return s;
-                s += enc.length(text[s]);
+                s += enc.length(text, s, textEnd);
             }
             return -1;
         }
diff --git a/src/org/joni/ast/StringNode.java b/src/org/joni/ast/StringNode.java
index 76a726a..433ac33 100644
--- a/src/org/joni/ast/StringNode.java
+++ b/src/org/joni/ast/StringNode.java
@@ -129,7 +129,7 @@ public final class StringNode extends Node implements StringType {
     
     public boolean canBeSplit(Encoding enc) {
         if (end > p) {
-            return enc.length(bytes[p]) < (end - p) ? true : false;
+            return enc.length(bytes, p, end) < (end - p);
         }
         return false;
     }
diff --git a/src/org/joni/encoding/AbstractEncoding.java b/src/org/joni/encoding/AbstractEncoding.java
index 8f407cd..4abbba6 100644
--- a/src/org/joni/encoding/AbstractEncoding.java
+++ b/src/org/joni/encoding/AbstractEncoding.java
@@ -30,7 +30,8 @@ abstract class AbstractEncoding extends Encoding {
 
     private final short CTypeTable[];
 
-    protected AbstractEncoding(short[]CTypeTable) {
+    protected AbstractEncoding(int minLength, int maxLength, short[]CTypeTable) {
+        super(minLength, maxLength);
         this.CTypeTable = CTypeTable;
     }
 
diff --git a/src/org/joni/encoding/EucEncoding.java b/src/org/joni/encoding/CanBeTrailTableEncoding.java
similarity index 61%
copy from src/org/joni/encoding/EucEncoding.java
copy to src/org/joni/encoding/CanBeTrailTableEncoding.java
index 2cb26cd..867312a 100644
--- a/src/org/joni/encoding/EucEncoding.java
+++ b/src/org/joni/encoding/CanBeTrailTableEncoding.java
@@ -19,24 +19,37 @@
  */
 package org.joni.encoding;
 
-public abstract class EucEncoding extends MultiByteEncoding {
-    
-    protected EucEncoding(int[]EncLen, short[]CTypeTable) {
-        super(EncLen, CTypeTable);
+public abstract class CanBeTrailTableEncoding extends MultiByteEncoding {
+
+    protected final boolean[] CanBeTrailTable;
+
+    protected CanBeTrailTableEncoding(int minLength, int maxLength, int[]EncLen, int[][]Trans, short[]CTypeTable, boolean[]CanBeTrailTable) {
+        super(minLength, maxLength, EncLen, Trans, CTypeTable);
+        this.CanBeTrailTable = CanBeTrailTable;
     }
-    
-    protected abstract boolean isLead(int c);
-    
+
     @Override
     public int leftAdjustCharHead(byte[]bytes, int p, int end) {
-        /* In this encoding mb-trail bytes doesn't mix with single bytes. */
         if (end <= p) return end;
+
         int p_ = end;
-        while (!isLead(bytes[p_] & 0xff) && p_ > p) p_--;
-        int len = length(bytes[p_]);
+
+        if (CanBeTrailTable[bytes[p_] & 0xff]) {
+            while (p_ > p) {
+                if (!(EncLen[bytes[--p_] & 0xff] > 1)) {
+                    p_++;
+                    break;
+                }
+            }
+        }
+        int len = length(bytes, p_, end);
         if (p_ + len > end) return p_;
-        
         p_ += len;
         return p_ + ((end - p_) & ~1);
-    }    
-}
+    }
+
+    @Override
+    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+        return !CanBeTrailTable[bytes[p] & 0xff];
+    }
+}
\ No newline at end of file
diff --git a/src/org/joni/encoding/Encoding.java b/src/org/joni/encoding/Encoding.java
index bac4a2d..a82742f 100644
--- a/src/org/joni/encoding/Encoding.java
+++ b/src/org/joni/encoding/Encoding.java
@@ -28,9 +28,20 @@ import org.joni.exception.ValueException;
 import org.joni.util.BytesHash;
 
 public abstract class Encoding {
+
+    protected final int minLength, maxLength;
+    protected final boolean isFixedWidth, isSingleByte;
+
     protected byte[]name;
     protected int hashCode;
 
+    protected Encoding(int minLength, int maxLength) {
+        this.minLength = minLength;
+        this.maxLength = maxLength;
+        this.isFixedWidth = minLength == maxLength;
+        this.isSingleByte = isFixedWidth && minLength == 1;
+    }
+    
     @Override
     public abstract String toString();
 
@@ -54,21 +65,39 @@ public abstract class Encoding {
     }
 
     /**
-     * Returns character length given the character head
+     * Returns character length given character head
      * returns <code>1</code> for singlebyte encodings or performs direct length table lookup for multibyte ones.   
      * 
      * @param   c
      *          Character head
      * Oniguruma equivalent: <code>mbc_enc_len</code>
+     * 
+     * To be deprecated very soon (use length(byte[]bytes, int p, int end) version) 
      */
     public abstract int length(byte c);
 
     /**
+     * Returns character length given stream, character position and stream end
+     * returns <code>1</code> for singlebyte encodings or performs sanity validations for multibyte ones 
+     * and returns the character length, or the negated number of missing bytes if the stream ends mid-character
+     * 
+     * Throws IllegalCharacterException if a bad sequence is discovered
+     * 
+     * @param   bytes, p, end
+     *          Stream, position of the character head in the stream, and stream end
+     * Oniguruma equivalent: <code>mbc_enc_len</code>
+     * modified for 1.9 purposes, 
+     */
+    public abstract int length(byte[]bytes, int p, int end);
+
+    /**
      * Returns maximum character byte length that can appear in an encoding  
      * 
      * Oniguruma equivalent: <code>max_enc_len</code>
      */
-    public abstract int maxLength();
+    public final int maxLength() {
+        return maxLength;
+    }
     
     /* ONIGENC_MBC_MAXLEN_DIST */
     public final int maxLengthDistance() {
@@ -80,7 +109,9 @@ public abstract class Encoding {
      * 
      * Oniguruma equivalent: <code>min_enc_len</code>
      */    
-    public abstract int minLength();
+    public final int minLength() {
+        return minLength;
+    }
 
     /**
      * Returns true if <code>bytes[p]</code> is a head of a new line character
@@ -190,7 +221,7 @@ public abstract class Encoding {
     /* onigenc_get_right_adjust_char_head / ONIGENC_LEFT_ADJUST_CHAR_HEAD */
     public final int rightAdjustCharHead(byte[]bytes, int p, int end) {
         int p_ = leftAdjustCharHead(bytes, p, end);
-        if (p_ < end) p_ += length(bytes[p_]);
+        if (p_ < end) p_ += length(bytes, p_, end);
         return p_;
     }
 
@@ -199,7 +230,7 @@ public abstract class Encoding {
         int p_ = leftAdjustCharHead(bytes, p, end);
         if (p_ < end) {
             if (prev != null) prev.value = p_;
-            p_ += length(bytes[p_]);
+            p_ += length(bytes, p_, end);
         } else {
             if (prev != null) prev.value = -1; /* Sorry */
         }
@@ -225,7 +256,7 @@ public abstract class Encoding {
     public final int step(byte[]bytes, int p, int end, int n) {
         int q = p;
         while (n-- > 0) {
-            q += length(bytes[q]);
+            q += length(bytes, q, end);
         }
         return q <= end ? q : -1;
     }
@@ -235,17 +266,17 @@ public abstract class Encoding {
         int n = 0;
         int q = p;
         while (q < end) {
-            q += length(bytes[q]);
+            q += length(bytes, q, end);
             n++;
         }
         return n;
     }
     
     /* onigenc_strlen_null */
-    public final int strLengthNull(byte[]bytes, int p) {
+    public final int strLengthNull(byte[]bytes, int p, int end) {
         int n = 0;
-        
-        while(true) {
+
+        while (true) {
             if (bytes[p] == 0) {
                 int len = minLength();
                 
@@ -259,17 +290,17 @@ public abstract class Encoding {
                 }
                 if (len == 1) return n;
             }
-            p += length(bytes[p]);
+            p += length(bytes, p, end);
             n++;
         }        
     }
     
     /* onigenc_str_bytelen_null */
-    public final int strByteLengthNull(byte[]bytes, int p) {
+    public final int strByteLengthNull(byte[]bytes, int p, int end) {
         int p_, start;
         p_ = start = 0;
-        
-        while(true) {
+
+        while (true) {
             if (bytes[p_] == 0) {
                 int len = minLength();
                 if (len == 1) return p_ - start;
@@ -282,7 +313,7 @@ public abstract class Encoding {
                 }
                 if (len == 1) return p_ - start;
             }
-            p_ += length(bytes[p_]);
+            p_ += length(bytes, p_, end);
         }   
     }
     
@@ -295,7 +326,7 @@ public abstract class Encoding {
             if (x != 0) return x;
             
             asciiP++;
-            p += length(bytes[p]);
+            p += length(bytes, p, end);
         }
         return 0;
     }   
@@ -367,12 +398,12 @@ public abstract class Encoding {
     }
 
     // ONIGENC_IS_MBC_HEAD
-    public final boolean isMbcHead(byte b) {
-        return length(b) != 1;
+    public final boolean isMbcHead(byte[]bytes, int p, int end) {
+        return length(bytes, p, end) != 1;
     }
     
     public boolean isMbcCrnl(byte[]bytes, int p, int end) {
-        return mbcToCode(bytes, p, end) == 13 && isNewLine(bytes, p + length(bytes[p]), end);
+        return mbcToCode(bytes, p, end) == 13 && isNewLine(bytes, p + length(bytes, p, end), end);
     }    
 
     // ============================================================
@@ -422,9 +453,14 @@ public abstract class Encoding {
         return minLength() > 1 ? 0 : 0x80;      
     }
 
-    public abstract boolean isSingleByte();
-    public abstract boolean isFixedWidth();
-    
+    public final boolean isSingleByte() {
+        return isSingleByte;
+    }
+
+    public final boolean isFixedWidth() {
+        return isFixedWidth;
+    }
+
     public static final byte NEW_LINE = (byte)0x0a;
 
     public static Encoding load(String name) { 
diff --git a/src/org/joni/encoding/EucEncoding.java b/src/org/joni/encoding/EucEncoding.java
index 2cb26cd..9e6c520 100644
--- a/src/org/joni/encoding/EucEncoding.java
+++ b/src/org/joni/encoding/EucEncoding.java
@@ -19,12 +19,14 @@
  */
 package org.joni.encoding;
 
+import org.joni.Config;
+
 public abstract class EucEncoding extends MultiByteEncoding {
-    
-    protected EucEncoding(int[]EncLen, short[]CTypeTable) {
-        super(EncLen, CTypeTable);
+
+    protected EucEncoding(int minLength, int maxLength, int[]EncLen, int[][]Trans, short[]CTypeTable) {
+        super(minLength, maxLength, EncLen, Trans, CTypeTable);
     }
-    
+
     protected abstract boolean isLead(int c);
     
     @Override
@@ -32,10 +34,15 @@ public abstract class EucEncoding extends MultiByteEncoding {
         /* In this encoding mb-trail bytes doesn't mix with single bytes. */
         if (end <= p) return end;
         int p_ = end;
+
         while (!isLead(bytes[p_] & 0xff) && p_ > p) p_--;
-        int len = length(bytes[p_]);
+
+        // TODO: length(bytes, p_, end) introduces regression for look-behind here:
+        // /(?<=あ|いう)い/ =~ 'いうい', fix this or fix the test
+        int len = length(bytes, p_, p);
+
         if (p_ + len > end) return p_;
-        
+
         p_ += len;
         return p_ + ((end - p_) & ~1);
     }    
diff --git a/src/org/joni/encoding/MultiByteEncoding.java b/src/org/joni/encoding/MultiByteEncoding.java
index 1ab58a8..1ee2b2a 100644
--- a/src/org/joni/encoding/MultiByteEncoding.java
+++ b/src/org/joni/encoding/MultiByteEncoding.java
@@ -22,29 +22,111 @@ package org.joni.encoding;
 import org.joni.IntHolder;
 import org.joni.encoding.specific.ASCIIEncoding;
 import org.joni.exception.ErrorMessages;
+import org.joni.exception.IllegalCharacterException;
 import org.joni.exception.ValueException;
 
 public abstract class MultiByteEncoding extends AbstractEncoding {
-    
+
     protected final int EncLen[];
-    
-    protected MultiByteEncoding(int[]EncLen, short[]CTypeTable) {
-        super(CTypeTable);
+
+    protected static final int A = -1; // ACCEPT
+    protected static final int F = -2; // FAILURE
+
+    protected final int Trans[][];
+    protected final int TransZero[];
+
+    protected MultiByteEncoding(int minLength, int maxLength, int[]EncLen, int[][]Trans, short[]CTypeTable) {
+        super(minLength, maxLength, CTypeTable);
         this.EncLen = EncLen;
+        this.Trans = Trans;
+        this.TransZero = Trans != null ? Trans[0] : null;
     }
-    
+
     @Override
-    public int length(byte c) { 
+    public int length(byte c) {        
         return EncLen[c & 0xff];       
     }
-    
-    @Override
-    public boolean isSingleByte() {
-        return false;
+
+    protected final int safeLengthForUptoFour(byte[]bytes, int p ,int end) {
+        int b = bytes[p++] & 0xff;
+        int s = TransZero[b];
+        if (s < 0) {
+            if (s == A) return 1;
+            throw IllegalCharacterException.INSTANCE;
+        }
+        return lengthForTwoUptoFour(bytes, p, end, b, s);
     }
-    
+
+    private int lengthForTwoUptoFour(byte[]bytes, int p, int end, int b, int s) {
+        if (p == end) return -(EncLen[b] - 1);
+        s = Trans[s][bytes[p++] & 0xff];
+        if (s < 0) {
+            if (s == A) return 2;
+            throw IllegalCharacterException.INSTANCE;
+        }
+        return lengthForThreeUptoFour(bytes, p, end, b, s);
+    }
+
+    private int lengthForThreeUptoFour(byte[]bytes, int p, int end, int b, int s) {
+        if (p == end) return -(EncLen[b] - 2);
+        s = Trans[s][bytes[p++] & 0xff];
+        if (s < 0) {
+            if (s == A) return 3;
+            throw IllegalCharacterException.INSTANCE;
+        }
+        if (p == end) return -(EncLen[b] - 3);
+        s = Trans[s][bytes[p] & 0xff];
+        if (s == A) return 4;
+        throw IllegalCharacterException.INSTANCE;  
+    }
+
+    protected final int safeLengthForUptoThree(byte[]bytes, int p, int end) {
+        int b = bytes[p++] & 0xff;
+        int s = TransZero[b];
+        if (s < 0) {
+            if (s == A) return 1;
+            throw IllegalCharacterException.INSTANCE;
+        }
+        return lengthForTwoUptoThree(bytes, p, end, b, s);
+    }
+
+    private int lengthForTwoUptoThree(byte[]bytes, int p, int end, int b, int s) {
+        if (p == end) return -(EncLen[b] - 1);
+        s = Trans[s][bytes[p++] & 0xff];
+        if (s < 0) {
+            if (s == A) return 2;
+            throw IllegalCharacterException.INSTANCE;
+        }
+        return lengthForThree(bytes, p, end, b, s);
+    }
+
+    private int lengthForThree(byte[]bytes, int p, int end, int b, int s) {
+        if (p == end) return -(EncLen[b] - 2);
+        s = Trans[s][bytes[p++] & 0xff];
+        if (s == A) return 3;
+        throw IllegalCharacterException.INSTANCE;
+    }     
+
+    protected final int safeLengthForUptoTwo(byte[]bytes, int p, int end) {
+        int b = bytes[p++] & 0xff;
+        int s = TransZero[b];
+        
+        if (s < 0) {
+            if (s == A) return 1;
+            throw IllegalCharacterException.INSTANCE;
+        }
+        return lengthForTwo(bytes, p, end, b, s);
+    }    
+
+    private int lengthForTwo(byte[]bytes, int p, int end, int b, int s) {
+        if (p == end) return -(EncLen[b] - 1);
+        s = Trans[s][bytes[p++] & 0xff];
+        if (s == A) return 2;
+        throw IllegalCharacterException.INSTANCE;
+    }
+
     protected final int mbnMbcToCode(byte[]bytes, int p, int end) {
-        int len = length(bytes[p]);
+        int len = length(bytes, p, end);
         int n = bytes[p++] & 0xff;
         if (len == 1) return n;
         
@@ -66,7 +148,7 @@ public abstract class MultiByteEncoding extends AbstractEncoding {
             pp.value++;
             return 1;
         } else {
-            int len = length(bytes[p]);
+            int len = length(bytes, p, end);
             for (int i=0; i<len; i++) {
                 lower[lowerP++] = bytes[p++];
             }
@@ -90,7 +172,7 @@ public abstract class MultiByteEncoding extends AbstractEncoding {
             return 1;
         }
     }
-    
+
     protected final int mb2CodeToMbc(int code, byte[]bytes, int p) {
         int p_ = p;
         if ((code & 0xff00) != 0) {
@@ -98,10 +180,10 @@ public abstract class MultiByteEncoding extends AbstractEncoding {
         }
         bytes[p_++] = (byte)(code & 0xff);
         
-        if (length(bytes[p]) != (p_ - p)) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+        if (length(bytes, p, p_) != (p_ - p)) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
         return p_ - p;
     }
-    
+
     protected final int mb4CodeToMbc(int code, byte[]bytes, int p) {        
         int p_ = p;        
         if ((code & 0xff000000) != 0)           bytes[p_++] = (byte)((code >>> 24) & 0xff);
@@ -109,10 +191,10 @@ public abstract class MultiByteEncoding extends AbstractEncoding {
         if ((code & 0xff00) != 0 || p_ != p)    bytes[p_++] = (byte)((code >>> 8) & 0xff);
         bytes[p_++] = (byte)(code & 0xff);
         
-        if (length(bytes[p]) != (p_ - p)) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+        if (length(bytes, p, p_) != (p_ - p)) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
         return p_ - p;
     }
-    
+
     protected final boolean mb2IsCodeCType(int code, int ctype) {
         if (code < 128) {            
             return isCodeCTypeInternal(code, ctype); // configured with ascii
@@ -123,9 +205,8 @@ public abstract class MultiByteEncoding extends AbstractEncoding {
         }
         return false;
     }
-    
+
     protected final boolean mb4IsCodeCType(int code, int ctype) {
         return mb2IsCodeCType(code, ctype);
     }
-    
 }
diff --git a/src/org/joni/encoding/SingleByteEncoding.java b/src/org/joni/encoding/SingleByteEncoding.java
index a0c0da5..831995e 100644
--- a/src/org/joni/encoding/SingleByteEncoding.java
+++ b/src/org/joni/encoding/SingleByteEncoding.java
@@ -19,6 +19,7 @@
  */
 package org.joni.encoding;
 
+import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.exception.ErrorMessages;
 import org.joni.exception.ValueException;
@@ -28,7 +29,7 @@ public abstract class SingleByteEncoding extends AbstractEncoding {
     protected final byte[]LowerCaseTable; 
     
     protected SingleByteEncoding(short[]CTypeTable, byte[]LowerCaseTable) {
-        super(CTypeTable);
+        super(1, 1, CTypeTable);
         this.LowerCaseTable = LowerCaseTable;
     }
     
@@ -40,19 +41,14 @@ public abstract class SingleByteEncoding extends AbstractEncoding {
     }
 
     @Override
-    public final int strLength(byte[]bytes, int p, int end) {
-        return end - p;
-    }    
-
-    @Override
-    public final int maxLength() {
+    public int length(byte[]bytes, int p, int end) {
         return 1;
     }
 
-    @Override   
-    public final int minLength() {
-        return 1;
-    }
+    @Override
+    public final int strLength(byte[]bytes, int p, int end) {
+        return end - p;
+    }    
 
     // onigenc_is_mbc_newline_0x0a here
     
@@ -67,14 +63,19 @@ public abstract class SingleByteEncoding extends AbstractEncoding {
      */
     @Override
     public final int codeToMbcLength(int code) {
-        if (code < 0x100) return 1;
-        throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+        if (Config.VANILLA) {
+            if (code < 0x100) return 1;
+            throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+        } else {
+            return 1;
+        }
     }
 
     /** onigenc_single_byte_code_to_mbc
      */
     @Override
     public final int codeToMbc(int code, byte[]bytes, int p) {
+        // TODO: raise if (code > 0xff): range error "out of char range" 
         bytes[p] = (byte)(code & 0xff); // c implementation also uses mask here
         return 1;
     }
@@ -99,14 +100,4 @@ public abstract class SingleByteEncoding extends AbstractEncoding {
     public final boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
         return true;        
     }
-    
-    @Override
-    public final boolean isSingleByte() {
-        return true;
-    }
-    
-    @Override
-    public final boolean isFixedWidth() {
-        return true;
-    }
 }
diff --git a/src/org/joni/encoding/specific/BIG5Encoding.java b/src/org/joni/encoding/specific/BIG5Encoding.java
index cf7a832..e585e36 100644
--- a/src/org/joni/encoding/specific/BIG5Encoding.java
+++ b/src/org/joni/encoding/specific/BIG5Encoding.java
@@ -19,35 +19,30 @@
  */
 package org.joni.encoding.specific;
 
+import org.joni.Config;
 import org.joni.IntHolder;
-import org.joni.encoding.MultiByteEncoding;
+import org.joni.encoding.CanBeTrailTableEncoding;
 
-public final class BIG5Encoding extends MultiByteEncoding  {
+public final class BIG5Encoding extends CanBeTrailTableEncoding  {
 
     protected BIG5Encoding() {
-        super(Big5EncLen, ASCIIEncoding.AsciiCtypeTable);
+        super(1, 2, Big5EncLen, BIG5Trans, ASCIIEncoding.AsciiCtypeTable, BIG5_CAN_BE_TRAIL_TABLE);
     }
-    
+
     @Override
     public String toString() {
         return "Big5";
     }
-    
-    @Override
-    public int maxLength() {
-        return 2;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA){
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoTwo(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         return mbnMbcToCode(bytes, p, end);
@@ -73,6 +68,11 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         return mb2IsCodeCType(code, ctype);
     }
     
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
     static final boolean BIG5_CAN_BE_TRAIL_TABLE[] = {
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
@@ -91,46 +91,7 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
         true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false
     };
-    
-    private static boolean isBig5MbFirst(int b) { 
-        return Big5EncLen[b] > 1;
-    }
-    
-    private static boolean isBig5MbTrail(int b) {
-        return BIG5_CAN_BE_TRAIL_TABLE[b];
-    }
-    
-    @Override
-    public int leftAdjustCharHead(byte[]bytes, int p, int end) {
-        if (end <= p) return end;
-        
-        int p_ = end;
-        
-        if (isBig5MbTrail(bytes[p_] & 0xff)) {
-            while (p_ > p) {
-                if (!isBig5MbFirst(bytes[--p_] & 0xff)) {
-                    p_++;
-                    break;
-                }
-            }
-        }
-        int len = length(bytes[p_]);
-        if (p_ + len > end) return p_;
-        p_ += len;
-        return p_ + ((end - p_) & ~1);
-    }    
-    
-    @Override
-    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
-        return null;
-    }
-    
-    @Override    
-    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
-        int c = bytes[p] & 0xff;
-        return isBig5MbTrail(c);
-    }
-    
+
     static final int Big5EncLen[] = {
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -149,6 +110,45 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
     };
-    
+
+    private static final int BIG5Trans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+        }
+    };
+
     public static final BIG5Encoding INSTANCE = new BIG5Encoding();
 }
diff --git a/src/org/joni/encoding/specific/CP1251Encoding.java b/src/org/joni/encoding/specific/CP1251Encoding.java
index b3a9f2d..a95df79 100644
--- a/src/org/joni/encoding/specific/CP1251Encoding.java
+++ b/src/org/joni/encoding/specific/CP1251Encoding.java
@@ -38,7 +38,8 @@ final public class CP1251Encoding extends CaseFoldMapEncoding  {
         int p = pp.value;
         int lowerP = 0;
         
-        lower[lowerP] = LowerCaseTable[bytes[p] & 0xff]; 
+        lower[lowerP] = LowerCaseTable[bytes[p] & 0xff];
+        pp.value++;        
         return 1;
     }
     
diff --git a/src/org/joni/encoding/specific/BIG5Encoding.java b/src/org/joni/encoding/specific/CP949Encoding.java
similarity index 59%
copy from src/org/joni/encoding/specific/BIG5Encoding.java
copy to src/org/joni/encoding/specific/CP949Encoding.java
index cf7a832..35e4ee8 100644
--- a/src/org/joni/encoding/specific/BIG5Encoding.java
+++ b/src/org/joni/encoding/specific/CP949Encoding.java
@@ -19,35 +19,30 @@
  */
 package org.joni.encoding.specific;
 
+import org.joni.Config;
 import org.joni.IntHolder;
-import org.joni.encoding.MultiByteEncoding;
+import org.joni.encoding.CanBeTrailTableEncoding;
 
-public final class BIG5Encoding extends MultiByteEncoding  {
+public final class CP949Encoding extends CanBeTrailTableEncoding {
 
-    protected BIG5Encoding() {
-        super(Big5EncLen, ASCIIEncoding.AsciiCtypeTable);
+    protected CP949Encoding() {
+        super(1, 2, CP949EncLen, CP949Trans, ASCIIEncoding.AsciiCtypeTable, CP949_CAN_BE_TRAIL_TABLE);
     }
     
     @Override
     public String toString() {
-        return "Big5";
+        return "CP949";
     }
-    
-    @Override
-    public int maxLength() {
-        return 2;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA){
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoTwo(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         return mbnMbcToCode(bytes, p, end);
@@ -57,7 +52,7 @@ public final class BIG5Encoding extends MultiByteEncoding  {
     public int codeToMbcLength(int code) {
         return mb2CodeToMbcLength(code);
     }
-
+    
     @Override
     public int codeToMbc(int code, byte[]bytes, int p) {
         return mb2CodeToMbc(code, bytes, p);
@@ -67,13 +62,18 @@ public final class BIG5Encoding extends MultiByteEncoding  {
     public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
         return mbnMbcCaseFold(flag, bytes, pp, end, lower);
     }
-    
+
     @Override
     public boolean isCodeCType(int code, int ctype) {
         return mb2IsCodeCType(code, ctype);
     }
     
-    static final boolean BIG5_CAN_BE_TRAIL_TABLE[] = {
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
+    static final boolean CP949_CAN_BE_TRAIL_TABLE[] = {
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
@@ -91,49 +91,8 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
         true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false
     };
-    
-    private static boolean isBig5MbFirst(int b) { 
-        return Big5EncLen[b] > 1;
-    }
-    
-    private static boolean isBig5MbTrail(int b) {
-        return BIG5_CAN_BE_TRAIL_TABLE[b];
-    }
-    
-    @Override
-    public int leftAdjustCharHead(byte[]bytes, int p, int end) {
-        if (end <= p) return end;
-        
-        int p_ = end;
-        
-        if (isBig5MbTrail(bytes[p_] & 0xff)) {
-            while (p_ > p) {
-                if (!isBig5MbFirst(bytes[--p_] & 0xff)) {
-                    p_++;
-                    break;
-                }
-            }
-        }
-        int len = length(bytes[p_]);
-        if (p_ + len > end) return p_;
-        p_ += len;
-        return p_ + ((end - p_) & ~1);
-    }    
-    
-    @Override
-    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
-        return null;
-    }
-    
-    @Override    
-    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
-        int c = bytes[p] & 0xff;
-        return isBig5MbTrail(c);
-    }
-    
-    static final int Big5EncLen[] = {
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+    static final int CP949EncLen[] = {
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -147,8 +106,49 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
     };
+
+    private static final int CP949Trans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ A, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, F, F, F, F, F,
+          /* 6 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, F, F, F, F, F,
+          /* 8 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+        }
+    };
     
-    public static final BIG5Encoding INSTANCE = new BIG5Encoding();
+    public static final CP949Encoding INSTANCE = new CP949Encoding();
 }
diff --git a/src/org/joni/encoding/specific/EUCCNEncoding.java b/src/org/joni/encoding/specific/EUCCNEncoding.java
deleted file mode 100644
index 66c65c5..0000000
--- a/src/org/joni/encoding/specific/EUCCNEncoding.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of 
- * this software and associated documentation files (the "Software"), to deal in 
- * the Software without restriction, including without limitation the rights to 
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
- * of the Software, and to permit persons to whom the Software is furnished to do
- * so, subject to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
- * SOFTWARE.
- */
-package org.joni.encoding.specific;
-
-import org.joni.IntHolder;
-import org.joni.encoding.EucEncoding;
-
-public final class EUCCNEncoding extends EucEncoding  {
-
-    protected EUCCNEncoding() {
-        super(EUCKREncoding.EUCKREncLen, ASCIIEncoding.AsciiCtypeTable);
-    }
-    
-    @Override
-    public String toString() {
-        return "EUC-CN";
-    }
-    
-    @Override
-    public int maxLength() {
-        return 2;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
-    @Override
-    public boolean isFixedWidth() {
-        return false;
-    }
-    
-    @Override
-    public int mbcToCode(byte[]bytes, int p, int end) {
-        return mbnMbcToCode(bytes, p, end);
-    }
-    
-    @Override
-    public int codeToMbcLength(int code) {
-        return mb2CodeToMbcLength(code);
-    }
-    
-    @Override
-    public int codeToMbc(int code, byte[]bytes, int p) {
-        return mb2CodeToMbc(code, bytes, p);
-    }
-    
-    @Override
-    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
-        return mbnMbcCaseFold(flag, bytes, pp, end, lower);
-    }
-    
-    @Override
-    public boolean isCodeCType(int code, int ctype) {
-        return mb2IsCodeCType(code, ctype);
-    }
-    
-    @Override
-    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
-        return null;
-    }
-    
-    // euckr_islead
-    protected boolean isLead(int c) {
-        return (c < 0xa1 || c == 0xff);
-    }
-    
-    @Override
-    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
-        int c = bytes[p] & 0xff;
-        return c <= 0x7e;
-    }
-    
-    public static final EUCKREncoding INSTANCE = new EUCKREncoding();
-}
diff --git a/src/org/joni/encoding/specific/EUCJPEncoding.java b/src/org/joni/encoding/specific/EUCJPEncoding.java
index 9fcca25..47b166e 100644
--- a/src/org/joni/encoding/specific/EUCJPEncoding.java
+++ b/src/org/joni/encoding/specific/EUCJPEncoding.java
@@ -20,6 +20,7 @@
 package org.joni.encoding.specific;
 
 import org.joni.CodeRangeBuffer;
+import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.constants.CharacterType;
 import org.joni.encoding.EucEncoding;
@@ -31,49 +32,39 @@ import org.joni.util.BytesHash;
 public final class EUCJPEncoding extends EucEncoding  {
 
     protected EUCJPEncoding() {
-        super(EUCJPEncLen, ASCIIEncoding.AsciiCtypeTable);
+        super(1, 3, EUCJPEncLen, EUCJPTrans, ASCIIEncoding.AsciiCtypeTable);
     }
     
     @Override
     public String toString() {
         return "EUC-JP";
     }
-    
-    @Override
-    public int maxLength() {
-        return 3;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoThree(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
-        int len = length(bytes[p]);
-        int n = bytes[p++] & 0xff;
-        if (len == 1) return n;
-
-        for (int i=1; i<len; i++) {
-            if (p >= end) break;
-            int c = bytes[p++] & 0xff;
-            n <<= 8;
-            n += c;
-        }
-        return n;
+        return mbnMbcToCode(bytes, p, end);
     }
     
     @Override
     public int codeToMbcLength(int code) {
         if (isAscii(code)) return 1;
-        if ((code & 0xff0000) != 0) return 3;
-        if ((code &   0xff00) != 0) return 2;
+        if (Config.VANILLA) {
+            if ((code & 0xff0000) != 0) return 3;
+            if ((code &   0xff00) != 0) return 2;
+        } else {
+            if (code > 0xffffff) return 0; // wider than three bytes: answered with 0 here, not with the exception below
+            if ((code & 0xff0000) >= 0x800000) return 3; // bits 16-23 hold a value >= 0x80
+            if ((code & 0xff00) >= 0x8000) return 2; // bits 8-15 hold a value >= 0x80
+        }
         throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
     }
     
@@ -84,7 +75,7 @@ public final class EUCJPEncoding extends EucEncoding  {
         if ((code &   0xff00) != 0) bytes[p_++] = (byte)((code >>  8) & 0xff);
         bytes[p_++] = (byte)(code & 0xff);
         
-        if (length(bytes[p_]) != p_ - p) throw new InternalException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);        
+        if (length(bytes, p, p_) != p_ - p) throw new InternalException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
         return p_ - p;
     }
     
@@ -98,7 +89,7 @@ public final class EUCJPEncoding extends EucEncoding  {
             pp.value++;
             return 1;
         } else {
-            int len = length(bytes[p]);
+            int len = length(bytes, p, end);
             for (int i=0; i<len; i++) {
                 lower[lowerP++] = bytes[p++];
             }
@@ -200,6 +191,63 @@ public final class EUCJPEncoding extends EucEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
     };
-    
+
+    private static final int EUCJPTrans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 1, 2,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+        },
+        { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        }
+    };
+
     public static final EUCJPEncoding INSTANCE = new EUCJPEncoding();
 }
diff --git a/src/org/joni/encoding/specific/EUCKREncoding.java b/src/org/joni/encoding/specific/EUCKREncoding.java
index 4d5583f..56632e3 100644
--- a/src/org/joni/encoding/specific/EUCKREncoding.java
+++ b/src/org/joni/encoding/specific/EUCKREncoding.java
@@ -19,35 +19,30 @@
  */
 package org.joni.encoding.specific;
 
+import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.encoding.EucEncoding;
 
 public final class EUCKREncoding extends EucEncoding  {
 
     protected EUCKREncoding() {
-        super(EUCKREncLen, ASCIIEncoding.AsciiCtypeTable);
+        super(1, 2, EUCKREncLen, EUCKRTrans, ASCIIEncoding.AsciiCtypeTable);
     }
     
     @Override
     public String toString() {
         return "EUC-KR";
     }
-    
-    @Override
-    public int maxLength() {
-        return 2;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA){
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoTwo(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         return mbnMbcToCode(bytes, p, end);
@@ -107,6 +102,45 @@ public final class EUCKREncoding extends EucEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
     };
+
+    private static final int EUCKRTrans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+        }
+    };
     
     public static final EUCKREncoding INSTANCE = new EUCKREncoding();
 }
diff --git a/src/org/joni/encoding/specific/EUCTWEncoding.java b/src/org/joni/encoding/specific/EUCTWEncoding.java
index 6210ede..7d3c8aa 100644
--- a/src/org/joni/encoding/specific/EUCTWEncoding.java
+++ b/src/org/joni/encoding/specific/EUCTWEncoding.java
@@ -19,35 +19,30 @@
  */
 package org.joni.encoding.specific;
 
+import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.encoding.EucEncoding;
 
 public final class EUCTWEncoding extends EucEncoding  {
 
     protected EUCTWEncoding() {
-        super(EUCTWEncLen, ASCIIEncoding.AsciiCtypeTable);
+        super(1, 4, EUCTWEncLen, EUCTWTrans, ASCIIEncoding.AsciiCtypeTable);
     }
     
     @Override
     public String toString() {
         return "EUC-TW";
     }
-    
-    @Override
-    public int maxLength() {
-        return 4;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoFour(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         return mbnMbcToCode(bytes, p, end);
@@ -77,12 +72,16 @@ public final class EUCTWEncoding extends EucEncoding  {
     public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
         return null;
     }
-    
+
     // euckr_islead
     protected boolean isLead(int c) {
-        return ((c < 0xa1 && c != 0x8e) || c == 0xff);
+        if (Config.VANILLA) {
+            return ((c < 0xa1 && c != 0x8e) || c == 0xff);
+        } else {
+            return ((c - 0xa1) & 0xff) > 0xfe - 0xa1; // & 0xff stands in for C's (UChar) cast: c < 0xa1 must wrap to a large value and count as lead
+        }
     }
-    
+
     @Override    
     public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
         int c = bytes[p] & 0xff;
@@ -107,6 +106,81 @@ public final class EUCTWEncoding extends EucEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
     };
-    
+
+    private static final int EUCTWTrans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 2, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+        },
+        { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* b */ 3, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S3   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        }
+    };
+
     public static final EUCTWEncoding INSTANCE = new EUCTWEncoding();
 }
diff --git a/src/org/joni/encoding/specific/GB18030Encoding.java b/src/org/joni/encoding/specific/GB18030Encoding.java
new file mode 100644
index 0000000..105523d
--- /dev/null
+++ b/src/org/joni/encoding/specific/GB18030Encoding.java
@@ -0,0 +1,516 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of 
+ * this software and associated documentation files (the "Software"), to deal in 
+ * the Software without restriction, including without limitation the rights to 
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+ * SOFTWARE.
+ */
+package org.joni.encoding.specific;
+
+import org.joni.Config;
+import org.joni.IntHolder;
+import org.joni.encoding.MultiByteEncoding;
+import org.joni.exception.IllegalCharacterException;
+
+public final class GB18030Encoding extends MultiByteEncoding  {
+
+    protected GB18030Encoding() {
+        super(1, 4, null, GB18030Trans, ASCIIEncoding.AsciiCtypeTable);
+    }
+
+    @Override
+    public String toString() {
+        return "GB18030";
+    }
+
+    @Override
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA){
+            if (GB18030_MAP[bytes[p] & 0xff] != CM) return 1; // first byte is not class CM: one-byte result
+            int c = GB18030_MAP[bytes[p + 1] & 0xff]; // NOTE(review): bytes[p + 1] is read without comparing p + 1 against end
+            if (c == C4) return 4;
+            if (c == C1) return 1; /* illegal sequence */
+            return 2;
+        } else {
+            int s = TransZero[bytes[p++] & 0xff]; // state after the first byte; p now indexes the second byte
+            if (s < 0) {
+                if (s == A) return 1; // state A after one byte: length 1; any other negative state throws
+                throw IllegalCharacterException.INSTANCE;
+            }
+            return lengthForTwoUptoFour(bytes, p, end, s);
+        }
+    }
+
+    private int lengthForTwoUptoFour(byte[]bytes, int p, int end, int s) {
+        if (p == end) return -1; // no second byte available; NOTE(review): negative result presumably encodes the missing byte count - confirm with callers
+        s = Trans[s][bytes[p++] & 0xff]; // step the state table with the second byte
+        if (s < 0) {
+            if (s == A) return 2; // state A after two bytes: length 2; any other negative state throws
+            throw IllegalCharacterException.INSTANCE;
+        }
+        return lengthForThreeUptoFour(bytes, p, end, s);
+    }
+
+    private int lengthForThreeUptoFour(byte[]bytes, int p, int end, int s) {
+        if (p == end) return -2; // no third byte available; NOTE(review): presumably "two bytes missing" - confirm with callers
+        s = Trans[s][bytes[p++] & 0xff]; // step the state table with the third byte
+        if (s < 0) {
+            if (s == A) return 3; // state A after three bytes: length 3; any other negative state throws
+            throw IllegalCharacterException.INSTANCE;
+        }
+        if (p == end) return -1; // no fourth byte available
+        s = Trans[s][bytes[p] & 0xff]; // step the state table with the fourth byte
+        if (s == A) return 4;
+        throw IllegalCharacterException.INSTANCE;  // fourth byte did not lead to state A
+    }    
+
+    @Override
+    public int mbcToCode(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return mbnMbcToCode(bytes, p, end);
+        } else {
+            return mbnMbcToCode(bytes, p, end) & 0x7FFFFFFF; // clear bit 31 so the returned code is never negative
+        }
+    }
+    
+    @Override
+    public int codeToMbcLength(int code) {
+        return mb4CodeToMbcLength(code);
+    }
+
+    @Override
+    public int codeToMbc(int code, byte[]bytes, int p) {
+        if (Config.VANILLA) {
+            return mb4CodeToMbc(code, bytes, p);
+        } else {
+            if ((code & 0xff000000) != 0) code |= 0x80000000; // top byte in use: set bit 31 again, which mbcToCode masks off
+            return mb4CodeToMbc(code, bytes, p);
+        }
+    }
+    
+    @Override
+    public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
+        return mbnMbcCaseFold(flag, bytes, pp, end, lower);
+    }
+    
+    @Override
+    public boolean isCodeCType(int code, int ctype) {
+        return mb4IsCodeCType(code, ctype);
+    }
+    
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
+    private enum State {
+        START,
+        One_C2,
+        One_C4,
+        One_CM,
+
+        Odd_CM_One_CX,
+        Even_CM_One_CX,
+
+        /* CMC4 : pair of "CM C4" */
+        One_CMC4,
+        Odd_CMC4,
+        One_C4_Odd_CMC4,
+        Even_CMC4,
+        One_C4_Even_CMC4,
+
+        Odd_CM_Odd_CMC4,
+        Even_CM_Odd_CMC4,
+
+        Odd_CM_Even_CMC4,
+        Even_CM_Even_CMC4,
+
+        /* C4CM : pair of "C4 CM" */
+        Odd_C4CM,
+        One_CM_Odd_C4CM,
+        Even_C4CM,
+        One_CM_Even_C4CM,
+
+        Even_CM_Odd_C4CM,
+        Odd_CM_Odd_C4CM,
+        Even_CM_Even_C4CM,
+        Odd_CM_Even_C4CM
+    };
+
+    @Override
+    public int leftAdjustCharHead(byte[]bytes, int start, int s) {
+        State state = State.START;
+        /* Scan left from s down to start, classifying each byte with GB18030_MAP and stepping the state machine; return s, s - 1, s - 2 or s - 3 as soon as the classes seen so far decide the result. */
+        for (int p = s; p >= start; p--) {
+            switch (state) {
+            case START:
+                switch (GB18030_MAP[bytes[p] & 0xff]) {
+                case C1:    return s; /* byte at s is a one-byte char: no adjustment */
+                case C2:    state = State.One_C2; /* C2 */
+                    break;
+                case C4:    state = State.One_C4; /* C4 */
+                    break;
+                case CM:    state = State.One_CM; /* CM */
+                    break;
+                }
+                break;
+                case One_C2: /* C2 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return s;
+                    case CM:    state = State.Odd_CM_One_CX; /* CM C2 */
+                        break;
+                    }
+                    break;
+                case One_C4: /* C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return s;
+                    case CM:    state = State.One_CMC4;
+                        break;
+                    }
+                    break;
+                case One_CM: /* CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:    return s;
+                    case C4:    state = State.Odd_C4CM;
+                        break;
+                    case CM:    state = State.Odd_CM_One_CX; /* CM CM */
+                        break;
+                    }
+                    break;
+                case Odd_CM_One_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 1);
+                    case CM:    state = State.Even_CM_One_CX;
+                        break;
+                    }
+                    break;
+                case Even_CM_One_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return s;
+                    case CM:    state = State.Odd_CM_One_CX;
+                        break;
+                    }
+                    break;
+                case One_CMC4: /* CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:    return (s - 1);
+                    case C4:    state = State.One_C4_Odd_CMC4; /* C4 CM C4 */
+                        break;
+                    case CM:    state = State.Even_CM_One_CX; /* CM CM C4 */
+                        break;
+                    }
+                    break;
+                case Odd_CMC4: /* CM C4 CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:    return (s - 1);
+                    case C4:    state = State.One_C4_Odd_CMC4;
+                        break;
+                    case CM:    state = State.Odd_CM_Odd_CMC4;
+                        break;
+                    }
+                    break;
+                case One_C4_Odd_CMC4: /* C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 1);
+                    case CM:    state = State.Even_CMC4; /* CM C4 CM C4 */
+                        break;
+                    }
+                    break;
+                case Even_CMC4: /* CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:    return (s - 3);
+                    case C4:    state = State.One_C4_Even_CMC4;
+                        break;
+                    case CM:    state = State.Odd_CM_Even_CMC4;
+                        break;
+                    }
+                    break;
+                case One_C4_Even_CMC4: /* C4 CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 3);
+                    case CM:    state = State.Odd_CMC4;
+                        break;
+                    }
+                    break;
+                case Odd_CM_Odd_CMC4: /* CM CM C4 CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 3);
+                    case CM:    state = State.Even_CM_Odd_CMC4;
+                        break;
+                    }
+                    break;
+                case Even_CM_Odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 1);
+                    case CM:    state = State.Odd_CM_Odd_CMC4;
+                        break;
+                    }
+                    break;
+                case Odd_CM_Even_CMC4: /* CM CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 1);
+                    case CM:    state = State.Even_CM_Even_CMC4;
+                        break;
+                    }
+                    break;
+                case Even_CM_Even_CMC4: /* CM CM CM C4 CM C4 */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 3);
+                    case CM:    state = State.Odd_CM_Even_CMC4;
+                        break;
+                    }
+                    break;
+                case Odd_C4CM: /* C4 CM */  /* C4 CM C4 CM C4 CM*/
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return s;
+                    case CM:    state = State.One_CM_Odd_C4CM; /* CM C4 CM */
+                        break;
+                    }
+                    break;
+                case One_CM_Odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:    return (s - 2); /* |CM C4 CM */
+                    case C4:    state = State.Even_C4CM;
+                        break;
+                    case CM:    state = State.Even_CM_Odd_C4CM;
+                        break;
+                    }
+                    break;
+                case Even_C4CM: /* C4 CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 2);  /* C4|CM C4 CM */
+                    case CM:    state = State.One_CM_Even_C4CM;
+                        break;
+                    }
+                    break;
+                case One_CM_Even_C4CM: /* CM C4 CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:    return (s - 0);  /*|CM C4 CM C4|CM */
+                    case C4:    state = State.Odd_C4CM;
+                        break;
+                    case CM:    state = State.Even_CM_Even_C4CM;
+                        break;
+                    }
+                    break;
+                case Even_CM_Odd_C4CM: /* CM CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 0); /* |CM CM|C4|CM */
+                    case CM:    state = State.Odd_CM_Odd_C4CM;
+                        break;
+                    }
+                    break;
+                case Odd_CM_Odd_C4CM: /* CM CM CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 2); /* |CM CM|CM C4 CM */
+                    case CM:    state = State.Even_CM_Odd_C4CM;
+                        break;
+                    }
+                    break;
+                case Even_CM_Even_C4CM: /* CM CM C4 CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 2); /* |CM CM|C4|CM C4 CM */
+                    case CM:    state = State.Odd_CM_Even_C4CM;
+                        break;
+                    }
+                    break;
+                case Odd_CM_Even_C4CM: /* CM CM CM C4 CM C4 CM */
+                    switch (GB18030_MAP[bytes[p] & 0xff]) {
+                    case C1:
+                    case C2:
+                    case C4:    return (s - 0);  /* |CM CM|CM C4 CM C4|CM */
+                    case CM:    state = State.Even_CM_Even_C4CM;
+                        break;
+                    }
+                    break;
+            }
+        }
+        /* The scan ran past start without deciding: the result depends only on the state it stopped in. */
+        switch (state) {
+        case START:             return (s - 0);
+        case One_C2:            return (s - 0);
+        case One_C4:            return (s - 0);
+        case One_CM:            return (s - 0);
+
+        case Odd_CM_One_CX:     return (s - 1);
+        case Even_CM_One_CX:    return (s - 0);
+
+        case One_CMC4:          return (s - 1);
+        case Odd_CMC4:          return (s - 1);
+        case One_C4_Odd_CMC4:   return (s - 1);
+        case Even_CMC4:         return (s - 3);
+        case One_C4_Even_CMC4:  return (s - 3);
+
+        case Odd_CM_Odd_CMC4:   return (s - 3);
+        case Even_CM_Odd_CMC4:  return (s - 1);
+
+        case Odd_CM_Even_CMC4:  return (s - 1);
+        case Even_CM_Even_CMC4: return (s - 3);
+
+        case Odd_C4CM:          return (s - 0);
+        case One_CM_Odd_C4CM:   return (s - 2);
+        case Even_C4CM:         return (s - 2);
+        case One_CM_Even_C4CM:  return (s - 0);
+
+        case Even_CM_Odd_C4CM:  return (s - 0);
+        case Odd_CM_Odd_C4CM:   return (s - 2);
+        case Even_CM_Even_C4CM: return (s - 2);
+        case Odd_CM_Even_C4CM:  return (s - 0);
+        }
+
+        return s; /* not reached: the switch above returns for every State constant */
+    }
+    
+    @Override
+    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
+        return GB18030_MAP[bytes[p] & 0xff] == C1; /* true only when the byte at p is classed C1 (one-byte char); end is not consulted */
+    }
+
+    private static final int C1 = 0; /* one-byte char */
+    private static final int C2 = 1; /* one-byte or second of two-byte char */
+    private static final int C4 = 2; /* one-byte or second or fourth of four-byte char */
+    private static final int CM = 3; /* first of two- or four-byte char, second of two-byte char, or third of four-byte char */
+
+    private static final int GB18030_MAP[] = { /* byte class (C1/C2/C4/CM) for every byte value 0x00-0xff, 16 entries per row */
+        C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+        C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+        C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+        C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
+        C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+        C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+        C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+        C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
+        C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+        CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
+    };
+
+    private static final int GB18030Trans[][] = Config.VANILLA ? null : new int[][]{ /* one 256-entry row per state S0..S3, indexed by byte value; null when Config.VANILLA. A and F are declared outside this excerpt - presumably accept/failure markers, with 1..3 the next state index (TODO confirm) */
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F, F, F, F, F, F,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
+          /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+        },
+        { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* c */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* d */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* e */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, F 
+        },
+        { /* S3   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        }
+    };
+
+    public static final GB18030Encoding INSTANCE = new GB18030Encoding(); /* shared instance of this encoding */
+}
diff --git a/src/org/joni/encoding/specific/BIG5Encoding.java b/src/org/joni/encoding/specific/GBKEncoding.java
similarity index 58%
copy from src/org/joni/encoding/specific/BIG5Encoding.java
copy to src/org/joni/encoding/specific/GBKEncoding.java
index cf7a832..4c3bf1f 100644
--- a/src/org/joni/encoding/specific/BIG5Encoding.java
+++ b/src/org/joni/encoding/specific/GBKEncoding.java
@@ -1,5 +1,5 @@
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
  * this software and associated documentation files (the "Software"), to deal in 
  * the Software without restriction, including without limitation the rights to 
  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
@@ -19,35 +19,30 @@
  */
 package org.joni.encoding.specific;
 
+import org.joni.Config;
 import org.joni.IntHolder;
-import org.joni.encoding.MultiByteEncoding;
+import org.joni.encoding.CanBeTrailTableEncoding;
 
-public final class BIG5Encoding extends MultiByteEncoding  {
+public final class GBKEncoding extends CanBeTrailTableEncoding {
 
-    protected BIG5Encoding() {
-        super(Big5EncLen, ASCIIEncoding.AsciiCtypeTable);
+    protected GBKEncoding() {
+        super(1, 2, GBKEncLen, GBKTrans, ASCIIEncoding.AsciiCtypeTable, GBK_CAN_BE_TRAIL_TABLE);
     }
-    
+
     @Override
     public String toString() {
-        return "Big5";
-    }
-    
-    @Override
-    public int maxLength() {
-        return 2;
+        return "GBK";
     }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA){
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoTwo(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         return mbnMbcToCode(bytes, p, end);
@@ -57,7 +52,7 @@ public final class BIG5Encoding extends MultiByteEncoding  {
     public int codeToMbcLength(int code) {
         return mb2CodeToMbcLength(code);
     }
-
+    
     @Override
     public int codeToMbc(int code, byte[]bytes, int p) {
         return mb2CodeToMbc(code, bytes, p);
@@ -67,13 +62,18 @@ public final class BIG5Encoding extends MultiByteEncoding  {
     public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
         return mbnMbcCaseFold(flag, bytes, pp, end, lower);
     }
-    
+
     @Override
     public boolean isCodeCType(int code, int ctype) {
         return mb2IsCodeCType(code, ctype);
     }
     
-    static final boolean BIG5_CAN_BE_TRAIL_TABLE[] = {
+    @Override
+    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
+        return null;
+    }
+
+    static final boolean GBK_CAN_BE_TRAIL_TABLE[] = {
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
@@ -91,49 +91,8 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
         true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false
     };
-    
-    private static boolean isBig5MbFirst(int b) { 
-        return Big5EncLen[b] > 1;
-    }
-    
-    private static boolean isBig5MbTrail(int b) {
-        return BIG5_CAN_BE_TRAIL_TABLE[b];
-    }
-    
-    @Override
-    public int leftAdjustCharHead(byte[]bytes, int p, int end) {
-        if (end <= p) return end;
-        
-        int p_ = end;
-        
-        if (isBig5MbTrail(bytes[p_] & 0xff)) {
-            while (p_ > p) {
-                if (!isBig5MbFirst(bytes[--p_] & 0xff)) {
-                    p_++;
-                    break;
-                }
-            }
-        }
-        int len = length(bytes[p_]);
-        if (p_ + len > end) return p_;
-        p_ += len;
-        return p_ + ((end - p_) & ~1);
-    }    
-    
-    @Override
-    public int[]ctypeCodeRange(int ctype, IntHolder sbOut) {
-        return null;
-    }
-    
-    @Override    
-    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
-        int c = bytes[p] & 0xff;
-        return isBig5MbTrail(c);
-    }
-    
-    static final int Big5EncLen[] = {
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+    static final int GBKEncLen[] = {
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -147,8 +106,49 @@ public final class BIG5Encoding extends MultiByteEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
     };
+
+    private static final int GBKTrans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ A, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
+          /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+          }
+    };
     
-    public static final BIG5Encoding INSTANCE = new BIG5Encoding();
+    public static final GBKEncoding INSTANCE = new GBKEncoding(); // shared GBK instance; must be built from this class so the GBK tables and name are used
 }
diff --git a/src/org/joni/encoding/specific/KOI8Encoding.java b/src/org/joni/encoding/specific/KOI8Encoding.java
index fbcb19a..bd4dd1d 100644
--- a/src/org/joni/encoding/specific/KOI8Encoding.java
+++ b/src/org/joni/encoding/specific/KOI8Encoding.java
@@ -25,7 +25,7 @@ import org.joni.encoding.CaseFoldMapEncoding;
 final public class KOI8Encoding extends CaseFoldMapEncoding  {
 
     protected KOI8Encoding() {
-        super(KOI8_CtypeTable, KOI8_ToLowerCaseTable, KOI8_CaseFoldMap);
+        super(KOI8_CtypeTable, KOI8_ToLowerCaseTable, KOI8_CaseFoldMap, false);
     }
     
     @Override
diff --git a/src/org/joni/encoding/specific/KOI8Encoding.java b/src/org/joni/encoding/specific/KOI8REncoding.java
similarity index 77%
copy from src/org/joni/encoding/specific/KOI8Encoding.java
copy to src/org/joni/encoding/specific/KOI8REncoding.java
index fbcb19a..d870698 100644
--- a/src/org/joni/encoding/specific/KOI8Encoding.java
+++ b/src/org/joni/encoding/specific/KOI8REncoding.java
@@ -22,42 +22,33 @@ package org.joni.encoding.specific;
 import org.joni.IntHolder;
 import org.joni.encoding.CaseFoldMapEncoding;
 
-final public class KOI8Encoding extends CaseFoldMapEncoding  {
+final public class KOI8REncoding extends CaseFoldMapEncoding  {
 
-    protected KOI8Encoding() {
-        super(KOI8_CtypeTable, KOI8_ToLowerCaseTable, KOI8_CaseFoldMap);
+    protected KOI8REncoding() {
+        super(KOI8R_CtypeTable, KOI8R_ToLowerCaseTable, KOI8R_CaseFoldMap, false);
     }
     
     @Override
     public String toString() {
-        return "KOI8";
+        return "KOI8-R";
     }
-    
-    private static final int ENC_CASE_FOLD_ASCII_CASE           = 0; 
-    private static final int ONIGENC_CASE_FOLD_NONASCII_CASE    = 0;
-    
+
     @Override
     public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
         int p = pp.value;
         int lowerP = 0;
 
-        if (((flag & ENC_CASE_FOLD_ASCII_CASE) !=0 && isAscii(bytes[p] & 0xff)) ||
-            ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) !=0 && !isAscii(bytes[p] & 0xff))) {
-                lower[lowerP] = LowerCaseTable[bytes[p] & 0xff];
-        } else {
-            lower[lowerP] = bytes[p];
-        }
-        
+        lower[lowerP] = LowerCaseTable[bytes[p] & 0xff];
         pp.value++;
-        return 1; /* return byte length of converted char to lower */
+        return 1;
     }
-    
+
     @Override
     public boolean isCodeCType(int code, int ctype) {
         return code < 256 ? isCodeCTypeInternal(code, ctype) : false;
     }
     
-    static final short KOI8_CtypeTable[] = {
+    static final short KOI8R_CtypeTable[] = {
         0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
         0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
         0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
@@ -74,14 +65,14 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
         0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
         0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
         0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
         0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
         0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
@@ -92,7 +83,7 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
     };
     
-    static final byte KOI8_ToLowerCaseTable[] = new byte[]{
+    static final byte KOI8R_ToLowerCaseTable[] = {
         (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
         (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
         (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
@@ -115,7 +106,7 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
         (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
         (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
-        (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+        (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\243', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
         (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
         (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
         (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
@@ -127,7 +118,9 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337'
     };
     
-    static final int KOI8_CaseFoldMap[][] = {
+    static final int KOI8R_CaseFoldMap[][] = {
+        { 0xa3, 0xb3 },
+
         { 0xc0, 0xe0 },
         { 0xc1, 0xe1 },
         { 0xc2, 0xe2 },
@@ -160,41 +153,8 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         { 0xdc, 0xfc },
         { 0xdd, 0xfd },
         { 0xde, 0xfe },
-        { 0xdf, 0xff },
-
-        { 0xe0, 0xc0 },
-        { 0xe1, 0xc1 },
-        { 0xe2, 0xc2 },
-        { 0xe3, 0xc3 },
-        { 0xe4, 0xc4 },
-        { 0xe5, 0xc5 },
-        { 0xe6, 0xc6 },
-        { 0xe7, 0xc7 },
-        { 0xe8, 0xc8 },
-        { 0xe9, 0xc9 },
-        { 0xea, 0xca },
-        { 0xeb, 0xcb },
-        { 0xec, 0xcc },
-        { 0xed, 0xcd },
-        { 0xee, 0xce },
-        { 0xef, 0xcf },
-
-        { 0xf0, 0xd0 },
-        { 0xf1, 0xd1 },
-        { 0xf2, 0xd2 },
-        { 0xf3, 0xd3 },
-        { 0xf4, 0xd4 },
-        { 0xf5, 0xd5 },
-        { 0xf6, 0xd6 },
-        { 0xf7, 0xd7 },
-        { 0xf8, 0xd8 },
-        { 0xf9, 0xd9 },
-        { 0xfa, 0xda },
-        { 0xfb, 0xdb },
-        { 0xfc, 0xdc },
-        { 0xfe, 0xde },
-        { 0xff, 0xdf }
+        { 0xdf, 0xff }
     };
     
-    public static final KOI8Encoding INSTANCE = new KOI8Encoding();
+    public static final KOI8REncoding INSTANCE = new KOI8REncoding();
 }
diff --git a/src/org/joni/encoding/specific/KOI8Encoding.java b/src/org/joni/encoding/specific/KOI8UEncoding.java
similarity index 75%
copy from src/org/joni/encoding/specific/KOI8Encoding.java
copy to src/org/joni/encoding/specific/KOI8UEncoding.java
index fbcb19a..40399b3 100644
--- a/src/org/joni/encoding/specific/KOI8Encoding.java
+++ b/src/org/joni/encoding/specific/KOI8UEncoding.java
@@ -22,34 +22,25 @@ package org.joni.encoding.specific;
 import org.joni.IntHolder;
 import org.joni.encoding.CaseFoldMapEncoding;
 
-final public class KOI8Encoding extends CaseFoldMapEncoding  {
+final public class KOI8UEncoding extends CaseFoldMapEncoding  {
 
-    protected KOI8Encoding() {
-        super(KOI8_CtypeTable, KOI8_ToLowerCaseTable, KOI8_CaseFoldMap);
+    protected KOI8UEncoding() {
+        super(KOI8U_CtypeTable, KOI8U_ToLowerCaseTable, KOI8U_CaseFoldMap, false);
     }
     
     @Override
     public String toString() {
-        return "KOI8";
+        return "KOI8-U";
     }
-    
-    private static final int ENC_CASE_FOLD_ASCII_CASE           = 0; 
-    private static final int ONIGENC_CASE_FOLD_NONASCII_CASE    = 0;
-    
+
     @Override
     public int mbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
         int p = pp.value;
         int lowerP = 0;
 
-        if (((flag & ENC_CASE_FOLD_ASCII_CASE) !=0 && isAscii(bytes[p] & 0xff)) ||
-            ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) !=0 && !isAscii(bytes[p] & 0xff))) {
-                lower[lowerP] = LowerCaseTable[bytes[p] & 0xff];
-        } else {
-            lower[lowerP] = bytes[p];
-        }
-        
+        lower[lowerP] = LowerCaseTable[bytes[p] & 0xff];
         pp.value++;
-        return 1; /* return byte length of converted char to lower */
+        return 1;
     }
     
     @Override
@@ -57,7 +48,7 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         return code < 256 ? isCodeCTypeInternal(code, ctype) : false;
     }
     
-    static final short KOI8_CtypeTable[] = {
+    static final short KOI8U_CtypeTable[] = {
         0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
         0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
         0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
@@ -74,14 +65,14 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
         0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
         0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-        0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0,
+        0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x34a2, 0x00a0, 0x34a2, 0x34a2,
+        0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0,
         0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
         0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
         0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
@@ -92,7 +83,7 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
     };
     
-    static final byte KOI8_ToLowerCaseTable[] = new byte[]{
+    static final byte KOI8U_ToLowerCaseTable[] = {
         (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
         (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
         (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
@@ -115,8 +106,8 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
         (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
         (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
-        (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
-        (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+        (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\243', (byte)'\244', (byte)'\265', (byte)'\246', (byte)'\247',
+        (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\255', (byte)'\276', (byte)'\277',
         (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
         (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
         (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
@@ -127,7 +118,13 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337'
     };
     
-    static final int KOI8_CaseFoldMap[][] = {
+    static final int KOI8U_CaseFoldMap[][] = {
+        { 0xa3, 0xb3 },
+        { 0xa4, 0xb4 },
+        { 0xa6, 0xb6 },
+        { 0xa7, 0xb7 },
+        { 0xad, 0xbd },
+
         { 0xc0, 0xe0 },
         { 0xc1, 0xe1 },
         { 0xc2, 0xe2 },
@@ -160,41 +157,8 @@ final public class KOI8Encoding extends CaseFoldMapEncoding  {
         { 0xdc, 0xfc },
         { 0xdd, 0xfd },
         { 0xde, 0xfe },
-        { 0xdf, 0xff },
-
-        { 0xe0, 0xc0 },
-        { 0xe1, 0xc1 },
-        { 0xe2, 0xc2 },
-        { 0xe3, 0xc3 },
-        { 0xe4, 0xc4 },
-        { 0xe5, 0xc5 },
-        { 0xe6, 0xc6 },
-        { 0xe7, 0xc7 },
-        { 0xe8, 0xc8 },
-        { 0xe9, 0xc9 },
-        { 0xea, 0xca },
-        { 0xeb, 0xcb },
-        { 0xec, 0xcc },
-        { 0xed, 0xcd },
-        { 0xee, 0xce },
-        { 0xef, 0xcf },
-
-        { 0xf0, 0xd0 },
-        { 0xf1, 0xd1 },
-        { 0xf2, 0xd2 },
-        { 0xf3, 0xd3 },
-        { 0xf4, 0xd4 },
-        { 0xf5, 0xd5 },
-        { 0xf6, 0xd6 },
-        { 0xf7, 0xd7 },
-        { 0xf8, 0xd8 },
-        { 0xf9, 0xd9 },
-        { 0xfa, 0xda },
-        { 0xfb, 0xdb },
-        { 0xfc, 0xdc },
-        { 0xfe, 0xde },
-        { 0xff, 0xdf }
+        { 0xdf, 0xff }      
     };
-    
-    public static final KOI8Encoding INSTANCE = new KOI8Encoding();
+
+    public static final KOI8UEncoding INSTANCE = new KOI8UEncoding();
 }
diff --git a/src/org/joni/encoding/specific/SJISEncoding.java b/src/org/joni/encoding/specific/SJISEncoding.java
index bd1adb9..b22c3fb 100644
--- a/src/org/joni/encoding/specific/SJISEncoding.java
+++ b/src/org/joni/encoding/specific/SJISEncoding.java
@@ -20,39 +20,34 @@
 package org.joni.encoding.specific;
 
 import org.joni.CodeRangeBuffer;
+import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.constants.CharacterType;
-import org.joni.encoding.MultiByteEncoding;
+import org.joni.encoding.CanBeTrailTableEncoding;
 import org.joni.exception.ErrorMessages;
 import org.joni.exception.InternalException;
 import org.joni.util.BytesHash;
 
-public final class SJISEncoding extends MultiByteEncoding  {
+public final class SJISEncoding extends CanBeTrailTableEncoding  {
 
     protected SJISEncoding() {
-        super(SjisEncLen, ASCIIEncoding.AsciiCtypeTable);
+        super(1, 2, SjisEncLen, SjisTrans, ASCIIEncoding.AsciiCtypeTable, SJIS_CAN_BE_TRAIL_TABLE);
     }
-    
+
     @Override
     public String toString() {
         return "Shift_JIS";
     }
-    
-    @Override
-    public int maxLength() {
-        return 2;
-    }
-    
-    @Override
-    public int minLength() {
-        return 1;
-    }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoTwo(bytes, p, end);
+        }
     }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         return mbnMbcToCode(bytes, p, end);
@@ -82,59 +77,6 @@ public final class SJISEncoding extends MultiByteEncoding  {
         return mbnMbcCaseFold(flag, bytes, pp, end, lower);
     }
     
-    static final boolean SJIS_CAN_BE_TRAIL_TABLE[] = {
-        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
-        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
-        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
-        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
-        true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false
-    };
-    
-    private static boolean isSjisMbFirst(int b) { 
-        return SjisEncLen[b] > 1;
-    }
-    
-    private static boolean isSjisMbTrail(int b) {
-        return SJIS_CAN_BE_TRAIL_TABLE[b];
-    }
-    
-    @Override
-    public int leftAdjustCharHead(byte[]bytes, int p, int end) {
-        if (end <= p) return end;
-        
-        int p_ = end;
-        
-        if (isSjisMbTrail(bytes[p_] & 0xff)) {
-            while (p_ > p) {
-                if (!isSjisMbFirst(bytes[--p_] & 0xff)) {
-                    p_++;
-                    break;
-                }
-            }
-        }
-        int len = length(bytes[p_]);
-        if (p_ + len > end) return p_;
-        p_ += len;
-        return p_ + ((end - p_) & ~1);
-    }    
-    
-    @Override    
-    public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
-        int c = bytes[p] & 0xff;
-        return isSjisMbTrail(c);
-    }
-    
     private static final int CR_Hiragana[] = {
         1,
         0x829f, 0x82f1
@@ -178,7 +120,11 @@ public final class SJISEncoding extends MultiByteEncoding  {
                 return isCodeCTypeInternal(code, ctype);
             } else {
                 if (isWordGraphPrint(ctype)) {
-                    return codeToMbcLength(code) > 1;
+                    if (Config.VANILLA) {
+                        return codeToMbcLength(code) > 1;
+                    } else {
+                        return true;
+                    }
                 }
             }
         } else {
@@ -201,6 +147,25 @@ public final class SJISEncoding extends MultiByteEncoding  {
             return PropertyList[ctype];
         }
     }    
+
+    static final boolean SJIS_CAN_BE_TRAIL_TABLE[] = {
+        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+        false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false
+    };
     
     static final int SjisEncLen[] = {
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -221,5 +186,44 @@ public final class SJISEncoding extends MultiByteEncoding  {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
     };
     
+    private static final int SjisTrans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F, F, F
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
+          /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, F, F, F
+        }
+    };
+    
     public static final SJISEncoding INSTANCE = new SJISEncoding();
 }
diff --git a/src/org/joni/encoding/specific/UTF16BEEncoding.java b/src/org/joni/encoding/specific/UTF16BEEncoding.java
index 951e5a8..71e2f97 100644
--- a/src/org/joni/encoding/specific/UTF16BEEncoding.java
+++ b/src/org/joni/encoding/specific/UTF16BEEncoding.java
@@ -22,11 +22,12 @@ package org.joni.encoding.specific;
 import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.encoding.unicode.UnicodeEncoding;
+import org.joni.exception.IllegalCharacterException;
 
 public final class UTF16BEEncoding extends UnicodeEncoding {
 
     protected UTF16BEEncoding() {
-        super(UTF16EncLen);
+        super(2, 4, UTF16EncLen);
     }
     
     @Override
@@ -35,20 +36,26 @@ public final class UTF16BEEncoding extends UnicodeEncoding {
     }
     
     @Override
-    public int maxLength() {
-        return 4;
-    }
-    
-    @Override
-    public int minLength() {
-        return 2;
-    }
-    
-    @Override
-    public boolean isFixedWidth() {
-        return false;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return length(bytes[p]);
+        } else {
+            int b = bytes[p] & 0xff;
+            if (!isSurrogate(b)) {
+                return end - p >= 2 ? 2 : -1; // a lone trailing byte is one byte short, as in UTF16LEEncoding
+            }
+            if (isSurrogateFirst(b)) {
+                switch (end - p) {
+                case 1:     return -3;
+                case 2:     return -2;
+                case 3:     if (isSurrogateSecond(bytes[p + 2] & 0xff)) return -1; break;
+                default:    if (isSurrogateSecond(bytes[p + 2] & 0xff)) return 4; // complete surrogate pair
+                }
+            }
+        }
+        throw IllegalCharacterException.INSTANCE;
     }
-    
+
     @Override
     public boolean isNewLine(byte[]bytes, int p, int end) {
         if (p + 1 < end) {
@@ -63,11 +70,7 @@ public final class UTF16BEEncoding extends UnicodeEncoding {
         }
         return false;
     }
-    
-    private static boolean isSurrogateFirst(int c) {
-        return c >= 0xd8 && c <= 0xdb;
-    }
-    
+
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         int code;
@@ -92,12 +95,21 @@ public final class UTF16BEEncoding extends UnicodeEncoding {
     public int codeToMbc(int code, byte[]bytes, int p) {    
         int p_ = p;
         if (code > 0xffff) {
-            int plane = code >>> 16;
-            bytes[p_++] = (byte)((plane >>> 2) + 0xd8);
-            int high = (code & 0xff00) >>> 8;
-            bytes[p_++] = (byte)(((plane & 0x03) << 6) + (high >>> 2));
-            bytes[p_++] = (byte)((high & 0x02) + 0xdc);
-            bytes[p_]   = (byte)(code & 0xff);
+            if (Config.VANILLA) {
+                int plane = code >>> 16;
+                bytes[p_++] = (byte)((plane >>> 2) + 0xd8);
+                int high = (code & 0xff00) >>> 8;
+                bytes[p_++] = (byte)(((plane & 0x03) << 6) + (high >>> 2));
+                bytes[p_++] = (byte)((high & 0x02) + 0xdc);
+                bytes[p_]   = (byte)(code & 0xff);
+            } else {
+                int high = (code >>> 10) + 0xd7c0;
+                int low = (code & 0x3ff) + 0xdc00;
+                bytes[p_++] = (byte)((high >>> 8) & 0xff);
+                bytes[p_++] = (byte)(high & 0xff);
+                bytes[p_++] = (byte)((low >>> 8) & 0xff);
+                bytes[p_]   = (byte)(low & 0xff);
+            }
             return 4;
         } else {
             bytes[p_++] = (byte)((code & 0xff00) >>> 8);
@@ -142,10 +154,6 @@ public final class UTF16BEEncoding extends UnicodeEncoding {
         return super.ctypeCodeRange(ctype);
     }
     
-    private static boolean isSurrogateSecond(int c) {
-        return c >= 0xdc && c <= 0xdf;
-    }    
-    
     @Override
     public int leftAdjustCharHead(byte[]bytes, int p, int end) {
         if (end <= p) return end;
@@ -180,6 +188,31 @@ public final class UTF16BEEncoding extends UnicodeEncoding {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
     };
+
+    private static boolean isSurrogateFirst(int c) {
+        if (Config.VANILLA) {
+            return c >= 0xd8 && c <= 0xdb;
+        } else {
+            return (c & 0xfc) == 0xd8;
+        }
+    }
+
+    private static boolean isSurrogateSecond(int c) {
+        if (Config.VANILLA) {
+            return c >= 0xdc && c <= 0xdf;
+        } else {
+            return (c & 0xfc) == 0xdc;
+        }
+    }    
+
+    private static boolean isSurrogate(int c) {
+        if (Config.VANILLA) {
+            return (c & 0xf8) == 0;
+        } else {
+            return (c & 0xf8) == 0xd8;
+        }
+        
+    }
     
     public static final UTF16BEEncoding INSTANCE = new UTF16BEEncoding();
 }
diff --git a/src/org/joni/encoding/specific/UTF16LEEncoding.java b/src/org/joni/encoding/specific/UTF16LEEncoding.java
index d369802..7781eab 100644
--- a/src/org/joni/encoding/specific/UTF16LEEncoding.java
+++ b/src/org/joni/encoding/specific/UTF16LEEncoding.java
@@ -22,38 +22,43 @@ package org.joni.encoding.specific;
 import org.joni.Config;
 import org.joni.IntHolder;
 import org.joni.encoding.unicode.UnicodeEncoding;
+import org.joni.exception.IllegalCharacterException;
 
 public final class UTF16LEEncoding extends UnicodeEncoding {
 
     protected UTF16LEEncoding() {
-        super(UTF16BEEncoding.UTF16EncLen);
+        super(2, 4, UTF16BEEncoding.UTF16EncLen);
     }
 
     @Override
     public int length(byte c) { 
         return EncLen[(c & 0xff) + 1];       
-    }    
-    
-    @Override
-    public String toString() {
-        return "UTF-16LE";
     }
     
     @Override
-    public int maxLength() {
-        return 4;
-    }
-    
-    @Override
-    public int minLength() {
-        return 2;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return length(bytes[p]);
+        } else {
+            int length = end - p;
+            if (length < 2) return -1;
+
+            int b = bytes[p + 1] & 0xff; // little-endian: the high byte of the code unit comes second
+            if (!isSurrogate(b)) return 2;
+
+            if (isSurrogateFirst(b)) {
+                if (length < 4) return -(4 - length);
+                if (isSurrogateSecond(bytes[p + 3] & 0xff)) return 4;
+            }
+            throw IllegalCharacterException.INSTANCE;
+        }
     }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return false;
+    public String toString() {
+        return "UTF-16LE";
     }
-    
+
     @Override
     public boolean isNewLine(byte[]bytes, int p, int end) {
         if (p + 1 < end) {
@@ -69,10 +74,6 @@ public final class UTF16LEEncoding extends UnicodeEncoding {
         return false;
     }
     
-    private static boolean isSurrogateFirst(int c) {
-        return c >= 0xd8 && c <= 0xdb;
-    }
-    
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
         int code;
@@ -97,12 +98,21 @@ public final class UTF16LEEncoding extends UnicodeEncoding {
     public int codeToMbc(int code, byte[]bytes, int p) {    
         int p_ = p;
         if (code > 0xffff) {
-            int plane = code >>> 16;
-            int high = (code & 0xff00) >>> 8;
-            bytes[p_++] = (byte)(((plane & 0x03) << 6) + (high >>> 2));
-            bytes[p_++] = (byte)((plane >>> 2) + 0xd8);
-            bytes[p_++] = (byte)(code & 0xff);
-            bytes[p_  ] = (byte)((high & 0x02) + 0xdc);
+            if (Config.VANILLA) {
+                int plane = code >>> 16;
+                int high = (code & 0xff00) >>> 8;
+                bytes[p_++] = (byte)(((plane & 0x03) << 6) + (high >>> 2));
+                bytes[p_++] = (byte)((plane >>> 2) + 0xd8);
+                bytes[p_++] = (byte)(code & 0xff);
+                bytes[p_  ] = (byte)((high & 0x02) + 0xdc);
+            } else {
+                int high = (code >>> 10) + 0xd7c0;
+                int low = (code & 0x3ff) + 0xdc00;
+                bytes[p_++] = (byte)(high & 0xff);
+                bytes[p_++] = (byte)((high >>> 8) & 0xff);
+                bytes[p_++] = (byte)(low & 0xff);
+                bytes[p_]   = (byte)((low >>> 8) & 0xff);
+            }
             return 4;
         } else {
             bytes[p_++] = (byte)(code & 0xff);
@@ -146,10 +156,6 @@ public final class UTF16LEEncoding extends UnicodeEncoding {
         return super.ctypeCodeRange(ctype);
     }
     
-    private static boolean isSurrogateSecond(int c) {
-        return c >= 0xdc && c <= 0xdf;
-    }    
-    
     @Override
     public int leftAdjustCharHead(byte[]bytes, int p, int end) {
         if (end <= p) return end;
@@ -165,6 +171,31 @@ public final class UTF16LEEncoding extends UnicodeEncoding {
     public boolean isReverseMatchAllowed(byte[]bytes, int p, int end) {
         return false;
     }
+
+    private static boolean isSurrogateFirst(int c) {
+        if (Config.VANILLA) {
+            return c >= 0xd8 && c <= 0xdb;
+        } else {
+            return (c & 0xfc) == 0xd8;
+        }
+    }
+
+    private static boolean isSurrogateSecond(int c) {
+        if (Config.VANILLA) {
+            return c >= 0xdc && c <= 0xdf;
+        } else {
+            return (c & 0xfc) == 0xdc;
+        }
+    }    
+
+    private static boolean isSurrogate(int c) {
+        if (Config.VANILLA) {
+            return (c & 0xf8) == 0;
+        } else {
+            return (c & 0xf8) == 0xd8;
+        }
+        
+    }
     
     public static final UTF16LEEncoding INSTANCE = new UTF16LEEncoding();
 }
diff --git a/src/org/joni/encoding/specific/UTF32BEEncoding.java b/src/org/joni/encoding/specific/UTF32BEEncoding.java
index 2907fca..4670689 100644
--- a/src/org/joni/encoding/specific/UTF32BEEncoding.java
+++ b/src/org/joni/encoding/specific/UTF32BEEncoding.java
@@ -26,39 +26,29 @@ import org.joni.encoding.unicode.UnicodeEncoding;
 public final class UTF32BEEncoding extends UnicodeEncoding {
 
     protected UTF32BEEncoding() {
-        super(null);
+        super(4, 4, null);
     }
 
     @Override
-    public int length(byte c) { 
-        return 4;       
-    }
-
-    @Override
-    public int strLength(byte[]bytes, int p, int end) {
-        return (end - p) >>> 2;
-    }
-    
-    @Override
     public String toString() {
         return "UTF-32BE";
     }
-    
+
     @Override
-    public int maxLength() {
-        return 4;
+    public int length(byte c) { 
+        return 4;       
     }
-    
+
     @Override
-    public int minLength() {
+    public int length(byte[]bytes, int p, int end) { 
         return 4;
     }
 
     @Override
-    public boolean isFixedWidth() {
-        return true;
+    public int strLength(byte[]bytes, int p, int end) {
+        return (end - p) >>> 2;
     }
-    
+
     @Override
     public boolean isNewLine(byte[]bytes, int p, int end) {
         if (p + 3 < end) {
diff --git a/src/org/joni/encoding/specific/UTF32LEEncoding.java b/src/org/joni/encoding/specific/UTF32LEEncoding.java
index 66f4ee7..7aca470 100644
--- a/src/org/joni/encoding/specific/UTF32LEEncoding.java
+++ b/src/org/joni/encoding/specific/UTF32LEEncoding.java
@@ -26,39 +26,29 @@ import org.joni.encoding.unicode.UnicodeEncoding;
 public final class UTF32LEEncoding extends UnicodeEncoding {
 
     protected UTF32LEEncoding() {
-        super(null);
+        super(4, 4, null);
     }
-
-    @Override
-    public int length(byte c) { 
-        return 4;       
-    }
-
-    @Override
-    public int strLength(byte[]bytes, int p, int end) {
-        return (end - p) >>> 2;
-    }
-
+    
     @Override
     public String toString() {
         return "UTF-32LE";
     }
-    
+
     @Override
-    public int maxLength() {
-        return 4;
+    public int length(byte c) { 
+        return 4;       
     }
-    
+
     @Override
-    public int minLength() {
+    public int length(byte[]bytes, int p, int end) { 
         return 4;
     }
-    
+
     @Override
-    public boolean isFixedWidth() {
-        return true;
+    public int strLength(byte[]bytes, int p, int end) {
+        return (end - p) >>> 2;
     }
-    
+
     @Override
     public boolean isNewLine(byte[]bytes, int p, int end) {
         if (p + 3 < end) {
diff --git a/src/org/joni/encoding/specific/UTF8Encoding.java b/src/org/joni/encoding/specific/UTF8Encoding.java
index ac1fd2a..6771855 100644
--- a/src/org/joni/encoding/specific/UTF8Encoding.java
+++ b/src/org/joni/encoding/specific/UTF8Encoding.java
@@ -29,29 +29,23 @@ public final class UTF8Encoding extends UnicodeEncoding {
     static final boolean USE_INVALID_CODE_SCHEME = true; 
 
     protected UTF8Encoding() {
-        super(UTF8EncLen);
+        super(1, 6, UTF8EncLen, UTF8Trans);
     }
     
     @Override
     public String toString() {
         return "UTF-8";
     }
-    
-    @Override
-    public int maxLength() {
-        return 6;
-    }
-    
+
     @Override
-    public int minLength() {
-        return 1;
+    public int length(byte[]bytes, int p, int end) {
+        if (Config.VANILLA) {
+            return length(bytes[p]);
+        } else {
+            return safeLengthForUptoFour(bytes, p, end);
+        }
     }
-    
-    @Override
-    public boolean isFixedWidth() {
-        return false;
-    }   
-    
+
     @Override
     public boolean isNewLine(byte[]bytes, int p, int end) {
         if (p < end) {
@@ -102,7 +96,7 @@ public final class UTF8Encoding extends UnicodeEncoding {
     
     @Override
     public int mbcToCode(byte[]bytes, int p, int end) {
-        int len = length(bytes[p]);
+        int len = length(bytes, p, end);
         
         int c = bytes[p++] & 0xff;
         
@@ -227,7 +221,8 @@ public final class UTF8Encoding extends UnicodeEncoding {
         return true;
     }
     
-    static final int UTF8EncLen[] = {
+    private static final int UTF8EncLen[] = Config.VANILLA ? 
+        new int[]{
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -244,7 +239,171 @@ public final class UTF8Encoding extends UnicodeEncoding {
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
-    };  
+    } : new int[] {
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+    };
+
+    private static final int UTF8Trans[][] = Config.VANILLA ? null : new int[][]{
+        { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
+          /* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S3   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S4   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S5   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S6   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        },
+        { /* S7   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+          /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+          /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+          /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+        }
+    };
 
-    public static final UTF8Encoding INSTANCE = new UTF8Encoding();    
+    public static final UTF8Encoding INSTANCE = new UTF8Encoding();
 }
diff --git a/src/org/joni/encoding/unicode/UnicodeEncoding.java b/src/org/joni/encoding/unicode/UnicodeEncoding.java
index fee01f6..095488b 100644
--- a/src/org/joni/encoding/unicode/UnicodeEncoding.java
+++ b/src/org/joni/encoding/unicode/UnicodeEncoding.java
@@ -32,12 +32,17 @@ import org.joni.exception.ValueException;
 
 public abstract class UnicodeEncoding extends MultiByteEncoding {
     private static final int PROPERTY_NAME_MAX_SIZE = 20;
-    
-    protected UnicodeEncoding(int[]EncLen) {
+
+    protected UnicodeEncoding(int minLength, int maxLength, int[]EncLen) {
+        // ASCII type tables for all Unicode encodings
+        super(minLength, maxLength, EncLen, null, UNICODE_ISO_8859_1_CTypeTable);
+    }
+
+    protected UnicodeEncoding(int minLength, int maxLength, int[]EncLen, int[][]Trans) {
         // ASCII type tables for all Unicode encodings        
-        super(EncLen, UNICODE_ISO_8859_1_CTypeTable);
+        super(minLength, maxLength, EncLen, Trans, UNICODE_ISO_8859_1_CTypeTable);
     }
-    
+
     // onigenc_unicode_is_code_ctype
     @Override
     public final boolean isCodeCType(int code, int ctype) {
@@ -74,7 +79,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
             if (code >= 0x80) throw new ValueException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME);
             buf[len++] = (byte)code;
             if (len >= PROPERTY_NAME_MAX_SIZE) throw new ValueException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME, name, p, end);
-            p_ += length(name[p_]);
+            p_ += length(name, p_, end);
         }
 
         Integer ctype = UnicodeCTypeNames.CTypeNameHash.get(buf, 0, len);
@@ -89,7 +94,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
         int foldP = 0;
         
         int code = mbcToCode(bytes, p, end);
-        int len = length(bytes[p]);
+        int len = length(bytes, p, end);
         pp.value += len;
         
         if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
@@ -236,8 +241,8 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
     @Override
     public CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
         int code = mbcToCode(bytes, p, end);
-        int len = length(bytes[p]);
-        
+        int len = length(bytes, p, end);
+
         if (Config.USE_UNICODE_CASE_FOLD_TURKISH_AZERI) {
             if ((flag & Config.ENC_CASE_FOLD_TURKISH_AZERI) != 0) {
                 if (code == 0x0049) {
@@ -356,7 +361,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
                     codes[1] = code;
                 }
                     
-                int clen = length(bytes[p]);
+                int clen = length(bytes, p, end);
                 len += clen;
                 int[]z2 = UnicodeCaseFolds.Unfold2Hash.get(codes);
                 if (z2 != null) {
@@ -374,7 +379,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
                     } else {
                         codes[2] = code;
                     }
-                    clen = length(bytes[p]);
+                    clen = length(bytes, p, end);
                     len += clen;
                     z2 = UnicodeCaseFolds.Unfold3Hash.get(codes);
                     if (z2 != null) {
diff --git a/src/org/joni/exception/ErrorMessages.java b/src/org/joni/exception/ErrorMessages.java
index 9b6b42f..fb67447 100644
--- a/src/org/joni/exception/ErrorMessages.java
+++ b/src/org/joni/exception/ErrorMessages.java
@@ -95,4 +95,6 @@ public interface ErrorMessages {
 
     final String ERR_ENCODING_CLASS_DEF_NOT_FOUND = "encoding class <%n> not found";
     final String ERR_ENCODING_LOAD_ERROR = "problem loading encoding <%n>";
+    
+    final String ERR_ILLEGAL_CHARACTER = "illegal character";
 }
diff --git a/src/org/joni/encoding/EucEncoding.java b/src/org/joni/exception/IllegalCharacterException.java
similarity index 60%
copy from src/org/joni/encoding/EucEncoding.java
copy to src/org/joni/exception/IllegalCharacterException.java
index 2cb26cd..bffefbc 100644
--- a/src/org/joni/encoding/EucEncoding.java
+++ b/src/org/joni/exception/IllegalCharacterException.java
@@ -1,42 +1,27 @@
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy of 
- * this software and associated documentation files (the "Software"), to deal in 
- * the Software without restriction, including without limitation the rights to 
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
- * of the Software, and to permit persons to whom the Software is furnished to do
- * so, subject to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
- * SOFTWARE.
- */
-package org.joni.encoding;
-
-public abstract class EucEncoding extends MultiByteEncoding {
-    
-    protected EucEncoding(int[]EncLen, short[]CTypeTable) {
-        super(EncLen, CTypeTable);
-    }
-    
-    protected abstract boolean isLead(int c);
-    
-    @Override
-    public int leftAdjustCharHead(byte[]bytes, int p, int end) {
-        /* In this encoding mb-trail bytes doesn't mix with single bytes. */
-        if (end <= p) return end;
-        int p_ = end;
-        while (!isLead(bytes[p_] & 0xff) && p_ > p) p_--;
-        int len = length(bytes[p_]);
-        if (p_ + len > end) return p_;
-        
-        p_ += len;
-        return p_ + ((end - p_) & ~1);
-    }    
-}
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of 
+ * this software and associated documentation files (the "Software"), to deal in 
+ * the Software without restriction, including without limitation the rights to 
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+ * SOFTWARE.
+ */
+package org.joni.exception;
+
+public class IllegalCharacterException extends JOniException {
+    private IllegalCharacterException() {
+        super(ErrorMessages.ERR_ILLEGAL_CHARACTER);
+    }
+    public static final IllegalCharacterException INSTANCE = new IllegalCharacterException();
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list