[jruby-joni] 102/194: fix ignore case

Hideki Yamane henrich at moszumanska.debian.org
Thu Feb 1 12:04:31 UTC 2018


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit f861a46f14c5e243716a03fe596078d58ea2d0ab
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date:   Thu Jan 4 17:43:19 2018 +0100

    fix ignore case
---
 src/org/joni/Analyser.java     |  3 ++-
 src/org/joni/OptExactInfo.java | 36 ++++++++++++++++++------------------
 src/org/joni/Regex.java        |  2 +-
 test/org/joni/test/TestU8.java | 21 +++++++++++++++++++++
 4 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 98a4dd2..b267e85 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -2022,6 +2022,7 @@ final class Analyser extends Parser {
 
             if (!sn.isAmbig()) {
                 opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
+                opt.exb.ignoreCase = 0;
 
                 if (slen > 0) {
                     opt.map.addChar(sn.bytes[sn.p], enc);
@@ -2035,7 +2036,7 @@ final class Analyser extends Parser {
                     max = enc.maxLengthDistance() * n;
                 } else {
                     opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
-                    opt.exb.ignoreCase = true;
+                    opt.exb.ignoreCase = 1;
 
                     if (slen > 0) {
                         opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag);
diff --git a/src/org/joni/OptExactInfo.java b/src/org/joni/OptExactInfo.java
index ef9fb78..2ad4f27 100644
--- a/src/org/joni/OptExactInfo.java
+++ b/src/org/joni/OptExactInfo.java
@@ -26,10 +26,8 @@ final class OptExactInfo {
 
     final MinMaxLen mmd = new MinMaxLen();
     final OptAnchorInfo anchor = new OptAnchorInfo();
-
     boolean reachEnd;
-    boolean ignoreCase;
-
+    int ignoreCase; /* -1: unset, 0: case sensitive, 1: ignore case */
     final byte bytes[] = new byte[OPT_EXACT_MAXLEN];
     int length;
 
@@ -40,9 +38,8 @@ final class OptExactInfo {
     void clear() {
         mmd.clear();
         anchor.clear();
-
         reachEnd = false;
-        ignoreCase = false;
+        ignoreCase = -1;
         length = 0;
     }
 
@@ -57,19 +54,20 @@ final class OptExactInfo {
     }
 
     void concat(OptExactInfo other, Encoding enc) {
-        if (!ignoreCase && other.ignoreCase) {
-            if (length >= other.length) return; /* avoid */
-            ignoreCase = true;
+        if (ignoreCase < 0) {
+            ignoreCase = other.ignoreCase;
+        } else if (ignoreCase != other.ignoreCase) {
+            return;
         }
 
         int p = 0; // add->s;
         int end = p + other.length;
 
         int i;
-        for (i=length; p < end;) {
+        for (i = length; p < end;) {
             int len = enc.length(other.bytes, p, end);
             if (i + len > OPT_EXACT_MAXLEN) break;
-            for (int j=0; j<len && p < end; j++) {
+            for (int j = 0; j < len && p < end; j++) {
                 bytes[i++] = other.bytes[p++]; // arraycopy or even don't copy anything ??
             }
         }
@@ -79,21 +77,19 @@ final class OptExactInfo {
 
         OptAnchorInfo tmp = new OptAnchorInfo();
         tmp.concat(anchor, other.anchor, 1, 1);
-        if (!other.reachEnd) tmp.rightAnchor = 0;
+        if (!reachEnd) tmp.rightAnchor = 0;
         anchor.copy(tmp);
     }
 
-    // ?? raw is not used here
     void concatStr(byte[]lbytes, int p, int end, boolean raw, Encoding enc) {
         int i;
         for (i = length; p < end && i < OPT_EXACT_MAXLEN;) {
             int len = enc.length(lbytes, p, end);
             if (i + len > OPT_EXACT_MAXLEN) break;
-            for (int j=0; j<len && p < end; j++) {
+            for (int j = 0; j < len && p < end; j++) {
                 bytes[i++] = lbytes[p++];
             }
         }
-
         length = i;
     }
 
@@ -125,7 +121,11 @@ final class OptExactInfo {
         if (!other.reachEnd || i<other.length || i<length) reachEnd = false;
 
         length = i;
-        ignoreCase |= other.ignoreCase;
+        if (ignoreCase < 0) {
+            ignoreCase = other.ignoreCase;
+        } else if (other.ignoreCase >= 0) {
+            ignoreCase |= other.ignoreCase;
+        }
 
         anchor.altMerge(other.anchor);
 
@@ -151,8 +151,8 @@ final class OptExactInfo {
             if (alt.length > 1) v2 += 5;
         }
 
-        if (!ignoreCase) v1 *= 2;
-        if (!alt.ignoreCase) v2 *= 2;
+        if (ignoreCase <= 0) v1 *= 2;
+        if (alt.ignoreCase <= 0) v2 *= 2;
 
         if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt);
     }
@@ -162,7 +162,7 @@ final class OptExactInfo {
     int compare(OptMapInfo m) {
         if (m.value <= 0) return -1;
 
-        int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2);
+        int ve = COMP_EM_BASE * length * (ignoreCase > 0 ? 1 : 2);
         int vm = COMP_EM_BASE * 5 * 2 / m.value;
 
         return mmd.compareDistanceValue(m.mmd, ve, vm);
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
index 92ffdc0..eb281d4 100644
--- a/src/org/joni/Regex.java
+++ b/src/org/joni/Regex.java
@@ -304,7 +304,7 @@ public final class Regex {
         exactP = 0;
         exactEnd = e.length;
 
-        if (e.ignoreCase) {
+        if (e.ignoreCase > 0) {
             // encodings won't return toLowerTable for case insensitive search if it's not safe to use it directly
             searchAlgorithm = enc.toLowerCaseTable() != null ? SearchAlgorithm.SLOW_IC_SB : SearchAlgorithm.SLOW_IC;
         } else {
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index 9a5bdea..3ec6385 100755
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -157,6 +157,27 @@ public class TestU8 extends Test {
         x2s("\\A\\X\\X\\z", "\r\u0308", 0, 3);
         x2s("\\A\\X\\X\\z", "\n\u0308", 0, 3);
 
+        x2s("[0-9-a]+", " 0123456789-a ", 1, 13);
+        x2s("[0-9-\\s]+", " 0123456789-a ", 0, 12);
+        x2s("[0-9-あ\\\\/\u0001]+", " 0123456789-あ\\/\u0001 ", 1, 18);
+        x2s("[a-b-]+", "ab-", 0, 3);
+        x2s("[a-b-&&-]+", "ab-", 2, 3);
+        x2s("(?i)[a[b-あ]]+", "abあ", 0, 5);
+        x2s("(?i)[\\d[:^graph:]]+", "0あ", 0, 1);
+        x2s("(?ia)[\\d[:^print:]]+", "0あ", 0, 4);
+
+        x2s("(?i:a) B", "a B", 0, 3);
+        x2s("(?i:a )B", "a B", 0, 3);
+        x2s("B (?i:a)", "B a", 0, 3);
+        x2s("B(?i: a)", "B a", 0, 3);
+
+        x2s("(?a)[\\p{Space}\\d]", "\u00a0", 0, 2);
+        x2s("(?a)[\\d\\p{Space}]", "\u00a0", 0, 2);
+        ns("(?a)[^\\p{Space}\\d]", "\u00a0");
+        ns("(?a)[^\\d\\p{Space}]", "\u00a0");
+        x2s("(?d)[[:space:]\\d]", "\u00a0", 0, 2);
+        ns("(?d)[^\\d[:space:]]", "\u00a0");
+
         x2s("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 6);
         x2s("(?i)\u1ffc", "\u2126\u1fbe", 0, 6);
         x2s("(?i)\u1ffc", "\u1ff3", 0, 3);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list