[jruby-joni] 102/194: fix ignore case
Hideki Yamane
henrich at moszumanska.debian.org
Thu Feb 1 12:04:31 UTC 2018
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit f861a46f14c5e243716a03fe596078d58ea2d0ab
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date: Thu Jan 4 17:43:19 2018 +0100
fix ignore case
---
src/org/joni/Analyser.java | 3 ++-
src/org/joni/OptExactInfo.java | 36 ++++++++++++++++++------------------
src/org/joni/Regex.java | 2 +-
test/org/joni/test/TestU8.java | 21 +++++++++++++++++++++
4 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 98a4dd2..b267e85 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -2022,6 +2022,7 @@ final class Analyser extends Parser {
if (!sn.isAmbig()) {
opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
+ opt.exb.ignoreCase = 0;
if (slen > 0) {
opt.map.addChar(sn.bytes[sn.p], enc);
@@ -2035,7 +2036,7 @@ final class Analyser extends Parser {
max = enc.maxLengthDistance() * n;
} else {
opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc);
- opt.exb.ignoreCase = true;
+ opt.exb.ignoreCase = 1;
if (slen > 0) {
opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag);
diff --git a/src/org/joni/OptExactInfo.java b/src/org/joni/OptExactInfo.java
index ef9fb78..2ad4f27 100644
--- a/src/org/joni/OptExactInfo.java
+++ b/src/org/joni/OptExactInfo.java
@@ -26,10 +26,8 @@ final class OptExactInfo {
final MinMaxLen mmd = new MinMaxLen();
final OptAnchorInfo anchor = new OptAnchorInfo();
-
boolean reachEnd;
- boolean ignoreCase;
-
+ int ignoreCase; /* -1: unset, 0: case sensitive, 1: ignore case */
final byte bytes[] = new byte[OPT_EXACT_MAXLEN];
int length;
@@ -40,9 +38,8 @@ final class OptExactInfo {
void clear() {
mmd.clear();
anchor.clear();
-
reachEnd = false;
- ignoreCase = false;
+ ignoreCase = -1;
length = 0;
}
@@ -57,19 +54,20 @@ final class OptExactInfo {
}
void concat(OptExactInfo other, Encoding enc) {
- if (!ignoreCase && other.ignoreCase) {
- if (length >= other.length) return; /* avoid */
- ignoreCase = true;
+ if (ignoreCase < 0) {
+ ignoreCase = other.ignoreCase;
+ } else if (ignoreCase != other.ignoreCase) {
+ return;
}
int p = 0; // add->s;
int end = p + other.length;
int i;
- for (i=length; p < end;) {
+ for (i = length; p < end;) {
int len = enc.length(other.bytes, p, end);
if (i + len > OPT_EXACT_MAXLEN) break;
- for (int j=0; j<len && p < end; j++) {
+ for (int j = 0; j < len && p < end; j++) {
bytes[i++] = other.bytes[p++]; // arraycopy or even don't copy anything ??
}
}
@@ -79,21 +77,19 @@ final class OptExactInfo {
OptAnchorInfo tmp = new OptAnchorInfo();
tmp.concat(anchor, other.anchor, 1, 1);
- if (!other.reachEnd) tmp.rightAnchor = 0;
+ if (!reachEnd) tmp.rightAnchor = 0;
anchor.copy(tmp);
}
- // ?? raw is not used here
void concatStr(byte[]lbytes, int p, int end, boolean raw, Encoding enc) {
int i;
for (i = length; p < end && i < OPT_EXACT_MAXLEN;) {
int len = enc.length(lbytes, p, end);
if (i + len > OPT_EXACT_MAXLEN) break;
- for (int j=0; j<len && p < end; j++) {
+ for (int j = 0; j < len && p < end; j++) {
bytes[i++] = lbytes[p++];
}
}
-
length = i;
}
@@ -125,7 +121,11 @@ final class OptExactInfo {
if (!other.reachEnd || i<other.length || i<length) reachEnd = false;
length = i;
- ignoreCase |= other.ignoreCase;
+ if (ignoreCase < 0) {
+ ignoreCase = other.ignoreCase;
+ } else if (other.ignoreCase >= 0) {
+ ignoreCase |= other.ignoreCase;
+ }
anchor.altMerge(other.anchor);
@@ -151,8 +151,8 @@ final class OptExactInfo {
if (alt.length > 1) v2 += 5;
}
- if (!ignoreCase) v1 *= 2;
- if (!alt.ignoreCase) v2 *= 2;
+ if (ignoreCase <= 0) v1 *= 2;
+ if (alt.ignoreCase <= 0) v2 *= 2;
if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt);
}
@@ -162,7 +162,7 @@ final class OptExactInfo {
int compare(OptMapInfo m) {
if (m.value <= 0) return -1;
- int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2);
+ int ve = COMP_EM_BASE * length * (ignoreCase > 0 ? 1 : 2);
int vm = COMP_EM_BASE * 5 * 2 / m.value;
return mmd.compareDistanceValue(m.mmd, ve, vm);
diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java
index 92ffdc0..eb281d4 100644
--- a/src/org/joni/Regex.java
+++ b/src/org/joni/Regex.java
@@ -304,7 +304,7 @@ public final class Regex {
exactP = 0;
exactEnd = e.length;
- if (e.ignoreCase) {
+ if (e.ignoreCase > 0) {
// encodings won't return toLowerTable for case insensitive search if it's not safe to use it directly
searchAlgorithm = enc.toLowerCaseTable() != null ? SearchAlgorithm.SLOW_IC_SB : SearchAlgorithm.SLOW_IC;
} else {
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index 9a5bdea..3ec6385 100755
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -157,6 +157,27 @@ public class TestU8 extends Test {
x2s("\\A\\X\\X\\z", "\r\u0308", 0, 3);
x2s("\\A\\X\\X\\z", "\n\u0308", 0, 3);
+ x2s("[0-9-a]+", " 0123456789-a ", 1, 13);
+ x2s("[0-9-\\s]+", " 0123456789-a ", 0, 12);
+ x2s("[0-9-あ\\\\/\u0001]+", " 0123456789-あ\\/\u0001 ", 1, 18);
+ x2s("[a-b-]+", "ab-", 0, 3);
+ x2s("[a-b-&&-]+", "ab-", 2, 3);
+ x2s("(?i)[a[b-あ]]+", "abあ", 0, 5);
+ x2s("(?i)[\\d[:^graph:]]+", "0あ", 0, 1);
+ x2s("(?ia)[\\d[:^print:]]+", "0あ", 0, 4);
+
+ x2s("(?i:a) B", "a B", 0, 3);
+ x2s("(?i:a )B", "a B", 0, 3);
+ x2s("B (?i:a)", "B a", 0, 3);
+ x2s("B(?i: a)", "B a", 0, 3);
+
+ x2s("(?a)[\\p{Space}\\d]", "\u00a0", 0, 2);
+ x2s("(?a)[\\d\\p{Space}]", "\u00a0", 0, 2);
+ ns("(?a)[^\\p{Space}\\d]", "\u00a0");
+ ns("(?a)[^\\d\\p{Space}]", "\u00a0");
+ x2s("(?d)[[:space:]\\d]", "\u00a0", 0, 2);
+ ns("(?d)[^\\d[:space:]]", "\u00a0");
+
x2s("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 6);
x2s("(?i)\u1ffc", "\u2126\u1fbe", 0, 6);
x2s("(?i)\u1ffc", "\u1ff3", 0, 3);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits mailing list