[jruby-joni] 02/08: point reset (\K) https://stackoverflow.com/a/2136600
Hideki Yamane
henrich at moszumanska.debian.org
Mon Dec 11 12:37:36 UTC 2017
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit 59dbb27ca0ab4b9617576286e3a1fd706e2a84f1
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date: Tue Nov 28 20:22:04 2017 +0100
point reset (\K) https://stackoverflow.com/a/2136600
---
src/org/joni/ByteCodeMachine.java | 64 ++++++++++++++++++++++-----------------
src/org/joni/StackEntry.java | 10 +++++-
src/org/joni/StackMachine.java | 22 ++++++++------
src/org/joni/Syntax.java | 2 +-
test/org/joni/test/TestU8.java | 11 +++++++
5 files changed, 69 insertions(+), 40 deletions(-)
diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
index 5cf643e..5ac56b9 100644
--- a/src/org/joni/ByteCodeMachine.java
+++ b/src/org/joni/ByteCodeMachine.java
@@ -47,6 +47,7 @@ class ByteCodeMachine extends StackMachine {
private int sprev;
private int sstart;
private int sbegin;
+ private int pkeep;
private final int[]code; // byte code
private int ip; // instruction pointer
@@ -99,7 +100,7 @@ class ByteCodeMachine extends StackMachine {
// was clear ???
node.group = 0;
- node.beg = sstart - str;
+ node.beg = ((pkeep > s) ? s : pkeep) - str;
node.end = s - str;
stkp = 0;
@@ -186,6 +187,7 @@ class ByteCodeMachine extends StackMachine {
bestLen = -1;
s = sstart;
+ pkeep = sstart;
Thread currentThread = Thread.currentThread();
final int[]code = this.code;
@@ -252,6 +254,7 @@ class ByteCodeMachine extends StackMachine {
case OPCode.MEMORY_START: opMemoryStart(); continue;
case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue;
case OPCode.MEMORY_END: opMemoryEnd(); continue;
+ case OPCode.KEEP: opKeep(); continue;
case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue;
case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue;
@@ -360,7 +363,7 @@ class ByteCodeMachine extends StackMachine {
final Region region = msaRegion;
if (region != null) {
// USE_POSIX_REGION_OPTION ... else ...
- region.beg[0] = msaBegin = sstart - str;
+ region.beg[0] = msaBegin = ((pkeep > s) ? s : pkeep) - str;
region.end[0] = msaEnd = s - str;
for (int i = 1; i <= regex.numMem; i++) {
// opt!
@@ -384,7 +387,7 @@ class ByteCodeMachine extends StackMachine {
if (regex.captureHistory != 0) checkCaptureHistory(region);
}
} else {
- msaBegin = sstart - str;
+ msaBegin = ((pkeep > s) ? s : pkeep) - str;
msaEnd = s - str;
}
} else {
@@ -874,7 +877,7 @@ class ByteCodeMachine extends StackMachine {
private void opAnyCharStar() {
final byte[]bytes = this.bytes;
while (s < range) {
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
int n = enc.length(bytes, s, end);
if (s + n > range) {opFail(); return;}
if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
@@ -887,7 +890,7 @@ class ByteCodeMachine extends StackMachine {
private void opAnyCharStarSb() {
final byte[]bytes = this.bytes;
while (s < range) {
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
sprev = s;
s++;
@@ -898,7 +901,7 @@ class ByteCodeMachine extends StackMachine {
private void opAnyCharMLStar() {
final byte[]bytes = this.bytes;
while (s < range) {
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
int n = enc.length(bytes, s, end);
if (s + n > range) {opFail(); return;}
sprev = s;
@@ -909,7 +912,7 @@ class ByteCodeMachine extends StackMachine {
private void opAnyCharMLStarSb() {
while (s < range) {
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
sprev = s;
s++;
}
@@ -921,7 +924,7 @@ class ByteCodeMachine extends StackMachine {
final byte[]bytes = this.bytes;
while (s < range) {
- if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+ if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep);
int n = enc.length(bytes, s, end);
if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}
sprev = s;
@@ -937,7 +940,7 @@ class ByteCodeMachine extends StackMachine {
while (s < range) {
byte b = bytes[s];
- if (c == b) pushAlt(ip + 1, s, sprev);
+ if (c == b) pushAlt(ip + 1, s, sprev, pkeep);
if (b == Encoding.NEW_LINE) {opFail(); return;}
sprev = s;
s++;
@@ -951,7 +954,7 @@ class ByteCodeMachine extends StackMachine {
final byte[]bytes = this.bytes;
while (s < range) {
- if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+ if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep);
int n = enc.length(bytes, s, end);
if (s + n > range) {opFail(); return;}
sprev = s;
@@ -966,7 +969,7 @@ class ByteCodeMachine extends StackMachine {
final byte[]bytes = this.bytes;
while (s < range) {
- if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+ if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep);
sprev = s;
s++;
}
@@ -981,7 +984,7 @@ class ByteCodeMachine extends StackMachine {
while (s < range) {
if (stateCheckVal(s, mem)) {opFail(); return;}
- pushAltWithStateCheck(ip, s, sprev, mem);
+ pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
int n = enc.length(bytes, s, end);
if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}
sprev = s;
@@ -996,7 +999,7 @@ class ByteCodeMachine extends StackMachine {
while (s < range) {
if (stateCheckVal(s, mem)) {opFail(); return;}
- pushAltWithStateCheck(ip, s, sprev, mem);
+ pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
sprev = s;
s++;
@@ -1011,7 +1014,7 @@ class ByteCodeMachine extends StackMachine {
final byte[]bytes = this.bytes;
while (s < range) {
if (stateCheckVal(s, mem)) {opFail(); return;}
- pushAltWithStateCheck(ip, s, sprev, mem);
+ pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
int n = enc.length(bytes, s, end);
if (s + n > range) {opFail(); return;}
sprev = s;
@@ -1025,7 +1028,7 @@ class ByteCodeMachine extends StackMachine {
while (s < range) {
if (stateCheckVal(s, mem)) {opFail(); return;}
- pushAltWithStateCheck(ip, s, sprev, mem);
+ pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
sprev = s;
s++;
}
@@ -1204,6 +1207,10 @@ class ByteCodeMachine extends StackMachine {
repeatStk[memEndStk + mem] = s;
}
+ private void opKeep() {
+ pkeep = s;
+ }
+
private void opMemoryEndPushRec() {
int mem = code[ip++];
int stkp = getMemStart(mem); /* should be before push mem-end. */
@@ -1438,7 +1445,7 @@ class ByteCodeMachine extends StackMachine {
/* no need: IS_DYNAMIC_OPTION() == 0 */
private void opSetOptionPush() {
// option = code[ip++]; // final for now
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
ip += OPSize.SET_OPTION + OPSize.FAIL;
}
@@ -1527,7 +1534,7 @@ class ByteCodeMachine extends StackMachine {
private void opPush() {
int addr = code[ip++];
- pushAlt(ip + addr, s, sprev);
+ pushAlt(ip + addr, s, sprev, pkeep);
}
// CEC
@@ -1535,7 +1542,7 @@ class ByteCodeMachine extends StackMachine {
int mem = code[ip++];
if (stateCheckVal(s, mem)) {opFail(); return;}
int addr = code[ip++];
- pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ pushAltWithStateCheck(ip + addr, s, sprev, mem, pkeep);
}
// CEC
@@ -1546,7 +1553,7 @@ class ByteCodeMachine extends StackMachine {
if (stateCheckVal(s, mem)) {
ip += addr;
} else {
- pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ pushAltWithStateCheck(ip + addr, s, sprev, mem, pkeep);
}
}
@@ -1566,7 +1573,7 @@ class ByteCodeMachine extends StackMachine {
// beyond string check
if (s < range && code[ip] == bytes[s]) {
ip++;
- pushAlt(ip + addr, s, sprev);
+ pushAlt(ip + addr, s, sprev, pkeep);
return;
}
ip += addr + 1;
@@ -1577,7 +1584,7 @@ class ByteCodeMachine extends StackMachine {
// beyond string check
if (s < range && code[ip] == bytes[s]) {
ip++;
- pushAlt(ip + addr, s, sprev);
+ pushAlt(ip + addr, s, sprev, pkeep);
return;
}
ip++;
@@ -1592,7 +1599,7 @@ class ByteCodeMachine extends StackMachine {
pushRepeat(mem, ip);
if (regex.repeatRangeLo[mem] == 0) { // lower
- pushAlt(ip + addr, s, sprev);
+ pushAlt(ip + addr, s, sprev, pkeep);
}
}
@@ -1605,7 +1612,7 @@ class ByteCodeMachine extends StackMachine {
pushRepeat(mem, ip);
if (regex.repeatRangeLo[mem] == 0) {
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
ip += addr;
}
}
@@ -1618,7 +1625,7 @@ class ByteCodeMachine extends StackMachine {
if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
/* end of repeat. Nothing to do. */
} else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
- pushAlt(ip, s, sprev);
+ pushAlt(ip, s, sprev, pkeep);
ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
} else {
ip = e.getRepeatPCode();
@@ -1647,7 +1654,7 @@ class ByteCodeMachine extends StackMachine {
if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
int pcode = e.getRepeatPCode();
pushRepeatInc(si);
- pushAlt(pcode, s, sprev);
+ pushAlt(pcode, s, sprev, pkeep);
} else {
ip = e.getRepeatPCode();
pushRepeatInc(si);
@@ -1670,7 +1677,7 @@ class ByteCodeMachine extends StackMachine {
}
private void opPushPos() {
- pushPos(s, sprev);
+ pushPos(s, sprev, pkeep);
}
private void opPopPos() {
@@ -1681,7 +1688,7 @@ class ByteCodeMachine extends StackMachine {
private void opPushPosNot() {
int addr = code[ip++];
- pushPosNot(ip + addr, s, sprev);
+ pushPosNot(ip + addr, s, sprev, pkeep);
}
private void opFailPos() {
@@ -1721,7 +1728,7 @@ class ByteCodeMachine extends StackMachine {
ip += addr;
// return FAIL;
} else {
- pushLookBehindNot(ip + addr, s, sprev);
+ pushLookBehindNot(ip + addr, s, sprev, pkeep);
s = q;
sprev = enc.prevCharHead(bytes, str, s, end);
}
@@ -1754,6 +1761,7 @@ class ByteCodeMachine extends StackMachine {
ip = e.getStatePCode();
s = e.getStatePStr();
sprev = e.getStatePStrPrev();
+ pkeep = e.getPKeep();
if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
if (e.getStateCheck() != 0) {
diff --git a/src/org/joni/StackEntry.java b/src/org/joni/StackEntry.java
index 81b1785..24de7b6 100644
--- a/src/org/joni/StackEntry.java
+++ b/src/org/joni/StackEntry.java
@@ -21,7 +21,7 @@ package org.joni;
final class StackEntry {
int type;
- private int E1, E2, E3, E4;
+ private int E1, E2, E3, E4, E5;
// first union member
/* byte code position */
@@ -49,10 +49,18 @@ final class StackEntry {
void setStateCheck(int check) {
E4 = check;
}
+
int getStateCheck() {
return E4;
}
+ void setPKeep(int pkeep) {
+ E5 = pkeep;
+ }
+
+ int getPKeep() {
+ return E5;
+ }
// second union member
/* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
void setRepeatCount(int count) {
diff --git a/src/org/joni/StackMachine.java b/src/org/joni/StackMachine.java
index d5210ca..7715ae9 100644
--- a/src/org/joni/StackMachine.java
+++ b/src/org/joni/StackMachine.java
@@ -163,13 +163,14 @@ abstract class StackMachine extends Matcher implements StackType {
stateCheckBuffSize = 0;
}
- private void push(int type, int pat, int s, int prev) {
+ private void push(int type, int pat, int s, int prev, int pkeep) {
StackEntry e = ensure1();
e.type = type;
e.setStatePCode(pat);
e.setStatePStr(s);
e.setStatePStrPrev(prev);
if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+ e.setPKeep(pkeep);
stk++;
}
@@ -181,13 +182,14 @@ abstract class StackMachine extends Matcher implements StackType {
stk++;
}
- protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) {
+ protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum, int pkeep) {
StackEntry e = ensure1();
e.type = ALT;
e.setStatePCode(pat);
e.setStatePStr(s);
e.setStatePStrPrev(sprev);
if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(stateCheckBuff != null ? snum : 0);
+ e.setPKeep(pkeep);
stk++;
}
@@ -201,24 +203,24 @@ abstract class StackMachine extends Matcher implements StackType {
}
}
- protected final void pushAlt(int pat, int s, int prev) {
- push(ALT, pat, s, prev);
+ protected final void pushAlt(int pat, int s, int prev, int pkeep) {
+ push(ALT, pat, s, prev, pkeep);
}
- protected final void pushPos(int s, int prev) {
- push(POS, -1 /*NULL_UCHARP*/, s, prev);
+ protected final void pushPos(int s, int prev, int pkeep) {
+ push(POS, -1 /*NULL_UCHARP*/, s, prev, pkeep);
}
- protected final void pushPosNot(int pat, int s, int prev) {
- push(POS_NOT, pat, s, prev);
+ protected final void pushPosNot(int pat, int s, int prev, int pkeep) {
+ push(POS_NOT, pat, s, prev, pkeep);
}
protected final void pushStopBT() {
pushType(STOP_BT);
}
- protected final void pushLookBehindNot(int pat, int s, int sprev) {
- push(LOOK_BEHIND_NOT, pat, s, sprev);
+ protected final void pushLookBehindNot(int pat, int s, int sprev, int pkeep) {
+ push(LOOK_BEHIND_NOT, pat, s, sprev, pkeep);
}
protected final void pushRepeat(int id, int pat) {
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index 5ec30f7..ac1f203 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -422,7 +422,7 @@ public final class Syntax implements SyntaxProperties{
OP2_ESC_H_XDIGIT |
OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
OP2_QMARK_LPAREN_CONDITION |
- OP2_ESC_CAPITAL_R_LINEBREAK // | OP2_ESC_CAPITAL_K_KEEP
+ OP2_ESC_CAPITAL_R_LINEBREAK | OP2_ESC_CAPITAL_K_KEEP
),
( GNU_REGEX_BV |
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index de8433a..b5c54e6 100644
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -99,6 +99,17 @@ public class TestU8 extends Test {
x2s("(?=((?<x>)(\\k<x>)))", "", 0, 0);
x2s("a\\g<0>*z", "aaazzz", 0, 6);
+
+ x2s("ab\\Kcd", "abcd", 2, 4);
+ x2s("ab\\Kc(\\Kd|z)", "abcd", 3, 4);
+ x2s("ab\\Kc(\\Kz|d)", "abcd", 2, 4);
+ x2s("(a\\K)*", "aaab", 3, 3);
+ x3s("(a\\K)*", "aaab", 2, 3, 1);
+ // x2s("a\\K?a", "aa", 0, 2); // error: differ from perl
+ x2s("ab(?=c\\Kd)", "abcd", 2, 2); // This behaviour is currently not well defined. (see: perlre)
+ x2s("(?<=a\\Kb|aa)cd", "abcd", 1, 4); // ...
+ x2s("(?<=ab|a\\Ka)cd", "abcd", 2, 4); // ...
+
}
public static void main(String[] args) throws Throwable {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits mailing list