[jruby-joni] 02/08: point reset (\K) https://stackoverflow.com/a/2136600

Hideki Yamane henrich at moszumanska.debian.org
Mon Dec 11 12:37:36 UTC 2017


This is an automated email from the git hooks/post-receive script.

henrich pushed a commit to branch debian/sid
in repository jruby-joni.

commit 59dbb27ca0ab4b9617576286e3a1fd706e2a84f1
Author: Marcin Mielzynski <lopx at gazeta.pl>
Date:   Tue Nov 28 20:22:04 2017 +0100

    point reset (\K) https://stackoverflow.com/a/2136600
---
 src/org/joni/ByteCodeMachine.java | 64 ++++++++++++++++++++++-----------------
 src/org/joni/StackEntry.java      | 10 +++++-
 src/org/joni/StackMachine.java    | 22 ++++++++------
 src/org/joni/Syntax.java          |  2 +-
 test/org/joni/test/TestU8.java    | 11 +++++++
 5 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java
index 5cf643e..5ac56b9 100644
--- a/src/org/joni/ByteCodeMachine.java
+++ b/src/org/joni/ByteCodeMachine.java
@@ -47,6 +47,7 @@ class ByteCodeMachine extends StackMachine {
     private int sprev;
     private int sstart;
     private int sbegin;
+    private int pkeep;
 
     private final int[]code;        // byte code
     private int ip;                 // instruction pointer
@@ -99,7 +100,7 @@ class ByteCodeMachine extends StackMachine {
 
         // was clear ???
         node.group = 0;
-        node.beg = sstart - str;
+        node.beg = ((pkeep > s) ? s : pkeep) - str;
         node.end = s      - str;
 
         stkp = 0;
@@ -186,6 +187,7 @@ class ByteCodeMachine extends StackMachine {
 
         bestLen = -1;
         s = sstart;
+        pkeep = sstart;
         Thread currentThread = Thread.currentThread();
 
         final int[]code = this.code;
@@ -252,6 +254,7 @@ class ByteCodeMachine extends StackMachine {
                 case OPCode.MEMORY_START:               opMemoryStart();           continue;
                 case OPCode.MEMORY_END_PUSH:            opMemoryEndPush();         continue;
                 case OPCode.MEMORY_END:                 opMemoryEnd();             continue;
+                case OPCode.KEEP:                       opKeep();                  continue;
                 case OPCode.MEMORY_END_PUSH_REC:        opMemoryEndPushRec();      continue;
                 case OPCode.MEMORY_END_REC:             opMemoryEndRec();          continue;
 
@@ -360,7 +363,7 @@ class ByteCodeMachine extends StackMachine {
             final Region region = msaRegion;
             if (region != null) {
                 // USE_POSIX_REGION_OPTION ... else ...
-                region.beg[0] = msaBegin = sstart - str;
+                region.beg[0] = msaBegin = ((pkeep > s) ? s : pkeep) - str;
                 region.end[0] = msaEnd   = s      - str;
                 for (int i = 1; i <= regex.numMem; i++) {
                     // opt!
@@ -384,7 +387,7 @@ class ByteCodeMachine extends StackMachine {
                     if (regex.captureHistory != 0) checkCaptureHistory(region);
                 }
             } else {
-                msaBegin = sstart - str;
+                msaBegin = ((pkeep > s) ? s : pkeep) - str;
                 msaEnd   = s      - str;
             }
         } else {
@@ -874,7 +877,7 @@ class ByteCodeMachine extends StackMachine {
     private void opAnyCharStar() {
         final byte[]bytes = this.bytes;
         while (s < range) {
-            pushAlt(ip, s, sprev);
+            pushAlt(ip, s, sprev, pkeep);
             int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             if (enc.isNewLine(bytes, s, end)) {opFail(); return;}
@@ -887,7 +890,7 @@ class ByteCodeMachine extends StackMachine {
     private void opAnyCharStarSb() {
         final byte[]bytes = this.bytes;
         while (s < range) {
-            pushAlt(ip, s, sprev);
+            pushAlt(ip, s, sprev, pkeep);
             if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
             sprev = s;
             s++;
@@ -898,7 +901,7 @@ class ByteCodeMachine extends StackMachine {
     private void opAnyCharMLStar() {
         final byte[]bytes = this.bytes;
         while (s < range) {
-            pushAlt(ip, s, sprev);
+            pushAlt(ip, s, sprev, pkeep);
             int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             sprev = s;
@@ -909,7 +912,7 @@ class ByteCodeMachine extends StackMachine {
 
     private void opAnyCharMLStarSb() {
         while (s < range) {
-            pushAlt(ip, s, sprev);
+            pushAlt(ip, s, sprev, pkeep);
             sprev = s;
             s++;
         }
@@ -921,7 +924,7 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
 
         while (s < range) {
-            if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+            if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep);
             int n = enc.length(bytes, s, end);
             if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}
             sprev = s;
@@ -937,7 +940,7 @@ class ByteCodeMachine extends StackMachine {
 
         while (s < range) {
             byte b = bytes[s];
-            if (c == b) pushAlt(ip + 1, s, sprev);
+            if (c == b) pushAlt(ip + 1, s, sprev, pkeep);
             if (b == Encoding.NEW_LINE) {opFail(); return;}
             sprev = s;
             s++;
@@ -951,7 +954,7 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
 
         while (s < range) {
-            if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+            if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep);
             int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             sprev = s;
@@ -966,7 +969,7 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
 
         while (s < range) {
-            if (c == bytes[s]) pushAlt(ip + 1, s, sprev);
+            if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep);
             sprev = s;
             s++;
         }
@@ -981,7 +984,7 @@ class ByteCodeMachine extends StackMachine {
 
         while (s < range) {
             if (stateCheckVal(s, mem)) {opFail(); return;}
-            pushAltWithStateCheck(ip, s, sprev, mem);
+            pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
             int n = enc.length(bytes, s, end);
             if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}
             sprev = s;
@@ -996,7 +999,7 @@ class ByteCodeMachine extends StackMachine {
 
         while (s < range) {
             if (stateCheckVal(s, mem)) {opFail(); return;}
-            pushAltWithStateCheck(ip, s, sprev, mem);
+            pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
             if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
             sprev = s;
             s++;
@@ -1011,7 +1014,7 @@ class ByteCodeMachine extends StackMachine {
         final byte[]bytes = this.bytes;
         while (s < range) {
             if (stateCheckVal(s, mem)) {opFail(); return;}
-            pushAltWithStateCheck(ip, s, sprev, mem);
+            pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
             int n = enc.length(bytes, s, end);
             if (s + n > range) {opFail(); return;}
             sprev = s;
@@ -1025,7 +1028,7 @@ class ByteCodeMachine extends StackMachine {
 
         while (s < range) {
             if (stateCheckVal(s, mem)) {opFail(); return;}
-            pushAltWithStateCheck(ip, s, sprev, mem);
+            pushAltWithStateCheck(ip, s, sprev, mem, pkeep);
             sprev = s;
             s++;
         }
@@ -1204,6 +1207,10 @@ class ByteCodeMachine extends StackMachine {
         repeatStk[memEndStk + mem] = s;
     }
 
+    private void opKeep() {
+        pkeep = s;
+    }
+
     private void opMemoryEndPushRec() {
         int mem = code[ip++];
         int stkp = getMemStart(mem); /* should be before push mem-end. */
@@ -1438,7 +1445,7 @@ class ByteCodeMachine extends StackMachine {
     /* no need: IS_DYNAMIC_OPTION() == 0 */
     private void opSetOptionPush() {
         // option = code[ip++]; // final for now
-        pushAlt(ip, s, sprev);
+        pushAlt(ip, s, sprev, pkeep);
         ip += OPSize.SET_OPTION + OPSize.FAIL;
     }
 
@@ -1527,7 +1534,7 @@ class ByteCodeMachine extends StackMachine {
 
     private void opPush() {
         int addr = code[ip++];
-        pushAlt(ip + addr, s, sprev);
+        pushAlt(ip + addr, s, sprev, pkeep);
     }
 
     // CEC
@@ -1535,7 +1542,7 @@ class ByteCodeMachine extends StackMachine {
         int mem = code[ip++];
         if (stateCheckVal(s, mem)) {opFail(); return;}
         int addr = code[ip++];
-        pushAltWithStateCheck(ip + addr, s, sprev, mem);
+        pushAltWithStateCheck(ip + addr, s, sprev, mem, pkeep);
     }
 
     // CEC
@@ -1546,7 +1553,7 @@ class ByteCodeMachine extends StackMachine {
         if (stateCheckVal(s, mem)) {
             ip += addr;
         } else {
-            pushAltWithStateCheck(ip + addr, s, sprev, mem);
+            pushAltWithStateCheck(ip + addr, s, sprev, mem, pkeep);
         }
     }
 
@@ -1566,7 +1573,7 @@ class ByteCodeMachine extends StackMachine {
         // beyond string check
         if (s < range && code[ip] == bytes[s]) {
             ip++;
-            pushAlt(ip + addr, s, sprev);
+            pushAlt(ip + addr, s, sprev, pkeep);
             return;
         }
         ip += addr + 1;
@@ -1577,7 +1584,7 @@ class ByteCodeMachine extends StackMachine {
         // beyond string check
         if (s < range && code[ip] == bytes[s]) {
             ip++;
-            pushAlt(ip + addr, s, sprev);
+            pushAlt(ip + addr, s, sprev, pkeep);
             return;
         }
         ip++;
@@ -1592,7 +1599,7 @@ class ByteCodeMachine extends StackMachine {
         pushRepeat(mem, ip);
 
         if (regex.repeatRangeLo[mem] == 0) { // lower
-            pushAlt(ip + addr, s, sprev);
+            pushAlt(ip + addr, s, sprev, pkeep);
         }
     }
 
@@ -1605,7 +1612,7 @@ class ByteCodeMachine extends StackMachine {
         pushRepeat(mem, ip);
 
         if (regex.repeatRangeLo[mem] == 0) {
-            pushAlt(ip, s, sprev);
+            pushAlt(ip, s, sprev, pkeep);
             ip += addr;
         }
     }
@@ -1618,7 +1625,7 @@ class ByteCodeMachine extends StackMachine {
         if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
             /* end of repeat. Nothing to do. */
         } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
-            pushAlt(ip, s, sprev);
+            pushAlt(ip, s, sprev, pkeep);
             ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
         } else {
             ip = e.getRepeatPCode();
@@ -1647,7 +1654,7 @@ class ByteCodeMachine extends StackMachine {
             if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
                 int pcode = e.getRepeatPCode();
                 pushRepeatInc(si);
-                pushAlt(pcode, s, sprev);
+                pushAlt(pcode, s, sprev, pkeep);
             } else {
                 ip = e.getRepeatPCode();
                 pushRepeatInc(si);
@@ -1670,7 +1677,7 @@ class ByteCodeMachine extends StackMachine {
     }
 
     private void opPushPos() {
-        pushPos(s, sprev);
+        pushPos(s, sprev, pkeep);
     }
 
     private void opPopPos() {
@@ -1681,7 +1688,7 @@ class ByteCodeMachine extends StackMachine {
 
     private void opPushPosNot() {
         int addr = code[ip++];
-        pushPosNot(ip + addr, s, sprev);
+        pushPosNot(ip + addr, s, sprev, pkeep);
     }
 
     private void opFailPos() {
@@ -1721,7 +1728,7 @@ class ByteCodeMachine extends StackMachine {
             ip += addr;
             // return FAIL;
         } else {
-            pushLookBehindNot(ip + addr, s, sprev);
+            pushLookBehindNot(ip + addr, s, sprev, pkeep);
             s = q;
             sprev = enc.prevCharHead(bytes, str, s, end);
         }
@@ -1754,6 +1761,7 @@ class ByteCodeMachine extends StackMachine {
         ip    = e.getStatePCode();
         s     = e.getStatePStr();
         sprev = e.getStatePStrPrev();
+        pkeep = e.getPKeep();
 
         if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
             if (e.getStateCheck() != 0) {
diff --git a/src/org/joni/StackEntry.java b/src/org/joni/StackEntry.java
index 81b1785..24de7b6 100644
--- a/src/org/joni/StackEntry.java
+++ b/src/org/joni/StackEntry.java
@@ -21,7 +21,7 @@ package org.joni;
 
 final class StackEntry {
     int type;
-    private int E1, E2, E3, E4;
+    private int E1, E2, E3, E4, E5;
 
     // first union member
     /* byte code position */
@@ -49,10 +49,18 @@ final class StackEntry {
     void setStateCheck(int check) {
         E4 = check;
     }
+
     int getStateCheck() {
         return E4;
     }
 
+    void setPKeep(int pkeep) {
+        E5 = pkeep;
+    }
+
+    int getPKeep() {
+        return E5;
+    }
     // second union member
     /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
     void setRepeatCount(int count) {
diff --git a/src/org/joni/StackMachine.java b/src/org/joni/StackMachine.java
index d5210ca..7715ae9 100644
--- a/src/org/joni/StackMachine.java
+++ b/src/org/joni/StackMachine.java
@@ -163,13 +163,14 @@ abstract class StackMachine extends Matcher implements StackType {
         stateCheckBuffSize = 0;
     }
 
-    private void push(int type, int pat, int s, int prev) {
+    private void push(int type, int pat, int s, int prev, int pkeep) {
         StackEntry e = ensure1();
         e.type = type;
         e.setStatePCode(pat);
         e.setStatePStr(s);
         e.setStatePStrPrev(prev);
         if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+        e.setPKeep(pkeep);
         stk++;
     }
 
@@ -181,13 +182,14 @@ abstract class StackMachine extends Matcher implements StackType {
         stk++;
     }
 
-    protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) {
+    protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum, int pkeep) {
         StackEntry e = ensure1();
         e.type = ALT;
         e.setStatePCode(pat);
         e.setStatePStr(s);
         e.setStatePStrPrev(sprev);
         if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(stateCheckBuff != null ? snum : 0);
+        e.setPKeep(pkeep);
         stk++;
     }
 
@@ -201,24 +203,24 @@ abstract class StackMachine extends Matcher implements StackType {
         }
     }
 
-    protected final void pushAlt(int pat, int s, int prev) {
-        push(ALT, pat, s, prev);
+    protected final void pushAlt(int pat, int s, int prev, int pkeep) {
+        push(ALT, pat, s, prev, pkeep);
     }
 
-    protected final void pushPos(int s, int prev) {
-        push(POS, -1 /*NULL_UCHARP*/, s, prev);
+    protected final void pushPos(int s, int prev, int pkeep) {
+        push(POS, -1 /*NULL_UCHARP*/, s, prev, pkeep);
     }
 
-    protected final void pushPosNot(int pat, int s, int prev) {
-        push(POS_NOT, pat, s, prev);
+    protected final void pushPosNot(int pat, int s, int prev, int pkeep) {
+        push(POS_NOT, pat, s, prev, pkeep);
     }
 
     protected final void pushStopBT() {
         pushType(STOP_BT);
     }
 
-    protected final void pushLookBehindNot(int pat, int s, int sprev) {
-        push(LOOK_BEHIND_NOT, pat, s, sprev);
+    protected final void pushLookBehindNot(int pat, int s, int sprev, int pkeep) {
+        push(LOOK_BEHIND_NOT, pat, s, sprev, pkeep);
     }
 
     protected final void pushRepeat(int id, int pat) {
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index 5ec30f7..ac1f203 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -422,7 +422,7 @@ public final class Syntax implements SyntaxProperties{
         OP2_ESC_H_XDIGIT |
         OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
         OP2_QMARK_LPAREN_CONDITION |
-        OP2_ESC_CAPITAL_R_LINEBREAK // | OP2_ESC_CAPITAL_K_KEEP
+        OP2_ESC_CAPITAL_R_LINEBREAK | OP2_ESC_CAPITAL_K_KEEP
         ),
 
         ( GNU_REGEX_BV |
diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java
index de8433a..b5c54e6 100644
--- a/test/org/joni/test/TestU8.java
+++ b/test/org/joni/test/TestU8.java
@@ -99,6 +99,17 @@ public class TestU8 extends Test {
         x2s("(?=((?<x>)(\\k<x>)))", "", 0, 0);
 
         x2s("a\\g<0>*z", "aaazzz", 0, 6);
+
+        x2s("ab\\Kcd", "abcd", 2, 4);
+        x2s("ab\\Kc(\\Kd|z)", "abcd", 3, 4);
+        x2s("ab\\Kc(\\Kz|d)", "abcd", 2, 4);
+        x2s("(a\\K)*", "aaab", 3, 3);
+        x3s("(a\\K)*", "aaab", 2, 3, 1);
+        // x2s("a\\K?a", "aa", 0, 2);             // error: differ from perl
+        x2s("ab(?=c\\Kd)", "abcd", 2, 2);         // This behaviour is currently not well defined. (see: perlre)
+        x2s("(?<=a\\Kb|aa)cd", "abcd", 1, 4);     // ...
+        x2s("(?<=ab|a\\Ka)cd", "abcd", 2, 4);     // ...
+
     }
 
     public static void main(String[] args) throws Throwable {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git



More information about the pkg-java-commits mailing list