[jruby-joni] 196/279: Imported Upstream version 2.1.0
Hideki Yamane
henrich at moszumanska.debian.org
Mon Nov 16 11:27:32 UTC 2015
This is an automated email from the git hooks/post-receive script.
henrich pushed a commit to branch debian/sid
in repository jruby-joni.
commit 8dec518cf2da1e338713820b6c93b5bc02ec53cd
Author: Hideki Yamane <henrich at debian.org>
Date: Sun Feb 16 17:13:59 2014 +0900
Imported Upstream version 2.1.0
---
README.md | 65 ++++++++++++++++++++++++++++
pom.xml | 29 ++++---------
src/org/joni/Analyser.java | 65 ++++++++++++++++++----------
src/org/joni/Lexer.java | 8 +++-
src/org/joni/Parser.java | 54 +++++++++++++++++++----
src/org/joni/ScanEnvironment.java | 32 ++++++++++++++
src/org/joni/Syntax.java | 39 +++++++++++++++++
src/org/joni/ast/CClassNode.java | 6 ++-
src/org/joni/ast/EncloseNode.java | 1 +
src/org/joni/ast/QuantifierNode.java | 7 ++-
src/org/joni/constants/SyntaxProperties.java | 1 +
src/org/joni/exception/ErrorMessages.java | 1 +
12 files changed, 252 insertions(+), 56 deletions(-)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b51db7a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,65 @@
+joni
+====
+
+Java port of Oniguruma regexp library
+
+## Usage
+
+### Imports
+ ```java
+ import java.util.Iterator;
+ import org.jcodings.specific.UTF8Encoding;
+ import org.joni.Matcher;
+ import org.joni.NameEntry;
+ import org.joni.Option;
+ import org.joni.Regex;
+ import org.joni.Region;
+ ```
+
+### Matching
+
+ ```java
+
+ byte[] pattern = "a*".getBytes();
+ byte[] str = "aaa".getBytes();
+
+ Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE);
+ Matcher matcher = regex.matcher(str);
+ int result = matcher.search(0, str.length, Option.DEFAULT);
+ ```
+
+### Using captures
+
+ ```java
+ byte[] pattern = "(a*)".getBytes();
+ byte[] str = "aaa".getBytes();
+
+ Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE);
+ Matcher matcher = regex.matcher(str);
+ int result = matcher.search(0, str.length, Option.DEFAULT);
+ if (result != -1) {
+ Region region = matcher.getEagerRegion();
+ }
+ ```
+
+### Using named captures
+
+ ```java
+ byte[] pattern = "(?<name>a*)".getBytes();
+ byte[] str = "aaa".getBytes();
+
+ Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE);
+ Matcher matcher = regex.matcher(str);
+ int result = matcher.search(0, str.length, Option.DEFAULT);
+ if (result != -1) {
+ Region region = matcher.getEagerRegion();
+ for (Iterator<NameEntry> entry = regex.namedBackrefIterator(); entry.hasNext();) {
+ NameEntry e = entry.next();
+ int number = e.getBackRefs()[0]; // can have many refs per name
+ // int begin = region.beg[number];
+ // int end = region.end[number];
+
+ }
+ }
+ ```
+
+## License
+
+Joni is released under the [MIT License](http://www.opensource.org/licenses/MIT).
diff --git a/pom.xml b/pom.xml
index a1f09f3..3d4352d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,11 +1,10 @@
<?xml version="1.0" ?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<packaging>jar</packaging>
- <version>2.0.0</version>
+ <version>2.1.0</version>
<name>Joni</name>
<description>
Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma
@@ -16,6 +15,12 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
+ <parent>
+ <groupId>org.sonatype.oss</groupId>
+ <artifactId>oss-parent</artifactId>
+ <version>7</version>
+ </parent>
+
<issueManagement>
<system>JIRA</system>
<url>http://jira.codehaus.org/browse/JRUBY</url>
@@ -35,24 +40,6 @@
</license>
</licenses>
- <distributionManagement>
- <repository>
- <id>codehaus-jruby-repository</id>
- <name>JRuby Central Repository</name>
- <url>dav:https://dav.codehaus.org/repository/jruby</url>
- </repository>
- <snapshotRepository>
- <id>codehaus-jruby-snapshot-repository</id>
- <name>JRuby Central Development Repository</name>
- <url>dav:https://dav.codehaus.org/snapshots.repository/jruby</url>
- </snapshotRepository>
- <site>
- <id>codehaus-jruby-site</id>
- <name>JRuby Maven site</name>
- <url>dav:https://dav.codehaus.org/jruby/info</url>
- </site>
- </distributionManagement>
-
<repositories>
<repository>
<id>codehaus</id>
diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java
index 815a7dd..9665ba6 100644
--- a/src/org/joni/Analyser.java
+++ b/src/org/joni/Analyser.java
@@ -412,13 +412,19 @@ final class Analyser extends Parser {
BackRefNode br = (BackRefNode)node;
if (br.isRecursion()) break;
- if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF);
- min = getMinMatchLength(env.memNodes[br.back[0]]);
+ if (br.back[0] > env.numMem) {
+ if (!syntax.op2OptionECMAScript()) newValueException(ERR_INVALID_BACKREF);
+ } else {
+ min = getMinMatchLength(env.memNodes[br.back[0]]);
+ }
for (int i=1; i<br.backNum; i++) {
- if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
- int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
- if (min > tmin) min = tmin;
+ if (br.back[i] > env.numMem) {
+ if (!syntax.op2OptionECMAScript()) newValueException(ERR_INVALID_BACKREF);
+ } else {
+ int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
+ if (min > tmin) min = tmin;
+ }
}
break;
@@ -546,9 +552,12 @@ final class Analyser extends Parser {
}
for (int i=0; i<br.backNum; i++) {
- if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
- int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
- if (max < tmax) max = tmax;
+ if (br.back[i] > env.numMem) {
+ if(!syntax.op2OptionECMAScript()) newValueException(ERR_INVALID_BACKREF);
+ } else {
+ int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
+ if (max < tmax) max = tmax;
+ }
}
break;
@@ -1780,15 +1789,18 @@ final class Analyser extends Parser {
case NodeType.BREF:
BackRefNode br = (BackRefNode)node;
for (int i=0; i<br.backNum; i++) {
- if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
- env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
- env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
- if (Config.USE_BACKREF_WITH_LEVEL) {
- if (br.isNestLevel()) {
- env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]);
- }
- } // USE_BACKREF_AT_LEVEL
- ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
+ if (br.back[i] > env.numMem) {
+ if (!syntax.op2OptionECMAScript()) newValueException(ERR_INVALID_BACKREF);
+ } else {
+ env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
+ env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
+ if (Config.USE_BACKREF_WITH_LEVEL) {
+ if (br.isNestLevel()) {
+ env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]);
+ }
+ } // USE_BACKREF_AT_LEVEL
+ ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
+ }
}
break;
@@ -2081,14 +2093,21 @@ final class Analyser extends Parser {
Node[]nodes = oenv.scanEnv.memNodes;
- int min = getMinMatchLength(nodes[br.back[0]]);
- int max = getMaxMatchLength(nodes[br.back[0]]);
+ int min = 0;
+ int max = 0;
+
+ if (nodes != null && nodes[br.back[0]] != null) {
+ min = getMinMatchLength(nodes[br.back[0]]);
+ max = getMaxMatchLength(nodes[br.back[0]]);
+ }
for (int i=1; i<br.backNum; i++) {
- int tmin = getMinMatchLength(nodes[br.back[i]]);
- int tmax = getMaxMatchLength(nodes[br.back[i]]);
- if (min > tmin) min = tmin;
- if (max < tmax) max = tmax;
+ if (nodes[br.back[i]] != null) {
+ int tmin = getMinMatchLength(nodes[br.back[i]]);
+ int tmax = getMaxMatchLength(nodes[br.back[i]]);
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
+ }
}
opt.length.set(min, max);
break;
diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index bc919ad..0b70271 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -184,7 +184,13 @@ class Lexer extends ScannerSupport {
}
private void fetchEscapedValueControl() {
- if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+ if (!left()) {
+ if (syntax.op2OptionECMAScript()) {
+ return;
+ } else {
+ newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+ }
+ }
fetch();
if (c == '?') {
c = 0177;
diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java
index 3d56e9e..62208a7 100644
--- a/src/org/joni/Parser.java
+++ b/src/org/joni/Parser.java
@@ -157,7 +157,7 @@ class Parser extends Lexer {
neg = false;
}
- if (token.type == TokenType.CC_CLOSE) {
+ if (token.type == TokenType.CC_CLOSE && !syntax.op2OptionECMAScript()) {
if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
env.ccEscWarn("]");
token.type = TokenType.CHAR; /* allow []...] */
@@ -429,6 +429,9 @@ class Parser extends Lexer {
break;
case '!': /* preceding read */
node = new AnchorNode(AnchorType.PREC_READ_NOT);
+ if (syntax.op2OptionECMAScript()) {
+ env.pushPrecReadNotNode(node);
+ }
break;
case '>': /* (?>...) stop backtrack */
node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
@@ -579,10 +582,16 @@ class Parser extends Lexer {
if (node.getType() == NodeType.ANCHOR) {
AnchorNode an = (AnchorNode) node;
an.setTarget(target);
+ if (syntax.op2OptionECMAScript() && an.type == AnchorType.PREC_READ_NOT) {
+ env.popPrecReadNotNode(an);
+ }
} else {
EncloseNode en = (EncloseNode)node;
en.setTarget(target);
if (en.type == EncloseType.MEMORY) {
+ if (syntax.op2OptionECMAScript()) {
+ en.containingAnchor = env.currentPrecReadNotNode();
+ }
/* Don't move this to previous of parse_subexp() */
env.setMemNode(en.regNum, node);
}
@@ -750,13 +759,37 @@ class Parser extends Lexer {
break;
case BACKREF:
- int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
- node = new BackRefNode(token.getBackrefNum(),
- backRefs,
- token.getBackrefByName(),
- token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
- token.getBackrefLevel(), // ...
- env);
+ if (syntax.op2OptionECMAScript() && token.getBackrefNum() == 1 && env.memNodes != null) {
+ EncloseNode encloseNode = (EncloseNode) env.memNodes[token.getBackrefRef1()];
+ boolean shouldIgnore = false;
+ if (encloseNode != null && encloseNode.containingAnchor != null) {
+ shouldIgnore = true;
+ for (Node anchorNode : env.precReadNotNodes) {
+ if (anchorNode == encloseNode.containingAnchor) {
+ shouldIgnore = false;
+ break;
+ }
+ }
+ }
+ if (shouldIgnore) {
+ node = StringNode.EMPTY;
+ } else {
+ node = new BackRefNode(token.getBackrefNum(),
+ new int[]{token.getBackrefRef1()},
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+ }
+ } else {
+ int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
+ node = new BackRefNode(token.getBackrefNum(),
+ backRefs,
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+ }
break;
@@ -857,6 +890,9 @@ class Parser extends Lexer {
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+ if (syntax.op2OptionECMAScript() && target.getType() == NodeType.QTFR) {
+ newSyntaxException(ERR_NESTED_REPEAT_NOT_ALLOWED);
+ }
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
token.getRepeatUpper(),
token.type == TokenType.INTERVAL);
@@ -871,7 +907,7 @@ class Parser extends Lexer {
qn = en;
}
- if (ret == 0) {
+ if (ret == 0 || (syntax.op2OptionECMAScript() && ret == 1)) {
target = qn;
} else if (ret == 2) { /* split case: /abc+/ */
target = ConsAltNode.newListNode(target, null);
diff --git a/src/org/joni/ScanEnvironment.java b/src/org/joni/ScanEnvironment.java
index 02a1ad7..0dbce24 100644
--- a/src/org/joni/ScanEnvironment.java
+++ b/src/org/joni/ScanEnvironment.java
@@ -55,6 +55,9 @@ public final class ScanEnvironment {
int currMaxRegNum;
boolean hasRecursion;
+ int numPrecReadNotNodes;
+ Node precReadNotNodes[];
+
public ScanEnvironment(Regex regex, Syntax syntax) {
this.reg = regex;
option = regex.options;
@@ -80,6 +83,9 @@ public final class ScanEnvironment {
combExpMaxRegNum = 0;
currMaxRegNum = 0;
hasRecursion = false;
+
+ numPrecReadNotNodes = 0;
+ precReadNotNodes = null;
}
public int addMemEntry() {
@@ -102,6 +108,32 @@ public final class ScanEnvironment {
}
}
+ /**
+  * Pushes {@code node} onto the stack of PREC_READ_NOT anchors that are
+  * currently being parsed. The backing array is created on first use and
+  * doubled once the new element count reaches its length.
+  *
+  * @param node the anchor node to put on top of the stack
+  */
+ public void pushPrecReadNotNode(Node node) {
+     // The count is bumped first, so 'slot' is the index the new entry occupies.
+     final int slot = numPrecReadNotNodes++;
+     if (precReadNotNodes == null) {
+         precReadNotNodes = new Node[SCANENV_MEMNODES_SIZE];
+     } else if (slot + 1 >= precReadNotNodes.length) {
+         final Node[] grown = new Node[precReadNotNodes.length << 1];
+         System.arraycopy(precReadNotNodes, 0, grown, 0, precReadNotNodes.length);
+         precReadNotNodes = grown;
+     }
+     precReadNotNodes[slot] = node;
+ }
+
+ /**
+  * Pops {@code node} off the stack of PREC_READ_NOT anchors.
+  * Nothing happens when the stack is empty or when its top entry is not
+  * {@code node}, so an unbalanced call leaves the stack untouched.
+  *
+  * @param node the anchor expected to be on top of the stack
+  */
+ public void popPrecReadNotNode(Node node) {
+     // The count check guards the empty stack: without it an allocated but
+     // empty array would be read at index -1 and throw
+     // ArrayIndexOutOfBoundsException.
+     if (precReadNotNodes != null
+             && numPrecReadNotNodes > 0
+             && precReadNotNodes[numPrecReadNotNodes - 1] == node) {
+         // Clear the slot so the popped node is not kept reachable from the array.
+         precReadNotNodes[numPrecReadNotNodes - 1] = null;
+         numPrecReadNotNodes--;
+     }
+ }
+
+ /**
+  * Returns the PREC_READ_NOT anchor on top of the stack without removing it.
+  *
+  * @return the most recently pushed anchor that has not been popped,
+  *         or {@code null} when the stack is empty
+  */
+ public Node currentPrecReadNotNode() {
+     return numPrecReadNotNodes > 0
+             ? precReadNotNodes[numPrecReadNotNodes - 1]
+             : null;
+ }
+
public int convertBackslashValue(int c) {
if (syntax.opEscControlChars()) {
switch (c) {
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index 74662a8..4e7b5e7 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -278,6 +278,10 @@ public final class Syntax implements SyntaxProperties{
return isOp2(OP2_INEFFECTIVE_ESCAPE);
}
+ public boolean op2OptionECMAScript() { // reports whether OP2_OPTION_ECMASCRIPT passes the isOp2 check for this syntax
+ return isOp2(OP2_OPTION_ECMASCRIPT); // NOTE(review): isOp2 is defined outside this hunk; presumably a bit test on the op2 flags - confirm
+ }
+
/**
* BEHAVIOR
*
@@ -603,4 +607,39 @@ public final class Syntax implements SyntaxProperties{
INEFFECTIVE_META_CHAR /* anychar anytime */
)
);
+
+ public static final Syntax ECMAScript = new Syntax( // syntax preset that carries OP2_OPTION_ECMASCRIPT
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | // first argument: OP_* operator flags
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL |
+ OP_DECIMAL_BACKREF | OP_ESC_D_DIGIT |
+ OP_ESC_S_WHITE_SPACE | OP_ESC_W_WORD )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ), // clears OP_ESC_LTGT_WORD_BEGIN_END from the combined OP_* flags
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE | // second argument: OP2_* operator flags
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_ESC_U_HEX4 | OP2_ESC_V_VTAB |
+ OP2_OPTION_ECMASCRIPT ), // the flag that op2OptionECMAScript() tests
+
+ ( CONTEXT_INDEP_ANCHORS | // third argument: syntax behavior flags
+ CONTEXT_INDEP_REPEAT_OPS |
+ CONTEXT_INVALID_REPEAT_OPS |
+ ALLOW_INVALID_INTERVAL |
+ BACKSLASH_ESCAPE_IN_CC |
+ ALLOW_DOUBLE_RANGE_OP_IN_CC |
+ DIFFERENT_LEN_ALT_LOOK_BEHIND ),
+
+ Option.NONE, // NOTE(review): presumably the default option set for this syntax - confirm against the Syntax constructor
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
}
diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java
index 7927023..6dd5d7f 100644
--- a/src/org/joni/ast/CClassNode.java
+++ b/src/org/joni/ast/CClassNode.java
@@ -335,8 +335,12 @@ public final class CClassNode extends Node {
if (Config.NON_UNICODE_SDW) {
switch(ctype) {
- case CharacterType.D:
case CharacterType.S:
+ if (!not && env.syntax.op2OptionECMAScript()) {
+ // treat \u2028 and \u2029 as whitespace
+ addCodeRange(env, 8232, 8233);
+ }
+ case CharacterType.D:
case CharacterType.W:
ctype ^= CharacterType.SPECIAL_MASK;
if (not) {
diff --git a/src/org/joni/ast/EncloseNode.java b/src/org/joni/ast/EncloseNode.java
index 0a07ed1..7c45d14 100644
--- a/src/org/joni/ast/EncloseNode.java
+++ b/src/org/joni/ast/EncloseNode.java
@@ -34,6 +34,7 @@ public final class EncloseNode extends StateNode implements EncloseType {
public int maxLength; // OnigDistance
public int charLength;
public int optCount; // referenced count in optimize_node_left()
+ public Node containingAnchor; // PREC_READ_NOT anchor that was on top of the parser's stack when this MEMORY group was parsed in ECMAScript mode; null if none
// node_new_enclose / onig_node_new_enclose
public EncloseNode(int type) {
diff --git a/src/org/joni/ast/QuantifierNode.java b/src/org/joni/ast/QuantifierNode.java
index 8ec53cb..d75aa27 100644
--- a/src/org/joni/ast/QuantifierNode.java
+++ b/src/org/joni/ast/QuantifierNode.java
@@ -200,7 +200,12 @@ public final class QuantifierNode extends StateNode {
}
public int setQuantifier(Node tgt, boolean group, ScanEnvironment env, byte[]bytes, int p, int end) {
- if (lower == 1 && upper == 1) return 1;
+ if (lower == 1 && upper == 1) {
+ if (env.syntax.op2OptionECMAScript()) {
+ setTarget(tgt);
+ }
+ return 1;
+ }
switch(tgt.getType()) {
diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java
index 61f2269..075324c 100644
--- a/src/org/joni/constants/SyntaxProperties.java
+++ b/src/org/joni/constants/SyntaxProperties.java
@@ -74,6 +74,7 @@ public interface SyntaxProperties {
/* final int OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18); */
final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */
final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */
+ final int OP2_OPTION_ECMASCRIPT = (1<<21); /* EcmaScript quirks */
/* syntax (behavior); */
final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented */
diff --git a/src/org/joni/exception/ErrorMessages.java b/src/org/joni/exception/ErrorMessages.java
index f490713..683ff62 100644
--- a/src/org/joni/exception/ErrorMessages.java
+++ b/src/org/joni/exception/ErrorMessages.java
@@ -54,6 +54,7 @@ public interface ErrorMessages extends org.jcodings.exception.ErrorMessages {
final String ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class";
final String ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified";
final String ERR_TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid";
+ final String ERR_NESTED_REPEAT_NOT_ALLOWED = "nested repeat is not allowed";
final String ERR_NESTED_REPEAT_OPERATOR = "nested repeat operator";
final String ERR_UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis";
final String ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis";
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git
More information about the pkg-java-commits
mailing list