[Python-modules-commits] [python-regex] 01/05: Import python-regex_0.1.20160425.orig.tar.gz
Sandro Tosi
morph at moszumanska.debian.org
Mon Apr 25 21:25:08 UTC 2016
This is an automated email from the git hooks/post-receive script.
morph pushed a commit to branch master
in repository python-regex.
commit 4c3a1584fa43ddc3dc071d73c173d9178e22876e
Author: Sandro Tosi <morph at debian.org>
Date: Mon Apr 25 22:17:15 2016 +0100
Import python-regex_0.1.20160425.orig.tar.gz
---
PKG-INFO | 2 +-
Python2/_regex.c | 745 ++++++++++++++++++++++++++++++++++++++-----------
Python2/_regex_core.py | 275 +++++++++++-------
Python2/regex.py | 10 +-
Python2/test_regex.py | 63 +++++
Python3/_regex.c | 741 +++++++++++++++++++++++++++++++++++++-----------
Python3/_regex_core.py | 275 +++++++++++-------
Python3/regex.py | 10 +-
Python3/test_regex.py | 63 +++++
setup.py | 2 +-
10 files changed, 1666 insertions(+), 520 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index f3ac398..1afff98 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: regex
-Version: 2016.01.10
+Version: 2016.04.25
Summary: Alternative regular expression module, to replace re.
Home-page: https://bitbucket.org/mrabarnett/mrab-regex
Author: Matthew Barnett
diff --git a/Python2/_regex.c b/Python2/_regex.c
index e960210..e3effcc 100644
--- a/Python2/_regex.c
+++ b/Python2/_regex.c
@@ -644,6 +644,7 @@ typedef struct PatternObject {
PyObject_HEAD
PyObject* pattern; /* Pattern source (or None). */
Py_ssize_t flags; /* Flags used when compiling pattern source. */
+ RE_UINT8* packed_code_list;
PyObject* weakreflist; /* List of weak references */
/* Nodes into which the regular expression is compiled. */
RE_Node* start_node;
@@ -679,7 +680,10 @@ typedef struct PatternObject {
RE_GroupData* groups_storage;
RE_RepeatData* repeats_storage;
size_t fuzzy_count; /* The number of fuzzy sections. */
+ /* Additional info. */
Py_ssize_t req_offset; /* The offset to the required string. */
+ PyObject* required_chars;
+ Py_ssize_t req_flags;
RE_Node* req_string; /* The required string. */
BOOL is_fuzzy; /* Whether it's a fuzzy pattern. */
BOOL do_search_start; /* Whether to do an initial search. */
@@ -760,12 +764,12 @@ typedef struct RE_CompileArgs {
* of them. Empty strings aren't recorded, so if 'list' and 'item' are both
* NULL then the result is an empty string.
*/
-typedef struct JoinInfo {
+typedef struct RE_JoinInfo {
PyObject* list; /* The list of slices if there are more than 2 of them. */
PyObject* item; /* The slice if there is only 1 of them. */
BOOL reversed; /* Whether the slices have been found in reverse order. */
BOOL is_unicode; /* Whether the string is Unicode. */
-} JoinInfo;
+} RE_JoinInfo;
/* Info about fuzzy matching. */
typedef struct {
@@ -793,6 +797,25 @@ typedef struct RE_BestList {
RE_BestEntry* entries;
} RE_BestList;
+/* A stack of guard checks. */
+typedef struct RE_Check {
+ RE_Node* node;
+ RE_STATUS_T result;
+} RE_Check;
+
+typedef struct RE_CheckStack {
+ Py_ssize_t capacity;
+ Py_ssize_t count;
+ RE_Check* items;
+} RE_CheckStack;
+
+/* A stack of nodes. */
+typedef struct RE_NodeStack {
+ Py_ssize_t capacity;
+ Py_ssize_t count;
+ RE_Node** items;
+} RE_NodeStack;
+
/* Function types for getting info from a MatchObject. */
typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index);
@@ -6826,8 +6849,10 @@ Py_LOCAL_INLINE(int) try_match_STRING(RE_State* state, RE_NextNode* next,
for (s_pos = 0; s_pos < length; s_pos++) {
if (text_pos + s_pos >= state->slice_end) {
- if (state->partial_side == RE_PARTIAL_RIGHT)
+ if (state->partial_side == RE_PARTIAL_RIGHT) {
+ next_position->text_pos = text_pos;
return RE_ERROR_PARTIAL;
+ }
return RE_ERROR_FAILURE;
}
@@ -6874,8 +6899,13 @@ Py_LOCAL_INLINE(int) try_match_STRING_FLD(RE_State* state, RE_NextNode* next,
if (f_pos >= folded_len) {
/* Fetch and casefold another character. */
if (text_pos >= state->slice_end) {
- if (state->partial_side == RE_PARTIAL_RIGHT)
+ if (state->partial_side == RE_PARTIAL_RIGHT) {
+ if (next->match_step == 0)
+ next_position->text_pos = start_pos;
+ else
+ next_position->text_pos = text_pos;
return RE_ERROR_PARTIAL;
+ }
return RE_ERROR_FAILURE;
}
@@ -6942,8 +6972,13 @@ Py_LOCAL_INLINE(int) try_match_STRING_FLD_REV(RE_State* state, RE_NextNode*
if (f_pos >= folded_len) {
/* Fetch and casefold another character. */
if (text_pos <= state->slice_start) {
- if (state->partial_side == RE_PARTIAL_LEFT)
+ if (state->partial_side == RE_PARTIAL_LEFT) {
+ if (next->match_step == 0)
+ next_position->text_pos = start_pos;
+ else
+ next_position->text_pos = text_pos;
return RE_ERROR_PARTIAL;
+ }
return RE_ERROR_FAILURE;
}
@@ -6994,8 +7029,10 @@ Py_LOCAL_INLINE(int) try_match_STRING_IGN(RE_State* state, RE_NextNode* next,
for (s_pos = 0; s_pos < length; s_pos++) {
if (text_pos + s_pos >= state->slice_end) {
- if (state->partial_side == RE_PARTIAL_RIGHT)
+ if (state->partial_side == RE_PARTIAL_RIGHT) {
+ next_position->text_pos = text_pos;
return RE_ERROR_PARTIAL;
+ }
return RE_ERROR_FAILURE;
}
@@ -7031,8 +7068,10 @@ Py_LOCAL_INLINE(int) try_match_STRING_IGN_REV(RE_State* state, RE_NextNode*
for (s_pos = 0; s_pos < length; s_pos++) {
if (text_pos - s_pos <= state->slice_start) {
- if (state->partial_side == RE_PARTIAL_LEFT)
+ if (state->partial_side == RE_PARTIAL_LEFT) {
+ next_position->text_pos = text_pos;
return RE_ERROR_PARTIAL;
+ }
return RE_ERROR_FAILURE;
}
@@ -7062,8 +7101,10 @@ Py_LOCAL_INLINE(int) try_match_STRING_REV(RE_State* state, RE_NextNode* next,
for (s_pos = 0; s_pos < length; s_pos++) {
if (text_pos - s_pos <= state->slice_start) {
- if (state->partial_side == RE_PARTIAL_LEFT)
+ if (state->partial_side == RE_PARTIAL_LEFT) {
+ next_position->text_pos = text_pos;
return RE_ERROR_PARTIAL;
+ }
return RE_ERROR_FAILURE;
}
@@ -7118,12 +7159,6 @@ Py_LOCAL_INLINE(int) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t
case RE_OP_BOUNDARY:
status = try_match_BOUNDARY(state, test, text_pos);
break;
- case RE_OP_BRANCH:
- status = try_match(state, &test->next_1, text_pos, next_position);
- if (status == RE_ERROR_FAILURE)
- status = try_match(state, &test->nonstring.next_2, text_pos,
- next_position);
- break;
case RE_OP_CHARACTER:
status = try_match_CHARACTER(state, test, text_pos);
break;
@@ -8607,6 +8642,11 @@ again:
int status;
status = try_match(state, &test->next_1, text_pos, new_position);
+ if (status == RE_ERROR_PARTIAL) {
+ new_position->node = node;
+ new_position->text_pos = start_pos;
+ return status;
+ }
if (status < 0)
return status;
@@ -9491,6 +9531,7 @@ Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state,
Py_ssize_t first;
Py_ssize_t last;
PyObject* string_set;
+ void* folded_buffer;
state = safe_state->re_state;
full_case_fold = state->encoding->full_case_fold;
@@ -9634,13 +9675,18 @@ Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state,
goto finished;
}
+ /* Point to the used portion of the folded buffer. */
+ folded_buffer = (void*)((Py_UCS1*)folded + first * folded_charsize);
+ last -= first;
+ first = 0;
+
/* We've already looked for a partial match (if allowed), but what about a
* complete match?
*/
while (len >= min_len) {
if (end_of_fold[len]) {
- status = string_set_contains_ign(state, string_set, folded, first,
- last, folded_charsize);
+ status = string_set_contains_ign(state, string_set, folded_buffer,
+ first, last, folded_charsize);
if (status == 1) {
/* Advance past the match. */
@@ -10012,6 +10058,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = data.new_text_pos;
*node = data.new_node;
@@ -10078,6 +10125,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = data.new_text_pos;
*node = data.new_node;
@@ -10172,6 +10220,7 @@ Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t*
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_INS_COST];
++state->total_errors;
+ ++state->capture_change;
/* Check whether there are too few errors. */
state->too_few_errors = bt_data->fuzzy_insert.too_few_errors;
@@ -10248,6 +10297,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = data.new_text_pos;
*string_pos = data.new_string_pos;
@@ -10312,6 +10362,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = data.new_text_pos;
*node = new_node;
@@ -10430,6 +10481,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = new_text_pos;
*string_pos = data.new_string_pos;
@@ -10504,6 +10556,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = new_text_pos;
*node = new_node;
@@ -10628,6 +10681,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = new_text_pos;
*group_pos = new_group_pos;
@@ -10699,6 +10753,7 @@ found:
++fuzzy_info->counts[RE_FUZZY_ERR];
fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
++state->total_errors;
+ ++state->capture_change;
*text_pos = new_text_pos;
*node = new_node;
@@ -11375,7 +11430,7 @@ start_match:
/* Locate the required string, if there's one, unless this is a recursive
* call of 'basic_match'.
*/
- if (!pattern->req_string)
+ if (!pattern->req_string || state->text_pos < state->req_pos)
found_pos = state->text_pos;
else {
found_pos = locate_required_string(safe_state, search);
@@ -11396,7 +11451,7 @@ next_match_1:
*/
status = search_start(safe_state, &start_pair, &new_position, 0);
if (status == RE_ERROR_PARTIAL) {
- state->match_pos = state->text_pos;
+ state->match_pos = new_position.text_pos;
return status;
} else if (status != RE_ERROR_SUCCESS)
return status;
@@ -12009,6 +12064,12 @@ advance:
changed = rp_data->capture_change != state->capture_change ||
state->text_pos != rp_data->start;
+ /* Additional checks are needed if there's fuzzy matching. */
+ if (changed && state->pattern->is_fuzzy && rp_data->count >=
+ node->values[1])
+ changed = !(node->step == 1 ? state->text_pos >=
+ state->slice_end : state->text_pos <= state->slice_start);
+
/* The counts are of type size_t, so the format needs to specify
* that.
*/
@@ -12174,6 +12235,12 @@ advance:
changed = rp_data->capture_change != state->capture_change ||
state->text_pos != rp_data->start;
+ /* Additional checks are needed if there's fuzzy matching. */
+ if (changed && state->pattern->is_fuzzy && rp_data->count >=
+ node->values[1])
+ changed = !(node->step == 1 ? state->text_pos >=
+ state->slice_end : state->text_pos <= state->slice_start);
+
/* The counts are of type size_t, so the format needs to specify
* that.
*/
@@ -13316,8 +13383,10 @@ advance:
gfolded_pos = 0;
}
- if (folded_pos < folded_len && folded[folded_pos] ==
- gfolded[gfolded_pos]) {
+ if (folded_pos < folded_len && same_char_ign(encoding,
+ locale_info,
+ folded[folded_pos],
+ gfolded[gfolded_pos])) {
++folded_pos;
++gfolded_pos;
} else if (node->status & RE_STATUS_FUZZY) {
@@ -13419,8 +13488,9 @@ advance:
gfolded_pos = gfolded_len;
}
- if (folded_pos > 0 && folded[folded_pos - 1] ==
- gfolded[gfolded_pos - 1]) {
+ if (folded_pos > 0 && same_char_ign(encoding, locale_info,
+ folded[folded_pos - 1],
+ gfolded[gfolded_pos - 1])) {
--folded_pos;
--gfolded_pos;
} else if (node->status & RE_STATUS_FUZZY) {
@@ -16226,11 +16296,21 @@ Py_LOCAL_INLINE(void) restore_groups(RE_SafeState* safe_state, RE_GroupData*
state = safe_state->re_state;
pattern = state->pattern;
- for (g = 0; g < pattern->true_group_count; g++)
- re_dealloc(state->groups[g].captures);
+ for (g = 0; g < pattern->true_group_count; g++) {
+ RE_GroupData* group;
+ RE_GroupData* saved;
+
+ group = &state->groups[g];
+ saved = &saved_groups[g];
+
+ group->span = saved->span;
- Py_MEMCPY(state->groups, saved_groups, pattern->true_group_count *
- sizeof(RE_GroupData));
+ group->capture_count = saved->capture_count;
+ Py_MEMCPY(group->captures, saved->captures, saved->capture_count *
+ sizeof(RE_GroupSpan));
+
+ re_dealloc(saved->captures);
+ }
re_dealloc(saved_groups);
@@ -18261,8 +18341,8 @@ Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject*
}
/* Initialises the join list. */
-Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL
- is_unicode) {
+Py_LOCAL_INLINE(void) init_join_list(RE_JoinInfo* join_info, BOOL reversed,
+ BOOL is_unicode) {
join_info->list = NULL;
join_info->item = NULL;
join_info->reversed = reversed;
@@ -18270,7 +18350,7 @@ Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL
}
/* Adds an item to the join list. */
-Py_LOCAL_INLINE(int) add_to_join_list(JoinInfo* join_info, PyObject* item) {
+Py_LOCAL_INLINE(int) add_to_join_list(RE_JoinInfo* join_info, PyObject* item) {
PyObject* new_item;
int status;
@@ -18339,13 +18419,13 @@ error:
}
/* Clears the join list. */
-Py_LOCAL_INLINE(void) clear_join_list(JoinInfo* join_info) {
+Py_LOCAL_INLINE(void) clear_join_list(RE_JoinInfo* join_info) {
Py_XDECREF(join_info->list);
Py_XDECREF(join_info->item);
}
/* Joins together a list of strings for pattern_subx. */
-Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) {
+Py_LOCAL_INLINE(PyObject*) join_list_info(RE_JoinInfo* join_info) {
/* If the list already exists then just do the join. */
if (join_info->list) {
PyObject* joiner;
@@ -18446,7 +18526,7 @@ Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement,
static PyObject* match_expand(MatchObject* self, PyObject* str_template) {
Py_ssize_t literal_length;
PyObject* replacement;
- JoinInfo join_info;
+ RE_JoinInfo join_info;
Py_ssize_t size;
Py_ssize_t i;
@@ -18676,7 +18756,7 @@ error:
Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self);
/* MatchObject's '__copy__' method. */
-static PyObject* match_copy(MatchObject* self, PyObject *unused) {
+static PyObject* match_copy(MatchObject* self, PyObject* unused) {
return make_match_copy(self);
}
@@ -19376,7 +19456,7 @@ static PyObject* scanner_match(ScannerObject* self, PyObject* unused) {
}
/* ScannerObject's 'search' method. */
-static PyObject* scanner_search(ScannerObject* self, PyObject *unused) {
+static PyObject* scanner_search(ScannerObject* self, PyObject* unused) {
return scanner_search_or_match(self, TRUE);
}
@@ -19430,7 +19510,7 @@ Py_LOCAL_INLINE(PyObject*) make_scanner_copy(ScannerObject* self) {
}
/* ScannerObject's '__copy__' method. */
-static PyObject* scanner_copy(ScannerObject* self, PyObject *unused) {
+static PyObject* scanner_copy(ScannerObject* self, PyObject* unused) {
return make_scanner_copy(self);
}
@@ -19712,7 +19792,7 @@ error:
}
/* SplitterObject's 'split' method. */
-static PyObject* splitter_split(SplitterObject* self, PyObject *unused) {
+static PyObject* splitter_split(SplitterObject* self, PyObject* unused) {
PyObject* result;
result = next_split_part(self);
@@ -19777,7 +19857,7 @@ Py_LOCAL_INLINE(PyObject*) make_splitter_copy(SplitterObject* self) {
}
/* SplitterObject's '__copy__' method. */
-static PyObject* splitter_copy(SplitterObject* self, PyObject *unused) {
+static PyObject* splitter_copy(SplitterObject* self, PyObject* unused) {
return make_splitter_copy(self);
}
@@ -20239,7 +20319,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
BOOL is_template = FALSE;
RE_State state;
RE_SafeState safe_state;
- JoinInfo join_info;
+ RE_JoinInfo join_info;
Py_ssize_t sub_count;
Py_ssize_t last_pos;
Py_ssize_t step;
@@ -21030,7 +21110,7 @@ Py_LOCAL_INLINE(PyObject*) make_pattern_copy(PatternObject* self) {
}
/* PatternObject's '__copy__' method. */
-static PyObject* pattern_copy(PatternObject* self, PyObject *unused) {
+static PyObject* pattern_copy(PatternObject* self, PyObject* unused) {
return make_pattern_copy(self);
}
@@ -21194,7 +21274,9 @@ static void pattern_dealloc(PyObject* self_) {
Py_DECREF(self->named_lists);
Py_DECREF(self->named_list_indexes);
+ Py_DECREF(self->required_chars);
re_dealloc(self->locale_info);
+ re_dealloc(self->packed_code_list);
PyObject_DEL(self);
}
@@ -21263,6 +21345,117 @@ Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) {
return TRUE;
}
+/* Packs the code list that's needed for pickling. */
+Py_LOCAL_INLINE(RE_UINT8*) pack_code_list(RE_CODE* code, Py_ssize_t code_len) {
+ Py_ssize_t max_size;
+ RE_UINT8* packed;
+ Py_ssize_t count;
+ RE_UINT32 value;
+ Py_ssize_t i;
+ RE_UINT8* new_packed;
+
+ /* What is the maximum number of bytes needed to store it?
+ *
+ * A 32-bit RE_CODE might need 5 bytes ((32 + 6) / 7).
+ */
+ max_size = code_len * 5 + ((sizeof(Py_ssize_t) * 8) + 6) / 7;
+
+ packed = (RE_UINT8*)re_alloc((size_t)max_size);
+ count = 0;
+
+ /* Store the length of the code list. */
+ value = (RE_UINT32)code_len;
+
+ while (value >= 0x80) {
+ packed[count++] = 0x80 | (value & 0x7F);
+ value >>= 7;
+ }
+
+ packed[count++] = value;
+
+ /* Store each of the elements of the code list. */
+ for (i = 0; i < code_len; i++) {
+ value = (RE_UINT32)code[i];
+
+ while (value >= 0x80) {
+ packed[count++] = 0x80 | (value & 0x7F);
+ value >>= 7;
+ }
+
+ packed[count++] = value;
+ }
+
+ /* Discard the unused bytes. */
+ new_packed = re_realloc(packed, count);
+ if (new_packed)
+ packed = new_packed;
+
+ return packed;
+}
+
+/* Unpacks the code list that's needed for pickling. */
+Py_LOCAL_INLINE(PyObject*) unpack_code_list(RE_UINT8* packed) {
+ PyObject* code_list;
+ Py_ssize_t index;
+ RE_UINT32 value;
+ int shift;
+ size_t count;
+
+ code_list = PyList_New(0);
+ if (!code_list)
+ return NULL;
+
+ index = 0;
+
+ /* Unpack the length of the code list. */
+ value = 0;
+ shift = 0;
+
+ while (packed[index] >= 0x80) {
+ value |= (RE_UINT32)(packed[index++] & 0x7F) << shift;
+ shift += 7;
+ }
+
+ value |= (RE_UINT32)packed[index++] << shift;
+ count = (size_t)value;
+
+ /* Unpack each of the elements of the code list. */
+ while (count > 0) {
+ PyObject* obj;
+ int status;
+
+ value = 0;
+ shift = 0;
+
+ while (packed[index] >= 0x80) {
+ value |= (RE_UINT32)(packed[index++] & 0x7F) << shift;
+ shift += 7;
+ }
+
+ value |= (RE_UINT32)packed[index++] << shift;
+#if PY_VERSION_HEX >= 0x02060000
+ obj = PyLong_FromSize_t((size_t)value);
+#else
+ obj = PyLong_FromUnsignedLongLong((size_t)value);
+#endif
+ if (!obj)
+ goto error;
+
+ status = PyList_Append(code_list, obj);
+ Py_DECREF(obj);
+ if (status == -1)
+ goto error;
+
+ --count;
+ }
+
+ return code_list;
+
+error:
+ Py_DECREF(code_list);
+ return NULL;
+}
+
/* MatchObject's '__repr__' method. */
static PyObject* match_repr(PyObject* self_) {
MatchObject* self;
@@ -21368,8 +21561,8 @@ static PyObject* pattern_repr(PyObject* self_) {
int flag_count;
unsigned int i;
Py_ssize_t pos;
- PyObject *key;
- PyObject *value;
+ PyObject* key;
+ PyObject* value;
PyObject* separator;
PyObject* result;
@@ -21462,9 +21655,32 @@ static PyObject* pattern_groupindex(PyObject* self_) {
return PyDict_Copy(self->groupindex);
}
+/* PatternObject's '_pickled_data' method. */
+static PyObject* pattern_pickled_data(PyObject* self_) {
+ PatternObject* self;
+ PyObject* code_list;
+ PyObject* pickled_data;
+
+ self = (PatternObject*)self_;
+
+ code_list = unpack_code_list(self->packed_code_list);
+ if (!code_list)
+ return NULL;
+
+ /* Build the data needed for picking. */
+ pickled_data = Py_BuildValue("OnOOOOOnOnn", self->pattern, self->flags,
+ code_list, self->groupindex, self->indexgroup, self->named_lists,
+ self->named_list_indexes, self->req_offset, self->required_chars,
+ self->req_flags, self->public_group_count);
+
+ return pickled_data;
+}
+
static PyGetSetDef pattern_getset[] = {
{"groupindex", (getter)pattern_groupindex, (setter)NULL,
"A dictionary mapping group names to group numbers."},
+ {"_pickled_data", (getter)pattern_pickled_data, (setter)NULL,
+ "Data used for pickling."},
{NULL} /* Sentinel */
};
@@ -21533,123 +21749,272 @@ Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) {
pattern->start_node = pattern->start_node->next_1.node;
}
-/* Adds guards to repeats which are followed by a reference to a group.
- *
- * Returns whether a guard was added for a node at or after the given node.
- */
+/* Initialises a check stack. */
+Py_LOCAL_INLINE(void) CheckStack_init(RE_CheckStack* stack) {
+ stack->capacity = 0;
+ stack->count = 0;
+ stack->items = NULL;
+}
+
+/* Finalises a check stack. */
+Py_LOCAL_INLINE(void) CheckStack_fini(RE_CheckStack* stack) {
+ PyMem_Free(stack->items);
+ stack->capacity = 0;
+ stack->count = 0;
+ stack->items = NULL;
+}
+
+/* Pushes an item onto a check stack. */
+Py_LOCAL_INLINE(BOOL) CheckStack_push(RE_CheckStack* stack, RE_Node* node,
+ RE_STATUS_T result) {
+ RE_Check* check;
+
+ if (stack->count >= stack->capacity) {
+ Py_ssize_t new_capacity;
+ RE_Check* new_items;
+
+ new_capacity = stack->capacity * 2;
+ if (new_capacity == 0)
+ new_capacity = 16;
+
+ new_items = (RE_Check*)PyMem_Realloc(stack->items, new_capacity *
+ sizeof(RE_Check));
+ if (!new_items)
+ return FALSE;
+
+ stack->capacity = new_capacity;
+ stack->items = new_items;
+ }
+
+ check = &stack->items[stack->count++];
+ check->node = node;
+ check->result = result;
+
+ return TRUE;
+}
+
+/* Pops an item off a check stack. Returns NULL if the stack is empty. */
+Py_LOCAL_INLINE(RE_Check*) CheckStack_pop(RE_CheckStack* stack) {
+ return stack->count > 0 ? &stack->items[--stack->count] : NULL;
+}
+
+/* Adds guards to repeats which are followed by a reference to a group. */
Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node*
- node) {
- RE_STATUS_T result;
+ start_node) {
+ RE_CheckStack stack;
- result = RE_STATUS_NEITHER;
+ CheckStack_init(&stack);
+
+ CheckStack_push(&stack, start_node, RE_STATUS_NEITHER);
for (;;) {
- if (node->status & RE_STATUS_VISITED_AG)
- return node->status & (RE_STATUS_REPEAT | RE_STATUS_REF);
+ RE_Check* check;
+ RE_Node* node;
+ RE_STATUS_T result;
- switch (node->op) {
- case RE_OP_BRANCH:
- {
- RE_STATUS_T branch_1_result;
- RE_STATUS_T branch_2_result;
- RE_STATUS_T status;
-
- branch_1_result = add_repeat_guards(pattern, node->next_1.node);
- branch_2_result = add_repeat_guards(pattern,
- node->nonstring.next_2.node);
- status = max_status_3(result, branch_1_result, branch_2_result);
- node->status = RE_STATUS_VISITED_AG | status;
- return status;
- }
- case RE_OP_END_GREEDY_REPEAT:
- case RE_OP_END_LAZY_REPEAT:
- node->status |= RE_STATUS_VISITED_AG;
- return result;
- case RE_OP_GREEDY_REPEAT:
- case RE_OP_LAZY_REPEAT:
- {
- BOOL limited;
- RE_STATUS_T body_result;
- RE_STATUS_T tail_result;
- RE_RepeatInfo* repeat_info;
- RE_STATUS_T status;
-
- limited = ~node->values[2] != 0;
- if (limited)
- body_result = RE_STATUS_LIMITED;
- else
- body_result = add_repeat_guards(pattern, node->next_1.node);
- tail_result = add_repeat_guards(pattern,
- node->nonstring.next_2.node);
-
- repeat_info = &pattern->repeat_info[node->values[0]];
- if (body_result != RE_STATUS_REF)
- repeat_info->status |= RE_STATUS_BODY;
- if (tail_result != RE_STATUS_REF)
- repeat_info->status |= RE_STATUS_TAIL;
- if (limited)
- result = max_status_2(result, RE_STATUS_LIMITED);
- else
- result = max_status_2(result, RE_STATUS_REPEAT);
- status = max_status_3(result, body_result, tail_result);
- node->status |= RE_STATUS_VISITED_AG | status;
- return status;
- }
- case RE_OP_GREEDY_REPEAT_ONE:
- case RE_OP_LAZY_REPEAT_ONE:
- {
- BOOL limited;
- RE_STATUS_T tail_result;
- RE_RepeatInfo* repeat_info;
- RE_STATUS_T status;
-
- limited = ~node->values[2] != 0;
- tail_result = add_repeat_guards(pattern, node->next_1.node);
-
- repeat_info = &pattern->repeat_info[node->values[0]];
- repeat_info->status |= RE_STATUS_BODY;
- if (tail_result != RE_STATUS_REF)
- repeat_info->status |= RE_STATUS_TAIL;
- if (limited)
- result = max_status_2(result, RE_STATUS_LIMITED);
- else
- result = max_status_2(result, RE_STATUS_REPEAT);
- status = max_status_3(result, RE_STATUS_REPEAT, tail_result);
- node->status = RE_STATUS_VISITED_AG | status;
- return status;
- }
- case RE_OP_GROUP_CALL:
- case RE_OP_REF_GROUP:
- case RE_OP_REF_GROUP_FLD:
- case RE_OP_REF_GROUP_FLD_REV:
- case RE_OP_REF_GROUP_IGN:
- case RE_OP_REF_GROUP_IGN_REV:
- case RE_OP_REF_GROUP_REV:
- result = RE_STATUS_REF;
- node = node->next_1.node;
- break;
- case RE_OP_GROUP_EXISTS:
- {
- RE_STATUS_T branch_1_result;
- RE_STATUS_T branch_2_result;
- RE_STATUS_T status;
-
- branch_1_result = add_repeat_guards(pattern, node->next_1.node);
- branch_2_result = add_repeat_guards(pattern,
- node->nonstring.next_2.node);
- status = max_status_4(result, branch_1_result, branch_2_result,
- RE_STATUS_REF);
- node->status = RE_STATUS_VISITED_AG | status;
- return status;
- }
- case RE_OP_SUCCESS:
- node->status = RE_STATUS_VISITED_AG | result;
- return result;
- default:
- node = node->next_1.node;
+ check = CheckStack_pop(&stack);
+
+ if (!check)
break;
+
+ node = check->node;
+ result = check->result;
+
+ if (!(node->status & RE_STATUS_VISITED_AG)) {
+ switch (check->node->op) {
+ case RE_OP_BRANCH:
+ {
+ RE_Node* branch_1;
+ RE_Node* branch_2;
+ BOOL visited_branch_1;
+ BOOL visited_branch_2;
+
+ branch_1 = node->next_1.node;
+ branch_2 = node->nonstring.next_2.node;
+ visited_branch_1 = (branch_1->status & RE_STATUS_VISITED_AG);
+ visited_branch_2 = (branch_2->status & RE_STATUS_VISITED_AG);
+
+ if (visited_branch_1 && visited_branch_2) {
+ RE_STATUS_T branch_1_result;
+ RE_STATUS_T branch_2_result;
+
+ branch_1_result = branch_1->status & (RE_STATUS_REPEAT |
+ RE_STATUS_REF);
+ branch_2_result = branch_2->status & (RE_STATUS_REPEAT |
+ RE_STATUS_REF);
+
+ node->status |= RE_STATUS_VISITED_AG | max_status_3(result,
+ branch_1_result, branch_2_result);
+ } else {
+ CheckStack_push(&stack, node, result);
+ if (!visited_branch_2)
+ CheckStack_push(&stack, branch_2, RE_STATUS_NEITHER);
+ if (!visited_branch_1)
+ CheckStack_push(&stack, branch_1, RE_STATUS_NEITHER);
+ }
+ break;
+ }
+ case RE_OP_END_GREEDY_REPEAT:
+ case RE_OP_END_LAZY_REPEAT:
+ node->status |= RE_STATUS_VISITED_AG;
+ break;
+ case RE_OP_GREEDY_REPEAT:
+ case RE_OP_LAZY_REPEAT:
+ {
+ BOOL limited;
+ RE_Node* body;
+ RE_Node* tail;
+ BOOL visited_body;
+ BOOL visited_tail;
+
+ limited = ~node->values[2] != 0;
+
+ body = node->next_1.node;
+ tail = node->nonstring.next_2.node;
+ visited_body = (body->status & RE_STATUS_VISITED_AG);
+ visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+ if (visited_body && visited_tail) {
+ RE_STATUS_T body_result;
+ RE_STATUS_T tail_result;
+ RE_RepeatInfo* repeat_info;
+
+ body_result = body->status & (RE_STATUS_REPEAT |
+ RE_STATUS_REF);
+ tail_result = tail->status & (RE_STATUS_REPEAT |
+ RE_STATUS_REF);
+
+ repeat_info = &pattern->repeat_info[node->values[0]];
+ if (body_result != RE_STATUS_REF)
+ repeat_info->status |= RE_STATUS_BODY;
+ if (tail_result != RE_STATUS_REF)
+ repeat_info->status |= RE_STATUS_TAIL;
+
+ if (limited)
+ result = max_status_2(result, RE_STATUS_LIMITED);
+ else
+ result = max_status_2(result, RE_STATUS_REPEAT);
+ node->status |= RE_STATUS_VISITED_AG | max_status_3(result,
+ body_result, tail_result);
+ } else {
+ CheckStack_push(&stack, node, result);
+ if (!visited_tail)
+ CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
+ if (!visited_body) {
+ if (limited)
+ body->status |= RE_STATUS_VISITED_AG |
+ RE_STATUS_LIMITED;
+ else
+ CheckStack_push(&stack, body, RE_STATUS_NEITHER);
+ }
+ }
+ break;
+ }
+ case RE_OP_GREEDY_REPEAT_ONE:
+ case RE_OP_LAZY_REPEAT_ONE:
+ {
+ RE_Node* tail;
+ BOOL visited_tail;
+
+ tail = node->next_1.node;
+ visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+ if (visited_tail) {
+ BOOL limited;
+ RE_STATUS_T tail_result;
+ RE_RepeatInfo* repeat_info;
+
+ limited = ~node->values[2] != 0;
+
+ tail_result = tail->status & (RE_STATUS_REPEAT |
+ RE_STATUS_REF);
+
+ repeat_info = &pattern->repeat_info[node->values[0]];
+ repeat_info->status |= RE_STATUS_BODY;
+
+ if (tail_result != RE_STATUS_REF)
+ repeat_info->status |= RE_STATUS_TAIL;
+
+ if (limited)
+ result = max_status_2(result, RE_STATUS_LIMITED);
+ else
+ result = max_status_2(result, RE_STATUS_REPEAT);
+ node->status |= RE_STATUS_VISITED_AG | max_status_3(result,
+ RE_STATUS_REPEAT, tail_result);
+ } else {
+ CheckStack_push(&stack, node, result);
+ CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
+ }
+ break;
+ }
+ case RE_OP_GROUP_CALL:
+ case RE_OP_REF_GROUP:
+ case RE_OP_REF_GROUP_FLD:
+ case RE_OP_REF_GROUP_FLD_REV:
+ case RE_OP_REF_GROUP_IGN:
+ case RE_OP_REF_GROUP_IGN_REV:
+ case RE_OP_REF_GROUP_REV:
+ {
+ RE_Node* tail;
+ BOOL visited_tail;
+
+ tail = node->next_1.node;
+ visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+ if (visited_tail)
+ node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF;
... 2826 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-regex.git
More information about the Python-modules-commits
mailing list