[Python-modules-commits] [python-regex] 01/05: Import python-regex_0.1.20160425.orig.tar.gz

Sandro Tosi morph at moszumanska.debian.org
Mon Apr 25 21:25:08 UTC 2016


This is an automated email from the git hooks/post-receive script.

morph pushed a commit to branch master
in repository python-regex.

commit 4c3a1584fa43ddc3dc071d73c173d9178e22876e
Author: Sandro Tosi <morph at debian.org>
Date:   Mon Apr 25 22:17:15 2016 +0100

    Import python-regex_0.1.20160425.orig.tar.gz
---
 PKG-INFO               |   2 +-
 Python2/_regex.c       | 745 ++++++++++++++++++++++++++++++++++++++-----------
 Python2/_regex_core.py | 275 +++++++++++-------
 Python2/regex.py       |  10 +-
 Python2/test_regex.py  |  63 +++++
 Python3/_regex.c       | 741 +++++++++++++++++++++++++++++++++++++-----------
 Python3/_regex_core.py | 275 +++++++++++-------
 Python3/regex.py       |  10 +-
 Python3/test_regex.py  |  63 +++++
 setup.py               |   2 +-
 10 files changed, 1666 insertions(+), 520 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index f3ac398..1afff98 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: regex
-Version: 2016.01.10
+Version: 2016.04.25
 Summary: Alternative regular expression module, to replace re.
 Home-page: https://bitbucket.org/mrabarnett/mrab-regex
 Author: Matthew Barnett
diff --git a/Python2/_regex.c b/Python2/_regex.c
index e960210..e3effcc 100644
--- a/Python2/_regex.c
+++ b/Python2/_regex.c
@@ -644,6 +644,7 @@ typedef struct PatternObject {
     PyObject_HEAD
     PyObject* pattern; /* Pattern source (or None). */
     Py_ssize_t flags; /* Flags used when compiling pattern source. */
+    RE_UINT8* packed_code_list;
     PyObject* weakreflist; /* List of weak references */
     /* Nodes into which the regular expression is compiled. */
     RE_Node* start_node;
@@ -679,7 +680,10 @@ typedef struct PatternObject {
     RE_GroupData* groups_storage;
     RE_RepeatData* repeats_storage;
     size_t fuzzy_count; /* The number of fuzzy sections. */
+    /* Additional info. */
     Py_ssize_t req_offset; /* The offset to the required string. */
+    PyObject* required_chars;
+    Py_ssize_t req_flags;
     RE_Node* req_string; /* The required string. */
     BOOL is_fuzzy; /* Whether it's a fuzzy pattern. */
     BOOL do_search_start; /* Whether to do an initial search. */
@@ -760,12 +764,12 @@ typedef struct RE_CompileArgs {
  * of them. Empty strings aren't recorded, so if 'list' and 'item' are both
  * NULL then the result is an empty string.
  */
-typedef struct JoinInfo {
+typedef struct RE_JoinInfo {
     PyObject* list; /* The list of slices if there are more than 2 of them. */
     PyObject* item; /* The slice if there is only 1 of them. */
     BOOL reversed; /* Whether the slices have been found in reverse order. */
     BOOL is_unicode; /* Whether the string is Unicode. */
-} JoinInfo;
+} RE_JoinInfo;
 
 /* Info about fuzzy matching. */
 typedef struct {
@@ -793,6 +797,25 @@ typedef struct RE_BestList {
     RE_BestEntry* entries;
 } RE_BestList;
 
+/* A stack of guard checks. */
+typedef struct RE_Check {
+    RE_Node* node; /* The node to be checked. */
+    RE_STATUS_T result; /* The status value saved along with the node. */
+} RE_Check;
+
+typedef struct RE_CheckStack {
+    Py_ssize_t capacity; /* The number of items that 'items' has room for. */
+    Py_ssize_t count; /* The number of items currently on the stack. */
+    RE_Check* items; /* The storage for the items (NULL until first push). */
+} RE_CheckStack;
+
+/* A stack of nodes. */
+typedef struct RE_NodeStack {
+    Py_ssize_t capacity; /* Presumably the room in 'items'; TODO confirm. */
+    Py_ssize_t count; /* Presumably the number of nodes held; TODO confirm. */
+    RE_Node** items; /* The storage for the node pointers. */
+} RE_NodeStack;
+
 /* Function types for getting info from a MatchObject. */
 typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index);
 
@@ -6826,8 +6849,10 @@ Py_LOCAL_INLINE(int) try_match_STRING(RE_State* state, RE_NextNode* next,
 
     for (s_pos = 0; s_pos < length; s_pos++) {
         if (text_pos + s_pos >= state->slice_end) {
-            if (state->partial_side == RE_PARTIAL_RIGHT)
+            if (state->partial_side == RE_PARTIAL_RIGHT) {
+                next_position->text_pos = text_pos;
                 return RE_ERROR_PARTIAL;
+            }
 
             return RE_ERROR_FAILURE;
         }
@@ -6874,8 +6899,13 @@ Py_LOCAL_INLINE(int) try_match_STRING_FLD(RE_State* state, RE_NextNode* next,
         if (f_pos >= folded_len) {
             /* Fetch and casefold another character. */
             if (text_pos >= state->slice_end) {
-                if (state->partial_side == RE_PARTIAL_RIGHT)
+                if (state->partial_side == RE_PARTIAL_RIGHT) {
+                    if (next->match_step == 0)
+                        next_position->text_pos = start_pos;
+                    else
+                        next_position->text_pos = text_pos;
                     return RE_ERROR_PARTIAL;
+                }
 
                 return RE_ERROR_FAILURE;
             }
@@ -6942,8 +6972,13 @@ Py_LOCAL_INLINE(int) try_match_STRING_FLD_REV(RE_State* state, RE_NextNode*
         if (f_pos >= folded_len) {
             /* Fetch and casefold another character. */
             if (text_pos <= state->slice_start) {
-                if (state->partial_side == RE_PARTIAL_LEFT)
+                if (state->partial_side == RE_PARTIAL_LEFT) {
+                    if (next->match_step == 0)
+                        next_position->text_pos = start_pos;
+                    else
+                        next_position->text_pos = text_pos;
                     return RE_ERROR_PARTIAL;
+                }
 
                 return RE_ERROR_FAILURE;
             }
@@ -6994,8 +7029,10 @@ Py_LOCAL_INLINE(int) try_match_STRING_IGN(RE_State* state, RE_NextNode* next,
 
     for (s_pos = 0; s_pos < length; s_pos++) {
         if (text_pos + s_pos >= state->slice_end) {
-            if (state->partial_side == RE_PARTIAL_RIGHT)
+            if (state->partial_side == RE_PARTIAL_RIGHT) {
+                next_position->text_pos = text_pos;
                 return RE_ERROR_PARTIAL;
+            }
 
             return RE_ERROR_FAILURE;
         }
@@ -7031,8 +7068,10 @@ Py_LOCAL_INLINE(int) try_match_STRING_IGN_REV(RE_State* state, RE_NextNode*
 
     for (s_pos = 0; s_pos < length; s_pos++) {
         if (text_pos - s_pos <= state->slice_start) {
-            if (state->partial_side == RE_PARTIAL_LEFT)
+            if (state->partial_side == RE_PARTIAL_LEFT) {
+                next_position->text_pos = text_pos;
                 return RE_ERROR_PARTIAL;
+            }
 
             return RE_ERROR_FAILURE;
         }
@@ -7062,8 +7101,10 @@ Py_LOCAL_INLINE(int) try_match_STRING_REV(RE_State* state, RE_NextNode* next,
 
     for (s_pos = 0; s_pos < length; s_pos++) {
         if (text_pos - s_pos <= state->slice_start) {
-            if (state->partial_side == RE_PARTIAL_LEFT)
+            if (state->partial_side == RE_PARTIAL_LEFT) {
+                next_position->text_pos = text_pos;
                 return RE_ERROR_PARTIAL;
+            }
 
             return RE_ERROR_FAILURE;
         }
@@ -7118,12 +7159,6 @@ Py_LOCAL_INLINE(int) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t
     case RE_OP_BOUNDARY:
         status = try_match_BOUNDARY(state, test, text_pos);
         break;
-    case RE_OP_BRANCH:
-        status = try_match(state, &test->next_1, text_pos, next_position);
-        if (status == RE_ERROR_FAILURE)
-            status = try_match(state, &test->nonstring.next_2, text_pos,
-              next_position);
-        break;
     case RE_OP_CHARACTER:
         status = try_match_CHARACTER(state, test, text_pos);
         break;
@@ -8607,6 +8642,11 @@ again:
             int status;
 
             status = try_match(state, &test->next_1, text_pos, new_position);
+            if (status == RE_ERROR_PARTIAL) {
+                new_position->node = node;
+                new_position->text_pos = start_pos;
+                return status;
+            }
             if (status < 0)
                 return status;
 
@@ -9491,6 +9531,7 @@ Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state,
     Py_ssize_t first;
     Py_ssize_t last;
     PyObject* string_set;
+    void* folded_buffer;
 
     state = safe_state->re_state;
     full_case_fold = state->encoding->full_case_fold;
@@ -9634,13 +9675,18 @@ Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state,
         goto finished;
     }
 
+    /* Point to the used portion of the folded buffer. */
+    folded_buffer = (void*)((Py_UCS1*)folded + first * folded_charsize);
+    last -= first;
+    first = 0;
+
     /* We've already looked for a partial match (if allowed), but what about a
      * complete match?
      */
     while (len >= min_len) {
         if (end_of_fold[len]) {
-            status = string_set_contains_ign(state, string_set, folded, first,
-              last, folded_charsize);
+            status = string_set_contains_ign(state, string_set, folded_buffer,
+              first, last, folded_charsize);
 
             if (status == 1) {
                 /* Advance past the match. */
@@ -10012,6 +10058,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = data.new_text_pos;
     *node = data.new_node;
@@ -10078,6 +10125,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = data.new_text_pos;
     *node = data.new_node;
@@ -10172,6 +10220,7 @@ Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t*
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_INS_COST];
     ++state->total_errors;
+    ++state->capture_change;
 
     /* Check whether there are too few errors. */
     state->too_few_errors = bt_data->fuzzy_insert.too_few_errors;
@@ -10248,6 +10297,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = data.new_text_pos;
     *string_pos = data.new_string_pos;
@@ -10312,6 +10362,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = data.new_text_pos;
     *node = new_node;
@@ -10430,6 +10481,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = new_text_pos;
     *string_pos = data.new_string_pos;
@@ -10504,6 +10556,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = new_text_pos;
     *node = new_node;
@@ -10628,6 +10681,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = new_text_pos;
     *group_pos = new_group_pos;
@@ -10699,6 +10753,7 @@ found:
     ++fuzzy_info->counts[RE_FUZZY_ERR];
     fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type];
     ++state->total_errors;
+    ++state->capture_change;
 
     *text_pos = new_text_pos;
     *node = new_node;
@@ -11375,7 +11430,7 @@ start_match:
     /* Locate the required string, if there's one, unless this is a recursive
      * call of 'basic_match'.
      */
-    if (!pattern->req_string)
+    if (!pattern->req_string || state->text_pos < state->req_pos)
         found_pos = state->text_pos;
     else {
         found_pos = locate_required_string(safe_state, search);
@@ -11396,7 +11451,7 @@ next_match_1:
              */
             status = search_start(safe_state, &start_pair, &new_position, 0);
             if (status == RE_ERROR_PARTIAL) {
-                state->match_pos = state->text_pos;
+                state->match_pos = new_position.text_pos;
                 return status;
             } else if (status != RE_ERROR_SUCCESS)
                 return status;
@@ -12009,6 +12064,12 @@ advance:
             changed = rp_data->capture_change != state->capture_change ||
               state->text_pos != rp_data->start;
 
+            /* Additional checks are needed if there's fuzzy matching. */
+            if (changed && state->pattern->is_fuzzy && rp_data->count >=
+              node->values[1])
+                changed = !(node->step == 1 ? state->text_pos >=
+                  state->slice_end : state->text_pos <= state->slice_start);
+
             /* The counts are of type size_t, so the format needs to specify
              * that.
              */
@@ -12174,6 +12235,12 @@ advance:
             changed = rp_data->capture_change != state->capture_change ||
               state->text_pos != rp_data->start;
 
+            /* Additional checks are needed if there's fuzzy matching. */
+            if (changed && state->pattern->is_fuzzy && rp_data->count >=
+              node->values[1])
+                changed = !(node->step == 1 ? state->text_pos >=
+                  state->slice_end : state->text_pos <= state->slice_start);
+
             /* The counts are of type size_t, so the format needs to specify
              * that.
              */
@@ -13316,8 +13383,10 @@ advance:
                     gfolded_pos = 0;
                 }
 
-                if (folded_pos < folded_len && folded[folded_pos] ==
-                  gfolded[gfolded_pos]) {
+                if (folded_pos < folded_len && same_char_ign(encoding,
+                  locale_info,
+                   folded[folded_pos],
+                   gfolded[gfolded_pos])) {
                     ++folded_pos;
                     ++gfolded_pos;
                 } else if (node->status & RE_STATUS_FUZZY) {
@@ -13419,8 +13488,9 @@ advance:
                     gfolded_pos = gfolded_len;
                 }
 
-                if (folded_pos > 0 && folded[folded_pos - 1] ==
-                  gfolded[gfolded_pos - 1]) {
+                if (folded_pos > 0 && same_char_ign(encoding, locale_info,
+                   folded[folded_pos - 1],
+                   gfolded[gfolded_pos - 1])) {
                     --folded_pos;
                     --gfolded_pos;
                 } else if (node->status & RE_STATUS_FUZZY) {
@@ -16226,11 +16296,21 @@ Py_LOCAL_INLINE(void) restore_groups(RE_SafeState* safe_state, RE_GroupData*
     state = safe_state->re_state;
     pattern = state->pattern;
 
-    for (g = 0; g < pattern->true_group_count; g++)
-        re_dealloc(state->groups[g].captures);
+    for (g = 0; g < pattern->true_group_count; g++) {
+        RE_GroupData* group;
+        RE_GroupData* saved;
+
+        group = &state->groups[g];
+        saved = &saved_groups[g];
+
+        group->span = saved->span;
 
-    Py_MEMCPY(state->groups, saved_groups, pattern->true_group_count *
-      sizeof(RE_GroupData));
+        group->capture_count = saved->capture_count;
+        Py_MEMCPY(group->captures, saved->captures, saved->capture_count *
+          sizeof(RE_GroupSpan));
+
+        re_dealloc(saved->captures);
+    }
 
     re_dealloc(saved_groups);
 
@@ -18261,8 +18341,8 @@ Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject*
 }
 
 /* Initialises the join list. */
-Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL
-  is_unicode) {
+Py_LOCAL_INLINE(void) init_join_list(RE_JoinInfo* join_info, BOOL reversed,
+  BOOL is_unicode) {
     join_info->list = NULL;
     join_info->item = NULL;
     join_info->reversed = reversed;
@@ -18270,7 +18350,7 @@ Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL
 }
 
 /* Adds an item to the join list. */
-Py_LOCAL_INLINE(int) add_to_join_list(JoinInfo* join_info, PyObject* item) {
+Py_LOCAL_INLINE(int) add_to_join_list(RE_JoinInfo* join_info, PyObject* item) {
     PyObject* new_item;
     int status;
 
@@ -18339,13 +18419,13 @@ error:
 }
 
 /* Clears the join list. */
-Py_LOCAL_INLINE(void) clear_join_list(JoinInfo* join_info) {
+Py_LOCAL_INLINE(void) clear_join_list(RE_JoinInfo* join_info) {
     Py_XDECREF(join_info->list);
     Py_XDECREF(join_info->item);
 }
 
 /* Joins together a list of strings for pattern_subx. */
-Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) {
+Py_LOCAL_INLINE(PyObject*) join_list_info(RE_JoinInfo* join_info) {
     /* If the list already exists then just do the join. */
     if (join_info->list) {
         PyObject* joiner;
@@ -18446,7 +18526,7 @@ Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement,
 static PyObject* match_expand(MatchObject* self, PyObject* str_template) {
     Py_ssize_t literal_length;
     PyObject* replacement;
-    JoinInfo join_info;
+    RE_JoinInfo join_info;
     Py_ssize_t size;
     Py_ssize_t i;
 
@@ -18676,7 +18756,7 @@ error:
 Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self);
 
 /* MatchObject's '__copy__' method. */
-static PyObject* match_copy(MatchObject* self, PyObject *unused) {
+static PyObject* match_copy(MatchObject* self, PyObject* unused) {
     return make_match_copy(self);
 }
 
@@ -19376,7 +19456,7 @@ static PyObject* scanner_match(ScannerObject* self, PyObject* unused) {
 }
 
 /* ScannerObject's 'search' method. */
-static PyObject* scanner_search(ScannerObject* self, PyObject *unused) {
+static PyObject* scanner_search(ScannerObject* self, PyObject* unused) {
     return scanner_search_or_match(self, TRUE);
 }
 
@@ -19430,7 +19510,7 @@ Py_LOCAL_INLINE(PyObject*) make_scanner_copy(ScannerObject* self) {
 }
 
 /* ScannerObject's '__copy__' method. */
-static PyObject* scanner_copy(ScannerObject* self, PyObject *unused) {
+static PyObject* scanner_copy(ScannerObject* self, PyObject* unused) {
     return make_scanner_copy(self);
 }
 
@@ -19712,7 +19792,7 @@ error:
 }
 
 /* SplitterObject's 'split' method. */
-static PyObject* splitter_split(SplitterObject* self, PyObject *unused) {
+static PyObject* splitter_split(SplitterObject* self, PyObject* unused) {
     PyObject* result;
 
     result = next_split_part(self);
@@ -19777,7 +19857,7 @@ Py_LOCAL_INLINE(PyObject*) make_splitter_copy(SplitterObject* self) {
 }
 
 /* SplitterObject's '__copy__' method. */
-static PyObject* splitter_copy(SplitterObject* self, PyObject *unused) {
+static PyObject* splitter_copy(SplitterObject* self, PyObject* unused) {
     return make_splitter_copy(self);
 }
 
@@ -20239,7 +20319,7 @@ Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject*
     BOOL is_template = FALSE;
     RE_State state;
     RE_SafeState safe_state;
-    JoinInfo join_info;
+    RE_JoinInfo join_info;
     Py_ssize_t sub_count;
     Py_ssize_t last_pos;
     Py_ssize_t step;
@@ -21030,7 +21110,7 @@ Py_LOCAL_INLINE(PyObject*) make_pattern_copy(PatternObject* self) {
 }
 
 /* PatternObject's '__copy__' method. */
-static PyObject* pattern_copy(PatternObject* self, PyObject *unused) {
+static PyObject* pattern_copy(PatternObject* self, PyObject* unused) {
     return make_pattern_copy(self);
 }
 
@@ -21194,7 +21274,9 @@ static void pattern_dealloc(PyObject* self_) {
 
     Py_DECREF(self->named_lists);
     Py_DECREF(self->named_list_indexes);
+    Py_DECREF(self->required_chars);
     re_dealloc(self->locale_info);
+    re_dealloc(self->packed_code_list);
     PyObject_DEL(self);
 }
 
@@ -21263,6 +21345,117 @@ Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) {
     return TRUE;
 }
 
+/* Packs the code list that's needed for pickling. Each value (the length
+ * first) is stored in 7-bit groups, least significant first, with the top bit
+ * set on all but the last byte. Returns NULL if the allocation fails.
+ */
+Py_LOCAL_INLINE(RE_UINT8*) pack_code_list(RE_CODE* code, Py_ssize_t code_len) {
+    Py_ssize_t max_size;
+    RE_UINT8* packed;
+    RE_UINT8* new_packed;
+    Py_ssize_t count;
+    RE_UINT32 value;
+    Py_ssize_t i;
+
+    /* What is the maximum number of bytes needed to store it? A 32-bit
+     * RE_CODE might need 5 bytes ((32 + 6) / 7), plus room for the length.
+     */
+    max_size = code_len * 5 + ((sizeof(Py_ssize_t) * 8) + 6) / 7;
+
+    packed = (RE_UINT8*)re_alloc((size_t)max_size);
+    if (!packed)
+        return NULL;
+
+    /* Store the length of the code list. */
+    count = 0;
+    value = (RE_UINT32)code_len;
+    while (value >= 0x80) {
+        packed[count++] = 0x80 | (value & 0x7F);
+        value >>= 7;
+    }
+    packed[count++] = value;
+
+    /* Store each of the elements of the code list. */
+    for (i = 0; i < code_len; i++) {
+        value = (RE_UINT32)code[i];
+        while (value >= 0x80) {
+            packed[count++] = 0x80 | (value & 0x7F);
+            value >>= 7;
+        }
+        packed[count++] = value;
+    }
+
+    /* Discard the unused bytes; keep the original buffer if that fails. */
+    new_packed = re_realloc(packed, count);
+    if (new_packed)
+        packed = new_packed;
+
+    return packed;
+}
+
+/* Unpacks the code list that's needed for pickling.
+ *
+ * The buffer is read in the format produced by pack_code_list: the number of
+ * codes, then the codes themselves, each stored in 7-bit groups (least
+ * significant first) with the top bit set on all but the last byte.
+ *
+ * Returns a new list of integer objects, or NULL if one can't be created.
+ */
+Py_LOCAL_INLINE(PyObject*) unpack_code_list(RE_UINT8* packed) {
+    PyObject* codes;
+    Py_ssize_t pos;
+    RE_UINT32 value;
+    int shift;
+    size_t remaining;
+
+    codes = PyList_New(0);
+    if (!codes)
+        return NULL;
+
+    pos = 0;
+
+    /* The length of the code list comes first. */
+    value = 0;
+    for (shift = 0; packed[pos] >= 0x80; shift += 7)
+        value |= (RE_UINT32)(packed[pos++] & 0x7F) << shift;
+
+    /* The final byte has its top bit clear. */
+    value |= (RE_UINT32)packed[pos++] << shift;
+
+    /* Then come the elements of the code list, one after another. */
+    for (remaining = (size_t)value; remaining > 0; --remaining) {
+        PyObject* obj;
+        int status;
+
+        value = 0;
+        for (shift = 0; packed[pos] >= 0x80; shift += 7)
+            value |= (RE_UINT32)(packed[pos++] & 0x7F) << shift;
+
+        /* As above, the final byte has its top bit clear. */
+        value |= (RE_UINT32)packed[pos++] << shift;
+
+#if PY_VERSION_HEX >= 0x02060000
+        obj = PyLong_FromSize_t((size_t)value);
+#else
+        obj = PyLong_FromUnsignedLongLong((size_t)value);
+#endif
+        if (!obj)
+            goto error;
+
+        /* Our reference to the new object is dropped whether or not the append succeeds. */
+        status = PyList_Append(codes, obj);
+        Py_DECREF(obj);
+        if (status == -1)
+            goto error;
+    }
+
+    return codes;
+
+error:
+    Py_DECREF(codes);
+    return NULL;
+}
+
 /* MatchObject's '__repr__' method. */
 static PyObject* match_repr(PyObject* self_) {
     MatchObject* self;
@@ -21368,8 +21561,8 @@ static PyObject* pattern_repr(PyObject* self_) {
     int flag_count;
     unsigned int i;
     Py_ssize_t pos;
-    PyObject *key;
-    PyObject *value;
+    PyObject* key;
+    PyObject* value;
     PyObject* separator;
     PyObject* result;
 
@@ -21462,9 +21655,32 @@ static PyObject* pattern_groupindex(PyObject* self_) {
     return PyDict_Copy(self->groupindex);
 }
 
+/* PatternObject's '_pickled_data' getter (new tuple, or NULL on error). */
+static PyObject* pattern_pickled_data(PyObject* self_) {
+    PatternObject* self = (PatternObject*)self_;
+    PyObject* code_list;
+    PyObject* pickled_data;
+
+    code_list = unpack_code_list(self->packed_code_list);
+    if (!code_list)
+        return NULL;
+
+    /* Build the data needed for pickling. "O" takes its own reference to each
+     * object, so ours to code_list must be released afterwards.
+     */
+    pickled_data = Py_BuildValue("OnOOOOOnOnn", self->pattern, self->flags,
+      code_list, self->groupindex, self->indexgroup, self->named_lists,
+      self->named_list_indexes, self->req_offset, self->required_chars,
+      self->req_flags, self->public_group_count);
+    Py_DECREF(code_list);
+
+    return pickled_data;
+}
+
 static PyGetSetDef pattern_getset[] = {
     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
       "A dictionary mapping group names to group numbers."},
+    {"_pickled_data", (getter)pattern_pickled_data, (setter)NULL,
+      "Data used for pickling."},
     {NULL} /* Sentinel */
 };
 
@@ -21533,123 +21749,272 @@ Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) {
         pattern->start_node = pattern->start_node->next_1.node;
 }
 
-/* Adds guards to repeats which are followed by a reference to a group.
- *
- * Returns whether a guard was added for a node at or after the given node.
- */
+/* Sets up a check stack as empty, with no storage allocated yet. */
+Py_LOCAL_INLINE(void) CheckStack_init(RE_CheckStack* stack) {
+    stack->items = NULL;
+    stack->count = 0;
+    stack->capacity = 0;
+}
+
+/* Releases a check stack's storage and leaves the stack empty. */
+Py_LOCAL_INLINE(void) CheckStack_fini(RE_CheckStack* stack) {
+    PyMem_Free(stack->items);
+    stack->items = NULL;
+    stack->count = 0;
+    stack->capacity = 0;
+}
+
+/* Pushes a (node, result) pair onto a check stack, growing the storage when
+ * it's full. Returns FALSE if the storage can't be grown, otherwise TRUE.
+ */
+Py_LOCAL_INLINE(BOOL) CheckStack_push(RE_CheckStack* stack, RE_Node* node,
+  RE_STATUS_T result) {
+    RE_Check* top;
+
+    if (stack->count >= stack->capacity) {
+        Py_ssize_t grown;
+        RE_Check* storage;
+
+        grown = stack->capacity == 0 ? 16 : stack->capacity * 2;
+
+        storage = (RE_Check*)PyMem_Realloc(stack->items, grown *
+          sizeof(RE_Check));
+        if (!storage)
+            return FALSE;
+
+        stack->items = storage;
+        stack->capacity = grown;
+    }
+
+    top = &stack->items[stack->count++];
+    top->node = node;
+    top->result = result;
+
+    return TRUE;
+}
+
+/* Pops the top item off a check stack, or returns NULL if it's empty. */
+Py_LOCAL_INLINE(RE_Check*) CheckStack_pop(RE_CheckStack* stack) {
+    return stack->count <= 0 ? NULL : &stack->items[--stack->count];
+}
+
+/* Adds guards to repeats which are followed by a reference to a group. */
 Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node*
-  node) {
-    RE_STATUS_T result;
+  start_node) {
+    RE_CheckStack stack;
 
-    result = RE_STATUS_NEITHER;
+    CheckStack_init(&stack);
+
+    CheckStack_push(&stack, start_node, RE_STATUS_NEITHER);
 
     for (;;) {
-        if (node->status & RE_STATUS_VISITED_AG)
-            return node->status & (RE_STATUS_REPEAT | RE_STATUS_REF);
+        RE_Check* check;
+        RE_Node* node;
+        RE_STATUS_T result;
 
-        switch (node->op) {
-        case RE_OP_BRANCH:
-        {
-            RE_STATUS_T branch_1_result;
-            RE_STATUS_T branch_2_result;
-            RE_STATUS_T status;
-
-            branch_1_result = add_repeat_guards(pattern, node->next_1.node);
-            branch_2_result = add_repeat_guards(pattern,
-              node->nonstring.next_2.node);
-            status = max_status_3(result, branch_1_result, branch_2_result);
-            node->status = RE_STATUS_VISITED_AG | status;
-            return status;
-        }
-        case RE_OP_END_GREEDY_REPEAT:
-        case RE_OP_END_LAZY_REPEAT:
-            node->status |= RE_STATUS_VISITED_AG;
-            return result;
-        case RE_OP_GREEDY_REPEAT:
-        case RE_OP_LAZY_REPEAT:
-        {
-            BOOL limited;
-            RE_STATUS_T body_result;
-            RE_STATUS_T tail_result;
-            RE_RepeatInfo* repeat_info;
-            RE_STATUS_T status;
-
-            limited = ~node->values[2] != 0;
-            if (limited)
-                body_result = RE_STATUS_LIMITED;
-            else
-                body_result = add_repeat_guards(pattern, node->next_1.node);
-            tail_result = add_repeat_guards(pattern,
-              node->nonstring.next_2.node);
-
-            repeat_info = &pattern->repeat_info[node->values[0]];
-            if (body_result != RE_STATUS_REF)
-                repeat_info->status |= RE_STATUS_BODY;
-            if (tail_result != RE_STATUS_REF)
-                repeat_info->status |= RE_STATUS_TAIL;
-            if (limited)
-                result = max_status_2(result, RE_STATUS_LIMITED);
-            else
-                result = max_status_2(result, RE_STATUS_REPEAT);
-            status = max_status_3(result, body_result, tail_result);
-            node->status |= RE_STATUS_VISITED_AG | status;
-            return status;
-        }
-        case RE_OP_GREEDY_REPEAT_ONE:
-        case RE_OP_LAZY_REPEAT_ONE:
-        {
-            BOOL limited;
-            RE_STATUS_T tail_result;
-            RE_RepeatInfo* repeat_info;
-            RE_STATUS_T status;
-
-            limited = ~node->values[2] != 0;
-            tail_result = add_repeat_guards(pattern, node->next_1.node);
-
-            repeat_info = &pattern->repeat_info[node->values[0]];
-            repeat_info->status |= RE_STATUS_BODY;
-            if (tail_result != RE_STATUS_REF)
-                repeat_info->status |= RE_STATUS_TAIL;
-            if (limited)
-                result = max_status_2(result, RE_STATUS_LIMITED);
-            else
-                result = max_status_2(result, RE_STATUS_REPEAT);
-            status = max_status_3(result, RE_STATUS_REPEAT, tail_result);
-            node->status = RE_STATUS_VISITED_AG | status;
-            return status;
-        }
-        case RE_OP_GROUP_CALL:
-        case RE_OP_REF_GROUP:
-        case RE_OP_REF_GROUP_FLD:
-        case RE_OP_REF_GROUP_FLD_REV:
-        case RE_OP_REF_GROUP_IGN:
-        case RE_OP_REF_GROUP_IGN_REV:
-        case RE_OP_REF_GROUP_REV:
-            result = RE_STATUS_REF;
-            node = node->next_1.node;
-            break;
-        case RE_OP_GROUP_EXISTS:
-        {
-            RE_STATUS_T branch_1_result;
-            RE_STATUS_T branch_2_result;
-            RE_STATUS_T status;
-
-            branch_1_result = add_repeat_guards(pattern, node->next_1.node);
-            branch_2_result = add_repeat_guards(pattern,
-              node->nonstring.next_2.node);
-            status = max_status_4(result, branch_1_result, branch_2_result,
-              RE_STATUS_REF);
-            node->status = RE_STATUS_VISITED_AG | status;
-            return status;
-        }
-        case RE_OP_SUCCESS:
-            node->status = RE_STATUS_VISITED_AG | result;
-            return result;
-        default:
-            node = node->next_1.node;
+        check = CheckStack_pop(&stack);
+
+        if (!check)
             break;
+
+        node = check->node;
+        result = check->result;
+
+        if (!(node->status & RE_STATUS_VISITED_AG)) {
+            switch (check->node->op) {
+            case RE_OP_BRANCH:
+            {
+                RE_Node* branch_1;
+                RE_Node* branch_2;
+                BOOL visited_branch_1;
+                BOOL visited_branch_2;
+
+                branch_1 = node->next_1.node;
+                branch_2 = node->nonstring.next_2.node;
+                visited_branch_1 = (branch_1->status & RE_STATUS_VISITED_AG);
+                visited_branch_2 = (branch_2->status & RE_STATUS_VISITED_AG);
+
+                if (visited_branch_1 && visited_branch_2) {
+                    RE_STATUS_T branch_1_result;
+                    RE_STATUS_T branch_2_result;
+
+                    branch_1_result = branch_1->status & (RE_STATUS_REPEAT |
+                      RE_STATUS_REF);
+                    branch_2_result = branch_2->status & (RE_STATUS_REPEAT |
+                      RE_STATUS_REF);
+
+                    node->status |= RE_STATUS_VISITED_AG | max_status_3(result,
+                      branch_1_result, branch_2_result);
+                } else {
+                    CheckStack_push(&stack, node, result);
+                    if (!visited_branch_2)
+                        CheckStack_push(&stack, branch_2, RE_STATUS_NEITHER);
+                    if (!visited_branch_1)
+                        CheckStack_push(&stack, branch_1, RE_STATUS_NEITHER);
+                }
+                break;
+            }
+            case RE_OP_END_GREEDY_REPEAT:
+            case RE_OP_END_LAZY_REPEAT:
+                node->status |= RE_STATUS_VISITED_AG;
+                break;
+            case RE_OP_GREEDY_REPEAT:
+            case RE_OP_LAZY_REPEAT:
+            {
+                BOOL limited;
+                RE_Node* body;
+                RE_Node* tail;
+                BOOL visited_body;
+                BOOL visited_tail;
+
+                limited = ~node->values[2] != 0;
+
+                body = node->next_1.node;
+                tail = node->nonstring.next_2.node;
+                visited_body = (body->status & RE_STATUS_VISITED_AG);
+                visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+                if (visited_body && visited_tail) {
+                    RE_STATUS_T body_result;
+                    RE_STATUS_T tail_result;
+                    RE_RepeatInfo* repeat_info;
+
+                    body_result = body->status & (RE_STATUS_REPEAT |
+                      RE_STATUS_REF);
+                    tail_result = tail->status & (RE_STATUS_REPEAT |
+                      RE_STATUS_REF);
+
+                    repeat_info = &pattern->repeat_info[node->values[0]];
+                    if (body_result != RE_STATUS_REF)
+                        repeat_info->status |= RE_STATUS_BODY;
+                    if (tail_result != RE_STATUS_REF)
+                        repeat_info->status |= RE_STATUS_TAIL;
+
+                    if (limited)
+                        result = max_status_2(result, RE_STATUS_LIMITED);
+                    else
+                        result = max_status_2(result, RE_STATUS_REPEAT);
+                    node->status |= RE_STATUS_VISITED_AG | max_status_3(result,
+                      body_result, tail_result);
+                } else {
+                    CheckStack_push(&stack, node, result);
+                    if (!visited_tail)
+                        CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
+                    if (!visited_body) {
+                        if (limited)
+                            body->status |= RE_STATUS_VISITED_AG |
+                              RE_STATUS_LIMITED;
+                        else
+                            CheckStack_push(&stack, body, RE_STATUS_NEITHER);
+                    }
+                }
+                break;
+            }
+            case RE_OP_GREEDY_REPEAT_ONE:
+            case RE_OP_LAZY_REPEAT_ONE:
+            {
+                RE_Node* tail;
+                BOOL visited_tail;
+
+                tail = node->next_1.node;
+                visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+                if (visited_tail) {
+                    BOOL limited;
+                    RE_STATUS_T tail_result;
+                    RE_RepeatInfo* repeat_info;
+
+                    limited = ~node->values[2] != 0;
+
+                    tail_result = tail->status & (RE_STATUS_REPEAT |
+                      RE_STATUS_REF);
+
+                    repeat_info = &pattern->repeat_info[node->values[0]];
+                    repeat_info->status |= RE_STATUS_BODY;
+
+                    if (tail_result != RE_STATUS_REF)
+                        repeat_info->status |= RE_STATUS_TAIL;
+
+                    if (limited)
+                        result = max_status_2(result, RE_STATUS_LIMITED);
+                    else
+                        result = max_status_2(result, RE_STATUS_REPEAT);
+                    node->status |= RE_STATUS_VISITED_AG | max_status_3(result,
+                      RE_STATUS_REPEAT, tail_result);
+                } else {
+                    CheckStack_push(&stack, node, result);
+                    CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
+                }
+                break;
+            }
+            case RE_OP_GROUP_CALL:
+            case RE_OP_REF_GROUP:
+            case RE_OP_REF_GROUP_FLD:
+            case RE_OP_REF_GROUP_FLD_REV:
+            case RE_OP_REF_GROUP_IGN:
+            case RE_OP_REF_GROUP_IGN_REV:
+            case RE_OP_REF_GROUP_REV:
+            {
+                RE_Node* tail;
+                BOOL visited_tail;
+
+                tail = node->next_1.node;
+                visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+                if (visited_tail)
+                    node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF;
... 2826 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-regex.git



More information about the Python-modules-commits mailing list