[Python-modules-commits] [python-regex] 01/04: Import python-regex_0.1.20160721.orig.tar.gz

Tue Aug 16 18:42:40 UTC 2016

This is an automated email from the git hooks/post-receive script.

morph pushed a commit to branch master
in repository python-regex.

commit f1a7ba9269a6cd8974f9b7065c30c3b56317bd89
Author: Sandro Tosi <morph at debian.org>
Date:   Tue Aug 16 19:38:41 2016 +0100

    Import python-regex_0.1.20160721.orig.tar.gz
---
 PKG-INFO                   |     4 +-
 Python2/_regex.c           |   196 +-
 Python2/_regex.h           |     2 +-
 Python2/_regex_core.py     |    14 +-
 Python2/_regex_unicode.c   | 13197 ++++++++++++++++++++++---------------------
 Python2/_regex_unicode.h   |    67 +-
 Python2/regex.py           |     2 +-
 Python2/test_regex.py      |    10 +
 Python3/_regex.c           |   196 +-
 Python3/_regex.h           |     2 +-
 Python3/_regex_core.py     |    12 +-
 Python3/_regex_unicode.c   | 13197 ++++++++++++++++++++++---------------------
 Python3/_regex_unicode.h   |    67 +-
 Python3/regex.py           |     2 +-
 Python3/test_regex.py      |    10 +
 docs/Features.rst          |     2 +-
 docs/UnicodeProperties.txt |    48 +-
 setup.py                   |     2 +-
 18 files changed, 14192 insertions(+), 12838 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index 6d9055e..2eae981 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: regex
-Version: 2016.06.19
+Version: 2016.07.21
 Summary: Alternative regular expression module, to replace re.
 Home-page: https://bitbucket.org/mrabarnett/mrab-regex
 Author: Matthew Barnett
@@ -139,7 +139,7 @@ Description: Introduction
         Unicode
         -------
         
-        This module supports Unicode 8.0.
+        This module supports Unicode 9.0.
         
         Full Unicode case-folding is supported.
         
diff --git a/Python2/_regex.c b/Python2/_regex.c
index fddf67f..bd8a84d 100644
--- a/Python2/_regex.c
+++ b/Python2/_regex.c
@@ -1564,7 +1564,10 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
     Py_ssize_t pos_p1;
     int prop_p1;
 
-    /* Break at the start and end of the text. */
+    /* Break at the start and end of the text, unless the text is empty. */
+    if (state->text_length == 0)
+        return FALSE;
+
     /* WB1 */
     if (text_pos <= 0)
         return TRUE;
@@ -1584,12 +1587,21 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
         return FALSE;
 
     /* Otherwise break before and after Newlines (including CR and LF). */
-    /* WB3a and WB3b */
+    /* WB3a */
     if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 ==
-      RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop ==
       RE_BREAK_LF)
         return TRUE;
 
+    /* WB3b */
+    if (prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == RE_BREAK_LF)
+        return TRUE;
+
+    /* Don't break within emoji zwj sequences. */
+    /* WB3c */
+    if (prop_m1 == RE_BREAK_ZWJ && (prop == RE_BREAK_GLUEAFTERZWJ || prop ==
+      RE_BREAK_EBASEGAZ))
+        return FALSE;
+
     /* WB4 */
     /* Get the property of the previous character, ignoring Format and Extend
      * characters.
@@ -1598,7 +1610,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
     prop_m1 = RE_BREAK_OTHER;
     while (pos_m1 >= 0) {
         prop_m1 = (int)re_get_word_break(char_at(state->text, pos_m1));
-        if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT)
+        if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT && prop_m1
+          != RE_BREAK_ZWJ)
             break;
 
         --pos_m1;
@@ -1611,7 +1624,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
     prop_m2 = RE_BREAK_OTHER;
     while (pos_m2 >= 0) {
         prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2));
-        if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT)
+        if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT && prop_m2
+          != RE_BREAK_ZWJ)
             break;
 
         --pos_m2;
@@ -1624,7 +1638,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
     prop_p0 = prop;
     while (pos_p0 < state->text_length) {
         prop_p0 = (int)re_get_word_break(char_at(state->text, pos_p0));
-        if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT)
+        if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT && prop_p0
+          != RE_BREAK_ZWJ)
             break;
 
         ++pos_p0;
@@ -1637,7 +1652,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
     prop_p1 = RE_BREAK_OTHER;
     while (pos_p1 < state->text_length) {
         prop_p1 = (int)re_get_word_break(char_at(state->text, pos_p1));
-        if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT)
+        if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT && prop_p1
+          != RE_BREAK_ZWJ)
             break;
 
         ++pos_p1;
@@ -1662,19 +1678,23 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
       prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER ||
       prop_p1 == RE_BREAK_HEBREWLETTER))
         return FALSE;
+
     /* WB7 */
     if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) &&
       (prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET ||
       prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER ||
       prop_p0 == RE_BREAK_HEBREWLETTER))
         return FALSE;
+
     /* WB7a */
     if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE)
         return FALSE;
+
     /* WB7b */
     if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE &&
       prop_p1 == RE_BREAK_HEBREWLETTER)
         return FALSE;
+
     /* WB7c */
     if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE &&
       prop_p0 == RE_BREAK_HEBREWLETTER)
@@ -1686,10 +1706,12 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
     /* WB8 */
     if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC)
         return FALSE;
+
     /* WB9 */
     if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) &&
       prop_p0 == RE_BREAK_NUMERIC)
         return FALSE;
+
     /* WB10 */
     if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0
       == RE_BREAK_HEBREWLETTER))
@@ -1701,6 +1723,7 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
       == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 ==
       RE_BREAK_NUMERIC)
         return FALSE;
+
     /* WB12 */
     if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0
       == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 ==
@@ -1718,20 +1741,44 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
       prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 ==
       RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET)
         return FALSE;
+
     /* WB13b */
     if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER ||
       prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC ||
       prop_p0 == RE_BREAK_KATAKANA))
         return FALSE;
 
-    /* Don't break between regional indicator symbols. */
-    /* WB13c */
-    if (prop_m1 == RE_BREAK_REGIONALINDICATOR && prop_p0 ==
-      RE_BREAK_REGIONALINDICATOR)
+    /* Don't break within emoji modifier sequences. */
+    /* WB14 */
+    if ((prop_m1 == RE_BREAK_EBASE || prop_m1 == RE_BREAK_EBASEGAZ) && prop_p0
+      == RE_BREAK_EMODIFIER)
         return FALSE;
 
+    /* Don't break within emoji flag sequences. That is, don't break between
+     * regional indicator (RI) symbols if there is an odd number of RI
+     * characters before the break point.
+     */
+    /* WB15 and WB16 */
+    prop = (int)re_get_word_break(char_at(state->text, text_pos));
+    if (prop == RE_BREAK_REGIONALINDICATOR) {
+        Py_ssize_t pos;
+
+        pos = text_pos - 1;
+        while (pos >= 0) {
+            prop = (int)re_get_word_break(char_at(state->text, pos));
+            if (prop != RE_BREAK_REGIONALINDICATOR)
+                break;
+
+            --pos;
+        }
+        ++pos;
+
+        if ((text_pos - pos) % 2 != 0)
+            return FALSE;
+    }
+
     /* Otherwise, break everywhere (including around ideographs). */
-    /* WB14 */
+    /* WB999 */
     return TRUE;
 }
 
@@ -1898,6 +1945,10 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
     int prop;
     int prop_m1;
 
+    /* Break at the start and end of text, unless the text is empty. */
+    if (state->text_length == 0)
+        return FALSE;
+
     /* Break at the start and end of the text. */
     /* GB1 */
     if (text_pos <= 0)
@@ -1919,10 +1970,14 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
         return FALSE;
 
     /* Otherwise break before and after controls (including CR and LF). */
-    /* GB4 and GB5 */
+    /* GB4 */
     if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 ==
-      RE_GBREAK_LF || prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop
-      == RE_GBREAK_LF)
+      RE_GBREAK_LF)
+        return TRUE;
+
+    /* GB5 */
+    if (prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop ==
+      RE_GBREAK_LF)
         return TRUE;
 
     /* Don't break Hangul syllable sequences. */
@@ -1930,24 +1985,20 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
     if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V
       || prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT))
         return FALSE;
+
     /* GB7 */
     if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop ==
       RE_GBREAK_V || prop == RE_GBREAK_T))
         return FALSE;
+
     /* GB8 */
     if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop ==
       RE_GBREAK_T))
         return FALSE;
 
-    /* Don't break between regional indicator symbols. */
-    /* GB8a */
-    if (prop_m1 == RE_GBREAK_REGIONALINDICATOR && prop ==
-      RE_GBREAK_REGIONALINDICATOR)
-        return FALSE;
-
     /* Don't break just before Extend characters. */
     /* GB9 */
-    if (prop == RE_GBREAK_EXTEND)
+    if (prop == RE_GBREAK_EXTEND || prop == RE_GBREAK_ZWJ)
         return FALSE;
 
     /* Don't break before SpacingMarks, or after Prepend characters. */
@@ -1959,8 +2010,57 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
     if (prop_m1 == RE_GBREAK_PREPEND)
         return FALSE;
 
-    /* Otherwise, break everywhere. */
+    /* Don't break within emoji modifier sequences or emoji zwj sequences. */
     /* GB10 */
+    if (prop == RE_GBREAK_EMODIFIER) {
+        Py_ssize_t pos;
+
+        pos = text_pos - 1;
+        while (pos >= 0) {
+            int prev_prop;
+
+            prev_prop = (int)re_get_grapheme_cluster_break(char_at(state->text,
+              pos));
+            if (prev_prop != RE_GBREAK_EXTEND) {
+                if (prev_prop == RE_GBREAK_EBASE || prev_prop ==
+                  RE_GBREAK_EBASEGAZ)
+                    return FALSE;
+                break;
+            }
+            --pos;
+        }
+    }
+
+    /* GB11 */
+    if (prop_m1 == RE_GBREAK_ZWJ && (prop == RE_GBREAK_GLUEAFTERZWJ || prop ==
+      RE_GBREAK_EBASEGAZ))
+        return FALSE;
+
+    /* Don't break within emoji flag sequences. That is, don't break between
+     * regional indicator (RI) symbols if there is an odd number of RI
+     * characters before the break point.
+     */
+    /* GB12 and GB13 */
+    if (prop == RE_GBREAK_REGIONALINDICATOR) {
+        Py_ssize_t pos;
+
+        pos = text_pos - 1;
+        while (pos >= 0) {
+            prop = (int)re_get_grapheme_cluster_break(char_at(state->text,
+              pos));
+            if (prop != RE_GBREAK_REGIONALINDICATOR)
+                break;
+
+            --pos;
+        }
+        ++pos;
+
+        if ((text_pos - pos) % 2 != 0)
+            return FALSE;
+    }
+
+    /* Otherwise, break everywhere. */
+    /* GB999 */
     return TRUE;
 }
 
@@ -14708,7 +14808,10 @@ backtrack:
                  * backtracked inside and already restored the groups. We also
                  * need to restore certain flags.
                  */
-                if (bt_data->lookaround.node->match)
+                RE_Node* node;
+
+                node = bt_data->lookaround.node;
+                if (node->match && (node->status & RE_STATUS_HAS_GROUPS))
                     pop_groups(state);
 
                 state->too_few_errors = bt_data->lookaround.too_few_errors;
@@ -14776,7 +14879,7 @@ backtrack:
             discard_backtrack(state);
             break;
         }
-        case RE_OP_FAILURE:
+        case RE_OP_FAILURE: /* Failure. */
         {
             TRACE(("%s\n", re_op_text[bt_data->op]))
 
@@ -21987,28 +22090,6 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node*
                 }
                 break;
             }
-            case RE_OP_GROUP_CALL:
-            case RE_OP_REF_GROUP:
-            case RE_OP_REF_GROUP_FLD:
-            case RE_OP_REF_GROUP_FLD_REV:
-            case RE_OP_REF_GROUP_IGN:
-            case RE_OP_REF_GROUP_IGN_REV:
-            case RE_OP_REF_GROUP_REV:
-            {
-                RE_Node* tail;
-                BOOL visited_tail;
-
-                tail = node->next_1.node;
-                visited_tail = (tail->status & RE_STATUS_VISITED_AG);
-
-                if (visited_tail)
-                    node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF;
-                else {
-                    CheckStack_push(&stack, node, result);
-                    CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
-                }
-                break;
-            }
             case RE_OP_GROUP_EXISTS:
             {
                 RE_Node* branch_1;
@@ -22041,6 +22122,27 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node*
                 }
                 break;
             }
+            case RE_OP_REF_GROUP:
+            case RE_OP_REF_GROUP_FLD:
+            case RE_OP_REF_GROUP_FLD_REV:
+            case RE_OP_REF_GROUP_IGN:
+            case RE_OP_REF_GROUP_IGN_REV:
+            case RE_OP_REF_GROUP_REV:
+            {
+                RE_Node* tail;
+                BOOL visited_tail;
+
+                tail = node->next_1.node;
+                visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+                if (visited_tail)
+                    node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF;
+                else {
+                    CheckStack_push(&stack, node, result);
+                    CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
+                }
+                break;
+            }
             case RE_OP_SUCCESS:
                 node->status |= RE_STATUS_VISITED_AG | result;
                 break;
diff --git a/Python2/_regex.h b/Python2/_regex.h
index 37ab8a9..33ccfdb 100644
--- a/Python2/_regex.h
+++ b/Python2/_regex.h
@@ -11,7 +11,7 @@
  * 2010-01-16 mrab Re-written
  */
 
-/* Supports Unicode version 8.0.0. */
+/* Supports Unicode version 9.0.0. */
 
 #define RE_MAGIC 20100116
 
diff --git a/Python2/_regex_core.py b/Python2/_regex_core.py
index 5599ed4..1b3f005 100644
--- a/Python2/_regex_core.py
+++ b/Python2/_regex_core.py
@@ -2067,9 +2067,11 @@ class Branch(RegexBase):
 
     def _add_precheck(self, info, reverse, branches):
         charset = set()
+        pos = -1 if reverse else 0
+
         for branch in branches:
             if type(branch) is Literal and branch.case_flags == NOCASE:
-                charset.add(branch.characters[0])
+                charset.add(branch.characters[pos])
             else:
                 return
 
@@ -2995,7 +2997,7 @@ class LookAround(RegexBase):
         self.subpattern.fix_groups(pattern, self.behind, fuzzy)
 
     def optimise(self, info, reverse):
-        subpattern = self.subpattern.optimise(info, reverse)
+        subpattern = self.subpattern.optimise(info, self.behind)
         if self.positive and subpattern.is_empty():
             return subpattern
 
@@ -3053,9 +3055,9 @@ class LookAroundConditional(RegexBase):
         self.no_item.fix_groups(pattern, reverse, fuzzy)
 
     def optimise(self, info, reverse):
-        subpattern = self.subpattern.optimise(info, reverse)
-        yes_item = self.yes_item.optimise(info, reverse)
-        no_item = self.no_item.optimise(info, reverse)
+        subpattern = self.subpattern.optimise(info, self.behind)
+        yes_item = self.yes_item.optimise(info, self.behind)
+        no_item = self.no_item.optimise(info, self.behind)
 
         return LookAroundConditional(self.behind, self.positive, subpattern,
           yes_item, no_item)
@@ -3108,7 +3110,7 @@ class LookAroundConditional(RegexBase):
         print("%sEITHER" % (INDENT * indent))
         self.yes_item.dump(indent + 1, reverse)
         if not self.no_item.is_empty():
-            print("%sOR".format(INDENT * indent))
+            print("%sOR" % (INDENT * indent))
             self.no_item.dump(indent + 1, reverse)
 
     def is_empty(self):
diff --git a/Python2/_regex_unicode.c b/Python2/_regex_unicode.c
index 47c896e..f470005 100644
--- a/Python2/_regex_unicode.c
+++ b/Python2/_regex_unicode.c
@@ -1,4 +1,4 @@
-/* For Unicode version 8.0.0 */
+/* For Unicode version 9.0.0 */
 
 #include "_regex_unicode.h"
 
@@ -24,9 +24,12 @@ char* re_strings[] = {
     "1/10",
     "1/12",
     "1/16",
+    "1/160",
     "1/2",
+    "1/20",
     "1/3",
     "1/4",
+    "1/40",
     "1/5",
     "1/6",
     "1/7",
@@ -98,9 +101,11 @@ char* re_strings[] = {
     "3",
     "3/16",
     "3/2",
+    "3/20",
     "3/4",
     "3/5",
     "3/8",
+    "3/80",
     "30",
     "300",
     "3000",
@@ -176,7 +181,12 @@ char* re_strings[] = {
     "ABOVE",
     "ABOVELEFT",
     "ABOVERIGHT",
+    "ADLAM",
+    "ADLM",
     "AEGEANNUMBERS",
+    "AFRICANFEH",
+    "AFRICANNOON",
+    "AFRICANQAF",
     "AGHB",
     "AHEX",
     "AHOM",
@@ -260,6 +270,8 @@ char* re_strings[] = {
     "BENG",
     "BENGALI",
     "BETH",
+    "BHAIKSUKI",
+    "BHKS",
     "BIDIC",
     "BIDICLASS",
     "BIDICONTROL",
@@ -459,8 +471,10 @@ char* re_strings[] = {
     "CYRILLIC",
     "CYRILLICEXTA",
     "CYRILLICEXTB",
+    "CYRILLICEXTC",
     "CYRILLICEXTENDEDA",
     "CYRILLICEXTENDEDB",
+    "CYRILLICEXTENDEDC",
     "CYRILLICSUP",
     "CYRILLICSUPPLEMENT",
     "CYRILLICSUPPLEMENTARY",
@@ -508,10 +522,16 @@ char* re_strings[] = {
     "EA",
     "EARLYDYNASTICCUNEIFORM",
     "EASTASIANWIDTH",
+    "EB",
+    "EBASE",
+    "EBASEGAZ",
+    "EBG",
     "EGYP",
     "EGYPTIANHIEROGLYPHS",
     "ELBA",
     "ELBASAN",
+    "EM",
+    "EMODIFIER",
     "EMOTICONS",
     "EN",
     "ENC",
@@ -562,6 +582,7 @@ char* re_strings[] = {
     "FULLWIDTH",
     "GAF",
     "GAMAL",
+    "GAZ",
     "GC",
     "GCB",
     "GEMINATIONMARK",
@@ -577,7 +598,10 @@ char* re_strings[] = {
     "GL",
     "GLAG",
     "GLAGOLITIC",
+    "GLAGOLITICSUP",
+    "GLAGOLITICSUPPLEMENT",
     "GLUE",
+    "GLUEAFTERZWJ",
     "GOTH",
     "GOTHIC",
     "GRAN",
@@ -650,6 +674,8 @@ char* re_strings[] = {
     "IDEO",
     "IDEOGRAPHIC",
     "IDEOGRAPHICDESCRIPTIONCHARACTERS",
+    "IDEOGRAPHICSYMBOLS",
+    "IDEOGRAPHICSYMBOLSANDPUNCTUATION",
     "IDS",
     "IDSB",
     "IDSBINARYOPERATOR",
@@ -840,6 +866,8 @@ char* re_strings[] = {
     "MANICHAEANWAW",
     "MANICHAEANYODH",
     "MANICHAEANZAYIN",
+    "MARC",
+    "MARCHEN",
     "MARK",
     "MATH",
     "MATHALPHANUM",
@@ -890,6 +918,8 @@ char* re_strings[] = {
     "MODIFYINGLETTER",
     "MONG",
     "MONGOLIAN",
+    "MONGOLIANSUP",
+    "MONGOLIANSUPPLEMENT",
     "MRO",
     "MROO",
     "MTEI",
@@ -916,6 +946,7 @@ char* re_strings[] = {
     "NCHAR",
     "ND",
     "NEUTRAL",
+    "NEWA",
     "NEWLINE",
     "NEWTAILUE",
     "NEXTLINE",
@@ -979,6 +1010,8 @@ char* re_strings[] = {
     "ORKH",
     "ORNAMENTALDINGBATS",
     "ORYA",
+    "OSAGE",
+    "OSGE",
     "OSMA",
     "OSMANYA",
     "OTHER",
@@ -1012,6 +1045,7 @@ char* re_strings[] = {
     "PAUC",
     "PAUCINHAU",
     "PC",
+    "PCM",
     "PD",
     "PDF",
     "PDI",
@@ -1045,6 +1079,7 @@ char* re_strings[] = {
     "PR",
     "PREFIXNUMERIC",
     "PREPEND",
+    "PREPENDEDCONCATENATIONMARK",
     "PRINT",
     "PRIVATEUSE",
     "PRIVATEUSEAREA",
@@ -1106,6 +1141,7 @@ char* re_strings[] = {
     "SEGMENTSEPARATOR",
     "SEMKATH",
     "SENTENCEBREAK",
+    "SENTENCETERMINAL",
     "SEP",
     "SEPARATOR",
     "SG",
@@ -1199,6 +1235,9 @@ char* re_strings[] = {
     "TALU",
     "TAMIL",
     "TAML",
+    "TANG",
+    "TANGUT",
+    "TANGUTCOMPONENTS",
     "TAVT",
     "TAW",
     "TEHMARBUTA",
@@ -1311,1585 +1350,1646 @@ char* re_strings[] = {
     "ZP",
     "ZS",
     "ZW",
+    "ZWJ",
     "ZWSPACE",
     "ZYYY",
     "ZZZZ",
 };
 
-/* strings: 12240 bytes. */
+/* strings: 12639 bytes. */
 
 /* properties. */
 
 RE_Property re_properties[] = {
-    { 547,  0,  0},
-    { 544,  0,  0},
-    { 252,  1,  1},
-    { 251,  1,  1},
-    {1081,  2,  2},
-    {1079,  2,  2},
-    {1259,  3,  3},
-    {1254,  3,  3},
+    { 568,  0,  0},
+    { 565,  0,  0},
+    { 264,  1,  1},
+    { 263,  1,  1},
+    {1116,  2,  2},
+    {1114,  2,  2},
+    {1298,  3,  3},
+    {1293,  3,  3},
+    { 590,  4,  4},
     { 566,  4,  4},
-    { 545,  4,  4},
-    {1087,  5,  5},
-    {1078,  5,  5},
-    { 823,  6,  6},
-    { 172,  7,  6},
-    { 171,  7,  6},
-    { 767,  8,  6},
-    { 766,  8,  6},
-    {1227,  9,  6},
-    {1226,  9,  6},
-    { 294, 10,  6},
-    { 296, 11,  6},
-    { 350, 11,  6},
-    { 343, 12,  6},
-    { 433, 12,  6},
-    { 345, 13,  6},
-    { 435, 13,  6},
-    { 344, 14,  6},
-    { 434, 14,  6},
-    { 341, 15,  6},
-    { 431, 15,  6},
-    { 342, 16,  6},
-    { 432, 16,  6},
-    { 636, 17,  6},
-    { 632, 17,  6},
-    { 628, 18,  6},
-    { 627, 18,  6},
-    {1267, 19,  6},
-    {1266, 19,  6},
-    {1265, 20,  6},
-    {1264, 20,  6},
-    { 458, 21,  6},
-    { 466, 21,  6},
-    { 567, 22,  6},
-    { 575, 22,  6},
-    { 565, 23,  6},
-    { 569, 23,  6},
-    { 568, 24,  6},
-    { 576, 24,  6},
-    {1255, 25,  6},
-    {1262, 25,  6},
-    {1117, 25,  6},
-    { 244, 26,  6},
-    { 242, 26,  6},
-    { 671, 27,  6},
-    { 669, 27,  6},
-    { 451, 28,  6},
-    { 625, 29,  6},
-    {1044, 30,  6},
-    {1041, 30,  6},
-    {1188, 31,  6},
-    {1187, 31,  6},
-    { 971, 32,  6},
-    { 952, 32,  6},
-    { 612, 33,  6},
-    { 611, 33,  6},
-    { 204, 34,  6},
-    { 160, 34,  6},
+    {1122,  5,  5},
+    {1113,  5,  5},
+    { 851,  6,  6},
+    { 182,  7,  6},
+    { 181,  7,  6},
+    { 793,  8,  6},
+    { 792,  8,  6},
+    {1266,  9,  6},
+    {1265,  9,  6},
+    { 306, 10,  6},
+    { 308, 11,  6},
+    { 362, 11,  6},
+    { 355, 12,  6},
+    { 445, 12,  6},
+    { 357, 13,  6},
+    { 447, 13,  6},
+    { 356, 14,  6},
+    { 446, 14,  6},
+    { 353, 15,  6},
+    { 443, 15,  6},
+    { 354, 16,  6},
+    { 444, 16,  6},
+    { 662, 17,  6},
+    { 658, 17,  6},
+    { 652, 18,  6},
+    { 651, 18,  6},
+    {1306, 19,  6},
+    {1305, 19,  6},
+    {1304, 20,  6},
+    {1303, 20,  6},
+    { 472, 21,  6},
+    { 480, 21,  6},
+    { 591, 22,  6},
+    { 599, 22,  6},
+    { 589, 23,  6},
+    { 593, 23,  6},
+    { 592, 24,  6},
+    { 600, 24,  6},
+    {1294, 25,  6},
+    {1301, 25,  6},
+    {1153, 25,  6},
+    { 256, 26,  6},
+    { 254, 26,  6},
+    { 697, 27,  6},
+    { 695, 27,  6},
+    { 465, 28,  6},
+    { 649, 29,  6},
+    {1079, 30,  6},
+    {1076, 30,  6},
+    {1227, 31,  6},
+    {1226, 31,  6},
+    {1004, 32,  6},
+    { 983, 32,  6},
+    { 636, 33,  6},
+    { 635, 33,  6},
+    { 214, 34,  6},
+    { 170, 34,  6},
+    { 997, 35,  6},
     { 964, 35,  6},
-    { 933, 35,  6},
-    { 630, 36,  6},
-    { 629, 36,  6},
-    { 468, 37,  6},
-    { 467, 37,  6},
-    { 523, 38,  6},
-    { 521, 38,  6},
-    { 970, 39,  6},
-    { 951, 39,  6},
-    { 976, 40,  6},
-    { 977, 40,  6},
-    { 909, 41,  6},
-    { 895, 41,  6},
-    { 966, 42,  6},
-    { 938, 42,  6},
-    { 634, 43,  6},
-    { 633, 43,  6},
-    { 637, 44,  6},
-    { 635, 44,  6},
-    {1046, 45,  6},
-    {1223, 46,  6},
-    {1219, 46,  6},
-    { 965, 47,  6},
-    { 935, 47,  6},
-    { 460, 48,  6},
-    { 459, 48,  6},
-    {1113, 49,  6},
-    {1082, 49,  6},
-    { 765, 50,  6},
-    { 764, 50,  6},
-    { 968, 51,  6},
-    { 940, 51,  6},
-    { 967, 52,  6},
-    { 939, 52,  6},
-    {1126, 53,  6},
-    {1232, 54,  6},
-    {1248, 54,  6},
-    { 989, 55,  6},
-    { 990, 55,  6},
-    { 988, 56,  6},
-    { 987, 56,  6},
-    { 598, 57,  7},
-    { 622, 57,  7},
-    { 243, 58,  8},
-    { 234, 58,  8},
-    { 288, 59,  9},
-    { 300, 59,  9},
-    { 457, 60, 10},
-    { 482, 60, 10},
-    { 489, 61, 11},
-    { 487, 61, 11},
-    { 673, 62, 12},
-    { 667, 62, 12},
-    { 674, 63, 13},
-    { 675, 63, 13},
-    { 757, 64, 14},
-    { 732, 64, 14},
-    { 928, 65, 15},
-    { 921, 65, 15},
-    { 929, 66, 16},
-    { 931, 66, 16},
-    { 246, 67,  6},
-    { 245, 67,  6},
-    { 641, 68, 17},
-    { 648, 68, 17},
-    { 642, 69, 18},
-    { 649, 69, 18},
-    { 175, 70,  6},
-    { 170, 70,  6},
-    { 183, 71,  6},
-    { 250, 72,  6},
-    { 564, 73,  6},
-    {1027, 74,  6},
-    {1258, 75,  6},
-    {1263, 76,  6},
-    {1019, 77,  6},
-    {1018, 78,  6},
-    {1020, 79,  6},
-    {1021, 80,  6},
-};
-
-/* properties: 588 bytes. */
+    { 654, 36,  6},
+    { 653, 36,  6},
+    { 482, 37,  6},
+    { 481, 37,  6},
+    { 543, 38,  6},
+    { 541, 38,  6},
+    {1003, 39,  6},
+    { 982, 39,  6},
+    {1009, 40,  6},
+    {1010, 40,  6},
+    { 940, 41,  6},
+    { 925, 41,  6},
+    { 999, 42,  6},
+    { 969, 42,  6},
+    { 660, 43,  6},
+    { 659, 43,  6},
+    { 663, 44,  6},
+    { 661, 44,  6},
+    {1081, 45,  6},
+    {1262, 46,  6},
+    {1258, 46,  6},
+    { 998, 47,  6},
+    { 966, 47,  6},
+    { 474, 48,  6},
+    { 473, 48,  6},
+    {1149, 49,  6},
+    {1117, 49,  6},
+    { 791, 50,  6},
+    { 790, 50,  6},
+    {1001, 51,  6},
+    { 971, 51,  6},
+    {1000, 52,  6},
+    { 970, 52,  6},
+    {1123, 53,  6},
+    {1162, 53,  6},
+    {1271, 54,  6},
+    {1287, 54,  6},
+    {1022, 55,  6},
+    {1023, 55,  6},
+    {1021, 56,  6},
+    {1020, 56,  6},
+    {1061, 57,  6},
+    {1027, 57,  6},
+    { 622, 58,  7},
+    { 646, 58,  7},
+    { 255, 59,  8},
+    { 244, 59,  8},
+    { 300, 60,  9},
+    { 312, 60,  9},
+    { 471, 61, 10},
+    { 496, 61, 10},
+    { 503, 62, 11},
+    { 501, 62, 11},
+    { 699, 63, 12},
+    { 693, 63, 12},
+    { 700, 64, 13},
+    { 701, 64, 13},
+    { 783, 65, 14},
+    { 758, 65, 14},
+    { 959, 66, 15},
+    { 952, 66, 15},
+    { 960, 67, 16},
+    { 962, 67, 16},
+    { 258, 68,  6},
+    { 257, 68,  6},
+    { 667, 69, 17},
+    { 674, 69, 17},
+    { 668, 70, 18},
+    { 675, 70, 18},
+    { 185, 71,  6},
+    { 180, 71,  6},
+    { 193, 72,  6},
+    { 262, 73,  6},
+    { 588, 74,  6},
+    {1062, 75,  6},
+    {1297, 76,  6},
+    {1302, 77,  6},
+    {1053, 78,  6},
+    {1052, 79,  6},
+    {1054, 80,  6},
+    {1055, 81,  6},
+};
+
+/* properties: 600 bytes. */
 
 /* property values. */
 
 RE_PropertyValue re_property_values[] = {
-    {1220,  0,   0},
-    { 383,  0,   0},
-    {1228,  0,   1},
-    { 774,  0,   1},
-    { 768,  0,   2},
-    { 761,  0,   2},
-    {1200,  0,   3},
-    { 773,  0,   3},
-    { 865,  0,   4},
-    { 762,  0,   4},
-    { 969,  0,   5},
-    { 763,  0,   5},
-    { 913,  0,   6},
-    { 863,  0,   6},
-    { 505,  0,   7},
-    { 831,  0,   7},
-    {1119,  0,   8},
-    { 830,  0,   8},
-    { 456,  0,   9},
-    { 896,  0,   9},
... 31261 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-regex.git