[Python-modules-commits] [python-regex] 01/04: Import python-regex_0.1.20160721.orig.tar.gz
Sandro Tosi
morph at moszumanska.debian.org
Tue Aug 16 18:42:40 UTC 2016
This is an automated email from the git hooks/post-receive script.
morph pushed a commit to branch master
in repository python-regex.
commit f1a7ba9269a6cd8974f9b7065c30c3b56317bd89
Author: Sandro Tosi <morph at debian.org>
Date: Tue Aug 16 19:38:41 2016 +0100
Import python-regex_0.1.20160721.orig.tar.gz
---
PKG-INFO | 4 +-
Python2/_regex.c | 196 +-
Python2/_regex.h | 2 +-
Python2/_regex_core.py | 14 +-
Python2/_regex_unicode.c | 13197 ++++++++++++++++++++++---------------------
Python2/_regex_unicode.h | 67 +-
Python2/regex.py | 2 +-
Python2/test_regex.py | 10 +
Python3/_regex.c | 196 +-
Python3/_regex.h | 2 +-
Python3/_regex_core.py | 12 +-
Python3/_regex_unicode.c | 13197 ++++++++++++++++++++++---------------------
Python3/_regex_unicode.h | 67 +-
Python3/regex.py | 2 +-
Python3/test_regex.py | 10 +
docs/Features.rst | 2 +-
docs/UnicodeProperties.txt | 48 +-
setup.py | 2 +-
18 files changed, 14192 insertions(+), 12838 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 6d9055e..2eae981 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: regex
-Version: 2016.06.19
+Version: 2016.07.21
Summary: Alternative regular expression module, to replace re.
Home-page: https://bitbucket.org/mrabarnett/mrab-regex
Author: Matthew Barnett
@@ -139,7 +139,7 @@ Description: Introduction
Unicode
-------
- This module supports Unicode 8.0.
+ This module supports Unicode 9.0.
Full Unicode case-folding is supported.
diff --git a/Python2/_regex.c b/Python2/_regex.c
index fddf67f..bd8a84d 100644
--- a/Python2/_regex.c
+++ b/Python2/_regex.c
@@ -1564,7 +1564,10 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
Py_ssize_t pos_p1;
int prop_p1;
- /* Break at the start and end of the text. */
+ /* Break at the start and end of the text, unless the text is empty. */
+ if (state->text_length == 0)
+ return FALSE;
+
/* WB1 */
if (text_pos <= 0)
return TRUE;
@@ -1584,12 +1587,21 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
return FALSE;
/* Otherwise break before and after Newlines (including CR and LF). */
- /* WB3a and WB3b */
+ /* WB3a */
if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 ==
- RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop ==
RE_BREAK_LF)
return TRUE;
+ /* WB3b */
+ if (prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == RE_BREAK_LF)
+ return TRUE;
+
+ /* Don't break within emoji zwj sequences. */
+ /* WB3c */
+ if (prop_m1 == RE_BREAK_ZWJ && (prop == RE_BREAK_GLUEAFTERZWJ || prop ==
+ RE_BREAK_EBASEGAZ))
+ return FALSE;
+
/* WB4 */
/* Get the property of the previous character, ignoring Format and Extend
* characters.
@@ -1598,7 +1610,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
prop_m1 = RE_BREAK_OTHER;
while (pos_m1 >= 0) {
prop_m1 = (int)re_get_word_break(char_at(state->text, pos_m1));
- if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT)
+ if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT && prop_m1
+ != RE_BREAK_ZWJ)
break;
--pos_m1;
@@ -1611,7 +1624,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
prop_m2 = RE_BREAK_OTHER;
while (pos_m2 >= 0) {
prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2));
- if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT)
+ if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT && prop_m2
+ != RE_BREAK_ZWJ)
break;
--pos_m2;
@@ -1624,7 +1638,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
prop_p0 = prop;
while (pos_p0 < state->text_length) {
prop_p0 = (int)re_get_word_break(char_at(state->text, pos_p0));
- if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT)
+ if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT && prop_p0
+ != RE_BREAK_ZWJ)
break;
++pos_p0;
@@ -1637,7 +1652,8 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
prop_p1 = RE_BREAK_OTHER;
while (pos_p1 < state->text_length) {
prop_p1 = (int)re_get_word_break(char_at(state->text, pos_p1));
- if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT)
+ if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT && prop_p1
+ != RE_BREAK_ZWJ)
break;
++pos_p1;
@@ -1662,19 +1678,23 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER ||
prop_p1 == RE_BREAK_HEBREWLETTER))
return FALSE;
+
/* WB7 */
if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) &&
(prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET ||
prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER ||
prop_p0 == RE_BREAK_HEBREWLETTER))
return FALSE;
+
/* WB7a */
if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE)
return FALSE;
+
/* WB7b */
if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE &&
prop_p1 == RE_BREAK_HEBREWLETTER)
return FALSE;
+
/* WB7c */
if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE &&
prop_p0 == RE_BREAK_HEBREWLETTER)
@@ -1686,10 +1706,12 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
/* WB8 */
if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC)
return FALSE;
+
/* WB9 */
if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) &&
prop_p0 == RE_BREAK_NUMERIC)
return FALSE;
+
/* WB10 */
if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0
== RE_BREAK_HEBREWLETTER))
@@ -1701,6 +1723,7 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
== RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 ==
RE_BREAK_NUMERIC)
return FALSE;
+
/* WB12 */
if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0
== RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 ==
@@ -1718,20 +1741,44 @@ static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 ==
RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET)
return FALSE;
+
/* WB13b */
if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER ||
prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC ||
prop_p0 == RE_BREAK_KATAKANA))
return FALSE;
- /* Don't break between regional indicator symbols. */
- /* WB13c */
- if (prop_m1 == RE_BREAK_REGIONALINDICATOR && prop_p0 ==
- RE_BREAK_REGIONALINDICATOR)
+ /* Don't break within emoji modifier sequences. */
+ /* WB14 */
+ if ((prop_m1 == RE_BREAK_EBASE || prop_m1 == RE_BREAK_EBASEGAZ) && prop_p0
+ == RE_BREAK_EMODIFIER)
return FALSE;
+ /* Don't break within emoji flag sequences. That is, don't break between
+ * regional indicator (RI) symbols if there is an odd number of RI
+ * characters before the break point.
+ */
+ /* WB15 and WB16 */
+ prop = (int)re_get_word_break(char_at(state->text, text_pos));
+ if (prop == RE_BREAK_REGIONALINDICATOR) {
+ Py_ssize_t pos;
+
+ pos = text_pos - 1;
+ while (pos >= 0) {
+ prop = (int)re_get_word_break(char_at(state->text, pos));
+ if (prop != RE_BREAK_REGIONALINDICATOR)
+ break;
+
+ --pos;
+ }
+ ++pos;
+
+ if ((text_pos - pos) % 2 != 0)
+ return FALSE;
+ }
+
/* Otherwise, break everywhere (including around ideographs). */
- /* WB14 */
+ /* WB999 */
return TRUE;
}
@@ -1898,6 +1945,10 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
int prop;
int prop_m1;
+ /* Break at the start and end of text, unless the text is empty. */
+ if (state->text_length == 0)
+ return FALSE;
+
/* Break at the start and end of the text. */
/* GB1 */
if (text_pos <= 0)
@@ -1919,10 +1970,14 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
return FALSE;
/* Otherwise break before and after controls (including CR and LF). */
- /* GB4 and GB5 */
+ /* GB4 */
if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 ==
- RE_GBREAK_LF || prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop
- == RE_GBREAK_LF)
+ RE_GBREAK_LF)
+ return TRUE;
+
+ /* GB5 */
+ if (prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop ==
+ RE_GBREAK_LF)
return TRUE;
/* Don't break Hangul syllable sequences. */
@@ -1930,24 +1985,20 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V
|| prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT))
return FALSE;
+
/* GB7 */
if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop ==
RE_GBREAK_V || prop == RE_GBREAK_T))
return FALSE;
+
/* GB8 */
if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop ==
RE_GBREAK_T))
return FALSE;
- /* Don't break between regional indicator symbols. */
- /* GB8a */
- if (prop_m1 == RE_GBREAK_REGIONALINDICATOR && prop ==
- RE_GBREAK_REGIONALINDICATOR)
- return FALSE;
-
/* Don't break just before Extend characters. */
/* GB9 */
- if (prop == RE_GBREAK_EXTEND)
+ if (prop == RE_GBREAK_EXTEND || prop == RE_GBREAK_ZWJ)
return FALSE;
/* Don't break before SpacingMarks, or after Prepend characters. */
@@ -1959,8 +2010,57 @@ static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos)
if (prop_m1 == RE_GBREAK_PREPEND)
return FALSE;
- /* Otherwise, break everywhere. */
+ /* Don't break within emoji modifier sequences or emoji zwj sequences. */
/* GB10 */
+ if (prop == RE_GBREAK_EMODIFIER) {
+ Py_ssize_t pos;
+
+ pos = text_pos - 1;
+ while (pos >= 0) {
+ int prev_prop;
+
+ prev_prop = (int)re_get_grapheme_cluster_break(char_at(state->text,
+ pos));
+ if (prev_prop != RE_GBREAK_EXTEND) {
+ if (prev_prop == RE_GBREAK_EBASE || prev_prop ==
+ RE_GBREAK_EBASEGAZ)
+ return FALSE;
+ break;
+ }
+ --pos;
+ }
+ }
+
+ /* GB11 */
+ if (prop_m1 == RE_GBREAK_ZWJ && (prop == RE_GBREAK_GLUEAFTERZWJ || prop ==
+ RE_GBREAK_EBASEGAZ))
+ return FALSE;
+
+ /* Don't break within emoji flag sequences. That is, don't break between
+ * regional indicator (RI) symbols if there is an odd number of RI
+ * characters before the break point.
+ */
+ /* GB12 and GB13 */
+ if (prop == RE_GBREAK_REGIONALINDICATOR) {
+ Py_ssize_t pos;
+
+ pos = text_pos - 1;
+ while (pos >= 0) {
+ prop = (int)re_get_grapheme_cluster_break(char_at(state->text,
+ pos));
+ if (prop != RE_GBREAK_REGIONALINDICATOR)
+ break;
+
+ --pos;
+ }
+ ++pos;
+
+ if ((text_pos - pos) % 2 != 0)
+ return FALSE;
+ }
+
+ /* Otherwise, break everywhere. */
+ /* GB999 */
return TRUE;
}
@@ -14708,7 +14808,10 @@ backtrack:
* backtracked inside and already restored the groups. We also
* need to restore certain flags.
*/
- if (bt_data->lookaround.node->match)
+ RE_Node* node;
+
+ node = bt_data->lookaround.node;
+ if (node->match && (node->status & RE_STATUS_HAS_GROUPS))
pop_groups(state);
state->too_few_errors = bt_data->lookaround.too_few_errors;
@@ -14776,7 +14879,7 @@ backtrack:
discard_backtrack(state);
break;
}
- case RE_OP_FAILURE:
+ case RE_OP_FAILURE: /* Failure. */
{
TRACE(("%s\n", re_op_text[bt_data->op]))
@@ -21987,28 +22090,6 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node*
}
break;
}
- case RE_OP_GROUP_CALL:
- case RE_OP_REF_GROUP:
- case RE_OP_REF_GROUP_FLD:
- case RE_OP_REF_GROUP_FLD_REV:
- case RE_OP_REF_GROUP_IGN:
- case RE_OP_REF_GROUP_IGN_REV:
- case RE_OP_REF_GROUP_REV:
- {
- RE_Node* tail;
- BOOL visited_tail;
-
- tail = node->next_1.node;
- visited_tail = (tail->status & RE_STATUS_VISITED_AG);
-
- if (visited_tail)
- node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF;
- else {
- CheckStack_push(&stack, node, result);
- CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
- }
- break;
- }
case RE_OP_GROUP_EXISTS:
{
RE_Node* branch_1;
@@ -22041,6 +22122,27 @@ Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node*
}
break;
}
+ case RE_OP_REF_GROUP:
+ case RE_OP_REF_GROUP_FLD:
+ case RE_OP_REF_GROUP_FLD_REV:
+ case RE_OP_REF_GROUP_IGN:
+ case RE_OP_REF_GROUP_IGN_REV:
+ case RE_OP_REF_GROUP_REV:
+ {
+ RE_Node* tail;
+ BOOL visited_tail;
+
+ tail = node->next_1.node;
+ visited_tail = (tail->status & RE_STATUS_VISITED_AG);
+
+ if (visited_tail)
+ node->status |= RE_STATUS_VISITED_AG | RE_STATUS_REF;
+ else {
+ CheckStack_push(&stack, node, result);
+ CheckStack_push(&stack, tail, RE_STATUS_NEITHER);
+ }
+ break;
+ }
case RE_OP_SUCCESS:
node->status |= RE_STATUS_VISITED_AG | result;
break;
diff --git a/Python2/_regex.h b/Python2/_regex.h
index 37ab8a9..33ccfdb 100644
--- a/Python2/_regex.h
+++ b/Python2/_regex.h
@@ -11,7 +11,7 @@
* 2010-01-16 mrab Re-written
*/
-/* Supports Unicode version 8.0.0. */
+/* Supports Unicode version 9.0.0. */
#define RE_MAGIC 20100116
diff --git a/Python2/_regex_core.py b/Python2/_regex_core.py
index 5599ed4..1b3f005 100644
--- a/Python2/_regex_core.py
+++ b/Python2/_regex_core.py
@@ -2067,9 +2067,11 @@ class Branch(RegexBase):
def _add_precheck(self, info, reverse, branches):
charset = set()
+ pos = -1 if reverse else 0
+
for branch in branches:
if type(branch) is Literal and branch.case_flags == NOCASE:
- charset.add(branch.characters[0])
+ charset.add(branch.characters[pos])
else:
return
@@ -2995,7 +2997,7 @@ class LookAround(RegexBase):
self.subpattern.fix_groups(pattern, self.behind, fuzzy)
def optimise(self, info, reverse):
- subpattern = self.subpattern.optimise(info, reverse)
+ subpattern = self.subpattern.optimise(info, self.behind)
if self.positive and subpattern.is_empty():
return subpattern
@@ -3053,9 +3055,9 @@ class LookAroundConditional(RegexBase):
self.no_item.fix_groups(pattern, reverse, fuzzy)
def optimise(self, info, reverse):
- subpattern = self.subpattern.optimise(info, reverse)
- yes_item = self.yes_item.optimise(info, reverse)
- no_item = self.no_item.optimise(info, reverse)
+ subpattern = self.subpattern.optimise(info, self.behind)
+ yes_item = self.yes_item.optimise(info, self.behind)
+ no_item = self.no_item.optimise(info, self.behind)
return LookAroundConditional(self.behind, self.positive, subpattern,
yes_item, no_item)
@@ -3108,7 +3110,7 @@ class LookAroundConditional(RegexBase):
print("%sEITHER" % (INDENT * indent))
self.yes_item.dump(indent + 1, reverse)
if not self.no_item.is_empty():
- print("%sOR".format(INDENT * indent))
+ print("%sOR" % (INDENT * indent))
self.no_item.dump(indent + 1, reverse)
def is_empty(self):
diff --git a/Python2/_regex_unicode.c b/Python2/_regex_unicode.c
index 47c896e..f470005 100644
--- a/Python2/_regex_unicode.c
+++ b/Python2/_regex_unicode.c
@@ -1,4 +1,4 @@
-/* For Unicode version 8.0.0 */
+/* For Unicode version 9.0.0 */
#include "_regex_unicode.h"
@@ -24,9 +24,12 @@ char* re_strings[] = {
"1/10",
"1/12",
"1/16",
+ "1/160",
"1/2",
+ "1/20",
"1/3",
"1/4",
+ "1/40",
"1/5",
"1/6",
"1/7",
@@ -98,9 +101,11 @@ char* re_strings[] = {
"3",
"3/16",
"3/2",
+ "3/20",
"3/4",
"3/5",
"3/8",
+ "3/80",
"30",
"300",
"3000",
@@ -176,7 +181,12 @@ char* re_strings[] = {
"ABOVE",
"ABOVELEFT",
"ABOVERIGHT",
+ "ADLAM",
+ "ADLM",
"AEGEANNUMBERS",
+ "AFRICANFEH",
+ "AFRICANNOON",
+ "AFRICANQAF",
"AGHB",
"AHEX",
"AHOM",
@@ -260,6 +270,8 @@ char* re_strings[] = {
"BENG",
"BENGALI",
"BETH",
+ "BHAIKSUKI",
+ "BHKS",
"BIDIC",
"BIDICLASS",
"BIDICONTROL",
@@ -459,8 +471,10 @@ char* re_strings[] = {
"CYRILLIC",
"CYRILLICEXTA",
"CYRILLICEXTB",
+ "CYRILLICEXTC",
"CYRILLICEXTENDEDA",
"CYRILLICEXTENDEDB",
+ "CYRILLICEXTENDEDC",
"CYRILLICSUP",
"CYRILLICSUPPLEMENT",
"CYRILLICSUPPLEMENTARY",
@@ -508,10 +522,16 @@ char* re_strings[] = {
"EA",
"EARLYDYNASTICCUNEIFORM",
"EASTASIANWIDTH",
+ "EB",
+ "EBASE",
+ "EBASEGAZ",
+ "EBG",
"EGYP",
"EGYPTIANHIEROGLYPHS",
"ELBA",
"ELBASAN",
+ "EM",
+ "EMODIFIER",
"EMOTICONS",
"EN",
"ENC",
@@ -562,6 +582,7 @@ char* re_strings[] = {
"FULLWIDTH",
"GAF",
"GAMAL",
+ "GAZ",
"GC",
"GCB",
"GEMINATIONMARK",
@@ -577,7 +598,10 @@ char* re_strings[] = {
"GL",
"GLAG",
"GLAGOLITIC",
+ "GLAGOLITICSUP",
+ "GLAGOLITICSUPPLEMENT",
"GLUE",
+ "GLUEAFTERZWJ",
"GOTH",
"GOTHIC",
"GRAN",
@@ -650,6 +674,8 @@ char* re_strings[] = {
"IDEO",
"IDEOGRAPHIC",
"IDEOGRAPHICDESCRIPTIONCHARACTERS",
+ "IDEOGRAPHICSYMBOLS",
+ "IDEOGRAPHICSYMBOLSANDPUNCTUATION",
"IDS",
"IDSB",
"IDSBINARYOPERATOR",
@@ -840,6 +866,8 @@ char* re_strings[] = {
"MANICHAEANWAW",
"MANICHAEANYODH",
"MANICHAEANZAYIN",
+ "MARC",
+ "MARCHEN",
"MARK",
"MATH",
"MATHALPHANUM",
@@ -890,6 +918,8 @@ char* re_strings[] = {
"MODIFYINGLETTER",
"MONG",
"MONGOLIAN",
+ "MONGOLIANSUP",
+ "MONGOLIANSUPPLEMENT",
"MRO",
"MROO",
"MTEI",
@@ -916,6 +946,7 @@ char* re_strings[] = {
"NCHAR",
"ND",
"NEUTRAL",
+ "NEWA",
"NEWLINE",
"NEWTAILUE",
"NEXTLINE",
@@ -979,6 +1010,8 @@ char* re_strings[] = {
"ORKH",
"ORNAMENTALDINGBATS",
"ORYA",
+ "OSAGE",
+ "OSGE",
"OSMA",
"OSMANYA",
"OTHER",
@@ -1012,6 +1045,7 @@ char* re_strings[] = {
"PAUC",
"PAUCINHAU",
"PC",
+ "PCM",
"PD",
"PDF",
"PDI",
@@ -1045,6 +1079,7 @@ char* re_strings[] = {
"PR",
"PREFIXNUMERIC",
"PREPEND",
+ "PREPENDEDCONCATENATIONMARK",
"PRINT",
"PRIVATEUSE",
"PRIVATEUSEAREA",
@@ -1106,6 +1141,7 @@ char* re_strings[] = {
"SEGMENTSEPARATOR",
"SEMKATH",
"SENTENCEBREAK",
+ "SENTENCETERMINAL",
"SEP",
"SEPARATOR",
"SG",
@@ -1199,6 +1235,9 @@ char* re_strings[] = {
"TALU",
"TAMIL",
"TAML",
+ "TANG",
+ "TANGUT",
+ "TANGUTCOMPONENTS",
"TAVT",
"TAW",
"TEHMARBUTA",
@@ -1311,1585 +1350,1646 @@ char* re_strings[] = {
"ZP",
"ZS",
"ZW",
+ "ZWJ",
"ZWSPACE",
"ZYYY",
"ZZZZ",
};
-/* strings: 12240 bytes. */
+/* strings: 12639 bytes. */
/* properties. */
RE_Property re_properties[] = {
- { 547, 0, 0},
- { 544, 0, 0},
- { 252, 1, 1},
- { 251, 1, 1},
- {1081, 2, 2},
- {1079, 2, 2},
- {1259, 3, 3},
- {1254, 3, 3},
+ { 568, 0, 0},
+ { 565, 0, 0},
+ { 264, 1, 1},
+ { 263, 1, 1},
+ {1116, 2, 2},
+ {1114, 2, 2},
+ {1298, 3, 3},
+ {1293, 3, 3},
+ { 590, 4, 4},
{ 566, 4, 4},
- { 545, 4, 4},
- {1087, 5, 5},
- {1078, 5, 5},
- { 823, 6, 6},
- { 172, 7, 6},
- { 171, 7, 6},
- { 767, 8, 6},
- { 766, 8, 6},
- {1227, 9, 6},
- {1226, 9, 6},
- { 294, 10, 6},
- { 296, 11, 6},
- { 350, 11, 6},
- { 343, 12, 6},
- { 433, 12, 6},
- { 345, 13, 6},
- { 435, 13, 6},
- { 344, 14, 6},
- { 434, 14, 6},
- { 341, 15, 6},
- { 431, 15, 6},
- { 342, 16, 6},
- { 432, 16, 6},
- { 636, 17, 6},
- { 632, 17, 6},
- { 628, 18, 6},
- { 627, 18, 6},
- {1267, 19, 6},
- {1266, 19, 6},
- {1265, 20, 6},
- {1264, 20, 6},
- { 458, 21, 6},
- { 466, 21, 6},
- { 567, 22, 6},
- { 575, 22, 6},
- { 565, 23, 6},
- { 569, 23, 6},
- { 568, 24, 6},
- { 576, 24, 6},
- {1255, 25, 6},
- {1262, 25, 6},
- {1117, 25, 6},
- { 244, 26, 6},
- { 242, 26, 6},
- { 671, 27, 6},
- { 669, 27, 6},
- { 451, 28, 6},
- { 625, 29, 6},
- {1044, 30, 6},
- {1041, 30, 6},
- {1188, 31, 6},
- {1187, 31, 6},
- { 971, 32, 6},
- { 952, 32, 6},
- { 612, 33, 6},
- { 611, 33, 6},
- { 204, 34, 6},
- { 160, 34, 6},
+ {1122, 5, 5},
+ {1113, 5, 5},
+ { 851, 6, 6},
+ { 182, 7, 6},
+ { 181, 7, 6},
+ { 793, 8, 6},
+ { 792, 8, 6},
+ {1266, 9, 6},
+ {1265, 9, 6},
+ { 306, 10, 6},
+ { 308, 11, 6},
+ { 362, 11, 6},
+ { 355, 12, 6},
+ { 445, 12, 6},
+ { 357, 13, 6},
+ { 447, 13, 6},
+ { 356, 14, 6},
+ { 446, 14, 6},
+ { 353, 15, 6},
+ { 443, 15, 6},
+ { 354, 16, 6},
+ { 444, 16, 6},
+ { 662, 17, 6},
+ { 658, 17, 6},
+ { 652, 18, 6},
+ { 651, 18, 6},
+ {1306, 19, 6},
+ {1305, 19, 6},
+ {1304, 20, 6},
+ {1303, 20, 6},
+ { 472, 21, 6},
+ { 480, 21, 6},
+ { 591, 22, 6},
+ { 599, 22, 6},
+ { 589, 23, 6},
+ { 593, 23, 6},
+ { 592, 24, 6},
+ { 600, 24, 6},
+ {1294, 25, 6},
+ {1301, 25, 6},
+ {1153, 25, 6},
+ { 256, 26, 6},
+ { 254, 26, 6},
+ { 697, 27, 6},
+ { 695, 27, 6},
+ { 465, 28, 6},
+ { 649, 29, 6},
+ {1079, 30, 6},
+ {1076, 30, 6},
+ {1227, 31, 6},
+ {1226, 31, 6},
+ {1004, 32, 6},
+ { 983, 32, 6},
+ { 636, 33, 6},
+ { 635, 33, 6},
+ { 214, 34, 6},
+ { 170, 34, 6},
+ { 997, 35, 6},
{ 964, 35, 6},
- { 933, 35, 6},
- { 630, 36, 6},
- { 629, 36, 6},
- { 468, 37, 6},
- { 467, 37, 6},
- { 523, 38, 6},
- { 521, 38, 6},
- { 970, 39, 6},
- { 951, 39, 6},
- { 976, 40, 6},
- { 977, 40, 6},
- { 909, 41, 6},
- { 895, 41, 6},
- { 966, 42, 6},
- { 938, 42, 6},
- { 634, 43, 6},
- { 633, 43, 6},
- { 637, 44, 6},
- { 635, 44, 6},
- {1046, 45, 6},
- {1223, 46, 6},
- {1219, 46, 6},
- { 965, 47, 6},
- { 935, 47, 6},
- { 460, 48, 6},
- { 459, 48, 6},
- {1113, 49, 6},
- {1082, 49, 6},
- { 765, 50, 6},
- { 764, 50, 6},
- { 968, 51, 6},
- { 940, 51, 6},
- { 967, 52, 6},
- { 939, 52, 6},
- {1126, 53, 6},
- {1232, 54, 6},
- {1248, 54, 6},
- { 989, 55, 6},
- { 990, 55, 6},
- { 988, 56, 6},
- { 987, 56, 6},
- { 598, 57, 7},
- { 622, 57, 7},
- { 243, 58, 8},
- { 234, 58, 8},
- { 288, 59, 9},
- { 300, 59, 9},
- { 457, 60, 10},
- { 482, 60, 10},
- { 489, 61, 11},
- { 487, 61, 11},
- { 673, 62, 12},
- { 667, 62, 12},
- { 674, 63, 13},
- { 675, 63, 13},
- { 757, 64, 14},
- { 732, 64, 14},
- { 928, 65, 15},
- { 921, 65, 15},
- { 929, 66, 16},
- { 931, 66, 16},
- { 246, 67, 6},
- { 245, 67, 6},
- { 641, 68, 17},
- { 648, 68, 17},
- { 642, 69, 18},
- { 649, 69, 18},
- { 175, 70, 6},
- { 170, 70, 6},
- { 183, 71, 6},
- { 250, 72, 6},
- { 564, 73, 6},
- {1027, 74, 6},
- {1258, 75, 6},
- {1263, 76, 6},
- {1019, 77, 6},
- {1018, 78, 6},
- {1020, 79, 6},
- {1021, 80, 6},
-};
-
-/* properties: 588 bytes. */
+ { 654, 36, 6},
+ { 653, 36, 6},
+ { 482, 37, 6},
+ { 481, 37, 6},
+ { 543, 38, 6},
+ { 541, 38, 6},
+ {1003, 39, 6},
+ { 982, 39, 6},
+ {1009, 40, 6},
+ {1010, 40, 6},
+ { 940, 41, 6},
+ { 925, 41, 6},
+ { 999, 42, 6},
+ { 969, 42, 6},
+ { 660, 43, 6},
+ { 659, 43, 6},
+ { 663, 44, 6},
+ { 661, 44, 6},
+ {1081, 45, 6},
+ {1262, 46, 6},
+ {1258, 46, 6},
+ { 998, 47, 6},
+ { 966, 47, 6},
+ { 474, 48, 6},
+ { 473, 48, 6},
+ {1149, 49, 6},
+ {1117, 49, 6},
+ { 791, 50, 6},
+ { 790, 50, 6},
+ {1001, 51, 6},
+ { 971, 51, 6},
+ {1000, 52, 6},
+ { 970, 52, 6},
+ {1123, 53, 6},
+ {1162, 53, 6},
+ {1271, 54, 6},
+ {1287, 54, 6},
+ {1022, 55, 6},
+ {1023, 55, 6},
+ {1021, 56, 6},
+ {1020, 56, 6},
+ {1061, 57, 6},
+ {1027, 57, 6},
+ { 622, 58, 7},
+ { 646, 58, 7},
+ { 255, 59, 8},
+ { 244, 59, 8},
+ { 300, 60, 9},
+ { 312, 60, 9},
+ { 471, 61, 10},
+ { 496, 61, 10},
+ { 503, 62, 11},
+ { 501, 62, 11},
+ { 699, 63, 12},
+ { 693, 63, 12},
+ { 700, 64, 13},
+ { 701, 64, 13},
+ { 783, 65, 14},
+ { 758, 65, 14},
+ { 959, 66, 15},
+ { 952, 66, 15},
+ { 960, 67, 16},
+ { 962, 67, 16},
+ { 258, 68, 6},
+ { 257, 68, 6},
+ { 667, 69, 17},
+ { 674, 69, 17},
+ { 668, 70, 18},
+ { 675, 70, 18},
+ { 185, 71, 6},
+ { 180, 71, 6},
+ { 193, 72, 6},
+ { 262, 73, 6},
+ { 588, 74, 6},
+ {1062, 75, 6},
+ {1297, 76, 6},
+ {1302, 77, 6},
+ {1053, 78, 6},
+ {1052, 79, 6},
+ {1054, 80, 6},
+ {1055, 81, 6},
+};
+
+/* properties: 600 bytes. */
/* property values. */
RE_PropertyValue re_property_values[] = {
- {1220, 0, 0},
- { 383, 0, 0},
- {1228, 0, 1},
- { 774, 0, 1},
- { 768, 0, 2},
- { 761, 0, 2},
- {1200, 0, 3},
- { 773, 0, 3},
- { 865, 0, 4},
- { 762, 0, 4},
- { 969, 0, 5},
- { 763, 0, 5},
- { 913, 0, 6},
- { 863, 0, 6},
- { 505, 0, 7},
- { 831, 0, 7},
- {1119, 0, 8},
- { 830, 0, 8},
- { 456, 0, 9},
- { 896, 0, 9},
... 31261 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-regex.git
More information about the Python-modules-commits mailing list