[Python-modules-commits] [python-nameparser] 01/03: New upstream version 0.5.3
Edward Betts
edward at moszumanska.debian.org
Thu Oct 5 09:08:55 UTC 2017
This is an automated email from the git hooks/post-receive script.
edward pushed a commit to annotated tag debian/0.5.3-1
in repository python-nameparser.
commit 464dcc976491b4942b08df0816b8dc9ea149c44f
Author: Edward Betts <edward at 4angle.com>
Date: Thu Oct 5 08:32:35 2017 +0100
New upstream version 0.5.3
---
PKG-INFO | 2 +-
nameparser.egg-info/PKG-INFO | 2 +-
nameparser/__init__.py | 2 +-
nameparser/config/regexes.py | 17 +++++++++++++++++
nameparser/parser.py | 11 ++++++++++-
setup.cfg | 1 -
tests.py | 26 ++++++++++++++++++++++++--
7 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 5ae6db2..5cad489 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: nameparser
-Version: 0.5.2
+Version: 0.5.3
Summary: A simple Python module for parsing human names into their individual components.
Home-page: https://github.com/derek73/python-nameparser
Author: Derek Gulbranson
diff --git a/nameparser.egg-info/PKG-INFO b/nameparser.egg-info/PKG-INFO
index 5ae6db2..5cad489 100644
--- a/nameparser.egg-info/PKG-INFO
+++ b/nameparser.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: nameparser
-Version: 0.5.2
+Version: 0.5.3
Summary: A simple Python module for parsing human names into their individual components.
Home-page: https://github.com/derek73/python-nameparser
Author: Derek Gulbranson
diff --git a/nameparser/__init__.py b/nameparser/__init__.py
index 7661fab..ea1125d 100644
--- a/nameparser/__init__.py
+++ b/nameparser/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 5, 2)
+VERSION = (0, 5, 3)
__version__ = '.'.join(map(str, VERSION))
__author__ = "Derek Gulbranson"
__author_email__ = 'derek73 at gmail.com'
diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py
index 01ca86d..51a6ed2 100644
--- a/nameparser/config/regexes.py
+++ b/nameparser/config/regexes.py
@@ -2,6 +2,22 @@
from __future__ import unicode_literals
import re
+# emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
+try:
+ # Wide UCS-4 build
+ re_emoji = re.compile('['
+ '\U0001F300-\U0001F64F'
+ '\U0001F680-\U0001F6FF'
+ '\u2600-\u26FF\u2700-\u27BF]+',
+ re.UNICODE)
+except re.error:
+ # Narrow UCS-2 build
+ re_emoji = re.compile('('
+ '\ud83c[\udf00-\udfff]|'
+ '\ud83d[\udc00-\ude4f\ude80-\udeff]|'
+ '[\u2600-\u26FF\u2700-\u27BF])+',
+ re.UNICODE)
+
REGEXES = set([
("spaces", re.compile(r"\s+", re.U)),
("word", re.compile(r"(\w|\.)+", re.U)),
@@ -11,6 +27,7 @@ REGEXES = set([
("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U)),
+ ("emoji",re_emoji),
])
"""
All regular expressions used by the parser are precompiled and stored in the config.
diff --git a/nameparser/parser.py b/nameparser/parser.py
index 55f574a..55d85df 100644
--- a/nameparser/parser.py
+++ b/nameparser/parser.py
@@ -77,6 +77,7 @@ class HumanName(object):
self.ENCODING = encoding
self.string_format = string_format or self.C.string_format
+ # full_name setter triggers the parse
self.full_name = full_name
def __iter__(self):
@@ -371,7 +372,7 @@ class HumanName(object):
"""
self.parse_nicknames()
-
+ self.squash_emoji()
def post_process(self):
"""
@@ -392,6 +393,14 @@ class HumanName(object):
self.nickname_list = re_nickname.findall(self._full_name)
self._full_name = re_nickname.sub('', self._full_name)
+ def squash_emoji(self):
+ """
+ Remove emoji from the input string.
+ """
+ re_emoji = self.C.regexes.emoji
+ if re_emoji and re_emoji.search(self._full_name):
+ self._full_name = re_emoji.sub('', self._full_name)
+
def handle_firstnames(self):
"""
If there are only two parts and one is a title, assume it's a last name
diff --git a/setup.cfg b/setup.cfg
index 6f08d0e..adf5ed7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,5 +4,4 @@ universal = 1
[egg_info]
tag_build =
tag_date = 0
-tag_svn_revision = 0
diff --git a/tests.py b/tests.py
index 4137e74..983eed8 100644
--- a/tests.py
+++ b/tests.py
@@ -1936,6 +1936,28 @@ class HumanNameOutputFormatTests(HumanNameTestBase):
self.assertEqual(u(hn), "Rev John (Kenny) A. Kenneth Doe III")
hn.nickname=''
self.assertEqual(u(hn), "Rev John A. Kenneth Doe III")
+
+ def test_remove_emojis(self):
+ hn = HumanName("Sam Smith 😊")
+ self.m(hn.first,"Sam", hn)
+ self.m(hn.last,"Smith", hn)
+ self.assertEqual(u(hn), "Sam Smith")
+
+ def test_keep_non_emojis(self):
+ hn = HumanName("∫≜⩕ Smith 😊")
+ self.m(hn.first,"∫≜⩕", hn)
+ self.m(hn.last,"Smith", hn)
+ self.assertEqual(u(hn), "∫≜⩕ Smith")
+
+ def test_keep_emojis(self):
+ from nameparser.config import Constants
+ constants = Constants()
+ constants.regexes.emoji = False
+ hn = HumanName("∫≜⩕ Smith😊", constants)
+ self.m(hn.first,"∫≜⩕", hn)
+ self.m(hn.last,"Smith😊", hn)
+ self.assertEqual(u(hn), "∫≜⩕ Smith😊")
+ # test cleanup
TEST_NAMES = (
"John Doe",
@@ -2160,8 +2182,8 @@ if __name__ == '__main__':
name = sys.argv[1]
hn = HumanName(name, encoding=sys.stdout.encoding)
print((repr(hn)))
- # hn.capitalize()
- # print((repr(hn)))
+ hn.capitalize()
+ print((repr(hn)))
else:
print("-"*80)
print("Running tests")
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-nameparser.git
More information about the Python-modules-commits
mailing list