[Python-modules-commits] [python-nameparser] 01/03: New upstream version 0.5.3

Thu Oct 5 09:08:55 UTC 2017

This is an automated email from the git hooks/post-receive script.

edward pushed a commit to annotated tag debian/0.5.3-1
in repository python-nameparser.

commit 464dcc976491b4942b08df0816b8dc9ea149c44f
Author: Edward Betts <edward at 4angle.com>
Date:   Thu Oct 5 08:32:35 2017 +0100

    New upstream version 0.5.3
---
 PKG-INFO                     |  2 +-
 nameparser.egg-info/PKG-INFO |  2 +-
 nameparser/__init__.py       |  2 +-
 nameparser/config/regexes.py | 17 +++++++++++++++++
 nameparser/parser.py         | 11 ++++++++++-
 setup.cfg                    |  1 -
 tests.py                     | 26 ++++++++++++++++++++++++--
 7 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index 5ae6db2..5cad489 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nameparser
-Version: 0.5.2
+Version: 0.5.3
 Summary: A simple Python module for parsing human names into their individual components.
 Home-page: https://github.com/derek73/python-nameparser
 Author: Derek Gulbranson
diff --git a/nameparser.egg-info/PKG-INFO b/nameparser.egg-info/PKG-INFO
index 5ae6db2..5cad489 100644
--- a/nameparser.egg-info/PKG-INFO
+++ b/nameparser.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nameparser
-Version: 0.5.2
+Version: 0.5.3
 Summary: A simple Python module for parsing human names into their individual components.
 Home-page: https://github.com/derek73/python-nameparser
 Author: Derek Gulbranson
diff --git a/nameparser/__init__.py b/nameparser/__init__.py
index 7661fab..ea1125d 100644
--- a/nameparser/__init__.py
+++ b/nameparser/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 5, 2)
+VERSION = (0, 5, 3)
 __version__ = '.'.join(map(str, VERSION))
 __author__ = "Derek Gulbranson"
 __author_email__ = 'derek73 at gmail.com'
diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py
index 01ca86d..51a6ed2 100644
--- a/nameparser/config/regexes.py
+++ b/nameparser/config/regexes.py
@@ -2,6 +2,22 @@
 from __future__ import unicode_literals
 import re
 
+# emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
+try:
+    # Wide UCS-4 build
+    re_emoji = re.compile('['
+        '\U0001F300-\U0001F64F'
+        '\U0001F680-\U0001F6FF'
+        '\u2600-\u26FF\u2700-\u27BF]+', 
+        re.UNICODE)
+except re.error:
+    # Narrow UCS-2 build
+    re_emoji = re.compile('('
+        '\ud83c[\udf00-\udfff]|'
+        '\ud83d[\udc00-\ude4f\ude80-\udeff]|'
+        '[\u2600-\u26FF\u2700-\u27BF])+', 
+        re.UNICODE)
+
 REGEXES = set([
     ("spaces", re.compile(r"\s+", re.U)),
     ("word", re.compile(r"(\w|\.)+", re.U)),
@@ -11,6 +27,7 @@ REGEXES = set([
     ("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
     ("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
     ("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U)),
+    ("emoji",re_emoji),
 ])
 """
 All regular expressions used by the parser are precompiled and stored in the config.
diff --git a/nameparser/parser.py b/nameparser/parser.py
index 55f574a..55d85df 100644
--- a/nameparser/parser.py
+++ b/nameparser/parser.py
@@ -77,6 +77,7 @@ class HumanName(object):
         
         self.ENCODING = encoding
         self.string_format = string_format or self.C.string_format
+        # full_name setter triggers the parse
         self.full_name = full_name
     
     def __iter__(self):
@@ -371,7 +372,7 @@ class HumanName(object):
         
         """
         self.parse_nicknames()
-        
+        self.squash_emoji()
 
     def post_process(self):
         """
@@ -392,6 +393,14 @@ class HumanName(object):
             self.nickname_list = re_nickname.findall(self._full_name)
             self._full_name = re_nickname.sub('', self._full_name)
 
+    def squash_emoji(self):
+        """
+        Remove emoji from the input string.
+        """
+        re_emoji = self.C.regexes.emoji
+        if re_emoji and re_emoji.search(self._full_name):
+            self._full_name = re_emoji.sub('', self._full_name)
+
     def handle_firstnames(self):
         """
         If there are only two parts and one is a title, assume it's a last name
diff --git a/setup.cfg b/setup.cfg
index 6f08d0e..adf5ed7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,5 +4,4 @@ universal = 1
 [egg_info]
 tag_build = 
 tag_date = 0
-tag_svn_revision = 0
 
diff --git a/tests.py b/tests.py
index 4137e74..983eed8 100644
--- a/tests.py
+++ b/tests.py
@@ -1936,6 +1936,28 @@ class HumanNameOutputFormatTests(HumanNameTestBase):
         self.assertEqual(u(hn), "Rev John (Kenny) A. Kenneth Doe III")
         hn.nickname=''
         self.assertEqual(u(hn), "Rev John A. Kenneth Doe III")
+    
+    def test_remove_emojis(self):
+        hn = HumanName("Sam Smith 😊")
+        self.m(hn.first,"Sam", hn)
+        self.m(hn.last,"Smith", hn)
+        self.assertEqual(u(hn), "Sam Smith")
+
+    def test_keep_non_emojis(self):
+        hn = HumanName("∫≜⩕ Smith 😊")
+        self.m(hn.first,"∫≜⩕", hn)
+        self.m(hn.last,"Smith", hn)
+        self.assertEqual(u(hn), "∫≜⩕ Smith")
+
+    def test_keep_emojis(self):
+        from nameparser.config import Constants
+        constants = Constants()
+        constants.regexes.emoji = False
+        hn = HumanName("∫≜⩕ Smith😊", constants)
+        self.m(hn.first,"∫≜⩕", hn)
+        self.m(hn.last,"Smith😊", hn)
+        self.assertEqual(u(hn), "∫≜⩕ Smith😊")
+        # test cleanup
 
 TEST_NAMES = (
     "John Doe",
@@ -2160,8 +2182,8 @@ if __name__ == '__main__':
         name = sys.argv[1]
         hn = HumanName(name, encoding=sys.stdout.encoding)
         print((repr(hn)))
-        # hn.capitalize()
-        # print((repr(hn)))
+        hn.capitalize()
+        print((repr(hn)))
     else:
         print("-"*80)
         print("Running tests")

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-nameparser.git