[Python-modules-commits] [python-nameparser] 01/03: New upstream version 0.5.5

Edward Betts edward at moszumanska.debian.org
Mon Jan 15 22:42:29 UTC 2018


This is an automated email from the git hooks/post-receive script.

edward pushed a commit to branch debian/master
in repository python-nameparser.

commit 308ac85297a25a3f8595af16091cc1e8f84e5ac3
Author: Edward Betts <edward at 4angle.com>
Date:   Mon Jan 15 19:24:45 2018 +0000

    New upstream version 0.5.5
---
 PKG-INFO                      |  3 ++-
 nameparser.egg-info/PKG-INFO  |  3 ++-
 nameparser/__init__.py        |  2 +-
 nameparser/config/__init__.py | 22 ++++++++++++++++------
 nameparser/config/prefixes.py | 11 ++++++++---
 nameparser/config/regexes.py  |  2 +-
 nameparser/config/suffixes.py |  2 ++
 nameparser/config/titles.py   |  1 +
 nameparser/parser.py          | 23 ++++++++++++++---------
 tests.py                      | 17 ++++++++++++++++-
 10 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index 5cad489..e7c9987 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,11 +1,12 @@
 Metadata-Version: 1.1
 Name: nameparser
-Version: 0.5.3
+Version: 0.5.5
 Summary: A simple Python module for parsing human names into their individual components.
 Home-page: https://github.com/derek73/python-nameparser
 Author: Derek Gulbranson
 Author-email: derek73 at gmail.com
 License: LGPL
+Description-Content-Type: UNKNOWN
 Description: Name Parser
         ===========
         
diff --git a/nameparser.egg-info/PKG-INFO b/nameparser.egg-info/PKG-INFO
index 5cad489..e7c9987 100644
--- a/nameparser.egg-info/PKG-INFO
+++ b/nameparser.egg-info/PKG-INFO
@@ -1,11 +1,12 @@
 Metadata-Version: 1.1
 Name: nameparser
-Version: 0.5.3
+Version: 0.5.5
 Summary: A simple Python module for parsing human names into their individual components.
 Home-page: https://github.com/derek73/python-nameparser
 Author: Derek Gulbranson
 Author-email: derek73 at gmail.com
 License: LGPL
+Description-Content-Type: UNKNOWN
 Description: Name Parser
         ===========
         
diff --git a/nameparser/__init__.py b/nameparser/__init__.py
index ea1125d..75e716c 100644
--- a/nameparser/__init__.py
+++ b/nameparser/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 5, 3)
+VERSION = (0, 5, 5)
 __version__ = '.'.join(map(str, VERSION))
 __author__ = "Derek Gulbranson"
 __author_email__ = 'derek73 at gmail.com'
diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py
index 7bddf90..3b11e88 100644
--- a/nameparser/config/__init__.py
+++ b/nameparser/config/__init__.py
@@ -43,6 +43,8 @@ from nameparser.config.titles import TITLES
 from nameparser.config.titles import FIRST_NAME_TITLES
 from nameparser.config.regexes import REGEXES
 
+DEFAULT_ENCODING = 'UTF-8'
+
 class SetManager(collections.Set):
     '''
     Easily add and remove config variables per module or instance. Subclass of
@@ -84,15 +86,23 @@ class SetManager(collections.Set):
             self.count = c + 1
             return getattr(self, self.elements[c]) or next(self)
     
+    def add_with_encoding(self, s, encoding=None):
+        """
+        Add the lower case and no-period version of the string to the set. Pass an
+        explicit `encoding` parameter to specify the encoding of binary strings that
+        are not DEFAULT_ENCODING (UTF-8).
+        """
+        encoding = encoding or sys.stdin.encoding or DEFAULT_ENCODING
+        if type(s) == binary_type:
+            s = s.decode(encoding)
+        self.elements.add(lc(s))
+
     def add(self, *strings):
         """
         Add the lower case and no-period version of the string arguments to the set.
-        Returns ``self`` for chaining.
+        Can pass a list of strings. Returns ``self`` for chaining.
         """
-        for s in strings:
-            if type(s) == binary_type:
-                s = s.decode(sys.stdin.encoding)
-            self.elements.add(lc(s))
+        [self.add_with_encoding(s) for s in strings]
         return self
     
     def remove(self, *strings):
@@ -193,7 +203,7 @@ class Constants(object):
         if not self._pst:
             self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
         return self._pst
-    
+
     def __repr__(self):
         return "<Constants() instance>"
     
diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py
index 64731e8..21c82fa 100644
--- a/nameparser/config/prefixes.py
+++ b/nameparser/config/prefixes.py
@@ -4,18 +4,23 @@ from __future__ import unicode_literals
 #: Name pieces that appear before a last name. They join to the piece that follows them to make one new piece.
 PREFIXES = set([
     'abu',
-    'bon',
     'bin',
+    'bon',
     'da',
     'dal',
     'de',
+    'degli',
+    'dei',
     'del',
     'dela',
+    'della',
+    'delle',
+    'delli',
+    'dello',
     'der',
-    'de',
     'di',
-    'dí',
     'du',
+    'dí',
     'ibn',
     'la',
     'le',
diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py
index 51a6ed2..42da85d 100644
--- a/nameparser/config/regexes.py
+++ b/nameparser/config/regexes.py
@@ -21,7 +21,7 @@ except re.error:
 REGEXES = set([
     ("spaces", re.compile(r"\s+", re.U)),
     ("word", re.compile(r"(\w|\.)+", re.U)),
-    ("mac", re.compile(r'^(ma?c)(\w+)', re.I | re.U)),
+    ("mac", re.compile(r'^(ma?c)(\w{2,})', re.I | re.U)),
     ("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
     ("nickname", re.compile(r'\s*?[\("](.+?)[\)"]', re.U)),
     ("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py
index b8c4f4b..c058ce6 100644
--- a/nameparser/config/suffixes.py
+++ b/nameparser/config/suffixes.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 SUFFIX_NOT_ACRONYMS = set([
+    'dr',
     'esq',
     'esquire',
     'jr',
@@ -72,6 +73,7 @@ SUFFIX_ACRONYMS = set([
     'idsm',
     'iom',
     'iso',
+    'jd',
     'kbe',
     'kcb',
     'kcie',
diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py
index 0f37a19..131738f 100644
--- a/nameparser/config/titles.py
+++ b/nameparser/config/titles.py
@@ -618,6 +618,7 @@ TITLES = FIRST_NAME_TITLES | set([
     'warden',
     'warrant',
     'wing',
+    'wm',
     'wo-1',
     'wo1',
     'wo2',
diff --git a/nameparser/parser.py b/nameparser/parser.py
index 55d85df..1370cb2 100644
--- a/nameparser/parser.py
+++ b/nameparser/parser.py
@@ -11,6 +11,7 @@ from nameparser.util import lc
 from nameparser.util import log
 from nameparser.config import CONSTANTS
 from nameparser.config import Constants
+from nameparser.config import DEFAULT_ENCODING
 
 ENCODING = 'utf-8'
 
@@ -69,13 +70,13 @@ class HumanName(object):
     unparsable = True
     _full_name = ''
     
-    def __init__(self, full_name="", constants=CONSTANTS, encoding=ENCODING, 
+    def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING,
                 string_format=None):
         self.C = constants
         if type(self.C) is not type(CONSTANTS):
             self.C = Constants()
         
-        self.ENCODING = encoding
+        self.encoding = encoding
         self.string_format = string_format or self.C.string_format
         # full_name setter triggers the parse
         self.full_name = full_name
@@ -127,7 +128,7 @@ class HumanName(object):
         if self.string_format:
             # string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
             _s = self.string_format.format(**self.as_dict())
-            # remove trailing punctation from missing nicknames
+            # remove trailing punctuation from missing nicknames
             _s = _s.replace(str(self.C.empty_attribute_default),'').replace(" ()","").replace(" ''","").replace(' ""',"")
             return self.collapse_whitespace(_s).strip(', ')
         return " ".join(self)
@@ -135,7 +136,7 @@ class HumanName(object):
     def __str__(self):
         if sys.version >= '3':
             return self.__unicode__()
-        return self.__unicode__().encode(self.ENCODING)
+        return self.__unicode__().encode(self.encoding)
     
     def __repr__(self):
         if self.unparsable:
@@ -152,7 +153,7 @@ class HumanName(object):
             }
         if sys.version >= '3':
             return _string
-        return _string.encode(self.ENCODING)
+        return _string.encode(self.encoding)
     
     def as_dict(self, include_empty=True):
         """
@@ -355,7 +356,7 @@ class HumanName(object):
         self.original = value
         self._full_name = value
         if isinstance(value, binary_type):
-            self._full_name = value.decode(self.ENCODING)
+            self._full_name = value.decode(self.encoding)
         self.parse_full_name()
     
     def collapse_whitespace(self, string):
@@ -640,7 +641,7 @@ class HumanName(object):
         
         """
         length = len(pieces) + additional_parts_count
-        # don't join on conjuctions if there's only 2 parts
+        # don't join on conjunctions if there's only 2 parts
         if length < 3:
             return pieces
             
@@ -657,7 +658,7 @@ class HumanName(object):
         for i, val in enumerate(conj_index):
             try:
                 if conj_index[i+1] == val+1:
-                     contiguous_conj_i += [val]
+                    contiguous_conj_i += [val]
             except IndexError:
                 pass
         
@@ -679,7 +680,11 @@ class HumanName(object):
         for i in reversed(delete_i):
             # delete pieces in reverse order or the index changes on each delete
             del pieces[i]
-        
+
+        if len(pieces) == 1:
+            # if there's only one piece left, nothing left to do
+            return pieces
+
         # refresh conjunction index locations
         conj_index = [i for i, piece in enumerate(pieces) if self.is_conjunction(piece)]
         
diff --git a/tests.py b/tests.py
index 983eed8..3123a6f 100644
--- a/tests.py
+++ b/tests.py
@@ -189,6 +189,7 @@ class HumanNamePythonTests(HumanNameTestBase):
         self.m(hn.first, "", hn)
         self.m(hn.last, "", hn)
 
+
 class FirstNameHandlingTests(HumanNameTestBase):
     def test_first_name(self):
         hn = HumanName("Andrew")
@@ -1058,7 +1059,6 @@ class HumanNameBruteForceTests(HumanNameTestBase):
         self.m(hn.last, "Almighty", hn)
 
 
-
 class HumanNameConjunctionTestCase(HumanNameTestBase):
     # Last name with conjunction
     def test_last_name_with_conjunction(self):
@@ -1244,6 +1244,11 @@ class HumanNameConjunctionTestCase(HumanNameTestBase):
         # if you want to be technical, Queen is in FIRST_NAME_TITLES
         self.m(hn.first, "Elizabeth", hn)
 
+    def test_name_is_conjunctions(self):
+        hn = HumanName("e and e")
+        self.m(hn.first, "e and e", hn)
+
+
 class ConstantsCustomization(HumanNameTestBase):
 
     def test_add_title(self):
@@ -1335,6 +1340,12 @@ class ConstantsCustomization(HumanNameTestBase):
         hn.C.empty_attribute_default = None
         self.assertEqual('', str(hn), hn)
 
+    def test_add_constant_with_explicit_encoding(self):
+        c = Constants()
+        c.titles.add_with_encoding(b'b\351ck', encoding='latin_1')
+        self.assertIn('béck', c.titles)
+
+
 class HumanNameNicknameTestCase(HumanNameTestBase):
     # https://code.google.com/p/python-nameparser/issues/detail?id=33
     def test_nickname_in_parenthesis(self):
@@ -1858,6 +1869,10 @@ class HumanNameCapitalizationTestCase(HumanNameTestBase):
         hn.capitalize()
         self.m(str(hn), 'Ronald McDonald', hn)
 
+    def test_short_names_with_mac(self):
+        hn = HumanName('mack johnson')
+        hn.capitalize()
+        self.m(str(hn), 'Mack Johnson', hn)
 
 class HumanNameOutputFormatTests(HumanNameTestBase):
     

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-nameparser.git



More information about the Python-modules-commits mailing list