[Python-modules-commits] [python-nameparser] 01/03: Import python-nameparser_0.4.0.orig.tar.gz

Edward Betts edward at moszumanska.debian.org
Tue Jul 5 10:09:31 UTC 2016


This is an automated email from the git hooks/post-receive script.

edward pushed a commit to branch master
in repository python-nameparser.

commit 47511fcc093c2e0e9f19965b21bcb03190325c3e
Author: Edward Betts <edward at 4angle.com>
Date:   Tue Jul 5 09:26:14 2016 +0100

    Import python-nameparser_0.4.0.orig.tar.gz
---
 PKG-INFO                      | 12 +++++++++---
 README.rst                    | 10 ++++++++--
 nameparser.egg-info/PKG-INFO  | 12 +++++++++---
 nameparser/__init__.py        |  2 +-
 nameparser/config/__init__.py |  5 +----
 nameparser/config/prefixes.py |  1 +
 nameparser/config/suffixes.py |  4 ----
 nameparser/config/titles.py   |  8 ++++++++
 nameparser/parser.py          | 32 ++++++++++++++++++-------------
 tests.py                      | 44 +++++++++++++++++++++++++++++++++----------
 10 files changed, 90 insertions(+), 40 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index 19ddc1a..87a9820 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nameparser
-Version: 0.3.16
+Version: 0.4.0
 Summary: A simple Python module for parsing human names into their individual components.
 Home-page: https://github.com/derek73/python-nameparser
 Author: Derek Gulbranson
@@ -24,7 +24,11 @@ Description: Name Parser
         * hn.suffix
         * hn.nickname
         
-        Supports 3 different comma placement variations in the input string.
+        Supported Name Structures
+        ~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        The supported name structure is generally "Title First Middle Last Suffix", where all pieces 
+        are optional. Comma-separated format like "Last, First" is also supported.
         
         1. Title Firstname "Nickname" Middle Middle Lastname Suffix
         2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix]
@@ -41,7 +45,7 @@ Description: Name Parser
         
         It attempts the best guess that can be made with a simple, rule-based approach. 
         Its main use case is English and it is not likely to be useful for languages 
-        that do not share the same structure as English names. It's not perfect, but it 
+        that do not conform to the supported name structure. It's not perfect, but it 
         gets you pretty far.
         
         Installation
@@ -81,6 +85,8 @@ Description: Name Parser
             'de la Vega'
             >>> name.as_dict()
             {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'}
+            >>> str(name)
+            'Dr. Juan Q. Xavier de la Vega III (Doc Vega)'
             >>> name.string_format = "{first} {last}"
             >>> str(name)
             'Juan de la Vega'
diff --git a/README.rst b/README.rst
index 6422810..da2265c 100644
--- a/README.rst
+++ b/README.rst
@@ -16,7 +16,11 @@ individual components.
 * hn.suffix
 * hn.nickname
 
-Supports 3 different comma placement variations in the input string.
+Supported Name Structures
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The supported name structure is generally "Title First Middle Last Suffix", where all pieces 
+are optional. Comma-separated format like "Last, First" is also supported.
 
 1. Title Firstname "Nickname" Middle Middle Lastname Suffix
 2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix]
@@ -33,7 +37,7 @@ of names that are all upper- or lowercase names.
 
 It attempts the best guess that can be made with a simple, rule-based approach. 
 Its main use case is English and it is not likely to be useful for languages 
-that do not share the same structure as English names. It's not perfect, but it 
+that do not conform to the supported name structure. It's not perfect, but it 
 gets you pretty far.
 
 Installation
@@ -73,6 +77,8 @@ Quick Start Example
     'de la Vega'
     >>> name.as_dict()
     {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'}
+    >>> str(name)
+    'Dr. Juan Q. Xavier de la Vega III (Doc Vega)'
     >>> name.string_format = "{first} {last}"
     >>> str(name)
     'Juan de la Vega'
diff --git a/nameparser.egg-info/PKG-INFO b/nameparser.egg-info/PKG-INFO
index 19ddc1a..87a9820 100644
--- a/nameparser.egg-info/PKG-INFO
+++ b/nameparser.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nameparser
-Version: 0.3.16
+Version: 0.4.0
 Summary: A simple Python module for parsing human names into their individual components.
 Home-page: https://github.com/derek73/python-nameparser
 Author: Derek Gulbranson
@@ -24,7 +24,11 @@ Description: Name Parser
         * hn.suffix
         * hn.nickname
         
-        Supports 3 different comma placement variations in the input string.
+        Supported Name Structures
+        ~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        The supported name structure is generally "Title First Middle Last Suffix", where all pieces 
+        are optional. Comma-separated format like "Last, First" is also supported.
         
         1. Title Firstname "Nickname" Middle Middle Lastname Suffix
         2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix]
@@ -41,7 +45,7 @@ Description: Name Parser
         
         It attempts the best guess that can be made with a simple, rule-based approach. 
         Its main use case is English and it is not likely to be useful for languages 
-        that do not share the same structure as English names. It's not perfect, but it 
+        that do not conform to the supported name structure. It's not perfect, but it 
         gets you pretty far.
         
         Installation
@@ -81,6 +85,8 @@ Description: Name Parser
             'de la Vega'
             >>> name.as_dict()
             {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'}
+            >>> str(name)
+            'Dr. Juan Q. Xavier de la Vega III (Doc Vega)'
             >>> name.string_format = "{first} {last}"
             >>> str(name)
             'Juan de la Vega'
diff --git a/nameparser/__init__.py b/nameparser/__init__.py
index 9e05702..80ae1e7 100644
--- a/nameparser/__init__.py
+++ b/nameparser/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 3, 16)
+VERSION = (0, 4, 0)
 __version__ = '.'.join(map(str, VERSION))
 __author__ = "Derek Gulbranson"
 __author_email__ = 'derek73 at gmail.com'
diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py
index 37bb338..7bddf90 100644
--- a/nameparser/config/__init__.py
+++ b/nameparser/config/__init__.py
@@ -37,7 +37,6 @@ from nameparser.util import lc
 from nameparser.config.prefixes import PREFIXES
 from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
 from nameparser.config.conjunctions import CONJUNCTIONS
-from nameparser.config.suffixes import SUFFIXES 
 from nameparser.config.suffixes import SUFFIX_ACRONYMS
 from nameparser.config.suffixes import SUFFIX_NOT_ACRONYMS
 from nameparser.config.titles import TITLES
@@ -171,7 +170,6 @@ class Constants(object):
     
     def __init__(self, 
                     prefixes=PREFIXES, 
-                    suffixes=SUFFIXES,
                     suffix_acronyms=SUFFIX_ACRONYMS,
                     suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
                     titles=TITLES,
@@ -181,7 +179,6 @@ class Constants(object):
                     regexes=REGEXES
                 ):
         self.prefixes            = SetManager(prefixes)
-        self.suffixes            = SetManager(suffixes)
         self.suffix_acronyms     = SetManager(suffix_acronyms)
         self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
         self.titles              = SetManager(titles)
@@ -194,7 +191,7 @@ class Constants(object):
     @property
     def suffixes_prefixes_titles(self):
         if not self._pst:
-            self._pst = self.prefixes | self.suffixes | self.titles
+            self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
         return self._pst
     
     def __repr__(self):
diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py
index d01d99f..64731e8 100644
--- a/nameparser/config/prefixes.py
+++ b/nameparser/config/prefixes.py
@@ -15,6 +15,7 @@ PREFIXES = set([
     'de',
     'di',
     'dí',
+    'du',
     'ibn',
     'la',
     'le',
diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py
index b0f1751..b8c4f4b 100644
--- a/nameparser/config/suffixes.py
+++ b/nameparser/config/suffixes.py
@@ -122,7 +122,3 @@ that may or may not have periods between the letters. The parser removes periods
 when matching against these pieces.
 
 """
-SUFFIXES = SUFFIX_ACRONYMS | SUFFIX_NOT_ACRONYMS
-"""
-A union of the sets :py:attr:`SUFFIX_ACRONYMS` and :py:attr:`SUFFIX_NOT_ACRONYMS`
-"""
diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py
index f718a3b..0fb78e6 100644
--- a/nameparser/config/titles.py
+++ b/nameparser/config/titles.py
@@ -16,6 +16,14 @@ FIRST_NAME_TITLES = set([
     'sir',
     'sister',
     'uncle',
+    'sheikh',
+    'sheik',
+    'shaik',
+    'shayk',
+    'shaykh',
+    'shaikh',
+    'cheikh',
+    'shekh',
 ])
 """
 When these titles appear with a single other name, that name is a first name, e.g.
diff --git a/nameparser/parser.py b/nameparser/parser.py
index dd86174..fc2c173 100644
--- a/nameparser/parser.py
+++ b/nameparser/parser.py
@@ -134,12 +134,12 @@ class HumanName(object):
         else:
             _string = "<%(class)s : [\n\ttitle: '%(title)s' \n\tfirst: '%(first)s' \n\tmiddle: '%(middle)s' \n\tlast: '%(last)s' \n\tsuffix: '%(suffix)s'\n\tnickname: '%(nickname)s'\n]>" % {
                 'class': self.__class__.__name__,
-                'title': self.title,
-                'first': self.first,
-                'middle': self.middle,
-                'last': self.last,
-                'suffix': self.suffix,
-                'nickname': self.nickname,
+                'title': self.title or '',
+                'first': self.first or '',
+                'middle': self.middle or '',
+                'last': self.last or '',
+                'suffix': self.suffix or '',
+                'nickname': self.nickname or '',
             }
         if sys.version >= '3':
             return _string
@@ -544,8 +544,8 @@ class HumanName(object):
                 raise TypeError("Name parts must be strings. Got {0}".format(type(part)))
             output += [x.strip(' ,') for x in part.split(' ')]
         
-        # If there's periods, check if it's titles without spaces and add spaces
-        # so they get picked up later as titles.
+        # If part contains periods, check if it's multiple titles or suffixes together without spaces
+        # if so, add the new part with periods to the constants so they get parsed correctly later
         for part in output:
             # if this part has a period not at the beginning or end
             if self.C.regexes.period_not_at_end.match(part):
@@ -559,7 +559,7 @@ class HumanName(object):
                     self.C.titles.add(part)
                     continue
                 if len(list(suffixes)):
-                    self.C.suffixes.add(part)
+                    self.C.suffix_not_acronyms.add(part)
                     continue
         
         return self.join_on_conjunctions(output, additional_parts_count)
@@ -684,12 +684,15 @@ class HumanName(object):
         replacement = lambda m: self.cap_word(m.group(0))
         return self.C.regexes.word.sub(replacement, piece)
 
-    def capitalize(self):
+    def capitalize(self, force=False):
         """
         The HumanName class can try to guess the correct capitalization 
-        of name entered in all upper or lower case. It will not adjust 
-        the case of names entered in mixed case.
+        of name entered in all upper or lower case. By default, it will not adjust 
+        the case of names entered in mixed case. To run capitalization on all names
+        pass the parameter `force=True`.
         
+        :param bool force: force capitalization of strings that include mixed case
+
         **Usage**
         
         .. doctest:: capitalize
@@ -703,10 +706,13 @@ class HumanName(object):
             >>> name.capitalize()
             >>> str(name) 
             'Shirley Maclaine'
+            >>> name.capitalize(force=True)
+            >>> str(name) 
+            'Shirley MacLaine'
         
         """
         name = u(self)
-        if not (name == name.upper() or name == name.lower()):
+        if not force and not (name == name.upper() or name == name.lower()):
             return
         self.title_list  = self.cap_piece(self.title ).split(' ')
         self.first_list  = self.cap_piece(self.first ).split(' ')
diff --git a/tests.py b/tests.py
index 0ca9677..579bf13 100644
--- a/tests.py
+++ b/tests.py
@@ -1517,6 +1517,25 @@ class SuffixesTestCase(HumanNameTestBase):
         self.m(hn.last, "King", hn)
         self.m(hn.suffix, "Jr", hn)
 
+    def test_suffix_with_periods(self):
+        hn = HumanName("John Doe Msc.Ed.")
+        self.m(hn.first,"John", hn)
+        self.m(hn.last,"Doe", hn)
+        self.m(hn.suffix,"Msc.Ed.", hn)
+
+    def test_suffix_with_periods_with_comma(self):
+        hn = HumanName("John Doe, Msc.Ed.")
+        self.m(hn.first,"John", hn)
+        self.m(hn.last,"Doe", hn)
+        self.m(hn.suffix,"Msc.Ed.", hn)
+
+    def test_suffix_with_periods_with_lastname_comma(self):
+        hn = HumanName("Doe, John Msc.Ed.")
+        self.m(hn.first,"John", hn)
+        self.m(hn.last,"Doe", hn)
+        self.m(hn.suffix,"Msc.Ed.", hn)
+
+
 class TitleTestCase(HumanNameTestBase):
 
     def test_last_name_is_also_title(self):
@@ -1719,17 +1738,17 @@ class TitleTestCase(HumanNameTestBase):
         self.m(hn.first, "Jane", hn)
         self.m(hn.last, "Doctor", hn)
 
-    @unittest.expectedFailure
-    def test_title_as_suffix(self):
-        """
-        Semantically, PhD is a title, not a suffix. 
-        http://code.google.com/p/python-nameparser/issues/detail?id=7
-        """
-        hn = HumanName("J. Smith, PhD")
-        self.m(hn.title, "PhD", hn)
-        self.m(hn.first, "J.", hn)
-        self.m(hn.last, "Smith", hn)
+    def test_title_with_periods(self):
+        hn = HumanName("Lt.Gov. John Doe")
+        self.m(hn.title,"Lt.Gov.", hn)
+        self.m(hn.first,"John", hn)
+        self.m(hn.last,"Doe", hn)
 
+    def test_title_with_periods_lastname_comma(self):
+        hn = HumanName("Doe, Lt.Gov. John")
+        self.m(hn.title,"Lt.Gov.", hn)
+        self.m(hn.first,"John", hn)
+        self.m(hn.last,"Doe", hn)
 
 class HumanNameCapitalizationTestCase(HumanNameTestBase):
     def test_capitalization_exception_for_III(self):
@@ -1772,6 +1791,11 @@ class HumanNameCapitalizationTestCase(HumanNameTestBase):
         hn.capitalize()
         self.m(str(hn), 'Shirley Maclaine', hn)
 
+    def test_force_capitalization(self):
+        hn = HumanName('Shirley Maclaine')
+        hn.capitalize(force=True)
+        self.m(str(hn), 'Shirley MacLaine', hn)
+
     def test_capitalize_diacritics(self):
         hn = HumanName('matthëus schmidt')
         hn.capitalize()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-nameparser.git



More information about the Python-modules-commits mailing list