[Python-modules-commits] [python-nameparser] 01/03: Import python-nameparser_0.4.0.orig.tar.gz
Edward Betts
edward at moszumanska.debian.org
Tue Jul 5 10:09:31 UTC 2016
This is an automated email from the git hooks/post-receive script.
edward pushed a commit to branch master
in repository python-nameparser.
commit 47511fcc093c2e0e9f19965b21bcb03190325c3e
Author: Edward Betts <edward at 4angle.com>
Date: Tue Jul 5 09:26:14 2016 +0100
Import python-nameparser_0.4.0.orig.tar.gz
---
PKG-INFO | 12 +++++++++---
README.rst | 10 ++++++++--
nameparser.egg-info/PKG-INFO | 12 +++++++++---
nameparser/__init__.py | 2 +-
nameparser/config/__init__.py | 5 +----
nameparser/config/prefixes.py | 1 +
nameparser/config/suffixes.py | 4 ----
nameparser/config/titles.py | 8 ++++++++
nameparser/parser.py | 32 ++++++++++++++++++-------------
tests.py | 44 +++++++++++++++++++++++++++++++++----------
10 files changed, 90 insertions(+), 40 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 19ddc1a..87a9820 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: nameparser
-Version: 0.3.16
+Version: 0.4.0
Summary: A simple Python module for parsing human names into their individual components.
Home-page: https://github.com/derek73/python-nameparser
Author: Derek Gulbranson
@@ -24,7 +24,11 @@ Description: Name Parser
* hn.suffix
* hn.nickname
- Supports 3 different comma placement variations in the input string.
+ Supported Name Structures
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ The supported name structure is generally "Title First Middle Last Suffix", where all pieces
+ are optional. Comma-separated format like "Last, First" is also supported.
1. Title Firstname "Nickname" Middle Middle Lastname Suffix
2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix]
@@ -41,7 +45,7 @@ Description: Name Parser
It attempts the best guess that can be made with a simple, rule-based approach.
Its main use case is English and it is not likely to be useful for languages
- that do not share the same structure as English names. It's not perfect, but it
+ that do not conform to the supported name structure. It's not perfect, but it
gets you pretty far.
Installation
@@ -81,6 +85,8 @@ Description: Name Parser
'de la Vega'
>>> name.as_dict()
{'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'}
+ >>> str(name)
+ 'Dr. Juan Q. Xavier de la Vega III (Doc Vega)'
>>> name.string_format = "{first} {last}"
>>> str(name)
'Juan de la Vega'
diff --git a/README.rst b/README.rst
index 6422810..da2265c 100644
--- a/README.rst
+++ b/README.rst
@@ -16,7 +16,11 @@ individual components.
* hn.suffix
* hn.nickname
-Supports 3 different comma placement variations in the input string.
+Supported Name Structures
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The supported name structure is generally "Title First Middle Last Suffix", where all pieces
+are optional. Comma-separated format like "Last, First" is also supported.
1. Title Firstname "Nickname" Middle Middle Lastname Suffix
2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix]
@@ -33,7 +37,7 @@ of names that are all upper- or lowercase names.
It attempts the best guess that can be made with a simple, rule-based approach.
Its main use case is English and it is not likely to be useful for languages
-that do not share the same structure as English names. It's not perfect, but it
+that do not conform to the supported name structure. It's not perfect, but it
gets you pretty far.
Installation
@@ -73,6 +77,8 @@ Quick Start Example
'de la Vega'
>>> name.as_dict()
{'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'}
+ >>> str(name)
+ 'Dr. Juan Q. Xavier de la Vega III (Doc Vega)'
>>> name.string_format = "{first} {last}"
>>> str(name)
'Juan de la Vega'
diff --git a/nameparser.egg-info/PKG-INFO b/nameparser.egg-info/PKG-INFO
index 19ddc1a..87a9820 100644
--- a/nameparser.egg-info/PKG-INFO
+++ b/nameparser.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: nameparser
-Version: 0.3.16
+Version: 0.4.0
Summary: A simple Python module for parsing human names into their individual components.
Home-page: https://github.com/derek73/python-nameparser
Author: Derek Gulbranson
@@ -24,7 +24,11 @@ Description: Name Parser
* hn.suffix
* hn.nickname
- Supports 3 different comma placement variations in the input string.
+ Supported Name Structures
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ The supported name structure is generally "Title First Middle Last Suffix", where all pieces
+ are optional. Comma-separated format like "Last, First" is also supported.
1. Title Firstname "Nickname" Middle Middle Lastname Suffix
2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix]
@@ -41,7 +45,7 @@ Description: Name Parser
It attempts the best guess that can be made with a simple, rule-based approach.
Its main use case is English and it is not likely to be useful for languages
- that do not share the same structure as English names. It's not perfect, but it
+ that do not conform to the supported name structure. It's not perfect, but it
gets you pretty far.
Installation
@@ -81,6 +85,8 @@ Description: Name Parser
'de la Vega'
>>> name.as_dict()
{'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'}
+ >>> str(name)
+ 'Dr. Juan Q. Xavier de la Vega III (Doc Vega)'
>>> name.string_format = "{first} {last}"
>>> str(name)
'Juan de la Vega'
diff --git a/nameparser/__init__.py b/nameparser/__init__.py
index 9e05702..80ae1e7 100644
--- a/nameparser/__init__.py
+++ b/nameparser/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 3, 16)
+VERSION = (0, 4, 0)
__version__ = '.'.join(map(str, VERSION))
__author__ = "Derek Gulbranson"
__author_email__ = 'derek73 at gmail.com'
diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py
index 37bb338..7bddf90 100644
--- a/nameparser/config/__init__.py
+++ b/nameparser/config/__init__.py
@@ -37,7 +37,6 @@ from nameparser.util import lc
from nameparser.config.prefixes import PREFIXES
from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
from nameparser.config.conjunctions import CONJUNCTIONS
-from nameparser.config.suffixes import SUFFIXES
from nameparser.config.suffixes import SUFFIX_ACRONYMS
from nameparser.config.suffixes import SUFFIX_NOT_ACRONYMS
from nameparser.config.titles import TITLES
@@ -171,7 +170,6 @@ class Constants(object):
def __init__(self,
prefixes=PREFIXES,
- suffixes=SUFFIXES,
suffix_acronyms=SUFFIX_ACRONYMS,
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
titles=TITLES,
@@ -181,7 +179,6 @@ class Constants(object):
regexes=REGEXES
):
self.prefixes = SetManager(prefixes)
- self.suffixes = SetManager(suffixes)
self.suffix_acronyms = SetManager(suffix_acronyms)
self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
self.titles = SetManager(titles)
@@ -194,7 +191,7 @@ class Constants(object):
@property
def suffixes_prefixes_titles(self):
if not self._pst:
- self._pst = self.prefixes | self.suffixes | self.titles
+ self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
return self._pst
def __repr__(self):
diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py
index d01d99f..64731e8 100644
--- a/nameparser/config/prefixes.py
+++ b/nameparser/config/prefixes.py
@@ -15,6 +15,7 @@ PREFIXES = set([
'de',
'di',
'dí',
+ 'du',
'ibn',
'la',
'le',
diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py
index b0f1751..b8c4f4b 100644
--- a/nameparser/config/suffixes.py
+++ b/nameparser/config/suffixes.py
@@ -122,7 +122,3 @@ that may or may not have periods between the letters. The parser removes periods
when matching against these pieces.
"""
-SUFFIXES = SUFFIX_ACRONYMS | SUFFIX_NOT_ACRONYMS
-"""
-A union of the sets :py:attr:`SUFFIX_ACRONYMS` and :py:attr:`SUFFIX_NOT_ACRONYMS`
-"""
diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py
index f718a3b..0fb78e6 100644
--- a/nameparser/config/titles.py
+++ b/nameparser/config/titles.py
@@ -16,6 +16,14 @@ FIRST_NAME_TITLES = set([
'sir',
'sister',
'uncle',
+ 'sheikh',
+ 'sheik',
+ 'shaik',
+ 'shayk',
+ 'shaykh',
+ 'shaikh',
+ 'cheikh',
+ 'shekh',
])
"""
When these titles appear with a single other name, that name is a first name, e.g.
diff --git a/nameparser/parser.py b/nameparser/parser.py
index dd86174..fc2c173 100644
--- a/nameparser/parser.py
+++ b/nameparser/parser.py
@@ -134,12 +134,12 @@ class HumanName(object):
else:
_string = "<%(class)s : [\n\ttitle: '%(title)s' \n\tfirst: '%(first)s' \n\tmiddle: '%(middle)s' \n\tlast: '%(last)s' \n\tsuffix: '%(suffix)s'\n\tnickname: '%(nickname)s'\n]>" % {
'class': self.__class__.__name__,
- 'title': self.title,
- 'first': self.first,
- 'middle': self.middle,
- 'last': self.last,
- 'suffix': self.suffix,
- 'nickname': self.nickname,
+ 'title': self.title or '',
+ 'first': self.first or '',
+ 'middle': self.middle or '',
+ 'last': self.last or '',
+ 'suffix': self.suffix or '',
+ 'nickname': self.nickname or '',
}
if sys.version >= '3':
return _string
@@ -544,8 +544,8 @@ class HumanName(object):
raise TypeError("Name parts must be strings. Got {0}".format(type(part)))
output += [x.strip(' ,') for x in part.split(' ')]
- # If there's periods, check if it's titles without spaces and add spaces
- # so they get picked up later as titles.
+ # If part contains periods, check if it's multiple titles or suffixes together without spaces
+ # if so, add the new part with periods to the constants so they get parsed correctly later
for part in output:
# if this part has a period not at the beginning or end
if self.C.regexes.period_not_at_end.match(part):
@@ -559,7 +559,7 @@ class HumanName(object):
self.C.titles.add(part)
continue
if len(list(suffixes)):
- self.C.suffixes.add(part)
+ self.C.suffix_not_acronyms.add(part)
continue
return self.join_on_conjunctions(output, additional_parts_count)
@@ -684,12 +684,15 @@ class HumanName(object):
replacement = lambda m: self.cap_word(m.group(0))
return self.C.regexes.word.sub(replacement, piece)
- def capitalize(self):
+ def capitalize(self, force=False):
"""
The HumanName class can try to guess the correct capitalization
- of name entered in all upper or lower case. It will not adjust
- the case of names entered in mixed case.
+ of name entered in all upper or lower case. By default, it will not adjust
+ the case of names entered in mixed case. To run capitalization on all names
+ pass the parameter `force=True`.
+ :param bool force: force capitalization of strings that include mixed case
+
**Usage**
.. doctest:: capitalize
@@ -703,10 +706,13 @@ class HumanName(object):
>>> name.capitalize()
>>> str(name)
'Shirley Maclaine'
+ >>> name.capitalize(force=True)
+ >>> str(name)
+ 'Shirley MacLaine'
"""
name = u(self)
- if not (name == name.upper() or name == name.lower()):
+ if not force and not (name == name.upper() or name == name.lower()):
return
self.title_list = self.cap_piece(self.title ).split(' ')
self.first_list = self.cap_piece(self.first ).split(' ')
diff --git a/tests.py b/tests.py
index 0ca9677..579bf13 100644
--- a/tests.py
+++ b/tests.py
@@ -1517,6 +1517,25 @@ class SuffixesTestCase(HumanNameTestBase):
self.m(hn.last, "King", hn)
self.m(hn.suffix, "Jr", hn)
+ def test_suffix_with_periods(self):
+ hn = HumanName("John Doe Msc.Ed.")
+ self.m(hn.first,"John", hn)
+ self.m(hn.last,"Doe", hn)
+ self.m(hn.suffix,"Msc.Ed.", hn)
+
+ def test_suffix_with_periods_with_comma(self):
+ hn = HumanName("John Doe, Msc.Ed.")
+ self.m(hn.first,"John", hn)
+ self.m(hn.last,"Doe", hn)
+ self.m(hn.suffix,"Msc.Ed.", hn)
+
+ def test_suffix_with_periods_with_lastname_comma(self):
+ hn = HumanName("Doe, John Msc.Ed.")
+ self.m(hn.first,"John", hn)
+ self.m(hn.last,"Doe", hn)
+ self.m(hn.suffix,"Msc.Ed.", hn)
+
+
class TitleTestCase(HumanNameTestBase):
def test_last_name_is_also_title(self):
@@ -1719,17 +1738,17 @@ class TitleTestCase(HumanNameTestBase):
self.m(hn.first, "Jane", hn)
self.m(hn.last, "Doctor", hn)
- @unittest.expectedFailure
- def test_title_as_suffix(self):
- """
- Semantically, PhD is a title, not a suffix.
- http://code.google.com/p/python-nameparser/issues/detail?id=7
- """
- hn = HumanName("J. Smith, PhD")
- self.m(hn.title, "PhD", hn)
- self.m(hn.first, "J.", hn)
- self.m(hn.last, "Smith", hn)
+ def test_title_with_periods(self):
+ hn = HumanName("Lt.Gov. John Doe")
+ self.m(hn.title,"Lt.Gov.", hn)
+ self.m(hn.first,"John", hn)
+ self.m(hn.last,"Doe", hn)
+ def test_title_with_periods_lastname_comma(self):
+ hn = HumanName("Doe, Lt.Gov. John")
+ self.m(hn.title,"Lt.Gov.", hn)
+ self.m(hn.first,"John", hn)
+ self.m(hn.last,"Doe", hn)
class HumanNameCapitalizationTestCase(HumanNameTestBase):
def test_capitalization_exception_for_III(self):
@@ -1772,6 +1791,11 @@ class HumanNameCapitalizationTestCase(HumanNameTestBase):
hn.capitalize()
self.m(str(hn), 'Shirley Maclaine', hn)
+ def test_force_capitalization(self):
+ hn = HumanName('Shirley Maclaine')
+ hn.capitalize(force=True)
+ self.m(str(hn), 'Shirley MacLaine', hn)
+
def test_capitalize_diacritics(self):
hn = HumanName('matthëus schmidt')
hn.capitalize()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-nameparser.git
More information about the Python-modules-commits mailing list