[Python-modules-commits] [python-nameparser] 01/03: New upstream version 0.5.5
Edward Betts
edward at moszumanska.debian.org
Mon Jan 15 22:42:29 UTC 2018
This is an automated email from the git hooks/post-receive script.
edward pushed a commit to branch debian/master in repository python-nameparser.
commit 308ac85297a25a3f8595af16091cc1e8f84e5ac3
Author: Edward Betts <edward at 4angle.com>
Date: Mon Jan 15 19:24:45 2018 +0000
New upstream version 0.5.5
---
PKG-INFO | 3 ++-
nameparser.egg-info/PKG-INFO | 3 ++-
nameparser/__init__.py | 2 +-
nameparser/config/__init__.py | 22 ++++++++++++++++------
nameparser/config/prefixes.py | 11 ++++++++---
nameparser/config/regexes.py | 2 +-
nameparser/config/suffixes.py | 2 ++
nameparser/config/titles.py | 1 +
nameparser/parser.py | 23 ++++++++++++++---------
tests.py | 17 ++++++++++++++++-
10 files changed, 63 insertions(+), 23 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 5cad489..e7c9987 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,11 +1,12 @@
Metadata-Version: 1.1
Name: nameparser
-Version: 0.5.3
+Version: 0.5.5
Summary: A simple Python module for parsing human names into their individual components.
Home-page: https://github.com/derek73/python-nameparser
Author: Derek Gulbranson
Author-email: derek73 at gmail.com
License: LGPL
+Description-Content-Type: UNKNOWN
Description: Name Parser
===========
diff --git a/nameparser.egg-info/PKG-INFO b/nameparser.egg-info/PKG-INFO
index 5cad489..e7c9987 100644
--- a/nameparser.egg-info/PKG-INFO
+++ b/nameparser.egg-info/PKG-INFO
@@ -1,11 +1,12 @@
Metadata-Version: 1.1
Name: nameparser
-Version: 0.5.3
+Version: 0.5.5
Summary: A simple Python module for parsing human names into their individual components.
Home-page: https://github.com/derek73/python-nameparser
Author: Derek Gulbranson
Author-email: derek73 at gmail.com
License: LGPL
+Description-Content-Type: UNKNOWN
Description: Name Parser
===========
diff --git a/nameparser/__init__.py b/nameparser/__init__.py
index ea1125d..75e716c 100644
--- a/nameparser/__init__.py
+++ b/nameparser/__init__.py
@@ -1,4 +1,4 @@
-VERSION = (0, 5, 3)
+VERSION = (0, 5, 5)
__version__ = '.'.join(map(str, VERSION))
__author__ = "Derek Gulbranson"
__author_email__ = 'derek73 at gmail.com'
diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py
index 7bddf90..3b11e88 100644
--- a/nameparser/config/__init__.py
+++ b/nameparser/config/__init__.py
@@ -43,6 +43,8 @@ from nameparser.config.titles import TITLES
from nameparser.config.titles import FIRST_NAME_TITLES
from nameparser.config.regexes import REGEXES
+DEFAULT_ENCODING = 'UTF-8'
+
class SetManager(collections.Set):
'''
Easily add and remove config variables per module or instance. Subclass of
@@ -84,15 +86,23 @@ class SetManager(collections.Set):
self.count = c + 1
return getattr(self, self.elements[c]) or next(self)
+ def add_with_encoding(self, s, encoding=None):
+ """
+ Add the lower case and no-period version of the string to the set. Pass an
+ explicit `encoding` parameter to specify the encoding of binary strings that
+ are not DEFAULT_ENCODING (UTF-8).
+ """
+ encoding = encoding or sys.stdin.encoding or DEFAULT_ENCODING
+ if type(s) == binary_type:
+ s = s.decode(encoding)
+ self.elements.add(lc(s))
+
def add(self, *strings):
"""
Add the lower case and no-period version of the string arguments to the set.
- Returns ``self`` for chaining.
+ Can pass a list of strings. Returns ``self`` for chaining.
"""
- for s in strings:
- if type(s) == binary_type:
- s = s.decode(sys.stdin.encoding)
- self.elements.add(lc(s))
+ [self.add_with_encoding(s) for s in strings]
return self
def remove(self, *strings):
@@ -193,7 +203,7 @@ class Constants(object):
if not self._pst:
self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
return self._pst
-
+
def __repr__(self):
return "<Constants() instance>"
diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py
index 64731e8..21c82fa 100644
--- a/nameparser/config/prefixes.py
+++ b/nameparser/config/prefixes.py
@@ -4,18 +4,23 @@ from __future__ import unicode_literals
#: Name pieces that appear before a last name. They join to the piece that follows them to make one new piece.
PREFIXES = set([
'abu',
- 'bon',
'bin',
+ 'bon',
'da',
'dal',
'de',
+ 'degli',
+ 'dei',
'del',
'dela',
+ 'della',
+ 'delle',
+ 'delli',
+ 'dello',
'der',
- 'de',
'di',
- 'dí',
'du',
+ 'dí',
'ibn',
'la',
'le',
diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py
index 51a6ed2..42da85d 100644
--- a/nameparser/config/regexes.py
+++ b/nameparser/config/regexes.py
@@ -21,7 +21,7 @@ except re.error:
REGEXES = set([
("spaces", re.compile(r"\s+", re.U)),
("word", re.compile(r"(\w|\.)+", re.U)),
- ("mac", re.compile(r'^(ma?c)(\w+)', re.I | re.U)),
+ ("mac", re.compile(r'^(ma?c)(\w{2,})', re.I | re.U)),
("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
("nickname", re.compile(r'\s*?[\("](.+?)[\)"]', re.U)),
("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py
index b8c4f4b..c058ce6 100644
--- a/nameparser/config/suffixes.py
+++ b/nameparser/config/suffixes.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
SUFFIX_NOT_ACRONYMS = set([
+ 'dr',
'esq',
'esquire',
'jr',
@@ -72,6 +73,7 @@ SUFFIX_ACRONYMS = set([
'idsm',
'iom',
'iso',
+ 'jd',
'kbe',
'kcb',
'kcie',
diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py
index 0f37a19..131738f 100644
--- a/nameparser/config/titles.py
+++ b/nameparser/config/titles.py
@@ -618,6 +618,7 @@ TITLES = FIRST_NAME_TITLES | set([
'warden',
'warrant',
'wing',
+ 'wm',
'wo-1',
'wo1',
'wo2',
diff --git a/nameparser/parser.py b/nameparser/parser.py
index 55d85df..1370cb2 100644
--- a/nameparser/parser.py
+++ b/nameparser/parser.py
@@ -11,6 +11,7 @@ from nameparser.util import lc
from nameparser.util import log
from nameparser.config import CONSTANTS
from nameparser.config import Constants
+from nameparser.config import DEFAULT_ENCODING
ENCODING = 'utf-8'
@@ -69,13 +70,13 @@ class HumanName(object):
unparsable = True
_full_name = ''
- def __init__(self, full_name="", constants=CONSTANTS, encoding=ENCODING,
+ def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING,
string_format=None):
self.C = constants
if type(self.C) is not type(CONSTANTS):
self.C = Constants()
- self.ENCODING = encoding
+ self.encoding = encoding
self.string_format = string_format or self.C.string_format
# full_name setter triggers the parse
self.full_name = full_name
@@ -127,7 +128,7 @@ class HumanName(object):
if self.string_format:
# string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
_s = self.string_format.format(**self.as_dict())
- # remove trailing punctation from missing nicknames
+ # remove trailing punctuation from missing nicknames
_s = _s.replace(str(self.C.empty_attribute_default),'').replace(" ()","").replace(" ''","").replace(' ""',"")
return self.collapse_whitespace(_s).strip(', ')
return " ".join(self)
@@ -135,7 +136,7 @@ class HumanName(object):
def __str__(self):
if sys.version >= '3':
return self.__unicode__()
- return self.__unicode__().encode(self.ENCODING)
+ return self.__unicode__().encode(self.encoding)
def __repr__(self):
if self.unparsable:
@@ -152,7 +153,7 @@ class HumanName(object):
}
if sys.version >= '3':
return _string
- return _string.encode(self.ENCODING)
+ return _string.encode(self.encoding)
def as_dict(self, include_empty=True):
"""
@@ -355,7 +356,7 @@ class HumanName(object):
self.original = value
self._full_name = value
if isinstance(value, binary_type):
- self._full_name = value.decode(self.ENCODING)
+ self._full_name = value.decode(self.encoding)
self.parse_full_name()
def collapse_whitespace(self, string):
@@ -640,7 +641,7 @@ class HumanName(object):
"""
length = len(pieces) + additional_parts_count
- # don't join on conjuctions if there's only 2 parts
+ # don't join on conjunctions if there's only 2 parts
if length < 3:
return pieces
@@ -657,7 +658,7 @@ class HumanName(object):
for i, val in enumerate(conj_index):
try:
if conj_index[i+1] == val+1:
- contiguous_conj_i += [val]
+ contiguous_conj_i += [val]
except IndexError:
pass
@@ -679,7 +680,11 @@ class HumanName(object):
for i in reversed(delete_i):
# delete pieces in reverse order or the index changes on each delete
del pieces[i]
-
+
+ if len(pieces) == 1:
+ # if there's only one piece left, nothing left to do
+ return pieces
+
# refresh conjunction index locations
conj_index = [i for i, piece in enumerate(pieces) if self.is_conjunction(piece)]
diff --git a/tests.py b/tests.py
index 983eed8..3123a6f 100644
--- a/tests.py
+++ b/tests.py
@@ -189,6 +189,7 @@ class HumanNamePythonTests(HumanNameTestBase):
self.m(hn.first, "", hn)
self.m(hn.last, "", hn)
+
class FirstNameHandlingTests(HumanNameTestBase):
def test_first_name(self):
hn = HumanName("Andrew")
@@ -1058,7 +1059,6 @@ class HumanNameBruteForceTests(HumanNameTestBase):
self.m(hn.last, "Almighty", hn)
-
class HumanNameConjunctionTestCase(HumanNameTestBase):
# Last name with conjunction
def test_last_name_with_conjunction(self):
@@ -1244,6 +1244,11 @@ class HumanNameConjunctionTestCase(HumanNameTestBase):
# if you want to be technical, Queen is in FIRST_NAME_TITLES
self.m(hn.first, "Elizabeth", hn)
+ def test_name_is_conjunctions(self):
+ hn = HumanName("e and e")
+ self.m(hn.first, "e and e", hn)
+
+
class ConstantsCustomization(HumanNameTestBase):
def test_add_title(self):
@@ -1335,6 +1340,12 @@ class ConstantsCustomization(HumanNameTestBase):
hn.C.empty_attribute_default = None
self.assertEqual('', str(hn), hn)
+ def test_add_constant_with_explicit_encoding(self):
+ c = Constants()
+ c.titles.add_with_encoding(b'b\351ck', encoding='latin_1')
+ self.assertIn('béck', c.titles)
+
+
class HumanNameNicknameTestCase(HumanNameTestBase):
# https://code.google.com/p/python-nameparser/issues/detail?id=33
def test_nickname_in_parenthesis(self):
@@ -1858,6 +1869,10 @@ class HumanNameCapitalizationTestCase(HumanNameTestBase):
hn.capitalize()
self.m(str(hn), 'Ronald McDonald', hn)
+ def test_short_names_with_mac(self):
+ hn = HumanName('mack johnson')
+ hn.capitalize()
+ self.m(str(hn), 'Mack Johnson', hn)
class HumanNameOutputFormatTests(HumanNameTestBase):
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-nameparser.git
More information about the Python-modules-commits mailing list