[Blends-commit] r3316 - /blends/trunk/webtools/blendstasktools.py
tille at users.alioth.debian.org
tille at users.alioth.debian.org
Mon May 7 19:51:15 UTC 2012
Author: tille
Date: Mon May 7 19:51:14 2012
New Revision: 3316
URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=3316
Log:
Apply patch from Michael Banck to normalise output of authors field
My changes to Michael's patch:
- Add logging about changed authors fields as INFO
- Fixed some wrong syntax in split method
- Add emergency brake: if the new string contains more ',' than the original string had ' and ' separators, do not change the field in this case
Modified:
blends/trunk/webtools/blendstasktools.py
Modified: blends/trunk/webtools/blendstasktools.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/webtools/blendstasktools.py?rev=3316&op=diff
==============================================================================
--- blends/trunk/webtools/blendstasktools.py (original)
+++ blends/trunk/webtools/blendstasktools.py Mon May 7 19:51:14 2012
@@ -1488,6 +1488,44 @@
if row[pub]:
if pub == "pages":
row[pub] = re.sub("--", "-", row[pub])
+ if (pub == "authors" and row[pub].count(" and ") or row[pub].count(" AND ")):
+ # assume "authors" string is a regular BibTeX "and" separated list of authors
+ row[pub] = re.sub("AND", "and", row[pub].strip())
+ authors_list = row[pub].split(" and ")
+ # normalize several BibTeX styles to "First Last, First Last and First Last":
+ # 1. "First Last and First Last and First Last"
+ # 2. "Last, First and Last, First and Last, First"
+ # 3. "First Last, First Last and First Last"
+ authors_string = ""
+ while (authors_list):
+ author = authors_list.pop(0)
+ if (author.count(",") > 1):
+ # 3. "First Last, First Last and First Last"
+ # authors string is already in desired format, keep it
+ authors_string = row[pub].strip()
+ break
+ elif (row[pub].count(",") == row[pub].count(" and ") + 1):
+ # 2. "Last, First and Last, First and Last, First"
+ # reverse last and first name
+ (last, first) = author.split(", ")
+ full_author = first + " " + last
+ else:
+ # 1. "First Last and First Last and First Last"
+ full_author = author
+ if (len(authors_list) > 1):
+ authors_string += full_author + ", "
+ elif (len(authors_list) > 0):
+ authors_string += full_author + " and "
+ else:
+ authors_string += full_author
+ if row[pub] != authors_string:
+ # emergency brake if algorithm fails to detect non-names like '1000 Genome Project Data Processing Subgroup'
+ if authors_string.count(',') > row[pub].count(' and '):
+ logger.warning("Refuse to change Author string in %s: '%s'(%i) -> '%s'(%i)", \
+ dep.pkg, to_unicode(row[pub]), row[pub].count(' and '), to_unicode(authors_string), authors_string.count(','))
+ else:
+ logger.info("Author string changed in %s: '%s' -> '%s'", dep.pkg, to_unicode(row[pub]), to_unicode(authors_string))
+ row[pub] = authors_string
if not dep.properties.has_key('published'):
dep.properties['published'] = {}
if dep.properties['published'].has_key(pub):
More information about the Blends-commit
mailing list