[Blends-commit] r3316 - /blends/trunk/webtools/blendstasktools.py

tille at users.alioth.debian.org tille at users.alioth.debian.org
Mon May 7 19:51:15 UTC 2012


Author: tille
Date: Mon May  7 19:51:14 2012
New Revision: 3316

URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=3316
Log:
Apply patch from Michael Banck to normalise output of authors field
  My changes to Michael's patch:
   - Add logging about changed authors fields as INFO
   - Fixed some wrong syntax in split method
   - Add emergency brake: if the normalised string contains more ',' than the original string had ' and ' separators, do not change the authors field

Modified:
    blends/trunk/webtools/blendstasktools.py

Modified: blends/trunk/webtools/blendstasktools.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/webtools/blendstasktools.py?rev=3316&op=diff
==============================================================================
--- blends/trunk/webtools/blendstasktools.py (original)
+++ blends/trunk/webtools/blendstasktools.py Mon May  7 19:51:14 2012
@@ -1488,6 +1488,44 @@
                     if row[pub]:
                         if pub == "pages":
                             row[pub] = re.sub("--", "-", row[pub])
+                        if (pub == "authors" and row[pub].count(" and ") or row[pub].count(" AND ")):
+                            # assume "authors" string is a regular BibTeX "and" separated list of authors
+                            row[pub] = re.sub("AND", "and", row[pub].strip())
+                            authors_list = row[pub].split(" and ")
+                            # normalize several BibTeX styles to "First Last, First Last and First Last":
+                            # 1. "First Last and First Last and First Last"
+                            # 2. "Last, First and Last, First and Last, First"
+                            # 3. "First Last, First Last and First Last"
+                            authors_string = ""
+                            while (authors_list):
+                                author = authors_list.pop(0)
+                                if (author.count(",") > 1):
+                                    # 3. "First Last, First Last and First Last"
+                                    # authors string is already in desired format, keep it
+                                    authors_string = row[pub].strip()
+                                    break
+                                elif (row[pub].count(",") == row[pub].count(" and ") + 1):
+                                    # 2. "Last, First and Last, First and Last, First"
+                                    # reverse last and first name
+                                    (last, first) = author.split(", ")
+                                    full_author = first + " " + last
+                                else:
+                                    # 1. "First Last and First Last and First Last"
+                                    full_author = author
+                                if (len(authors_list) > 1):
+                                    authors_string += full_author + ", "
+                                elif (len(authors_list) > 0):
+                                    authors_string += full_author + " and "
+                                else:
+                                    authors_string += full_author
+                            if row[pub] != authors_string:
+                        	# emergency brake if algorithm fails to detect non-names like '1000 Genome Project Data Processing Subgroup'
+                                if authors_string.count(',') > row[pub].count(' and '):
+                            	    logger.warning("Refuse to change Author string in %s: '%s'(%i) -> '%s'(%i)", \
+                            	                    dep.pkg, to_unicode(row[pub]), row[pub].count(' and '), to_unicode(authors_string), authors_string.count(','))
+                            	else:
+                        	    logger.info("Author string changed in %s: '%s' -> '%s'", dep.pkg, to_unicode(row[pub]), to_unicode(authors_string))
+                                    row[pub] = authors_string
                         if not dep.properties.has_key('published'):
                             dep.properties['published'] = {}
                         if dep.properties['published'].has_key(pub):




More information about the Blends-commit mailing list