[Blends-commit] r3347 - /blends/trunk/webtools/blendstasktools.py

Wed May 23 19:57:53 UTC 2012

Author: tille
Date: Wed May 23 19:57:52 2012
New Revision: 3347

URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=3347
Log:
Move the publication parsing code into a separate method so that it can also be used for packages in new and vcs

Modified:
    blends/trunk/webtools/blendstasktools.py

Modified: blends/trunk/webtools/blendstasktools.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/webtools/blendstasktools.py?rev=3347&op=diff
==============================================================================

--- blends/trunk/webtools/blendstasktools.py (original)
+++ blends/trunk/webtools/blendstasktools.py Wed May 23 19:57:52 2012
@@ -343,9 +343,31 @@
                    p.distribution, p.component, p.version, p.architecture, p.maintainer,
                    p.source, p.section, p.distribution, 'new' AS release, p.component, p.homepage,
                    s.changed_by,
-                   description AS description_en, long_description AS long_description_en
+                   description AS description_en, long_description AS long_description_en,
+         bibyear.value    AS "year",
+         bibtitle.value   AS "title",
+         bibauthor.value  AS "authors",
+         bibdoi.value     AS "doi",
+         bibpmid.value    AS "pubmed",
+         biburl.value     AS "url",
+         bibjournal.value AS "journal",
+         bibvolume.value  AS "volume",
+         bibnumber.value  AS "number",
+         bibpages.value   AS "pages",
+         bibeprint.value  AS "eprint"
                    FROM new_packages p
 		   JOIN new_sources s ON p.source = s.source AND p.version = s.version
+    LEFT OUTER JOIN bibref bibyear    ON p.source = bibyear.source    AND bibyear.rank = 0    AND bibyear.key    = 'year'    AND bibyear.package = ''
+    LEFT OUTER JOIN bibref bibtitle   ON p.source = bibtitle.source   AND bibtitle.rank = 0   AND bibtitle.key   = 'title'   AND bibtitle.package = ''
+    LEFT OUTER JOIN bibref bibauthor  ON p.source = bibauthor.source  AND bibauthor.rank = 0  AND bibauthor.key  = 'author'  AND bibauthor.package = ''
+    LEFT OUTER JOIN bibref bibdoi     ON p.source = bibdoi.source     AND bibdoi.rank = 0     AND bibdoi.key     = 'doi'     AND bibdoi.package = ''
+    LEFT OUTER JOIN bibref bibpmid    ON p.source = bibpmid.source    AND bibpmid.rank = 0    AND bibpmid.key    = 'pmid'    AND bibpmid.package = ''
+    LEFT OUTER JOIN bibref biburl     ON p.source = biburl.source     AND biburl.rank = 0     AND biburl.key     = 'url'     AND biburl.package = ''
+    LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = 0 AND bibjournal.key = 'journal' AND bibjournal.package = ''
+    LEFT OUTER JOIN bibref bibvolume  ON p.source = bibvolume.source  AND bibvolume.rank = 0  AND bibvolume.key  = 'volume'  AND bibvolume.package = ''
+    LEFT OUTER JOIN bibref bibnumber  ON p.source = bibnumber.source  AND bibnumber.rank = 0  AND bibnumber.key  = 'number'  AND bibnumber.package = ''
+    LEFT OUTER JOIN bibref bibpages   ON p.source = bibpages.source   AND bibpages.rank = 0   AND bibpages.key   = 'pages'   AND bibpages.package = ''
+    LEFT OUTER JOIN bibref bibeprint  ON p.source = bibeprint.source  AND bibeprint.rank = 0  AND bibeprint.key  = 'eprint'  AND bibeprint.package = ''
                    WHERE (p.package, p.version) IN  
                          (SELECT package, max(version) FROM
                    new_packages WHERE package = ANY ($1) GROUP BY package)"""
@@ -362,9 +384,31 @@
                    p.vcs_browser AS "vcs-browser",
                    p.blend,
                    p.license,
-                   p.wnpp
+                   p.wnpp,
+         bibyear.value    AS "year",
+         bibtitle.value   AS "title",
+         bibauthor.value  AS "authors",
+         bibdoi.value     AS "doi",
+         bibpmid.value    AS "pubmed",
+         biburl.value     AS "url",
+         bibjournal.value AS "journal",
+         bibvolume.value  AS "volume",
+         bibnumber.value  AS "number",
+         bibpages.value   AS "pages",
+         bibeprint.value  AS "eprint"
                    FROM blends_prospectivepackages p
-                   WHERE package = ANY ($1)"""
+    LEFT OUTER JOIN bibref bibyear    ON p.source = bibyear.source    AND bibyear.rank = 0    AND bibyear.key    = 'year'    AND bibyear.package = ''
+    LEFT OUTER JOIN bibref bibtitle   ON p.source = bibtitle.source   AND bibtitle.rank = 0   AND bibtitle.key   = 'title'   AND bibtitle.package = ''
+    LEFT OUTER JOIN bibref bibauthor  ON p.source = bibauthor.source  AND bibauthor.rank = 0  AND bibauthor.key  = 'author'  AND bibauthor.package = ''
+    LEFT OUTER JOIN bibref bibdoi     ON p.source = bibdoi.source     AND bibdoi.rank = 0     AND bibdoi.key     = 'doi'     AND bibdoi.package = ''
+    LEFT OUTER JOIN bibref bibpmid    ON p.source = bibpmid.source    AND bibpmid.rank = 0    AND bibpmid.key    = 'pmid'    AND bibpmid.package = ''
+    LEFT OUTER JOIN bibref biburl     ON p.source = biburl.source     AND biburl.rank = 0     AND biburl.key     = 'url'     AND biburl.package = ''
+    LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = 0 AND bibjournal.key = 'journal' AND bibjournal.package = ''
+    LEFT OUTER JOIN bibref bibvolume  ON p.source = bibvolume.source  AND bibvolume.rank = 0  AND bibvolume.key  = 'volume'  AND bibvolume.package = ''
+    LEFT OUTER JOIN bibref bibnumber  ON p.source = bibnumber.source  AND bibnumber.rank = 0  AND bibnumber.key  = 'number'  AND bibnumber.package = ''
+    LEFT OUTER JOIN bibref bibpages   ON p.source = bibpages.source   AND bibpages.rank = 0   AND bibpages.key   = 'pages'   AND bibpages.package = ''
+    LEFT OUTER JOIN bibref bibeprint  ON p.source = bibeprint.source  AND bibeprint.rank = 0  AND bibeprint.key  = 'eprint'  AND bibeprint.package = ''
+                   WHERE p.package = ANY ($1)"""
 _execute_udd_query(query)
 
 # This prepared statement is called only once but it makes sense to mention it in the
@@ -698,6 +742,61 @@
         #    ret += ", desc['en']:"   + str(self.desc['en'])
         return ret
 
+    def SetPublications(self, row):
+        for pub in ("year", "title", "authors", "doi", "pubmed", "url", "journal", "volume", "number", "pages", "eprint" ):
+            if row[pub]:
+                if pub == "pages":
+                    row[pub] = re.sub("--", "-", row[pub])
+                if (pub == "authors" and row[pub].count(" and ") or row[pub].count(" AND ")):
+                    # assume "authors" string is a regular BibTeX "and" separated list of authors
+                    row[pub] = re.sub("AND", "and", row[pub].strip())
+                    authors_list = row[pub].split(" and ")
+                    # normalize several BibTeX styles to "First Last, First Last and First Last":
+                    # 1. "First Last and First Last and First Last"
+                    # 2. "Last, First and Last, First and Last, First"
+                    # 3. "First Last, First Last and First Last"
+                    authors_string = ""
+                    while (authors_list):
+                        author = authors_list.pop(0)
+                        if (author.count(",") > 1):
+                            # 3. "First Last, First Last and First Last"
+                            # authors string is already in desired format, keep it
+                            authors_string = row[pub].strip()
+                            break
+                        elif (row[pub].count(",") == row[pub].count(" and ") + 1):
+                            # 2. "Last, First and Last, First and Last, First"
+                            # reverse last and first name
+                            (last, first) = author.split(", ")
+                            full_author = first + " " + last
+                        else:
+                            # 1. "First Last and First Last and First Last"
+                            full_author = author
+                        if (len(authors_list) > 1):
+                            authors_string += full_author + ", "
+                        elif (len(authors_list) > 0):
+                            authors_string += full_author + " and "
+                        else:
+                            authors_string += full_author
+                    if row[pub] != authors_string:
+                        # emergency brake if algorithm fails to detect non-names like '1000 Genome Project Data Processing Subgroup'
+                        if authors_string.count(',') > row[pub].count(' and '):
+                            logger.warning("Refuse to change Author string in %s: '%s'(%i) -> '%s'(%i)", \
+                                            self.pkg, to_unicode(row[pub]), row[pub].count(' and '), to_unicode(authors_string), authors_string.count(','))
+                        else:
+                            logger.debug("Author string changed in %s: '%s' -> '%s'", self.pkg, to_unicode(row[pub]), to_unicode(authors_string))
+                            row[pub] = authors_string
+                if not self.properties.has_key('published'):
+                    self.properties['published'] = {}
+                if self.properties['published'].has_key(pub):
+                    if self.properties['published'][pub] == to_unicode(row[pub]):
+                    	try:
+                           print >>rmpub, "%s: %s: Published-%s: %s" % (self.taskname, self.pkg, pub, to_unicode(row[pub]))
+            	        except UnicodeEncodeError:
+            	           print >>rmpub, "--- %s: %s: Published-%s: some duplicated value featuring encoding problems ---" % (self.taskname, self.pkg, pub)
+            		logger.info("%s/%s: Publication-%s = %s can be removed"  % (self.taskname, self.pkg, pub, to_unicode(row[pub])))
+            	    else:
+            	        logger.info("%s conflicting fields Publication-%s in tasks file with value '%s' and in UDD with value '%s'" % (self.pkg, pub, self.properties['published'][pub], to_unicode(row[pub])))
+                self.properties['published'][pub] = to_unicode(row[pub])
 
 class Tasks:
     # Provide a list of depencencies defined in metapackages
@@ -1046,6 +1145,7 @@
                     logger.error("===> AttributeError in metapackage long %s (lang='%s'): '%s'; ErrTxt: %s" % \
                                      (self.metapkg.pkg, lang, ddtptranslations['long_description_'+lang], err))
                     self.metapkg.desc[lang]['long'] = 'Missing long description'
+
 
     def _AppendDependency2List(self, dep, source):
         # Append dependency which was found in the tasks file if not Ignore / Avoid and
@@ -1505,61 +1605,7 @@
                 # link to packages.debian.org search page to see overview about all
                 # package versions in all releases
                 dep.properties['pkg-url'] = PKGURLMASK % dep.pkg
-
-                for pub in ("year", "title", "authors", "doi", "pubmed", "url", "journal", "volume", "number", "pages", "eprint" ):
-                    if row[pub]:
-                        if pub == "pages":
-                            row[pub] = re.sub("--", "-", row[pub])
-                        if (pub == "authors" and row[pub].count(" and ") or row[pub].count(" AND ")):
-                            # assume "authors" string is a regular BibTeX "and" separated list of authors
-                            row[pub] = re.sub("AND", "and", row[pub].strip())
-                            authors_list = row[pub].split(" and ")
-                            # normalize several BibTeX styles to "First Last, First Last and First Last":
-                            # 1. "First Last and First Last and First Last"
-                            # 2. "Last, First and Last, First and Last, First"
-                            # 3. "First Last, First Last and First Last"
-                            authors_string = ""
-                            while (authors_list):
-                                author = authors_list.pop(0)
-                                if (author.count(",") > 1):
-                                    # 3. "First Last, First Last and First Last"
-                                    # authors string is already in desired format, keep it
-                                    authors_string = row[pub].strip()
-                                    break
-                                elif (row[pub].count(",") == row[pub].count(" and ") + 1):
-                                    # 2. "Last, First and Last, First and Last, First"
-                                    # reverse last and first name
-                                    (last, first) = author.split(", ")
-                                    full_author = first + " " + last
-                                else:
-                                    # 1. "First Last and First Last and First Last"
-                                    full_author = author
-                                if (len(authors_list) > 1):
-                                    authors_string += full_author + ", "
-                                elif (len(authors_list) > 0):
-                                    authors_string += full_author + " and "
-                                else:
-                                    authors_string += full_author
-                            if row[pub] != authors_string:
-                        	# emergency brake if algorithm fails to detect non-names like '1000 Genome Project Data Processing Subgroup'
-                                if authors_string.count(',') > row[pub].count(' and '):
-                            	    logger.warning("Refuse to change Author string in %s: '%s'(%i) -> '%s'(%i)", \
-                            	                    dep.pkg, to_unicode(row[pub]), row[pub].count(' and '), to_unicode(authors_string), authors_string.count(','))
-                            	else:
-                        	    logger.debug("Author string changed in %s: '%s' -> '%s'", dep.pkg, to_unicode(row[pub]), to_unicode(authors_string))
-                                    row[pub] = authors_string
-                        if not dep.properties.has_key('published'):
-                            dep.properties['published'] = {}
-                        if dep.properties['published'].has_key(pub):
-                    	    if dep.properties['published'][pub] == to_unicode(row[pub]):
-                    		try:
-                    	           print >>rmpub, "%s: %s: Published-%s: %s" % (dep.taskname, dep.pkg, pub, to_unicode(row[pub]))
-                    	        except UnicodeEncodeError:
-                    	           print >>rmpub, "--- %s: %s: Published-%s: some duplicated value featuring encoding problems ---" % (dep.taskname, dep.pkg, pub)
-                    		logger.info("%s/%s: Publication-%s = %s can be removed"  % (dep.taskname, dep.pkg, pub, to_unicode(row[pub])))
-                    	    else:
-                    	        logger.info("%s conflicting fields Publication-%s in tasks file with value '%s' and in UDD with value '%s'" % (dep.pkg, pub, dep.properties['published'][pub], to_unicode(row[pub])))
-                        dep.properties['published'][pub] = to_unicode(row[pub])
+                dep.SetPublications(row)
                 for l in languages:
                     if row['description_'+l]:
                         dep.desc[l] = {}