[Blends-commit] r3347 - /blends/trunk/webtools/blendstasktools.py
tille at users.alioth.debian.org
tille at users.alioth.debian.org
Wed May 23 19:57:53 UTC 2012
Author: tille
Date: Wed May 23 19:57:52 2012
New Revision: 3347
URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=3347
Log:
Separate the publication parsing code into its own method so that it can also be used for packages in new and in VCS
Modified:
blends/trunk/webtools/blendstasktools.py
Modified: blends/trunk/webtools/blendstasktools.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/webtools/blendstasktools.py?rev=3347&op=diff
==============================================================================
--- blends/trunk/webtools/blendstasktools.py (original)
+++ blends/trunk/webtools/blendstasktools.py Wed May 23 19:57:52 2012
@@ -343,9 +343,31 @@
p.distribution, p.component, p.version, p.architecture, p.maintainer,
p.source, p.section, p.distribution, 'new' AS release, p.component, p.homepage,
s.changed_by,
- description AS description_en, long_description AS long_description_en
+ description AS description_en, long_description AS long_description_en,
+ bibyear.value AS "year",
+ bibtitle.value AS "title",
+ bibauthor.value AS "authors",
+ bibdoi.value AS "doi",
+ bibpmid.value AS "pubmed",
+ biburl.value AS "url",
+ bibjournal.value AS "journal",
+ bibvolume.value AS "volume",
+ bibnumber.value AS "number",
+ bibpages.value AS "pages",
+ bibeprint.value AS "eprint"
FROM new_packages p
JOIN new_sources s ON p.source = s.source AND p.version = s.version
+ LEFT OUTER JOIN bibref bibyear ON p.source = bibyear.source AND bibyear.rank = 0 AND bibyear.key = 'year' AND bibyear.package = ''
+ LEFT OUTER JOIN bibref bibtitle ON p.source = bibtitle.source AND bibtitle.rank = 0 AND bibtitle.key = 'title' AND bibtitle.package = ''
+ LEFT OUTER JOIN bibref bibauthor ON p.source = bibauthor.source AND bibauthor.rank = 0 AND bibauthor.key = 'author' AND bibauthor.package = ''
+ LEFT OUTER JOIN bibref bibdoi ON p.source = bibdoi.source AND bibdoi.rank = 0 AND bibdoi.key = 'doi' AND bibdoi.package = ''
+ LEFT OUTER JOIN bibref bibpmid ON p.source = bibpmid.source AND bibpmid.rank = 0 AND bibpmid.key = 'pmid' AND bibpmid.package = ''
+ LEFT OUTER JOIN bibref biburl ON p.source = biburl.source AND biburl.rank = 0 AND biburl.key = 'url' AND biburl.package = ''
+ LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = 0 AND bibjournal.key = 'journal' AND bibjournal.package = ''
+ LEFT OUTER JOIN bibref bibvolume ON p.source = bibvolume.source AND bibvolume.rank = 0 AND bibvolume.key = 'volume' AND bibvolume.package = ''
+ LEFT OUTER JOIN bibref bibnumber ON p.source = bibnumber.source AND bibnumber.rank = 0 AND bibnumber.key = 'number' AND bibnumber.package = ''
+ LEFT OUTER JOIN bibref bibpages ON p.source = bibpages.source AND bibpages.rank = 0 AND bibpages.key = 'pages' AND bibpages.package = ''
+ LEFT OUTER JOIN bibref bibeprint ON p.source = bibeprint.source AND bibeprint.rank = 0 AND bibeprint.key = 'eprint' AND bibeprint.package = ''
WHERE (p.package, p.version) IN
(SELECT package, max(version) FROM
new_packages WHERE package = ANY ($1) GROUP BY package)"""
@@ -362,9 +384,31 @@
p.vcs_browser AS "vcs-browser",
p.blend,
p.license,
- p.wnpp
+ p.wnpp,
+ bibyear.value AS "year",
+ bibtitle.value AS "title",
+ bibauthor.value AS "authors",
+ bibdoi.value AS "doi",
+ bibpmid.value AS "pubmed",
+ biburl.value AS "url",
+ bibjournal.value AS "journal",
+ bibvolume.value AS "volume",
+ bibnumber.value AS "number",
+ bibpages.value AS "pages",
+ bibeprint.value AS "eprint"
FROM blends_prospectivepackages p
- WHERE package = ANY ($1)"""
+ LEFT OUTER JOIN bibref bibyear ON p.source = bibyear.source AND bibyear.rank = 0 AND bibyear.key = 'year' AND bibyear.package = ''
+ LEFT OUTER JOIN bibref bibtitle ON p.source = bibtitle.source AND bibtitle.rank = 0 AND bibtitle.key = 'title' AND bibtitle.package = ''
+ LEFT OUTER JOIN bibref bibauthor ON p.source = bibauthor.source AND bibauthor.rank = 0 AND bibauthor.key = 'author' AND bibauthor.package = ''
+ LEFT OUTER JOIN bibref bibdoi ON p.source = bibdoi.source AND bibdoi.rank = 0 AND bibdoi.key = 'doi' AND bibdoi.package = ''
+ LEFT OUTER JOIN bibref bibpmid ON p.source = bibpmid.source AND bibpmid.rank = 0 AND bibpmid.key = 'pmid' AND bibpmid.package = ''
+ LEFT OUTER JOIN bibref biburl ON p.source = biburl.source AND biburl.rank = 0 AND biburl.key = 'url' AND biburl.package = ''
+ LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = 0 AND bibjournal.key = 'journal' AND bibjournal.package = ''
+ LEFT OUTER JOIN bibref bibvolume ON p.source = bibvolume.source AND bibvolume.rank = 0 AND bibvolume.key = 'volume' AND bibvolume.package = ''
+ LEFT OUTER JOIN bibref bibnumber ON p.source = bibnumber.source AND bibnumber.rank = 0 AND bibnumber.key = 'number' AND bibnumber.package = ''
+ LEFT OUTER JOIN bibref bibpages ON p.source = bibpages.source AND bibpages.rank = 0 AND bibpages.key = 'pages' AND bibpages.package = ''
+ LEFT OUTER JOIN bibref bibeprint ON p.source = bibeprint.source AND bibeprint.rank = 0 AND bibeprint.key = 'eprint' AND bibeprint.package = ''
+ WHERE p.package = ANY ($1)"""
_execute_udd_query(query)
# This prepared statement is called only once but it makes sense to mention it in the
@@ -698,6 +742,61 @@
# ret += ", desc['en']:" + str(self.desc['en'])
return ret
+ def SetPublications(self, row):
+ for pub in ("year", "title", "authors", "doi", "pubmed", "url", "journal", "volume", "number", "pages", "eprint" ):
+ if row[pub]:
+ if pub == "pages":
+ row[pub] = re.sub("--", "-", row[pub])
+ if (pub == "authors" and row[pub].count(" and ") or row[pub].count(" AND ")):
+ # assume "authors" string is a regular BibTeX "and" separated list of authors
+ row[pub] = re.sub("AND", "and", row[pub].strip())
+ authors_list = row[pub].split(" and ")
+ # normalize several BibTeX styles to "First Last, First Last and First Last":
+ # 1. "First Last and First Last and First Last"
+ # 2. "Last, First and Last, First and Last, First"
+ # 3. "First Last, First Last and First Last"
+ authors_string = ""
+ while (authors_list):
+ author = authors_list.pop(0)
+ if (author.count(",") > 1):
+ # 3. "First Last, First Last and First Last"
+ # authors string is already in desired format, keep it
+ authors_string = row[pub].strip()
+ break
+ elif (row[pub].count(",") == row[pub].count(" and ") + 1):
+ # 2. "Last, First and Last, First and Last, First"
+ # reverse last and first name
+ (last, first) = author.split(", ")
+ full_author = first + " " + last
+ else:
+ # 1. "First Last and First Last and First Last"
+ full_author = author
+ if (len(authors_list) > 1):
+ authors_string += full_author + ", "
+ elif (len(authors_list) > 0):
+ authors_string += full_author + " and "
+ else:
+ authors_string += full_author
+ if row[pub] != authors_string:
+ # emergency brake if algorithm fails to detect non-names like '1000 Genome Project Data Processing Subgroup'
+ if authors_string.count(',') > row[pub].count(' and '):
+ logger.warning("Refuse to change Author string in %s: '%s'(%i) -> '%s'(%i)", \
+ self.pkg, to_unicode(row[pub]), row[pub].count(' and '), to_unicode(authors_string), authors_string.count(','))
+ else:
+ logger.debug("Author string changed in %s: '%s' -> '%s'", self.pkg, to_unicode(row[pub]), to_unicode(authors_string))
+ row[pub] = authors_string
+ if not self.properties.has_key('published'):
+ self.properties['published'] = {}
+ if self.properties['published'].has_key(pub):
+ if self.properties['published'][pub] == to_unicode(row[pub]):
+ try:
+ print >>rmpub, "%s: %s: Published-%s: %s" % (self.taskname, self.pkg, pub, to_unicode(row[pub]))
+ except UnicodeEncodeError:
+ print >>rmpub, "--- %s: %s: Published-%s: some duplicated value featuring encoding problems ---" % (self.taskname, self.pkg, pub)
+ logger.info("%s/%s: Publication-%s = %s can be removed" % (self.taskname, self.pkg, pub, to_unicode(row[pub])))
+ else:
+ logger.info("%s conflicting fields Publication-%s in tasks file with value '%s' and in UDD with value '%s'" % (self.pkg, pub, self.properties['published'][pub], to_unicode(row[pub])))
+ self.properties['published'][pub] = to_unicode(row[pub])
class Tasks:
# Provide a list of depencencies defined in metapackages
@@ -1046,6 +1145,7 @@
logger.error("===> AttributeError in metapackage long %s (lang='%s'): '%s'; ErrTxt: %s" % \
(self.metapkg.pkg, lang, ddtptranslations['long_description_'+lang], err))
self.metapkg.desc[lang]['long'] = 'Missing long description'
+
def _AppendDependency2List(self, dep, source):
# Append dependency which was found in the tasks file if not Ignore / Avoid and
@@ -1505,61 +1605,7 @@
# link to packages.debian.org search page to see overview about all
# package versions in all releases
dep.properties['pkg-url'] = PKGURLMASK % dep.pkg
-
- for pub in ("year", "title", "authors", "doi", "pubmed", "url", "journal", "volume", "number", "pages", "eprint" ):
- if row[pub]:
- if pub == "pages":
- row[pub] = re.sub("--", "-", row[pub])
- if (pub == "authors" and row[pub].count(" and ") or row[pub].count(" AND ")):
- # assume "authors" string is a regular BibTeX "and" separated list of authors
- row[pub] = re.sub("AND", "and", row[pub].strip())
- authors_list = row[pub].split(" and ")
- # normalize several BibTeX styles to "First Last, First Last and First Last":
- # 1. "First Last and First Last and First Last"
- # 2. "Last, First and Last, First and Last, First"
- # 3. "First Last, First Last and First Last"
- authors_string = ""
- while (authors_list):
- author = authors_list.pop(0)
- if (author.count(",") > 1):
- # 3. "First Last, First Last and First Last"
- # authors string is already in desired format, keep it
- authors_string = row[pub].strip()
- break
- elif (row[pub].count(",") == row[pub].count(" and ") + 1):
- # 2. "Last, First and Last, First and Last, First"
- # reverse last and first name
- (last, first) = author.split(", ")
- full_author = first + " " + last
- else:
- # 1. "First Last and First Last and First Last"
- full_author = author
- if (len(authors_list) > 1):
- authors_string += full_author + ", "
- elif (len(authors_list) > 0):
- authors_string += full_author + " and "
- else:
- authors_string += full_author
- if row[pub] != authors_string:
- # emergency brake if algorithm fails to detect non-names like '1000 Genome Project Data Processing Subgroup'
- if authors_string.count(',') > row[pub].count(' and '):
- logger.warning("Refuse to change Author string in %s: '%s'(%i) -> '%s'(%i)", \
- dep.pkg, to_unicode(row[pub]), row[pub].count(' and '), to_unicode(authors_string), authors_string.count(','))
- else:
- logger.debug("Author string changed in %s: '%s' -> '%s'", dep.pkg, to_unicode(row[pub]), to_unicode(authors_string))
- row[pub] = authors_string
- if not dep.properties.has_key('published'):
- dep.properties['published'] = {}
- if dep.properties['published'].has_key(pub):
- if dep.properties['published'][pub] == to_unicode(row[pub]):
- try:
- print >>rmpub, "%s: %s: Published-%s: %s" % (dep.taskname, dep.pkg, pub, to_unicode(row[pub]))
- except UnicodeEncodeError:
- print >>rmpub, "--- %s: %s: Published-%s: some duplicated value featuring encoding problems ---" % (dep.taskname, dep.pkg, pub)
- logger.info("%s/%s: Publication-%s = %s can be removed" % (dep.taskname, dep.pkg, pub, to_unicode(row[pub])))
- else:
- logger.info("%s conflicting fields Publication-%s in tasks file with value '%s' and in UDD with value '%s'" % (dep.pkg, pub, dep.properties['published'][pub], to_unicode(row[pub])))
- dep.properties['published'][pub] = to_unicode(row[pub])
+ dep.SetPublications(row)
for l in languages:
if row['description_'+l]:
dep.desc[l] = {}
More information about the Blends-commit
mailing list