[Blends-commit] r2735 - in /blends/trunk/team_analysis_tools: create_bad_names.sql maintain_bad_names.py
tille at users.alioth.debian.org
tille at users.alioth.debian.org
Wed Apr 6 16:56:44 UTC 2011
Author: tille
Date: Wed Apr 6 16:56:43 2011
New Revision: 2735
URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=2735
Log:
New method to handle unique names
Added:
blends/trunk/team_analysis_tools/maintain_bad_names.py (with props)
Modified:
blends/trunk/team_analysis_tools/create_bad_names.sql
Modified: blends/trunk/team_analysis_tools/create_bad_names.sql
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/create_bad_names.sql?rev=2735&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/create_bad_names.sql (original)
+++ blends/trunk/team_analysis_tools/create_bad_names.sql Wed Apr 6 16:56:43 2011
@@ -1,11 +1,24 @@
+-- carnivore_names_prefered holds at most one name per id (id is the primary key).
+-- Every (id, name) pair stored here must also exist in carnivore_names
+-- (deferrable foreign key).
+DROP TABLE IF EXISTS carnivore_names_prefered;
+CREATE TABLE carnivore_names_prefered (
+ id int,
+ name text,
+ PRIMARY KEY (id),
+ FOREIGN KEY (id, name) REFERENCES carnivore_names DEFERRABLE);
+GRANT SELECT ON carnivore_names_prefered TO PUBLIC;
+
+-- Seed the table with the names of all ids that have exactly one row in
+-- carnivore_names: for those ids there is nothing to choose between.
+-- NOTE(review): SELECT * assumes carnivore_names has exactly the columns
+-- (id, name) in this order -- confirm against the UDD schema.
+INSERT INTO carnivore_names_prefered
+ SELECT * FROM carnivore_names
+ WHERE id in (SELECT id FROM carnivore_names
+ GROUP BY id HAVING COUNT(*) = 1);
+
+
DROP TABLE IF EXISTS carnivore_bad_names;
-
CREATE TABLE carnivore_bad_names (
id int,
name text,
FOREIGN KEY (id, name) REFERENCES carnivore_names DEFERRABLE);
GRANT SELECT ON carnivore_bad_names TO PUBLIC;
-
INSERT INTO carnivore_bad_names VALUES ( 14, 'Nelson Antonio de Oliveira' );
INSERT INTO carnivore_bad_names VALUES ( 14, 'Nelson A. de Oliveira' );
Added: blends/trunk/team_analysis_tools/maintain_bad_names.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/maintain_bad_names.py?rev=2735&op=file
==============================================================================
--- blends/trunk/team_analysis_tools/maintain_bad_names.py (added)
+++ blends/trunk/team_analysis_tools/maintain_bad_names.py Wed Apr 6 16:56:43 2011
@@ -1,0 +1,193 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2011: Andreas Tille <tille at debian.org>
+# License: GPL
+#
+# The UDD table carnivore_names sometimes contains several
+# names for one and the same uploader. To get consistent
+# names, which some applications need, this tool
+# helps to maintain a table
+# carnivore_names_prefered
+# which contains the spelling of the name that should
+# be preferred for such applications.
+# Based on this preferred name, a table
+# carnivore_bad_names
+# is maintained which can be used to look up the
+# id of the uploader whenever a name other than the
+# preferred one occurs.
+
+# NOTE(review): PORT and DEFAULTPORT are not referenced anywhere in the visible
+# part of this script; presumably they are PostgreSQL ports -- confirm or remove.
+PORT=5441
+DEFAULTPORT=5432
+import psycopg2
+from sys import stderr, exit
+import re
+
+# When True, the script prints diagnostic messages explaining its decisions.
+debug=False
+# The most frequently used name is chosen automatically when its usage count
+# is more than PREFEREDFACTOR times the count of the second most used name.
+PREFEREDFACTOR=3
+# Entries of carnivore_names that are never collected as candidate names.
+BLACKLIST = ('Thawte Freemail Member',
+ 'System V',
+ 'root',
+ 'Inc.',
+ 'q'
+ )
+
+# Names that are chosen as the preferred spelling whenever they occur among
+# the candidates of an id.
+WHITELIST = ('Ramakrishnan Muthukrishnan',
+ 'Tobias Quathamer',
+ 'Stéphane Glondu',
+ 'Debian FreeSmartphone.Org Team',
+ 'Chao-Ming',
+ 'Nicolas François',
+ )
+
+# A double quote followed, somewhere later, by another double quote.
+has_quotes_re = re.compile('".*"')
+# Exactly three parts: a word, a single capital letter followed by a dot,
+# and another word.
+has_second_initial_re = re.compile('^\w+ [A-Z]\. \w+$')
+
+def prompt(prompt):
+    # Display the given text, read one line from standard input and hand it
+    # back without leading or trailing whitespace.
+    answer = raw_input(prompt)
+    return answer.strip()
+
+def quote(s):
+    # Return s as a PostgreSQL string literal that can be pasted into a
+    # statement.
+    #
+    # The literal uses the escape-string form E'...': backslashes are doubled
+    # and single quotes are doubled.  Unlike backslash-escaping quotes inside
+    # a plain '...' literal, this is parsed the same way whether the server
+    # runs with standard_conforming_strings on or off, so a name containing
+    # ' or \ can neither break the statement nor terminate the literal early.
+    # Double quotes need no escaping inside a single-quoted literal.
+    escaped = s.replace("\\", "\\\\").replace("'", "''")
+    return "E'" + escaped + "'"
+
+def List2PgArray(list):
+    # Render a list of strings in PostgreSQL array syntax:
+    #   {"string1","string2",...,"stringN"}
+    # Inside each element single quotes are doubled and every double quote is
+    # preceded by two backslashes.  A false value (e.g. an empty list)
+    # gives '{}'.
+    if not list:
+        return '{}'
+    elements = ['"' + s.replace("'", "''").replace('"', '\\\\"') + '"'
+                for s in list]
+    return '{' + ','.join(elements) + '}'
+
+def StringWithoutQuotes(s):
+    # Strip a pair of double quotes from s.
+    # Only strings that begin with a double quote and contain a further one
+    # are touched (has_quotes_re is applied with match(), i.e. anchored at the
+    # start); everything else is returned unchanged.
+    if not has_quotes_re.match(s):
+        return s
+    if debug:
+        print >>stderr, "Remove quotes from %s" % (s)
+    # Greedy match: the first and the last double quote are dropped.
+    return re.sub('"(.*)"', '\\1', s)
+
+# Connect to the database named "udd"; no other connection parameter is passed.
+# If the connection cannot be established, report it on stderr and terminate
+# the script.
+try:
+ conn = psycopg2.connect(database="udd")
+except psycopg2.OperationalError:
+ print >>stderr, "Problem connecting to UDD"
+ exit(-1)
+# One cursor is used for every statement issued below.
+curs = conn.cursor()
+
+# Check what name is used how often
+# name_usage takes an array of names and returns, for each of them that occurs
+# at all, the total number of occurrences in the name columns of all_sources,
+# all_bugs, uploaders and upload_history, most frequent name first.
+query = """PREPARE name_usage (text[]) AS
+ SELECT name, count(*) FROM (
+ SELECT maintainer_name AS name FROM all_sources WHERE maintainer_name = ANY($1)
+ UNION ALL
+ SELECT submitter_name AS name FROM all_bugs WHERE submitter_name = ANY($1)
+ UNION ALL
+ SELECT owner_name AS name FROM all_bugs WHERE owner_name = ANY($1)
+ UNION ALL
+ SELECT done_name AS name FROM all_bugs WHERE done_name = ANY($1)
+ UNION ALL
+ SELECT name AS name FROM uploaders WHERE name = ANY($1)
+ UNION ALL
+ SELECT changed_by_name AS name FROM upload_history WHERE changed_by_name = ANY($1)
+ UNION ALL
+ SELECT maintainer_name AS name FROM upload_history WHERE maintainer_name = ANY($1)
+ UNION ALL
+ SELECT signed_by_name AS name FROM upload_history WHERE signed_by_name = ANY($1)
+ ) AS names
+ GROUP BY name
+ ORDER BY count DESC;"""
+curs.execute(query)
+
+# insert_prefered_name stores one (id, name) pair in carnivore_names_prefered.
+query = "PREPARE insert_prefered_name AS INSERT INTO carnivore_names_prefered (id, name) VALUES ($1, $2)"
+curs.execute(query)
+
+# Fetch every (name, id) row of carnivore_names whose id has more than one name
+# and no entry in carnivore_names_prefered yet.  The rows are ordered by id so
+# that all names of one id arrive consecutively.
+query = """SELECT name, id from carnivore_names cn
+ WHERE id IN (SELECT id FROM carnivore_names
+ WHERE id not in (SELECT id FROM carnivore_names_prefered)
+ GROUP BY id HAVING COUNT(*) > 1)
+ ORDER BY id;"""
+curs.execute(query)
+
+# Main loop: walk over the (name, id) rows, collect the names belonging to one
+# id in `name`, and as soon as a row with a different id shows up decide which
+# of the collected names becomes the preferred one and store it.
+# NOTE(review): the original indentation was lost in this mail archive, so the
+# nesting described in the comments below is inferred -- confirm against the
+# file in the repository.
+# NOTE(review): a group is only resolved when a row with another id follows;
+# nothing after the loop handles the names collected for the last id, so that
+# id appears to be skipped on every run.
+# `id` is the id of the group being collected (0 = nothing collected yet).
+id=0
+name = []
+allnames = curs.fetchall()
+for r in allnames:
+ # r[0] is the name, r[1] the id (column order of the SELECT above).
+ if id != r[1]:
+ if id != 0:
+ # prefered is the index into usednames of the chosen name; -1 = undecided.
+ prefered = -1
+ while prefered == -1:
+ i=0
+ # NOTE(review): the statement is built by string interpolation; passing
+ # the values as query parameters to curs.execute() would avoid quoting issues.
+ query = "EXECUTE name_usage ('%s')" % List2PgArray(name)
+ curs.execute(query)
+ # usednames: list of (name, count), highest count first.
+ usednames = curs.fetchall()
+ if len(usednames) == 0:
+ print >>stderr, "Names %s are not used in sources at all" % str(name)
+ # Fall back to all collected names with a usage count of 0.
+ for n in name:
+ usednames.append((n,0))
+ if len(usednames) == 1:
+ if debug:
+ print "Some names in carnivore_names of %s are not used in all_sources, all_bugs and uploaders" % str(name)
+ prefered = 0
+ else:
+ # Rule 1: a clear majority wins.
+ if usednames[0][1] > PREFEREDFACTOR*usednames[1][1]:
+ if debug:
+ print "Name %s(%d) is way more prefered than %s(%d) and possibly others" % \
+ (usednames[0][0], usednames[0][1], usednames[1][0], usednames[1][1])
+ prefered = 0
+ else:
+ # Prefer upper cased names
+ # Rule 2: the two most used names differ only in capitalisation ->
+ # take the one for which str.istitle() is true.
+ if usednames[0][0].lower() == usednames[1][0].lower():
+ if usednames[0][0].istitle():
+ prefered = 0
+ else:
+ if usednames[1][0].istitle():
+ prefered = 1
+ if debug:
+ print "Both name variants just have different capitalisation, %s is prefered." % usednames[prefered][0]
+ if len(usednames) == 2:
+ # prefer name versions with additional initial letter of second given name
+ # Rule 3: with exactly two candidates, "First X. Last" beats "First Last"
+ # when removing the initial makes both names equal.
+ if has_second_initial_re.match(usednames[0][0]):
+ if re.sub('^(\w+ )[A-Z]\. (\w+)$', '\\1\\2', usednames[0][0]) == usednames[1][0]:
+ if debug:
+ print "Most frequent name has second name initial %s" % usednames[0][0]
+ prefered = 0
+ if has_second_initial_re.match(usednames[1][0]):
+ if re.sub('^(\w+ )[A-Z]\. (\w+)$', '\\1\\2', usednames[1][0]) == usednames[0][0]:
+ if debug:
+ print "Second frequent name has second name initial %s" % usednames[1][0]
+ prefered = 1
+ # check whitelist of names
+ # Rule 4: the first candidate found in WHITELIST is chosen.
+ i = -1
+ for u in usednames:
+ i += 1
+ if u[0] in WHITELIST:
+ prefered = i
+ if debug:
+ print "Found %s in whitelist" % u[0]
+ break
+ # No rule decided: show a numbered menu and ask the user.
+ # NOTE(review): `i` is reused from the whitelist scan above and is not reset
+ # before the menu is printed, so the numbers shown may not start at 0 and
+ # the range check below may not match the list -- verify.
+ if prefered < 0:
+ for n in usednames:
+ print "(%d)\t%s (%d)" % (i, n[0], n[1])
+ i += 1
+ s = prompt("Please type number of prefered name: ")
+ try:
+ si = int(s)
+ except ValueError:
+ print "Please insert integer number"
+ continue
+ if si < 0 or si >= i:
+ print "Integer not in range"
+ continue
+ prefered = si
+ # Store the decision for the id whose names were just evaluated.
+ query = "EXECUTE insert_prefered_name (%d, %s)" % (id, quote(usednames[prefered][0]))
+ try:
+ curs.execute(query)
+ except psycopg2.ProgrammingError, err:
+ print >>stderr, "%s: %s" % (err, query)
+ conn.commit() # just commit after every insert to not see all names several times when checking this script
+ # Start collecting the names of the next id.
+ id = r[1]
+ name = []
+ if r[0] not in BLACKLIST:
+ name.append(StringWithoutQuotes(r[0]))
+ else:
+ # Same id as the previous row: add the name to the current group.
+ if r[0] not in BLACKLIST:
+ name.append(StringWithoutQuotes(r[0]))
Propchange: blends/trunk/team_analysis_tools/maintain_bad_names.py
------------------------------------------------------------------------------
svn:executable = *
More information about the Blends-commit
mailing list