[Blends-commit] r2904 - in /blends/trunk/team_analysis_tools: archives.sql author_stats author_stats_helper.py replace_names.sql upload_history.py
tille at users.alioth.debian.org
tille at users.alioth.debian.org
Wed Jul 20 20:25:14 UTC 2011
Author: tille
Date: Wed Jul 20 20:25:14 2011
New Revision: 2904
URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=2904
Log:
Let creation of graphs work again with PG 9.x, some more enhancements
Added:
blends/trunk/team_analysis_tools/author_stats_helper.py (with props)
Modified:
blends/trunk/team_analysis_tools/archives.sql
blends/trunk/team_analysis_tools/author_stats
blends/trunk/team_analysis_tools/replace_names.sql
blends/trunk/team_analysis_tools/upload_history.py
Modified: blends/trunk/team_analysis_tools/archives.sql
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/archives.sql?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/archives.sql (original)
+++ blends/trunk/team_analysis_tools/archives.sql Wed Jul 20 20:25:14 2011
@@ -15,238 +15,68 @@
ts date
);
-CREATE LANGUAGE plpgsql ;
+-- top N authors of mailing list
+CREATE OR REPLACE FUNCTION author_names_of_list(text,int) RETURNS SETOF RECORD AS $$
+ SELECT author FROM (
+ SELECT author, COUNT(*)::int
+ FROM listarchive
+ WHERE project = $1
+ GROUP BY author
+ ORDER BY count DESC
+ LIMIT $2
+ ) tmp
+$$ LANGUAGE 'SQL';
/*
- * Build a query string for several purposes
- * ARG1: Query to obtain wanted columns
- * ARG2: Feature that is queried
- * See below how this helper is used.
+SELECT * FROM author_names_of_list('med', 12) AS (category text) ;
+SELECT * FROM author_names_of_list('debian-med-packaging', 12) AS (category text) ;
*/
-CREATE OR REPLACE FUNCTION BuildQueryCDDsHelper(text, text)
- RETURNS text AS '
- DECLARE
- IterQuery ALIAS FOR \$1 ;
- Feature ALIAS FOR \$2 ;
- ret text ;
- union text ;
- query1 text ;
- query2 text ;
- r1 RECORD ;
- r2 RECORD ;
- ri RECORD ;
- qi RECORD ;
- BEGIN
- query1 := IterQuery ;
- query2 := query1;
- union := '''' ;
- ret := ''SELECT yearmonth'' ;
-
- FOR r1 IN EXECUTE query1 LOOP
- ret := ret || '', CAST(SUM("'' || r1.feature || ''") AS int) AS "'' || r1.feature || ''"'' ;
- END LOOP;
-
- ret := ret || ''
- FROM (
-'' ;
-
- FOR r1 IN EXECUTE query1 LOOP
- ret := ret || union || '' SELECT yearmonth'' ;
- union := ''
- UNION
-'';
- FOR r2 IN EXECUTE query2 LOOP
- IF r1.feature = r2.feature THEN
- ret := ret || '', COUNT(*)'' ;
- ELSE
- ret := ret || '', 0'' ;
- END IF;
- ret := ret || '' AS "'' || r2.feature || ''"'';
- END LOOP ;
- ret := ret || ''
- FROM listarchive
- WHERE '' || Feature || '' = '''''' || r1.feature || '''''' GROUP BY yearmonth'';
- END LOOP ;
-
- ret := ret || ''
- ) zw
- GROUP BY yearmonth
- ORDER BY yearmonth;'' ;
-
- RETURN ret;
- END; ' LANGUAGE 'plpgsql';
+CREATE OR REPLACE FUNCTION author_per_year_of_list(text,int) RETURNS SETOF RECORD AS $$
+ SELECT author, year, COUNT(*)::int FROM (
+ SELECT author, EXTRACT(year FROM yearmonth)::int AS year
+ FROM listarchive
+ WHERE author IN (SELECT * FROM author_names_of_list($1, $2) AS (author text))
+ AND project = $1
+ ) tmp
+ GROUP BY author, year
+ ORDER BY year, count DESC, author
+$$ LANGUAGE 'SQL';
/*
- * This query returns stats about all mailing lists
+SELECT * FROM author_per_year_of_list('med', 12) AS (author text, year int, value int) ;
+SELECT * FROM author_per_year_of_list('debian-med-packaging', 12) AS (author text, year int, value int) ;
*/
-CREATE OR REPLACE FUNCTION BuildQueryCDDs()
- RETURNS text AS '
- DECLARE
- ret text ;
+/*
+ Creating the whole crosstable in a simple SQL statement is not possible
+ because the return tuple of crosstab needs to specify all columns which
+ needs some string operations for variable number of columns (see below
+ for an example.
- BEGIN
+ So this will be implemented in a Python script which can easily create
+ the string needed.
- ret := BuildQueryCDDsHelper(
- ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
- ''project'') ;
- return ret ;
- END; ' LANGUAGE 'plpgsql';
+CREATE OR REPLACE FUNCTION liststat_table(text,int) RETURNS SETOF RECORD AS $$
+ SELECT *
+ FROM
+ crosstab(
+ 'SELECT year AS row_name, name AS bucket, count AS value
+ FROM author_per_year_of_list($1, $2) AS (name text, year int, count int)',
+ 'SELECT * FROM author_names_of_list($1, $2) AS (category text)'
+ ) As (year_row text, xxx int, yyy int, ...) -- <-- problematic row
+$$ LANGUAGE 'SQL';
-/*
- * This query returns stats about the ARG2 most active authors in a specific
- * Mailing list (ARG1)
- */
+ SELECT *
+ FROM
+ crosstab(
+ 'SELECT year AS row_name, name AS bucket, count AS value
+ FROM author_per_year_of_list(''med'', 12) AS (name text, year int, count int)',
+ 'SELECT * FROM author_names_of_list(''med'', 12) AS (category text)'
+ ) As (year text, upl1 int, upl2 int, upl3 int, upl4 int, upl5 int, upl6 int, upl7 int, upl8 int, upl9 int, upl10 int, upl11 int, upl12 int)
+;
-CREATE OR REPLACE FUNCTION BuildQueryAuthors(text, int)
- RETURNS text AS '
- DECLARE
- Project ALIAS FOR \$1 ;
- NumAuthors ALIAS FOR \$2 ;
- ret text ;
-
- BEGIN
-
- ret := BuildQueryCDDsHelper(
- ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
- WHERE project = '''''' || Project || '''''' AND author IN (
- SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '''''' || Project || ''''''
- GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
- GROUP BY author ORDER BY num DESC;'',
- ''project = '''''' || Project || '''''' AND author'') ;
- return ret ;
- END; ' LANGUAGE 'plpgsql';
-
-
-CREATE OR REPLACE FUNCTION CompareCDDs()
- RETURNS SETOF RECORD AS '
- DECLARE
- ret text ;
- query text ;
- r RECORD ;
-
- BEGIN
-
- SELECT INTO query BuildQueryCDDs() ;
-
- FOR r IN EXECUTE query LOOP
- RETURN NEXT r;
- END LOOP;
-
- END; ' LANGUAGE 'plpgsql';
-
-/*
- * You might call this as, but there is less chance to get column names right.
- * SELECT * FROM CompareCDDs() AS
- * ( yearmonth date, cdd1 int, cdd2 int, cdd3 int, cdd4 int, cdd5 int, cdd6 int, cdd7 int, cdd8 int );
- *
- * That's why we use the shell script wrappers ...
- */
-
-
-/*******************************************
- *
- * Same thing as above but for whole year
- *
- *******************************************/
-
-/*
- * Build a query string for several purposes
- * ARG1: Query to obtain wanted columns
- * ARG2: Feature that is queried
- * See below how this helper is used.
- */
-CREATE OR REPLACE FUNCTION BuildQueryCDDsYearHelper(text, text)
- RETURNS text AS '
- DECLARE
- IterQuery ALIAS FOR \$1 ;
- Feature ALIAS FOR \$2 ;
- ret text ;
- union text ;
- query1 text ;
- query2 text ;
- r1 RECORD ;
- r2 RECORD ;
- ri RECORD ;
- qi RECORD ;
- BEGIN
-
- query1 := IterQuery ;
- query2 := query1;
- union := '''' ;
- ret := ''SELECT EXTRACT(''''year'''' FROM year) AS year'' ;
-
- FOR r1 IN EXECUTE query1 LOOP
- ret := ret || '', CAST(SUM("'' || r1.feature || ''") AS int) AS "'' || replace(r1.feature,'' '',''_'') || ''"'' ;
- END LOOP;
-
- ret := ret || ''
- FROM (
-'' ;
-
- FOR r1 IN EXECUTE query1 LOOP
- ret := ret || union || '' SELECT date_trunc(''''year'''', yearmonth)::date AS year'' ;
- union := ''
- UNION
-'';
- FOR r2 IN EXECUTE query2 LOOP
- IF r1.feature = r2.feature THEN
- ret := ret || '', COUNT(*)'' ;
- ELSE
- ret := ret || '', 0'' ;
- END IF;
- ret := ret || '' AS "'' || r2.feature || ''"'';
- END LOOP ;
- ret := ret || ''
- FROM listarchive
- WHERE '' || Feature || '' = '''''' || replace(r1.feature, '''''''', '''''''''''') || '''''' GROUP BY year'';
- END LOOP ;
-
- ret := ret || ''
- ) zw
- GROUP BY year
- ORDER BY year;'' ;
-
- RETURN ret;
- END; ' LANGUAGE 'plpgsql';
-
-CREATE OR REPLACE FUNCTION BuildQueryCDDsYear()
- RETURNS text AS '
- DECLARE
- ret text ;
-
- BEGIN
-
- ret := BuildQueryCDDsYearHelper(
- ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
- ''project'') ;
- return ret ;
- END; ' LANGUAGE 'plpgsql';
-
-/*
- * This query returns stats about the ARG2 most active authors in a specific
- * Mailing list (ARG1)
- */
-
-CREATE OR REPLACE FUNCTION BuildQueryAuthorsYear(text, int)
- RETURNS text AS '
- DECLARE
- Project ALIAS FOR \$1 ;
- NumAuthors ALIAS FOR \$2 ;
- ret text ;
-
- BEGIN
-
- ret := BuildQueryCDDsYearHelper(
- ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
- WHERE project = '''''' || Project || '''''' AND author IN (
- SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '''''' || Project || ''''''
- GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
- GROUP BY author ORDER BY num DESC;'',
- ''project = '''''' || Project || '''''' AND author'') ;
- return ret ;
- END; ' LANGUAGE 'plpgsql';
+*/
COMMIT;
EOT
Modified: blends/trunk/team_analysis_tools/author_stats
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/author_stats?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/author_stats (original)
+++ blends/trunk/team_analysis_tools/author_stats Wed Jul 20 20:25:14 2011
@@ -16,19 +16,26 @@
NAME=authorstat_"$1"
DATFILE="${NAME}_year.txt"
-QUERYFILE=`mktemp`
-psql -t listarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | sed '1s/\( CAST(SUM([^)]\+) AS int) AS "[^"]\+[ _]\w\)[^ ^_]\+"/\1."/g' > $QUERYFILE
-cat $QUERYFILE | \
- psql listarchives \
- >"$DATFILE"
-rm $QUERYFILE
+#QUERYFILE=`mktemp`
+# psql -t listarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | sed '1s/\( CAST(SUM([^)]\+) AS int) AS "[^"]\+[ _]\w\)[^ ^_]\+"/\1."/g' > $QUERYFILE
+#cat $QUERYFILE | \
+# psql listarchives \
+# >"$DATFILE"
+#rm $QUERYFILE
+#
+#sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' \
+# -e 's/[[:space:]]*|[[:space:]]*/\t/g' \
+# "$DATFILE"
-sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' \
- -e 's/[[:space:]]*|[[:space:]]*/\t/g' \
- "$DATFILE"
+./author_stats_helper.py $1 $NUM > $DATFILE
## -e 's/®//g' -e 's/é/e/' \
## -e 's/á/á/g' -e 's/ñ/ñ/' \
## -e 's/è/è/g' -e 's/ö/ö/g' \
-./author_stats_create_graph $DATFILE $NUM "List activities for $1 list" "$3" "$4"
+if [ "$3" = "pdf" ] ; then
+ # PDFs are usually used in slides where we add the headline in LaTeX
+ ./author_stats_create_graph $DATFILE $NUM "" "$3" "$4"
+else
+ ./author_stats_create_graph $DATFILE $NUM "List activities for $1 list" "$3" "$4"
+fi
Added: blends/trunk/team_analysis_tools/author_stats_helper.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/author_stats_helper.py?rev=2904&op=file
==============================================================================
--- blends/trunk/team_analysis_tools/author_stats_helper.py (added)
+++ blends/trunk/team_analysis_tools/author_stats_helper.py Wed Jul 20 20:25:14 2011
@@ -1,0 +1,102 @@
+#!/usr/bin/python
+# Copyright 2011: Andreas Tille <tille at debian.org>
+# License: GPL
+# Takes a project mailing list name and a number of top authors
+# Creates a text file with the statistics table
+
+NMAX=30
+
+from sys import argv, stderr, exit
+
+if len(argv) != 3 :
+ print >>stderr, "Usage: %s <project> <no authors>" % (argv[0])
+ exit(-1)
+
+project = argv[1]
+
+PORT=5441
+DEFAULTPORT=5432
+DB='listarchives'
+
+import psycopg2
+import re
+
+try:
+ conn = psycopg2.connect(database=DB)
+except psycopg2.OperationalError:
+ try:
+ conn = psycopg2.connect(host="localhost",port=DEFAULTPORT,user="guest",database=DB)
+ except psycopg2.OperationalError:
+ conn = psycopg2.connect(host="127.0.0.1",port=DEFAULTPORT,user="guest",database=DB)
+
+curs = conn.cursor()
+
+def RowDictionaries(cursor):
+ """Return a list of dictionaries which specify the values by their column names"""
+
+ description = cursor.description
+ if not description:
+ # even if there are no data sets to return the description should contain the table structure. If not something went
+ # wrong and we return NULL as to represent a problem
+ return NULL
+ if cursor.rowcount <= 0:
+ # if there are no rows in the cursor we return an empty list
+ return []
+
+ data = cursor.fetchall()
+ result = []
+
+ for row in data:
+ resultrow = {}
+ i = 0
+ for dd in description:
+ resultrow[dd[0]] = row[i]
+ i += 1
+ result.append(resultrow)
+ return result
+
+query = "SELECT replace(author,' ','_') AS author FROM author_names_of_list('%s', %i) AS (author text);" % (project, NMAX)
+# print query
+curs.execute(query)
+
+print ' year',
+nuploaders = 0
+for row in curs.fetchall():
+ print '\t' + re.sub('^(.*_\w)[^_]*$', '\\1', row[0]),
+ nuploaders += 1
+print ''
+
+typestring = 'year text'
+for i in range(nuploaders):
+ typestring = typestring + ', upl' + str(i+1) + ' int'
+query = """SELECT *
+ FROM
+ crosstab(
+ 'SELECT year AS row_name, name AS bucket, count AS value
+ FROM author_per_year_of_list(''%s'', %i) AS (name text, year int, count int)',
+ 'SELECT * FROM author_names_of_list(''%s'', %i) AS (category text)'
+
+ ) As (%s)
+""" % (project, nuploaders, project, nuploaders, typestring)
+
+try:
+ # print query
+ curs.execute(query)
+except psycopg2.ProgrammingError, err:
+ if crosstab_missing_re.match(str(err)):
+ print >>stderr, """Please do
+ psql udd < /usr/share/postgresql/<pgversion>/contrib/tablefunc.sql
+before calling this program."""
+ else:
+ print >>stderr, "To few authors in %s list.\n%s" % (project, err)
+ exit(-1)
+for row in curs.fetchall():
+ print ' ' + row[0] ,
+ for v in row[1:]:
+ if v:
+ print '\t' + str(v),
+ else:
+ print '\t0',
+ print ''
+
+
Propchange: blends/trunk/team_analysis_tools/author_stats_helper.py
------------------------------------------------------------------------------
svn:executable = *
Modified: blends/trunk/team_analysis_tools/replace_names.sql
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/replace_names.sql?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/replace_names.sql (original)
+++ blends/trunk/team_analysis_tools/replace_names.sql Wed Jul 20 20:25:14 2011
@@ -72,7 +72,7 @@
UPDATE listarchive SET author = 'David Bremner' WHERE author = 'bremner-guest' ;
UPDATE listarchive SET author = 'Christian Kastner' WHERE author = 'chrisk' ;
UPDATE listarchive SET author = 'Christopher Walker' WHERE author = 'cjw1006-guest' ;
-UPDATE listarchive SET author = 'Michael Hanke' WHERE author = 'mhanke-guest' ;
+-- see below in <new></new>
UPDATE listarchive SET author = 'Alastair McKinstry' WHERE author = 'mckinstry' ;
UPDATE listarchive SET author = 'Otavio Salvador' WHERE author = 'otavio' ;
UPDATE listarchive SET author = 'Frederic Lehobey' WHERE author = 'fdl-guest' or author = 'Frederic Daniel Luc Lehobey' ;
@@ -80,6 +80,11 @@
UPDATE listarchive SET author = 'Hans-Christoph Steiner' WHERE author = 'eighthave-guest' ;
UPDATE listarchive SET author = 'Karol Langner' WHERE author = 'klm-guest' ;
UPDATE listarchive SET author = 'Georges Khaznadar' WHERE author = 'georgesk' ;
+
+-- <new>
+UPDATE listarchive SET author = 'Joost van Baal' WHERE author like 'Joost van Baal%' OR author = 'joostvb' ;
+UPDATE listarchive SET author = 'Michael Hanke' WHERE author = 'mhanke-guest' OR author = 'mih' ;
+-- </new>
-- stupid spammers at project = 'pkg-grass-devel'.
DELETE FROM listarchive WHERE author = 'info' AND project = 'pkg-grass-devel' ;
Modified: blends/trunk/team_analysis_tools/upload_history.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/upload_history.py?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/upload_history.py (original)
+++ blends/trunk/team_analysis_tools/upload_history.py Wed Jul 20 20:25:14 2011
@@ -32,10 +32,10 @@
PORT=5441
DEFAULTPORT=5432
-from sys import stderr, exit
+from sys import argv, stderr, exit
from os import system
import psycopg2
-from re import sub
+import re
try:
conn = psycopg2.connect(host="localhost",port=PORT,user="guest",database="udd")
@@ -47,7 +47,7 @@
curs = conn.cursor()
-
+crosstab_missing_re = re.compile(".* crosstab.*")
def RowDictionaries(cursor):
"""Return a list of dictionaries which specify the values by their column names"""
@@ -83,7 +83,7 @@
print >>out, ' year',
nuploaders = 0
for row in curs.fetchall():
- print >>out, '\t' + sub('^(.*_\w)[^_]*$', '\\1', row[0]),
+ print >>out, '\t' + re.sub('^(.*_\w)[^_]*$', '\\1', row[0]),
nuploaders += 1
print >>out, ''
@@ -100,10 +100,16 @@
""" % (teams[team], nuploaders, teams[team], nuploaders, typestring)
try:
+ # print query
curs.execute(query)
except psycopg2.ProgrammingError, err:
- print >>stderr, "To few uploaders in %s team.\n%s" % (team, err)
- exit
+ if crosstab_missing_re.match(str(err)):
+ print >>stderr, """Please do
+ psql udd < /usr/share/postgresql/<pgversion>/contrib/tablefunc.sql
+before calling this program."""
+ else:
+ print >>stderr, "To few uploaders in %s team.\n%s" % (team, err)
+ exit(-1)
for row in curs.fetchall():
print >>out, ' ' + row[0] ,
for v in row[1:]:
@@ -113,5 +119,13 @@
print >>out, '\t0',
print >>out, ''
out.close()
- system('./author_stats_create_graph ' + datafile + ' ' + str(min(nuploaders, MAXUPLOADERS)) + ' "Uploaders of ' + team + ' team"')
+ cmdstring='./author_stats_create_graph ' + datafile + ' ' + str(min(nuploaders, MAXUPLOADERS))
+ if len(argv) > 1 :
+ if argv[1].startswith('pdf'):
+ cmdstring = cmdstring + ' "" pdf'
+ if len(argv) > 2:
+ cmdstring = cmdstring + ' ' + argv[2]
+ else:
+ cmdstring = cmdstring + ' "Uploaders of ' + team + ' team"'
+ system(cmdstring)
More information about the Blends-commit
mailing list