[Blends-commit] r2904 - in /blends/trunk/team_analysis_tools: archives.sql author_stats author_stats_helper.py replace_names.sql upload_history.py

Wed Jul 20 20:25:14 UTC 2011

Author: tille
Date: Wed Jul 20 20:25:14 2011
New Revision: 2904

URL: http://svn.debian.org/wsvn/blends/?sc=1&rev=2904
Log:
Let creation of graphs work again with PG 9.x, some more enhancements

Added:
    blends/trunk/team_analysis_tools/author_stats_helper.py   (with props)
Modified:
    blends/trunk/team_analysis_tools/archives.sql
    blends/trunk/team_analysis_tools/author_stats
    blends/trunk/team_analysis_tools/replace_names.sql
    blends/trunk/team_analysis_tools/upload_history.py

Modified: blends/trunk/team_analysis_tools/archives.sql
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/archives.sql?rev=2904&op=diff
==============================================================================

--- blends/trunk/team_analysis_tools/archives.sql (original)
+++ blends/trunk/team_analysis_tools/archives.sql Wed Jul 20 20:25:14 2011
@@ -15,238 +15,68 @@
    ts        date
 );
 
-CREATE LANGUAGE plpgsql ;
+-- top N authors of mailing list
+CREATE OR REPLACE FUNCTION author_names_of_list(text,int) RETURNS SETOF RECORD AS $$
+  SELECT author FROM (
+    SELECT author, COUNT(*)::int
+      FROM listarchive
+      WHERE project = $1
+      GROUP BY author
+      ORDER BY count DESC
+      LIMIT $2
+  ) tmp
+$$ LANGUAGE 'SQL';
 
 /*
- * Build a query string for several purposes
- *   ARG1: Query to obtain wanted columns
- *   ARG2: Feature that is queried
- * See below how this helper is used.
+SELECT * FROM author_names_of_list('med', 12) AS (category text) ;
+SELECT * FROM author_names_of_list('debian-med-packaging', 12) AS (category text) ;
  */
-CREATE OR REPLACE FUNCTION BuildQueryCDDsHelper(text, text)
-    RETURNS text AS '
-    DECLARE
-       IterQuery  ALIAS FOR \$1 ;
-       Feature    ALIAS FOR \$2 ;
-       ret        text ;
-       union      text ;
-       query1     text ;
-       query2     text ;
-       r1         RECORD ;
-       r2         RECORD ;
-       ri         RECORD ;
-       qi         RECORD ;
-    BEGIN
 
-    query1 := IterQuery ;
-    query2 := query1;
-    union  := '''' ;
-    ret    := ''SELECT yearmonth'' ;
-
-    FOR r1 IN EXECUTE query1 LOOP
-    	ret := ret || '', CAST(SUM("'' || r1.feature || ''") AS int) AS "'' || r1.feature || ''"'' ;
-    END LOOP;
-
-    ret := ret || ''
-  FROM (
-'' ;
-
-    FOR r1 IN EXECUTE query1 LOOP
-       ret   := ret || union || ''    SELECT yearmonth'' ;
-       union := ''
-    UNION
-'';
-       FOR r2 IN EXECUTE query2 LOOP
-       	   IF r1.feature = r2.feature THEN
-	      ret := ret || '', COUNT(*)'' ;
-	   ELSE
-	      ret := ret || '', 0'' ;
-	   END IF;
-	   ret := ret || '' AS "'' || r2.feature || ''"'';
-       END LOOP ;
-       ret := ret || ''
-       FROM listarchive 
-       WHERE '' || Feature || '' = '''''' || r1.feature || '''''' GROUP BY yearmonth'';
-    END LOOP ;
-
-    ret := ret || ''
-  ) zw
-  GROUP BY yearmonth
-  ORDER BY yearmonth;'' ;
-
-    RETURN ret;
-  END; ' LANGUAGE 'plpgsql';
+CREATE OR REPLACE FUNCTION author_per_year_of_list(text,int) RETURNS SETOF RECORD AS $$
+  SELECT author, year, COUNT(*)::int FROM (
+    SELECT author,  EXTRACT(year FROM yearmonth)::int AS year
+      FROM listarchive
+     WHERE author IN (SELECT * FROM author_names_of_list($1, $2) AS (author text))
+       AND project = $1
+  ) tmp
+  GROUP BY author, year
+  ORDER BY year, count DESC, author
+$$ LANGUAGE 'SQL';
 
 /*
- * This query returns stats about all mailing lists
+SELECT * FROM author_per_year_of_list('med', 12) AS (author text, year int, value int) ;
+SELECT * FROM author_per_year_of_list('debian-med-packaging', 12) AS (author text, year int, value int) ;
  */
 
-CREATE OR REPLACE FUNCTION BuildQueryCDDs()
-    RETURNS text AS '
-    DECLARE
-       ret        text ;
+/*
+  Creating the whole crosstable in a simple SQL statement is not possible
+  because the return tuple of crosstab needs to specify all columns which
+  needs some string operations for variable number of columns (see below
+  for an example.
 
-    BEGIN
+  So this will be implemented in a Python script which can easily create
+  the string needed.
 
-    ret := BuildQueryCDDsHelper(
-               ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
-               ''project'') ;
-    return ret ;
-  END; ' LANGUAGE 'plpgsql';
+CREATE OR REPLACE FUNCTION liststat_table(text,int) RETURNS SETOF RECORD AS $$
+  SELECT *
+    FROM
+      crosstab(
+             'SELECT year AS row_name, name AS bucket, count AS value
+                     FROM author_per_year_of_list($1, $2) AS (name text, year int, count int)',
+             'SELECT * FROM author_names_of_list($1, $2) AS (category text)'
+        ) As (year_row text, xxx int, yyy int, ...) -- <-- problematic row
+$$ LANGUAGE 'SQL';
 
-/*
- * This query returns stats about the ARG2 most active authors in a specific
- * Mailing list (ARG1)
- */
+  SELECT *
+    FROM
+      crosstab(
+             'SELECT year AS row_name, name AS bucket, count AS value
+                     FROM author_per_year_of_list(''med'', 12) AS (name text, year int, count int)',
+             'SELECT * FROM author_names_of_list(''med'', 12) AS (category text)'
+        ) As (year text, upl1 int, upl2 int, upl3 int, upl4 int, upl5 int, upl6 int, upl7 int, upl8 int, upl9 int, upl10 int, upl11 int, upl12 int)
+;
 
-CREATE OR REPLACE FUNCTION BuildQueryAuthors(text, int)
-    RETURNS text AS '
-    DECLARE
-       Project    ALIAS FOR \$1 ;
-       NumAuthors ALIAS FOR \$2 ;
-       ret        text ;
-
-    BEGIN
-
-    ret := BuildQueryCDDsHelper(
-               ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
-                 WHERE project = '''''' || Project || '''''' AND author IN (
-      SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '''''' || Project || ''''''
-           GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
-   GROUP BY author ORDER BY num DESC;'',
-               ''project = '''''' || Project || '''''' AND author'') ;
-    return ret ;
-  END; ' LANGUAGE 'plpgsql';
-
-
-CREATE OR REPLACE FUNCTION CompareCDDs()
-    RETURNS SETOF RECORD AS '
-    DECLARE
-       ret        text ;
-       query      text ;
-       r          RECORD ;
-
-    BEGIN
-
-    SELECT INTO query BuildQueryCDDs() ;
-
-    FOR r IN EXECUTE query LOOP
-        RETURN NEXT r;
-    END LOOP;
-
-  END; ' LANGUAGE 'plpgsql';
-
-/*
- * You might call this as, but there is less chance to get column names right.
- *  SELECT * FROM CompareCDDs() AS
- *   ( yearmonth date, cdd1 int, cdd2 int, cdd3 int, cdd4 int, cdd5 int, cdd6 int, cdd7 int, cdd8 int );
- *
- * That's why we use the shell script wrappers ...
- */
-
-
-/*******************************************
- *
- * Same thing as above but for whole year
- *
- *******************************************/
-
-/*
- * Build a query string for several purposes
- *   ARG1: Query to obtain wanted columns
- *   ARG2: Feature that is queried
- * See below how this helper is used.
- */
-CREATE OR REPLACE FUNCTION BuildQueryCDDsYearHelper(text, text)
-    RETURNS text AS '
-    DECLARE
-       IterQuery  ALIAS FOR \$1 ;
-       Feature    ALIAS FOR \$2 ;
-       ret        text ;
-       union      text ;
-       query1     text ;
-       query2     text ;
-       r1         RECORD ;
-       r2         RECORD ;
-       ri         RECORD ;
-       qi         RECORD ;
-    BEGIN
-
-    query1 := IterQuery ;
-    query2 := query1;
-    union  := '''' ;
-    ret    := ''SELECT EXTRACT(''''year'''' FROM year) AS year'' ;
-
-    FOR r1 IN EXECUTE query1 LOOP
-    	ret := ret || '', CAST(SUM("'' || r1.feature || ''") AS int) AS "'' || replace(r1.feature,'' '',''_'') || ''"'' ;
-    END LOOP;
-
-    ret := ret || ''
-  FROM (
-'' ;
-
-    FOR r1 IN EXECUTE query1 LOOP
-       ret   := ret || union || ''    SELECT date_trunc(''''year'''', yearmonth)::date AS year'' ;
-       union := ''
-    UNION
-'';
-       FOR r2 IN EXECUTE query2 LOOP
-       	   IF r1.feature = r2.feature THEN
-	      ret := ret || '', COUNT(*)'' ;
-	   ELSE
-	      ret := ret || '', 0'' ;
-	   END IF;
-	   ret := ret || '' AS "'' || r2.feature || ''"'';
-       END LOOP ;
-       ret := ret || ''
-       FROM listarchive 
-       WHERE '' || Feature || '' = '''''' || replace(r1.feature, '''''''', '''''''''''') || '''''' GROUP BY year'';
-    END LOOP ;
-
-    ret := ret || ''
-  ) zw
-  GROUP BY year
-  ORDER BY year;'' ;
-
-    RETURN ret;
-  END; ' LANGUAGE 'plpgsql';
-
-CREATE OR REPLACE FUNCTION BuildQueryCDDsYear()
-    RETURNS text AS '
-    DECLARE
-       ret        text ;
-
-    BEGIN
-
-    ret := BuildQueryCDDsYearHelper(
-               ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
-               ''project'') ;
-    return ret ;
-  END; ' LANGUAGE 'plpgsql';
-
-/*
- * This query returns stats about the ARG2 most active authors in a specific
- * Mailing list (ARG1)
- */
-
-CREATE OR REPLACE FUNCTION BuildQueryAuthorsYear(text, int)
-    RETURNS text AS '
-    DECLARE
-       Project    ALIAS FOR \$1 ;
-       NumAuthors ALIAS FOR \$2 ;
-       ret        text ;
-
-    BEGIN
-
-    ret := BuildQueryCDDsYearHelper(
-               ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
-                 WHERE project = '''''' || Project || '''''' AND author IN (
-      SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '''''' || Project || ''''''
-           GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
-   GROUP BY author ORDER BY num DESC;'',
-               ''project = '''''' || Project || '''''' AND author'') ;
-    return ret ;
-  END; ' LANGUAGE 'plpgsql';
+*/
 
 COMMIT;
 EOT

Modified: blends/trunk/team_analysis_tools/author_stats
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/author_stats?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/author_stats (original)
+++ blends/trunk/team_analysis_tools/author_stats Wed Jul 20 20:25:14 2011
@@ -16,19 +16,26 @@
 
 NAME=authorstat_"$1"
 DATFILE="${NAME}_year.txt"
-QUERYFILE=`mktemp`
-psql -t listarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | sed '1s/\( CAST(SUM([^)]\+) AS int) AS "[^"]\+[ _]\w\)[^ ^_]\+"/\1."/g' > $QUERYFILE
-cat $QUERYFILE | \
-   psql listarchives \
-   >"$DATFILE"
-rm $QUERYFILE
+#QUERYFILE=`mktemp`
+# psql -t listarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | sed '1s/\( CAST(SUM([^)]\+) AS int) AS "[^"]\+[ _]\w\)[^ ^_]\+"/\1."/g' > $QUERYFILE
+#cat $QUERYFILE | \
+#   psql listarchives \
+#   >"$DATFILE"
+#rm $QUERYFILE
+#
+#sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' \
+#       -e 's/[[:space:]]*|[[:space:]]*/\t/g'           \
+#       "$DATFILE"
 
-sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' \
-       -e 's/[[:space:]]*|[[:space:]]*/\t/g'           \
-       "$DATFILE"
+./author_stats_helper.py $1 $NUM > $DATFILE
 
 ##       -e 's/&#xAE;//g'  -e 's/&#xE9;/e/' \
 ##       -e 's/&#xE1;/Ã¡/g' -e 's/&#xF1;/Ã±/' \
 ##       -e 's/&#xE8;/Ã¨/g' -e 's/&#xF6;/Ã¶/g' \
 
-./author_stats_create_graph $DATFILE $NUM "List activities for $1 list" "$3" "$4"
+if [ "$3" = "pdf" ] ; then
+    # PDFs are usually used in slides where we add the headline in LaTeX
+    ./author_stats_create_graph $DATFILE $NUM "" "$3" "$4"
+else
+    ./author_stats_create_graph $DATFILE $NUM "List activities for $1 list" "$3" "$4"
+fi

Added: blends/trunk/team_analysis_tools/author_stats_helper.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/author_stats_helper.py?rev=2904&op=file
==============================================================================
--- blends/trunk/team_analysis_tools/author_stats_helper.py (added)
+++ blends/trunk/team_analysis_tools/author_stats_helper.py Wed Jul 20 20:25:14 2011
@@ -1,0 +1,102 @@
+#!/usr/bin/python
+# Copyright 2011: Andreas Tille <tille at debian.org>
+# License: GPL
+# Takes a project mailing list name and a number of top authors
+# Creates a text file with the statistics table
+
+NMAX=30
+
+from sys import argv, stderr, exit
+
+if len(argv) != 3 :
+    print >>stderr, "Usage: %s <project> <no authors>" % (argv[0])
+    exit(-1)
+
+project = argv[1]
+
+PORT=5441
+DEFAULTPORT=5432
+DB='listarchives'
+
+import psycopg2
+import re
+
+try:
+    conn = psycopg2.connect(database=DB)
+except psycopg2.OperationalError:
+    try:
+        conn = psycopg2.connect(host="localhost",port=DEFAULTPORT,user="guest",database=DB)
+    except psycopg2.OperationalError:
+	conn = psycopg2.connect(host="127.0.0.1",port=DEFAULTPORT,user="guest",database=DB)
+
+curs = conn.cursor()
+
+def RowDictionaries(cursor):
+    """Return a list of dictionaries which specify the values by their column names"""
+
+    description = cursor.description
+    if not description:
+        # even if there are no data sets to return the description should contain the table structure.  If not something went
+        # wrong and we return NULL as to represent a problem
+        return NULL
+    if cursor.rowcount <= 0:
+        # if there are no rows in the cursor we return an empty list
+        return []
+
+    data = cursor.fetchall()
+    result = []
+
+    for row in data:
+        resultrow = {}
+        i = 0
+        for dd in description:
+            resultrow[dd[0]] = row[i]
+            i += 1
+        result.append(resultrow)
+    return result
+
+query = "SELECT replace(author,' ','_') AS author FROM author_names_of_list('%s', %i) AS (author text);" % (project, NMAX)
+# print query
+curs.execute(query)
+
+print ' year',
+nuploaders = 0
+for row in curs.fetchall():
+    print '\t' + re.sub('^(.*_\w)[^_]*$', '\\1', row[0]),
+    nuploaders += 1
+print ''
+
+typestring = 'year text'
+for i in range(nuploaders):
+    typestring = typestring + ', upl' + str(i+1) + ' int'
+query = """SELECT *
+	FROM 
+	crosstab(
+             'SELECT year AS row_name, name AS bucket, count AS value
+                     FROM author_per_year_of_list(''%s'', %i) AS (name text, year int, count int)',
+             'SELECT * FROM author_names_of_list(''%s'', %i) AS (category text)'
+
+        ) As (%s)
+""" % (project, nuploaders, project, nuploaders, typestring)
+
+try:
+    # print query
+    curs.execute(query)
+except psycopg2.ProgrammingError, err:
+    if crosstab_missing_re.match(str(err)):
+        print >>stderr, """Please do
+	psql udd < /usr/share/postgresql/<pgversion>/contrib/tablefunc.sql
+before calling this program."""
+    else:
+        print >>stderr, "To few authors in %s list.\n%s" % (project, err)
+    exit(-1)
+for row in curs.fetchall():
+    print ' ' + row[0] ,
+    for v in row[1:]:
+        if v:
+            print '\t' + str(v),
+        else:
+            print '\t0',
+    print ''
+
+

Propchange: blends/trunk/team_analysis_tools/author_stats_helper.py
------------------------------------------------------------------------------
    svn:executable = *

Modified: blends/trunk/team_analysis_tools/replace_names.sql
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/replace_names.sql?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/replace_names.sql (original)
+++ blends/trunk/team_analysis_tools/replace_names.sql Wed Jul 20 20:25:14 2011
@@ -72,7 +72,7 @@
 UPDATE listarchive SET author = 'David Bremner'          WHERE author = 'bremner-guest' ;
 UPDATE listarchive SET author = 'Christian Kastner'      WHERE author = 'chrisk' ;
 UPDATE listarchive SET author = 'Christopher Walker'     WHERE author = 'cjw1006-guest' ;
-UPDATE listarchive SET author = 'Michael Hanke'          WHERE author = 'mhanke-guest' ;
+-- see below in <new></new>
 UPDATE listarchive SET author = 'Alastair McKinstry'     WHERE author = 'mckinstry' ;
 UPDATE listarchive SET author = 'Otavio Salvador'        WHERE author = 'otavio' ;
 UPDATE listarchive SET author = 'Frederic Lehobey'       WHERE author = 'fdl-guest' or author = 'Frederic Daniel Luc Lehobey' ;
@@ -80,6 +80,11 @@
 UPDATE listarchive SET author = 'Hans-Christoph Steiner' WHERE author = 'eighthave-guest' ;
 UPDATE listarchive SET author = 'Karol Langner'          WHERE author = 'klm-guest' ;
 UPDATE listarchive SET author = 'Georges Khaznadar'      WHERE author = 'georgesk' ;
+
+-- <new>
+UPDATE listarchive SET author = 'Joost van Baal'         WHERE author like 'Joost van Baal%' OR author = 'joostvb' ;
+UPDATE listarchive SET author = 'Michael Hanke'          WHERE author = 'mhanke-guest' OR author = 'mih' ;
+-- </new>
 
 -- stupid spammers at project = 'pkg-grass-devel'.
 DELETE FROM listarchive WHERE author = 'info' AND project = 'pkg-grass-devel' ;

Modified: blends/trunk/team_analysis_tools/upload_history.py
URL: http://svn.debian.org/wsvn/blends/blends/trunk/team_analysis_tools/upload_history.py?rev=2904&op=diff
==============================================================================
--- blends/trunk/team_analysis_tools/upload_history.py (original)
+++ blends/trunk/team_analysis_tools/upload_history.py Wed Jul 20 20:25:14 2011
@@ -32,10 +32,10 @@
 PORT=5441
 DEFAULTPORT=5432
 
-from sys import stderr, exit
+from sys import argv, stderr, exit
 from os import system
 import psycopg2
-from re import sub
+import re
 
 try:
     conn = psycopg2.connect(host="localhost",port=PORT,user="guest",database="udd")
@@ -47,7 +47,7 @@
 
 curs = conn.cursor()
 
-
+crosstab_missing_re  = re.compile(".* crosstab.*")
 
 def RowDictionaries(cursor):
     """Return a list of dictionaries which specify the values by their column names"""
@@ -83,7 +83,7 @@
     print >>out, ' year',
     nuploaders = 0
     for row in curs.fetchall():
-        print >>out, '\t' + sub('^(.*_\w)[^_]*$', '\\1', row[0]),
+        print >>out, '\t' + re.sub('^(.*_\w)[^_]*$', '\\1', row[0]),
         nuploaders += 1
     print >>out, ''
 
@@ -100,10 +100,16 @@
 """ % (teams[team], nuploaders, teams[team], nuploaders, typestring)
 
     try:
+	# print query
         curs.execute(query)
     except psycopg2.ProgrammingError, err:
-        print >>stderr, "To few uploaders in %s team.\n%s" % (team, err)
-	exit
+	if crosstab_missing_re.match(str(err)):
+	    print >>stderr, """Please do
+	psql udd < /usr/share/postgresql/<pgversion>/contrib/tablefunc.sql
+before calling this program."""
+	else:
+            print >>stderr, "To few uploaders in %s team.\n%s" % (team, err)
+	exit(-1)
     for row in curs.fetchall():
         print >>out, ' ' + row[0] ,
         for v in row[1:]:
@@ -113,5 +119,13 @@
                 print >>out, '\t0',
         print >>out, ''
     out.close()
-    system('./author_stats_create_graph ' + datafile + ' ' + str(min(nuploaders, MAXUPLOADERS)) + ' "Uploaders of ' + team + ' team"')
+    cmdstring='./author_stats_create_graph ' + datafile + ' ' + str(min(nuploaders, MAXUPLOADERS))
+    if len(argv) > 1 :
+	if argv[1].startswith('pdf'):
+	    cmdstring = cmdstring + ' "" pdf'
+	if len(argv) > 2:
+	    cmdstring = cmdstring + ' ' + argv[2]
+    else:
+	cmdstring = cmdstring + ' "Uploaders of ' + team + ' team"'
+    system(cmdstring)