[med-svn] r2352 - trunk/community/talks/200808_debconf8
tille at alioth.debian.org
tille at alioth.debian.org
Mon Jul 28 12:35:25 UTC 2008
Author: tille
Date: 2008-07-28 12:35:23 +0000 (Mon, 28 Jul 2008)
New Revision: 2352
Modified:
trunk/community/talks/200808_debconf8/archives.sql
trunk/community/talks/200808_debconf8/get-archive-pages
trunk/community/talks/200808_debconf8/liststat
Log:
Use some SQL functions to simplify queries about list statistics
Modified: trunk/community/talks/200808_debconf8/archives.sql
===================================================================
--- trunk/community/talks/200808_debconf8/archives.sql 2008-07-28 12:31:31 UTC (rev 2351)
+++ trunk/community/talks/200808_debconf8/archives.sql 2008-07-28 12:35:23 UTC (rev 2352)
@@ -15,5 +15,135 @@
ts date
);
+CREATE LANGUAGE plpgsql ;
+
+/*
+ * Build a query string for several purposes
+ * ARG1: Query to obtain wanted columns
+ * ARG2: Feature that is queried
+ * See below how this helper is used.
+ */
+CREATE OR REPLACE FUNCTION BuildQueryCDDsHelper(text, text)
+ RETURNS text AS '
+ DECLARE
+ IterQuery ALIAS FOR \$1 ;
+ Feature ALIAS FOR \$2 ;
+ ret text ;
+ union text ;
+ query1 text ;
+ query2 text ;
+ r1 RECORD ;
+ r2 RECORD ;
+ ri RECORD ;
+ qi RECORD ;
+ BEGIN
+
+ query1 := IterQuery ;
+ query2 := query1;
+ union := '''' ;
+ ret := ''SELECT yearmonth'' ;
+
+ FOR r1 IN EXECUTE query1 LOOP
+ ret := ret || '', CAST(SUM("'' || r1.feature || ''") AS int) AS "'' || r1.feature || ''"'' ;
+ END LOOP;
+
+ ret := ret || ''
+ FROM (
+'' ;
+
+ FOR r1 IN EXECUTE query1 LOOP
+ ret := ret || union || '' SELECT yearmonth'' ;
+ union := ''
+ UNION
+'';
+ FOR r2 IN EXECUTE query2 LOOP
+ IF r1.feature = r2.feature THEN
+ ret := ret || '', COUNT(*)'' ;
+ ELSE
+ ret := ret || '', 0'' ;
+ END IF;
+ ret := ret || '' AS "'' || r2.feature || ''"'';
+ END LOOP ;
+ ret := ret || ''
+ FROM listarchive
+ WHERE '' || Feature || '' = '''''' || r1.feature || '''''' GROUP BY yearmonth'';
+ END LOOP ;
+
+ ret := ret || ''
+ ) zw
+ GROUP BY yearmonth
+ ORDER BY yearmonth;'' ;
+
+ RETURN ret;
+ END; ' LANGUAGE 'plpgsql';
+
+/*
+ * This query returns stats about all mailing lists
+ */
+
+CREATE OR REPLACE FUNCTION BuildQueryCDDs()
+ RETURNS text AS '
+ DECLARE
+ ret text ;
+
+ BEGIN
+
+ ret := BuildQueryCDDsHelper(
+ ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
+ ''project'') ;
+ return ret ;
+ END; ' LANGUAGE 'plpgsql';
+
+/*
+ * This query returns stats about the ARG2 most active authors in a specific
+ * Mailing list (ARG1)
+ */
+
+CREATE OR REPLACE FUNCTION BuildQueryAuthors(text, int)
+ RETURNS text AS '
+ DECLARE
+ Project ALIAS FOR \$1 ;
+ NumAuthors ALIAS FOR \$2 ;
+ ret text ;
+
+ BEGIN
+
+ ret := BuildQueryCDDsHelper(
+ ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
+ WHERE project = '''''' || Project || '''''' AND author IN (
+ SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '''''' || Project || ''''''
+ GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
+ GROUP BY author ORDER BY num DESC;'',
+ ''author'') ;
+ return ret ;
+ END; ' LANGUAGE 'plpgsql';
+
+
+CREATE OR REPLACE FUNCTION CompareCDDs()
+ RETURNS SETOF RECORD AS '
+ DECLARE
+ ret text ;
+ query text ;
+ r RECORD ;
+
+ BEGIN
+
+ SELECT INTO query BuildQueryCDDs() ;
+
+ FOR r IN EXECUTE query LOOP
+ RETURN NEXT r;
+ END LOOP;
+
+ END; ' LANGUAGE 'plpgsql';
+
+/*
+ * You might call this as, but there is less chance to get column names right.
+ * SELECT * FROM CompareCDDs() AS
+ * ( yearmonth date, cdd1 int, cdd2 int, cdd3 int, cdd4 int, cdd5 int, cdd6 int, cdd7 int, cdd8 int );
+ *
+ * That's why we use the shell script wrappers ...
+ */
+
COMMIT;
EOT
+
Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages 2008-07-28 12:31:31 UTC (rev 2351)
+++ trunk/community/talks/200808_debconf8/get-archive-pages 2008-07-28 12:35:23 UTC (rev 2352)
@@ -102,11 +102,11 @@
$_ =~ s/^\[[^\]]+\]\s*([^\s]+)/$1/ ; # Remove other list markers (but only if something is following)
$_ =~ s/\s*\(fwd\)\s*//i ; # Remove (fwd)
$subject = $_ ;
- if ( $subject =~ /^[&#x\d;\sA-F\?]+$/ ) {
+ if ( $subject =~ /^[-&#x\d;\sA-F\?:,]+$/ ) {
print "Potential SPAM line - strange subject: $project $year-$month: $subject\n";
$spamlines++ ;
} else {
- if ( $author =~ /^[&#x\d;\sA-F\?]+$/ ||
+ if ( $author =~ /^[-&#x\d;\sA-F\?:,]+$/ ||
$author =~ /info/i ) { # never had a non-spam message from an author whos name contains info
print "Potential SPAM line - strange author: $project $year-$month: $author\n";
$spamlines++ ;
Modified: trunk/community/talks/200808_debconf8/liststat
===================================================================
--- trunk/community/talks/200808_debconf8/liststat 2008-07-28 12:31:31 UTC (rev 2351)
+++ trunk/community/talks/200808_debconf8/liststat 2008-07-28 12:35:23 UTC (rev 2352)
@@ -1,55 +1,8 @@
-#!/bin/sh
+#!/bin/sh -x
DATFILE="liststat.dat"
-psql cddlistarchives >"$DATFILE" <<EOT
+psql -t cddlistarchives -c 'SELECT BuildQueryCDDs() ;' | \
+ psql cddlistarchives \
+ >"$DATFILE"
-SELECT EXTRACT (year FROM yearmonth) || '-' ||
- CASE WHEN EXTRACT (month FROM yearmonth) < 10 THEN '0' ELSE '' END || EXTRACT (month FROM yearmonth) AS "YearMonth",
- SUM(med) AS med, SUM(edu) AS edu, SUM(jr) AS jr, SUM(nonprofit) AS nonprofit, SUM(lex) AS lex,
- SUM(enterprise) AS enterprise, SUM(accessibility) AS accessibility, SUM(desktop) AS desktop
- FROM (
- SELECT yearmonth, COUNT(*) AS med, 0 AS edu, 0 AS jr, 0 AS nonprofit, 0 AS lex,
- 0 AS enterprise, 0 AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'med' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, COUNT(*) AS edu, 0 AS jr, 0 AS nonprofit, 0 AS lex,
- 0 AS enterprise, 0 AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'edu' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, 0 AS edu, COUNT(*) AS jr, 0 AS nonprofit, 0 AS lex,
- 0 AS enterprise, 0 AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'jr' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, 0 AS edu, 0 AS jr, COUNT(*) AS nonprofit, 0 AS lex,
- 0 AS enterprise, 0 AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'nonprofit' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, 0 AS edu, 0 AS jr, 0 AS nonprofit, COUNT(*) AS lex,
- 0 AS enterprise, 0 AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'lex' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, 0 AS edu, 0 AS jr, 0 AS nonprofit, 0 AS lex,
- COUNT(*) AS enterprise, 0 AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'jr' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, 0 AS edu, 0 AS jr, 0 AS nonprofit, 0 AS lex,
- 0 AS enterprise, COUNT(*) AS accessibility, 0 AS desktop
- FROM listarchive
- WHERE project = 'nonprofit' GROUP BY yearmonth
- UNION
- SELECT yearmonth, 0 AS med, 0 AS edu, 0 AS jr, 0 AS nonprofit, 0 AS lex,
- 0 AS enterprise, 0 AS accessibility, COUNT(*) AS desktop
- FROM listarchive
- WHERE project = 'lex' GROUP BY yearmonth
- ) zw
- GROUP BY yearmonth
- ORDER BY yearmonth;
-
-EOT
-
sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' -e 's/[[:space:]]*|[[:space:]]*/\t/g' "$DATFILE"
+
More information about the debian-med-commit
mailing list