[med-svn] r2354 - trunk/community/talks/200808_debconf8
tille at alioth.debian.org
tille at alioth.debian.org
Mon Jul 28 13:21:47 UTC 2008
Author: tille
Date: 2008-07-28 13:21:45 +0000 (Mon, 28 Jul 2008)
New Revision: 2354
Removed:
trunk/community/talks/200808_debconf8/liststat
Modified:
trunk/community/talks/200808_debconf8/archives.sql
trunk/community/talks/200808_debconf8/get-archive-pages
Log:
Deactivate writing data from the mailing list archive into separate files because using the database is preferred
Modified: trunk/community/talks/200808_debconf8/archives.sql
===================================================================
--- trunk/community/talks/200808_debconf8/archives.sql 2008-07-28 13:12:12 UTC (rev 2353)
+++ trunk/community/talks/200808_debconf8/archives.sql 2008-07-28 13:21:45 UTC (rev 2354)
@@ -144,6 +144,110 @@
* That's why we use the shell script wrappers ...
*/
+
+/*******************************************
+ *
+ * Same thing as above but for whole year
+ *
+ *******************************************/
+
+/*
+ * Build the text of a query that yields one row per year and one integer
+ * column per feature value, counting the listarchive rows of that year.
+ * ARG1: Query to obtain wanted columns; it has to return them in a column
+ *       named "feature" and is executed several times by this helper
+ * ARG2: Name of the listarchive column that is compared with each feature
+ * Returns the query text only - the caller has to execute it.
+ * Feature values are embedded via quote_ident() / quote_literal() so that
+ * values containing quote characters (for instance author names) can not
+ * break the generated SQL.
+ * NOTE(review): if ARG1 returns no rows the generated query has an empty
+ * sub-select and is not valid SQL - confirm that callers never hit this.
+ * See below how this helper is used.
+ */
+CREATE OR REPLACE FUNCTION BuildQueryCDDsYearHelper(text, text)
+ RETURNS text AS '
+ DECLARE
+ IterQuery ALIAS FOR \$1 ;
+ Feature ALIAS FOR \$2 ;
+ ret text ;
+ sep text ;
+ r1 RECORD ;
+ r2 RECORD ;
+ BEGIN
+
+ -- sep is empty in front of the first sub-select and UNION afterwards
+ sep := '''' ;
+ ret := ''SELECT EXTRACT(''''year'''' FROM year) AS year'' ;
+
+ -- outer select: one summed up integer column per feature
+ FOR r1 IN EXECUTE IterQuery LOOP
+ ret := ret || '', CAST(SUM('' || quote_ident(r1.feature) || '') AS int) AS '' || quote_ident(r1.feature) ;
+ END LOOP;
+
+ ret := ret || ''
+ FROM (
+'' ;
+
+ -- one sub-select per feature: COUNT(*) in its own column, 0 in all others
+ FOR r1 IN EXECUTE IterQuery LOOP
+ ret := ret || sep || '' SELECT date_trunc(''''year'''', yearmonth)::date AS year'' ;
+ sep := ''
+ UNION
+'';
+ FOR r2 IN EXECUTE IterQuery LOOP
+ IF r1.feature = r2.feature THEN
+ ret := ret || '', COUNT(*)'' ;
+ ELSE
+ ret := ret || '', 0'' ;
+ END IF;
+ ret := ret || '' AS '' || quote_ident(r2.feature) ;
+ END LOOP ;
+ ret := ret || ''
+ FROM listarchive
+ WHERE '' || Feature || '' = '' || quote_literal(r1.feature) || '' GROUP BY year'';
+ END LOOP ;
+
+ ret := ret || ''
+ ) zw
+ GROUP BY year
+ ORDER BY year;'' ;
+
+ RETURN ret;
+ END; ' LANGUAGE 'plpgsql';
+
+/*
+ * Build the per-year query that counts the listarchive rows of every project.
+ * The projects are handed to the helper ordered by their total number of
+ * rows, largest first.
+ */
+CREATE OR REPLACE FUNCTION BuildQueryCDDsYear()
+ RETURNS text AS '
+ BEGIN
+ RETURN BuildQueryCDDsYearHelper(
+ ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
+ ''project'') ;
+ END; ' LANGUAGE 'plpgsql';
+
+/*
+ * This function returns the text of a query that yields per-year stats about
+ * the ARG2 most active authors in a specific mailing list (ARG1).
+ * ARG1: Project (mailing list) name as stored in listarchive.project
+ * ARG2: Number of authors to include, ranked by their message count
+ * The query text is built by BuildQueryCDDsYearHelper - the year variant of
+ * the helper - so the counts are grouped per year.
+ * The project name is embedded via quote_literal() so that names containing
+ * quote characters can not break the generated SQL.
+ */
+
+CREATE OR REPLACE FUNCTION BuildQueryAuthorsYear(text, int)
+ RETURNS text AS '
+ DECLARE
+ Project ALIAS FOR \$1 ;
+ NumAuthors ALIAS FOR \$2 ;
+ ret text ;
+
+ BEGIN
+
+ ret := BuildQueryCDDsYearHelper(
+ ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
+ WHERE project = '' || quote_literal(Project) || '' AND author IN (
+ SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '' || quote_literal(Project) || ''
+ GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
+ GROUP BY author ORDER BY num DESC;'',
+ ''author'') ;
+ return ret ;
+ END; ' LANGUAGE 'plpgsql';
+
COMMIT;
EOT
Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages 2008-07-28 13:12:12 UTC (rev 2353)
+++ trunk/community/talks/200808_debconf8/get-archive-pages 2008-07-28 13:21:45 UTC (rev 2354)
@@ -14,6 +14,10 @@
'Skolelinux archive Installer', 'Debian Wiki');
my @SPAMAUTHORS = ('Pls check this new site');
+# If != 0, the extracts of the mailing list archives are also stored as files
+# in per-project directories. The preferred method is to use only the database.
+my $storefiles = 0;
+
# Debian-Jr starts in 2000
my $YEARSTART = 2000;
@@ -42,8 +46,10 @@
$daten->execute() ;
$daten->finish() ;
- mkdir($project,0777);
- chdir($project);
+ if ( $storefiles ) {
+ mkdir($project,0777);
+ chdir($project);
+ }
my $URL="${BASEURL}-${project}";
my $year;
my $month;
@@ -54,7 +60,9 @@
}
my $url = "${URL}/${year}/${month}/";
my $datafile = "${year}-${month}" ;
- unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open $datafile"); }
+ if ( $storefiles ) {
+ unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open $datafile"); }
+ }
my $messagelines = 0;
my $spamlines = 0;
my $robotlines = 0;
@@ -64,7 +72,7 @@
my $indexpage = $ua->get($url, Host => $uri->host );
unless ( $indexpage->is_success ) { # some mailing lists startet later ...
$url = '';
- close HTMLSNIP ;
+ if ( $storefiles ) { close HTMLSNIP ; }
# remove empty file
unlink($datafile);
next;
@@ -125,7 +133,9 @@
}
}
if ( $robotflag == 0 ) {
- print HTMLSNIP "$subject ; $author\n";
+ if ( $storefiles ) {
+ print HTMLSNIP "$subject ; $author\n";
+ }
$datain->execute($project, "$year-$month-01", $author, $subject,
"${URL}/${year}/${month}/$msgurl") ;
$messagelines++ ;
@@ -142,7 +152,9 @@
} else {
$url = '';
}
- print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+ if ( $storefiles ) {
+ print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+ }
if ( $messages != $messagelines + $spamlines + $robotlines ) {
print "Warning: $project $year/$month counted $messagelines Messages, $spamlines SPAM and $robotlines robots but page says $messages\n";
}
@@ -163,10 +175,10 @@
}
}
}
- close HTMLSNIP ;
+ if ( $storefiles ) { close HTMLSNIP ; }
}
}
- chdir($cdw);
+ if ( $storefiles ) { chdir($cdw); }
}
$datain->finish;
Deleted: trunk/community/talks/200808_debconf8/liststat
===================================================================
--- trunk/community/talks/200808_debconf8/liststat 2008-07-28 13:12:12 UTC (rev 2353)
+++ trunk/community/talks/200808_debconf8/liststat 2008-07-28 13:21:45 UTC (rev 2354)
@@ -1,8 +0,0 @@
-#!/bin/sh -x
-DATFILE="liststat.dat"
-psql -t cddlistarchives -c 'SELECT BuildQueryCDDs() ;' | \
- psql cddlistarchives \
- >"$DATFILE"
-
-sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' -e 's/[[:space:]]*|[[:space:]]*/\t/g' "$DATFILE"
-
More information about the debian-med-commit
mailing list