[med-svn] r2354 - trunk/community/talks/200808_debconf8

tille at alioth.debian.org tille at alioth.debian.org
Mon Jul 28 13:21:47 UTC 2008


Author: tille
Date: 2008-07-28 13:21:45 +0000 (Mon, 28 Jul 2008)
New Revision: 2354

Removed:
   trunk/community/talks/200808_debconf8/liststat
Modified:
   trunk/community/talks/200808_debconf8/archives.sql
   trunk/community/talks/200808_debconf8/get-archive-pages
Log:
Deactivate writing data from mailing list archive into separate files because using the database is preferred


Modified: trunk/community/talks/200808_debconf8/archives.sql
===================================================================
--- trunk/community/talks/200808_debconf8/archives.sql	2008-07-28 13:12:12 UTC (rev 2353)
+++ trunk/community/talks/200808_debconf8/archives.sql	2008-07-28 13:21:45 UTC (rev 2354)
@@ -144,6 +144,110 @@
  * That's why we use the shell script wrappers ...
  */
 
+
+/*******************************************
+ *
+ * Same thing as above but for whole year
+ *
+ *******************************************/
+
+/*
+ * Build a query string for several purposes
+ *   ARG1: Query to obtain wanted columns (has to return a column "feature")
+ *   ARG2: Feature that is queried (compared in the WHERE clause on listarchive)
+ * See below how this helper is used.
+ */
+CREATE OR REPLACE FUNCTION BuildQueryCDDsYearHelper(text, text)
+    RETURNS text AS '
+    DECLARE
+       IterQuery  ALIAS FOR \$1 ;
+       Feature    ALIAS FOR \$2 ;
+       ret        text ;
+       separator  text ;
+       r1         RECORD ;
+       r2         RECORD ;
+    BEGIN
+
+    -- Feature values become column aliases and string constants of the
+    -- generated query, so they go through quote_ident() / quote_literal()
+    -- to survive embedded quote characters (e.g. in author names).
+    separator := '''' ;
+    ret       := ''SELECT EXTRACT(''''year'''' FROM year) AS year'' ;
+
+    -- Outer select list: one summed column per feature value
+    FOR r1 IN EXECUTE IterQuery LOOP
+    	ret := ret || '', CAST(SUM('' || quote_ident(r1.feature) || '') AS int) AS '' || quote_ident(r1.feature) ;
+    END LOOP;
+
+    ret := ret || ''
+  FROM (
+'' ;
+
+    -- One sub-select per feature value, glued together by UNION
+    FOR r1 IN EXECUTE IterQuery LOOP
+       ret       := ret || separator || ''    SELECT date_trunc(''''year'''', yearmonth)::date AS year'' ;
+       separator := ''
+    UNION
+'';
+       -- Count only in the column of the current feature, 0 elsewhere
+       FOR r2 IN EXECUTE IterQuery LOOP
+       	   IF r1.feature = r2.feature THEN
+	      ret := ret || '', COUNT(*)'' ;
+	   ELSE
+	      ret := ret || '', 0'' ;
+	   END IF;
+	   ret := ret || '' AS '' || quote_ident(r2.feature) ;
+       END LOOP ;
+       ret := ret || ''
+       FROM listarchive 
+       WHERE '' || Feature || '' = '' || quote_literal(r1.feature) || '' GROUP BY year'';
+    END LOOP ;
+
+    ret := ret || ''
+  ) zw
+  GROUP BY year
+  ORDER BY year;'' ;
+
+    RETURN ret;
+  END; ' LANGUAGE 'plpgsql';
+
+CREATE OR REPLACE FUNCTION BuildQueryCDDsYear()
+    RETURNS text AS '
+    -- Returns the query string built by BuildQueryCDDsYearHelper where the
+    -- features are the project names found in listarchive, ordered by
+    -- their number of entries (largest first).
+    BEGIN
+
+    RETURN BuildQueryCDDsYearHelper(
+               ''SELECT project AS feature, COUNT(*) AS num FROM listarchive GROUP BY project ORDER BY num DESC;'',
+               ''project'') ;
+
+  END; ' LANGUAGE 'plpgsql';
+
+/*
+ * Build the query that returns per-year stats about the ARG2 most active
+ * authors in a specific mailing list (ARG1)
+ */
+
+CREATE OR REPLACE FUNCTION BuildQueryAuthorsYear(text, int)
+    RETURNS text AS '
+    DECLARE
+       Project    ALIAS FOR \$1 ;
+       NumAuthors ALIAS FOR \$2 ;
+       ret        text ;
+
+    BEGIN
+    -- Delegate to the yearly helper; quote_literal() keeps quotes in Project from breaking the query
+    ret := BuildQueryCDDsYearHelper(
+               ''SELECT author AS feature, COUNT(*) AS num FROM listarchive
+                 WHERE project = '' || quote_literal(Project) || '' AND author IN (
+      SELECT author FROM (SELECT author, count(*) as anz From listarchive where project = '' || quote_literal(Project) || ''
+           GROUP BY author ORDER BY anz DESC LIMIT '' || NumAuthors || '') AS zw)
+   GROUP BY author ORDER BY num DESC;'',
+               ''author'') ;
+    return ret ;
+  END; ' LANGUAGE 'plpgsql';
+
 COMMIT;
 EOT
 

Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages	2008-07-28 13:12:12 UTC (rev 2353)
+++ trunk/community/talks/200808_debconf8/get-archive-pages	2008-07-28 13:21:45 UTC (rev 2354)
@@ -14,6 +14,10 @@
                 'Skolelinux archive Installer', 'Debian Wiki');
 my @SPAMAUTHORS = ('Pls check this new site');
 
+# If != 0 the extract of the mailing list archives is also stored in files
+# (one directory per project).  The preferred method is to use only the database.
+my $storefiles = 0;
+
 # Debian-Jr starts in 2000
 my $YEARSTART = 2000;
 
@@ -42,8 +46,10 @@
     $daten->execute() ;
     $daten->finish() ;
 
-    mkdir($project,0777);
-    chdir($project);
+    if ( $storefiles ) {
+	mkdir($project,0777);
+	chdir($project);
+    }
     my $URL="${BASEURL}-${project}";
     my $year;
     my $month;
@@ -54,7 +60,9 @@
 	    }
 	    my $url = "${URL}/${year}/${month}/";
 	    my $datafile = "${year}-${month}" ;
-	    unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open $datafile"); }
+	    if ( $storefiles ) {
+		unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open $datafile"); }
+	    }
 	    my $messagelines = 0;
 	    my $spamlines    = 0;
 	    my $robotlines   = 0;
@@ -64,7 +72,7 @@
 		my $indexpage = $ua->get($url, Host => $uri->host );
 		unless ( $indexpage->is_success ) { # some mailing lists startet later ...
 		    $url = '';
-		    close HTMLSNIP ;
+		    if ( $storefiles ) { close HTMLSNIP ; }
 		    # remove empty file
 		    unlink($datafile);
 		    next;
@@ -125,7 +133,9 @@
 					}
 				    }
 				    if ( $robotflag == 0 ) {
-					print HTMLSNIP "$subject ; $author\n";
+					if ( $storefiles ) {
+					    print HTMLSNIP "$subject ; $author\n";
+					}
 					$datain->execute($project, "$year-$month-01", $author, $subject,
 							 "${URL}/${year}/${month}/$msgurl") ;
 					$messagelines++ ;
@@ -142,7 +152,9 @@
 				} else {
 				    $url = '';
 				}
-				print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+				if ( $storefiles ) {
+				    print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+				}
 				if ( $messages != $messagelines + $spamlines + $robotlines ) {
 				    print "Warning: $project $year/$month counted $messagelines Messages, $spamlines SPAM and $robotlines robots but page says $messages\n";
 				}
@@ -163,10 +175,10 @@
 		    }
 		}
 	    }
-	    close HTMLSNIP ;
+	    if ( $storefiles ) { close HTMLSNIP ; }
 	}
     }
-    chdir($cdw);
+    if ( $storefiles ) { chdir($cdw); }
 }
 
 $datain->finish;

Deleted: trunk/community/talks/200808_debconf8/liststat
===================================================================
--- trunk/community/talks/200808_debconf8/liststat	2008-07-28 13:12:12 UTC (rev 2353)
+++ trunk/community/talks/200808_debconf8/liststat	2008-07-28 13:21:45 UTC (rev 2354)
@@ -1,8 +0,0 @@
-#!/bin/sh -x
-DATFILE="liststat.dat"
-psql -t cddlistarchives -c 'SELECT BuildQueryCDDs() ;' | \
-   psql cddlistarchives \
-   >"$DATFILE"
-
-sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' -e 's/[[:space:]]*|[[:space:]]*/\t/g' "$DATFILE"
-




More information about the debian-med-commit mailing list