[med-svn] r2651 - trunk/community/talks/200808_debconf8

tille at alioth.debian.org tille at alioth.debian.org
Tue Nov 11 12:59:48 UTC 2008


Author: tille
Date: 2008-11-11 12:59:48 +0000 (Tue, 11 Nov 2008)
New Revision: 2651

Modified:
   trunk/community/talks/200808_debconf8/0fix_ralf_edu
   trunk/community/talks/200808_debconf8/archives.sql
   trunk/community/talks/200808_debconf8/author_stats
   trunk/community/talks/200808_debconf8/get-archive-pages
   trunk/community/talks/200808_debconf8/list_stats
Log:
Update for list_stats scripts


Modified: trunk/community/talks/200808_debconf8/0fix_ralf_edu
===================================================================
--- trunk/community/talks/200808_debconf8/0fix_ralf_edu	2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/0fix_ralf_edu	2008-11-11 12:59:48 UTC (rev 2651)
@@ -3,7 +3,7 @@
 # so many variants of spelling in the index - but tis disturbs
 # the stats and so it is fixed here
 
-psql cddlistarchives << EOT
+psql listarchives << EOT
 begin;
 update listarchive set author = 'Ralf Gesellensetter' where project = 'edu' and author like 'Ralf%setter' ;
 commit;

Modified: trunk/community/talks/200808_debconf8/archives.sql
===================================================================
--- trunk/community/talks/200808_debconf8/archives.sql	2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/archives.sql	2008-11-11 12:59:48 UTC (rev 2651)
@@ -1,8 +1,8 @@
 #!/bin/sh
 
-createdb cddlistarchives
+createdb listarchives
 
-psql cddlistarchives <<EOT
+psql listarchives <<EOT
 
 BEGIN;
 

Modified: trunk/community/talks/200808_debconf8/author_stats
===================================================================
--- trunk/community/talks/200808_debconf8/author_stats	2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/author_stats	2008-11-11 12:59:48 UTC (rev 2651)
@@ -11,8 +11,8 @@
 
 NAME=authorstat_"$1"
 DATFILE="${NAME}_year.dat"
-psql -t cddlistarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | \
-   psql cddlistarchives \
+psql -t listarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | \
+   psql listarchives \
    >"$DATFILE"
       
 sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' \
@@ -42,7 +42,8 @@
 # plotcolors=rainbow($NUM)
 plotcolors=mycolors[1:$NUM]
 
-barplot(dmstats.mat,beside=TRUE,col=plotcolors)
+barplot(dmstats.mat,beside=TRUE,col=plotcolors,
+        main = "List activities for $1 list")
 legend(x="topleft", colnames(dmstats[,2:$ENDCOL]),fill=plotcolors, inset=0.05,
        text.col=textcolor
       )

Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages	2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/get-archive-pages	2008-11-11 12:59:48 UTC (rev 2651)
@@ -9,10 +9,10 @@
 my $BASEURL  = "http://lists.debian.org/debian" ;
 my @PROJECTS = ('med', 'edu', 'jr', 'accessibility', 'desktop', 'enterprise', 'lex',
                 'nonprofit', 'science', 'custom',
-                'boot', 'release', 'i18n', 'devel', 'project', 'kernel', 'qa') ; # ... just for the sake of interest
+                'boot', 'release', 'i18n', 'devel', 'project', 'kernel', 'qa', # ... just for the sake of interest
+                'ctte', 'curiosa', 'devel-games', 'kde', 'mentors', 'policy',
+                'security', 'user-german', 'vote', 'www') ; 
 
-@PROJECTS = ('boot', 'release', 'qa') ; # ... basically test alioth ...
-
 # Well, there is also interest in alioth lists ...
 my $BASEALIOTH = 'http://lists.alioth.debian.org/pipermail/';
 my @ALIOTHPRJ  = ('debichem-devel', 'pkg-grass-general', 'debian-live-devel', 'pkg-samba-maint' ) ;
@@ -61,7 +61,7 @@
 $day++;
 my $today = "$YEAREND-$MONTHEND-$day";
 
-my $dbname = 'cddlistarchives';
+my $dbname = 'listarchives';
 my $dbh    = DBI->connect("dbi:Pg:dbname=$dbname");
 
 my $ua = LWP::UserAgent->new( agent => 'varbot');
@@ -133,7 +133,7 @@
 	    my $spamlines    = 0;
 	    my $robotlines   = 0;
 	    while ( $url =~ /.+/ ) { # if only one page $url is set to ''
-		print "DEBUG: $year-$month: $url\n";
+		# print "DEBUG: $year-$month: $url\n";
 		my $uri = URI->new($url);
 		my $indexpage = $ua->get($url, Host => $uri->host );
 		unless ( $indexpage->is_success ) { # some mailing lists startet later ...
@@ -151,7 +151,10 @@
 		if ( $type == 0 ) {
 		    @data = $indexpage->content =~ m#.*<!--TNAVEND-->\n(.+)<hr>.*<!--BNAVSTART-->.*#gs;
 		} else {
-		    my @tmpdata = $indexpage->content =~ m#.*<b>Ending:</b> <i>[ \w]+ [ \d:]+ UTC [\d]+</i><br>\n(.+)<a name="end"><b>Last message date:</b></a>.*#gs;
+
+#          <b>Ending:</b> <i>Tue Feb 28 08:06:40 CEST 2006</i><br>
+#      <a name="end"><b>Last message date:</b></a> 
+		    my @tmpdata = $indexpage->content =~ m#.*<b>Ending:</b>\s*<i>[ \w]+ [ \d:]+ [A-Z]+ [\d]+</i><br>\n(.+)<a name="end"><b>Last message date:</b></a>.*#gs;
 		    my $tmpdata = '';
                     my $tmpline = '';
 		    foreach $content (@tmpdata) {
@@ -163,10 +166,11 @@
 				 $_ =~ /^<\/I>$/ || $_ =~ /^\s*<\/?p>\s*$/ ||
                                  $_ =~ /^\s*<\/?UL>\s*$/i ||
 				 $_ =~ /^<\/A><A NAME="\d+">&nbsp;<\/A>$/ ) { next ; }
-			    if ( ($msgurl, $subject) = $_ =~ /^\s*<LI><A HREF="(\d+.html)">\[[-\w]+\]\s*(.+)$/ ) {
+			    if ( ($msgurl, $subject) = $_ =~ /^\s*<LI><A HREF="(\d+.html)">(.+)$/ ) {
 				$_ = $subject ;
-				$_ =~ s/^\s*Re:\s*//i ;       # Remove Re:
-				$_ =~ s/^\s*//i ;             # Remove blanks
+				s/^\s*\[[-\w]+\]\s*// ; # Remove list name in [] if exists
+				s/^\s*Re:\s*//i ;       # Remove Re:
+				s/^\s*//i ;             # Remove blanks
 				$tmpline = $msgurl . $SEPARATOR . $subject ;
 			    } else {
 				if ( $_ =~ /<I>/ || $_ =~ /<b>Messages:<\/b>/ ) {
@@ -180,6 +184,7 @@
 		    }
 		    @data = ($tmpdata);
 		}
+		$messages = 0;
 		foreach $content (@data) {
 		    @lines = split(/(\n)/, $content);
 		    my $linestart = '';
@@ -202,7 +207,6 @@
 			     $line =~ /^\s*<li><em>Message not available<\/em>/ ||
 			     $line =~ /<em>\(continued\)<\/em>\s*$/ ||
 			     $line =~ /^\s*$/) { next ; }
-			# print "DEBUG: $line\n";
 			if ( $storefiles ) {
 			    print HTMLSNIP "$line\n";
 			}
@@ -281,9 +285,11 @@
 				    }
 				}
 			    } else {
-				if ( ($messages) = $line =~ m#^\s*<b>Messages:</b>\s*(\d+)<p>#gs ) {
-				    if ( $storefiles ) {
-					print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+				if ( $messages == 0 ) {
+				    if ( ($messages) = $line =~ m#^\s*<b>Messages:</b>\s*(\d+)<p>#gs ) {
+					if ( $storefiles ) {
+					    print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+					}
 				    }
 				}
 			    }
@@ -307,6 +313,8 @@
 $query = $query . "UPDATE listarchive SET author = 'Francesco P. Lovergine' WHERE project = 'pkg-grass-general' AND author LIKE 'Francesco%Lovergine';" ;
 $query = $query . "UPDATE listarchive SET author = 'Christian Perrier' WHERE project = 'pkg-samba-maint' AND author = 'bubulle';" ;
 $query = $query . "UPDATE listarchive SET author = 'Steve Langasek' WHERE project = 'pkg-samba-maint' AND author = 'vorlon';" ;
+$query = $query . "UPDATE listarchive SET author = 'Adrian von Bidder' WHERE author like 'Adrian % von Bidder';" ;
+$query = $query . "UPDATE listarchive SET author = 'Thomas Bushnell BSG' WHERE author like 'Thomas Bushnell%BSG';" ;
 
 $daten = $dbh->prepare_cached($query);
 $daten->execute() ;
@@ -326,11 +334,14 @@
   s/&#232;/è/g; # this is alioths way to express the same character
   s/&#xF6;/ö/g;
   s/&#246;/ö/g; # this is alioths way to express the same character
+  s/&#xFC;/ü/g;
   s/&#xE1;/á/g;
   s/&#xF1;/ñ/g;
   s/&#xF3;/ó/g;
   s/&#xAE;/®/g;
   s/&#xE9;/é/g;
+  s/&#x15F;/ş/g;
+  s/&#xF8;/ø/g;
 
   return ($_);
 }

Modified: trunk/community/talks/200808_debconf8/list_stats
===================================================================
--- trunk/community/talks/200808_debconf8/list_stats	2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/list_stats	2008-11-11 12:59:48 UTC (rev 2651)
@@ -3,8 +3,8 @@
 # CDDs and related projects.
 
 DATFILE="liststat_year.dat"
-psql -t cddlistarchives -c 'SELECT BuildQueryCDDsYear() ;' | \
-   psql cddlistarchives \
+psql -t listarchives -c 'SELECT BuildQueryCDDsYear() ;' | \
+   psql listarchives \
    >"$DATFILE"
       
 sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' -e 's/[[:space:]]*|[[:space:]]*/\t/g' "$DATFILE"




More information about the debian-med-commit mailing list