[med-svn] r2651 - trunk/community/talks/200808_debconf8
tille at alioth.debian.org
tille at alioth.debian.org
Tue Nov 11 12:59:48 UTC 2008
Author: tille
Date: 2008-11-11 12:59:48 +0000 (Tue, 11 Nov 2008)
New Revision: 2651
Modified:
trunk/community/talks/200808_debconf8/0fix_ralf_edu
trunk/community/talks/200808_debconf8/archives.sql
trunk/community/talks/200808_debconf8/author_stats
trunk/community/talks/200808_debconf8/get-archive-pages
trunk/community/talks/200808_debconf8/list_stats
Log:
Update for list_stats scripts
Modified: trunk/community/talks/200808_debconf8/0fix_ralf_edu
===================================================================
--- trunk/community/talks/200808_debconf8/0fix_ralf_edu 2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/0fix_ralf_edu 2008-11-11 12:59:48 UTC (rev 2651)
@@ -3,7 +3,7 @@
# so many variants of spelling in the index - but this disturbs
# the stats and so it is fixed here
-psql cddlistarchives << EOT
+psql listarchives << EOT
begin;
update listarchive set author = 'Ralf Gesellensetter' where project = 'edu' and author like 'Ralf%setter' ;
commit;
Modified: trunk/community/talks/200808_debconf8/archives.sql
===================================================================
--- trunk/community/talks/200808_debconf8/archives.sql 2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/archives.sql 2008-11-11 12:59:48 UTC (rev 2651)
@@ -1,8 +1,8 @@
#!/bin/sh
-createdb cddlistarchives
+createdb listarchives
-psql cddlistarchives <<EOT
+psql listarchives <<EOT
BEGIN;
Modified: trunk/community/talks/200808_debconf8/author_stats
===================================================================
--- trunk/community/talks/200808_debconf8/author_stats 2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/author_stats 2008-11-11 12:59:48 UTC (rev 2651)
@@ -11,8 +11,8 @@
NAME=authorstat_"$1"
DATFILE="${NAME}_year.dat"
-psql -t cddlistarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | \
- psql cddlistarchives \
+psql -t listarchives -c "SELECT BuildQueryAuthorsYear('$1', $NUM) ;" | \
+ psql listarchives \
>"$DATFILE"
sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' \
@@ -42,7 +42,8 @@
# plotcolors=rainbow($NUM)
plotcolors=mycolors[1:$NUM]
-barplot(dmstats.mat,beside=TRUE,col=plotcolors)
+barplot(dmstats.mat,beside=TRUE,col=plotcolors,
+ main = "List activities for $1 list")
legend(x="topleft", colnames(dmstats[,2:$ENDCOL]),fill=plotcolors, inset=0.05,
text.col=textcolor
)
Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages 2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/get-archive-pages 2008-11-11 12:59:48 UTC (rev 2651)
@@ -9,10 +9,10 @@
my $BASEURL = "http://lists.debian.org/debian" ;
my @PROJECTS = ('med', 'edu', 'jr', 'accessibility', 'desktop', 'enterprise', 'lex',
'nonprofit', 'science', 'custom',
- 'boot', 'release', 'i18n', 'devel', 'project', 'kernel', 'qa') ; # ... just for the sake of interest
+ 'boot', 'release', 'i18n', 'devel', 'project', 'kernel', 'qa', # ... just for the sake of interest
+ 'ctte', 'curiosa', 'devel-games', 'kde', 'mentors', 'policy',
+ 'security', 'user-german', 'vote', 'www') ;
-@PROJECTS = ('boot', 'release', 'qa') ; # ... basically test alioth ...
-
# Well, there is also interest in alioth lists ...
my $BASEALIOTH = 'http://lists.alioth.debian.org/pipermail/';
my @ALIOTHPRJ = ('debichem-devel', 'pkg-grass-general', 'debian-live-devel', 'pkg-samba-maint' ) ;
@@ -61,7 +61,7 @@
$day++;
my $today = "$YEAREND-$MONTHEND-$day";
-my $dbname = 'cddlistarchives';
+my $dbname = 'listarchives';
my $dbh = DBI->connect("dbi:Pg:dbname=$dbname");
my $ua = LWP::UserAgent->new( agent => 'varbot');
@@ -133,7 +133,7 @@
my $spamlines = 0;
my $robotlines = 0;
while ( $url =~ /.+/ ) { # if only one page $url is set to ''
- print "DEBUG: $year-$month: $url\n";
+ # print "DEBUG: $year-$month: $url\n";
my $uri = URI->new($url);
my $indexpage = $ua->get($url, Host => $uri->host );
unless ( $indexpage->is_success ) { # some mailing lists startet later ...
@@ -151,7 +151,10 @@
if ( $type == 0 ) {
@data = $indexpage->content =~ m#.*<!--TNAVEND-->\n(.+)<hr>.*<!--BNAVSTART-->.*#gs;
} else {
- my @tmpdata = $indexpage->content =~ m#.*<b>Ending:</b> <i>[ \w]+ [ \d:]+ UTC [\d]+</i><br>\n(.+)<a name="end"><b>Last message date:</b></a>.*#gs;
+
+# <b>Ending:</b> <i>Tue Feb 28 08:06:40 CEST 2006</i><br>
+# <a name="end"><b>Last message date:</b></a>
+ my @tmpdata = $indexpage->content =~ m#.*<b>Ending:</b>\s*<i>[ \w]+ [ \d:]+ [A-Z]+ [\d]+</i><br>\n(.+)<a name="end"><b>Last message date:</b></a>.*#gs;
my $tmpdata = '';
my $tmpline = '';
foreach $content (@tmpdata) {
@@ -163,10 +166,11 @@
$_ =~ /^<\/I>$/ || $_ =~ /^\s*<\/?p>\s*$/ ||
$_ =~ /^\s*<\/?UL>\s*$/i ||
$_ =~ /^<\/A><A NAME="\d+"> <\/A>$/ ) { next ; }
- if ( ($msgurl, $subject) = $_ =~ /^\s*<LI><A HREF="(\d+.html)">\[[-\w]+\]\s*(.+)$/ ) {
+ if ( ($msgurl, $subject) = $_ =~ /^\s*<LI><A HREF="(\d+.html)">(.+)$/ ) {
$_ = $subject ;
- $_ =~ s/^\s*Re:\s*//i ; # Remove Re:
- $_ =~ s/^\s*//i ; # Remove blanks
+ s/^\s*\[[-\w]+\]\s*// ; # Remove list name in [] if exists
+ s/^\s*Re:\s*//i ; # Remove Re:
+ s/^\s*//i ; # Remove blanks
$tmpline = $msgurl . $SEPARATOR . $subject ;
} else {
if ( $_ =~ /<I>/ || $_ =~ /<b>Messages:<\/b>/ ) {
@@ -180,6 +184,7 @@
}
@data = ($tmpdata);
}
+ $messages = 0;
foreach $content (@data) {
@lines = split(/(\n)/, $content);
my $linestart = '';
@@ -202,7 +207,6 @@
$line =~ /^\s*<li><em>Message not available<\/em>/ ||
$line =~ /<em>\(continued\)<\/em>\s*$/ ||
$line =~ /^\s*$/) { next ; }
- # print "DEBUG: $line\n";
if ( $storefiles ) {
print HTMLSNIP "$line\n";
}
@@ -281,9 +285,11 @@
}
}
} else {
- if ( ($messages) = $line =~ m#^\s*<b>Messages:</b>\s*(\d+)<p>#gs ) {
- if ( $storefiles ) {
- print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+ if ( $messages == 0 ) {
+ if ( ($messages) = $line =~ m#^\s*<b>Messages:</b>\s*(\d+)<p>#gs ) {
+ if ( $storefiles ) {
+ print HTMLSNIP "$messages Messages ($messagelines real messages, $spamlines SPAM, $robotlines messages by robots)\n";
+ }
}
}
}
@@ -307,6 +313,8 @@
$query = $query . "UPDATE listarchive SET author = 'Francesco P. Lovergine' WHERE project = 'pkg-grass-general' AND author LIKE 'Francesco%Lovergine';" ;
$query = $query . "UPDATE listarchive SET author = 'Christian Perrier' WHERE project = 'pkg-samba-maint' AND author = 'bubulle';" ;
$query = $query . "UPDATE listarchive SET author = 'Steve Langasek' WHERE project = 'pkg-samba-maint' AND author = 'vorlon';" ;
+$query = $query . "UPDATE listarchive SET author = 'Adrian von Bidder' WHERE author like 'Adrian % von Bidder';" ;
+$query = $query . "UPDATE listarchive SET author = 'Thomas Bushnell BSG' WHERE author like 'Thomas Bushnell%BSG';" ;
$daten = $dbh->prepare_cached($query);
$daten->execute() ;
@@ -326,11 +334,14 @@
s/&#232;/è/g; # this is alioths way to express the same character
s/&ouml;/ö/g;
s/&#246;/ö/g; # this is alioths way to express the same character
+ s/&uuml;/ü/g;
s/&aacute;/á/g;
s/&ntilde;/ñ/g;
s/&oacute;/ó/g;
s/&reg;/®/g;
s/&eacute;/é/g;
+ s/&#351;/ş/g;
+ s/&oslash;/ø/g;
return ($_);
}
Modified: trunk/community/talks/200808_debconf8/list_stats
===================================================================
--- trunk/community/talks/200808_debconf8/list_stats 2008-11-11 12:55:27 UTC (rev 2650)
+++ trunk/community/talks/200808_debconf8/list_stats 2008-11-11 12:59:48 UTC (rev 2651)
@@ -3,8 +3,8 @@
# CDDs and related projects.
DATFILE="liststat_year.dat"
-psql -t cddlistarchives -c 'SELECT BuildQueryCDDsYear() ;' | \
- psql cddlistarchives \
+psql -t listarchives -c 'SELECT BuildQueryCDDsYear() ;' | \
+ psql listarchives \
>"$DATFILE"
sed -i -e '/^[-+]\+$/d' -e '/^([0-9]\+ [A-Za-z]\+)$/d' -e 's/[[:space:]]*|[[:space:]]*/\t/g' "$DATFILE"
More information about the debian-med-commit
mailing list