[med-svn] r2404 - trunk/community/talks/200808_debconf8
tille at alioth.debian.org
tille at alioth.debian.org
Mon Aug 11 00:06:26 UTC 2008
Author: tille
Date: 2008-08-11 00:06:25 +0000 (Mon, 11 Aug 2008)
New Revision: 2404
Modified:
trunk/community/talks/200808_debconf8/get-archive-pages
Log:
Further work on Alioth lists, not working yet
Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages 2008-08-10 23:36:05 UTC (rev 2403)
+++ trunk/community/talks/200808_debconf8/get-archive-pages 2008-08-11 00:06:25 UTC (rev 2404)
@@ -37,7 +37,8 @@
my @MONTHES = ('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12');
my @ROBOTS = ('Debian Installer', 'bugzilla-skolelinux', 'Archive Administrator', 'hostmaster',
'Debian-med-request', 'Debian testing watch', 'Debian Bug Tracking System',
- 'Skolelinux archive Installer', 'Debian Wiki', 'gentoo-\w+\+help');
+ 'Skolelinux archive Installer', 'Debian Wiki', 'gentoo-\w+\+help',
+ 'Debichem-commits');
## TODO: just consider mails containing these strings as SPAM
## This has to be implemented in the code below
@@ -143,11 +144,13 @@
foreach $content (@tmpdata) {
@lines = split(/(\n)/, $content);
foreach $line (@lines) {
- if ( $line =~ /^\s*$/ || $line =~ /^<!--\d+ / ||
- $line =~ /^<\/I>$/ || $line =~ /^<UL>$/ ||
+ if ( $line =~ /^\s*$/ || $line =~ /^<!--\d+ / ||
+ $line =~ /^<\/I>$/ || $line =~ /^\s*<\/?UL>\s*$/i ||
$line =~ /^<\/A><A NAME="\d+"> <\/A>$/ ) { next ; }
- if ( $line =~ /^<LI><A HREF="\d+.html">\[[-\w]+\]/ ) {
- @data = (@data, $line) ;
+ if ( ($subject) = $line =~ /^\s*<LI><A HREF="\d+.html">\[[-\w]+\]\s*(.+)$/ ) {
+ $_ = $subject ;
+ $_ =~ s/^Re:\s*//i ; # Remove Re:
+ @data = (@data, $subject) ;
} else {
@data = (@data, "$line\n" ) ;
}
More information about the debian-med-commit
mailing list