[med-svn] r2326 - trunk/community/talks/200808_debconf8
tille at alioth.debian.org
tille at alioth.debian.org
Sat Jul 26 15:30:39 UTC 2008
Author: tille
Date: 2008-07-26 15:30:38 +0000 (Sat, 26 Jul 2008)
New Revision: 2326
Modified:
trunk/community/talks/200808_debconf8/get-archive-pages
Log:
Clean up index pages
Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages 2008-07-26 14:26:11 UTC (rev 2325)
+++ trunk/community/talks/200808_debconf8/get-archive-pages 2008-07-26 15:30:38 UTC (rev 2326)
@@ -37,11 +37,32 @@
my $uri = URI->new($url);
my $page = $ua->get($url, Host => $uri->host );
unless ( $page->is_success ) { next } ; # some mailing lists startet later ...
- (my @data) = $page->content =~ m#.*<!--TNAVEND-->\n(.+)\n<hr>\n.*#gs;
- #print "$year-$month\n@data\n";
+ (my @data) = $page->content =~ m#.*<!--TNAVEND-->\n(.+)<hr>.*<!--BNAVSTART-->.*#gs;
+ #print "$year-$month\n$data\n";
my $datafile = "${year}-${month}" ;
unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open $datafile"); }
- print HTMLSNIP "@data";
+ my ($content, $subject, $author) ;
+ foreach $content (@data) {
+ my @lines = split(/(\n)/, $content);
+ # print "------> @lines\n" ;
+ my $line;
+ foreach $line (@lines) {
+ if ( $line =~ /^\s*<\/?ul>\s*$/ ||
+ $line =~ /^\s*<\/?li>\s*$/ ||
+ $line =~ /^\s*<li>[^<]+<\/li>\s*$/ ||
+ $line =~ /^\s*$/) { next ; }
+ if ( ($subject, $author) = $line =~ m#<li><strong>.*html">(.+)</a></strong>\s*<em>(.+)</em>#gs ) {
+ $_ = $subject ;
+ $_ =~ s/^Re:\s*//i ; # Remove Re:
+ $_ =~ s/^\[[^\]]+\]\s*// ; # Remove other list markers
+ $_ =~ s/\s*\(fwd\)\s*//i ; # Remove (fwd)
+ $subject = $_ ;
+ print HTMLSNIP "$subject ; $author\n";
+ } else {
+ print HTMLSNIP "$line\n";
+ }
+ }
+ }
close HTMLSNIP ;
}
}
More information about the debian-med-commit
mailing list