[med-svn] r2326 - trunk/community/talks/200808_debconf8

tille at alioth.debian.org tille at alioth.debian.org
Sat Jul 26 15:30:39 UTC 2008


Author: tille
Date: 2008-07-26 15:30:38 +0000 (Sat, 26 Jul 2008)
New Revision: 2326

Modified:
   trunk/community/talks/200808_debconf8/get-archive-pages
Log:
Clean up index pages


Modified: trunk/community/talks/200808_debconf8/get-archive-pages
===================================================================
--- trunk/community/talks/200808_debconf8/get-archive-pages	2008-07-26 14:26:11 UTC (rev 2325)
+++ trunk/community/talks/200808_debconf8/get-archive-pages	2008-07-26 15:30:38 UTC (rev 2326)
@@ -37,11 +37,32 @@
 	    my $uri = URI->new($url);
 	    my $page = $ua->get($url, Host => $uri->host );
 	    unless ( $page->is_success ) { next } ; # some mailing lists startet later ...
-	    (my @data) = $page->content =~ m#.*<!--TNAVEND-->\n(.+)\n<hr>\n.*#gs;
-	    #print "$year-$month\n@data\n";
+	    (my @data) = $page->content =~ m#.*<!--TNAVEND-->\n(.+)<hr>.*<!--BNAVSTART-->.*#gs;
+	    #print "$year-$month\n$data\n";
 	    my $datafile = "${year}-${month}" ;
 	    unless ( open(HTMLSNIP, ">$datafile") ) { die("Unable to open $datafile"); }
-	    print HTMLSNIP "@data";
+	    my ($content, $subject, $author) ;
+	    foreach $content (@data) {
+		my @lines = split(/(\n)/, $content);
+		# print "------> @lines\n" ;
+		my $line;
+		foreach $line (@lines) {
+		    if ( $line =~ /^\s*<\/?ul>\s*$/ || 
+                         $line =~ /^\s*<\/?li>\s*$/ ||
+                         $line =~ /^\s*<li>[^<]+<\/li>\s*$/ ||
+                         $line =~ /^\s*$/) { next ; }
+		    if ( ($subject, $author) = $line =~ m#<li><strong>.*html">(.+)</a></strong>\s*<em>(.+)</em>#gs ) {
+			$_ = $subject ;
+			$_ =~ s/^Re:\s*//i ;       # Remove Re:
+			$_ =~ s/^\[[^\]]+\]\s*// ; # Remove other list markers
+			$_ =~ s/\s*\(fwd\)\s*//i ; # Remove (fwd)
+			$subject = $_ ;
+			print HTMLSNIP "$subject ; $author\n";
+		    } else {
+			print HTMLSNIP "$line\n";
+		    }
+		}
+	    }
 	    close HTMLSNIP ;
 	}
     }




More information about the debian-med-commit mailing list