[debian-edu-commits] debian-edu/ 01/01: Created get_pages which fetches all pages from AllInOne.

Alexander Alemayhu alexander at bitraf.no
Sun Mar 22 15:23:29 UTC 2015


This is an automated email from the git hooks/post-receive script.

ccscanf-guest pushed a commit to branch master
in repository debian-edu-itil-doc-nb.

commit bafb26439830988a9698d7571586e4385602665f
Author: Marius Halden <marius.h at lden.org>
Date:   Sun Mar 22 16:18:08 2015 +0100

    Created get_pages which fetches all pages from AllInOne.
---
 documentation/scripts/get_pages | 125 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

diff --git a/documentation/scripts/get_pages b/documentation/scripts/get_pages
new file mode 100755
index 0000000..3cd84b2
--- /dev/null
+++ b/documentation/scripts/get_pages
@@ -0,0 +1,125 @@
+#!/bin/sh
+#
+# download the AllInOne page of a manual on wiki.debian.org as separate pages.
+#
+# very loosely based on the moinmoin2pdf script from Petter Reinholdtsen
+#
+# Author/Copyright:	Holger Levsen
+# Licence:		GPL2+
+# first edited:		2006-07-06
+# last edited:		2009-05-30
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+set -x
+set -u
+
+# Filter stdin to stdout, giving every <section> a unique id.  An id is file--subsection, or
+# file--section--subsection--subsubsection on a clash.  The file part is derived from $ASCIINAME.
+unique_section_ids() {
+    # Emulate moin's anchor_name_from_text(); the name is passed via %ENV so it is data, not Perl code
+    ASCIINAME="${ASCIINAME}" perl -MURI::Escape -pe 'my $file = escape($ENV{ASCIINAME}); my @h = $file;' \
+	-e 'my %ids; sub escape {
+        my $s = shift;
+	$s=~s/ /_/g;
+	$s=~s/~/+/g;
+	$s=~s/"/.22/g;
+	$s=~s/\+/+-/g;
+	$s=~s/\//+AC8/g;
+	$s=~s/²/+ALI-/g;
+	$s=uri_escape($s);
+	$s=~s/%/./g;
+	$s=~s/\.3A/:/g;
+	return $s;
+    }; sub anchor{
+        my ($pre, $title) = @_;
+        if ("</section>" eq $pre) {
+            pop @h;
+	    return "$pre";
+        } else {
+            my ($s) = $title =~ m%<title>(.+)</title>%;
+            $s = escape($s);
+            push(@h, $s);
+            my $id = "$file--$s";
+            $id = join("--", @h) if (exists $ids{$id});
+            my $retval = "<section id=\"$id\">$title";
+            $ids{$id} = 1;
+#           print STDERR "S: $retval\n";
+            return $retval;
+        }
+    }
+    s%(</?section>)(<title>[^<]*?</title>)?%&anchor($1, $2)%eg;'
+}
+
+# the sed expressions strip the "<<Include(" / ")>>" wrappers, "<<TableOfContents(1" and $path1
+# the final sed expression removes the last character of each line, doing the same as dos2unix
+# head at the end chops off the last two lines with the Category:Permalink entry
+PERL_LWP_SSL_VERIFY_HOSTNAME=0 GET -H User-Agent: "${url}AllInOne?action=raw" | sed -e "s%<<Include(%%g" -e "s%)>>%%g" -e 's/<<TableOfContents(1//' -e "s%$path1%%g" -e 's/.$//' | head -n -2 > id
+
+for i in `cat id` ; do
+	NAME=`echo "${i}" |sed "s/\(.*\)\/\(.*\)/\2/" `
+	# The ø -> oe conversion is a workaround for bug #657511; sed is used because tr maps single bytes only.
+	ASCIINAME=$(echo "$NAME" | sed 's/ø/oe/g' | iconv -t ASCII//TRANSLIT)
+	TARGET=${NAME}.xml
+	echo "$TARGET		${url}${i}?action=show&mimetype=text/docbook"
+	# download the docbook version of this page from the wiki and pipe it through filters to
+	#   - convert <code> tag to <computeroutput> as this is understood by docbook (tools)
+	#   - provide correct path to the images
+	#   - wrap <remark> elements in XML comments
+	#   - turn links into the wiki manual into links local to the document
+	#   - remove the CategoryPermalink paragraph and make the section ids unique
+	#   - add some linebreaks
+	#   - delete the first four lines (expected to hold the XML declaration)
+	PERL_LWP_SSL_VERIFY_HOSTNAME=0 GET "${url}${i}?action=show&mimetype=text/docbook" | 
+	# replace tags:
+	sed "s%code>%computeroutput>%g" |
+	sed "s%/htdocs/rightsidebar/img/%./images/%g" |
+	# remove initial and final tags (currently disabled):
+	#perl -pe "s%</?article>%%g" |
+	# remove tags and enclosed content (currently disabled):
+	#sed "s#<articleinfo>\(.*\)</articleinfo>##g" |
+	# Comment useless remarks from XML: they just show an ugly drawing in XML
+	perl -pe "s%<remark>.*?</remark>%<!-- $& -->%g" |
+	# Broken URL: workaround to #656945
+	sed "s%<ulink url=\"https://wiki.debian.org/${path1}${i}/%<ulink url=\"https://wiki.debian.org/%g" |
+	# Make wiki self links actually local
+	sed "s%<link linkend=\"%<link linkend=\"${ASCIINAME}--%g" |
+	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#\">(.*?)</ulink>%<link linkend='\2'>\3</link>%g" |
+	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#(.*?)\">(.*?)</ulink>%<link linkend='\2--\3'>\4</link>%g" |
+	perl -000 -pe "s%<para><ulink url=\"https://wiki.debian.org/CategoryPermalink#\">CategoryPermalink</ulink>\s*</para>%%" |
+	unique_section_ids |
+	# introduce line breaks:
+	sed "s%<title>%\n<title>%g" |
+	sed "s%<\/title>%\n<\/title>%g" |
+	sed "s%<section%\n\n<section%g" |
+	sed "s%<\/section>%\n<\/section>%g" |
+	sed "s%<para>%\n<para>%g" |
+	sed "s%<\/para>%\n<\/para>%g" |
+	sed "s%FIXME%\nFIXME%g" |
+	sed "s%<itemizedlist>%\n<itemizedlist>%" |
+	sed "s%<listitem>%\n<listitem>%" |
+	# cut off the first four lines:
+	sed '1,4d' > "$TARGET"
+done
+
+sed -i "s/\(.*\)\/\(.*\)/\2/" id
+
+# turn links into internal references if appropriate
+# this needs to run after ./get_images
+#
+#  -0\777  read multiple lines
+for i in `cat id` ; do
+	perl -0\777 -pi -e "s/<ulink url=\"$path2(.*)\/(.*)\">(.*)\n<\/ulink>/<link linkend=\"\2\">\3<\/link>/g" ${i}.xml
+
+	# make it a docbook article again
+	sed -i "0,/</ s#<#<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\">\n<#" ${i}.xml
+done
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-edu/upstream/debian-edu-itil-doc-nb.git



More information about the debian-edu-commits mailing list