[Python-modules-commits] [python-odf] 87/118: Many improvements to EPub output
Wolfgang Borgert
debacle at moszumanska.debian.org
Fri Oct 3 21:27:27 UTC 2014
This is an automated email from the git hooks/post-receive script.
debacle pushed a commit to reference refs/remotes/upstream/master
in repository python-odf.
commit 09b1834c73f4c41fea9a7adde0f002df14f0e641
Author: Søren Roug <soren.roug at eea.europa.eu>
Date: Sun May 16 16:42:33 2010 +0000
Many improvements to EPub output
---
contrib/odf2epub/odf2epub | 84 +++++++++++++++++++----------
odf/load.py | 2 +-
odf/odf2xhtml.py | 131 ++++++++++++++++++++++++----------------------
3 files changed, 127 insertions(+), 90 deletions(-)
diff --git a/contrib/odf2epub/odf2epub b/contrib/odf2epub/odf2epub
index 8a77b57..53f51fb 100755
--- a/contrib/odf2epub/odf2epub
+++ b/contrib/odf2epub/odf2epub
@@ -19,6 +19,7 @@
#
from odf.odf2xhtml import ODF2XHTML
from odf.namespaces import TEXTNS, XLINKNS
+from odf.opendocument import load
import sys, getopt, time, zipfile
from StringIO import StringIO
from cgi import escape
@@ -68,12 +69,27 @@ class ODF2EPUB(ODF2XHTML):
# pass
def s_text_h(self, tag, attrs):
+ """ Handle a heading
+ If the heading is a level 1 heading, then split the HTML file.
+ We have to be careful, because the heading can be inside a frame or a table.
+ """
level = int(attrs[(TEXTNS,'outline-level')])
if level == 1:
- self.closetag('body')
- self.closetag('html')
+ tags_to_keep = self.htmlstack[:]
+ tags_to_close = self.htmlstack[:]
+ tags_to_close.reverse()
+ for htag,hattrs,hblock in tags_to_close:
+ if htag == 'body':
+ self.generate_footnotes()
+ self._resetfootnotes()
+ self.closetag(htag)
+ # I have to do this rather ugly, as the saved header part doesn't
+ # go through the self.opentag() method
self.chapters.append(''.join(self.headerpart + self.lines))
self.lines = []
+ self.htmlstack = tags_to_keep[:2] # Only <html> and <body>
+ for htag,hattrs,hblock in tags_to_keep[2:]:
+ self.opentag(htag,hattrs,hblock)
return ODF2XHTML.s_text_h(self, tag, attrs)
def e_text_h(self, tag, attrs):
@@ -83,12 +99,16 @@ class ODF2EPUB(ODF2XHTML):
if level < 1: level = 1
lev = self.headinglevels[1:level+1]
outline = '.'.join(map(str,lev) )
- anchor = self.get_anchor("%s.%s" % ( outline, ''.join(self.data)))
- n = NavpointEntry(anchor, ''.join(self.data), len(self.chapters), level)
+ heading = ''.join(self.data)
+ anchor = self.get_anchor("%s.%s" % ( outline, heading))
+ n = NavpointEntry(anchor, heading, len(self.chapters), level)
self.navpoint_list.append(n)
return ODF2XHTML.e_text_h(self, tag, attrs)
def s_office_text(self, tag, attrs):
+ """ Save all the lines up to and including the <body> tag
+ so I can split the file into more files
+ """
ODF2XHTML.s_office_text(self, tag, attrs)
self.headerpart = self.lines
self.lines = []
@@ -130,16 +150,18 @@ class EPublication:
<dc:language>%s</dc:language>
<dc:identifier id="BookID" opf:scheme="URI">%s</dc:identifier>
<dc:creator>%s</dc:creator>
+ %s
</metadata>
<manifest>
- <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>"""
+ <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
+ <item id="styles-css" href="styles.css" media-type="text/css"/>"""
toc_ncx_head = """<?xml version="1.0"?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
- <head>
+ <head>
<meta name="dtb:uid" content="%s"/>
<meta name="dtb:depth" content="2"/>
<meta name="dtb:totalPageCount" content="0"/>
@@ -159,10 +181,11 @@ class EPublication:
</ncx>"""
def __init__(self, filename, coverimage):
- self.filename = filename
+ self.doc = load(filename)
self.coverimage = coverimage
self.odhandler = ODF2EPUB(True, False)
- self.odhandler.load(filename)
+ self.odhandler.set_style_file("styles.css")
+ self.odhandler.load(self.doc)
def _zipwrite(self, outputfp):
""" Write the document to an open file pointer """
@@ -181,6 +204,12 @@ class EPublication:
zout.external_attr = UNIXPERMS
outputfp.writestr(zout, self.container)
+ # Write CSS part
+ zout = zipfile.ZipInfo('OEBPS/styles.css', now)
+ zout.compress_type = zipfile.ZIP_DEFLATED
+ zout.external_attr = UNIXPERMS
+ outputfp.writestr(zout, self.odhandler.css())
+
# Write HTML parts
for chapter in range(len(self.odhandler.chapters)):
zout = zipfile.ZipInfo('OEBPS/chapter%d.xhtml' % chapter, now)
@@ -190,35 +219,37 @@ class EPublication:
outputfp.writestr(zout, xhtml)
# Copy images over to output
- z = zipfile.ZipFile(self.filename)
- for zinfo in z.infolist():
- if zinfo.filename[0:9] == 'Pictures/':
- zipinfo = zipfile.ZipInfo("OEBPS/" + zinfo.filename, now)
- zipinfo.external_attr = UNIXPERMS
- outputfp.writestr(zipinfo, z.read(zinfo.filename))
+ for arcname, picturerec in self.doc.Pictures.items():
+ what_it_is, fileobj, mediatype = picturerec
+ zi = zipfile.ZipInfo("OEBPS/" + str(arcname), now)
+ zi.compress_type = zipfile.ZIP_STORED
+ zi.external_attr = UNIXPERMS
+ outputfp.writestr(zi, fileobj)
# Write content.opf
zout = zipfile.ZipInfo('OEBPS/content.opf', now)
zout.compress_type = zipfile.ZIP_DEFLATED
zout.external_attr = UNIXPERMS
opf = []
+ if self.coverimage:
+ covermeta = """<meta name="cover" content="cover-image"/>"""
+ else:
+ covermeta = ""
opf.append(self.content_opf_head % (escaped(self.odhandler.title), escaped(self.odhandler.language),
- escaped(args[0]), escaped(self.odhandler.creator)))
+ escaped(args[0]), escaped(self.odhandler.creator), covermeta))
if self.coverimage:
- opf.append(""" <item id="cover" href="cover.xhtml" media-type="application/xhtml+xml"/>""")
+ opf.append(""" <item id="cover-page" href="cover.xhtml" media-type="application/xhtml+xml"/>""")
opf.append(""" <item id="cover-image" href="Pictures/cover.jpg" media-type="image/jpeg"/>""")
for chapter in range(len(self.odhandler.chapters)):
opf.append(""" <item id="chapter%d.xhtml" href="chapter%d.xhtml" media-type="application/xhtml+xml"/>""" % (chapter, chapter))
# Write manifest of images.
- # FIXME: Set correct media-type
- # FIXME: Provide a valid identifier
- for zname in z.namelist():
- if zname[0:9] == 'Pictures/':
- opf.append(""" <item id="%s" href="%s" media-type="image/jpeg"/>""" % (zname, zname))
+ for arcname, picturerec in self.doc.Pictures.items():
+ what_it_is, fileobj, mediatype = picturerec
+ opf.append(""" <item id="%s" href="%s" media-type="%s"/>""" % (arcname.replace('/','_'), arcname, mediatype))
opf.append("""</manifest>""")
opf.append("""<spine toc="ncx">""")
if self.coverimage:
- opf.append(""" <itemref idref="cover" linear="no"/>""")
+ opf.append(""" <itemref idref="cover-page" linear="no"/>""")
for chapter in range(len(self.odhandler.chapters)):
opf.append(""" <itemref idref="chapter%d.xhtml"/>""" % chapter)
opf.append("""</spine>""")
@@ -231,8 +262,6 @@ class EPublication:
opf.append('</package>')
outputfp.writestr(zout, '\n'.join(opf))
- z.close()
-
# Write toc.ncx
zout = zipfile.ZipInfo('OEBPS/toc.ncx', now)
zout.compress_type = zipfile.ZIP_DEFLATED
@@ -246,16 +275,17 @@ class EPublication:
for np in self.odhandler.navpoint_list:
if np_inx == 2:
np.level = 1
+ if np.level > 2: np.level = 2
if np_inx != 2 and np.level <= np_level:
- opf.append(""" </navPoint> <!-- same level -->""");
+ opf.append(""" </navPoint>""");
if np_inx != 2 and np.level < np_level:
opf.append(""" </navPoint>""");
- opf.append(""" <navPoint id="navPoint-%d" playOrder="%d">
+ opf.append(""" <navPoint id="navPoint-%d" playOrder="%d"> <!-- L%d -->
<navLabel>
<text>%s</text>
</navLabel>
<content src="chapter%d.xhtml#%s"/>
- """ % (np_inx, np_inx, escaped(np.title), np.chapter, np.anchor))
+ """ % (np_inx, np_inx, np.level, escaped(np.title), np.chapter, np.anchor))
np_inx += 1
np_level = np.level
opf.append(""" </navPoint>""");
diff --git a/odf/load.py b/odf/load.py
index 1f0e45e..cdcc794 100644
--- a/odf/load.py
+++ b/odf/load.py
@@ -63,7 +63,7 @@ class LoadParser(handler.ContentHandler):
self.level = self.level + 1
# Add any accumulated text content
- content = ''.join(self.data).strip()
+ content = ''.join(self.data)
if len(content) > 0:
self.parent.addText(content, check_grammar=False)
self.data = []
diff --git a/odf/odf2xhtml.py b/odf/odf2xhtml.py
index 85397de..cb15844 100644
--- a/odf/odf2xhtml.py
+++ b/odf/odf2xhtml.py
@@ -20,13 +20,11 @@
#
#import pdb
#pdb.set_trace()
-import zipfile
-import xml.sax
-from xml.sax import handler, expatreader
-from xml.sax.xmlreader import InputSource
+from xml.sax import handler
from xml.sax.saxutils import escape, quoteattr
from xml.dom import Node
-from cStringIO import StringIO
+
+from opendocument import load
from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
@@ -358,6 +356,7 @@ class ODF2XHTML(handler.ContentHandler):
(NUMBERNS, "date-style"):(self.s_ignorexml, None),
(NUMBERNS, "number-style"):(self.s_ignorexml, None),
(NUMBERNS, "text-style"):(self.s_ignorexml, None),
+ (OFFICENS, "annotation"):(self.s_ignorexml, None),
(OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
(OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
@@ -432,6 +431,12 @@ class ODF2XHTML(handler.ContentHandler):
self.elements[(OFFICENS, u"document-content")] = (None,None)
self._resetobject()
+ def _resetfootnotes(self):
+ # Footnotes and endnotes
+ self.notedict = {}
+ self.currentnote = 0
+ self.notebody = ''
+
def _resetobject(self):
self.lines = []
self._wfunc = self._wlines
@@ -441,11 +446,13 @@ class ODF2XHTML(handler.ContentHandler):
self.creator = ''
self.data = []
self.tagstack = TagStack()
+ self.htmlstack = []
self.pstack = []
self.processelem = True
self.processcont = True
self.listtypes = {}
self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
+ self.use_internal_css = True
self.cs = StyleToCSS()
self.anchors = {}
@@ -454,10 +461,7 @@ class ODF2XHTML(handler.ContentHandler):
self.styledict = {}
self.currentstyle = None
- # Footnotes and endnotes
- self.notedict = {}
- self.currentnote = 0
- self.notebody = ''
+ self._resetfootnotes()
# Tags from meta.xml
self.metatags = []
@@ -474,6 +478,7 @@ class ODF2XHTML(handler.ContentHandler):
def opentag(self, tag, attrs={}, block=False):
""" Create an open HTML tag """
+ self.htmlstack.append((tag,attrs,block))
a = []
for key,val in attrs.items():
a.append('''%s=%s''' % (key, quoteattr(val)))
@@ -485,6 +490,8 @@ class ODF2XHTML(handler.ContentHandler):
self.writeout("\n")
def closetag(self, tag, block=True):
+ """ Close an open HTML tag """
+ self.htmlstack.pop()
self.writeout("</%s>" % tag)
if block == True:
self.writeout("\n")
@@ -574,7 +581,7 @@ class ODF2XHTML(handler.ContentHandler):
""" Get the title from the meta data and create a HTML <title>
"""
self.title = ''.join(self.data)
- self.metatags.append('<title>%s</title>\n' % escape(self.title))
+ #self.metatags.append('<title>%s</title>\n' % escape(self.title))
self.data = []
def e_dc_metatag(self, tag, attrs):
@@ -729,13 +736,9 @@ class ODF2XHTML(handler.ContentHandler):
def html_body(self, tag, attrs):
self.writedata()
- if self.generate_css:
+ if self.generate_css and self.use_internal_css:
self.opentag('style', {'type':"text/css"}, True)
self.writeout('/*<![CDATA[*/\n')
- self.writeout('\nimg { width: 100%; height: 100%; }\n')
- self.writeout('* { padding: 0; margin: 0; background-color:white; }\n')
- self.writeout('body { margin: 0 1em; }\n')
- self.writeout('ol, ul { padding-left: 2em; }\n')
self.generate_stylesheet()
self.writeout('/*]]>*/\n')
self.closetag('style')
@@ -743,6 +746,13 @@ class ODF2XHTML(handler.ContentHandler):
self.closetag('head')
self.opentag('body', block=True)
+ default_styles = """
+img { width: 100%; height: 100%; }
+* { padding: 0; margin: 0; background-color:white; }
+body { margin: 0 1em; }
+ol, ul { padding-left: 2em; }
+"""
+
def generate_stylesheet(self):
for name in self.stylestack:
styles = self.styledict.get(name)
@@ -762,6 +772,7 @@ class ODF2XHTML(handler.ContentHandler):
styles = parentstyle
self.styledict[name] = styles
# Write the styles to HTML
+ self.writeout(self.default_styles)
for name in self.stylestack:
styles = self.styledict.get(name)
css2 = self.cs.convert_styles(styles)
@@ -803,6 +814,7 @@ class ODF2XHTML(handler.ContentHandler):
self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
for metaline in self.metatags:
self.writeout(metaline)
+ self.writeout('<title>%s</title>\n' % escape(self.title))
def e_office_document_content(self, tag, attrs):
""" Last tag """
@@ -1090,20 +1102,26 @@ class ODF2XHTML(handler.ContentHandler):
self.purgedata()
def e_text_h(self, tag, attrs):
- """ Headings end """
+ """ Headings end
+ Side-effect: If there is no title in the metadata, then it is taken
+ from the first heading of any level.
+ """
self.writedata()
level = int(attrs[(TEXTNS,'outline-level')])
if level > 6: level = 6 # Heading levels go only to 6 in XHTML
if level < 1: level = 1
lev = self.headinglevels[1:level+1]
outline = '.'.join(map(str,lev) )
- anchor = self.get_anchor("%s.%s" % ( outline, ''.join(self.data)))
+ heading = ''.join(self.data)
+ if self.title == '': self.title = heading
+ anchor = self.get_anchor("%s.%s" % ( outline, heading))
self.opentag('a', {'id': anchor} )
self.closetag('a', False)
self.closetag('h%s' % level)
self.purgedata()
def s_text_line_break(self, tag, attrs):
+ """ Force a line break (<br/>) """
self.writedata()
self.emptytag('br')
self.purgedata()
@@ -1123,9 +1141,9 @@ class ODF2XHTML(handler.ContentHandler):
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
list_class = "%s_%d" % (name, level)
if self.generate_css:
- self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class })
+ self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
else:
- self.opentag('%s' % self.listtypes.get(list_class,'UL'))
+ self.opentag('%s' % self.listtypes.get(list_class,'ul'))
self.purgedata()
def e_text_list(self, tag, attrs):
@@ -1140,7 +1158,7 @@ class ODF2XHTML(handler.ContentHandler):
# textbox itself may be nested within another list.
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
list_class = "%s_%d" % (name, level)
- self.closetag(self.listtypes.get(list_class,'UL'))
+ self.closetag(self.listtypes.get(list_class,'ul'))
self.purgedata()
def s_text_list_item(self, tag, attrs):
@@ -1319,25 +1337,14 @@ class ODF2XHTML(handler.ContentHandler):
#-----------------------------------------------------------------------------
def load(self, odffile):
- self._odffile = odffile
-
- def newcss(self, doc):
- self._wfunc = self._writenothing
+ self.lines = []
+ self._wfunc = self._wlines
+ if isinstance(odffile, basestring):
+ doc = load(odffile)
+ else:
+ doc = odffile
self._walknode(doc.topnode)
- self._csslines = []
- self._wfunc = self._writecss
- self.generate_stylesheet()
- res = ''.join(self._csslines)
- del self._csslines
- return res
- def newxhtml(self, doc):
- """ Takes a document opened with load() and parses it
- The return value is the xhtml output
- """
- self._walknode(doc.topnode)
- return ''.join(self.lines)
-
def _walknode(self, node):
if node.nodeType == Node.ELEMENT_NODE:
self.startElementNS(node.qname, node.tagName, node.attributes)
@@ -1347,26 +1354,6 @@ class ODF2XHTML(handler.ContentHandler):
if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
self.characters(unicode(node))
- def parseodf(self):
- self._resetobject()
- # Extract the interesting files
- z = zipfile.ZipFile(self._odffile)
-
- # For some reason Trac has trouble when xml.sax.make_parser() is used.
- # Could it be because PyXML is installed, and therefore a different parser
- # might be chosen? By calling expatreader directly we avoid this issue
- parser = expatreader.create_parser()
- parser.setFeature(handler.feature_namespaces, 1)
- parser.setContentHandler(self)
- parser.setErrorHandler(handler.ErrorHandler())
- inpsrc = InputSource()
-
- for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
- self.xmlfile = xmlfile
- content = z.read(xmlfile)
- inpsrc.setByteStream(StringIO(content))
- parser.parse(inpsrc)
- z.close()
def odf2xhtml(self, odffile):
""" Load a file and return XHTML
@@ -1378,9 +1365,13 @@ class ODF2XHTML(handler.ContentHandler):
if s != '': self.lines.append(s)
def xhtml(self):
- self.lines = []
- self._wfunc = self._wlines
- self.parseodf()
+ """ Parses the document and returns the HTML content """
+ return ''.join(self.lines)
+
+ def newxhtml(self, doc):
+ """ Takes a document opened with load() and parses it
+ The return value is the xhtml output
+ """
return ''.join(self.lines)
def _writecss(self, s):
@@ -1389,12 +1380,28 @@ class ODF2XHTML(handler.ContentHandler):
def _writenothing(self, s):
pass
+ def newcss(self, doc):
+ self._csslines = []
+ self._wfunc = self._writecss
+ self.generate_stylesheet()
+ res = ''.join(self._csslines)
+ self._wfunc = self._wlines
+ del self._csslines
+ return res
+
def css(self):
- self._wfunc = self._writenothing
- self.parseodf()
+ """ Parses the document and returns the CSS content """
self._csslines = []
self._wfunc = self._writecss
self.generate_stylesheet()
res = ''.join(self._csslines)
+ self._wfunc = self._wlines
del self._csslines
return res
+
+ def set_style_file(self, stylefilename, media=None):
+ self.use_internal_css = False
+ if media:
+ self.metatags.append('<link rel="stylesheet" type="text/css" href="%s" media="%s"/>\n' % (stylefilename,media))
+ else:
+ self.metatags.append('<link rel="stylesheet" type="text/css" href="%s"/>\n' % (stylefilename))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-odf.git
More information about the Python-modules-commits
mailing list