[Python-modules-commits] [python-odf] 86/118: odf2xhtml now has methods for loaded documents
Wolfgang Borgert
debacle at moszumanska.debian.org
Fri Oct 3 21:27:26 UTC 2014
This is an automated email from the git hooks/post-receive script.
debacle pushed a commit to reference refs/remotes/upstream/master
in repository python-odf.
commit 9f800bb46fa8c1babba385b098a172c9c742aba8
Author: Søren Roug <soren.roug at eea.europa.eu>
Date: Fri Apr 30 15:29:31 2010 +0000
odf2xhtml now has methods for loaded documents
---
odf/element.py | 17 ++++----
odf/odf2xhtml.py | 101 +++++++++++++++++++++++++++++++---------------
tests/elementparser.py | 98 ++++++++++++++++++++++++++++++++++++++++++++
tests/testdatastyles.py | 2 +-
tests/testload.py | 17 +++++---
tests/testmasterstyles.py | 25 +++++++++---
tests/teststyles.py | 12 +++++-
tests/testsubobjects.py | 12 +++++-
8 files changed, 228 insertions(+), 56 deletions(-)
diff --git a/odf/element.py b/odf/element.py
index ee5377d..aad6980 100644
--- a/odf/element.py
+++ b/odf/element.py
@@ -437,15 +437,14 @@ class Element(Node):
# if allowed_attrs and (namespace, localpart) not in allowed_attrs:
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
c = AttrConverters()
- self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self)
+ self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
def getAttrNS(self, namespace, localpart):
prefix = self.get_nsprefix(namespace)
- return self.attributes.get(prefix + ":" + localpart)
+ return self.attributes.get((namespace, localpart))
def removeAttrNS(self, namespace, localpart):
- prefix = self.get_nsprefix(namespace)
- del self.attributes[prefix + ":" + localpart]
+ del self.attributes[(namespace, localpart)]
def getAttribute(self, attr):
""" Get an attribute value. The method knows which namespace the attribute is in
@@ -468,8 +467,9 @@ class Element(Node):
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
- for attkey in self.attributes.keys():
- f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+ for qname in self.attributes.keys():
+ prefix = self.get_nsprefix(qname[0])
+ f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
f.write('>')
def write_close_tag(self, level, f):
@@ -481,8 +481,9 @@ class Element(Node):
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
- for attkey in self.attributes.keys():
- f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+ for qname in self.attributes.keys():
+ prefix = self.get_nsprefix(qname[0])
+ f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
if self.childNodes:
f.write('>')
for element in self.childNodes:
diff --git a/odf/odf2xhtml.py b/odf/odf2xhtml.py
index 8cf1fea..85397de 100644
--- a/odf/odf2xhtml.py
+++ b/odf/odf2xhtml.py
@@ -25,6 +25,7 @@ import xml.sax
from xml.sax import handler, expatreader
from xml.sax.xmlreader import InputSource
from xml.sax.saxutils import escape, quoteattr
+from xml.dom import Node
from cStringIO import StringIO
from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
@@ -358,6 +359,7 @@ class ODF2XHTML(handler.ContentHandler):
(NUMBERNS, "number-style"):(self.s_ignorexml, None),
(NUMBERNS, "text-style"):(self.s_ignorexml, None),
(OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+ (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "forms"):(self.s_ignorexml, None),
(OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@@ -367,6 +369,7 @@ class ODF2XHTML(handler.ContentHandler):
(OFFICENS, "styles"):(self.s_office_styles, None),
(OFFICENS, "text"):(self.s_office_text, self.e_office_text),
(OFFICENS, "scripts"):(self.s_ignorexml, None),
+ (OFFICENS, "settings"):(self.s_ignorexml, None),
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
(STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@@ -427,6 +430,37 @@ class ODF2XHTML(handler.ContentHandler):
self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
self.elements[(OFFICENS, u"presentation")] = (None,None)
self.elements[(OFFICENS, u"document-content")] = (None,None)
+ self._resetobject()
+
+ def _resetobject(self):
+ self.lines = []
+ self._wfunc = self._wlines
+ self.xmlfile = ''
+ self.title = ''
+ self.language = ''
+ self.creator = ''
+ self.data = []
+ self.tagstack = TagStack()
+ self.pstack = []
+ self.processelem = True
+ self.processcont = True
+ self.listtypes = {}
+ self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
+ self.cs = StyleToCSS()
+ self.anchors = {}
+
+ # Style declarations
+ self.stylestack = []
+ self.styledict = {}
+ self.currentstyle = None
+
+ # Footnotes and endnotes
+ self.notedict = {}
+ self.currentnote = 0
+ self.notebody = ''
+
+ # Tags from meta.xml
+ self.metatags = []
def writeout(self, s):
@@ -462,16 +496,12 @@ class ODF2XHTML(handler.ContentHandler):
self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
#--------------------------------------------------
+# Interface to parser
+#--------------------------------------------------
def characters(self, data):
if self.processelem and self.processcont:
self.data.append(data)
- def handle_starttag(self, tag, method, attrs):
- method(tag,attrs)
-
- def handle_endtag(self, tag, attrs, method):
- method(tag, attrs)
-
def startElementNS(self, tag, qname, attrs):
self.pstack.append( (self.processelem, self.processcont) )
if self.processelem:
@@ -492,6 +522,13 @@ class ODF2XHTML(handler.ContentHandler):
self.unknown_endtag(tag, attrs)
self.processelem, self.processcont = self.pstack.pop()
+#--------------------------------------------------
+ def handle_starttag(self, tag, method, attrs):
+ method(tag,attrs)
+
+ def handle_endtag(self, tag, attrs, method):
+ method(tag, attrs)
+
def unknown_starttag(self, tag, attrs):
pass
@@ -1284,34 +1321,34 @@ class ODF2XHTML(handler.ContentHandler):
def load(self, odffile):
self._odffile = odffile
- def parseodf(self):
- self.xmlfile = ''
- self.title = ''
- self.language = ''
- self.creator = ''
- self.data = []
- self.tagstack = TagStack()
- self.pstack = []
- self.processelem = True
- self.processcont = True
- self.listtypes = {}
- self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
- self.cs = StyleToCSS()
- self.anchors = {}
-
- # Style declarations
- self.stylestack = []
- self.styledict = {}
- self.currentstyle = None
-
- # Footnotes and endnotes
- self.notedict = {}
- self.currentnote = 0
- self.notebody = ''
+ def newcss(self, doc):
+ self._wfunc = self._writenothing
+ self._walknode(doc.topnode)
+ self._csslines = []
+ self._wfunc = self._writecss
+ self.generate_stylesheet()
+ res = ''.join(self._csslines)
+ del self._csslines
+ return res
- # Tags from meta.xml
- self.metatags = []
+ def newxhtml(self, doc):
+ """ Takes a document opened with load() and parses it
+ The return value is the xhtml output
+ """
+ self._walknode(doc.topnode)
+ return ''.join(self.lines)
+
+ def _walknode(self, node):
+ if node.nodeType == Node.ELEMENT_NODE:
+ self.startElementNS(node.qname, node.tagName, node.attributes)
+ for c in node.childNodes:
+ self._walknode(c)
+ self.endElementNS(node.qname, node.tagName)
+ if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
+ self.characters(unicode(node))
+ def parseodf(self):
+ self._resetobject()
# Extract the interesting files
z = zipfile.ZipFile(self._odffile)
diff --git a/tests/elementparser.py b/tests/elementparser.py
new file mode 100644
index 0000000..3aba3a4
--- /dev/null
+++ b/tests/elementparser.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2010 Søren Roug, European Environment Agency
+#
+# This is free software. You may redistribute it under the terms
+# of the Apache license and the GNU General Public License Version
+# 2 or at your option any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Contributor(s):
+#
+import unittest
+
+""" Really simplistic parser of an element with attributes """
+
+class ElementParser:
+ def __init__(self, s, elmttoparse):
+ qelements = s.split('<')
+ for i in range(len(qelements)):
+ q = qelements[i]
+ if q[:len(elmttoparse)] == elmttoparse:
+ s = '<'.join([''] + qelements[i:])
+ self.attributes = {}
+ self.element = None
+ currattr = None
+
+ buf = []
+ START = 1
+ INELEM = 2
+ SPACE = 3
+ INATTR = 4
+ INVALUE=5
+ BEFOREVALUE = 6
+ NOMORE = 7
+
+ state=START
+ ls = list(s)
+ for c in ls:
+ if state == NOMORE:
+ continue
+ if state == INVALUE: # We're in the value of the attribute. Only look for the terminator
+ if c == '"':
+ state = SPACE
+ c = ''.join(buf)
+ self.attributes[currattr] = c
+ buf = []
+ else:
+ buf.append(c)
+ else:
+ if c == '<':
+ state = INELEM
+ elif c == ' ':
+ if state == INELEM:
+ self.element = ''.join(buf)
+ buf = []
+ state = SPACE
+ elif c == '=':
+ if state == INATTR:
+ state = BEFOREVALUE
+ currattr = ''.join(buf)
+ buf = []
+ elif c == '"':
+ state = INVALUE
+ elif c == '>' or c == '/':
+ state = NOMORE
+ elif c > '"' and c <= 'z' and state == SPACE: # Start of attribute
+ state = INATTR
+ buf = []
+ buf.append(c)
+ else:
+ buf.append(c)
+
+ def has_value(self, attribute, value):
+ v = self.attributes.get(attribute, None)
+ if v and v == value: return True
+ return False
+
+class TestParser(unittest.TestCase):
+ def test1(self):
+ s='<draw:object xlink:href="./Object 1"/><style:style style:name="Standard" style:display-name="Standard" style:family="paragraph"><style:property/>'
+ e = ElementParser(s,'style:style')
+ self.assertEqual(e.element,'style:style')
+ assert e.has_value("style:display-name","Standard")
+
+ e = ElementParser(s,'draw:object')
+ self.assertEqual(e.element,'draw:object')
+ assert e.has_value("xlink:href","./Object 1")
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/testdatastyles.py b/tests/testdatastyles.py
index 711f643..9085170 100644
--- a/tests/testdatastyles.py
+++ b/tests/testdatastyles.py
@@ -25,7 +25,7 @@ from odf.number import Text,PercentageStyle, Number
from odf.table import Table,TableRow,TableCell
-class TestHeadings(unittest.TestCase):
+class TestDatastyles(unittest.TestCase):
saved = False
diff --git a/tests/testload.py b/tests/testload.py
index 6860b96..ca81cdb 100644
--- a/tests/testload.py
+++ b/tests/testload.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Copyright (C) 2008 Søren Roug, European Environment Agency
+# Copyright (C) 2008-2010 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@@ -22,7 +22,7 @@ import unittest, os, os.path
from odf.opendocument import OpenDocumentText, load
from odf import style, text
from odf.text import P, H, LineBreak
-
+from elementparser import ElementParser
class TestSimple(unittest.TestCase):
@@ -104,8 +104,13 @@ class TestExampleDocs(unittest.TestCase):
os.path.dirname(__file__), "examples", "simpletable.odt")
d = load(simpletable_odt)
result = unicode(d.contentxml(),'utf-8')
- self.assertNotEqual(-1, result.find(u"""<text:sequence-decl text:name="Text" text:display-outline-level="0"/>"""))
- self.assertNotEqual(-1, result.find(u"""<table:table table:name="Tabel1" table:style-name="Tabel1"><table:table-column table:number-columns-repeated="2" table:style-name="Tabel1.A"/>"""))
+ e = ElementParser(result,'text:sequence-decl')
+ self.assertTrue(e.has_value("text:name","Drawing")) # Last sequence
+ self.assertTrue(e.has_value("text:display-outline-level","0"))
+
+ e = ElementParser(result,'table:table-column')
+ self.assertTrue(e.has_value("table:number-columns-repeated","2"))
+ self.assertTrue(e.has_value("table:style-name","Tabel1.A"))
def test_headerfooter(self):
""" Test that styles referenced from master pages are renamed in OOo 2.x documents """
@@ -113,8 +118,8 @@ class TestExampleDocs(unittest.TestCase):
os.path.dirname(__file__), "examples", "headerfooter.odt")
d = load(simplelist_odt)
result = unicode(d.stylesxml(),'utf-8')
- self.assertNotEqual(-1, result.find(u"""style:name="MP1" """))
- self.assertNotEqual(-1, result.find(u"""style:name="MP2" """))
+ self.assertNotEqual(-1, result.find(u'''style:name="MP1"'''))
+ self.assertNotEqual(-1, result.find(u'''style:name="MP2"'''))
self.assertNotEqual(-1, result.find(u"""<style:header><text:p text:style-name="MP1">Header<text:tab/>"""))
self.assertNotEqual(-1, result.find(u"""<style:footer><text:p text:style-name="MP2">Footer<text:tab/>"""))
diff --git a/tests/testmasterstyles.py b/tests/testmasterstyles.py
index 30f92c2..baad0c5 100644
--- a/tests/testmasterstyles.py
+++ b/tests/testmasterstyles.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Copyright (C) 2007 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@@ -24,6 +24,7 @@ from odf import style, text
from odf.table import Table, TableColumn, TableRow, TableCell
from odf.element import IllegalChild
from odf.namespaces import TEXTNS
+from elementparser import ElementParser
class TestMasterStyles(unittest.TestCase):
@@ -57,10 +58,24 @@ class TestMasterStyles(unittest.TestCase):
presdoc.styles.addElement(titlestyle)
s = unicode(presdoc.stylesxml(),'UTF-8')
- # Not a good test: the attributes can come in a different order
- self.assertContains(s, u'<style:page-layout style:name="MyLayout"><style:page-layout-properties fo:margin="0cm" fo:page-width="28cm" fo:page-height="21cm" style:print-orientation="landscape"/></style:page-layout>')
- self.assertContains(s, u'<office:styles><style:style style:name="MyMaster-title" style:display-name="MyMaster-title" style:family="presentation"><style:paragraph-properties fo:text-align="center"/><style:text-properties fo:font-size="34pt"/><style:graphic-properties draw:fill-color="#ffff99"/></style:style></office:styles>')
- self.assertContains(s, u'<office:master-styles><style:master-page style:name="MyMaster" style:display-name="MyMaster" style:page-layout-name="MyLayout"/></office:master-styles>')
+ self.assertContains(s, u'<style:page-layout style:name="MyLayout"><style:page-layout-properties ')
+ e = ElementParser(s,'style:page-layout-properties')
+ self.assertEqual(e.element,'style:page-layout-properties')
+ self.assertTrue(e.has_value("fo:margin","0cm"))
+ self.assertTrue(e.has_value("fo:page-width","28cm"))
+ self.assertTrue(e.has_value("fo:page-height","21cm"))
+ self.assertTrue(e.has_value("style:print-orientation","landscape"))
+
+ e = ElementParser(s,'style:style')
+ self.assertTrue(e.has_value("style:name","MyMaster-title"))
+ self.assertTrue(e.has_value("style:display-name","MyMaster-title"))
+ self.assertTrue(e.has_value("style:family","presentation"))
+
+ self.assertContains(s, u'<style:paragraph-properties fo:text-align="center"/><style:text-properties fo:font-size="34pt"/><style:graphic-properties draw:fill-color="#ffff99"/></style:style></office:styles>')
+ e = ElementParser(s,'style:master-page')
+ self.assertTrue(e.has_value("style:name","MyMaster"))
+ self.assertTrue(e.has_value("style:display-name","MyMaster"))
+ self.assertTrue(e.has_value("style:page-layout-name","MyLayout"))
def testMasterWithHeader(self):
""" Create a text document with a page layout called "pagelayout"
diff --git a/tests/teststyles.py b/tests/teststyles.py
index 99c33a8..1fc83e5 100644
--- a/tests/teststyles.py
+++ b/tests/teststyles.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Copyright (C) 2007 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@@ -24,6 +24,8 @@ from odf import style, text
from odf.table import Table, TableColumn, TableRow, TableCell
from odf.element import IllegalChild
from odf.namespaces import TEXTNS
+from elementparser import ElementParser
+
class TestStyles(unittest.TestCase):
@@ -85,7 +87,13 @@ class TestQattributes(unittest.TestCase):
s.index(u"""<?xml version='1.0' encoding='UTF-8'?>\n""")
s.index(u'xmlns:ns35="http://foreignuri.com"')
s.index(u'<style:paragraph-properties ns35:enable-numbering="true"/>')
- s.index(u'<office:styles><style:style style:name="Standard" style:display-name="Standard" style:family="paragraph">')
+ e = ElementParser(s,'style:style')
+# e = ElementParser(u'<style:style style:name="Standard" style:display-name="Standard" style:family="paragraph">')
+ self.assertEqual(e.element,'style:style')
+ self.assertTrue(e.has_value("style:display-name","Standard"))
+ self.assertTrue(e.has_value("style:name","Standard"))
+ self.assertTrue(e.has_value("style:family","paragraph"))
+
if __name__ == '__main__':
diff --git a/tests/testsubobjects.py b/tests/testsubobjects.py
index 5cb746c..0befb3e 100644
--- a/tests/testsubobjects.py
+++ b/tests/testsubobjects.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Copyright (C) 2007 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@@ -22,6 +22,7 @@ import unittest, os, zipfile
from odf.opendocument import OpenDocumentText
from odf import draw, text
from odf.element import IllegalChild
+from elementparser import ElementParser
def _getxmlpart(odffile, xmlfile):
""" Get the content out of the ODT file"""
@@ -57,7 +58,14 @@ class TestUnicode(unittest.TestCase):
self.assertEqual(subsubloc,'./Object 1/Object 1')
c = unicode(self.textdoc.contentxml(),'UTF-8')
- c.index(u'<office:body><office:text><draw:frame svg:width="476pt" text:anchor-type="paragraph" svg:height="404pt"><draw:object xlink:href="./Object 1"/></draw:frame></office:text></office:body>')
+ c.index(u'<office:body><office:text><draw:frame ')
+ e = ElementParser(c, 'draw:frame')
+# e = ElementParser('<draw:frame svg:width="476pt" text:anchor-type="paragraph" svg:height="404pt">')
+ self.assertTrue(e.has_value('svg:width',"476pt"))
+ self.assertTrue(e.has_value('svg:height',"404pt"))
+ self.assertTrue(e.has_value('text:anchor-type',"paragraph"))
+ self.assertFalse(e.has_value('svg:height',"476pt"))
+ c.index(u'<draw:object xlink:href="./Object 1"/></draw:frame></office:text></office:body>')
c.index(u'xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"')
self.textdoc.save("TEST.odt")
self.saved = True
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-odf.git
More information about the Python-modules-commits mailing list