[Python-modules-commits] [python-odf] 86/118: odf2xhtml now has methods for loaded documents

Fri Oct 3 21:27:26 UTC 2014

This is an automated email from the git hooks/post-receive script.

debacle pushed a commit to reference refs/remotes/upstream/master
in repository python-odf.

commit 9f800bb46fa8c1babba385b098a172c9c742aba8
Author: Søren Roug <soren.roug at eea.europa.eu>
Date:   Fri Apr 30 15:29:31 2010 +0000

    odf2xhtml now has methods for loaded documents
---
 odf/element.py            |  17 ++++----
 odf/odf2xhtml.py          | 101 +++++++++++++++++++++++++++++++---------------
 tests/elementparser.py    |  98 ++++++++++++++++++++++++++++++++++++++++++++
 tests/testdatastyles.py   |   2 +-
 tests/testload.py         |  17 +++++---
 tests/testmasterstyles.py |  25 +++++++++---
 tests/teststyles.py       |  12 +++++-
 tests/testsubobjects.py   |  12 +++++-
 8 files changed, 228 insertions(+), 56 deletions(-)

diff --git a/odf/element.py b/odf/element.py
index ee5377d..aad6980 100644
--- a/odf/element.py
+++ b/odf/element.py
@@ -437,15 +437,14 @@ class Element(Node):
 #       if allowed_attrs and (namespace, localpart) not in allowed_attrs:
 #           raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
         c = AttrConverters()
-        self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self)
+        self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
 
     def getAttrNS(self, namespace, localpart):
         prefix = self.get_nsprefix(namespace)
-        return self.attributes.get(prefix + ":" + localpart)
+        return self.attributes.get((namespace, localpart))
 
     def removeAttrNS(self, namespace, localpart):
-        prefix = self.get_nsprefix(namespace)
-        del self.attributes[prefix + ":" + localpart]
+        del self.attributes[(namespace, localpart)]
 
     def getAttribute(self, attr):
         """ Get an attribute value. The method knows which namespace the attribute is in
@@ -468,8 +467,9 @@ class Element(Node):
         if level == 0:
             for namespace, prefix in self.namespaces.items():
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+        for qname in self.attributes.keys():
+            prefix = self.get_nsprefix(qname[0])
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
         f.write('>')
 
     def write_close_tag(self, level, f):
@@ -481,8 +481,9 @@ class Element(Node):
         if level == 0:
             for namespace, prefix in self.namespaces.items():
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+        for qname in self.attributes.keys():
+            prefix = self.get_nsprefix(qname[0])
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
         if self.childNodes:
             f.write('>')
             for element in self.childNodes:
diff --git a/odf/odf2xhtml.py b/odf/odf2xhtml.py
index 8cf1fea..85397de 100644
--- a/odf/odf2xhtml.py
+++ b/odf/odf2xhtml.py
@@ -25,6 +25,7 @@ import xml.sax
 from xml.sax import handler, expatreader
 from xml.sax.xmlreader import InputSource
 from xml.sax.saxutils import escape, quoteattr
+from xml.dom import Node
 from cStringIO import StringIO
 
 from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
@@ -358,6 +359,7 @@ class ODF2XHTML(handler.ContentHandler):
         (NUMBERNS, "number-style"):(self.s_ignorexml, None),
         (NUMBERNS, "text-style"):(self.s_ignorexml, None),
         (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+        (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
         (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
         (OFFICENS, "forms"):(self.s_ignorexml, None),
         (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@@ -367,6 +369,7 @@ class ODF2XHTML(handler.ContentHandler):
         (OFFICENS, "styles"):(self.s_office_styles, None),
         (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
         (OFFICENS, "scripts"):(self.s_ignorexml, None),
+        (OFFICENS, "settings"):(self.s_ignorexml, None),
         (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
 #       (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
         (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@@ -427,6 +430,37 @@ class ODF2XHTML(handler.ContentHandler):
             self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
             self.elements[(OFFICENS, u"presentation")] = (None,None)
             self.elements[(OFFICENS, u"document-content")] = (None,None)
+        self._resetobject()
+
+    def _resetobject(self):
+        self.lines = []
+        self._wfunc = self._wlines
+        self.xmlfile = ''
+        self.title = ''
+        self.language = ''
+        self.creator = ''
+        self.data = []
+        self.tagstack = TagStack()
+        self.pstack = []
+        self.processelem = True
+        self.processcont = True
+        self.listtypes = {}
+        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
+        self.cs = StyleToCSS()
+        self.anchors = {}
+
+        # Style declarations
+        self.stylestack = []
+        self.styledict = {}
+        self.currentstyle = None
+
+        # Footnotes and endnotes
+        self.notedict = {}
+        self.currentnote = 0
+        self.notebody = ''
+
+        # Tags from meta.xml
+        self.metatags = []
 
 
     def writeout(self, s):
@@ -462,16 +496,12 @@ class ODF2XHTML(handler.ContentHandler):
         self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
 
 #--------------------------------------------------
+# Interface to parser
+#--------------------------------------------------
     def characters(self, data):
         if self.processelem and self.processcont:
             self.data.append(data)
 
-    def handle_starttag(self, tag, method, attrs):
-        method(tag,attrs)
-
-    def handle_endtag(self, tag, attrs, method):
-        method(tag, attrs)
-
     def startElementNS(self, tag, qname, attrs):
         self.pstack.append( (self.processelem, self.processcont) )
         if self.processelem:
@@ -492,6 +522,13 @@ class ODF2XHTML(handler.ContentHandler):
                 self.unknown_endtag(tag, attrs)
         self.processelem, self.processcont = self.pstack.pop()
 
+#--------------------------------------------------
+    def handle_starttag(self, tag, method, attrs):
+        method(tag,attrs)
+
+    def handle_endtag(self, tag, attrs, method):
+        method(tag, attrs)
+
     def unknown_starttag(self, tag, attrs):
         pass
 
@@ -1284,34 +1321,34 @@ class ODF2XHTML(handler.ContentHandler):
     def load(self, odffile):
         self._odffile = odffile
 
-    def parseodf(self):
-        self.xmlfile = ''
-        self.title = ''
-        self.language = ''
-        self.creator = ''
-        self.data = []
-        self.tagstack = TagStack()
-        self.pstack = []
-        self.processelem = True
-        self.processcont = True
-        self.listtypes = {}
-        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
-        self.cs = StyleToCSS()
-        self.anchors = {}
-
-        # Style declarations
-        self.stylestack = []
-        self.styledict = {}
-        self.currentstyle = None
-
-        # Footnotes and endnotes
-        self.notedict = {}
-        self.currentnote = 0
-        self.notebody = ''
+    def newcss(self, doc):
+        self._wfunc = self._writenothing
+        self._walknode(doc.topnode)
+        self._csslines = []
+        self._wfunc = self._writecss
+        self.generate_stylesheet()
+        res = ''.join(self._csslines)
+        del self._csslines
+        return res
 
-        # Tags from meta.xml
-        self.metatags = []
+    def newxhtml(self, doc):
+        """ Takes a document opened with load() and parses it
+            The return value is the xhtml output
+        """
+        self._walknode(doc.topnode)
+        return ''.join(self.lines)
+        
+    def _walknode(self, node):
+        if node.nodeType == Node.ELEMENT_NODE:
+            self.startElementNS(node.qname, node.tagName, node.attributes)
+            for c in node.childNodes:
+                self._walknode(c)
+            self.endElementNS(node.qname, node.tagName)
+        if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
+            self.characters(unicode(node))
 
+    def parseodf(self):
+        self._resetobject()
         # Extract the interesting files
         z = zipfile.ZipFile(self._odffile)
 
diff --git a/tests/elementparser.py b/tests/elementparser.py
new file mode 100644
index 0000000..3aba3a4
--- /dev/null
+++ b/tests/elementparser.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2010 Søren Roug, European Environment Agency
+#
+# This is free software.  You may redistribute it under the terms
+# of the Apache license and the GNU General Public License Version
+# 2 or at your option any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+#
+# Contributor(s):
+#
+import unittest
+
+""" Really simplistic parser of an element with attributes """
+
+class ElementParser:
+    def __init__(self, s, elmttoparse):
+        qelements = s.split('<')
+        for i in range(len(qelements)):
+            q = qelements[i]
+            if q[:len(elmttoparse)] == elmttoparse:
+                s = '<'.join([''] + qelements[i:])
+        self.attributes = {}
+        self.element = None
+        currattr = None
+
+        buf = []
+        START = 1
+        INELEM = 2
+        SPACE = 3
+        INATTR = 4
+        INVALUE=5
+        BEFOREVALUE = 6
+        NOMORE = 7
+
+        state=START
+        ls = list(s)
+        for c in ls:
+            if state == NOMORE:
+                continue
+            if state == INVALUE: # We're in the value of the attribute. Only look for the terminator
+                if c == '"':
+                    state = SPACE
+                    c = ''.join(buf)
+                    self.attributes[currattr] = c
+                    buf = []
+                else:
+                    buf.append(c)
+            else:
+                if c == '<':
+                    state = INELEM
+                elif c == ' ':
+                    if state == INELEM:
+                       self.element = ''.join(buf)
+                       buf = []
+                    state = SPACE
+                elif c == '=':
+                    if state == INATTR:
+                        state = BEFOREVALUE
+                        currattr = ''.join(buf)
+                        buf = []
+                elif c == '"':
+                     state = INVALUE
+                elif c == '>' or c == '/':
+                    state = NOMORE
+                elif c > '"' and c <= 'z' and state == SPACE: # Start of attribute
+                    state = INATTR
+                    buf = []
+                    buf.append(c)
+                else:
+                    buf.append(c)
+
+    def has_value(self, attribute, value):
+        v = self.attributes.get(attribute, None)
+        if v and v == value: return True
+        return False
+
+class TestParser(unittest.TestCase):
+    def test1(self):
+        s='<draw:object xlink:href="./Object 1"/><style:style style:name="Standard" style:display-name="Standard" style:family="paragraph"><style:property/>'
+        e = ElementParser(s,'style:style')
+        self.assertEqual(e.element,'style:style')
+        assert e.has_value("style:display-name","Standard")
+
+        e = ElementParser(s,'draw:object')
+        self.assertEqual(e.element,'draw:object')
+        assert e.has_value("xlink:href","./Object 1")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/testdatastyles.py b/tests/testdatastyles.py
index 711f643..9085170 100644
--- a/tests/testdatastyles.py
+++ b/tests/testdatastyles.py
@@ -25,7 +25,7 @@ from odf.number import Text,PercentageStyle, Number
 from odf.table import Table,TableRow,TableCell
 
 
-class TestHeadings(unittest.TestCase):
+class TestDatastyles(unittest.TestCase):
     
     saved = False
 
diff --git a/tests/testload.py b/tests/testload.py
index 6860b96..ca81cdb 100644
--- a/tests/testload.py
+++ b/tests/testload.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2008 Søren Roug, European Environment Agency
+# Copyright (C) 2008-2010 Søren Roug, European Environment Agency
 #
 # This is free software.  You may redistribute it under the terms
 # of the Apache license and the GNU General Public License Version
@@ -22,7 +22,7 @@ import unittest, os, os.path
 from odf.opendocument import OpenDocumentText, load
 from odf import style, text
 from odf.text import P, H, LineBreak
-
+from elementparser import ElementParser
 
 class TestSimple(unittest.TestCase):
     
@@ -104,8 +104,13 @@ class TestExampleDocs(unittest.TestCase):
             os.path.dirname(__file__), "examples", "simpletable.odt")
         d = load(simpletable_odt)
         result = unicode(d.contentxml(),'utf-8')
-        self.assertNotEqual(-1, result.find(u"""<text:sequence-decl text:name="Text" text:display-outline-level="0"/>"""))
-        self.assertNotEqual(-1, result.find(u"""<table:table table:name="Tabel1" table:style-name="Tabel1"><table:table-column table:number-columns-repeated="2" table:style-name="Tabel1.A"/>"""))
+        e = ElementParser(result,'text:sequence-decl')
+        self.assertTrue(e.has_value("text:name","Drawing")) # Last sequence
+        self.assertTrue(e.has_value("text:display-outline-level","0"))
+
+        e = ElementParser(result,'table:table-column')
+        self.assertTrue(e.has_value("table:number-columns-repeated","2"))
+        self.assertTrue(e.has_value("table:style-name","Tabel1.A"))
 
     def test_headerfooter(self):
         """ Test that styles referenced from master pages are renamed in OOo 2.x documents """
@@ -113,8 +118,8 @@ class TestExampleDocs(unittest.TestCase):
             os.path.dirname(__file__), "examples", "headerfooter.odt")
         d = load(simplelist_odt)
         result = unicode(d.stylesxml(),'utf-8')
-        self.assertNotEqual(-1, result.find(u"""style:name="MP1" """))
-        self.assertNotEqual(-1, result.find(u"""style:name="MP2" """))
+        self.assertNotEqual(-1, result.find(u'''style:name="MP1"'''))
+        self.assertNotEqual(-1, result.find(u'''style:name="MP2"'''))
         self.assertNotEqual(-1, result.find(u"""<style:header><text:p text:style-name="MP1">Header<text:tab/>"""))
         self.assertNotEqual(-1, result.find(u"""<style:footer><text:p text:style-name="MP2">Footer<text:tab/>"""))
 
diff --git a/tests/testmasterstyles.py b/tests/testmasterstyles.py
index 30f92c2..baad0c5 100644
--- a/tests/testmasterstyles.py
+++ b/tests/testmasterstyles.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2007 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
 #
 # This is free software.  You may redistribute it under the terms
 # of the Apache license and the GNU General Public License Version
@@ -24,6 +24,7 @@ from odf import style, text
 from odf.table import Table, TableColumn, TableRow, TableCell
 from odf.element import IllegalChild
 from odf.namespaces import TEXTNS
+from elementparser import ElementParser
 
 class TestMasterStyles(unittest.TestCase):
     
@@ -57,10 +58,24 @@ class TestMasterStyles(unittest.TestCase):
         presdoc.styles.addElement(titlestyle)
 
         s = unicode(presdoc.stylesxml(),'UTF-8')
-        # Not a good test: the attributes can come in a different order
-        self.assertContains(s, u'<style:page-layout style:name="MyLayout"><style:page-layout-properties fo:margin="0cm" fo:page-width="28cm" fo:page-height="21cm" style:print-orientation="landscape"/></style:page-layout>')
-        self.assertContains(s, u'<office:styles><style:style style:name="MyMaster-title" style:display-name="MyMaster-title" style:family="presentation"><style:paragraph-properties fo:text-align="center"/><style:text-properties fo:font-size="34pt"/><style:graphic-properties draw:fill-color="#ffff99"/></style:style></office:styles>')
-        self.assertContains(s, u'<office:master-styles><style:master-page style:name="MyMaster" style:display-name="MyMaster" style:page-layout-name="MyLayout"/></office:master-styles>')
+        self.assertContains(s, u'<style:page-layout style:name="MyLayout"><style:page-layout-properties ')
+        e = ElementParser(s,'style:page-layout-properties')
+        self.assertEqual(e.element,'style:page-layout-properties')
+        self.assertTrue(e.has_value("fo:margin","0cm"))
+        self.assertTrue(e.has_value("fo:page-width","28cm"))
+        self.assertTrue(e.has_value("fo:page-height","21cm"))
+        self.assertTrue(e.has_value("style:print-orientation","landscape"))
+
+        e = ElementParser(s,'style:style')
+        self.assertTrue(e.has_value("style:name","MyMaster-title"))
+        self.assertTrue(e.has_value("style:display-name","MyMaster-title"))
+        self.assertTrue(e.has_value("style:family","presentation"))
+
+        self.assertContains(s, u'<style:paragraph-properties fo:text-align="center"/><style:text-properties fo:font-size="34pt"/><style:graphic-properties draw:fill-color="#ffff99"/></style:style></office:styles>')
+        e = ElementParser(s,'style:master-page')
+        self.assertTrue(e.has_value("style:name","MyMaster"))
+        self.assertTrue(e.has_value("style:display-name","MyMaster"))
+        self.assertTrue(e.has_value("style:page-layout-name","MyLayout"))
 
     def testMasterWithHeader(self):
         """ Create a text document with a page layout called "pagelayout"
diff --git a/tests/teststyles.py b/tests/teststyles.py
index 99c33a8..1fc83e5 100644
--- a/tests/teststyles.py
+++ b/tests/teststyles.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2007 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
 #
 # This is free software.  You may redistribute it under the terms
 # of the Apache license and the GNU General Public License Version
@@ -24,6 +24,8 @@ from odf import style, text
 from odf.table import Table, TableColumn, TableRow, TableCell
 from odf.element import IllegalChild
 from odf.namespaces import TEXTNS
+from elementparser import ElementParser
+
 
 class TestStyles(unittest.TestCase):
     
@@ -85,7 +87,13 @@ class TestQattributes(unittest.TestCase):
         s.index(u"""<?xml version='1.0' encoding='UTF-8'?>\n""")
         s.index(u'xmlns:ns35="http://foreignuri.com"')
         s.index(u'<style:paragraph-properties ns35:enable-numbering="true"/>')
-        s.index(u'<office:styles><style:style style:name="Standard" style:display-name="Standard" style:family="paragraph">')
+        e = ElementParser(s,'style:style')
+#        e = ElementParser(u'<style:style style:name="Standard" style:display-name="Standard" style:family="paragraph">')
+        self.assertEqual(e.element,'style:style')
+        self.assertTrue(e.has_value("style:display-name","Standard"))
+        self.assertTrue(e.has_value("style:name","Standard"))
+        self.assertTrue(e.has_value("style:family","paragraph"))
+
 
 
 if __name__ == '__main__':
diff --git a/tests/testsubobjects.py b/tests/testsubobjects.py
index 5cb746c..0befb3e 100644
--- a/tests/testsubobjects.py
+++ b/tests/testsubobjects.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2007 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
 #
 # This is free software.  You may redistribute it under the terms
 # of the Apache license and the GNU General Public License Version
@@ -22,6 +22,7 @@ import unittest, os, zipfile
 from odf.opendocument import OpenDocumentText
 from odf import draw, text
 from odf.element import IllegalChild
+from elementparser import ElementParser
 
 def _getxmlpart(odffile, xmlfile):
     """ Get the content out of the ODT file"""
@@ -57,7 +58,14 @@ class TestUnicode(unittest.TestCase):
         self.assertEqual(subsubloc,'./Object 1/Object 1')
 
         c = unicode(self.textdoc.contentxml(),'UTF-8')
-        c.index(u'<office:body><office:text><draw:frame svg:width="476pt" text:anchor-type="paragraph" svg:height="404pt"><draw:object xlink:href="./Object 1"/></draw:frame></office:text></office:body>')
+        c.index(u'<office:body><office:text><draw:frame ')
+        e = ElementParser(c, 'draw:frame')
+#       e = ElementParser('<draw:frame svg:width="476pt" text:anchor-type="paragraph" svg:height="404pt">')
+        self.assertTrue(e.has_value('svg:width',"476pt"))
+        self.assertTrue(e.has_value('svg:height',"404pt"))
+        self.assertTrue(e.has_value('text:anchor-type',"paragraph"))
+        self.assertFalse(e.has_value('svg:height',"476pt"))
+        c.index(u'<draw:object xlink:href="./Object 1"/></draw:frame></office:text></office:body>')
         c.index(u'xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"')
         self.textdoc.save("TEST.odt")
         self.saved = True

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-odf.git