[Python-modules-commits] r13412 - in packages/pdfminer/trunk/debian (4 files)
jwilk at users.alioth.debian.org
jwilk at users.alioth.debian.org
Sun Jun 13 10:27:31 UTC 2010
Date: Sunday, June 13, 2010 @ 10:27:29
Author: jwilk
Revision: 13412
Backport two upstream patches.
Added:
packages/pdfminer/trunk/debian/patches/layout.diff
packages/pdfminer/trunk/debian/patches/nested-tags.diff
Modified:
packages/pdfminer/trunk/debian/changelog
packages/pdfminer/trunk/debian/patches/series
Modified: packages/pdfminer/trunk/debian/changelog
===================================================================
--- packages/pdfminer/trunk/debian/changelog 2010-06-13 09:53:58 UTC (rev 13411)
+++ packages/pdfminer/trunk/debian/changelog 2010-06-13 10:27:29 UTC (rev 13412)
@@ -8,5 +8,8 @@
[psparser-testsuite.diff]
* Store encoding data in gzipped pickles rather than in Python modules.
This way we can save lots of disk space. [encoding-data.diff]
+ * Backport upstream patches:
+ + to fix a bug in layout analysis [layout.diff];
+ + to allow extraction of nested tags [nested-tags.diff].
- -- Jakub Wilk <jwilk at debian.org> Sat, 12 Jun 2010 16:54:14 +0200
+ -- Jakub Wilk <jwilk at debian.org> Sun, 13 Jun 2010 11:58:54 +0200
Added: packages/pdfminer/trunk/debian/patches/layout.diff
===================================================================
--- packages/pdfminer/trunk/debian/patches/layout.diff (rev 0)
+++ packages/pdfminer/trunk/debian/patches/layout.diff 2010-06-13 10:27:29 UTC (rev 13412)
@@ -0,0 +1,18 @@
+Description: Fix a bug in layout analysis.
+Origin: backport, http://code.google.com/p/pdfminer/source/detail?r=226
+Last-Update: 2010-06-13
+
+--- a/pdfminer/layout.py
++++ b/pdfminer/layout.py
+@@ -527,8 +527,9 @@
+
+ ## group_boxes
+ ##
+-def group_boxes(groupfunc, objs, distfunc, debug=0):
+- assert objs
++def group_boxes(groupfunc, objs0, distfunc, debug=0):
++ assert objs0
++ objs = objs0[:]
+ while 2 <= len(objs):
+ mindist = INF
+ minpair = None
Added: packages/pdfminer/trunk/debian/patches/nested-tags.diff
===================================================================
--- packages/pdfminer/trunk/debian/patches/nested-tags.diff (rev 0)
+++ packages/pdfminer/trunk/debian/patches/nested-tags.diff 2010-06-13 10:27:29 UTC (rev 13412)
@@ -0,0 +1,46 @@
+Description: Allow extraction of nested tags.
+Origin: backport, http://code.google.com/p/pdfminer/source/detail?r=226
+Last-Update: 2010-06-13
+
+--- a/pdfminer/converter.py
++++ b/pdfminer/converter.py
+@@ -6,7 +6,7 @@
+ from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
+ from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTPolygon
+ from layout import LTFigure, LTImage, LTChar, LTTextLine, LTTextBox, LTTextGroup
+-from utils import apply_matrix_pt, mult_matrix
++from utils import apply_matrix_pt, mult_matrix, translate_matrix
+ from utils import enc, bbox2str, create_bmp
+
+
+@@ -354,7 +354,7 @@
+ self.outfp = outfp
+ self.codec = codec
+ self.pageno = 0
+- self.tag = None
++ self.stack = []
+ return
+
+ def render_string(self, textstate, seq):
+@@ -388,16 +388,16 @@
+ s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
+ in sorted(props.iteritems()) )
+ self.outfp.write('<%s%s>' % (enc(tag.name), s))
+- self.tag = tag
++ self.stack.append(tag)
+ return
+
+ def end_tag(self):
+- assert self.tag
+- self.outfp.write('</%s>' % enc(self.tag.name))
+- self.tag = None
++ assert self.stack
++ tag = self.stack.pop(-1)
++ self.outfp.write('</%s>' % enc(tag.name))
+ return
+
+ def do_tag(self, tag, props=None):
+ self.begin_tag(tag, props)
+- self.tag = None
++ self.stack.pop(-1)
+ return
Modified: packages/pdfminer/trunk/debian/patches/series
===================================================================
--- packages/pdfminer/trunk/debian/patches/series 2010-06-13 09:53:58 UTC (rev 13411)
+++ packages/pdfminer/trunk/debian/patches/series 2010-06-13 10:27:29 UTC (rev 13412)
@@ -1,3 +1,5 @@
+layout.diff
+nested-tags.diff
pdf-testsuite.diff
psparser-testsuite.diff
encoding-data.diff
More information about the Python-modules-commits
mailing list