[Python-modules-commits] r13414 - in packages/pdfminer/tags (19 files)
jwilk at users.alioth.debian.org
jwilk at users.alioth.debian.org
Sun Jun 13 11:48:36 UTC 2010
Date: Sunday, June 13, 2010 @ 11:48:32
Author: jwilk
Revision: 13414
[svn-buildpackage] Tagging pdfminer 20100424+dfsg-1
Added:
packages/pdfminer/tags/20100424+dfsg-1/
packages/pdfminer/tags/20100424+dfsg-1/debian/changelog
(from rev 13413, packages/pdfminer/trunk/debian/changelog)
packages/pdfminer/tags/20100424+dfsg-1/debian/clean
(from rev 13407, packages/pdfminer/trunk/debian/clean)
packages/pdfminer/tags/20100424+dfsg-1/debian/control
(from rev 13407, packages/pdfminer/trunk/debian/control)
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff
(from rev 13410, packages/pdfminer/trunk/debian/patches/encoding-data.diff)
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff
(from rev 13412, packages/pdfminer/trunk/debian/patches/layout.diff)
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff
(from rev 13412, packages/pdfminer/trunk/debian/patches/nested-tags.diff)
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff
(from rev 13411, packages/pdfminer/trunk/debian/patches/pdf-testsuite.diff)
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series
(from rev 13412, packages/pdfminer/trunk/debian/patches/series)
packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install
(from rev 13407, packages/pdfminer/trunk/debian/pdfminer-data.install)
packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install
(from rev 13407, packages/pdfminer/trunk/debian/python-pdfminer.install)
packages/pdfminer/tags/20100424+dfsg-1/debian/rules
(from rev 13407, packages/pdfminer/trunk/debian/rules)
Deleted:
packages/pdfminer/tags/20100424+dfsg-1/debian/changelog
packages/pdfminer/tags/20100424+dfsg-1/debian/clean
packages/pdfminer/tags/20100424+dfsg-1/debian/control
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/poppler-data.diff
packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series
packages/pdfminer/tags/20100424+dfsg-1/debian/rules
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/changelog
===================================================================
--- packages/pdfminer/trunk/debian/changelog 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/changelog 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,12 +0,0 @@
-pdfminer (20100424+dfsg-1) UNRELEASED; urgency=low
-
- * Initial release (closes: #584555).
- * Strip non-DFSG-free test documents from the .orig.tar.gz.
- + Run tests only on those files that are actually available.
- [dfsg-testsuite.diff]
- * Disable test suite for psparser.py, as it is currently broken.
- [psparser-testsuite.diff]
- * Reuse CMaps provided by the poppler-data package.
- [poppler-data.diff]
-
- -- Jakub Wilk <jwilk at debian.org> Sat, 12 Jun 2010 16:54:14 +0200
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/changelog (from rev 13413, packages/pdfminer/trunk/debian/changelog)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/changelog (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/changelog 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,15 @@
+pdfminer (20100424+dfsg-1) experimental; urgency=low
+
+ * Initial release (closes: #584555).
+ * Strip non-DFSG-free test documents from the .orig.tar.gz.
+ + Run tests only on those files that are actually available.
+ [dfsg-testsuite.diff]
+ * Disable test suite for psparser.py, as it is currently broken.
+ [psparser-testsuite.diff]
+ * Store encoding data in gzipped pickles rather than in Python modules.
+ This way we can save lots of disk space. [encoding-data.diff]
+ * Backport upstream patches:
+ + to fix a bug in layout analysis [layout.diff];
+ + to allow extraction of nested tags [nested-tags.diff].
+
+ -- Jakub Wilk <jwilk at debian.org> Sun, 13 Jun 2010 12:27:50 +0200
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/clean
===================================================================
--- packages/pdfminer/trunk/debian/clean 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/clean 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,2 +0,0 @@
-debian/manpages/*.[0-9]
-docs/changelog
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/clean (from rev 13407, packages/pdfminer/trunk/debian/clean)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/clean (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/clean 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,3 @@
+debian/manpages/*.[0-9]
+docs/changelog
+pdfminer/cmap/*.gz
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/control
===================================================================
--- packages/pdfminer/trunk/debian/control 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/control 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,29 +0,0 @@
-Source: pdfminer
-Section: python
-Priority: optional
-Maintainer: Jakub Wilk <jwilk at debian.org>
-Uploaders: Debian Python Modules Team <python-modules-team at lists.alioth.debian.org>
-Build-Depends: debhelper (>= 7.0.50~),
- python-all (>= 2.4), python-support (>= 0.90), python-nose,
- elinks-lite | elinks,
- docbook-xsl, docbook-xml, xsltproc, libxml2-utils
-XS-Python-Version: >= 2.4
-Standards-Version: 3.8.4
-Homepage: http://www.unixuser.org/~euske/python/pdfminer/
-Vcs-Svn: svn://svn.debian.org/python-modules/packages/pdfminer/trunk/
-Vcs-Browser: http://svn.debian.org/viewsvn/python-modules/packages/pdfminer/trunk/
-
-Package: python-pdfminer
-Architecture: all
-Depends: ${misc:Depends}, ${python:Depends}
-Suggests: poppler-data
-Description: PDF parser and analyser
- PDFMiner is a tool for extracting information from PDF documents, which
- focuses entirely on getting and analyzing text data. It allows to obtain the
- exact location of texts in a page, as well as other information such as fonts
- or lines. It includes a PDF converter that can transform PDF files into other
- text formats (such as HTML). It has an extensible PDF parser that can be used
- for other purposes instead of text analysis.
- .
- This package provides the Python module and the command-line tools: pdf2txt
- and dumppdf.
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/control (from rev 13407, packages/pdfminer/trunk/debian/control)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/control (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/control 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,40 @@
+Source: pdfminer
+Section: python
+Priority: optional
+Maintainer: Jakub Wilk <jwilk at debian.org>
+Uploaders: Debian Python Modules Team <python-modules-team at lists.alioth.debian.org>
+Build-Depends: debhelper (>= 7.0.50~),
+ python-all (>= 2.4), python-support (>= 0.90), python-nose,
+ elinks-lite | elinks,
+ docbook-xsl, docbook-xml, xsltproc, libxml2-utils
+XS-Python-Version: >= 2.4
+Standards-Version: 3.8.4
+Homepage: http://www.unixuser.org/~euske/python/pdfminer/
+Vcs-Svn: svn://svn.debian.org/python-modules/packages/pdfminer/trunk/
+Vcs-Browser: http://svn.debian.org/viewsvn/python-modules/packages/pdfminer/trunk/
+
+Package: python-pdfminer
+Architecture: all
+Depends: ${misc:Depends}, ${python:Depends}
+Suggests: pdfminer-data
+Description: PDF parser and analyser
+ PDFMiner is a tool for extracting information from PDF documents, which
+ focuses entirely on getting and analyzing text data. It allows to obtain the
+ exact location of texts in a page, as well as other information such as fonts
+ or lines. It includes a PDF converter that can transform PDF files into other
+ text formats (such as HTML). It has an extensible PDF parser that can be used
+ for other purposes instead of text analysis.
+ .
+ This package provides the Python module and the command-line tools: pdf2txt
+ and dumppdf.
+
+Package: pdfminer-data
+Architecture: all
+Depends: ${misc:Depends}
+Recommends: python-pdfminer
+Description: PDF parser and analyser (encoding data)
+ PDFMiner is a tool for extracting information from PDF documents, which
+ focuses entirely on getting and analyzing text data.
+ .
+ This package contains the encoding data needed to read some PDF documents in
+ CJK (Chinese, Japanese, Korean) languages.
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff (from rev 13410, packages/pdfminer/trunk/debian/patches/encoding-data.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,193 @@
+Description:
+ Store encoding data in gzipped pickles rather than in Python modules.
+ This way we can save lots of disk space.
+Author: Jakub Wilk <jwilk at debian.org>
+Forwarded: yes
+Last-Update: 2010-06-12
+
+--- a/setup.py
++++ b/setup.py
+@@ -22,6 +22,9 @@
+ 'pdfminer',
+ 'pdfminer.cmap'
+ ],
++ package_data={
++ 'pdfminer.cmap': ['*.pickle.gz'],
++ },
+ scripts=[
+ 'tools/pdf2txt.py',
+ 'tools/dumppdf.py'
+--- a/Makefile
++++ b/Makefile
+@@ -36,17 +36,17 @@
+ CONV_CMAP=$(PYTHON) tools/conv_cmap.py
+ CMAPSRC=cmaprsrc
+ CMAPDST=pdfminer/cmap
+-cmap: $(CMAPDST)/TO_UNICODE_Adobe_CNS1.py $(CMAPDST)/TO_UNICODE_Adobe_GB1.py \
+- $(CMAPDST)/TO_UNICODE_Adobe_Japan1.py $(CMAPDST)/TO_UNICODE_Adobe_Korea1.py
++cmap: $(CMAPDST)/to-unicode-Adobe-CNS1.pickle.gz $(CMAPDST)/to-unicode-Adobe-GB1.pickle.gz \
++ $(CMAPDST)/to-unicode-Adobe-Japan1.pickle.gz $(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz
+ cmap_clean:
+ cd $(CMAPDST) && make cmap_clean
+-$(CMAPDST)/TO_UNICODE_Adobe_CNS1.py:
++$(CMAPDST)/to-unicode-Adobe-CNS1.pickle.gz:
+ $(CONV_CMAP) $(CMAPDST) Adobe-CNS1 $(CMAPSRC)/cid2code_Adobe_CNS1.txt cp950 big5
+-$(CMAPDST)/TO_UNICODE_Adobe_GB1.py:
++$(CMAPDST)/to-unicode-Adobe-GB1.pickle.gz:
+ $(CONV_CMAP) $(CMAPDST) Adobe-GB1 $(CMAPSRC)/cid2code_Adobe_GB1.txt cp936 gb2312
+-$(CMAPDST)/TO_UNICODE_Adobe_Japan1.py:
++$(CMAPDST)/to-unicode-Adobe-Japan1.pickle.gz:
+ $(CONV_CMAP) $(CMAPDST) Adobe-Japan1 $(CMAPSRC)/cid2code_Adobe_Japan1.txt cp932 euc-jp
+-$(CMAPDST)/TO_UNICODE_Adobe_Korea1.py:
++$(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz:
+ $(CONV_CMAP) $(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt cp949 euc-kr
+
+ test: cmap
+--- a/pdfminer/cmapdb.py
++++ b/pdfminer/cmapdb.py
+@@ -15,6 +15,9 @@
+ import re
+ import os
+ import os.path
++import gzip
++import cPickle as pickle
++import cmap
+ from struct import pack, unpack
+ from psparser import PSStackParser
+ from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF
+@@ -210,34 +213,50 @@
+ class CMapDB(object):
+
+ debug = 0
++ _cmap_cache = {}
++ _umap_cache = {}
+
+ class CMapNotFound(CMapError): pass
+
+ @classmethod
++ def _load_data(klass, name):
++ filename = '%s.pickle.gz' % name
++ if klass.debug:
++ print >>sys.stderr, 'loading:', name
++ for directory in os.path.dirname(cmap.__file__), '/usr/share/pdfminer/':
++ path = os.path.join(directory, filename)
++ if os.path.exists(path):
++ gzfile = gzip.open(path)
++ try:
++ return type(name, (), pickle.loads(gzfile.read()))
++ finally:
++ gzfile.close()
++ else:
++ raise CMapDB.CMapNotFound(name)
++
++ @classmethod
+ def get_cmap(klass, name):
+ if name == 'Identity-H':
+ return IdentityCMap(False)
+ elif name == 'Identity-V':
+ return IdentityCMap(True)
+- modname = 'pdfminer.cmap.%s' % name.replace('-','_')
+- if klass.debug:
+- print >>sys.stderr, 'loading:', modname
+ try:
+- module = __import__(modname, fromlist=['pdfminer.cmap'])
+- except ImportError:
+- raise CMapDB.CMapNotFound(name)
+- return PyCMap(name, module)
++ return klass._cmap_cache[name]
++ except KeyError:
++ pass
++ data = klass._load_data(name)
++ klass._cmap_cache[name] = cmap = PyCMap(name, data)
++ return cmap
+
+ @classmethod
+ def get_unicode_map(klass, name, vertical=False):
+- modname = 'pdfminer.cmap.TO_UNICODE_%s' % name.replace('-','_')
+- if klass.debug:
+- print >>sys.stderr, 'loading:', modname, vertical
+ try:
+- module = __import__(modname, fromlist=['pdfminer.cmap'])
+- except ImportError:
+- raise CMapDB.CMapNotFound(name)
+- return PyUnicodeMap(name, module, vertical)
++ return klass._umap_cache[name][vertical]
++ except KeyError:
++ pass
++ data = klass._load_data('to-unicode-%s' % name)
++ klass._umap_cache[name] = umaps = [PyUnicodeMap(name, data, v) for v in (False, True)]
++ return umaps[vertical]
+
+
+ ## CMapParser
+--- a/pdfminer/cmap/Makefile
++++ b/pdfminer/cmap/Makefile
+@@ -6,5 +6,4 @@
+ -rm *.pyc *.pyo
+
+ cmap_clean:
+- -rm *.py
+- touch __init__.py
++ rm -f *.pickle.gz
+--- a/tools/conv_cmap.py
++++ b/tools/conv_cmap.py
+@@ -1,6 +1,8 @@
+ #!/usr/bin/env python
+ import sys
+ import os.path
++import gzip
++import cPickle as pickle
+
+ def process_cid2code(fp, check_codecs=[]):
+
+@@ -118,9 +120,6 @@
+ print 'usage: %s output_dir regname cid2code.txt codecs ...' % argv[0]
+ return 100
+
+- def pyname(name):
+- return name.replace('-','_')+'.py'
+-
+ args = argv[1:]
+ if len(args) < 3: return usage()
+ (outdir, regname, src) = args[:3]
+@@ -132,22 +131,24 @@
+ fp.close()
+
+ for (name, cmap) in code2cid.iteritems():
+- fname = pyname(name)
++ fname = '%s.pickle.gz' % name
+ print >>sys.stderr, 'writing %r...' % fname
+- fp = file(os.path.join(outdir, fname), 'w')
+- print >>fp, '#!/usr/bin/env python'
+- print >>fp, '#', fname
+- print >>fp, 'IS_VERTICAL = %r' % is_vertical.get(name, False)
+- print >>fp, 'CODE2CID = %r' % cmap
++ fp = gzip.open(os.path.join(outdir, fname), 'wb')
++ data = dict(
++ IS_VERTICAL=is_vertical.get(name, False),
++ CODE2CID=cmap,
++ )
++ fp.write(pickle.dumps(data))
+ fp.close()
+
+- fname = 'TO_UNICODE_'+pyname(regname)
++ fname = 'to-unicode-%s.pickle.gz' % regname
+ print >>sys.stderr, 'writing %r...' % fname
+- fp = file(os.path.join(outdir, fname), 'w')
+- print >>fp, '#!/usr/bin/env python'
+- print >>fp, '#', fname
+- print >>fp, 'CID2UNICHR_H = %r' % cid2unichr_h
+- print >>fp, 'CID2UNICHR_V = %r' % cid2unichr_v
++ fp = gzip.open(os.path.join(outdir, fname), 'wb')
++ data = dict(
++ CID2UNICHR_H=cid2unichr_h,
++ CID2UNICHR_V=cid2unichr_v,
++ )
++ fp.write(pickle.dumps(data))
+ fp.close()
+
+ return 0
+--- /dev/null
++++ b/pdfminer/cmap/__init__.py
+@@ -0,0 +1 @@
++#
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff (from rev 13412, packages/pdfminer/trunk/debian/patches/layout.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,18 @@
+Description: Fix a bug in layout analysis.
+Origin: backport, http://code.google.com/p/pdfminerr/source/detail?r=226
+Last-Update: 2010-06-13
+
+--- a/pdfminer/layout.py
++++ b/pdfminer/layout.py
+@@ -527,8 +527,9 @@
+
+ ## group_boxes
+ ##
+-def group_boxes(groupfunc, objs, distfunc, debug=0):
+- assert objs
++def group_boxes(groupfunc, objs0, distfunc, debug=0):
++ assert objs0
++ objs = objs0[:]
+ while 2 <= len(objs):
+ mindist = INF
+ minpair = None
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff (from rev 13412, packages/pdfminer/trunk/debian/patches/nested-tags.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,46 @@
+Description: Allow extraction of nested tags.
+Origin: backport, http://code.google.com/p/pdfminerr/source/detail?r=226
+Last-Update: 2010-06-13
+
+--- a/pdfminer/converter.py
++++ b/pdfminer/converter.py
+@@ -6,7 +6,7 @@
+ from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
+ from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTPolygon
+ from layout import LTFigure, LTImage, LTChar, LTTextLine, LTTextBox, LTTextGroup
+-from utils import apply_matrix_pt, mult_matrix
++from utils import apply_matrix_pt, mult_matrix, translate_matrix
+ from utils import enc, bbox2str, create_bmp
+
+
+@@ -354,7 +354,7 @@
+ self.outfp = outfp
+ self.codec = codec
+ self.pageno = 0
+- self.tag = None
++ self.stack = []
+ return
+
+ def render_string(self, textstate, seq):
+@@ -388,16 +388,16 @@
+ s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
+ in sorted(props.iteritems()) )
+ self.outfp.write('<%s%s>' % (enc(tag.name), s))
+- self.tag = tag
++ self.stack.append(tag)
+ return
+
+ def end_tag(self):
+- assert self.tag
+- self.outfp.write('</%s>' % enc(self.tag.name))
+- self.tag = None
++ assert self.stack
++ tag = self.stack.pop(-1)
++ self.outfp.write('</%s>' % enc(tag.name))
+ return
+
+ def do_tag(self, tag, props=None):
+ self.begin_tag(tag, props)
+- self.tag = None
++ self.stack.pop(-1)
+ return
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff
===================================================================
--- packages/pdfminer/trunk/debian/patches/pdf-testsuite.diff 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,53 +0,0 @@
-Description:
- Run tests only on those files that are actually present in the tarball.
- This patch is required because several test documents were removed from the
- Debian source package.
-Author: Jakub Wilk <jwilk at debian.org>
-Forwarded: no
-Last-Update: 2010-06-08
-
---- a/samples/Makefile
-+++ b/samples/Makefile
-@@ -6,38 +6,10 @@
- PYTHON=python
- PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -Dx -p1
-
--HTMLS= \
-- simple1.html \
-- simple2.html \
-- dmca.html \
-- f1040nr.html \
-- i1040nr.html \
-- jo.html \
-- kampo.html \
-- naacl06-shinyama.html \
-- nlp2004slides.html
--
--TEXTS= \
-- simple1.txt \
-- simple2.txt \
-- dmca.txt \
-- f1040nr.txt \
-- i1040nr.txt \
-- jo.txt \
-- kampo.txt \
-- naacl06-shinyama.txt \
-- nlp2004slides.txt
--
--XMLS= \
-- simple1.xml \
-- simple2.xml \
-- dmca.xml \
-- f1040nr.xml \
-- i1040nr.xml \
-- jo.xml \
-- kampo.xml \
-- naacl06-shinyama.xml \
-- nlp2004slides.xml
-+PDFS = $(wildcard *.pdf)
-+HTMLS = $(PDFS:.pdf=.html)
-+TEXTS = $(PDFS:.pdf=.txt)
-+XMLS= $(PDFS:.pdf=.xml)
-
- test: htmls texts xmls
-
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff (from rev 13411, packages/pdfminer/trunk/debian/patches/pdf-testsuite.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,53 @@
+Description:
+ Run tests only on those files that are actually present in the tarball.
+ This patch is required because several test documents were removed from the
+ Debian source package.
+Author: Jakub Wilk <jwilk at debian.org>
+Forwarded: yes
+Last-Update: 2010-06-08
+
+--- a/samples/Makefile
++++ b/samples/Makefile
+@@ -6,38 +6,10 @@
+ PYTHON=python
+ PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -Dx -p1
+
+-HTMLS= \
+- simple1.html \
+- simple2.html \
+- dmca.html \
+- f1040nr.html \
+- i1040nr.html \
+- jo.html \
+- kampo.html \
+- naacl06-shinyama.html \
+- nlp2004slides.html
+-
+-TEXTS= \
+- simple1.txt \
+- simple2.txt \
+- dmca.txt \
+- f1040nr.txt \
+- i1040nr.txt \
+- jo.txt \
+- kampo.txt \
+- naacl06-shinyama.txt \
+- nlp2004slides.txt
+-
+-XMLS= \
+- simple1.xml \
+- simple2.xml \
+- dmca.xml \
+- f1040nr.xml \
+- i1040nr.xml \
+- jo.xml \
+- kampo.xml \
+- naacl06-shinyama.xml \
+- nlp2004slides.xml
++PDFS = $(wildcard *.pdf)
++HTMLS = $(PDFS:.pdf=.html)
++TEXTS = $(PDFS:.pdf=.txt)
++XMLS= $(PDFS:.pdf=.xml)
+
+ test: htmls texts xmls
+
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/poppler-data.diff
===================================================================
--- packages/pdfminer/trunk/debian/patches/poppler-data.diff 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/poppler-data.diff 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,95 +0,0 @@
-Description:
- Reuse CMaps provided by the poppler-data package. This way we don't need to
- ship pdfminer.cmap.* modules in the Debian package.
-Author: Jakub Wilk <jwilk at debian.org>
-Bug-Debian: http://bugs.debian.org/584555
-Forwarded: not-needed
-Last-Update: 2010-06-08
-
---- a/pdfminer/cmapdb.py
-+++ b/pdfminer/cmapdb.py
-@@ -15,6 +15,7 @@
- import re
- import os
- import os.path
-+import glob
- from struct import pack, unpack
- from psparser import PSStackParser
- from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF
-@@ -204,6 +205,22 @@
- def __repr__(self):
- return '<PyUnicodeMap: %s>' % (self.name)
-
-+def _cache(method):
-+
-+ def wrapped_method(self, *args):
-+ cache_attr_name = '_%s_cache' % method.__name__
-+ cache = getattr(self, cache_attr_name, None)
-+ if cache is None:
-+ cache = {}
-+ setattr(self, cache_attr_name, cache)
-+ try:
-+ return cache[args]
-+ except KeyError:
-+ result = method(self, *args)
-+ cache[args] = result
-+ return result
-+
-+ return wrapped_method
-
- ## CMapDB
- ##
-@@ -214,6 +231,7 @@
- class CMapNotFound(CMapError): pass
-
- @classmethod
-+ @_cache
- def get_cmap(klass, name):
- if name == 'Identity-H':
- return IdentityCMap(False)
-@@ -225,10 +243,22 @@
- try:
- module = __import__(modname, fromlist=['pdfminer.cmap'])
- except ImportError:
-+ for directory in glob.glob('/usr/share/poppler/cMap/*/'):
-+ if not os.path.exists(directory + name):
-+ continue
-+ cmap = FileCMap()
-+ fp = file(directory + name, 'rb')
-+ try:
-+ CMapParser(cmap, fp).run()
-+ finally:
-+ fp.close()
-+ return cmap
- raise CMapDB.CMapNotFound(name)
-- return PyCMap(name, module)
-+ else:
-+ return PyCMap(name, module)
-
- @classmethod
-+ @_cache
- def get_unicode_map(klass, name, vertical=False):
- modname = 'pdfminer.cmap.TO_UNICODE_%s' % name.replace('-','_')
- if klass.debug:
-@@ -236,8 +266,20 @@
- try:
- module = __import__(modname, fromlist=['pdfminer.cmap'])
- except ImportError:
-+ for directory in glob.glob('/usr/share/poppler/cMap/*/'):
-+ filename = directory + name + '-UCS2'
-+ if not os.path.exists(filename):
-+ continue
-+ cmap = FileUnicodeMap()
-+ fp = file(filename, 'rb')
-+ try:
-+ CMapParser(cmap, fp).run()
-+ finally:
-+ fp.close()
-+ return cmap
- raise CMapDB.CMapNotFound(name)
-- return PyUnicodeMap(name, module, vertical)
-+ else:
-+ return PyUnicodeMap(name, module, vertical)
-
-
- ## CMapParser
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series
===================================================================
--- packages/pdfminer/trunk/debian/patches/series 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,3 +0,0 @@
-pdf-testsuite.diff
-psparser-testsuite.diff
-poppler-data.diff
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series (from rev 13412, packages/pdfminer/trunk/debian/patches/series)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,5 @@
+layout.diff
+nested-tags.diff
+pdf-testsuite.diff
+psparser-testsuite.diff
+encoding-data.diff
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install (from rev 13407, packages/pdfminer/trunk/debian/pdfminer-data.install)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1 @@
+usr/lib/python*/*-packages/pdfminer/cmap/*.pickle.gz /usr/share/pdfminer/
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install (from rev 13407, packages/pdfminer/trunk/debian/python-pdfminer.install)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,5 @@
+/usr/bin/pdf2txt
+/usr/bin/dumppdf
+/usr/lib/python*/*-packages/pdfminer-*.egg-info
+/usr/lib/python*/*-packages/pdfminer/*.py
+/usr/lib/python*/*-packages/pdfminer/cmap/*.py
Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/rules
===================================================================
--- packages/pdfminer/trunk/debian/rules 2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/rules 2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,52 +0,0 @@
-#!/usr/bin/make -f
-
-SOURCE_DIR = $(dir $(firstword $(MAKEFILE_LIST)))/..
-UPSTREAM_VERSION = $(shell cd $(SOURCE_DIR) && dpkg-parsechangelog | sed -n -r -e '/^Version: ([0-9.]+)([+]dfsg).*/ { s//\1/; p; q; }')
-
-.PHONY: override_dh_auto_build
-override_dh_auto_build:
- dh_auto_build -Spython_distutils
-
-.PHONY: override_dh_auto_install
-override_dh_auto_install:
- dh_auto_install -Spython_distutils
-
-.PHONY: override_dh_install
-override_dh_install:
- rename.ul .py '' debian/python-pdfminer/usr/bin/*.py
- dh_install
-
-.PHONY: override_dh_installman
-override_dh_installman:
- $(MAKE) -C debian/manpages/
- dh_installman
-
-.PHONY: override_dh_auto_test
-override_dh_auto_test:
-ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),)
- set -e -x; \
- for python in $(shell pyversions -r); do \
- $$python /usr/bin/nosetests --with-doctest --verbose pdfminer/*.py; \
- $(MAKE) -C samples clean; \
- $(MAKE) -C samples PYTHON=$$python CMP="diff -u" test; \
- done
-endif
-
-.PHONY: override_dh_installchangelogs
-override_dh_installchangelogs:
- elinks -config-file /dev/null -dump -no-numbering -no-references docs/index.html \
- | sed -n -e '/^Changes/,/^ ---/ { /^ / s/// p }' \
- > docs/changelog
- dh_installchangelogs docs/changelog
-
-.PHONY:
-get-orig-source:
- sh -x $(SOURCE_DIR)/debian/get-orig-source.sh $(UPSTREAM_VERSION)
-
-.PHONY: build build-arch build-indep binary binary-arch binary-indep clean
-build build-arch build-indep binary binary-indep clean:
- dh $(@)
-# In order not to confuse lintian, binary-arch is a separate target:
-binary-arch:
-
-# vim:ts=4 sw=4 noet
Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/rules (from rev 13407, packages/pdfminer/trunk/debian/rules)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/rules (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/rules 2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,53 @@
+#!/usr/bin/make -f
+
+SOURCE_DIR = $(dir $(firstword $(MAKEFILE_LIST)))/..
+UPSTREAM_VERSION = $(shell cd $(SOURCE_DIR) && dpkg-parsechangelog | sed -n -r -e '/^Version: ([0-9.]+)([+]dfsg).*/ { s//\1/; p; q; }')
+
+.PHONY: override_dh_auto_build
+override_dh_auto_build:
+ $(MAKE) cmap
+ dh_auto_build -Spython_distutils
+
+.PHONY: override_dh_auto_install
+override_dh_auto_install:
+ dh_auto_install -Spython_distutils
+
+.PHONY: override_dh_install
+override_dh_install:
+ rename.ul .py '' debian/tmp/usr/bin/*.py
+ dh_install
+
+.PHONY: override_dh_installman
+override_dh_installman:
+ $(MAKE) -C debian/manpages/
+ dh_installman
+
+.PHONY: override_dh_auto_test
+override_dh_auto_test:
+ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),)
+ set -e -x; \
+ for python in $(shell pyversions -r); do \
+ $$python /usr/bin/nosetests --with-doctest --verbose pdfminer/*.py; \
+ $(MAKE) -C samples clean; \
+ $(MAKE) -C samples PYTHON=$$python CMP="diff -u" test; \
+ done
+endif
+
+.PHONY: override_dh_installchangelogs
+override_dh_installchangelogs:
+ elinks -config-file /dev/null -dump -no-numbering -no-references docs/index.html \
+ | sed -n -e '/^Changes/,/^ ---/ { /^ / s/// p }' \
+ > docs/changelog
+ dh_installchangelogs docs/changelog
+
+.PHONY:
+get-orig-source:
+ sh -x $(SOURCE_DIR)/debian/get-orig-source.sh $(UPSTREAM_VERSION)
+
+.PHONY: build build-arch build-indep binary binary-arch binary-indep clean
+build build-arch build-indep binary binary-indep clean:
+ dh $(@)
+# In order not to confuse lintian, binary-arch is a separate target:
+binary-arch:
+
+# vim:ts=4 sw=4 noet
More information about the Python-modules-commits mailing list