[Python-modules-commits] r13414 - in packages/pdfminer/tags (19 files)

jwilk at users.alioth.debian.org jwilk at users.alioth.debian.org
Sun Jun 13 11:48:36 UTC 2010


    Date: Sunday, June 13, 2010 @ 11:48:32
  Author: jwilk
Revision: 13414

[svn-buildpackage] Tagging pdfminer 20100424+dfsg-1

Added:
  packages/pdfminer/tags/20100424+dfsg-1/
  packages/pdfminer/tags/20100424+dfsg-1/debian/changelog
    (from rev 13413, packages/pdfminer/trunk/debian/changelog)
  packages/pdfminer/tags/20100424+dfsg-1/debian/clean
    (from rev 13407, packages/pdfminer/trunk/debian/clean)
  packages/pdfminer/tags/20100424+dfsg-1/debian/control
    (from rev 13407, packages/pdfminer/trunk/debian/control)
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff
    (from rev 13410, packages/pdfminer/trunk/debian/patches/encoding-data.diff)
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff
    (from rev 13412, packages/pdfminer/trunk/debian/patches/layout.diff)
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff
    (from rev 13412, packages/pdfminer/trunk/debian/patches/nested-tags.diff)
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff
    (from rev 13411, packages/pdfminer/trunk/debian/patches/pdf-testsuite.diff)
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series
    (from rev 13412, packages/pdfminer/trunk/debian/patches/series)
  packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install
    (from rev 13407, packages/pdfminer/trunk/debian/pdfminer-data.install)
  packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install
    (from rev 13407, packages/pdfminer/trunk/debian/python-pdfminer.install)
  packages/pdfminer/tags/20100424+dfsg-1/debian/rules
    (from rev 13407, packages/pdfminer/trunk/debian/rules)
Deleted:
  packages/pdfminer/tags/20100424+dfsg-1/debian/changelog
  packages/pdfminer/tags/20100424+dfsg-1/debian/clean
  packages/pdfminer/tags/20100424+dfsg-1/debian/control
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/poppler-data.diff
  packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series
  packages/pdfminer/tags/20100424+dfsg-1/debian/rules

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/changelog
===================================================================
--- packages/pdfminer/trunk/debian/changelog	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/changelog	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,12 +0,0 @@
-pdfminer (20100424+dfsg-1) UNRELEASED; urgency=low
-
-  * Initial release (closes: #584555).
-  * Strip non-DFSG-free test documents from the .orig.tar.gz.
-    + Run tests only on those files that are actually available.
-      [dfsg-testsuite.diff]
-  * Disable test suite for psparser.py, as it is currently broken.
-    [psparser-testsuite.diff]
-  * Reuse CMaps provided by the poppler-data package.
-    [poppler-data.diff]
-
- -- Jakub Wilk <jwilk at debian.org>  Sat, 12 Jun 2010 16:54:14 +0200

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/changelog (from rev 13413, packages/pdfminer/trunk/debian/changelog)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/changelog	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/changelog	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,15 @@
+pdfminer (20100424+dfsg-1) experimental; urgency=low
+
+  * Initial release (closes: #584555).
+  * Strip non-DFSG-free test documents from the .orig.tar.gz.
+    + Run tests only on those files that are actually available.
+      [dfsg-testsuite.diff]
+  * Disable test suite for psparser.py, as it is currently broken.
+    [psparser-testsuite.diff]
+  * Store encoding data in gzipped pickles rather than in Python modules.
+    This way we can save lots of disk space. [encoding-data.diff]
+  * Backport upstream patches:
+    + to fix a bug in layout analysis [layout.diff];
+    + to allow extraction of nested tags [nested-tags.diff].
+
+ -- Jakub Wilk <jwilk at debian.org>  Sun, 13 Jun 2010 12:27:50 +0200

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/clean
===================================================================
--- packages/pdfminer/trunk/debian/clean	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/clean	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,2 +0,0 @@
-debian/manpages/*.[0-9]
-docs/changelog

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/clean (from rev 13407, packages/pdfminer/trunk/debian/clean)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/clean	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/clean	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,3 @@
+debian/manpages/*.[0-9]
+docs/changelog
+pdfminer/cmap/*.gz

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/control
===================================================================
--- packages/pdfminer/trunk/debian/control	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/control	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,29 +0,0 @@
-Source: pdfminer
-Section: python
-Priority: optional
-Maintainer: Jakub Wilk <jwilk at debian.org>
-Uploaders: Debian Python Modules Team <python-modules-team at lists.alioth.debian.org>
-Build-Depends: debhelper (>= 7.0.50~),
-  python-all (>= 2.4), python-support (>= 0.90), python-nose,
-  elinks-lite | elinks,
-  docbook-xsl, docbook-xml, xsltproc, libxml2-utils
-XS-Python-Version: >= 2.4
-Standards-Version: 3.8.4
-Homepage: http://www.unixuser.org/~euske/python/pdfminer/
-Vcs-Svn: svn://svn.debian.org/python-modules/packages/pdfminer/trunk/
-Vcs-Browser: http://svn.debian.org/viewsvn/python-modules/packages/pdfminer/trunk/
-
-Package: python-pdfminer
-Architecture: all
-Depends: ${misc:Depends}, ${python:Depends}
-Suggests: poppler-data
-Description: PDF parser and analyser
- PDFMiner is a tool for extracting information from PDF documents, which
- focuses entirely on getting and analyzing text data. It allows to obtain the
- exact location of texts in a page, as well as other information such as fonts
- or lines. It includes a PDF converter that can transform PDF files into other
- text formats (such as HTML). It has an extensible PDF parser that can be used
- for other purposes instead of text analysis. 
- .
- This package provides the Python module and the command-line tools: pdf2txt
- and dumppdf.

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/control (from rev 13407, packages/pdfminer/trunk/debian/control)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/control	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/control	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,40 @@
+Source: pdfminer
+Section: python
+Priority: optional
+Maintainer: Jakub Wilk <jwilk at debian.org>
+Uploaders: Debian Python Modules Team <python-modules-team at lists.alioth.debian.org>
+Build-Depends: debhelper (>= 7.0.50~),
+  python-all (>= 2.4), python-support (>= 0.90), python-nose,
+  elinks-lite | elinks,
+  docbook-xsl, docbook-xml, xsltproc, libxml2-utils
+XS-Python-Version: >= 2.4
+Standards-Version: 3.8.4
+Homepage: http://www.unixuser.org/~euske/python/pdfminer/
+Vcs-Svn: svn://svn.debian.org/python-modules/packages/pdfminer/trunk/
+Vcs-Browser: http://svn.debian.org/viewsvn/python-modules/packages/pdfminer/trunk/
+
+Package: python-pdfminer
+Architecture: all
+Depends: ${misc:Depends}, ${python:Depends}
+Suggests: pdfminer-data
+Description: PDF parser and analyser
+ PDFMiner is a tool for extracting information from PDF documents, which
+ focuses entirely on getting and analyzing text data. It allows one to obtain
+ the exact location of texts in a page, as well as other information such as
+ fonts or lines. It includes a PDF converter that can transform PDF files into
+ other text formats (such as HTML). It has an extensible PDF parser that can
+ be used for other purposes instead of text analysis.
+ .
+ This package provides the Python module and the command-line tools: pdf2txt
+ and dumppdf.
+
+Package: pdfminer-data
+Architecture: all
+Depends: ${misc:Depends}
+Recommends: python-pdfminer
+Description: PDF parser and analyser (encoding data)
+ PDFMiner is a tool for extracting information from PDF documents, which
+ focuses entirely on getting and analyzing text data.
+ .
+ This package contains the encoding data needed to read some PDF documents in
+ CJK (Chinese, Japanese, Korean) languages.

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff (from rev 13410, packages/pdfminer/trunk/debian/patches/encoding-data.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/encoding-data.diff	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,193 @@
+Description:
+  Store encoding data in gzipped pickles rather than in Python modules.
+  This way we can save lots of disk space.
+Author: Jakub Wilk <jwilk at debian.org>
+Forwarded: yes
+Last-Update: 2010-06-12
+
+--- a/setup.py
++++ b/setup.py
+@@ -22,6 +22,9 @@
+     'pdfminer',
+     'pdfminer.cmap'
+     ],
++    package_data={
++    'pdfminer.cmap': ['*.pickle.gz'],
++    },
+     scripts=[
+     'tools/pdf2txt.py',
+     'tools/dumppdf.py'
+--- a/Makefile
++++ b/Makefile
+@@ -36,17 +36,17 @@
+ CONV_CMAP=$(PYTHON) tools/conv_cmap.py
+ CMAPSRC=cmaprsrc
+ CMAPDST=pdfminer/cmap
+-cmap: $(CMAPDST)/TO_UNICODE_Adobe_CNS1.py $(CMAPDST)/TO_UNICODE_Adobe_GB1.py \
+-	$(CMAPDST)/TO_UNICODE_Adobe_Japan1.py $(CMAPDST)/TO_UNICODE_Adobe_Korea1.py
++cmap: $(CMAPDST)/to-unicode-Adobe-CNS1.pickle.gz $(CMAPDST)/to-unicode-Adobe-GB1.pickle.gz \
++	$(CMAPDST)/to-unicode-Adobe-Japan1.pickle.gz $(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz
+ cmap_clean:
+ 	cd $(CMAPDST) && make cmap_clean
+-$(CMAPDST)/TO_UNICODE_Adobe_CNS1.py:
++$(CMAPDST)/to-unicode-Adobe-CNS1.pickle.gz:
+ 	$(CONV_CMAP) $(CMAPDST) Adobe-CNS1 $(CMAPSRC)/cid2code_Adobe_CNS1.txt cp950 big5
+-$(CMAPDST)/TO_UNICODE_Adobe_GB1.py:
++$(CMAPDST)/to-unicode-Adobe-GB1.pickle.gz:
+ 	$(CONV_CMAP) $(CMAPDST) Adobe-GB1 $(CMAPSRC)/cid2code_Adobe_GB1.txt cp936 gb2312
+-$(CMAPDST)/TO_UNICODE_Adobe_Japan1.py:
++$(CMAPDST)/to-unicode-Adobe-Japan1.pickle.gz:
+ 	$(CONV_CMAP) $(CMAPDST) Adobe-Japan1 $(CMAPSRC)/cid2code_Adobe_Japan1.txt cp932 euc-jp
+-$(CMAPDST)/TO_UNICODE_Adobe_Korea1.py:
++$(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz:
+ 	$(CONV_CMAP) $(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt cp949 euc-kr
+ 
+ test: cmap
+--- a/pdfminer/cmapdb.py
++++ b/pdfminer/cmapdb.py
+@@ -15,6 +15,9 @@
+ import re
+ import os
+ import os.path
++import gzip
++import cPickle as pickle
++import cmap
+ from struct import pack, unpack
+ from psparser import PSStackParser
+ from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF
+@@ -210,34 +213,50 @@
+ class CMapDB(object):
+ 
+     debug = 0
++    _cmap_cache = {}
++    _umap_cache = {}
+     
+     class CMapNotFound(CMapError): pass
+ 
+     @classmethod
++    def _load_data(klass, name):
++        filename = '%s.pickle.gz' % name
++        if klass.debug:
++            print >>sys.stderr, 'loading:', name
++        for directory in os.path.dirname(cmap.__file__), '/usr/share/pdfminer/':
++            path = os.path.join(directory, filename)
++            if os.path.exists(path):
++                gzfile = gzip.open(path)
++                try:
++                    return type(name, (), pickle.loads(gzfile.read()))
++                finally:
++                    gzfile.close()
++        else:
++            raise CMapDB.CMapNotFound(name)
++
++    @classmethod
+     def get_cmap(klass, name):
+         if name == 'Identity-H':
+             return IdentityCMap(False)
+         elif name == 'Identity-V':
+             return IdentityCMap(True)
+-        modname = 'pdfminer.cmap.%s' % name.replace('-','_')
+-        if klass.debug:
+-            print >>sys.stderr, 'loading:', modname
+         try:
+-            module = __import__(modname, fromlist=['pdfminer.cmap'])
+-        except ImportError:
+-            raise CMapDB.CMapNotFound(name)
+-        return PyCMap(name, module)
++            return klass._cmap_cache[name]
++        except KeyError:
++            pass
++        data = klass._load_data(name)
++        klass._cmap_cache[name] = cmap = PyCMap(name, data)
++        return cmap
+ 
+     @classmethod
+     def get_unicode_map(klass, name, vertical=False):
+-        modname = 'pdfminer.cmap.TO_UNICODE_%s' % name.replace('-','_')
+-        if klass.debug:
+-            print >>sys.stderr, 'loading:', modname, vertical
+         try:
+-            module = __import__(modname, fromlist=['pdfminer.cmap'])
+-        except ImportError:
+-            raise CMapDB.CMapNotFound(name)
+-        return PyUnicodeMap(name, module, vertical)
++            return klass._umap_cache[name][vertical]
++        except KeyError:
++            pass
++        data = klass._load_data('to-unicode-%s' % name)
++        klass._umap_cache[name] = umaps = [PyUnicodeMap(name, data, v) for v in (False, True)]
++        return umaps[vertical]
+ 
+ 
+ ##  CMapParser
+--- a/pdfminer/cmap/Makefile
++++ b/pdfminer/cmap/Makefile
+@@ -6,5 +6,4 @@
+ 	-rm *.pyc *.pyo
+ 
+ cmap_clean:
+-	-rm *.py
+-	touch __init__.py
++	rm -f *.pickle.gz
+--- a/tools/conv_cmap.py
++++ b/tools/conv_cmap.py
+@@ -1,6 +1,8 @@
+ #!/usr/bin/env python
+ import sys
+ import os.path
++import gzip
++import cPickle as pickle
+ 
+ def process_cid2code(fp, check_codecs=[]):
+ 
+@@ -118,9 +120,6 @@
+         print 'usage: %s output_dir regname cid2code.txt codecs ...' % argv[0]
+         return 100
+     
+-    def pyname(name):
+-        return name.replace('-','_')+'.py'
+-
+     args = argv[1:]
+     if len(args) < 3: return usage()
+     (outdir, regname, src) = args[:3]
+@@ -132,22 +131,24 @@
+     fp.close()
+ 
+     for (name, cmap) in code2cid.iteritems():
+-        fname = pyname(name)
++        fname = '%s.pickle.gz' % name
+         print >>sys.stderr, 'writing %r...' % fname
+-        fp = file(os.path.join(outdir, fname), 'w')
+-        print >>fp, '#!/usr/bin/env python'
+-        print >>fp, '#', fname
+-        print >>fp, 'IS_VERTICAL = %r' % is_vertical.get(name, False)
+-        print >>fp, 'CODE2CID = %r' % cmap
++        fp = gzip.open(os.path.join(outdir, fname), 'wb')
++        data = dict(
++            IS_VERTICAL=is_vertical.get(name, False),
++            CODE2CID=cmap,
++        )
++        fp.write(pickle.dumps(data))
+         fp.close()
+ 
+-    fname = 'TO_UNICODE_'+pyname(regname)
++    fname = 'to-unicode-%s.pickle.gz' % regname
+     print >>sys.stderr, 'writing %r...' % fname
+-    fp = file(os.path.join(outdir, fname), 'w')
+-    print >>fp, '#!/usr/bin/env python'
+-    print >>fp, '#', fname
+-    print >>fp, 'CID2UNICHR_H = %r' % cid2unichr_h
+-    print >>fp, 'CID2UNICHR_V = %r' % cid2unichr_v
++    fp = gzip.open(os.path.join(outdir, fname), 'wb')
++    data = dict(
++        CID2UNICHR_H=cid2unichr_h,
++        CID2UNICHR_V=cid2unichr_v,
++    )
++    fp.write(pickle.dumps(data))
+     fp.close()
+ 
+     return 0
+--- /dev/null
++++ b/pdfminer/cmap/__init__.py
+@@ -0,0 +1 @@
++#

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff (from rev 13412, packages/pdfminer/trunk/debian/patches/layout.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/layout.diff	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,18 @@
+Description: Fix a bug in layout analysis.
+Origin: backport, http://code.google.com/p/pdfminerr/source/detail?r=226
+Last-Update: 2010-06-13
+
+--- a/pdfminer/layout.py
++++ b/pdfminer/layout.py
+@@ -527,8 +527,9 @@
+ 
+ ##  group_boxes
+ ##
+-def group_boxes(groupfunc, objs, distfunc, debug=0):
+-    assert objs
++def group_boxes(groupfunc, objs0, distfunc, debug=0):
++    assert objs0
++    objs = objs0[:]
+     while 2 <= len(objs):
+         mindist = INF
+         minpair = None

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff (from rev 13412, packages/pdfminer/trunk/debian/patches/nested-tags.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/nested-tags.diff	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,46 @@
+Description: Allow extraction of nested tags.
+Origin: backport, http://code.google.com/p/pdfminerr/source/detail?r=226
+Last-Update: 2010-06-13
+
+--- a/pdfminer/converter.py
++++ b/pdfminer/converter.py
+@@ -6,7 +6,7 @@
+ from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
+ from layout import LTContainer, LTPage, LTText, LTLine, LTRect, LTPolygon
+ from layout import LTFigure, LTImage, LTChar, LTTextLine, LTTextBox, LTTextGroup
+-from utils import apply_matrix_pt, mult_matrix
++from utils import apply_matrix_pt, mult_matrix, translate_matrix
+ from utils import enc, bbox2str, create_bmp
+ 
+ 
+@@ -354,7 +354,7 @@
+         self.outfp = outfp
+         self.codec = codec
+         self.pageno = 0
+-        self.tag = None
++        self.stack = []
+         return
+ 
+     def render_string(self, textstate, seq):
+@@ -388,16 +388,16 @@
+             s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
+                          in sorted(props.iteritems()) )
+         self.outfp.write('<%s%s>' % (enc(tag.name), s))
+-        self.tag = tag
++        self.stack.append(tag)
+         return
+ 
+     def end_tag(self):
+-        assert self.tag
+-        self.outfp.write('</%s>' % enc(self.tag.name))
+-        self.tag = None
++        assert self.stack
++        tag = self.stack.pop(-1)
++        self.outfp.write('</%s>' % enc(tag.name))
+         return
+ 
+     def do_tag(self, tag, props=None):
+         self.begin_tag(tag, props)
+-        self.tag = None
++        self.stack.pop(-1)
+         return

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff
===================================================================
--- packages/pdfminer/trunk/debian/patches/pdf-testsuite.diff	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,53 +0,0 @@
-Description:
-  Run tests only on those files that are actually present in the tarball.
-  This patch is required because several test documents were removed from the
-  Debian source package.
-Author: Jakub Wilk <jwilk at debian.org>
-Forwarded: no
-Last-Update: 2010-06-08
-
---- a/samples/Makefile
-+++ b/samples/Makefile
-@@ -6,38 +6,10 @@
- PYTHON=python
- PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -Dx -p1
- 
--HTMLS= \
--	simple1.html \
--	simple2.html \
--	dmca.html \
--	f1040nr.html \
--	i1040nr.html \
--	jo.html \
--	kampo.html \
--	naacl06-shinyama.html \
--	nlp2004slides.html
--
--TEXTS= \
--	simple1.txt \
--	simple2.txt \
--	dmca.txt \
--	f1040nr.txt \
--	i1040nr.txt \
--	jo.txt \
--	kampo.txt \
--	naacl06-shinyama.txt \
--	nlp2004slides.txt
--
--XMLS= \
--	simple1.xml \
--	simple2.xml \
--	dmca.xml \
--	f1040nr.xml \
--	i1040nr.xml \
--	jo.xml \
--	kampo.xml \
--	naacl06-shinyama.xml \
--	nlp2004slides.xml
-+PDFS = $(wildcard *.pdf)
-+HTMLS = $(PDFS:.pdf=.html)
-+TEXTS = $(PDFS:.pdf=.txt)
-+XMLS= $(PDFS:.pdf=.xml)
- 
- test: htmls texts xmls
- 

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff (from rev 13411, packages/pdfminer/trunk/debian/patches/pdf-testsuite.diff)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/pdf-testsuite.diff	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,53 @@
+Description:
+  Run tests only on those files that are actually present in the tarball.
+  This patch is required because several test documents were removed from the
+  Debian source package.
+Author: Jakub Wilk <jwilk at debian.org>
+Forwarded: yes
+Last-Update: 2010-06-08
+
+--- a/samples/Makefile
++++ b/samples/Makefile
+@@ -6,38 +6,10 @@
+ PYTHON=python
+ PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -Dx -p1
+ 
+-HTMLS= \
+-	simple1.html \
+-	simple2.html \
+-	dmca.html \
+-	f1040nr.html \
+-	i1040nr.html \
+-	jo.html \
+-	kampo.html \
+-	naacl06-shinyama.html \
+-	nlp2004slides.html
+-
+-TEXTS= \
+-	simple1.txt \
+-	simple2.txt \
+-	dmca.txt \
+-	f1040nr.txt \
+-	i1040nr.txt \
+-	jo.txt \
+-	kampo.txt \
+-	naacl06-shinyama.txt \
+-	nlp2004slides.txt
+-
+-XMLS= \
+-	simple1.xml \
+-	simple2.xml \
+-	dmca.xml \
+-	f1040nr.xml \
+-	i1040nr.xml \
+-	jo.xml \
+-	kampo.xml \
+-	naacl06-shinyama.xml \
+-	nlp2004slides.xml
++PDFS = $(wildcard *.pdf)
++HTMLS = $(PDFS:.pdf=.html)
++TEXTS = $(PDFS:.pdf=.txt)
++XMLS= $(PDFS:.pdf=.xml)
+ 
+ test: htmls texts xmls
+ 

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/poppler-data.diff
===================================================================
--- packages/pdfminer/trunk/debian/patches/poppler-data.diff	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/poppler-data.diff	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,95 +0,0 @@
-Description:
-  Reuse CMaps provided by the poppler-data package. This way we don't need to
-  ship pdfminer.cmap.* modules in the Debian package.
-Author: Jakub Wilk <jwilk at debian.org>
-Bug-Debian: http://bugs.debian.org/584555
-Forwarded: not-needed
-Last-Update: 2010-06-08
-
---- a/pdfminer/cmapdb.py
-+++ b/pdfminer/cmapdb.py
-@@ -15,6 +15,7 @@
- import re
- import os
- import os.path
-+import glob
- from struct import pack, unpack
- from psparser import PSStackParser
- from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF
-@@ -204,6 +205,22 @@
-     def __repr__(self):
-         return '<PyUnicodeMap: %s>' % (self.name)
- 
-+def _cache(method):
-+
-+    def wrapped_method(self, *args):
-+        cache_attr_name = '_%s_cache' % method.__name__
-+        cache = getattr(self, cache_attr_name, None)
-+        if cache is None:
-+            cache = {}
-+            setattr(self, cache_attr_name, cache)
-+        try:
-+            return cache[args]
-+        except KeyError:
-+            result = method(self, *args)
-+            cache[args] = result
-+            return result
-+
-+    return wrapped_method
- 
- ##  CMapDB
- ##
-@@ -214,6 +231,7 @@
-     class CMapNotFound(CMapError): pass
- 
-     @classmethod
-+    @_cache
-     def get_cmap(klass, name):
-         if name == 'Identity-H':
-             return IdentityCMap(False)
-@@ -225,10 +243,22 @@
-         try:
-             module = __import__(modname, fromlist=['pdfminer.cmap'])
-         except ImportError:
-+            for directory in glob.glob('/usr/share/poppler/cMap/*/'):
-+                if not os.path.exists(directory + name):
-+                    continue
-+                cmap = FileCMap()
-+                fp = file(directory + name, 'rb')
-+                try:
-+                    CMapParser(cmap, fp).run()
-+                finally:
-+                    fp.close()
-+                return cmap
-             raise CMapDB.CMapNotFound(name)
--        return PyCMap(name, module)
-+        else:
-+            return PyCMap(name, module)
- 
-     @classmethod
-+    @_cache
-     def get_unicode_map(klass, name, vertical=False):
-         modname = 'pdfminer.cmap.TO_UNICODE_%s' % name.replace('-','_')
-         if klass.debug:
-@@ -236,8 +266,20 @@
-         try:
-             module = __import__(modname, fromlist=['pdfminer.cmap'])
-         except ImportError:
-+            for directory in glob.glob('/usr/share/poppler/cMap/*/'):
-+                filename = directory + name + '-UCS2'
-+                if not os.path.exists(filename):
-+                    continue
-+                cmap = FileUnicodeMap()
-+                fp = file(filename, 'rb')
-+                try:
-+                    CMapParser(cmap, fp).run()
-+                finally:
-+                    fp.close()
-+                return cmap
-             raise CMapDB.CMapNotFound(name)
--        return PyUnicodeMap(name, module, vertical)
-+        else:
-+            return PyUnicodeMap(name, module, vertical)
- 
- 
- ##  CMapParser

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series
===================================================================
--- packages/pdfminer/trunk/debian/patches/series	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,3 +0,0 @@
-pdf-testsuite.diff
-psparser-testsuite.diff
-poppler-data.diff

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series (from rev 13412, packages/pdfminer/trunk/debian/patches/series)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/patches/series	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,5 @@
+layout.diff
+nested-tags.diff
+pdf-testsuite.diff
+psparser-testsuite.diff
+encoding-data.diff

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install (from rev 13407, packages/pdfminer/trunk/debian/pdfminer-data.install)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/pdfminer-data.install	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1 @@
+usr/lib/python*/*-packages/pdfminer/cmap/*.pickle.gz /usr/share/pdfminer/

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install (from rev 13407, packages/pdfminer/trunk/debian/python-pdfminer.install)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/python-pdfminer.install	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,5 @@
+/usr/bin/pdf2txt
+/usr/bin/dumppdf
+/usr/lib/python*/*-packages/pdfminer-*.egg-info
+/usr/lib/python*/*-packages/pdfminer/*.py
+/usr/lib/python*/*-packages/pdfminer/cmap/*.py

Deleted: packages/pdfminer/tags/20100424+dfsg-1/debian/rules
===================================================================
--- packages/pdfminer/trunk/debian/rules	2010-06-12 14:54:34 UTC (rev 13406)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/rules	2010-06-13 11:48:32 UTC (rev 13414)
@@ -1,52 +0,0 @@
-#!/usr/bin/make -f
-
-SOURCE_DIR = $(dir $(firstword $(MAKEFILE_LIST)))/..
-UPSTREAM_VERSION = $(shell cd $(SOURCE_DIR) && dpkg-parsechangelog | sed -n -r -e '/^Version: ([0-9.]+)([+]dfsg).*/ { s//\1/; p; q; }')
-
-.PHONY: override_dh_auto_build
-override_dh_auto_build:
-	dh_auto_build -Spython_distutils
-
-.PHONY: override_dh_auto_install
-override_dh_auto_install:
-	dh_auto_install -Spython_distutils
-
-.PHONY: override_dh_install
-override_dh_install:
-	rename.ul .py '' debian/python-pdfminer/usr/bin/*.py
-	dh_install
-
-.PHONY: override_dh_installman
-override_dh_installman:
-	$(MAKE) -C debian/manpages/
-	dh_installman
-
-.PHONY: override_dh_auto_test
-override_dh_auto_test:
-ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),)
-	set -e -x; \
-	for python in $(shell pyversions -r); do \
-		$$python /usr/bin/nosetests --with-doctest --verbose pdfminer/*.py; \
-		$(MAKE) -C samples clean; \
-		$(MAKE) -C samples PYTHON=$$python CMP="diff -u" test; \
-	done
-endif
-
-.PHONY: override_dh_installchangelogs
-override_dh_installchangelogs:
-	elinks -config-file /dev/null -dump -no-numbering -no-references docs/index.html \
-	| sed -n -e '/^Changes/,/^  ---/ { /^     / s/// p }' \
-	> docs/changelog
-	dh_installchangelogs docs/changelog
-
-.PHONY:
-get-orig-source:
-	sh -x $(SOURCE_DIR)/debian/get-orig-source.sh $(UPSTREAM_VERSION)
-
-.PHONY: build build-arch build-indep binary binary-arch binary-indep clean
-build build-arch build-indep binary binary-indep clean:
-	dh $(@)
-# In order not to confuse lintian, binary-arch is a separate target:
-binary-arch:
-
-# vim:ts=4 sw=4 noet

Copied: packages/pdfminer/tags/20100424+dfsg-1/debian/rules (from rev 13407, packages/pdfminer/trunk/debian/rules)
===================================================================
--- packages/pdfminer/tags/20100424+dfsg-1/debian/rules	                        (rev 0)
+++ packages/pdfminer/tags/20100424+dfsg-1/debian/rules	2010-06-13 11:48:32 UTC (rev 13414)
@@ -0,0 +1,53 @@
+#!/usr/bin/make -f
+
+SOURCE_DIR = $(dir $(firstword $(MAKEFILE_LIST)))/..
+UPSTREAM_VERSION = $(shell cd $(SOURCE_DIR) && dpkg-parsechangelog | sed -n -r -e '/^Version: ([0-9.]+)([+]dfsg).*/ { s//\1/; p; q; }')
+
+.PHONY: override_dh_auto_build
+override_dh_auto_build:
+	$(MAKE) cmap
+	dh_auto_build -Spython_distutils
+
+.PHONY: override_dh_auto_install
+override_dh_auto_install:
+	dh_auto_install -Spython_distutils
+
+.PHONY: override_dh_install
+override_dh_install:
+	rename.ul .py '' debian/tmp/usr/bin/*.py
+	dh_install
+
+.PHONY: override_dh_installman
+override_dh_installman:
+	$(MAKE) -C debian/manpages/
+	dh_installman
+
+.PHONY: override_dh_auto_test
+override_dh_auto_test:
+ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),)
+	set -e -x; \
+	for python in $(shell pyversions -r); do \
+		$$python /usr/bin/nosetests --with-doctest --verbose pdfminer/*.py; \
+		$(MAKE) -C samples clean; \
+		$(MAKE) -C samples PYTHON=$$python CMP="diff -u" test; \
+	done
+endif
+
+.PHONY: override_dh_installchangelogs
+override_dh_installchangelogs:
+	elinks -config-file /dev/null -dump -no-numbering -no-references docs/index.html \
+	| sed -n -e '/^Changes/,/^  ---/ { /^     / s/// p }' \
+	> docs/changelog
+	dh_installchangelogs docs/changelog
+
+.PHONY:
+get-orig-source:
+	sh -x $(SOURCE_DIR)/debian/get-orig-source.sh $(UPSTREAM_VERSION)
+
+.PHONY: build build-arch build-indep binary binary-arch binary-indep clean
+build build-arch build-indep binary binary-indep clean:
+	dh $(@)
+# In order not to confuse lintian, binary-arch is a separate target:
+binary-arch:
+
+# vim:ts=4 sw=4 noet




More information about the Python-modules-commits mailing list