[Python-modules-commits] r13415 - in packages/nltk/trunk/debian (3 files)

Sun Jun 13 17:42:37 UTC 2010

Date: Sunday, June 13, 2010 @ 17:42:33
  Author: jwilk
Revision: 13415

Provide nltk-wordnet.

Added:
  packages/nltk/trunk/debian/patches/wordnet-lexnames.diff
Modified:
  packages/nltk/trunk/debian/control
  packages/nltk/trunk/debian/patches/series

Modified: packages/nltk/trunk/debian/control
===================================================================

--- packages/nltk/trunk/debian/control	2010-06-13 11:48:32 UTC (rev 13414)
+++ packages/nltk/trunk/debian/control	2010-06-13 17:42:33 UTC (rev 13415)
@@ -13,10 +13,21 @@
 Package: python-nltk
 Architecture: all
 Depends: ${python:Depends}, ${misc:Depends}, python-yaml
-Suggests: python-numpy, python-matplotlib, prover9
+Suggests: python-numpy, python-matplotlib, prover9, nltk-wordnet
 Description: Python libraries for natural language processing
  The Natural Language Toolkit (NLTK) is a suite of open source Python
  modules, data and documentation for research and development in natural
  language processing. NLTK contains code supporting dozens of NLP tasks,
  along with 40 popular corpora and extensive documentation including a
  375-page online book.
+
+Package: nltk-wordnet
+Architecture: all
+Depends: python-nltk, wordnet-base, wordnet-sense-index
+Description: Python libraries for natural language processing (wordnet)
+ The Natural Language Toolkit (NLTK) is a suite of open source Python
+ modules, data and documentation for research and development in natural
+ language processing.
+ .
+ This package enables access to WordNet, an electronic lexical database of
+ the English language, from NLTK.

Modified: packages/nltk/trunk/debian/patches/series
===================================================================
--- packages/nltk/trunk/debian/patches/series	2010-06-13 11:48:32 UTC (rev 13414)
+++ packages/nltk/trunk/debian/patches/series	2010-06-13 17:42:33 UTC (rev 13415)
@@ -1,2 +1,3 @@
 10-remove-mallet-interface.patch
 etree.diff
+wordnet-lexnames.diff

Added: packages/nltk/trunk/debian/patches/wordnet-lexnames.diff
===================================================================
--- packages/nltk/trunk/debian/patches/wordnet-lexnames.diff	                        (rev 0)
+++ packages/nltk/trunk/debian/patches/wordnet-lexnames.diff	2010-06-13 17:42:33 UTC (rev 13415)
@@ -0,0 +1,83 @@
+Description:
+  Upstream version of wordnet corpus reader requires the ‘lexnames’ file which
+  is not shipped with the Debian wordnet-* packages. Hardcode values from this
+  file, so it's not required to exist anymore.
+Author: Jakub Wilk <jwilk at debian.org>
+Forwarded: not-needed
+Last-Update: 2010-06-13
+
+--- a/nltk/corpus/reader/wordnet.py
++++ b/nltk/corpus/reader/wordnet.py
+@@ -789,7 +789,7 @@
+ 
+     #: A list of file identifiers for all the fileids used by this
+     #: corpus reader.
+-    _FILES = ('cntlist.rev', 'lexnames', 'index.sense',
++    _FILES = ('cntlist.rev', 'index.sense',
+               'index.adj', 'index.adv', 'index.noun', 'index.verb',
+               'data.adj', 'data.adv', 'data.noun', 'data.verb',
+               'adj.exc', 'adv.exc', 'noun.exc', 'verb.exc', )
+@@ -819,16 +819,56 @@
+ 
+         self._data_file_map = {}
+         self._exception_map = {}
+-        self._lexnames = []
++        self._lexnames = [
++            'adj.all',
++            'adj.pert',
++            'adv.all',
++            'noun.Tops',
++            'noun.act',
++            'noun.animal',
++            'noun.artifact',
++            'noun.attribute',
++            'noun.body',
++            'noun.cognition',
++            'noun.communication',
++            'noun.event',
++            'noun.feeling',
++            'noun.food',
++            'noun.group',
++            'noun.location',
++            'noun.motive',
++            'noun.object',
++            'noun.person',
++            'noun.phenomenon',
++            'noun.plant',
++            'noun.possession',
++            'noun.process',
++            'noun.quantity',
++            'noun.relation',
++            'noun.shape',
++            'noun.state',
++            'noun.substance',
++            'noun.time',
++            'verb.body',
++            'verb.change',
++            'verb.cognition',
++            'verb.communication',
++            'verb.competition',
++            'verb.consumption',
++            'verb.contact',
++            'verb.creation',
++            'verb.emotion',
++            'verb.motion',
++            'verb.perception',
++            'verb.possession',
++            'verb.social',
++            'verb.stative',
++            'verb.weather',
++            'adj.ppl',
++        ]
+         self._key_count_file = None
+         self._key_synset_file = None
+ 
+-        # Load the lexnames
+-        for i, line in enumerate(self.open('lexnames')):
+-            index, lexname, _ = line.split()
+-            assert int(index) == i
+-            self._lexnames.append(lexname)
+-
+         # Load the indices for lemmas and synset offsets
+         self._load_lemma_pos_offset_map()
+