[Python-modules-commits] [pysword] 01/03: import pysword_0.2.3.orig.tar.gz

Raoul Snyman superfly-guest at moszumanska.debian.org
Mon Mar 6 22:30:25 UTC 2017


This is an automated email from the git hooks/post-receive script.

superfly-guest pushed a commit to branch master
in repository pysword.

commit 8c39b0b17567d67357843c47e33e6dabf78a2330
Author: Raoul Snyman <raoul at snyman.info>
Date:   Mon Mar 6 14:56:26 2017 -0700

    import pysword_0.2.3.orig.tar.gz
---
 PKG-INFO                              |  188 +++
 README.rst                            |  169 +++
 debian/changelog                      |    6 +
 debian/compat                         |    1 +
 debian/control                        |   49 +
 debian/copyright                      |   57 +
 debian/rules                          |   11 +
 debian/source/format                  |    1 +
 debian/watch                          |    3 +
 pysword.egg-info/PKG-INFO             |  188 +++
 pysword.egg-info/SOURCES.txt          |   13 +
 pysword.egg-info/dependency_links.txt |    1 +
 pysword.egg-info/top_level.txt        |    1 +
 pysword/__init__.py                   |   21 +
 pysword/bible.py                      |  236 ++++
 pysword/books.py                      |  196 +++
 pysword/canon-parser.py               |  119 ++
 pysword/canons.py                     | 2200 +++++++++++++++++++++++++++++++++
 pysword/cleaner.py                    |  150 +++
 pysword/modules.py                    |  126 ++
 setup.cfg                             |    5 +
 setup.py                              |   32 +
 22 files changed, 3773 insertions(+)

diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..cb3ff78
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,188 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.3
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: second at tgc.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+        
+        This project is **not** an official `CrossWire <http://crosswire.org/>`_
+        project. It merely provides an alternative way to read the bible modules
+        created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+        
+        Features
+        --------
+        
+        -  Read SWORD bibles (not commentaries etc.)
+        -  Detection of locally installed bible modules.
+        -  Supports all known SWORD module formats (ztext, ztext4, rawtext,
+           rawtext4)
+        -  Read from zipped modules, like those available from
+           http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+        -  Clean text of OSIS, GBF or ThML tags.
+        -  Supports both python 2 and 3 (tested with 2.7 and 3.5)
+        
+        License
+        -------
+        
+        Since parts of the code are derived and/or copied from the SWORD project
+        (see canons.py) which is GPL2, this code is also under the GPL2 license.
+        
+        Installation
+        ------------
+        
+        PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+        but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+        for install using ``pip`` or ``easy_install``.
+        It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+        and will soon be available as a package in Debian and Fedora.
+        
+        Example code
+        ------------
+        
+        Use modules from default datapath
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Find available modules/bibles in standard data path.
+            # For non-standard data path, pass it as an argument to the SwordModules constructor.
+            modules = SwordModules()
+            # In this case we'll assume the modules found is something like:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Load module from zip-file
+        ~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Load module in zip
+            # NB: the zip content is only available as long as the SwordModules object exists
+            modules = SwordModules('KJV.zip')
+            # In this case the module found is:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Manually create bible
+        ~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.bible import SwordBible
+            # Create the bible. The arguments are:
+            # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+            # Only the first is required, the rest have default values which should work in most cases.
+            bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Run tests
+        ---------
+        
+        To run the testsuite, first run the script that downloads the files used
+        for testing, and then use nosetests to run the testsuite:
+        
+        .. code:: sh
+        
+            $ python tests/resources/download_bibles.py
+            $ nosetests -v tests/
+        
+        The tests should run and pass using both python 2 and 3.
+        
+        Contributing
+        ------------
+        
+        If you want to contribute, you are most welcome to do so!
+        Feel free to report issues and create merge requests at https://gitlab.com/tgc-dk/pysword
+        If you create a merge request please include a test that proves that your code actually works.
+        
+        Module formats
+        --------------
+        
+        I'll use Python's struct module's format strings to describe byte
+        formatting. See https://docs.python.org/3/library/struct.html
+        
+        There are currently 4 formats for bible modules in SWORD.
+        
+        ztext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Take the Old Testament (OT) for example. Three files:
+        
+        -  ot.bzv: Maps verses to character ranges in compressed buffers. 10
+           bytes ('<IIH') for each verse in the Bible:
+        
+           -  buffer\_num (I): which compressed buffer the verse is located in
+           -  verse\_start (I): the location in the uncompressed buffer where
+              the verse begins
+           -  verse\_len (H): length of the verse, in uncompressed characters
+        
+        These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+        (docs later). So the record for the verse with index x starts at byte
+        10\*x.
+        
+        -  ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+           ('<III') for each compressed buffer:
+        
+           -  offset (I): where the compressed buffer starts in the file
+           -  size (I): the length of the compressed data, in bytes
+           -  uc\_size (I): the length of the uncompressed data, in bytes
+              (unused)
+        
+        These 12-byte records are densely packed, indexed by buffer\_num (see
+        previous). So the record for compressed buffer buffer\_num starts at
+        byte 12\*buffer\_num.
+        
+        -  ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+           'offset'.
+        
+        ztext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+        is now represented by a 4-byte integer (I), making the record 12 bytes in
+        all.
+        
+        rawtext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Again OT example. Two files:
+        
+        -  ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+           for each verse in the Bible:
+        
+           -  verse\_start (I): the location in the textfile where the verse
+              begins
+           -  verse\_len (H): length of the verse, in characters
+        
+        -  ot: Contains the text. Read 'verse\_len' characters starting at
+           'verse\_start'.
+        
+        rawtext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        rawtext4 is the same as rawtext, except that in the vss-file the
+        verse\_len is now represented by a 4-byte integer (I), making the record 8
+        bytes in all.
+        
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..70d0e1e
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,169 @@
+A native Python reader of the SWORD Project Bible Modules
+
+This project is **not** an official `CrossWire <http://crosswire.org/>`_
+project. It merely provides an alternative way to read the bible modules
+created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+Features
+--------
+
+-  Read SWORD bibles (not commentaries etc.)
+-  Detection of locally installed bible modules.
+-  Supports all known SWORD module formats (ztext, ztext4, rawtext,
+   rawtext4)
+-  Read from zipped modules, like those available from
+   http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+-  Clean text of OSIS, GBF or ThML tags.
+-  Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+License
+-------
+
+Since parts of the code are derived and/or copied from the SWORD project
+(see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+Installation
+------------
+
+PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+for install using ``pip`` or ``easy_install``.
+It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+and will soon be available as a package in Debian and Fedora.
+
+Example code
+------------
+
+Use modules from default datapath
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    from pysword.modules import SwordModules
+    # Find available modules/bibles in standard data path.
+    # For non-standard data path, pass it as an argument to the SwordModules constructor.
+    modules = SwordModules()
+    # In this case we'll assume the modules found is something like:
+    # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+    found_modules = modules.parse_modules()
+    bible = modules.get_bible_from_module('KJV')
+    # Get John chapter 3 verse 16
+    output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Load module from zip-file
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    from pysword.modules import SwordModules
+    # Load module in zip
+    # NB: the zip content is only available as long as the SwordModules object exists
+    modules = SwordModules('KJV.zip')
+    # In this case the module found is:
+    # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+    found_modules = modules.parse_modules()
+    bible = modules.get_bible_from_module('KJV')
+    # Get John chapter 3 verse 16
+    output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Manually create bible
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    from pysword.bible import SwordBible
+    # Create the bible. The arguments are:
+    # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+    # Only the first is required, the rest have default values which should work in most cases.
+    bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+    # Get John chapter 3 verse 16
+    output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Run tests
+---------
+
+To run the testsuite, first run the script that downloads the files used
+for testing, and then use nosetests to run the testsuite:
+
+.. code:: sh
+
+    $ python tests/resources/download_bibles.py
+    $ nosetests -v tests/
+
+The tests should run and pass using both python 2 and 3.
+
+Contributing
+------------
+
+If you want to contribute, you are most welcome to do so!
+Feel free to report issues and create merge requests at https://gitlab.com/tgc-dk/pysword
+If you create a merge request please include a test that proves that your code actually works.
+
+Module formats
+--------------
+
+I'll use Python's struct module's format strings to describe byte
+formatting. See https://docs.python.org/3/library/struct.html
+
+There are currently 4 formats for bible modules in SWORD.
+
+ztext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Take the Old Testament (OT) for example. Three files:
+
+-  ot.bzv: Maps verses to character ranges in compressed buffers. 10
+   bytes ('<IIH') for each verse in the Bible:
+
+   -  buffer\_num (I): which compressed buffer the verse is located in
+   -  verse\_start (I): the location in the uncompressed buffer where
+      the verse begins
+   -  verse\_len (H): length of the verse, in uncompressed characters
+
+These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+(docs later). So the record for the verse with index x starts at byte
+10\*x.
+
+-  ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+   ('<III') for each compressed buffer:
+
+   -  offset (I): where the compressed buffer starts in the file
+   -  size (I): the length of the compressed data, in bytes
+   -  uc\_size (I): the length of the uncompressed data, in bytes
+      (unused)
+
+These 12-byte records are densely packed, indexed by buffer\_num (see
+previous). So the record for compressed buffer buffer\_num starts at
+byte 12\*buffer\_num.
+
+-  ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+   'offset'.
+
+ztext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+is now represented by a 4-byte integer (I), making the record 12 bytes in
+all.
+
+rawtext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Again OT example. Two files:
+
+-  ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+   for each verse in the Bible:
+
+   -  verse\_start (I): the location in the textfile where the verse
+      begins
+   -  verse\_len (H): length of the verse, in characters
+
+-  ot: Contains the text. Read 'verse\_len' characters starting at
+   'verse\_start'.
+
+rawtext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+rawtext4 is the same as rawtext, except that in the vss-file the
+verse\_len is now represented by a 4-byte integer (I), making the record 8
+bytes in all.
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..0e83d16
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,6 @@
+pysword (0.2.3-1) unstable; urgency=medium
+
+  * Redo package and repository after making a mess
+  * Initial release. (Closes: #821461)
+
+ -- Raoul Snyman <raoul at snyman.info>  Mon, 06 Mar 2017 14:48:14 -0700
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..d24b599
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,49 @@
+Source: pysword
+Maintainer: Debian Python Modules Team <python-modules-team at lists.alioth.debian.org>
+Uploaders: Raoul Snyman <raoul at snyman.info>
+Section: python
+Priority: optional
+Build-Depends: debhelper (>= 9),
+ dh-python,
+ python-all (>= 2.6.6-3),
+ python-setuptools,
+ python3-all,
+ python3-setuptools
+Standards-Version: 3.9.8
+Homepage: https://gitlab.com/tgc-dk/pysword
+Vcs-Git: https://anonscm.debian.org/git/python-modules/packages/pysword.git
+Vcs-Browser: https://anonscm.debian.org/git/python-modules/packages/pysword.git
+
+Package: python-pysword
+Architecture: all
+Depends: ${misc:Depends}, ${python:Depends}
+Description: native Python reader module for the SWORD Project (Python 2)
+ This project is not an official CrossWire project. It merely provides an
+ alternative way to read the bible modules created by CrossWire's SWORD
+ project.
+ .
+ Features:
+  * Read SWORD bibles (not commentaries etc.)
+  * Detection of locally installed bible modules.
+  * Supports all known SWORD module formats (ztext, ztext4, rawtext, rawtext4)
+  * Read from zipped modules
+  * Clean text of OSIS, GBF or ThML tags.
+ .
+ This package contains the Python 2 bindings.
+
+Package: python3-pysword
+Architecture: all
+Depends: ${misc:Depends}, ${python3:Depends}
+Description: native Python reader module for the SWORD Project (Python 3)
+ This project is not an official CrossWire project. It merely provides an
+ alternative way to read the bible modules created by CrossWire's SWORD
+ project.
+ .
+ Features:
+  * Read SWORD bibles (not commentaries etc.)
+  * Detection of locally installed bible modules.
+  * Supports all known SWORD module formats (ztext, ztext4, rawtext, rawtext4)
+  * Read from zipped modules
+  * Clean text of OSIS, GBF or ThML tags.
+ .
+ This package contains the Python 3 bindings.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..158c31e
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,57 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: pysword
+Source: https://gitlab.com/tgc-dk/pysword
+
+Files: *
+Copyright: 2008-2016 Kenneth Arnold <kcarnold at mit.edu>
+           2008-2016 Joshua Gross <joshua.gross at gmail.com>
+           2008-2016 Tomas Groth <tomasgroth at yahoo.dk>
+           2008-2016 Matthew Wardrop <mister.wardrop at gmail.com>
+           2008-2016 Ryan Hiebert
+License: GPL-2
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 dated June, 1991.
+ .
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE.  See the GNU General Public License for more
+ details.
+ .
+ You should have received a copy of the GNU General Public
+ License along with this package; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ Boston, MA  02110-1301 USA
+ .
+ On Debian systems, the full text of the GNU General Public
+ License version 2 can be found in the file
+ `/usr/share/common-licenses/GPL-2'.
+
+Files: debian/*
+Copyright: 2016 Unit 193 <unit193 at ubuntu.com>
+           2016 Raoul Snyman <raoul at snyman.info>
+License: BSD-3-Clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. Neither the name of the copyright holder nor the names of its contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE HOLDERS OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..1756190
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,11 @@
+#!/usr/bin/make -f
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+export PYBUILD_NAME=pysword
+export PYBUILD_DESTDIR_python2=debian/python-pysword/
+export PYBUILD_DESTDIR_python3=debian/python3-pysword/
+
+%:
+	dh $@ --with python2,python3 --buildsystem=pybuild
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..163aaf8
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 0000000..92eac78
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,3 @@
+version=3
+opts=uversionmangle=s/(rc|a|b|c)/~$1/ \
+http://pypi.debian.net/pysword/pysword-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff --git a/pysword.egg-info/PKG-INFO b/pysword.egg-info/PKG-INFO
new file mode 100644
index 0000000..cb3ff78
--- /dev/null
+++ b/pysword.egg-info/PKG-INFO
@@ -0,0 +1,188 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.3
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: second at tgc.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+        
+        This project is **not** an official `CrossWire <http://crosswire.org/>`_
+        project. It merely provides an alternative way to read the bible modules
+        created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+        
+        Features
+        --------
+        
+        -  Read SWORD bibles (not commentaries etc.)
+        -  Detection of locally installed bible modules.
+        -  Supports all known SWORD module formats (ztext, ztext4, rawtext,
+           rawtext4)
+        -  Read from zipped modules, like those available from
+           http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+        -  Clean text of OSIS, GBF or ThML tags.
+        -  Supports both python 2 and 3 (tested with 2.7 and 3.5)
+        
+        License
+        -------
+        
+        Since parts of the code are derived and/or copied from the SWORD project
+        (see canons.py) which is GPL2, this code is also under the GPL2 license.
+        
+        Installation
+        ------------
+        
+        PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+        but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+        for install using ``pip`` or ``easy_install``.
+        It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+        and will soon be available as a package in Debian and Fedora.
+        
+        Example code
+        ------------
+        
+        Use modules from default datapath
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Find available modules/bibles in standard data path.
+            # For non-standard data path, pass it as an argument to the SwordModules constructor.
+            modules = SwordModules()
+            # In this case we'll assume the modules found is something like:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Load module from zip-file
+        ~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Load module in zip
+            # NB: the zip content is only available as long as the SwordModules object exists
+            modules = SwordModules('KJV.zip')
+            # In this case the module found is:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Manually create bible
+        ~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.bible import SwordBible
+            # Create the bible. The arguments are:
+            # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+            # Only the first is required, the rest have default values which should work in most cases.
+            bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Run tests
+        ---------
+        
+        To run the testsuite, first run the script that downloads the files used
+        for testing, and then use nosetests to run the testsuite:
+        
+        .. code:: sh
+        
+            $ python tests/resources/download_bibles.py
+            $ nosetests -v tests/
+        
+        The tests should run and pass using both python 2 and 3.
+        
+        Contributing
+        ------------
+        
+        If you want to contribute, you are most welcome to do so!
+        Feel free to report issues and create merge requests at https://gitlab.com/tgc-dk/pysword
+        If you create a merge request please include a test that proves that your code actually works.
+        
+        Module formats
+        --------------
+        
+        I'll use Python's struct module's format strings to describe byte
+        formatting. See https://docs.python.org/3/library/struct.html
+        
+        There are currently 4 formats for bible modules in SWORD.
+        
+        ztext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Take the Old Testament (OT) for example. Three files:
+        
+        -  ot.bzv: Maps verses to character ranges in compressed buffers. 10
+           bytes ('<IIH') for each verse in the Bible:
+        
+           -  buffer\_num (I): which compressed buffer the verse is located in
+           -  verse\_start (I): the location in the uncompressed buffer where
+              the verse begins
+           -  verse\_len (H): length of the verse, in uncompressed characters
+        
+        These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+        (docs later). So the record for the verse with index x starts at byte
+        10\*x.
+        
+        -  ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+           ('<III') for each compressed buffer:
+        
+           -  offset (I): where the compressed buffer starts in the file
+           -  size (I): the length of the compressed data, in bytes
+           -  uc\_size (I): the length of the uncompressed data, in bytes
+              (unused)
+        
+        These 12-byte records are densely packed, indexed by buffer\_num (see
+        previous). So the record for compressed buffer buffer\_num starts at
+        byte 12\*buffer\_num.
+        
+        -  ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+           'offset'.
+        
+        ztext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+        is now represented by a 4-byte integer (I), making the record 12 bytes in
+        all.
+        
+        rawtext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Again OT example. Two files:
+        
+        -  ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+           for each verse in the Bible:
+        
+           -  verse\_start (I): the location in the textfile where the verse
+              begins
+           -  verse\_len (H): length of the verse, in characters
+        
+        -  ot: Contains the text. Read 'verse\_len' characters starting at
+           'verse\_start'.
+        
+        rawtext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        rawtext4 is the same as rawtext, except that in the vss-file the
+        verse\_len is now represented by a 4-byte integer (I), making the record 8
+        bytes in all.
+        
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/pysword.egg-info/SOURCES.txt b/pysword.egg-info/SOURCES.txt
new file mode 100644
index 0000000..4351f91
--- /dev/null
+++ b/pysword.egg-info/SOURCES.txt
@@ -0,0 +1,13 @@
+README.rst
+setup.py
+pysword/__init__.py
+pysword/bible.py
+pysword/books.py
+pysword/canon-parser.py
+pysword/canons.py
+pysword/cleaner.py
+pysword/modules.py
+pysword.egg-info/PKG-INFO
+pysword.egg-info/SOURCES.txt
+pysword.egg-info/dependency_links.txt
+pysword.egg-info/top_level.txt
\ No newline at end of file
diff --git a/pysword.egg-info/dependency_links.txt b/pysword.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/pysword.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/pysword.egg-info/top_level.txt b/pysword.egg-info/top_level.txt
new file mode 100644
index 0000000..41d3e59
--- /dev/null
+++ b/pysword.egg-info/top_level.txt
@@ -0,0 +1 @@
+pysword
diff --git a/pysword/__init__.py b/pysword/__init__.py
new file mode 100644
index 0000000..b0f8c57
--- /dev/null
+++ b/pysword/__init__.py
@@ -0,0 +1,21 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules         #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers:                                 #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth    #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 51  #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA                         #
+###############################################################################
+
+__all__ = ['books', 'modules', 'bible', 'cleaner', 'canons']
diff --git a/pysword/bible.py b/pysword/bible.py
new file mode 100644
index 0000000..35ec643
--- /dev/null
+++ b/pysword/bible.py
@@ -0,0 +1,236 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules         #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers:                                 #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth    #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 51  #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA                         #
+###############################################################################
+
+import codecs
+import os
+import struct
+import zlib
+
+from pysword.books import BibleStructure
+from pysword.cleaner import OSISCleaner, GBFCleaner, ThMLCleaner
+
+
+class SwordModuleType:
+    """
+    Names of the module storage formats that SwordBible can read.
+    """
+    # Uncompressed text; the '4' variant stores the verse length as a 4-byte integer
+    RAWTEXT = u'rawtext'
+    RAWTEXT4 = u'rawtext4'
+    # Compressed text; the '4' variant stores the verse length as a 4-byte integer
+    ZTEXT = u'ztext'
+    ZTEXT4 = u'ztext4'
+
+
+class SwordBible(object):
+
+    def __init__(self, module_path, module_type=SwordModuleType.ZTEXT, versification=u'default', encoding=None,
+                 source_type=u'OSIS'):
+        """
+        Initialize the SwordBible object.
+        :param module_path: Path to SWORD modules datapath.
+        :param module_type: Types as defined by SwordModuleType, defaults to 'ztext'.
+        :param versification: Versification used for bible, defaults to 'default'.
+        :param encoding: Encoding used by the bible, should be either 'utf-8' or 'latin1'. If None, the encoding
+                         is detected by test-decoding the start of the first available testament.
+        :param source_type: Type of (possible) tags in the text, can be 'OSIS', 'GBF' or 'ThML'.
+        :raise IOError: If neither the OT nor the NT files can be opened.
+        :raise ValueError: If unknown module_type is supplied.
+        """
+        self._module_type = module_type.lower()
+        self._module_path = module_path
+        self._files = {}
+
+        # Select the opener for the files needed to read from the module
+        is_ztext = self._module_type in (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4)
+        if is_ztext:
+            get_files = self._get_ztext_files
+        elif self._module_type in (SwordModuleType.RAWTEXT, SwordModuleType.RAWTEXT4):
+            get_files = self._get_rawtext_files
+        else:
+            raise ValueError(u'Invalid module type: %s' % module_type)
+
+        # A single missing testament is tolerated here, only a module without any testament is an error
+        for testament in (u'ot', u'nt'):
+            try:
+                self._files[testament] = get_files(testament)
+            except IOError:
+                pass
+        if not self._files:
+            raise IOError(u'Could not open OT or NT for module')
+
+        # Load the bible structure
+        testaments = list(self._files)
+        self._structure = BibleStructure(versification, testaments)
+
+        # Set verse record format and size (struct format, size of one record in bytes)
+        record_layouts = {
+            SwordModuleType.ZTEXT: (u'<IIH', 10),
+            SwordModuleType.ZTEXT4: (u'<III', 12),
+            SwordModuleType.RAWTEXT: (u'<IH', 6),
+            SwordModuleType.RAWTEXT4: (u'<II', 8),
+        }
+        self._verse_record_format, self._verse_record_size = record_layouts[self._module_type]
+
+        # Detect text-encoding if none given
+        if encoding is None:
+            # pick the first available testament for testing
+            testament = list(self._files)[0]
+            if is_ztext:
+                undecoded_text = self._uncompressed_text(testament, 0)
+            else:
+                undecoded_text = self._files[testament][1].read(4096)
+            # Try to decode to utf-8, if it fails we fallback to latin1. The codec is named explicitly because a
+            # bare decode() uses ASCII on Python 2. The incremental decoder with final=False accepts a sample
+            # that ends in the middle of a multi-byte character, which a fixed-size read can produce.
+            try:
+                codecs.getincrementaldecoder(u'utf-8')().decode(undecoded_text, final=False)
+                self._encoding = u'utf-8'
+            except UnicodeDecodeError:
+                self._encoding = u'latin1'
+        else:
+            self._encoding = encoding
+
+        # Create cleaner to remove OSIS, GBF or ThML tags; anything unrecognised is treated as OSIS
+        source = source_type.upper() if source_type else u''
+        if source == u'THML':
+            self._cleaner = ThMLCleaner()
+        elif source == u'GBF':
+            self._cleaner = GBFCleaner()
+        else:
+            self._cleaner = OSISCleaner()
+
+    def _get_ztext_files(self, testament):
+        """
+        Given a testament ('ot' or 'nt'), returns the opened files (verse_to_buf, buf_to_loc, text)
+        :param testament: 'ot' or 'nt'
+        :return: returns a list of files opened in binary mode: [verse_to_buf, buf_to_loc, text]
+        :raise IOError: If one of the files cannot be opened. Files opened before the failure are closed.
+        """
+        names = [os.path.join(self._module_path, u'%s.bz%s' % (testament, code))
+                 for code in (u'v', u's', u'z')]
+        files = []
+        try:
+            for name in names:
+                files.append(open(name, u'rb'))
+        except IOError:
+            # Do not leave a partial set of handles open when one of the files is missing
+            for opened in files:
+                opened.close()
+            raise
+        return files
+
+    def _get_rawtext_files(self, testament):
+        """
+        Given a testament ('ot' or 'nt'), returns the opened files (verse_to_loc, text)
+        :param testament: 'ot' or 'nt'
+        :return: returns a list of files opened in binary mode: [verse_to_loc, text]
+        :raise IOError: If one of the files cannot be opened. Files opened before the failure are closed.
+        """
+        v2l_name = os.path.join(self._module_path, u'%s.vss' % testament)
+        text_name = os.path.join(self._module_path, u'%s' % testament)
+        files = []
+        try:
+            for name in (v2l_name, text_name):
+                files.append(open(name, u'rb'))
+        except IOError:
+            # Do not leave the index file open when the text file is missing
+            for opened in files:
+                opened.close()
+            raise
+        return files
+
+    def _ztext_for_index(self, testament, index):
+        """
+        Look up one verse of a ztext module through the verse index file.
+        :param testament: 'ot' or 'nt'
+        :param index: Number of the verse record to read from the index file
+        :return: the decoded text of the verse.
+        """
+        index_file, _, _ = self._files[testament]
+
+        # Records are fixed-size, so the record number translates directly into a file offset.
+        record_size = self._verse_record_size
+        index_file.seek(record_size*index)
+        raw_record = index_file.read(record_size)
+        buf_num, verse_start, verse_len = struct.unpack(self._verse_record_format, raw_record)
+
+        # The record names the buffer holding the verse and the slice of it to return.
+        verse_end = verse_start+verse_len
+        buffer_text = self._uncompressed_text(testament, buf_num)
+        return buffer_text[verse_start:verse_end].decode(self._encoding, errors=u'replace')
+
+    def _uncompressed_text(self, testament, buf_num):
+        """
+        Decompress ztext at given position.
+        :param testament: 'ot' or 'nt'
+        :param buf_num: Buffer to read
+        :return: The decompressed text
+        """
+        verse_to_buf, buf_to_loc, text = self._files[testament]
+
+        # Determine where the compressed data starts and ends.
+        buf_to_loc.seek(buf_num*12)
... 2936 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/pysword.git



More information about the Python-modules-commits mailing list