[Python-modules-commits] [pysword] 01/03: Import pysword_0.2.1.orig.tar.gz

Raoul Snyman superfly-guest at moszumanska.debian.org
Tue Apr 19 14:25:53 UTC 2016


This is an automated email from the git hooks/post-receive script.

superfly-guest pushed a commit to branch master
in repository pysword.

commit 3e0082df2e3f3b2aaf2d60aad73e9fcb0d601c85
Author: Raoul Snyman <raoul at snyman.info>
Date:   Tue Apr 19 16:23:08 2016 +0200

    Import pysword_0.2.1.orig.tar.gz
---
 PKG-INFO                              |  181 +++
 README.rst                            |  162 +++
 pysword.egg-info/PKG-INFO             |  181 +++
 pysword.egg-info/SOURCES.txt          |   13 +
 pysword.egg-info/dependency_links.txt |    1 +
 pysword.egg-info/top_level.txt        |    1 +
 pysword/__init__.py                   |   21 +
 pysword/bible.py                      |  236 ++++
 pysword/books.py                      |  191 +++
 pysword/canon-parser.py               |  119 ++
 pysword/canons.py                     | 2200 +++++++++++++++++++++++++++++++++
 pysword/cleaner.py                    |  149 +++
 pysword/modules.py                    |  126 ++
 setup.cfg                             |    5 +
 setup.py                              |   32 +
 15 files changed, 3618 insertions(+)

diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..c076dee
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,181 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.1
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: tomasgroth at yahoo.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+        
+        This project is **not** an official `CrossWire <http://crosswire.org/>`_
+        project. It merely provides an alternative way to read the bible modules
+        created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+        
+        Features
+        --------
+        
+        -  Read SWORD bibles (not commentaries etc.)
+        -  Detection of locally installed bible modules.
+        -  Supports all known SWORD module formats (ztext, ztext4, rawtext,
+           rawtext4)
+        -  Read from zipped modules, like those available from
+           http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+        -  Clean text of OSIS, GBF or ThML tags.
+        -  Supports both python 2 and 3 (tested with 2.7 and 3.5)
+        
+        License
+        -------
+        
+        Since parts of the code are derived and/or copied from the SWORD project
+        (see canons.py), which is GPL2, this code is also under the GPL2 license.
+        
+        Installation
+        ------------
+        
+        PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+        but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+        for installation using ``pip`` or ``easy_install``.
+        It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+        and will soon be available as a package in Debian and Fedora.
+        
+        Example code
+        ------------
+        
+        Use modules from default datapath
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Find available modules/bibles in standard data path.
+            # For non-standard data path, pass it as an argument to the SwordModules constructor.
+            modules = SwordModules()
+            # In this case we'll assume the modules found is something like:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Load module from zip-file
+        ~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Load module in zip
+            # NB: the zip content is only available as long as the SwordModules object exists
+            modules = SwordModules('KJV.zip')
+            # In this case the module found is:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Manually create bible
+        ~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.bible import SwordBible
+            # Create the bible. The arguments are:
+            # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+            # Only the first is required, the rest have default values which should work in most cases.
+            bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Run tests
+        ---------
+        
+        To run the testsuite, first run the script that downloads the files used
+        for testing, and then use nosetests to run the testsuite:
+        
+        .. code:: sh
+        
+            $ python tests/resources/download_bibles.py
+            $ nosetests -v tests/
+        
+        The tests should run and pass using both python 2 and 3.
+        
+        Module formats
+        --------------
+        
+        I'll use Python's struct module's format strings to describe byte
+        formatting. See https://docs.python.org/3/library/struct.html
+        
+        There are currently 4 formats for bible modules in SWORD.
+        
+        ztext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Take the Old Testament (OT) for example. Three files:
+        
+        -  ot.bzv: Maps verses to character ranges in compressed buffers. 10
+           bytes ('<IIH') for each verse in the Bible:
+        
+           -  buffer\_num (I): which compressed buffer the verse is located in
+           -  verse\_start (I): the location in the uncompressed buffer where
+              the verse begins
+           -  verse\_len (H): length of the verse, in uncompressed characters
+        
+        These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+        (docs later). So the record for the verse with index x starts at byte
+        10\*x.
+        
+        -  ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+           ('<III') for each compressed buffer:
+        
+           -  offset (I): where the compressed buffer starts in the file
+           -  size (I): the length of the compressed data, in bytes
+           -  uc\_size (I): the length of the uncompressed data, in bytes
+              (unused)
+        
+        These 12-byte records are densely packed, indexed by buffer\_num (see
+        previous). So the record for compressed buffer buffer\_num starts at
+        byte 12\*buffer\_num.
+        
+        -  ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+           'offset'.
+        
+        ztext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+        is now represented by 4-byte integer (I), making the record 12 bytes in
+        all.
+        
+        rawtext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Again OT example. Two files:
+        
+        -  ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+           for each verse in the Bible:
+        
+           -  verse\_start (I): the location in the textfile where the verse
+              begins
+           -  verse\_len (H): length of the verse, in characters
+        
+        -  ot: Contains the text. Read 'verse\_len' characters starting at
+           'verse\_start'.
+        
+        rawtext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        rawtext4 is the same as rawtext, except that in the vss-file the
+        verse\_len is now represented by 4-byte integer (I), making the record 8
+        bytes in all.
+        
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..babeade
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,162 @@
+A native Python reader of the SWORD Project Bible Modules
+
+This project is **not** an official `CrossWire <http://crosswire.org/>`_
+project. It merely provides an alternative way to read the bible modules
+created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+Features
+--------
+
+-  Read SWORD bibles (not commentaries etc.)
+-  Detection of locally installed bible modules.
+-  Supports all known SWORD module formats (ztext, ztext4, rawtext,
+   rawtext4)
+-  Read from zipped modules, like those available from
+   http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+-  Clean text of OSIS, GBF or ThML tags.
+-  Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+License
+-------
+
+Since parts of the code are derived and/or copied from the SWORD project
+(see canons.py), which is GPL2, this code is also under the GPL2 license.
+
+Installation
+------------
+
+PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+for installation using ``pip`` or ``easy_install``.
+It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+and will soon be available as a package in Debian and Fedora.
+
+Example code
+------------
+
+Use modules from default datapath
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    from pysword.modules import SwordModules
+    # Find available modules/bibles in standard data path.
+    # For non-standard data path, pass it as an argument to the SwordModules constructor.
+    modules = SwordModules()
+    # In this case we'll assume the modules found is something like:
+    # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+    found_modules = modules.parse_modules()
+    bible = modules.get_bible_from_module('KJV')
+    # Get John chapter 3 verse 16
+    output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Load module from zip-file
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    from pysword.modules import SwordModules
+    # Load module in zip
+    # NB: the zip content is only available as long as the SwordModules object exists
+    modules = SwordModules('KJV.zip')
+    # In this case the module found is:
+    # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+    found_modules = modules.parse_modules()
+    bible = modules.get_bible_from_module('KJV')
+    # Get John chapter 3 verse 16
+    output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Manually create bible
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    from pysword.bible import SwordBible
+    # Create the bible. The arguments are:
+    # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+    # Only the first is required, the rest have default values which should work in most cases.
+    bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+    # Get John chapter 3 verse 16
+    output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Run tests
+---------
+
+To run the testsuite, first run the script that downloads the files used
+for testing, and then use nosetests to run the testsuite:
+
+.. code:: sh
+
+    $ python tests/resources/download_bibles.py
+    $ nosetests -v tests/
+
+The tests should run and pass using both python 2 and 3.
+
+Module formats
+--------------
+
+I'll use Python's struct module's format strings to describe byte
+formatting. See https://docs.python.org/3/library/struct.html
+
+There are currently 4 formats for bible modules in SWORD.
+
+ztext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Take the Old Testament (OT) for example. Three files:
+
+-  ot.bzv: Maps verses to character ranges in compressed buffers. 10
+   bytes ('<IIH') for each verse in the Bible:
+
+   -  buffer\_num (I): which compressed buffer the verse is located in
+   -  verse\_start (I): the location in the uncompressed buffer where
+      the verse begins
+   -  verse\_len (H): length of the verse, in uncompressed characters
+
+These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+(docs later). So the record for the verse with index x starts at byte
+10\*x.
+
+-  ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+   ('<III') for each compressed buffer:
+
+   -  offset (I): where the compressed buffer starts in the file
+   -  size (I): the length of the compressed data, in bytes
+   -  uc\_size (I): the length of the uncompressed data, in bytes
+      (unused)
+
+These 12-byte records are densely packed, indexed by buffer\_num (see
+previous). So the record for compressed buffer buffer\_num starts at
+byte 12\*buffer\_num.
+
+-  ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+   'offset'.
+
+ztext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+is now represented by 4-byte integer (I), making the record 12 bytes in
+all.
+
+rawtext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Again OT example. Two files:
+
+-  ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+   for each verse in the Bible:
+
+   -  verse\_start (I): the location in the textfile where the verse
+      begins
+   -  verse\_len (H): length of the verse, in characters
+
+-  ot: Contains the text. Read 'verse\_len' characters starting at
+   'verse\_start'.
+
+rawtext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+rawtext4 is the same as rawtext, except that in the vss-file the
+verse\_len is now represented by 4-byte integer (I), making the record 8
+bytes in all.
diff --git a/pysword.egg-info/PKG-INFO b/pysword.egg-info/PKG-INFO
new file mode 100644
index 0000000..c076dee
--- /dev/null
+++ b/pysword.egg-info/PKG-INFO
@@ -0,0 +1,181 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.1
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: tomasgroth at yahoo.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+        
+        This project is **not** an official `CrossWire <http://crosswire.org/>`_
+        project. It merely provides an alternative way to read the bible modules
+        created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+        
+        Features
+        --------
+        
+        -  Read SWORD bibles (not commentaries etc.)
+        -  Detection of locally installed bible modules.
+        -  Supports all known SWORD module formats (ztext, ztext4, rawtext,
+           rawtext4)
+        -  Read from zipped modules, like those available from
+           http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+        -  Clean text of OSIS, GBF or ThML tags.
+        -  Supports both python 2 and 3 (tested with 2.7 and 3.5)
+        
+        License
+        -------
+        
+        Since parts of the code are derived and/or copied from the SWORD project
+        (see canons.py), which is GPL2, this code is also under the GPL2 license.
+        
+        Installation
+        ------------
+        
+        PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+        but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+        for installation using ``pip`` or ``easy_install``.
+        It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+        and will soon be available as a package in Debian and Fedora.
+        
+        Example code
+        ------------
+        
+        Use modules from default datapath
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Find available modules/bibles in standard data path.
+            # For non-standard data path, pass it as an argument to the SwordModules constructor.
+            modules = SwordModules()
+            # In this case we'll assume the modules found is something like:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Load module from zip-file
+        ~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.modules import SwordModules
+            # Load module in zip
+            # NB: the zip content is only available as long as the SwordModules object exists
+            modules = SwordModules('KJV.zip')
+            # In this case the module found is:
+            # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+            found_modules = modules.parse_modules()
+            bible = modules.get_bible_from_module('KJV')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Manually create bible
+        ~~~~~~~~~~~~~~~~~~~~~
+        
+        .. code:: python
+        
+            from pysword.bible import SwordBible
+            # Create the bible. The arguments are:
+            # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+            # Only the first is required, the rest have default values which should work in most cases.
+            bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+            # Get John chapter 3 verse 16
+            output = bible.get(books=['john'], chapters=[3], verses=[16])
+        
+        Run tests
+        ---------
+        
+        To run the testsuite, first run the script that downloads the files used
+        for testing, and then use nosetests to run the testsuite:
+        
+        .. code:: sh
+        
+            $ python tests/resources/download_bibles.py
+            $ nosetests -v tests/
+        
+        The tests should run and pass using both python 2 and 3.
+        
+        Module formats
+        --------------
+        
+        I'll use Python's struct module's format strings to describe byte
+        formatting. See https://docs.python.org/3/library/struct.html
+        
+        There are currently 4 formats for bible modules in SWORD.
+        
+        ztext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Take the Old Testament (OT) for example. Three files:
+        
+        -  ot.bzv: Maps verses to character ranges in compressed buffers. 10
+           bytes ('<IIH') for each verse in the Bible:
+        
+           -  buffer\_num (I): which compressed buffer the verse is located in
+           -  verse\_start (I): the location in the uncompressed buffer where
+              the verse begins
+           -  verse\_len (H): length of the verse, in uncompressed characters
+        
+        These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+        (docs later). So the record for the verse with index x starts at byte
+        10\*x.
+        
+        -  ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+           ('<III') for each compressed buffer:
+        
+           -  offset (I): where the compressed buffer starts in the file
+           -  size (I): the length of the compressed data, in bytes
+           -  uc\_size (I): the length of the uncompressed data, in bytes
+              (unused)
+        
+        These 12-byte records are densely packed, indexed by buffer\_num (see
+        previous). So the record for compressed buffer buffer\_num starts at
+        byte 12\*buffer\_num.
+        
+        -  ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+           'offset'.
+        
+        ztext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+        is now represented by 4-byte integer (I), making the record 12 bytes in
+        all.
+        
+        rawtext format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        Again OT example. Two files:
+        
+        -  ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+           for each verse in the Bible:
+        
+           -  verse\_start (I): the location in the textfile where the verse
+              begins
+           -  verse\_len (H): length of the verse, in characters
+        
+        -  ot: Contains the text. Read 'verse\_len' characters starting at
+           'verse\_start'.
+        
+        rawtext4 format documentation
+        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        
+        rawtext4 is the same as rawtext, except that in the vss-file the
+        verse\_len is now represented by 4-byte integer (I), making the record 8
+        bytes in all.
+        
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/pysword.egg-info/SOURCES.txt b/pysword.egg-info/SOURCES.txt
new file mode 100644
index 0000000..4351f91
--- /dev/null
+++ b/pysword.egg-info/SOURCES.txt
@@ -0,0 +1,13 @@
+README.rst
+setup.py
+pysword/__init__.py
+pysword/bible.py
+pysword/books.py
+pysword/canon-parser.py
+pysword/canons.py
+pysword/cleaner.py
+pysword/modules.py
+pysword.egg-info/PKG-INFO
+pysword.egg-info/SOURCES.txt
+pysword.egg-info/dependency_links.txt
+pysword.egg-info/top_level.txt
\ No newline at end of file
diff --git a/pysword.egg-info/dependency_links.txt b/pysword.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/pysword.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/pysword.egg-info/top_level.txt b/pysword.egg-info/top_level.txt
new file mode 100644
index 0000000..41d3e59
--- /dev/null
+++ b/pysword.egg-info/top_level.txt
@@ -0,0 +1 @@
+pysword
diff --git a/pysword/__init__.py b/pysword/__init__.py
new file mode 100644
index 0000000..b0f8c57
--- /dev/null
+++ b/pysword/__init__.py
@@ -0,0 +1,21 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules         #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers:                                 #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth    #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 51  #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA                         #
+###############################################################################
+
+__all__ = ['books', 'modules', 'bible', 'cleaner', 'canons']
diff --git a/pysword/bible.py b/pysword/bible.py
new file mode 100644
index 0000000..1c7ac6b
--- /dev/null
+++ b/pysword/bible.py
@@ -0,0 +1,236 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules         #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers:                                 #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth    #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 51  #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA                         #
+###############################################################################
+
+import os
+import struct
+import zlib
+
+from pysword.books import BibleStructure
+from pysword.cleaner import OSISCleaner, GBFCleaner, ThMLCleaner
+
+
+class SwordModuleType:
+    RAWTEXT = u'rawtext'
+    ZTEXT = u'ztext'
+    RAWTEXT4 = u'rawtext4'
+    ZTEXT4 = u'ztext4'
+
+
+class SwordBible(object):
+
+    def __init__(self, module_path, module_type=SwordModuleType.ZTEXT, versification=u'default', encoding=None,
+                 source_type=u'OSIS'):
+        """
+        Initialize the SwordBible object.
+        :param module_path: Path to SWORD modules datapath.
+        :param module_type: Types as defined by SwordModuleType, defaults to 'ztext'.
+        :param versification: Versification used for bible, defaults to 'default'.
+        :param encoding: Encoding used by the bible, should be either 'utf-8' or 'latin1'.
+        :param source_type: Type of (possible) tags in the text, can be 'OSIS', 'GBF' or 'ThML'.
+        :raise IOError: If files cannot be opened.
+        :raise ValueError: If unknown module_type is supplied.
+        """
+        self._module_type = module_type.lower()
+        self._module_path = module_path
+        self._files = {}
+
+        # Open the files needed to read from the module
+        if self._module_type in (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4):
+            try:
+                self._files[u'ot'] = self._get_ztext_files(u'ot')
+            except IOError:
+                pass
+            try:
+                self._files[u'nt'] = self._get_ztext_files(u'nt')
+            except IOError:
+                pass
+        elif self._module_type in (SwordModuleType.RAWTEXT, SwordModuleType.RAWTEXT4):
+            try:
+                self._files[u'ot'] = self._get_rawtext_files(u'ot')
+            except IOError:
+                pass
+            try:
+                self._files[u'nt'] = self._get_rawtext_files(u'nt')
+            except IOError:
+                pass
+        else:
+            raise ValueError(u'Invalid module type: %s' % module_type)
+        if u'ot' not in self._files and u'nt' not in self._files is None:
+            raise IOError(u'Could not open OT or NT for module')
+
+        # Load the bible structure
+        testaments = self._files.keys()
+        self._structure = BibleStructure(versification, testaments)
+
+        # Set verse record format and size
+        if self._module_type == SwordModuleType.ZTEXT:
+            self._verse_record_format = u'<IIH'
+            self._verse_record_size = 10
+        elif self._module_type == SwordModuleType.ZTEXT4:
+            self._verse_record_format = u'<III'
+            self._verse_record_size = 12
+        elif self._module_type == SwordModuleType.RAWTEXT:
+            self._verse_record_format = u'<IH'
+            self._verse_record_size = 6
+        elif self._module_type == SwordModuleType.RAWTEXT4:
+            self._verse_record_format = u'<II'
+            self._verse_record_size = 8
+
+        # Detect text-encoding if none given
+        if encoding is None:
+            # pick the first available testament for testing
+            testament = self._files.keys()[0]
+            if self._module_type in (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4):
+                undecoded_text = self._uncompressed_text(testament, 0)
+            else:
+                undecoded_text = self._files[testament][1].read(4096)
+            # Try to decode to utf-8, if it fails we fallback to latin1
+            try:
+                undecoded_text.decode()
+                self._encoding = u'utf-8'
+            except UnicodeDecodeError:
+                self._encoding = u'latin1'
+        else:
+            self._encoding = encoding
+        # Create cleaner to remove OSIS or GBF tags
+        if source_type:
+            if source_type.upper() == u'THML':
+                self._cleaner = ThMLCleaner()
+            elif source_type.upper() == u'GBF':
+                self._cleaner = GBFCleaner()
+            else:
+                self._cleaner = OSISCleaner()
+        else:
+            self._cleaner = OSISCleaner()
+
+    def _get_ztext_files(self, testament):
+        """
+        Open the three files that make up one testament of a compressed (ztext) module.
+        :param testament: 'ot' or 'nt'
+        :return: list of open binary files, in the order [verse_to_buf, buf_to_loc, text]
+        """
+        # The three files share the testament prefix and differ only in the last letter.
+        file_names = [u'%s.bz%s' % (testament, suffix) for suffix in (u'v', u's', u'z')]
+        full_paths = [os.path.join(self._module_path, file_name) for file_name in file_names]
+        return [open(full_path, u'rb') for full_path in full_paths]
+
+    def _get_rawtext_files(self, testament):
+        """
+        Open the two files that make up one testament of an uncompressed (rawtext) module.
+        :param testament: 'ot' or 'nt'
+        :return: list of open binary files, in the order [verse_to_loc, text]
+        """
+        # The text file is named after the testament; its verse index adds '.vss'.
+        file_names = (u'%s.vss' % testament, u'%s' % testament)
+        return [open(os.path.join(self._module_path, file_name), u'rb') for file_name in file_names]
+
+    def _ztext_for_index(self, testament, index):
+        """
+        Fetch a single verse from a compressed module and decode it.
+        :param testament: 'ot' or 'nt'
+        :param index: Number of the verse record to look up in the verse index file
+        :return: the decoded verse text.
+        """
+        index_file, _, _ = self._files[testament]
+
+        # A verse record is: buffer number, start of the verse in that buffer, verse length.
+        index_file.seek(self._verse_record_size*index)
+        record = index_file.read(self._verse_record_size)
+        buf_num, begin, length = struct.unpack(self._verse_record_format, record)
+        buffer_text = self._uncompressed_text(testament, buf_num)
+        return buffer_text[begin:begin+length].decode(self._encoding, errors=u'replace')
+
+    def _uncompressed_text(self, testament, buf_num):
+        """
+        Read one compressed buffer from the text file and inflate it with zlib.
+        :param testament: 'ot' or 'nt'
+        :param buf_num: Number of the buffer to read
+        :return: The decompressed bytes of the whole buffer
+        """
+        _, location_file, data_file = self._files[testament]
+
+        # Look up where the buffer sits; records are 12 bytes (three little-endian uint32).
+        location_file.seek(buf_num*12)
+        record = location_file.read(12)
+        start, packed_size, _ = struct.unpack(u'<III', record)
+
+        # Only offset and compressed size are needed; the third field is not used here.
+        data_file.seek(start)
+        return zlib.decompress(data_file.read(packed_size))
+
+    def _rawtext_for_index(self, testament, index):
+        """
+        Fetch a single verse from an uncompressed module and decode it.
+        :param testament: 'ot' or 'nt'
+        :param index: Number of the verse record to look up in the verse index file
+        :return: the decoded verse text.
+        """
+        index_file, text_file = self._files[testament]
+        # A verse record is: start of the verse in the text file, verse length.
+        index_file.seek(self._verse_record_size*index)
+        record = index_file.read(self._verse_record_size)
+        begin, length = struct.unpack(self._verse_record_format, record)
+        text_file.seek(begin)
+        return text_file.read(length).decode(self._encoding, errors=u'replace')
+
+    # USER FACING #################################################################################
+    def get_iter(self, books=None, chapters=None, verses=None, clean=True):
+        """
+        Lazily yield the text of each verse in a given reference, one verse at a time.
+        :param books: Single book name or an array of book names
+        :param chapters: Single chapter number or an array of chapter numbers
+        :param verses: Single verse number or an array of verse numbers
+        :param clean: True for cleaning text for tags, False to keep them.
+        :return: generator yielding the text of each matching verse
+        """
+        references = self._structure.ref_to_indicies(books=books, chapters=chapters, verses=verses)
+        compressed_types = (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4)
+
+        for testament, verse_indexes in references.items():
+            for verse_index in verse_indexes:
+                is_compressed = self._module_type in compressed_types
+                reader = self._ztext_for_index if is_compressed else self._rawtext_for_index
+                text = reader(testament, verse_index)
+                if text is None:
+                    continue
+                # Text without any '<' cannot contain markup, so it skips the cleaner.
+                needs_cleaning = clean and self._cleaner and '<' in text
+                yield self._cleaner.clean(text) if needs_cleaning else text
+
+    def get(self, books=None, chapters=None, verses=None, clean=True, join='\n'):
+        """
+        Retrieve the text for a given reference as one string.
+        :param books: Single book name or an array of book names
+        :param chapters: Single chapter number or an array of chapter numbers
+        :param verses: Single verse number or an array of verse numbers
+        :param clean: True for cleaning text for tags, False to keep them.
+        :param join: The char/string placed between consecutive verses, defaults to '\n'
+        :return: the text for the reference.
+        """
+        verse_texts = self.get_iter(books=books, chapters=chapters, verses=verses, clean=clean)
+        # join() drains the iterator itself, so no intermediate list has to be built here.
+        return join.join(verse_texts)
+
+    def get_structure(self):
+        """
+        Retrieve the structure of this bible.
+        :return: The BibleStructure that get_iter uses to turn references into verse indexes
+        """
+        return self._structure
diff --git a/pysword/books.py b/pysword/books.py
new file mode 100644
index 0000000..47c1f78
--- /dev/null
+++ b/pysword/books.py
@@ -0,0 +1,191 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules         #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers:                                 #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth    #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 51  #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA                         #
+###############################################################################
+
+from pysword.canons import canons
+
+
+class BookStructure(object):
+    """Name data and per-chapter verse counts of one book, used to compute verse index positions."""
+    def __init__(self, name, osis_name, preferred_abbreviation, chapter_lengths):
+        """
+        :param name: Full English name of book
+        :param osis_name: Abbreviation of book
+        :param preferred_abbreviation: Preferred abbreviation of book
+        :param chapter_lengths: List containing the number of verses for each chapter.
+        """
+        self.name = name
+        self.osis_name = osis_name
+        self.preferred_abbreviation = preferred_abbreviation
+        self.chapter_lengths = chapter_lengths
+        self.num_chapters = len(chapter_lengths)
+
+    def __repr__(self):
+        return u'Book(%s)' % self.name
+
+    def name_matches(self, name):
+        """
+        Check if a name matches the name of this book, ignoring case.
+        :param name: The name to match (full name, OSIS name or preferred abbreviation)
+        :return: True if matching else False
+        """
+        return name.lower() in [self.name.lower(), self.osis_name.lower(), self.preferred_abbreviation.lower()]
+
+    def chapter_offset(self, chapter_index):
+        """
+        Get the index, relative to the start of the book, of the first verse of a chapter.
+        :param chapter_index: Zero-based index of the chapter.
+        :return: The calculated offset.
+        """
+        # Add chapter lengths to this point; plus 1 for every chapter title; plus 1 for book title
+        return sum(self.chapter_lengths[:chapter_index]) + (chapter_index + 1) + 1
+
+    def get_indicies(self, chapters=None, verses=None, offset=0):
+        """
+        Get indicies for given chapter(s) and verse(s).
+        :param chapters: Single chapter number or an array of chapter numbers (1-based), None for all
+        :param verses: Single verse number or an array of them; only used when one chapter is given
+        :param offset: The offset to used for this book when reading from file.
+        :return: An array of indicies.
+        :raises ValueError: if a chapter is outside 1..num_chapters or a verse exceeds its chapter
+        """
+        if chapters is None:
+            chapters = list(range(1, self.num_chapters+1))
+        elif isinstance(chapters, int):
+            chapters = [chapters]
+        if len(chapters) != 1:
+            verses = None
+        elif isinstance(verses, int):
+            verses = [verses]
+
+        refs = []
+        for chapter in chapters:
+            if not 1 <= chapter <= self.num_chapters:
+                raise ValueError(u'Book "%s" only have %d chapters.' % (self.name, self.num_chapters))
+            chapter_length = self.chapter_lengths[chapter-1]
+            tmp_verses = range(1, chapter_length+1) if verses is None else verses
+            # Verses may come in any order (or be empty), so check the largest, not the last.
+            if tmp_verses and max(tmp_verses) > chapter_length:
+                raise ValueError(u'Book "%s", chapter %d, only have %d verses.' %
+                                 (self.name, chapter, chapter_length))
+            first_verse_index = offset + self.chapter_offset(chapter-1)
+            refs.extend(first_verse_index + verse-1 for verse in tmp_verses)
+        return refs
+
+    @property
+    def size(self):
+        """
+        Number of index entries in the book: every verse, one heading per chapter, one book title.
+        """
+        return sum(self.chapter_lengths) + len(self.chapter_lengths) + 1
+
+
+class BibleStructure(object):
+
+    def __init__(self, versification, testaments=[u'ot', u'nt']):
+        """
+        Initialize structure based on the versification.
+        :param versification: The versification to use.
+        :param testaments: List of testaments in this bible, must be 'ot' and/or 'nt'
+        """
+        self._book_offsets = None  # offsets within sections
+        self._books = {}
+
+        # Find the canon used. The canons are originally defined in SWORD header files.
+        if versification not in canons.keys():
+            raise ValueError('The versification "%s" is unknown!' % versification)
+        else:
+            canon = canons[versification]
+
+        # Based on the canon create the BookStructure objects needed
+        for testament in testaments:
+            self._books[testament] = []
+            for book in canon[testament]:
+                self._books[testament].append(BookStructure(*book))
+
+    def _update_book_offsets(self):
+        """
+        Compute index offsets and add other data
+        """
+        # FIXME: this is still a little hairy.
... 2732 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/pysword.git



More information about the Python-modules-commits mailing list