[Python-modules-commits] [pysword] 01/03: Import pysword_0.2.1.orig.tar.gz
Raoul Snyman
superfly-guest at moszumanska.debian.org
Tue Apr 19 14:25:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
superfly-guest pushed a commit to branch master
in repository pysword.
commit 3e0082df2e3f3b2aaf2d60aad73e9fcb0d601c85
Author: Raoul Snyman <raoul at snyman.info>
Date: Tue Apr 19 16:23:08 2016 +0200
Import pysword_0.2.1.orig.tar.gz
---
PKG-INFO | 181 +++
README.rst | 162 +++
pysword.egg-info/PKG-INFO | 181 +++
pysword.egg-info/SOURCES.txt | 13 +
pysword.egg-info/dependency_links.txt | 1 +
pysword.egg-info/top_level.txt | 1 +
pysword/__init__.py | 21 +
pysword/bible.py | 236 ++++
pysword/books.py | 191 +++
pysword/canon-parser.py | 119 ++
pysword/canons.py | 2200 +++++++++++++++++++++++++++++++++
pysword/cleaner.py | 149 +++
pysword/modules.py | 126 ++
setup.cfg | 5 +
setup.py | 32 +
15 files changed, 3618 insertions(+)
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..c076dee
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,181 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.1
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: tomasgroth at yahoo.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+
+ This project is **not** an official `CrossWire <http://crosswire.org/>`_
+ project. It merely provides an alternative way to read the bible modules
+ created by CrossWires `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+ Features
+ --------
+
+ - Read SWORD bibles (not commentaries etc.)
+ - Detection of locally installed bible modules.
+ - Supports all known SWORD module formats (ztext, ztext4, rawtext,
+ rawtext4)
+ - Read from zipped modules, like those available from
+ http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+ - Clean text of OSIS, GBF or ThML tags.
+ - Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+ License
+ -------
+
+ Since parts of the code are derived and/or copied from the SWORD project
+ (see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+ Installation
+ ------------
+
+ PySwords source code can be downloaded from PySwords `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+ but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+ for install using ``pip`` or ``easy_install``.
+ It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+ and will soon be available as a package in Debian and Fedora.
+
+ Example code
+ ------------
+
+ Use modules from default datapath
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Find available modules/bibles in standard data path.
+ # For non-standard data path, pass it as an argument to the SwordModules constructor.
+ modules = SwordModules()
+ # In this case we'll assume the modules found is something like:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Load module from zip-file
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Load module in zip
+ # NB: the zip content is only available as long as the SwordModules object exists
+ modules = SwordModules('KJV.zip')
+ # In this case the module found is:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Manually create bible
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.bible import SwordBible
+ # Create the bible. The arguments are:
+ # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+ # Only the first is required, the rest have default values which should work in most cases.
+ bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Run tests
+ ---------
+
+ To run the testsuite, first run the script that downloads the files used
+ for testing, and then use nosetests to run the testsuite:
+
+ .. code:: sh
+
+ $ python tests/resources/download_bibles.py
+ $ nosetests -v tests/
+
+ The tests should run and pass using both python 2 and 3.
+
+ Module formats
+ --------------
+
+ I'll use Python's struct module's format strings to describe byte
+ formatting. See https://docs.python.org/3/library/struct.html
+
+ There are currently 4 formats for bible modules in SWORD.
+
+ ztext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Take the Old Testament (OT) for example. Three files:
+
+ - ot.bzv: Maps verses to character ranges in compressed buffers. 10
+ bytes ('<IIH') for each verse in the Bible:
+
+ - buffer\_num (I): which compressed buffer the verse is located in
+ - verse\_start (I): the location in the uncompressed buffer where
+ the verse begins
+ - verse\_len (H): length of the verse, in uncompressed characters
+
+ These 10-byte records are densely packed, indexed by VerseKey 'Indicies'
+ (docs later). So the record for the verse with index x starts at byte
+ 10\*x.
+
+ - ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+ ('<III') for each compressed buffer:
+
+ - offset (I): where the compressed buffer starts in the file
+ - size (I): the length of the compressed data, in bytes
+ - uc\_size (I): the length of the uncompressed data, in bytes
+ (unused)
+
+ These 12-byte records are densely packed, indexed by buffer\_num (see
+ previous). So the record for compressed buffer buffer\_num starts at
+ byte 12\*buffer\_num.
+
+ - ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+ 'offset'.
+
+ ztext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+ is now represented by 4-byte integer (I), making the record 12 bytes in
+ all.
+
+ rawtext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Again OT example. Two files:
+
+ - ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+ for each verse in the Bible:
+
+ - verse\_start (I): the location in the textfile where the verse
+ begins
+ - verse\_len (H): length of the verse, in characters
+
+ - ot: Contains the text. Read 'verse\_len' characters starting at
+ 'verse\_start'.
+
+ rawtext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ rawtext4 is the same as rawtext, except that in the vss-file the
+ verse\_len is now represented by 4-byte integer (I), making the record 8
+ bytes in all.
+
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..babeade
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,162 @@
+A native Python reader of the SWORD Project Bible Modules
+
+This project is **not** an official `CrossWire <http://crosswire.org/>`_
+project. It merely provides an alternative way to read the bible modules
+created by CrossWires `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+Features
+--------
+
+- Read SWORD bibles (not commentaries etc.)
+- Detection of locally installed bible modules.
+- Supports all known SWORD module formats (ztext, ztext4, rawtext,
+ rawtext4)
+- Read from zipped modules, like those available from
+ http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+- Clean text of OSIS, GBF or ThML tags.
+- Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+License
+-------
+
+Since parts of the code are derived and/or copied from the SWORD project
+(see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+Installation
+------------
+
+PySwords source code can be downloaded from PySwords `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+for install using ``pip`` or ``easy_install``.
+It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+and will soon be available as a package in Debian and Fedora.
+
+Example code
+------------
+
+Use modules from default datapath
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from pysword.modules import SwordModules
+ # Find available modules/bibles in standard data path.
+ # For non-standard data path, pass it as an argument to the SwordModules constructor.
+ modules = SwordModules()
+ # In this case we'll assume the modules found is something like:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Load module from zip-file
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from pysword.modules import SwordModules
+ # Load module in zip
+ # NB: the zip content is only available as long as the SwordModules object exists
+ modules = SwordModules('KJV.zip')
+ # In this case the module found is:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Manually create bible
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from pysword.bible import SwordBible
+ # Create the bible. The arguments are:
+ # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+ # Only the first is required, the rest have default values which should work in most cases.
+ bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Run tests
+---------
+
+To run the testsuite, first run the script that downloads the files used
+for testing, and then use nosetests to run the testsuite:
+
+.. code:: sh
+
+ $ python tests/resources/download_bibles.py
+ $ nosetests -v tests/
+
+The tests should run and pass using both python 2 and 3.
+
+Module formats
+--------------
+
+I'll use Python's struct module's format strings to describe byte
+formatting. See https://docs.python.org/3/library/struct.html
+
+There are currently 4 formats for bible modules in SWORD.
+
+ztext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Take the Old Testament (OT) for example. Three files:
+
+- ot.bzv: Maps verses to character ranges in compressed buffers. 10
+ bytes ('<IIH') for each verse in the Bible:
+
+ - buffer\_num (I): which compressed buffer the verse is located in
+ - verse\_start (I): the location in the uncompressed buffer where
+ the verse begins
+ - verse\_len (H): length of the verse, in uncompressed characters
+
+These 10-byte records are densely packed, indexed by VerseKey 'Indicies'
+(docs later). So the record for the verse with index x starts at byte
+10\*x.
+
+- ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+ ('<III') for each compressed buffer:
+
+ - offset (I): where the compressed buffer starts in the file
+ - size (I): the length of the compressed data, in bytes
+ - uc\_size (I): the length of the uncompressed data, in bytes
+ (unused)
+
+These 12-byte records are densely packed, indexed by buffer\_num (see
+previous). So the record for compressed buffer buffer\_num starts at
+byte 12\*buffer\_num.
+
+- ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+ 'offset'.
+
+ztext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+is now represented by 4-byte integer (I), making the record 12 bytes in
+all.
+
+rawtext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Again OT example. Two files:
+
+- ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+ for each verse in the Bible:
+
+ - verse\_start (I): the location in the textfile where the verse
+ begins
+ - verse\_len (H): length of the verse, in characters
+
+- ot: Contains the text. Read 'verse\_len' characters starting at
+ 'verse\_start'.
+
+rawtext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+rawtext4 is the same as rawtext, except that in the vss-file the
+verse\_len is now represented by 4-byte integer (I), making the record 8
+bytes in all.
diff --git a/pysword.egg-info/PKG-INFO b/pysword.egg-info/PKG-INFO
new file mode 100644
index 0000000..c076dee
--- /dev/null
+++ b/pysword.egg-info/PKG-INFO
@@ -0,0 +1,181 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.1
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: tomasgroth at yahoo.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+
+ This project is **not** an official `CrossWire <http://crosswire.org/>`_
+ project. It merely provides an alternative way to read the bible modules
+ created by CrossWires `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+ Features
+ --------
+
+ - Read SWORD bibles (not commentaries etc.)
+ - Detection of locally installed bible modules.
+ - Supports all known SWORD module formats (ztext, ztext4, rawtext,
+ rawtext4)
+ - Read from zipped modules, like those available from
+ http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+ - Clean text of OSIS, GBF or ThML tags.
+ - Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+ License
+ -------
+
+ Since parts of the code are derived and/or copied from the SWORD project
+ (see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+ Installation
+ ------------
+
+ PySwords source code can be downloaded from PySwords `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+ but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+ for install using ``pip`` or ``easy_install``.
+ It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+ and will soon be available as a package in Debian and Fedora.
+
+ Example code
+ ------------
+
+ Use modules from default datapath
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Find available modules/bibles in standard data path.
+ # For non-standard data path, pass it as an argument to the SwordModules constructor.
+ modules = SwordModules()
+ # In this case we'll assume the modules found is something like:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Load module from zip-file
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Load module in zip
+ # NB: the zip content is only available as long as the SwordModules object exists
+ modules = SwordModules('KJV.zip')
+ # In this case the module found is:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Manually create bible
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.bible import SwordBible
+ # Create the bible. The arguments are:
+ # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+ # Only the first is required, the rest have default values which should work in most cases.
+ bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Run tests
+ ---------
+
+ To run the testsuite, first run the script that downloads the files used
+ for testing, and then use nosetests to run the testsuite:
+
+ .. code:: sh
+
+ $ python tests/resources/download_bibles.py
+ $ nosetests -v tests/
+
+ The tests should run and pass using both python 2 and 3.
+
+ Module formats
+ --------------
+
+ I'll use Python's struct module's format strings to describe byte
+ formatting. See https://docs.python.org/3/library/struct.html
+
+ There are currently 4 formats for bible modules in SWORD.
+
+ ztext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Take the Old Testament (OT) for example. Three files:
+
+ - ot.bzv: Maps verses to character ranges in compressed buffers. 10
+ bytes ('<IIH') for each verse in the Bible:
+
+ - buffer\_num (I): which compressed buffer the verse is located in
+ - verse\_start (I): the location in the uncompressed buffer where
+ the verse begins
+ - verse\_len (H): length of the verse, in uncompressed characters
+
+ These 10-byte records are densely packed, indexed by VerseKey 'Indicies'
+ (docs later). So the record for the verse with index x starts at byte
+ 10\*x.
+
+ - ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+ ('<III') for each compressed buffer:
+
+ - offset (I): where the compressed buffer starts in the file
+ - size (I): the length of the compressed data, in bytes
+ - uc\_size (I): the length of the uncompressed data, in bytes
+ (unused)
+
+ These 12-byte records are densely packed, indexed by buffer\_num (see
+ previous). So the record for compressed buffer buffer\_num starts at
+ byte 12\*buffer\_num.
+
+ - ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+ 'offset'.
+
+ ztext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+ is now represented by 4-byte integer (I), making the record 12 bytes in
+ all.
+
+ rawtext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Again OT example. Two files:
+
+ - ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+ for each verse in the Bible:
+
+ - verse\_start (I): the location in the textfile where the verse
+ begins
+ - verse\_len (H): length of the verse, in characters
+
+ - ot: Contains the text. Read 'verse\_len' characters starting at
+ 'verse\_start'.
+
+ rawtext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ rawtext4 is the same as rawtext, except that in the vss-file the
+ verse\_len is now represented by 4-byte integer (I), making the record 8
+ bytes in all.
+
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/pysword.egg-info/SOURCES.txt b/pysword.egg-info/SOURCES.txt
new file mode 100644
index 0000000..4351f91
--- /dev/null
+++ b/pysword.egg-info/SOURCES.txt
@@ -0,0 +1,13 @@
+README.rst
+setup.py
+pysword/__init__.py
+pysword/bible.py
+pysword/books.py
+pysword/canon-parser.py
+pysword/canons.py
+pysword/cleaner.py
+pysword/modules.py
+pysword.egg-info/PKG-INFO
+pysword.egg-info/SOURCES.txt
+pysword.egg-info/dependency_links.txt
+pysword.egg-info/top_level.txt
\ No newline at end of file
diff --git a/pysword.egg-info/dependency_links.txt b/pysword.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/pysword.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/pysword.egg-info/top_level.txt b/pysword.egg-info/top_level.txt
new file mode 100644
index 0000000..41d3e59
--- /dev/null
+++ b/pysword.egg-info/top_level.txt
@@ -0,0 +1 @@
+pysword
diff --git a/pysword/__init__.py b/pysword/__init__.py
new file mode 100644
index 0000000..b0f8c57
--- /dev/null
+++ b/pysword/__init__.py
@@ -0,0 +1,21 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers: #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it #
+# under the terms of the GNU General Public License as published by the Free #
+# Software Foundation; version 2 of the License. #
+# #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
+# more details. #
+# #
+# You should have received a copy of the GNU General Public License along #
+# with this program; if not, write to the Free Software Foundation, Inc., 51 #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #
+###############################################################################
+
+__all__ = ['books', 'modules', 'bible', 'cleaner', 'canons']
diff --git a/pysword/bible.py b/pysword/bible.py
new file mode 100644
index 0000000..1c7ac6b
--- /dev/null
+++ b/pysword/bible.py
@@ -0,0 +1,236 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers: #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it #
+# under the terms of the GNU General Public License as published by the Free #
+# Software Foundation; version 2 of the License. #
+# #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
+# more details. #
+# #
+# You should have received a copy of the GNU General Public License along #
+# with this program; if not, write to the Free Software Foundation, Inc., 51 #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #
+###############################################################################
+
+import os
+import struct
+import zlib
+
+from pysword.books import BibleStructure
+from pysword.cleaner import OSISCleaner, GBFCleaner, ThMLCleaner
+
+
+class SwordModuleType:
+    """
+    Names of the SWORD bible module storage formats handled by SwordBible.
+    """
+    # zlib-compressed modules; the variant ending in '4' stores the verse length as 4 bytes.
+    ZTEXT = u'ztext'
+    ZTEXT4 = u'ztext4'
+    # Uncompressed modules; the variant ending in '4' stores the verse length as 4 bytes.
+    RAWTEXT = u'rawtext'
+    RAWTEXT4 = u'rawtext4'
+
+
+class SwordBible(object):
+    """
+    Reader for the text of a single SWORD bible module.
+
+    The index and text files of every testament that can be found are opened when the object is
+    created and stored on the instance; this class never closes them explicitly.
+    """
+
+    # Verse index record layout for each module type: (struct format, record size in bytes).
+    _VERSE_RECORD_LAYOUTS = {
+        SwordModuleType.ZTEXT: (u'<IIH', 10),
+        SwordModuleType.ZTEXT4: (u'<III', 12),
+        SwordModuleType.RAWTEXT: (u'<IH', 6),
+        SwordModuleType.RAWTEXT4: (u'<II', 8),
+    }
+
+    def __init__(self, module_path, module_type=SwordModuleType.ZTEXT, versification=u'default', encoding=None,
+                 source_type=u'OSIS'):
+        """
+        Initialize the SwordBible object.
+        :param module_path: Path to SWORD modules datapath.
+        :param module_type: Types as defined by SwordModuleType, defaults to 'ztext'.
+        :param versification: Versification used for bible, defaults to 'default'.
+        :param encoding: Encoding used by the bible, should be either 'utf-8' or 'latin1'. If None the
+                         encoding is detected from the module text.
+        :param source_type: Type of (possible) tags in the text, can be 'OSIS', 'GBF' or 'ThML'.
+        :raise IOError: If neither the OT nor the NT files can be opened.
+        :raise ValueError: If unknown module_type is supplied.
+        """
+        self._module_type = module_type.lower()
+        self._module_path = module_path
+        self._files = {}
+
+        # Pick the opener matching the module type
+        if self._is_ztext():
+            get_files = self._get_ztext_files
+        elif self._module_type in (SwordModuleType.RAWTEXT, SwordModuleType.RAWTEXT4):
+            get_files = self._get_rawtext_files
+        else:
+            raise ValueError(u'Invalid module type: %s' % module_type)
+
+        # Open the files needed to read from the module. A module may contain only one of the
+        # testaments, so a testament that cannot be opened is skipped.
+        for testament in (u'ot', u'nt'):
+            try:
+                self._files[testament] = get_files(testament)
+            except IOError:
+                pass
+        # Without any testament there is nothing to read.
+        if not self._files:
+            raise IOError(u'Could not open OT or NT for module')
+
+        # Load the bible structure
+        testaments = self._files.keys()
+        self._structure = BibleStructure(versification, testaments)
+
+        # Set verse record format and size
+        self._verse_record_format, self._verse_record_size = self._VERSE_RECORD_LAYOUTS[self._module_type]
+
+        # Detect text-encoding if none given
+        if encoding is None:
+            self._encoding = self._detect_encoding()
+        else:
+            self._encoding = encoding
+
+        # Create cleaner to remove OSIS, GBF or ThML tags. OSIS is the fallback for unknown or empty types.
+        tag_type = source_type.upper() if source_type else u''
+        if tag_type == u'THML':
+            self._cleaner = ThMLCleaner()
+        elif tag_type == u'GBF':
+            self._cleaner = GBFCleaner()
+        else:
+            self._cleaner = OSISCleaner()
+
+    def _is_ztext(self):
+        """
+        Tell whether this module uses one of the compressed formats.
+        :return: True for 'ztext' and 'ztext4' modules, otherwise False
+        """
+        return self._module_type in (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4)
+
+    def _detect_encoding(self):
+        """
+        Guess the text encoding by trying to decode a sample of the module text as UTF-8.
+        :return: 'utf-8' if the sample is valid UTF-8, otherwise 'latin1'
+        """
+        # Imported here since it is only needed for the detection.
+        import codecs
+
+        # Pick the first available testament for testing
+        testament = next(iter(self._files))
+        if self._is_ztext():
+            undecoded_text = self._uncompressed_text(testament, 0)
+        else:
+            undecoded_text = self._files[testament][1].read(4096)
+        # An incremental decoder accepts a sample that ends in the middle of a multi-byte character,
+        # which can happen since the sample is cut at a fixed number of bytes.
+        try:
+            codecs.getincrementaldecoder(u'utf-8')().decode(undecoded_text, final=False)
+        except UnicodeDecodeError:
+            return u'latin1'
+        return u'utf-8'
+
+    def _open_files(self, names):
+        """
+        Open all the given files for binary reading.
+        :param names: Paths of the files to open
+        :return: list of the opened files, in the same order as names
+        :raise IOError: If a file cannot be opened. Files opened before the failure are closed again.
+        """
+        opened = []
+        try:
+            for name in names:
+                opened.append(open(name, u'rb'))
+        except IOError:
+            for opened_file in opened:
+                opened_file.close()
+            raise
+        return opened
+
+    def _get_ztext_files(self, testament):
+        """
+        Given a testament ('ot' or 'nt'), returns a list of files [verse_to_buf, buf_to_loc, text]
+        :param testament: 'ot' or 'nt'
+        :return: returns a list of files [verse_to_buf, buf_to_loc, text]
+        :raise IOError: If one of the files cannot be opened.
+        """
+        names = [os.path.join(self._module_path, u'%s.bz%s' % (testament, code)) for code in (u'v', u's', u'z')]
+        return self._open_files(names)
+
+    def _get_rawtext_files(self, testament):
+        """
+        Given a testament ('ot' or 'nt'), returns a list of files [verse_to_loc, text]
+        :param testament: 'ot' or 'nt'
+        :return: returns a list of files [verse_to_loc, text]
+        :raise IOError: If one of the files cannot be opened.
+        """
+        v2l_name = os.path.join(self._module_path, u'%s.vss' % testament)
+        text_name = os.path.join(self._module_path, u'%s' % testament)
+        return self._open_files((v2l_name, text_name))
+
+    def _ztext_for_index(self, testament, index):
+        """
+        Get the ztext for a given index.
+        :param testament: 'ot' or 'nt'
+        :param index: Index of the verse record to read
+        :return: the decoded text of the verse.
+        """
+        verse_to_buf, buf_to_loc, text = self._files[testament]
+
+        # Read the verse record.
+        verse_to_buf.seek(self._verse_record_size*index)
+        buf_num, verse_start, verse_len = struct.unpack(self._verse_record_format,
+                                                        verse_to_buf.read(self._verse_record_size))
+        uncompressed_text = self._uncompressed_text(testament, buf_num)
+        return uncompressed_text[verse_start:verse_start+verse_len].decode(self._encoding, errors=u'replace')
+
+    def _uncompressed_text(self, testament, buf_num):
+        """
+        Decompress ztext at given position.
+        :param testament: 'ot' or 'nt'
+        :param buf_num: Buffer to read
+        :return: The decompressed, still undecoded, content of the buffer
+        """
+        verse_to_buf, buf_to_loc, text = self._files[testament]
+
+        # Determine where the compressed data starts and ends. Each buffer record is 12 bytes.
+        buf_to_loc.seek(buf_num*12)
+        offset, size, uc_size = struct.unpack(u'<III', buf_to_loc.read(12))
+
+        # Get the compressed data.
+        text.seek(offset)
+        compressed_data = text.read(size)
+        return zlib.decompress(compressed_data)
+
+    def _rawtext_for_index(self, testament, index):
+        """
+        Get the rawtext for a given index.
+        :param testament: 'ot' or 'nt'
+        :param index: Index of the verse record to read
+        :return: the decoded text of the verse.
+        """
+        verse_to_loc, text = self._files[testament]
+
+        # Read the verse record.
+        verse_to_loc.seek(self._verse_record_size*index)
+        verse_start, verse_len = struct.unpack(self._verse_record_format, verse_to_loc.read(self._verse_record_size))
+        text.seek(verse_start)
+        return text.read(verse_len).decode(self._encoding, errors=u'replace')
+
+    # USER FACING #################################################################################
+    def get_iter(self, books=None, chapters=None, verses=None, clean=True):
+        """
+        Retrieve the text for a given reference, one verse at a time.
+        :param books: Single book name or an array of book names
+        :param chapters: Single chapter number or an array of chapter numbers
+        :param verses: Single verse number or an array of verse numbers
+        :param clean: True for cleaning text for tags, False to keep them.
+        :return: iterator yielding the text of each verse of the reference
+        """
+        indicies = self._structure.ref_to_indicies(books=books, chapters=chapters, verses=verses)
+        if self._is_ztext():
+            text_for_index = self._ztext_for_index
+        else:
+            text_for_index = self._rawtext_for_index
+
+        for testament, idxs in indicies.items():
+            for idx in idxs:
+                text = text_for_index(testament, idx)
+                # Only text that contains a '<' can contain tags, so other text is not sent to the cleaner.
+                if clean and self._cleaner and '<' in text:
+                    text = self._cleaner.clean(text)
+                yield text
+
+    def get(self, books=None, chapters=None, verses=None, clean=True, join='\n'):
+        """
+        Retrieve the text for a given reference.
+        :param books: Single book name or an array of book names
+        :param chapters: Single chapter number or an array of chapter numbers
+        :param verses: Single verse number or an array of verse numbers
+        :param clean: True for cleaning text for tags, False to keep them.
+        :param join: The char/string that should be used to mark a new verse, defaults to '\n'
+        :return: the text for the reference.
+        """
+        return join.join(self.get_iter(books=books, chapters=chapters, verses=verses, clean=clean))
+
+    def get_structure(self):
+        """
+        Retrieve the structure of this bible.
+        :return: BibleStructure of this bible
+        """
+        return self._structure
diff --git a/pysword/books.py b/pysword/books.py
new file mode 100644
index 0000000..47c1f78
--- /dev/null
+++ b/pysword/books.py
@@ -0,0 +1,191 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers: #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it #
+# under the terms of the GNU General Public License as published by the Free #
+# Software Foundation; version 2 of the License. #
+# #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
+# more details. #
+# #
+# You should have received a copy of the GNU General Public License along #
+# with this program; if not, write to the Free Software Foundation, Inc., 51 #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #
+###############################################################################
+
+from pysword.canons import canons
+
+
+class BookStructure(object):
+    """
+    Names and chapter/verse layout of a single book.
+
+    The index arithmetic in this class reserves one slot for the book title and one slot
+    for each chapter heading, in addition to one slot per verse.
+    """
+
+    def __init__(self, name, osis_name, preferred_abbreviation, chapter_lengths):
+        """
+        :param name: Full English name of book
+        :param osis_name: Abbreviation of book
+        :param preferred_abbreviation: Preferred abbreviation of book
+        :param chapter_lengths: List containing the number of verses for each chapter.
+        """
+        self.name = name
+        self.osis_name = osis_name
+        self.preferred_abbreviation = preferred_abbreviation
+        self.chapter_lengths = chapter_lengths
+        self.num_chapters = len(chapter_lengths)
+
+    def __repr__(self):
+        return u'Book(%s)' % self.name
+
+    def name_matches(self, name):
+        """
+        Check if a name matches the name of this book (case-insensitive).
+        :param name: The name to match against the full name, OSIS name and preferred abbreviation
+        :return: True if matching else False
+        """
+        name = name.lower()
+        return name in [self.name.lower(), self.osis_name.lower(), self.preferred_abbreviation.lower()]
+
+    def chapter_offset(self, chapter_index):
+        """
+        Get the offset, relative to the start of the book, of the first verse of a chapter.
+        :param chapter_index: The zero-based chapter index to calculate from.
+        :return: The calculated offset.
+        """
+        # Add chapter lengths to this point; plus 1 for every chapter title; plus 1 for book title
+        return sum(self.chapter_lengths[:chapter_index]) + (chapter_index + 1) + 1
+
+    def get_indicies(self, chapters=None, verses=None, offset=0):
+        """
+        Get indicies for given chapter(s) and verse(s).
+        :param chapters: Single chapter number or an array of chapter numbers (1-based);
+                         None selects every chapter of the book.
+        :param verses: Single verse number or an array of verse numbers; only honoured when
+                       exactly one chapter is selected, otherwise whole chapters are returned.
+        :param offset: The offset to use for this book when reading from file.
+        :return: An array of indicies.
+        :raises ValueError: if a chapter number is outside 1..num_chapters, or any requested
+                            verse number exceeds the length of its chapter.
+        """
+        if chapters is None:
+            chapters = list(range(1, self.num_chapters + 1))
+        elif isinstance(chapters, int):
+            chapters = [chapters]
+        if len(chapters) != 1:
+            # A verse selection is ignored unless exactly one chapter is requested.
+            verses = None
+        elif isinstance(verses, int):
+            verses = [verses]
+
+        refs = []
+        for chapter in chapters:
+            if chapter > self.num_chapters:
+                raise ValueError(u'Book "%s" only have %d chapters.' % (self.name, self.num_chapters))
+            if chapter < 1:
+                # Without this guard chapter-1 would index chapter_lengths from the end
+                # and silently produce indicies for the wrong chapter.
+                raise ValueError(u'Book "%s" has no chapter %d, chapters are numbered from 1.' %
+                                 (self.name, chapter))
+            chapter_length = self.chapter_lengths[chapter - 1]
+            if verses is None:
+                tmp_verses = range(1, chapter_length + 1)
+            else:
+                tmp_verses = verses
+                # Check every requested verse, not only the last one: the selection
+                # is not guaranteed to be sorted (and may be empty).
+                if any(verse > chapter_length for verse in tmp_verses):
+                    raise ValueError(u'Book "%s", chapter %d, only have %d verses.' %
+                                     (self.name, chapter, chapter_length))
+            first_verse_index = offset + self.chapter_offset(chapter - 1)
+            refs.extend(first_verse_index + verse - 1 for verse in tmp_verses)
+        return refs
+
+    @property
+    def size(self):
+        """
+        Size of book.
+        """
+        # Total verses + chapter heading for each chapter + 1 for book title
+        return sum(self.chapter_lengths) + len(self.chapter_lengths) + 1
+
+
+class BibleStructure(object):
+
+    def __init__(self, versification, testaments=(u'ot', u'nt')):
+        """
+        Initialize structure based on the versification.
+        :param versification: The versification to use; must be a key of ``canons``.
+        :param testaments: Iterable of testaments in this bible, must be 'ot' and/or 'nt'
+        :raises ValueError: if the versification is not found in ``canons``.
+        """
+        self._book_offsets = None  # offsets within sections
+        self._books = {}
+
+        # Find the canon used. The canons are originally defined in SWORD header files.
+        if versification not in canons:
+            raise ValueError('The versification "%s" is unknown!' % versification)
+        canon = canons[versification]
+
+        # Based on the canon create the BookStructure objects needed, one list per testament.
+        for testament in testaments:
+            self._books[testament] = [BookStructure(*book) for book in canon[testament]]
+
+ def _update_book_offsets(self):
+ """
+ Compute index offsets and add other data
+ """
+ # FIXME: this is still a little hairy.
... 2732 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/pysword.git
More information about the Python-modules-commits
mailing list