[Python-modules-commits] [pysword] 01/03: import pysword_0.2.3.orig.tar.gz
Raoul Snyman
superfly-guest at moszumanska.debian.org
Mon Mar 6 22:30:25 UTC 2017
This is an automated email from the git hooks/post-receive script.
superfly-guest pushed a commit to branch master
in repository pysword.
commit 8c39b0b17567d67357843c47e33e6dabf78a2330
Author: Raoul Snyman <raoul at snyman.info>
Date: Mon Mar 6 14:56:26 2017 -0700
import pysword_0.2.3.orig.tar.gz
---
PKG-INFO | 188 +++
README.rst | 169 +++
debian/changelog | 6 +
debian/compat | 1 +
debian/control | 49 +
debian/copyright | 57 +
debian/rules | 11 +
debian/source/format | 1 +
debian/watch | 3 +
pysword.egg-info/PKG-INFO | 188 +++
pysword.egg-info/SOURCES.txt | 13 +
pysword.egg-info/dependency_links.txt | 1 +
pysword.egg-info/top_level.txt | 1 +
pysword/__init__.py | 21 +
pysword/bible.py | 236 ++++
pysword/books.py | 196 +++
pysword/canon-parser.py | 119 ++
pysword/canons.py | 2200 +++++++++++++++++++++++++++++++++
pysword/cleaner.py | 150 +++
pysword/modules.py | 126 ++
setup.cfg | 5 +
setup.py | 32 +
22 files changed, 3773 insertions(+)
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..cb3ff78
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,188 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.3
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: second at tgc.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+
+ This project is **not** an official `CrossWire <http://crosswire.org/>`_
+ project. It merely provides an alternative way to read the bible modules
+ created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+ Features
+ --------
+
+ - Read SWORD bibles (not commentaries etc.)
+ - Detection of locally installed bible modules.
+ - Supports all known SWORD module formats (ztext, ztext4, rawtext,
+ rawtext4)
+ - Read from zipped modules, like those available from
+ http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+ - Clean text of OSIS, GBF or ThML tags.
+ - Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+ License
+ -------
+
+ Since parts of the code are derived and/or copied from the SWORD project
+ (see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+ Installation
+ ------------
+
+ PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+ but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+ for install using ``pip`` or ``easy_install``.
+ It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+ and will soon be available as a package in Debian and Fedora.
+
+ Example code
+ ------------
+
+ Use modules from default datapath
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Find available modules/bibles in standard data path.
+ # For non-standard data path, pass it as an argument to the SwordModules constructor.
+ modules = SwordModules()
+ # In this case we'll assume the modules found is something like:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Load module from zip-file
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Load module in zip
+ # NB: the zip content is only available as long as the SwordModules object exists
+ modules = SwordModules('KJV.zip')
+ # In this case the module found is:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Manually create bible
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.bible import SwordBible
+ # Create the bible. The arguments are:
+ # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+ # Only the first is required, the rest have default values which should work in most cases.
+ bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Run tests
+ ---------
+
+ To run the testsuite, first run the script that downloads the files used
+ for testing, and then use nosetests to run the testsuite:
+
+ .. code:: sh
+
+ $ python tests/resources/download_bibles.py
+ $ nosetests -v tests/
+
+ The tests should run and pass using both python 2 and 3.
+
+ Contributing
+ ------------
+
+ If you want to contribute, you are most welcome to do so!
+ Feel free to report issues and create merge requests at https://gitlab.com/tgc-dk/pysword
+ If you create a merge request, please include a test that proves that your code actually works.
+
+ Module formats
+ --------------
+
+ I'll use Python's struct module's format strings to describe byte
+ formatting. See https://docs.python.org/3/library/struct.html
+
+ There are currently 4 formats for bible modules in SWORD.
+
+ ztext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Take the Old Testament (OT) for example. Three files:
+
+ - ot.bzv: Maps verses to character ranges in compressed buffers. 10
+ bytes ('<IIH') for each verse in the Bible:
+
+ - buffer\_num (I): which compressed buffer the verse is located in
+ - verse\_start (I): the location in the uncompressed buffer where
+ the verse begins
+ - verse\_len (H): length of the verse, in uncompressed characters
+
+ These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+ (docs later). So the record for the verse with index x starts at byte
+ 10\*x.
+
+ - ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+ ('<III') for each compressed buffer:
+
+ - offset (I): where the compressed buffer starts in the file
+ - size (I): the length of the compressed data, in bytes
+ - uc\_size (I): the length of the uncompressed data, in bytes
+ (unused)
+
+ These 12-byte records are densely packed, indexed by buffer\_num (see
+ previous). So the record for compressed buffer buffer\_num starts at
+ byte 12\*buffer\_num.
+
+ - ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+ 'offset'.
+
+ ztext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+ is now represented by 4-byte integer (I), making the record 12 bytes in
+ all.
+
+ rawtext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Again OT example. Two files:
+
+ - ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+ for each verse in the Bible:
+
+ - verse\_start (I): the location in the textfile where the verse
+ begins
+ - verse\_len (H): length of the verse, in characters
+
+ - ot: Contains the text. Read 'verse\_len' characters starting at
+ 'verse\_start'.
+
+ rawtext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ rawtext4 is the same as rawtext, except that in the vss-file the
+ verse\_len is now represented by 4-byte integer (I), making the record 8
+ bytes in all.
+
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..70d0e1e
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,169 @@
+A native Python reader of the SWORD Project Bible Modules
+
+This project is **not** an official `CrossWire <http://crosswire.org/>`_
+project. It merely provides an alternative way to read the bible modules
+created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+Features
+--------
+
+- Read SWORD bibles (not commentaries etc.)
+- Detection of locally installed bible modules.
+- Supports all known SWORD module formats (ztext, ztext4, rawtext,
+ rawtext4)
+- Read from zipped modules, like those available from
+ http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+- Clean text of OSIS, GBF or ThML tags.
+- Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+License
+-------
+
+Since parts of the code are derived and/or copied from the SWORD project
+(see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+Installation
+------------
+
+PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+for install using ``pip`` or ``easy_install``.
+It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+and will soon be available as a package in Debian and Fedora.
+
+Example code
+------------
+
+Use modules from default datapath
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from pysword.modules import SwordModules
+ # Find available modules/bibles in standard data path.
+ # For non-standard data path, pass it as an argument to the SwordModules constructor.
+ modules = SwordModules()
+ # In this case we'll assume the modules found is something like:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Load module from zip-file
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from pysword.modules import SwordModules
+ # Load module in zip
+ # NB: the zip content is only available as long as the SwordModules object exists
+ modules = SwordModules('KJV.zip')
+ # In this case the module found is:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Manually create bible
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from pysword.bible import SwordBible
+ # Create the bible. The arguments are:
+ # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+ # Only the first is required, the rest have default values which should work in most cases.
+ bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+Run tests
+---------
+
+To run the testsuite, first run the script that downloads the files used
+for testing, and then use nosetests to run the testsuite:
+
+.. code:: sh
+
+ $ python tests/resources/download_bibles.py
+ $ nosetests -v tests/
+
+The tests should run and pass using both python 2 and 3.
+
+Contributing
+------------
+
+If you want to contribute, you are most welcome to do so!
+Feel free to report issues and create merge requests at https://gitlab.com/tgc-dk/pysword
+If you create a merge request, please include a test that proves that your code actually works.
+
+Module formats
+--------------
+
+I'll use Python's struct module's format strings to describe byte
+formatting. See https://docs.python.org/3/library/struct.html
+
+There are currently 4 formats for bible modules in SWORD.
+
+ztext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Take the Old Testament (OT) for example. Three files:
+
+- ot.bzv: Maps verses to character ranges in compressed buffers. 10
+ bytes ('<IIH') for each verse in the Bible:
+
+ - buffer\_num (I): which compressed buffer the verse is located in
+ - verse\_start (I): the location in the uncompressed buffer where
+ the verse begins
+ - verse\_len (H): length of the verse, in uncompressed characters
+
+These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+(docs later). So the record for the verse with index x starts at byte
+10\*x.
+
+- ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+ ('<III') for each compressed buffer:
+
+ - offset (I): where the compressed buffer starts in the file
+ - size (I): the length of the compressed data, in bytes
+ - uc\_size (I): the length of the uncompressed data, in bytes
+ (unused)
+
+These 12-byte records are densely packed, indexed by buffer\_num (see
+previous). So the record for compressed buffer buffer\_num starts at
+byte 12\*buffer\_num.
+
+- ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+ 'offset'.
+
+ztext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+is now represented by 4-byte integer (I), making the record 12 bytes in
+all.
+
+rawtext format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Again OT example. Two files:
+
+- ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+ for each verse in the Bible:
+
+ - verse\_start (I): the location in the textfile where the verse
+ begins
+ - verse\_len (H): length of the verse, in characters
+
+- ot: Contains the text. Read 'verse\_len' characters starting at
+ 'verse\_start'.
+
+rawtext4 format documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+rawtext4 is the same as rawtext, except that in the vss-file the
+verse\_len is now represented by 4-byte integer (I), making the record 8
+bytes in all.
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..0e83d16
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,6 @@
+pysword (0.2.3-1) unstable; urgency=medium
+
+ * Redo package and repository after making a mess
+ * Initial release. (Closes: #821461)
+
+ -- Raoul Snyman <raoul at snyman.info> Mon, 06 Mar 2017 14:48:14 -0700
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..d24b599
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,49 @@
+Source: pysword
+Maintainer: Debian Python Modules Team <python-modules-team at lists.alioth.debian.org>
+Uploaders: Raoul Snyman <raoul at snyman.info>
+Section: python
+Priority: optional
+Build-Depends: debhelper (>= 9),
+ dh-python,
+ python-all (>= 2.6.6-3),
+ python-setuptools,
+ python3-all,
+ python3-setuptools
+Standards-Version: 3.9.8
+Homepage: https://gitlab.com/tgc-dk/pysword
+Vcs-Git: https://anonscm.debian.org/git/python-modules/packages/pysword.git
+Vcs-Browser: https://anonscm.debian.org/git/python-modules/packages/pysword.git
+
+Package: python-pysword
+Architecture: all
+Depends: ${misc:Depends}, ${python:Depends}
+Description: native Python reader module for the SWORD Project (Python 2)
+ This project is not an official CrossWire project. It merely provides an
+ alternative way to read the bible modules created by CrossWire's SWORD
+ project.
+ .
+ Features:
+ * Read SWORD bibles (not commentaries etc.)
+ * Detection of locally installed bible modules.
+ * Supports all known SWORD module formats (ztext, ztext4, rawtext, rawtext4)
+ * Read from zipped modules
+ * Clean text of OSIS, GBF or ThML tags.
+ .
+ This package contains the Python 2 bindings.
+
+Package: python3-pysword
+Architecture: all
+Depends: ${misc:Depends}, ${python3:Depends}
+Description: native Python reader module for the SWORD Project (Python 3)
+ This project is not an official CrossWire project. It merely provides an
+ alternative way to read the bible modules created by CrossWire's SWORD
+ project.
+ .
+ Features:
+ * Read SWORD bibles (not commentaries etc.)
+ * Detection of locally installed bible modules.
+ * Supports all known SWORD module formats (ztext, ztext4, rawtext, rawtext4)
+ * Read from zipped modules
+ * Clean text of OSIS, GBF or ThML tags.
+ .
+ This package contains the Python 3 bindings.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..158c31e
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,57 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: pysword
+Source: https://gitlab.com/tgc-dk/pysword
+
+Files: *
+Copyright: 2008-2016 Kenneth Arnold <kcarnold at mit.edu>
+ 2008-2016 Joshua Gross <joshua.gross at gmail.com>
+ 2008-2016 Tomas Groth <tomasgroth at yahoo.dk>
+ 2008-2016 Matthew Wardrop <mister.wardrop at gmail.com>
+ 2008-2016 Ryan Hiebert
+License: GPL-2
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 dated June, 1991.
+ .
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more
+ details.
+ .
+ You should have received a copy of the GNU General Public
+ License along with this package; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ Boston, MA 02110-1301 USA
+ .
+ On Debian systems, the full text of the GNU General Public
+ License version 2 can be found in the file
+ `/usr/share/common-licenses/GPL-2'.
+
+Files: debian/*
+Copyright: 2016 Unit 193 <unit193 at ubuntu.com>
+ 2016 Raoul Snyman <raoul at snyman.info>
+License: BSD-3-Clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE HOLDERS OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..1756190
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,11 @@
+#!/usr/bin/make -f
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+export PYBUILD_NAME=pysword
+export PYBUILD_DESTDIR_python2=debian/python-pysword/
+export PYBUILD_DESTDIR_python3=debian/python3-pysword/
+
+%:
+ dh $@ --with python2,python3 --buildsystem=pybuild
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..163aaf8
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 0000000..92eac78
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,3 @@
+version=3
+opts=uversionmangle=s/(rc|a|b|c)/~$1/ \
+http://pypi.debian.net/pysword/pysword-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff --git a/pysword.egg-info/PKG-INFO b/pysword.egg-info/PKG-INFO
new file mode 100644
index 0000000..cb3ff78
--- /dev/null
+++ b/pysword.egg-info/PKG-INFO
@@ -0,0 +1,188 @@
+Metadata-Version: 1.1
+Name: pysword
+Version: 0.2.3
+Summary: A native Python2/3 reader module for the SWORD Project Bible Modules
+Home-page: https://gitlab.com/tgc-dk/pysword
+Author: Tomas Groth
+Author-email: second at tgc.dk
+License: GPL2
+Description: A native Python reader of the SWORD Project Bible Modules
+
+ This project is **not** an official `CrossWire <http://crosswire.org/>`_
+ project. It merely provides an alternative way to read the bible modules
+ created by CrossWire's `SWORD <http://crosswire.org/sword/index.jsp>`_ project.
+
+ Features
+ --------
+
+ - Read SWORD bibles (not commentaries etc.)
+ - Detection of locally installed bible modules.
+ - Supports all known SWORD module formats (ztext, ztext4, rawtext,
+ rawtext4)
+ - Read from zipped modules, like those available from
+ http://www.crosswire.org/sword/modules/ModDisp.jsp?modType=Bibles
+ - Clean text of OSIS, GBF or ThML tags.
+ - Supports both python 2 and 3 (tested with 2.7 and 3.5)
+
+ License
+ -------
+
+ Since parts of the code are derived and/or copied from the SWORD project
+ (see canons.py) which is GPL2, this code is also under the GPL2 license.
+
+ Installation
+ ------------
+
+ PySword's source code can be downloaded from PySword's `release list <https://gitlab.com/tgc-dk/pysword/tags>`_,
+ but it is also available from `PyPI <https://pypi.python.org/pypi/pysword/>`_
+ for install using ``pip`` or ``easy_install``.
+ It is also available for `ArchLinux (AUR) <https://aur.archlinux.org/packages/?K=pysword>`_,
+ and will soon be available as a package in Debian and Fedora.
+
+ Example code
+ ------------
+
+ Use modules from default datapath
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Find available modules/bibles in standard data path.
+ # For non-standard data path, pass it as an argument to the SwordModules constructor.
+ modules = SwordModules()
+ # In this case we'll assume the modules found is something like:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Load module from zip-file
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.modules import SwordModules
+ # Load module in zip
+ # NB: the zip content is only available as long as the SwordModules object exists
+ modules = SwordModules('KJV.zip')
+ # In this case the module found is:
+ # {'KJV': {'description': 'KingJamesVersion(1769)withStrongsNumbersandMorphology', 'encoding': 'UTF-8', ...}}
+ found_modules = modules.parse_modules()
+ bible = modules.get_bible_from_module('KJV')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Manually create bible
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ .. code:: python
+
+ from pysword.bible import SwordBible
+ # Create the bible. The arguments are:
+ # SwordBible(<module path>, <module type>, <versification>, <encoding>, <text formatting>)
+ # Only the first is required, the rest have default values which should work in most cases.
+ bible = SwordBible('/home/me/.sword/modules/texts/ztext/kjv/', 'ztext', 'default', 'utf8', 'OSIS')
+ # Get John chapter 3 verse 16
+ output = bible.get(books=['john'], chapters=[3], verses=[16])
+
+ Run tests
+ ---------
+
+ To run the testsuite, first run the script that downloads the files used
+ for testing, and then use nosetests to run the testsuite:
+
+ .. code:: sh
+
+ $ python tests/resources/download_bibles.py
+ $ nosetests -v tests/
+
+ The tests should run and pass using both python 2 and 3.
+
+ Contributing
+ ------------
+
+ If you want to contribute, you are most welcome to do so!
+ Feel free to report issues and create merge requests at https://gitlab.com/tgc-dk/pysword
+ If you create a merge request, please include a test that proves that your code actually works.
+
+ Module formats
+ --------------
+
+ I'll use Python's struct module's format strings to describe byte
+ formatting. See https://docs.python.org/3/library/struct.html
+
+ There are currently 4 formats for bible modules in SWORD.
+
+ ztext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Take the Old Testament (OT) for example. Three files:
+
+ - ot.bzv: Maps verses to character ranges in compressed buffers. 10
+ bytes ('<IIH') for each verse in the Bible:
+
+ - buffer\_num (I): which compressed buffer the verse is located in
+ - verse\_start (I): the location in the uncompressed buffer where
+ the verse begins
+ - verse\_len (H): length of the verse, in uncompressed characters
+
+ These 10-byte records are densely packed, indexed by VerseKey 'Indices'
+ (docs later). So the record for the verse with index x starts at byte
+ 10\*x.
+
+ - ot.bzs: Tells where the compressed buffers start and end. 12 bytes
+ ('<III') for each compressed buffer:
+
+ - offset (I): where the compressed buffer starts in the file
+ - size (I): the length of the compressed data, in bytes
+ - uc\_size (I): the length of the uncompressed data, in bytes
+ (unused)
+
+ These 12-byte records are densely packed, indexed by buffer\_num (see
+ previous). So the record for compressed buffer buffer\_num starts at
+ byte 12\*buffer\_num.
+
+ - ot.bzz: Contains the compressed text. Read 'size' bytes starting at
+ 'offset'.
+
+ ztext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ ztext4 is the same as ztext, except that in the bzv-file the verse\_len
+ is now represented by 4-byte integer (I), making the record 12 bytes in
+ all.
+
+ rawtext format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Again OT example. Two files:
+
+ - ot.vss: Maps verses to character ranges in text file. 6 bytes ('<IH')
+ for each verse in the Bible:
+
+ - verse\_start (I): the location in the textfile where the verse
+ begins
+ - verse\_len (H): length of the verse, in characters
+
+ - ot: Contains the text. Read 'verse\_len' characters starting at
+ 'verse\_start'.
+
+ rawtext4 format documentation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ rawtext4 is the same as rawtext, except that in the vss-file the
+ verse\_len is now represented by 4-byte integer (I), making the record 8
+ bytes in all.
+
+Platform: any
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Religion
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Religion
+Classifier: Topic :: Software Development
+Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2)
diff --git a/pysword.egg-info/SOURCES.txt b/pysword.egg-info/SOURCES.txt
new file mode 100644
index 0000000..4351f91
--- /dev/null
+++ b/pysword.egg-info/SOURCES.txt
@@ -0,0 +1,13 @@
+README.rst
+setup.py
+pysword/__init__.py
+pysword/bible.py
+pysword/books.py
+pysword/canon-parser.py
+pysword/canons.py
+pysword/cleaner.py
+pysword/modules.py
+pysword.egg-info/PKG-INFO
+pysword.egg-info/SOURCES.txt
+pysword.egg-info/dependency_links.txt
+pysword.egg-info/top_level.txt
\ No newline at end of file
diff --git a/pysword.egg-info/dependency_links.txt b/pysword.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/pysword.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/pysword.egg-info/top_level.txt b/pysword.egg-info/top_level.txt
new file mode 100644
index 0000000..41d3e59
--- /dev/null
+++ b/pysword.egg-info/top_level.txt
@@ -0,0 +1 @@
+pysword
diff --git a/pysword/__init__.py b/pysword/__init__.py
new file mode 100644
index 0000000..b0f8c57
--- /dev/null
+++ b/pysword/__init__.py
@@ -0,0 +1,21 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers: #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it #
+# under the terms of the GNU General Public License as published by the Free #
+# Software Foundation; version 2 of the License. #
+# #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
+# more details. #
+# #
+# You should have received a copy of the GNU General Public License along #
+# with this program; if not, write to the Free Software Foundation, Inc., 51 #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #
+###############################################################################
+
+__all__ = ['books', 'modules', 'bible', 'cleaner', 'canons']
diff --git a/pysword/bible.py b/pysword/bible.py
new file mode 100644
index 0000000..35ec643
--- /dev/null
+++ b/pysword/bible.py
@@ -0,0 +1,236 @@
+###############################################################################
+# PySword - A native Python reader of the SWORD Project Bible Modules #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2016 Various developers: #
+# Kenneth Arnold, Joshua Gross, Ryan Hiebert, Matthew Wardrop, Tomas Groth #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it #
+# under the terms of the GNU General Public License as published by the Free #
+# Software Foundation; version 2 of the License. #
+# #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
+# more details. #
+# #
+# You should have received a copy of the GNU General Public License along #
+# with this program; if not, write to the Free Software Foundation, Inc., 51 #
+# Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #
+###############################################################################
+
+import os
+import struct
+import zlib
+
+from pysword.books import BibleStructure
+from pysword.cleaner import OSISCleaner, GBFCleaner, ThMLCleaner
+
+
+class SwordModuleType(object):  # module storage formats accepted by SwordBible (new-style class on Python 2 too)
+    RAWTEXT = u'rawtext'    # SwordBible reads its verse records as '<IH' (6 bytes)
+    ZTEXT = u'ztext'        # SwordBible reads its verse records as '<IIH' (10 bytes)
+    RAWTEXT4 = u'rawtext4'  # SwordBible reads its verse records as '<II' (8 bytes)
+    ZTEXT4 = u'ztext4'      # SwordBible reads its verse records as '<III' (12 bytes)
+
+
+class SwordBible(object):
+
+    def __init__(self, module_path, module_type=SwordModuleType.ZTEXT, versification=u'default', encoding=None,
+                 source_type=u'OSIS'):
+        """
+        Initialize the SwordBible object.
+        :param module_path: Path to SWORD modules datapath.
+        :param module_type: Types as defined by SwordModuleType, defaults to 'ztext'.
+        :param versification: Versification used for bible, defaults to 'default'.
+        :param encoding: Encoding of the bible text ('utf-8' or 'latin1'); detected from the text if None.
+        :param source_type: Type of (possible) tags in the text, can be 'OSIS', 'GBF' or 'ThML'.
+        :raise IOError: If neither the OT nor the NT files can be opened.
+        :raise ValueError: If unknown module_type is supplied.
+        """
+        self._module_type = module_type.lower()
+        self._module_path = module_path
+        self._files = {}
+
+        # Open the files needed to read from the module; a testament whose files cannot be opened is skipped
+        if self._module_type in (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4):
+            try:
+                self._files[u'ot'] = self._get_ztext_files(u'ot')
+            except IOError:
+                pass
+            try:
+                self._files[u'nt'] = self._get_ztext_files(u'nt')
+            except IOError:
+                pass
+        elif self._module_type in (SwordModuleType.RAWTEXT, SwordModuleType.RAWTEXT4):
+            try:
+                self._files[u'ot'] = self._get_rawtext_files(u'ot')
+            except IOError:
+                pass
+            try:
+                self._files[u'nt'] = self._get_rawtext_files(u'nt')
+            except IOError:
+                pass
+        else:
+            raise ValueError(u'Invalid module type: %s' % module_type)
+        if not self._files:  # neither testament could be opened, so there is nothing to read from
+            raise IOError(u'Could not open OT or NT for module')
+
+        # Load the bible structure
+        testaments = list(self._files)
+        self._structure = BibleStructure(versification, testaments)
+
+        # Set verse record format and size
+        if self._module_type == SwordModuleType.ZTEXT:
+            self._verse_record_format = u'<IIH'
+            self._verse_record_size = 10
+        elif self._module_type == SwordModuleType.ZTEXT4:
+            self._verse_record_format = u'<III'
+            self._verse_record_size = 12
+        elif self._module_type == SwordModuleType.RAWTEXT:
+            self._verse_record_format = u'<IH'
+            self._verse_record_size = 6
+        elif self._module_type == SwordModuleType.RAWTEXT4:
+            self._verse_record_format = u'<II'
+            self._verse_record_size = 8
+
+        # Detect text-encoding if none given
+        if encoding is None:
+            # pick the first available testament for testing
+            testament = list(self._files)[0]
+            if self._module_type in (SwordModuleType.ZTEXT, SwordModuleType.ZTEXT4):
+                undecoded_text = self._uncompressed_text(testament, 0)
+            else:
+                undecoded_text = self._files[testament][1].read(4096)
+            # Try to decode to utf-8 (named explicitly: the Python 2 default is ascii), else fallback to latin1
+            try:
+                undecoded_text.decode(u'utf-8')
+                self._encoding = u'utf-8'
+            except UnicodeDecodeError:
+                self._encoding = u'latin1'
+        else:
+            self._encoding = encoding
+        # Create cleaner to remove OSIS, GBF or ThML tags; OSIS is used for any other or empty source_type
+        if source_type:
+            if source_type.upper() == u'THML':
+                self._cleaner = ThMLCleaner()
+            elif source_type.upper() == u'GBF':
+                self._cleaner = GBFCleaner()
+            else:
+                self._cleaner = OSISCleaner()
+        else:
+            self._cleaner = OSISCleaner()
+
+    def _get_ztext_files(self, testament):
+        """
+        Open the three files that hold one testament of a compressed (ztext) module.
+        :param testament: 'ot' or 'nt'
+        :return: list of binary file objects, ordered verse_to_buf (.bzv), buf_to_loc (.bzs), text (.bzz)
+        """
+        paths = []
+        for suffix in (u'v', u's', u'z'):
+            paths.append(os.path.join(self._module_path, u'%s.bz%s' % (testament, suffix)))
+        return [open(path, u'rb') for path in paths]
+
+    def _get_rawtext_files(self, testament):
+        """
+        Open the two files that hold one testament of a rawtext module.
+        :param testament: 'ot' or 'nt'
+        :return: list of binary file objects, ordered verse_to_loc ('<testament>.vss'), text ('<testament>')
+        """
+        text_path = os.path.join(self._module_path, u'%s' % testament)
+        index_path = os.path.join(self._module_path, u'%s.vss' % testament)
+        return [open(index_path, u'rb'), open(text_path, u'rb')]
+
+    def _ztext_for_index(self, testament, index):
+        """
+        Look up one verse record of a compressed module and return the decoded verse text.
+        :param testament: 'ot' or 'nt'
+        :param index: Number of the verse record to read from the verse-to-buffer file
+        :return: the verse text, decoded with the module encoding (undecodable bytes are replaced).
+        """
+        index_file, _, _ = self._files[testament]
+
+        # A record holds the buffer number, the verse offset inside that buffer and the verse length.
+        index_file.seek(index * self._verse_record_size)
+        record = index_file.read(self._verse_record_size)
+        buf_num, start, length = struct.unpack(self._verse_record_format, record)
+        raw_verse = self._uncompressed_text(testament, buf_num)[start:start + length]
+        return raw_verse.decode(self._encoding, errors=u'replace')
+
+ def _uncompressed_text(self, testament, buf_num):
+ """
+ Decompress ztext at given position.
+ :param testament: 'ot' or 'nt'
+ :param buf_num: Buffer to read
+ :return: The decompressed text
+ """
+ verse_to_buf, buf_to_loc, text = self._files[testament]
+
+ # Determine where the compressed data starts and ends.
+ buf_to_loc.seek(buf_num*12)
... 2936 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/pysword.git
More information about the Python-modules-commits
mailing list