[med-svn] [python-latexcodec] 01/04: Imported Upstream version 1.0.1
Kevin Murray
daube-guest at moszumanska.debian.org
Fri Oct 9 10:20:51 UTC 2015
This is an automated email from the git hooks/post-receive script.
daube-guest pushed a commit to branch master
in repository python-latexcodec.
commit 83b86f3e5938b89a36ccd7d37b58f0d01b4b714a
Author: Kevin Murray <spam at kdmurray.id.au>
Date: Thu Oct 8 22:26:36 2015 +1100
Imported Upstream version 1.0.1
---
.travis.yml | 27 ++
AUTHORS.rst | 26 ++
CHANGELOG.rst | 54 +++
INSTALL.rst | 43 +++
LICENSE.rst | 23 ++
MANIFEST.in | 14 +
README.rst | 32 ++
VERSION | 1 +
doc/Makefile | 153 ++++++++
doc/_build/.gitignore | 0
doc/api.rst | 8 +
doc/api/codec.rst | 1 +
doc/api/lexer.rst | 1 +
doc/authors.rst | 5 +
doc/changes.rst | 7 +
doc/conf.py | 41 +++
doc/index.rst | 25 ++
doc/license.rst | 11 +
doc/make.bat | 190 ++++++++++
doc/quickstart.rst | 13 +
latexcodec/__init__.py | 2 +
latexcodec/codec.py | 810 +++++++++++++++++++++++++++++++++++++++++++
latexcodec/lexer.py | 420 ++++++++++++++++++++++
requirements.txt | 1 +
setup.cfg | 8 +
setup.py | 45 +++
test/test_install_example.py | 19 +
test/test_latex_codec.py | 362 +++++++++++++++++++
test/test_latex_lexer.py | 442 +++++++++++++++++++++++
29 files changed, 2784 insertions(+)
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..46a3160
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,27 @@
+language: python
+python:
+ - "3.4"
+ - "3.3"
+ - "2.7"
+ - "2.6"
+ - "pypy"
+branches:
+ only:
+ - develop
+install:
+ - "pip install ."
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install coveralls check-manifest flake8 Sphinx; fi"
+script:
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then check-manifest; fi"
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then flake8; fi"
+ - "pushd doc"
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then make html; fi"
+ - "popd"
+ - "pushd test"
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then coverage run --source=latexcodec `type -p nosetests`; fi"
+ - "if [[ $TRAVIS_PYTHON_VERSION != '2.7' ]]; then nosetests; fi"
+ - "popd"
+after_success:
+ - "pushd test"
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then coveralls; fi"
+ - "popd"
diff --git a/AUTHORS.rst b/AUTHORS.rst
new file mode 100644
index 0000000..d97e846
--- /dev/null
+++ b/AUTHORS.rst
@@ -0,0 +1,26 @@
+Main authors:
+
+* David Eppstein
+
+ - wrote the original LaTeX codec as a recipe on ActiveState
+ http://code.activestate.com/recipes/252124-latex-codec/
+
+* Peter Tröger
+
+ - wrote the original latexcodec package, which contained a simple
+ but very effective LaTeX encoder
+
+* Matthias Troffaes (matthias.troffaes at gmail.com)
+
+ - wrote the lexer
+
+ - integrated codec with the lexer for a simpler and more robust
+ design
+
+ - various bugfixes
+
+Contributors:
+
+* Michael Radziej
+
+* Philipp Spitzer
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
new file mode 100644
index 0000000..c9b7570
--- /dev/null
+++ b/CHANGELOG.rst
@@ -0,0 +1,54 @@
+1.0.1 (24 September 2014)
+-------------------------
+
+* br"\par" is now decoded using two newlines (see issue #26, reported
+ by Jorrit Wronski).
+
+* Fix encoding and decoding of the ogonek (see issue #24, reported by
+ beltiste).
+
+1.0.0 (5 August 2014)
+---------------------
+
+* Add Python 3.4 support.
+
+* Fix "DZ" decoding (see issue #21, reported and fixed by Philipp
+ Spitzer).
+
+0.3.2 (17 April 2014)
+---------------------
+
+* Fix underscore "\_" encoding (see issue #17, reported and fixed by
+ Michael Radziej).
+
+0.3.1 (5 February 2014)
+-----------------------
+
+* Drop Python 3.2 support.
+
+* Drop 2to3 and instead use six to support both Python 2 and 3 from a
+ single code base.
+
+* Fix control space "\ " decoding.
+
+* Fix LaTeX encoding of number sign "#" and other special ascii
+ characters (see issues #11 and #13, reported by beltiste).
+
+0.3.0 (19 August 2013)
+----------------------
+
+* Copied lexer and codec from sphinxcontrib-bibtex.
+
+* Initial usage and API documentation.
+
+* Some small bugs fixed.
+
+0.2 (28 September 2012)
+-----------------------
+
+* Added an additional codec with brackets around special characters.
+
+0.1 (26 May 2012)
+-----------------
+
+* Initial release.
diff --git a/INSTALL.rst b/INSTALL.rst
new file mode 100644
index 0000000..5f0503a
--- /dev/null
+++ b/INSTALL.rst
@@ -0,0 +1,43 @@
+Install the module with ``pip install latexcodec``, or from
+source using ``python setup.py install``.
+
+Minimal Example
+---------------
+
+Simply import the :mod:`latexcodec` module to enable ``"latex"``
+to be used as an encoding:
+
+.. code-block:: python
+
+ import latexcodec
+ text_latex = br"\'el\`eve"
+ assert text_latex.decode("latex") == u"élève"
+ text_unicode = u"ångström"
+ assert text_unicode.encode("latex") == br'\aa ngstr\"om'
+
+By default, the LaTeX input is assumed to be ascii, as per standard LaTeX.
+However, you can also request a combined codec
+as ``latex+<encoding>``, where ``<encoding>`` names another encoding.
+In that case, characters are translated to and from that encoding
+whenever possible.
+The following code snippet demonstrates this behaviour:
+
+.. code-block:: python
+
+ import latexcodec
+ text_latex = b"\xfe"
+ assert text_latex.decode("latex+latin1") == u"þ"
+ assert text_latex.decode("latex+latin2") == u"ţ"
+ text_unicode = u"ţ"
+ assert text_unicode.encode("latex+latin1") == b'\\c t' # ţ is not latin1
+ assert text_unicode.encode("latex+latin2") == b'\xfe' # but it is latin2
+
+Limitations
+-----------
+
+* Not all unicode characters are registered. If you find any missing,
+ please report them on the tracker:
+
+ https://github.com/mcmtroffaes/latexcodec/issues
+
+* Unicode combining characters are currently not handled.
diff --git a/LICENSE.rst b/LICENSE.rst
new file mode 100644
index 0000000..8e9e89e
--- /dev/null
+++ b/LICENSE.rst
@@ -0,0 +1,23 @@
+| latexcodec is a lexer and codec to work with LaTeX code in Python
+| Copyright (c) 2011-2014 by Matthias C. M. Troffaes
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..8fe92ed
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,14 @@
+include VERSION
+include README.rst
+include INSTALL.rst
+include CHANGELOG.rst
+include LICENSE.rst
+include AUTHORS.rst
+include requirements.txt
+include tox.ini
+recursive-include doc *
+recursive-include test *
+global-exclude *.pyc
+global-exclude .gitignore
+prune doc/_build
+exclude .travis.yml
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..26e55b2
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,32 @@
+latexcodec
+==========
+
+|travis| |coveralls| |downloads| |version| |license|
+
+A lexer and codec to work with LaTeX code in Python.
+
+* Download: http://pypi.python.org/pypi/latexcodec/#downloads
+
+* Documentation: http://latexcodec.readthedocs.org/
+
+* Development: http://github.com/mcmtroffaes/latexcodec/
+
+.. |travis| image:: https://travis-ci.org/mcmtroffaes/latexcodec.png?branch=develop
+ :target: https://travis-ci.org/mcmtroffaes/latexcodec
+ :alt: travis-ci
+
+.. |coveralls| image:: https://coveralls.io/repos/mcmtroffaes/latexcodec/badge.png?branch=develop
+ :target: https://coveralls.io/r/mcmtroffaes/latexcodec?branch=develop
+ :alt: coveralls.io
+
+.. |downloads| image:: https://pypip.in/d/latexcodec/badge.png
+ :target: http://pypi.python.org/pypi/latexcodec/
+ :alt: downloads
+
+.. |version| image:: https://pypip.in/v/latexcodec/badge.png
+ :target: http://pypi.python.org/pypi/latexcodec/
+ :alt: latest version
+
+.. |license| image:: https://pypip.in/license/latexcodec/badge.png
+ :target: http://pypi.python.org/pypi/latexcodec/
+ :alt: license
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..7dea76e
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+1.0.1
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..57c9fc5
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,153 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = _build
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ -rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/latexcodec.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/latexcodec.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/latexcodec"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/latexcodec"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ make -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/doc/_build/.gitignore b/doc/_build/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/doc/api.rst b/doc/api.rst
new file mode 100644
index 0000000..c5c989a
--- /dev/null
+++ b/doc/api.rst
@@ -0,0 +1,8 @@
+API
+~~~
+
+.. toctree::
+ :maxdepth: 2
+
+ api/codec
+ api/lexer
diff --git a/doc/api/codec.rst b/doc/api/codec.rst
new file mode 100644
index 0000000..ff39d09
--- /dev/null
+++ b/doc/api/codec.rst
@@ -0,0 +1 @@
+.. automodule:: latexcodec.codec
diff --git a/doc/api/lexer.rst b/doc/api/lexer.rst
new file mode 100644
index 0000000..89f9cbc
--- /dev/null
+++ b/doc/api/lexer.rst
@@ -0,0 +1 @@
+.. automodule:: latexcodec.lexer
diff --git a/doc/authors.rst b/doc/authors.rst
new file mode 100644
index 0000000..45122fc
--- /dev/null
+++ b/doc/authors.rst
@@ -0,0 +1,5 @@
+Authors
+=======
+
+.. include:: ../AUTHORS.rst
+
diff --git a/doc/changes.rst b/doc/changes.rst
new file mode 100644
index 0000000..2eb28cc
--- /dev/null
+++ b/doc/changes.rst
@@ -0,0 +1,7 @@
+:tocdepth: 1
+
+Changes
+=======
+
+.. include:: ../CHANGELOG.rst
+
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 0000000..0f3942f
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+#
+# latexcodec documentation build configuration file, created by
+# sphinx-quickstart on Wed Aug 3 15:45:22 2011.
+
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.todo',
+ 'sphinx.ext.coverage',
+ 'sphinx.ext.pngmath',
+ 'sphinx.ext.viewcode']
+source_suffix = '.rst'
+master_doc = 'index'
+project = u'latexcodec'
+copyright = u'2011-2014, Matthias C. M. Troffaes'
+with open("../VERSION", "rb") as version_file:
+ release = version_file.read().strip()
+version = '.'.join(release.split('.')[:2])
+exclude_patterns = ['_build']
+pygments_style = 'sphinx'
+html_theme = 'default'
+htmlhelp_basename = 'latexcodecdoc'
+latex_documents = [
+ ('index', 'latexcodec.tex',
+ u'latexcodec Documentation',
+ u'Matthias C. M. Troffaes', 'manual'),
+]
+man_pages = [
+ ('index', 'latexcodec', u'latexcodec Documentation',
+ [u'Matthias C. M. Troffaes'], 1)
+]
+texinfo_documents = [
+ ('index', 'latexcodec', u'latexcodec Documentation',
+ u'Matthias C. M. Troffaes',
+ 'latexcodec', 'A lexer and codec to work with LaTeX code in Python.', 'Miscellaneous'),
+]
+intersphinx_mapping = {
+ 'python': ('http://docs.python.org/', None),
+}
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..05bd2cf
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,25 @@
+Welcome to latexcodec's documentation!
+======================================
+
+:Release: |release|
+:Date: |today|
+
+Contents
+--------
+
+.. toctree::
+ :maxdepth: 2
+
+ quickstart
+ api
+ changes
+ authors
+ license
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/doc/license.rst b/doc/license.rst
new file mode 100644
index 0000000..81a43fc
--- /dev/null
+++ b/doc/license.rst
@@ -0,0 +1,11 @@
+License
+=======
+
+.. include:: ../LICENSE.rst
+
+.. rubric:: Remark
+
+Versions 0.1 and 0.2 of the latexcodec package were written by
+Peter Tröger, and were released under the Academic Free License 3.0.
+The current version of the latexcodec package shares no code with those
+earlier versions.
diff --git a/doc/make.bat b/doc/make.bat
new file mode 100644
index 0000000..b280cac
--- /dev/null
+++ b/doc/make.bat
@@ -0,0 +1,190 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=_build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+set I18NSPHINXOPTS=%SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+ set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+ set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+ :help
+ echo.Please use `make ^<target^>` where ^<target^> is one of
+ echo. html to make standalone HTML files
+ echo. dirhtml to make HTML files named index.html in directories
+ echo. singlehtml to make a single large HTML file
+ echo. pickle to make pickle files
+ echo. json to make JSON files
+ echo. htmlhelp to make HTML files and an HTML help project
+ echo. qthelp to make HTML files and a qthelp project
+ echo. devhelp to make HTML files and a Devhelp project
+ echo. epub to make an epub
+ echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+ echo. text to make text files
+ echo. man to make manual pages
+ echo. texinfo to make Texinfo files
+ echo. gettext to make PO message catalogs
+ echo. changes to make an overview of all changed/added/deprecated items
+ echo. linkcheck to check all external links for integrity
+ echo. doctest to run all doctests embedded in the documentation if enabled
+ goto end
+)
+
+if "%1" == "clean" (
+ for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+ del /q /s %BUILDDIR%\*
+ goto end
+)
+
+if "%1" == "html" (
+ %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+ goto end
+)
+
+if "%1" == "dirhtml" (
+ %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+ goto end
+)
+
+if "%1" == "singlehtml" (
+ %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+ goto end
+)
+
+if "%1" == "pickle" (
+ %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the pickle files.
+ goto end
+)
+
+if "%1" == "json" (
+ %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the JSON files.
+ goto end
+)
+
+if "%1" == "htmlhelp" (
+ %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+ goto end
+)
+
+if "%1" == "qthelp" (
+ %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+ echo.^> qcollectiongenerator %BUILDDIR%\qthelp\latexcodec.qhcp
+ echo.To view the help file:
+ echo.^> assistant -collectionFile %BUILDDIR%\qthelp\latexcodec.ghc
+ goto end
+)
+
+if "%1" == "devhelp" (
+ %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished.
+ goto end
+)
+
+if "%1" == "epub" (
+ %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The epub file is in %BUILDDIR%/epub.
+ goto end
+)
+
+if "%1" == "latex" (
+ %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+ goto end
+)
+
+if "%1" == "text" (
+ %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The text files are in %BUILDDIR%/text.
+ goto end
+)
+
+if "%1" == "man" (
+ %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The manual pages are in %BUILDDIR%/man.
+ goto end
+)
+
+if "%1" == "texinfo" (
+ %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+ goto end
+)
+
+if "%1" == "gettext" (
+ %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+ goto end
+)
+
+if "%1" == "changes" (
+ %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.The overview file is in %BUILDDIR%/changes.
+ goto end
+)
+
+if "%1" == "linkcheck" (
+ %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+ goto end
+)
+
+if "%1" == "doctest" (
+ %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+ goto end
+)
+
+:end
diff --git a/doc/quickstart.rst b/doc/quickstart.rst
new file mode 100644
index 0000000..d8680a8
--- /dev/null
+++ b/doc/quickstart.rst
@@ -0,0 +1,13 @@
+Getting Started
+===============
+
+Overview
+--------
+
+.. include:: ../README.rst
+ :start-line: 5
+
+Installation
+------------
+
+.. include:: ../INSTALL.rst
diff --git a/latexcodec/__init__.py b/latexcodec/__init__.py
new file mode 100644
index 0000000..9ef80c3
--- /dev/null
+++ b/latexcodec/__init__.py
@@ -0,0 +1,2 @@
+import latexcodec.codec
+latexcodec.codec.register()
diff --git a/latexcodec/codec.py b/latexcodec/codec.py
new file mode 100644
index 0000000..173989e
--- /dev/null
+++ b/latexcodec/codec.py
@@ -0,0 +1,810 @@
+# -*- coding: utf-8 -*-
+"""
+ LaTeX Codec
+ ~~~~~~~~~~~
+
+ The :mod:`latexcodec.codec` module
+ contains all classes and functions for LaTeX code
+ translation. For practical use,
+ you should only ever need to import the :mod:`latexcodec` module,
+ which will automatically register the codec
+ so it can be used by :meth:`str.encode`, :meth:`str.decode`,
+ and any of the functions defined in the :mod:`codecs` module
+ such as :func:`codecs.open` and so on.
+ The other functions and classes
+ are exposed in case someone would want to extend them.
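+
+ A minimal usage sketch (importing :mod:`latexcodec` is what
+ registers the codec; the file name below is hypothetical):
+
+ .. code-block:: python
+
+ import codecs
+ import latexcodec # registering happens on import
+
+ assert u"élève".encode("latex") == br"\'el\`eve"
+ # streams work too, via the registered StreamReader:
+ # with codecs.open("notes.tex", encoding="latex") as f:
+ # text = f.read()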
+
+ .. autofunction:: register
+
+ .. autofunction:: find_latex
+
+ .. autoclass:: LatexIncrementalEncoder
+ :show-inheritance:
+ :members:
+
+ .. autoclass:: LatexIncrementalDecoder
+ :show-inheritance:
+ :members:
+
+ .. autoclass:: LatexCodec
+ :show-inheritance:
+ :members:
+
+ .. autoclass:: LatexUnicodeTable
+ :members:
+"""
+
+# Copyright (c) 2003, 2008 David Eppstein
+# Copyright (c) 2011-2014 Matthias C. M. Troffaes
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import print_function
+
+import codecs
+from six import string_types
+from six.moves import range
+
+from latexcodec import lexer
+
+
+def register():
+ """Register the :func:`find_latex` codec search function.
+
+ .. seealso:: :func:`codecs.register`
+ """
+ codecs.register(find_latex)
+
+# getregentry() returns the codec info for the 'latex' encoding;
+# this entry point is only needed if latex_codec.py were ever
+# placed in the stdlib encodings package
+
+
+def getregentry():
+ """Encodings module API."""
+ return find_latex('latex')
+
+
+class LatexUnicodeTable:
+
+ """Tabulates a translation between LaTeX and unicode."""
+
+ def __init__(self, lexer):
+ self.lexer = lexer
+ self.unicode_map = {}
+ self.max_length = 0
+ self.latex_map = {}
+ self.register_all()
+
+ def register_all(self):
+ """Register all symbols and their LaTeX equivalents
+ (called by constructor).
+ """
+ # TODO complete this list
+ # register special symbols
+ self.register(u'\n\n', b' \\par', encode=False)
+ self.register(u'\n\n', b'\\par', encode=False)
+ self.register(u' ', b'\\ ', encode=False)
+ self.register(u'\N{EN DASH}', b'--')
+ self.register(u'\N{EN DASH}', b'\\textendash')
+ self.register(u'\N{EM DASH}', b'---')
+ self.register(u'\N{EM DASH}', b'\\textemdash')
+ self.register(u'\N{LEFT SINGLE QUOTATION MARK}', b'`', decode=False)
+ self.register(u'\N{RIGHT SINGLE QUOTATION MARK}', b"'", decode=False)
+ self.register(u'\N{LEFT DOUBLE QUOTATION MARK}', b'``')
+ self.register(u'\N{RIGHT DOUBLE QUOTATION MARK}', b"''")
+ self.register(u'\N{DAGGER}', b'\\dag')
+ self.register(u'\N{DOUBLE DAGGER}', b'\\ddag')
+
+ self.register(u'\N{BULLET}', b'\\bullet', mode='math')
+ self.register(u'\N{BULLET}', b'\\textbullet', package='textcomp')
+
+ self.register(u'\N{NUMBER SIGN}', b'\\#')
+ self.register(u'\N{LOW LINE}', b'\\_')
+ self.register(u'\N{AMPERSAND}', b'\\&')
+ self.register(u'\N{NO-BREAK SPACE}', b'~')
+ self.register(u'\N{INVERTED EXCLAMATION MARK}', b'!`')
+ self.register(u'\N{CENT SIGN}', b'\\not{c}')
+
+ self.register(u'\N{POUND SIGN}', b'\\pounds')
+ self.register(u'\N{POUND SIGN}', b'\\textsterling', package='textcomp')
+
+ self.register(u'\N{SECTION SIGN}', b'\\S')
+ self.register(u'\N{DIAERESIS}', b'\\"{}')
+ self.register(u'\N{NOT SIGN}', b'\\neg')
+ self.register(u'\N{SOFT HYPHEN}', b'\\-')
+ self.register(u'\N{MACRON}', b'\\={}')
+
+ self.register(u'\N{DEGREE SIGN}', b'^\\circ', mode='math')
+ self.register(u'\N{DEGREE SIGN}', b'\\textdegree', package='textcomp')
+
+ self.register(u'\N{PLUS-MINUS SIGN}', b'\\pm', mode='math')
+ self.register(u'\N{PLUS-MINUS SIGN}', b'\\textpm', package='textcomp')
+
+ self.register(u'\N{SUPERSCRIPT TWO}', b'^2', mode='math')
+ self.register(
+ u'\N{SUPERSCRIPT TWO}',
+ b'\\texttwosuperior',
+ package='textcomp')
+
+ self.register(u'\N{SUPERSCRIPT THREE}', b'^3', mode='math')
+ self.register(
+ u'\N{SUPERSCRIPT THREE}',
+ b'\\textthreesuperior',
+ package='textcomp')
+
+ self.register(u'\N{ACUTE ACCENT}', b"\\'{}")
+
+ self.register(u'\N{MICRO SIGN}', b'\\mu', mode='math')
+ self.register(u'\N{MICRO SIGN}', b'\\micro', package='gensymb')
+
+ self.register(u'\N{PILCROW SIGN}', b'\\P')
+
+ self.register(u'\N{MIDDLE DOT}', b'\\cdot', mode='math')
+ self.register(
+ u'\N{MIDDLE DOT}',
+ b'\\textperiodcentered',
+ package='textcomp')
+
+ self.register(u'\N{CEDILLA}', b'\\c{}')
+
+ self.register(u'\N{SUPERSCRIPT ONE}', b'^1', mode='math')
+ self.register(
+ u'\N{SUPERSCRIPT ONE}',
+ b'\\textonesuperior',
+ package='textcomp')
+
+ self.register(u'\N{INVERTED QUESTION MARK}', b'?`')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH GRAVE}', b'\\`A')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH CIRCUMFLEX}', b'\\^A')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH TILDE}', b'\\~A')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', b'\\"A')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH RING ABOVE}', b'\\AA')
+ self.register(u'\N{LATIN CAPITAL LETTER AE}', b'\\AE')
+ self.register(u'\N{LATIN CAPITAL LETTER C WITH CEDILLA}', b'\\c C')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH GRAVE}', b'\\`E')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH ACUTE}', b"\\'E")
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH CIRCUMFLEX}', b'\\^E')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH DIAERESIS}', b'\\"E')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH GRAVE}', b'\\`I')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH CIRCUMFLEX}', b'\\^I')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH DIAERESIS}', b'\\"I')
+ self.register(u'\N{LATIN CAPITAL LETTER N WITH TILDE}', b'\\~N')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH GRAVE}', b'\\`O')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH ACUTE}', b"\\'O")
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH CIRCUMFLEX}', b'\\^O')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH TILDE}', b'\\~O')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH DIAERESIS}', b'\\"O')
+ self.register(u'\N{MULTIPLICATION SIGN}', b'\\times', mode='math')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH STROKE}', b'\\O')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH GRAVE}', b'\\`U')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH ACUTE}', b"\\'U")
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH CIRCUMFLEX}', b'\\^U')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH DIAERESIS}', b'\\"U')
+ self.register(u'\N{LATIN CAPITAL LETTER Y WITH ACUTE}', b"\\'Y")
+ self.register(u'\N{LATIN SMALL LETTER SHARP S}', b'\\ss')
+ self.register(u'\N{LATIN SMALL LETTER A WITH GRAVE}', b'\\`a')
+ self.register(u'\N{LATIN SMALL LETTER A WITH ACUTE}', b"\\'a")
+ self.register(u'\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}', b'\\^a')
+ self.register(u'\N{LATIN SMALL LETTER A WITH TILDE}', b'\\~a')
+ self.register(u'\N{LATIN SMALL LETTER A WITH DIAERESIS}', b'\\"a')
+ self.register(u'\N{LATIN SMALL LETTER A WITH RING ABOVE}', b'\\aa')
+ self.register(u'\N{LATIN SMALL LETTER AE}', b'\\ae')
+ self.register(u'\N{LATIN SMALL LETTER C WITH CEDILLA}', b'\\c c')
+ self.register(u'\N{LATIN SMALL LETTER E WITH GRAVE}', b'\\`e')
+ self.register(u'\N{LATIN SMALL LETTER E WITH ACUTE}', b"\\'e")
+ self.register(u'\N{LATIN SMALL LETTER E WITH CIRCUMFLEX}', b'\\^e')
+ self.register(u'\N{LATIN SMALL LETTER E WITH DIAERESIS}', b'\\"e')
+ self.register(u'\N{LATIN SMALL LETTER I WITH GRAVE}', b'\\`\\i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH GRAVE}', b'\\`i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH ACUTE}', b"\\'\\i")
+ self.register(u'\N{LATIN SMALL LETTER I WITH ACUTE}', b"\\'i")
+ self.register(u'\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}', b'\\^\\i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}', b'\\^i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH DIAERESIS}', b'\\"\\i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH DIAERESIS}', b'\\"i')
+ self.register(u'\N{LATIN SMALL LETTER N WITH TILDE}', b'\\~n')
+ self.register(u'\N{LATIN SMALL LETTER O WITH GRAVE}', b'\\`o')
+ self.register(u'\N{LATIN SMALL LETTER O WITH ACUTE}', b"\\'o")
+ self.register(u'\N{LATIN SMALL LETTER O WITH CIRCUMFLEX}', b'\\^o')
+ self.register(u'\N{LATIN SMALL LETTER O WITH TILDE}', b'\\~o')
+ self.register(u'\N{LATIN SMALL LETTER O WITH DIAERESIS}', b'\\"o')
+ self.register(u'\N{DIVISION SIGN}', b'\\div', mode='math')
+ self.register(u'\N{LATIN SMALL LETTER O WITH STROKE}', b'\\o')
+ self.register(u'\N{LATIN SMALL LETTER U WITH GRAVE}', b'\\`u')
+ self.register(u'\N{LATIN SMALL LETTER U WITH ACUTE}', b"\\'u")
+ self.register(u'\N{LATIN SMALL LETTER U WITH CIRCUMFLEX}', b'\\^u')
+ self.register(u'\N{LATIN SMALL LETTER U WITH DIAERESIS}', b'\\"u')
+ self.register(u'\N{LATIN SMALL LETTER Y WITH ACUTE}', b"\\'y")
+ self.register(u'\N{LATIN SMALL LETTER Y WITH DIAERESIS}', b'\\"y')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH MACRON}', b'\\=A')
+ self.register(u'\N{LATIN SMALL LETTER A WITH MACRON}', b'\\=a')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH BREVE}', b'\\u A')
+ self.register(u'\N{LATIN SMALL LETTER A WITH BREVE}', b'\\u a')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH OGONEK}', b'\\k A')
+ self.register(u'\N{LATIN SMALL LETTER A WITH OGONEK}', b'\\k a')
+ self.register(u'\N{LATIN CAPITAL LETTER C WITH ACUTE}', b"\\'C")
+ self.register(u'\N{LATIN SMALL LETTER C WITH ACUTE}', b"\\'c")
+ self.register(u'\N{LATIN CAPITAL LETTER C WITH CIRCUMFLEX}', b'\\^C')
+ self.register(u'\N{LATIN SMALL LETTER C WITH CIRCUMFLEX}', b'\\^c')
+ self.register(u'\N{LATIN CAPITAL LETTER C WITH DOT ABOVE}', b'\\.C')
+ self.register(u'\N{LATIN SMALL LETTER C WITH DOT ABOVE}', b'\\.c')
+ self.register(u'\N{LATIN CAPITAL LETTER C WITH CARON}', b'\\v C')
+ self.register(u'\N{LATIN SMALL LETTER C WITH CARON}', b'\\v c')
+ self.register(u'\N{LATIN CAPITAL LETTER D WITH CARON}', b'\\v D')
+ self.register(u'\N{LATIN SMALL LETTER D WITH CARON}', b'\\v d')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH MACRON}', b'\\=E')
+ self.register(u'\N{LATIN SMALL LETTER E WITH MACRON}', b'\\=e')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH BREVE}', b'\\u E')
+ self.register(u'\N{LATIN SMALL LETTER E WITH BREVE}', b'\\u e')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH DOT ABOVE}', b'\\.E')
+ self.register(u'\N{LATIN SMALL LETTER E WITH DOT ABOVE}', b'\\.e')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH OGONEK}', b'\\k E')
+ self.register(u'\N{LATIN SMALL LETTER E WITH OGONEK}', b'\\k e')
+ self.register(u'\N{LATIN CAPITAL LETTER E WITH CARON}', b'\\v E')
+ self.register(u'\N{LATIN SMALL LETTER E WITH CARON}', b'\\v e')
+ self.register(u'\N{LATIN CAPITAL LETTER G WITH CIRCUMFLEX}', b'\\^G')
+ self.register(u'\N{LATIN SMALL LETTER G WITH CIRCUMFLEX}', b'\\^g')
+ self.register(u'\N{LATIN CAPITAL LETTER G WITH BREVE}', b'\\u G')
+ self.register(u'\N{LATIN SMALL LETTER G WITH BREVE}', b'\\u g')
+ self.register(u'\N{LATIN CAPITAL LETTER G WITH DOT ABOVE}', b'\\.G')
+ self.register(u'\N{LATIN SMALL LETTER G WITH DOT ABOVE}', b'\\.g')
+ self.register(u'\N{LATIN CAPITAL LETTER G WITH CEDILLA}', b'\\c G')
+ self.register(u'\N{LATIN SMALL LETTER G WITH CEDILLA}', b'\\c g')
+ self.register(u'\N{LATIN CAPITAL LETTER H WITH CIRCUMFLEX}', b'\\^H')
+ self.register(u'\N{LATIN SMALL LETTER H WITH CIRCUMFLEX}', b'\\^h')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH TILDE}', b'\\~I')
+ self.register(u'\N{LATIN SMALL LETTER I WITH TILDE}', b'\\~\\i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH TILDE}', b'\\~i')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH MACRON}', b'\\=I')
+ self.register(u'\N{LATIN SMALL LETTER I WITH MACRON}', b'\\=\\i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH MACRON}', b'\\=i')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH BREVE}', b'\\u I')
+ self.register(u'\N{LATIN SMALL LETTER I WITH BREVE}', b'\\u\\i')
+ self.register(u'\N{LATIN SMALL LETTER I WITH BREVE}', b'\\u i')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH OGONEK}', b'\\k I')
+ self.register(u'\N{LATIN SMALL LETTER I WITH OGONEK}', b'\\k i')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}', b'\\.I')
+ self.register(u'\N{LATIN SMALL LETTER DOTLESS I}', b'\\i')
+ self.register(u'\N{LATIN CAPITAL LIGATURE IJ}', b'IJ', decode=False)
+ self.register(u'\N{LATIN SMALL LIGATURE IJ}', b'ij', decode=False)
+ self.register(u'\N{LATIN CAPITAL LETTER J WITH CIRCUMFLEX}', b'\\^J')
+ self.register(u'\N{LATIN SMALL LETTER J WITH CIRCUMFLEX}', b'\\^\\j')
+ self.register(u'\N{LATIN SMALL LETTER J WITH CIRCUMFLEX}', b'\\^j')
+ self.register(u'\N{LATIN CAPITAL LETTER K WITH CEDILLA}', b'\\c K')
+ self.register(u'\N{LATIN SMALL LETTER K WITH CEDILLA}', b'\\c k')
+ self.register(u'\N{LATIN CAPITAL LETTER L WITH ACUTE}', b"\\'L")
+ self.register(u'\N{LATIN SMALL LETTER L WITH ACUTE}', b"\\'l")
+ self.register(u'\N{LATIN CAPITAL LETTER L WITH CEDILLA}', b'\\c L')
+ self.register(u'\N{LATIN SMALL LETTER L WITH CEDILLA}', b'\\c l')
+ self.register(u'\N{LATIN CAPITAL LETTER L WITH CARON}', b'\\v L')
+ self.register(u'\N{LATIN SMALL LETTER L WITH CARON}', b'\\v l')
+ self.register(u'\N{LATIN CAPITAL LETTER L WITH STROKE}', b'\\L')
+ self.register(u'\N{LATIN SMALL LETTER L WITH STROKE}', b'\\l')
+ self.register(u'\N{LATIN CAPITAL LETTER N WITH ACUTE}', b"\\'N")
+ self.register(u'\N{LATIN SMALL LETTER N WITH ACUTE}', b"\\'n")
+ self.register(u'\N{LATIN CAPITAL LETTER N WITH CEDILLA}', b'\\c N')
+ self.register(u'\N{LATIN SMALL LETTER N WITH CEDILLA}', b'\\c n')
+ self.register(u'\N{LATIN CAPITAL LETTER N WITH CARON}', b'\\v N')
+ self.register(u'\N{LATIN SMALL LETTER N WITH CARON}', b'\\v n')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH MACRON}', b'\\=O')
+ self.register(u'\N{LATIN SMALL LETTER O WITH MACRON}', b'\\=o')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH BREVE}', b'\\u O')
+ self.register(u'\N{LATIN SMALL LETTER O WITH BREVE}', b'\\u o')
+ self.register(
+ u'\N{LATIN CAPITAL LETTER O WITH DOUBLE ACUTE}',
+ b'\\H O')
+ self.register(u'\N{LATIN SMALL LETTER O WITH DOUBLE ACUTE}', b'\\H o')
+ self.register(u'\N{LATIN CAPITAL LIGATURE OE}', b'\\OE')
+ self.register(u'\N{LATIN SMALL LIGATURE OE}', b'\\oe')
+ self.register(u'\N{LATIN CAPITAL LETTER R WITH ACUTE}', b"\\'R")
+ self.register(u'\N{LATIN SMALL LETTER R WITH ACUTE}', b"\\'r")
+ self.register(u'\N{LATIN CAPITAL LETTER R WITH CEDILLA}', b'\\c R')
+ self.register(u'\N{LATIN SMALL LETTER R WITH CEDILLA}', b'\\c r')
+ self.register(u'\N{LATIN CAPITAL LETTER R WITH CARON}', b'\\v R')
+ self.register(u'\N{LATIN SMALL LETTER R WITH CARON}', b'\\v r')
+ self.register(u'\N{LATIN CAPITAL LETTER S WITH ACUTE}', b"\\'S")
+ self.register(u'\N{LATIN SMALL LETTER S WITH ACUTE}', b"\\'s")
+ self.register(u'\N{LATIN CAPITAL LETTER S WITH CIRCUMFLEX}', b'\\^S')
+ self.register(u'\N{LATIN SMALL LETTER S WITH CIRCUMFLEX}', b'\\^s')
+ self.register(u'\N{LATIN CAPITAL LETTER S WITH CEDILLA}', b'\\c S')
+ self.register(u'\N{LATIN SMALL LETTER S WITH CEDILLA}', b'\\c s')
+ self.register(u'\N{LATIN CAPITAL LETTER S WITH CARON}', b'\\v S')
+ self.register(u'\N{LATIN SMALL LETTER S WITH CARON}', b'\\v s')
+ self.register(u'\N{LATIN CAPITAL LETTER T WITH CEDILLA}', b'\\c T')
+ self.register(u'\N{LATIN SMALL LETTER T WITH CEDILLA}', b'\\c t')
+ self.register(u'\N{LATIN CAPITAL LETTER T WITH CARON}', b'\\v T')
+ self.register(u'\N{LATIN SMALL LETTER T WITH CARON}', b'\\v t')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH TILDE}', b'\\~U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH TILDE}', b'\\~u')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH MACRON}', b'\\=U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH MACRON}', b'\\=u')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH BREVE}', b'\\u U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH BREVE}', b'\\u u')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH RING ABOVE}', b'\\r U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH RING ABOVE}', b'\\r u')
+ self.register(
+ u'\N{LATIN CAPITAL LETTER U WITH DOUBLE ACUTE}',
+ b'\\H U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH DOUBLE ACUTE}', b'\\H u')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH OGONEK}', b'\\k U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH OGONEK}', b'\\k u')
+ self.register(u'\N{LATIN CAPITAL LETTER W WITH CIRCUMFLEX}', b'\\^W')
+ self.register(u'\N{LATIN SMALL LETTER W WITH CIRCUMFLEX}', b'\\^w')
+ self.register(u'\N{LATIN CAPITAL LETTER Y WITH CIRCUMFLEX}', b'\\^Y')
+ self.register(u'\N{LATIN SMALL LETTER Y WITH CIRCUMFLEX}', b'\\^y')
+ self.register(u'\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}', b'\\"Y')
+ self.register(u'\N{LATIN CAPITAL LETTER Z WITH ACUTE}', b"\\'Z")
+ self.register(u'\N{LATIN SMALL LETTER Z WITH ACUTE}', b"\\'z")
+ self.register(u'\N{LATIN CAPITAL LETTER Z WITH DOT ABOVE}', b'\\.Z')
+ self.register(u'\N{LATIN SMALL LETTER Z WITH DOT ABOVE}', b'\\.z')
+ self.register(u'\N{LATIN CAPITAL LETTER Z WITH CARON}', b'\\v Z')
+ self.register(u'\N{LATIN SMALL LETTER Z WITH CARON}', b'\\v z')
+ self.register(u'\N{LATIN CAPITAL LETTER DZ WITH CARON}', b'D\\v Z')
+ self.register(
+ u'\N{LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}',
+ b'D\\v z')
+ self.register(u'\N{LATIN SMALL LETTER DZ WITH CARON}', b'd\\v z')
+ self.register(u'\N{LATIN CAPITAL LETTER LJ}', b'LJ', decode=False)
+ self.register(
+ u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}',
+ b'Lj',
+ decode=False)
+ self.register(u'\N{LATIN SMALL LETTER LJ}', b'lj', decode=False)
+ self.register(u'\N{LATIN CAPITAL LETTER NJ}', b'NJ', decode=False)
+ self.register(
+ u'\N{LATIN CAPITAL LETTER N WITH SMALL LETTER J}',
+ b'Nj',
+ decode=False)
+ self.register(u'\N{LATIN SMALL LETTER NJ}', b'nj', decode=False)
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH CARON}', b'\\v A')
+ self.register(u'\N{LATIN SMALL LETTER A WITH CARON}', b'\\v a')
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH CARON}', b'\\v I')
+ self.register(u'\N{LATIN SMALL LETTER I WITH CARON}', b'\\v\\i')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH CARON}', b'\\v O')
+ self.register(u'\N{LATIN SMALL LETTER O WITH CARON}', b'\\v o')
+ self.register(u'\N{LATIN CAPITAL LETTER U WITH CARON}', b'\\v U')
+ self.register(u'\N{LATIN SMALL LETTER U WITH CARON}', b'\\v u')
+ self.register(u'\N{LATIN CAPITAL LETTER G WITH CARON}', b'\\v G')
+ self.register(u'\N{LATIN SMALL LETTER G WITH CARON}', b'\\v g')
+ self.register(u'\N{LATIN CAPITAL LETTER K WITH CARON}', b'\\v K')
+ self.register(u'\N{LATIN SMALL LETTER K WITH CARON}', b'\\v k')
+ self.register(u'\N{LATIN CAPITAL LETTER O WITH OGONEK}', b'\\k O')
+ self.register(u'\N{LATIN SMALL LETTER O WITH OGONEK}', b'\\k o')
+ self.register(u'\N{LATIN SMALL LETTER J WITH CARON}', b'\\v\\j')
+ self.register(u'\N{LATIN CAPITAL LETTER DZ}', b'DZ', decode=False)
+ self.register(
+ u'\N{LATIN CAPITAL LETTER D WITH SMALL LETTER Z}',
+ b'Dz',
+ decode=False)
+ self.register(u'\N{LATIN SMALL LETTER DZ}', b'dz', decode=False)
+ self.register(u'\N{LATIN CAPITAL LETTER G WITH ACUTE}', b"\\'G")
+ self.register(u'\N{LATIN SMALL LETTER G WITH ACUTE}', b"\\'g")
+ self.register(u'\N{LATIN CAPITAL LETTER AE WITH ACUTE}', b"\\'\\AE")
+ self.register(u'\N{LATIN SMALL LETTER AE WITH ACUTE}', b"\\'\\ae")
+ self.register(
+ u'\N{LATIN CAPITAL LETTER O WITH STROKE AND ACUTE}',
+ b"\\'\\O")
+ self.register(
+ u'\N{LATIN SMALL LETTER O WITH STROKE AND ACUTE}',
+ b"\\'\\o")
+ self.register(u'\N{PARTIAL DIFFERENTIAL}', b'\\partial', mode='math')
+ self.register(u'\N{N-ARY PRODUCT}', b'\\prod', mode='math')
+ self.register(u'\N{N-ARY SUMMATION}', b'\\sum', mode='math')
+ self.register(u'\N{SQUARE ROOT}', b'\\surd', mode='math')
+ self.register(u'\N{INFINITY}', b'\\infty', mode='math')
+ self.register(u'\N{INTEGRAL}', b'\\int', mode='math')
+ self.register(u'\N{INTERSECTION}', b'\\cap', mode='math')
+ self.register(u'\N{UNION}', b'\\cup', mode='math')
+ self.register(u'\N{RIGHTWARDS ARROW}', b'\\rightarrow', mode='math')
+ self.register(
+ u'\N{RIGHTWARDS DOUBLE ARROW}',
+ b'\\Rightarrow',
+ mode='math')
+ self.register(u'\N{LEFTWARDS ARROW}', b'\\leftarrow', mode='math')
+ self.register(
+ u'\N{LEFTWARDS DOUBLE ARROW}',
+ b'\\Leftarrow',
+ mode='math')
+ self.register(u'\N{LOGICAL OR}', b'\\vee', mode='math')
+ self.register(u'\N{LOGICAL AND}', b'\\wedge', mode='math')
+ self.register(u'\N{ALMOST EQUAL TO}', b'\\approx', mode='math')
+ self.register(u'\N{NOT EQUAL TO}', b'\\neq', mode='math')
+ self.register(u'\N{LESS-THAN OR EQUAL TO}', b'\\leq', mode='math')
+ self.register(u'\N{GREATER-THAN OR EQUAL TO}', b'\\geq', mode='math')
+ self.register(u'\N{MODIFIER LETTER CIRCUMFLEX ACCENT}', b'\\^{}')
+ self.register(u'\N{CARON}', b'\\v{}')
+ self.register(u'\N{BREVE}', b'\\u{}')
+ self.register(u'\N{DOT ABOVE}', b'\\.{}')
+ self.register(u'\N{RING ABOVE}', b'\\r{}')
+ self.register(u'\N{OGONEK}', b'\\k{}')
+ self.register(u'\N{SMALL TILDE}', b'\\~{}')
+ self.register(u'\N{DOUBLE ACUTE ACCENT}', b'\\H{}')
+ self.register(u'\N{LATIN SMALL LIGATURE FI}', b'fi', decode=False)
+ self.register(u'\N{LATIN SMALL LIGATURE FL}', b'fl', decode=False)
+ self.register(u'\N{LATIN SMALL LIGATURE FF}', b'ff', decode=False)
+
+ self.register(u'\N{GREEK SMALL LETTER ALPHA}', b'\\alpha', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER BETA}', b'\\beta', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER GAMMA}', b'\\gamma', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER DELTA}', b'\\delta', mode='math')
+ self.register(
+ u'\N{GREEK SMALL LETTER EPSILON}',
+ b'\\epsilon',
+ mode='math')
+ self.register(u'\N{GREEK SMALL LETTER ZETA}', b'\\zeta', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER ETA}', b'\\eta', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER THETA}', b'\\theta', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER IOTA}', b'\\iota', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER KAPPA}', b'\\kappa', mode='math')
+ self.register(
+ u'\N{GREEK SMALL LETTER LAMDA}',
+ b'\\lambda',
+ mode='math') # LAMDA not LAMBDA
+ self.register(u'\N{GREEK SMALL LETTER MU}', b'\\mu', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER NU}', b'\\nu', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER XI}', b'\\xi', mode='math')
+ self.register(
+ u'\N{GREEK SMALL LETTER OMICRON}',
+ b'\\omicron',
+ mode='math')
+ self.register(u'\N{GREEK SMALL LETTER PI}', b'\\pi', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER RHO}', b'\\rho', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER SIGMA}', b'\\sigma', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER TAU}', b'\\tau', mode='math')
+ self.register(
+ u'\N{GREEK SMALL LETTER UPSILON}',
+ b'\\upsilon',
+ mode='math')
+ self.register(u'\N{GREEK SMALL LETTER PHI}', b'\\phi', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER CHI}', b'\\chi', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER PSI}', b'\\psi', mode='math')
+ self.register(u'\N{GREEK SMALL LETTER OMEGA}', b'\\omega', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER ALPHA}',
+ b'\\Alpha',
+ mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER BETA}', b'\\Beta', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER GAMMA}',
+ b'\\Gamma',
+ mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER DELTA}',
+ b'\\Delta',
+ mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER EPSILON}',
+ b'\\Epsilon',
+ mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER ZETA}', b'\\Zeta', mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER ETA}', b'\\Eta', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER THETA}',
+ b'\\Theta',
+ mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER IOTA}', b'\\Iota', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER KAPPA}',
+ b'\\Kappa',
+ mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER LAMDA}',
+ b'\\Lambda',
+ mode='math') # LAMDA not LAMBDA
+ self.register(u'\N{GREEK CAPITAL LETTER MU}', b'\\Mu', mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER NU}', b'\\Nu', mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER XI}', b'\\Xi', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER OMICRON}',
+ b'\\Omicron',
+ mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER PI}', b'\\Pi', mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER RHO}', b'\\Rho', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER SIGMA}',
+ b'\\Sigma',
+ mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER TAU}', b'\\Tau', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER UPSILON}',
+ b'\\Upsilon',
+ mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER PHI}', b'\\Phi', mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER CHI}', b'\\Chi', mode='math')
+ self.register(u'\N{GREEK CAPITAL LETTER PSI}', b'\\Psi', mode='math')
+ self.register(
+ u'\N{GREEK CAPITAL LETTER OMEGA}',
+ b'\\Omega',
+ mode='math')
+ self.register(u'\N{COPYRIGHT SIGN}', b'\\copyright')
+ self.register(u'\N{COPYRIGHT SIGN}', b'\\textcopyright')
+ self.register(u'\N{LATIN CAPITAL LETTER A WITH ACUTE}', b"\\'A")
+ self.register(u'\N{LATIN CAPITAL LETTER I WITH ACUTE}', b"\\'I")
+ self.register(u'\N{HORIZONTAL ELLIPSIS}', b'\\ldots')
+ self.register(u'\N{TRADE MARK SIGN}', b'^{TM}', mode='math')
+ self.register(
+ u'\N{TRADE MARK SIGN}',
+ b'\\texttrademark',
+ package='textcomp')
+ # \=O and \=o will be translated into Ō and ō before we can
+ # match the full latex string... so decoding disabled for now
+ self.register(u'Ǭ', br'\textogonekcentered{\=O}', decode=False)
+ self.register(u'ǭ', br'\textogonekcentered{\=o}', decode=False)
+
+ def register(self, unicode_text, latex_text, mode='text', package=None,
+ decode=True, encode=True):
+ """Register a correspondence between *unicode_text* and *latex_text*.
+
+ :param str unicode_text: A unicode character.
+ :param bytes latex_text: Its corresponding LaTeX translation.
+ :param str mode: LaTeX mode in which the translation applies
+ (``'text'`` or ``'math'``).
+ :param str package: LaTeX package requirements (currently ignored).
+ :param bool decode: Whether this translation applies to decoding
+ (default: ``True``).
+ :param bool encode: Whether this translation applies to encoding
+ (default: ``True``).
+ """
+ if package is not None:
+ # TODO implement packages
+ pass
+ if mode == 'math':
+ # also register text version
+ self.register(unicode_text, b'$' + latex_text + b'$', mode='text',
+ package=package, decode=decode, encode=encode)
+ # XXX for the time being, we do not perform in-math substitutions
+ return
+ # tokenize, and register unicode translation
+ self.lexer.reset()
+ self.lexer.state = 'M'
+ tokens = tuple(self.lexer.get_tokens(latex_text, final=True))
+ if decode:
+ if tokens not in self.unicode_map:
+ self.max_length = max(self.max_length, len(tokens))
+ self.unicode_map[tokens] = unicode_text
+ # also register token variant with brackets, if appropriate
+ # for instance, "\'{e}" for "\'e", "\c{c}" for "\c c", etc.
+ # note: we do not remove brackets (they sometimes matter,
+ # e.g. bibtex uses them to prevent lower case transformation)
+ if (len(tokens) == 2
+ and tokens[0].name.startswith('control')
+ and tokens[1].name == 'chars'):
+ alt_tokens = (
+ tokens[0], lexer.Token('chars', b'{'),
+ tokens[1], lexer.Token('chars', b'}'),
+ )
+ if alt_tokens not in self.unicode_map:
+ self.max_length = max(self.max_length, len(alt_tokens))
+ self.unicode_map[alt_tokens] = u"{" + unicode_text + u"}"
+ if encode and unicode_text not in self.latex_map:
+ assert len(unicode_text) == 1
+ self.latex_map[unicode_text] = (latex_text, tokens)
+
+_LATEX_UNICODE_TABLE = LatexUnicodeTable(lexer.LatexIncrementalDecoder())
+
+# the incremental encoder translates one character at a time and so
+# does not need a buffer, but the decoder does: a sequence such as
+# \'e only matches the table once all of its tokens have arrived
+
+
+class LatexIncrementalEncoder(lexer.LatexIncrementalEncoder):
+
+ """Translating incremental encoder for latex. Maintains a state to
+ determine whether control spaces etc. need to be inserted.
+ """
+
+ table = _LATEX_UNICODE_TABLE
+ """Translation table."""
+
+ def __init__(self, errors='strict'):
+ lexer.LatexIncrementalEncoder.__init__(self, errors=errors)
+ self.reset()
+
+ def reset(self):
+ self.state = 'M'
+
+ def get_space_bytes(self, bytes_):
+ """Inserts space bytes in space eating mode."""
+ if self.state == 'S':
+ # in space eating mode
+ # control space needed?
+ if bytes_.startswith(b' '):
+ # replace by control space
+ return b'\\ ', bytes_[1:]
+ else:
+ # insert space (it is eaten, but needed for separation)
+ return b' ', bytes_
+ else:
+ return b'', bytes_
+
+ def _get_latex_bytes_tokens_from_char(self, c):
+ # if ascii, try latex equivalents
+ # (this covers \, #, &, and other special LaTeX characters)
+ if ord(c) < 128:
+ try:
+ return self.table.latex_map[c]
+ except KeyError:
+ pass
+ # next, try input encoding
+ try:
+ bytes_ = c.encode(self.inputenc, 'strict')
+ except UnicodeEncodeError:
+ pass
+ else:
+ return bytes_, (lexer.Token(name='chars', text=bytes_),)
+ # next, try latex equivalents of common unicode characters
+ try:
+ return self.table.latex_map[c]
+ except KeyError:
+ # translation failed
+ if self.errors == 'strict':
+ raise UnicodeEncodeError(
+ "latex", # codec
+ c, # problematic input
+ 0, 1, # location of problematic character
+ "don't know how to translate {0} into latex"
+ .format(repr(c)))
+ elif self.errors == 'ignore':
+ return b'', (lexer.Token(),)
+ elif self.errors == 'replace':
+ # use the \\char command
+ # this assumes
+ # \usepackage[T1]{fontenc}
+ # \usepackage[utf8]{inputenc}
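+ # e.g. u"∀".encode("latex", "replace") gives b'{\\char8704}'
+ # (U+2200 FOR ALL is not in the translation table)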
+ bytes_ = b'{\\char' + str(ord(c)).encode("ascii") + b'}'
+ return bytes_, (lexer.Token(name='chars', text=bytes_),)
+ else:
+ raise ValueError(
+ "latex codec does not support {0} errors"
+ .format(self.errors))
+
+ def get_latex_bytes(self, unicode_, final=False):
+ if not isinstance(unicode_, string_types):
+ raise TypeError(
+ "expected unicode for encode input, but got {0} instead"
+ .format(unicode_.__class__.__name__))
+ # convert character by character
+ for pos, c in enumerate(unicode_):
+ bytes_, tokens = self._get_latex_bytes_tokens_from_char(c)
+ space, bytes_ = self.get_space_bytes(bytes_)
+ # update state
+ if tokens[-1].name == 'control_word':
+ # we're eating spaces
+ self.state = 'S'
+ else:
+ self.state = 'M'
+ if space:
+ yield space
+ yield bytes_
+
+
+class LatexIncrementalDecoder(lexer.LatexIncrementalDecoder):
+
+ """Translating incremental decoder for LaTeX."""
+
+ table = _LATEX_UNICODE_TABLE
+ """Translation table."""
+
+ def __init__(self, errors='strict'):
+ lexer.LatexIncrementalDecoder.__init__(self, errors=errors)
+
+ def reset(self):
+ lexer.LatexIncrementalDecoder.reset(self)
+ self.token_buffer = []
+
+ # python codecs API does not support multibuffer incremental decoders
+
+ def getstate(self):
+ raise NotImplementedError
+
+ def setstate(self, state):
+ raise NotImplementedError
+
+ def get_unicode_tokens(self, bytes_, final=False):
+ for token in self.get_tokens(bytes_, final=final):
+ # at this point, token_buffer does not match anything
+ self.token_buffer.append(token)
+ # new token appended at the end, see if we have a match now
+ # note: match is only possible at the *end* of the buffer
+ # because all other positions have already been checked in
+ # earlier iterations
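+ # e.g. when decoding b"\\'e", the buffer first holds just the
+ # control symbol \' (no match); once the 'e' token arrives, the
+ # pair matches the table entry for u'é' and is flushed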
+ for i in range(len(self.token_buffer), 0, -1):
+ last_tokens = tuple(self.token_buffer[-i:]) # last i tokens
+ try:
+ unicode_text = self.table.unicode_map[last_tokens]
+ except KeyError:
+ # no match: continue
+ continue
+ else:
+ # match!! flush buffer, and translate last bit
+ # exclude last i tokens
+ for token in self.token_buffer[:-i]:
+ yield token.decode(self.inputenc)
+ yield unicode_text
+ self.token_buffer = []
+ break
+ # flush tokens that can no longer match
+ while len(self.token_buffer) >= self.table.max_length:
+ yield self.token_buffer.pop(0).decode(self.inputenc)
+ # also flush the buffer at the end
+ if final:
+ for token in self.token_buffer:
+ yield token.decode(self.inputenc)
+ self.token_buffer = []
+
+
+class LatexCodec(codecs.Codec):
+ IncrementalEncoder = None
+ IncrementalDecoder = None
+
+ def encode(self, unicode_, errors='strict'):
+ """Convert unicode string to LaTeX bytes."""
+ encoder = self.IncrementalEncoder(errors=errors)
+ return (
+ encoder.encode(unicode_, final=True),
+ len(unicode_),
+ )
+
+ def decode(self, bytes_, errors='strict'):
+ """Convert LaTeX bytes to unicode string."""
+ decoder = self.IncrementalDecoder(errors=errors)
+ return (
+ decoder.decode(bytes_, final=True),
+ len(bytes_),
+ )
+
+
+def find_latex(encoding):
+ """Return a :class:`codecs.CodecInfo` instance for the requested
+ LaTeX *encoding*, which must be equal to ``latex``,
+ or to ``latex+<encoding>``
+ where ``<encoding>`` describes another encoding.
+ """
+ # check if requested codec info is for latex encoding
+ if not encoding.startswith('latex'):
+ return None
+ # set up all classes with correct latex input encoding
+ inputenc_ = encoding[6:] if encoding.startswith('latex+') else 'ascii'
+
+ class IncrementalEncoder_(LatexIncrementalEncoder):
+ inputenc = inputenc_
+
+ class IncrementalDecoder_(LatexIncrementalDecoder):
+ inputenc = inputenc_
+
+ class Codec(LatexCodec):
+ IncrementalEncoder = IncrementalEncoder_
+ IncrementalDecoder = IncrementalDecoder_
+
+ class StreamWriter(Codec, codecs.StreamWriter):
+ pass
+
+ class StreamReader(Codec, codecs.StreamReader):
+ pass
+
+ return codecs.CodecInfo(
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder_,
+ incrementaldecoder=IncrementalDecoder_,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
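+
+ # A usage sketch (not a definitive recipe): ``find_latex`` is a codec
+ # search function in the sense of :func:`codecs.register`; once it is
+ # registered, names such as "latex" and "latex+latin1" resolve through it:
+ #
+ # >>> import codecs
+ # >>> codecs.register(find_latex)
+ # >>> b'\xfe'.decode("latex+latin1")
+ # u'þ'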
diff --git a/latexcodec/lexer.py b/latexcodec/lexer.py
new file mode 100644
index 0000000..031f3e6
--- /dev/null
+++ b/latexcodec/lexer.py
@@ -0,0 +1,420 @@
+# -*- coding: utf-8 -*-
+"""
+ LaTeX Lexer
+ ~~~~~~~~~~~
+
+ This module contains all classes for lexing LaTeX code, as well as
+ general purpose base classes for incremental LaTeX decoders and
+ encoders, which could be useful in case you are writing your own
+ custom LaTeX codec.
+
+ .. autoclass:: Token(name, text)
+ :members: decode, __len__, __nonzero__
+
+ .. autoclass:: LatexLexer
+ :show-inheritance:
+ :members:
+
+ .. autoclass:: LatexIncrementalLexer
+ :show-inheritance:
+ :members:
+
+ .. autoclass:: LatexIncrementalDecoder
+ :show-inheritance:
+ :members:
+
+ .. autoclass:: LatexIncrementalEncoder
+ :show-inheritance:
+ :members:
+"""
+
+# Copyright (c) 2003, 2008 David Eppstein
+# Copyright (c) 2011-2014 Matthias C. M. Troffaes
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import codecs
+import collections
+import re
+from six import string_types
+
+
+class Token(collections.namedtuple("Token", "name text")):
+
+ """A :func:`collections.namedtuple` storing information about a
+ matched token.
+
+ .. seealso:: :attr:`LatexLexer.tokens`
+
+ .. attribute:: name
+
+ The name of the token as a :class:`str`.
+
+ .. attribute:: text
+
+ The matched token text as :class:`bytes`.
+ The constructor also accepts text as :class:`memoryview`,
+ in which case it is automatically converted to :class:`bytes`.
+ This ensures that the token is hashable.
+ """
+
+ __slots__ = () # efficiency
+
+ def __new__(cls, name=None, text=None):
+ # text can be memoryview; convert to bytes so Token remains hashable
+ return tuple.__new__(
+ cls,
+ (name if name is not None else 'unknown',
+ bytes(text) if text is not None else b''))
+
+ def __nonzero__(self):
+ """Whether the token contains any text."""
+ return bool(self.text)
+
+ def __len__(self):
+ """Length of the token text."""
+ return len(self.text)
+
+ def decode(self, encoding):
+ """Returns the decoded token text in the specified *encoding*.
+
+ .. note::
+
+ Control words get an extra space added at the back to
+ ensure separation from the next token, so that decoded token
+ sequences can be :meth:`str.join`\ ed together.
+
+ For example, the tokens ``b'\\hello'`` and ``b'world'``
+ will correctly result in ``u'\\hello world'`` (remember
+ that LaTeX eats space following control words). If no space
+ were added, this would wrongfully result in
+ ``u'\\helloworld'``.
+
+ """
+ if self.name == 'control_word':
+ return self.text.decode(encoding) + u' '
+ else:
+ return self.text.decode(encoding)
+
+# implementation note: we derive from IncrementalDecoder because this
+# class serves excellently as a base class for incremental decoders,
+# but of course we don't actually decode until later
+
+
+class LatexLexer(codecs.IncrementalDecoder):
+
+ """A very simple lexer for tex/latex code."""
+
+ # implementation note: every token **must** be decodable by inputenc
+ tokens = [
+ # comment: for ease, and for speed, we handle it as a token
+ ('comment', br'%.*?\n'),
+ # control tokens
+ # in latex, some control tokens skip following whitespace
+ # ('control-word' and 'control-symbol')
+ # others do not ('control-symbol-x')
+ # XXX TBT says no control symbols skip whitespace (except '\ ')
+ # XXX but tests reveal otherwise?
+ ('control_word', br'[\\][a-zA-Z]+'),
+ ('control_symbol', br'[\\][~' br"'" br'"` =^!]'),
+ # TODO should only match ascii
+ ('control_symbol_x', br'[\\][^a-zA-Z]'),
+ # parameter tokens
+ # also support a lone hash so we can lex things like b'#a'
+ ('parameter', br'\#[0-9]|\#'),
+ # any remaining characters; for ease we also handle space and
+ # newline as tokens
+ ('space', br' '),
+ ('newline', br'\n'),
+ ('mathshift', br'[$]'),
+ # note: some chars are joined together to make it easier to detect
+ # symbols that have a special function (e.g. --, ---, etc.)
+ ('chars',
+ br'---|--|-|[`][`]'
+ br"|['][']"
+ br'|[?][`]|[!][`]'
+ # separate chars because brackets are optional
+ # e.g. fran\\c cais = fran\\c{c}ais in latex
+ # so only way to detect \\c acting on c only is this way
+ br'|[0-9a-zA-Z{}]'
+ # we have to join everything else together to support
+ # multibyte encodings: every token must be decodable!!
+ # this means for instance that \\c öké is NOT equivalent to
+ # \\c{ö}ké
+ br'|[^ %#$\n\\]+'),
+ # trailing garbage which we cannot decode otherwise
+ # (such as a lone '\' at the end of a buffer)
+ # is never emitted, but used internally by the buffer
+ ('unknown', br'.'),
+ ]
+ """List of token names, and the regular expressions they match."""
+
+ def __init__(self, errors='strict'):
+ """Initialize the codec."""
+ self.errors = errors
+ # regular expression used for matching
+ self.regexp = re.compile(
+ b"|".join(
+ b"(?P<" + name.encode() + b">" + regexp + b")"
+ for name, regexp in self.tokens),
+ re.DOTALL)
+ # reset state
+ self.reset()
+
+ def reset(self):
+ """Reset state."""
+ # buffer for storing last (possibly incomplete) token
+ self.raw_buffer = Token()
+
+ def getstate(self):
+ """Get state."""
+ return (self.raw_buffer.text, 0)
+
+ def setstate(self, state):
+ """Set state. The *state* must correspond to the return value
+ of a previous :meth:`getstate` call.
+ """
+ self.raw_buffer = Token('unknown', state[0])
+
+ def get_raw_tokens(self, bytes_, final=False):
+ """Yield tokens without any further processing. Tokens are one of:
+
+ - ``\\<word>``: a control word (i.e. a command)
+ - ``\\<symbol>``: a control symbol (e.g. \\^)
+ - ``#<n>``: a parameter
+ - a series of byte characters
+ """
+ if self.raw_buffer:
+ bytes_ = self.raw_buffer.text + bytes_
+ self.raw_buffer = Token()
+ for match in self.regexp.finditer(bytes_):
+ for name, regexp in self.tokens:
+ text = match.group(name)
+ if text is not None:
+ # yield the buffer token(s)
+ for token in self.flush_raw_tokens():
+ yield token
+ # fill buffer with next token
+ self.raw_buffer = Token(name, text)
+ break
+ if final:
+ for token in self.flush_raw_tokens():
+ yield token
+
+ def flush_raw_tokens(self):
+ """Flush the raw token buffer."""
+ if self.raw_buffer:
+ yield self.raw_buffer
+ self.raw_buffer = Token()
+
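+ # Example of raw lexing (a sketch based on the lexer tests further down
+ # in this changeset): whitespace is preserved at this stage, and the last
+ # token is only emitted once ``final=True`` confirms it cannot grow:
+ #
+ # >>> lexer = LatexLexer()
+ # >>> [t.text for t in lexer.get_raw_tokens(b'\\hello \\world', final=True)]
+ # [b'\\hello', b' ', b'\\world']
+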
+
+class LatexIncrementalLexer(LatexLexer):
+
+ """A very simple incremental lexer for tex/latex code. Roughly
+ follows the state machine described in TeX by Topic, Chapter 2.
+
+ The generated tokens satisfy:
+
+ * no newline characters: paragraphs are separated by '\\par'
+ * spaces following control tokens are compressed
+ """
+
+ def reset(self):
+ LatexLexer.reset(self)
+ # three possible states:
+ # newline (N), skipping spaces (S), and middle of line (M)
+ self.state = 'N'
+ # inline math mode?
+ self.inline_math = False
+
+ def getstate(self):
+ # state 'M' is most common, so let that be zero
+ return (
+ self.raw_buffer,
+ {'M': 0, 'N': 1, 'S': 2}[self.state]
+ | (4 if self.inline_math else 0)
+ )
+
+ def setstate(self, state):
+ self.raw_buffer = state[0]
+ self.state = {0: 'M', 1: 'N', 2: 'S'}[state[1] & 3]
+ self.inline_math = bool(state[1] & 4)
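+ # note (illustrative): the packing above keeps the mode in the two low
+ # bits and the math flag in bit 2, so state 'S' with inline_math=True
+ # round-trips as 2 | 4 == 6 alongside the raw buffer.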
+
+ def get_tokens(self, bytes_, final=False):
+ """Yield tokens while maintaining a state. Also skip
+ whitespace after control words and (some) control symbols.
+ Replaces newlines by spaces or \\par commands, depending on
+ the context.
+ """
+ # current position relative to the start of bytes_ in the sequence
+ # of bytes that have been decoded
+ pos = -len(self.raw_buffer)
+ for token in self.get_raw_tokens(bytes_, final=final):
+ pos = pos + len(token)
+ assert pos >= 0 # first token includes at least self.raw_buffer
+ if token.name == 'newline':
+ if self.state == 'N':
+ # if state was 'N', generate new paragraph
+ yield Token('control_word', b'\\par')
+ elif self.state == 'S':
+ # switch to 'N' state, do not generate a space
+ self.state = 'N'
+ elif self.state == 'M':
+ # switch to 'N' state, generate a space
+ self.state = 'N'
+ yield Token('space', b' ')
+ else:
+ raise AssertionError(
+ "unknown tex state {0!r}".format(self.state))
+ elif token.name == 'space':
+ if self.state == 'N':
+ # remain in 'N' state, no space token generated
+ pass
+ elif self.state == 'S':
+ # remain in 'S' state, no space token generated
+ pass
+ elif self.state == 'M':
+ # in M mode, generate the space,
+ # but switch to space skip mode
+ self.state = 'S'
+ yield token
+ else:
+ raise AssertionError(
+ "unknown state {0!r}".format(self.state))
+ elif token.name == 'mathshift':
+ self.inline_math = not self.inline_math
+ yield token
+ elif token.name == 'parameter':
+ self.state = 'M'
+ yield token
+ elif token.name == 'control_word':
+ # go to space skip mode
+ self.state = 'S'
+ yield token
+ elif token.name == 'control_symbol':
+ # go to space skip mode
+ self.state = 'S'
+ yield token
+ elif token.name == 'control_symbol_x':
+ # don't skip following space, so go to M mode
+ self.state = 'M'
+ yield token
+ elif token.name == 'comment':
+ # go to newline mode, no token is generated
+ # note: comment includes the newline
+ self.state = 'N'
+ elif token.name == 'chars':
+ self.state = 'M'
+ yield token
+ elif token.name == 'unknown':
+ if self.errors == 'strict':
+ # current position within bytes_
+ # this is the position right after the unknown token
+ raise UnicodeDecodeError(
+ "latex", # codec
+ bytes_, # problematic input
+ pos - len(token), # start of problematic token
+ pos, # end of it
+ "unknown token {0!r}".format(token.text))
+ elif self.errors == 'ignore':
+ # do nothing
+ pass
+ elif self.errors == 'replace':
+ yield Token('chars', b'?' * len(token))
+ else:
+ raise NotImplementedError(
+ "error mode {0!r} not supported".format(self.errors))
+ else:
+ raise AssertionError(
+ "unknown token name {0!r}".format(token.name))
+
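+ # Behaviour sketch (mirroring the lexer tests in this changeset): a blank
+ # line yields a \par token, and a mid-paragraph newline becomes a space:
+ #
+ # >>> lexer = LatexIncrementalLexer()
+ # >>> [t.text for t in lexer.get_tokens(b'hello\n\nworld', final=True)]
+ # [b'h', b'e', b'l', b'l', b'o', b' ', b'\\par', b'w', b'o', b'r', b'l', b'd']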
+
+class LatexIncrementalDecoder(LatexIncrementalLexer):
+
+ """Simple incremental decoder. Transforms lexed LaTeX tokens into
+ unicode.
+
+ To customize decoding, subclass and override
+ :meth:`get_unicode_tokens`.
+ """
+
+ inputenc = "ascii"
+ """Input encoding. **Must** extend ascii."""
+
+ def get_unicode_tokens(self, bytes_, final=False):
+ """Decode every token in :attr:`inputenc` encoding. Override to
+ process the tokens in some other way (for example, for token
+ translation).
+ """
+ for token in self.get_tokens(bytes_, final=final):
+ yield token.decode(self.inputenc)
+
+ def decode(self, bytes_, final=False):
+ """Decode LaTeX *bytes_* into a unicode string.
+
+ This implementation calls :meth:`get_unicode_tokens` and joins
+ the resulting unicode strings together.
+ """
+ try:
+ return u''.join(self.get_unicode_tokens(bytes_, final=final))
+ except UnicodeDecodeError as e:
+ # API requires that the decode method raises a ValueError
+ # in this case
+ raise ValueError(e)
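+ # A decoding sketch (grounded in the tests in this changeset): the token
+ # stream shown above joins into a single unicode string, with control
+ # words keeping their trailing space:
+ #
+ # >>> d = LatexIncrementalDecoder()
+ # >>> d.decode(b'hello\n\nworld', final=True)
+ # u'hello \\par world'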
+
+
+class LatexIncrementalEncoder(codecs.IncrementalEncoder):
+
+ """Simple incremental encoder for LaTeX. Transforms unicode into
+ :class:`bytes`.
+
+ To customize encoding, subclass and override
+ :meth:`get_latex_bytes`.
+ """
+
+ inputenc = "ascii"
+ """Input encoding. **Must** extend ascii."""
+
+ def get_latex_bytes(self, unicode_, final=False):
+ """Encode every character in :attr:`inputenc` encoding. Override to
+ process the unicode in some other way (for example, for character
+ translation).
+ """
+ if not isinstance(unicode_, string_types):
+ raise TypeError(
+ "expected unicode for encode input, but got {0} instead"
+ .format(unicode_.__class__.__name__))
+ for c in unicode_:
+ yield c.encode(self.inputenc, self.errors)
+
+ def encode(self, unicode_, final=False):
+ """Encode the *unicode_* string into LaTeX :class:`bytes`.
+
+ This implementation calls :meth:`get_latex_bytes` and joins
+ the resulting :class:`bytes` together.
+ """
+ try:
+ return b''.join(self.get_latex_bytes(unicode_, final=final))
+ except UnicodeEncodeError as e:
+ # API requires that the encode method raises a ValueError
+ # in this case
+ raise ValueError(e)
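+
+ # A minimal sketch of the base encoder in action (mirroring the encoder
+ # tests in this changeset): ascii input passes straight through, while
+ # untranslatable input surfaces as ValueError via the wrapper above.
+ #
+ # >>> e = LatexIncrementalEncoder()
+ # >>> e.encode(u'hello', final=True)
+ # b'hello'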
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3fc41aa
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+six>=1.4.1
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..85ffac4
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,8 @@
+[nosetests]
+with-coverage=1
+cover-package=latexcodec
+cover-branches=1
+cover-html=1
+
+[wheel]
+universal = 1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f003c0c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+import io
+from setuptools import setup, find_packages
+
+
+def readfile(filename):
+ with io.open(filename, encoding="utf-8") as stream:
+ return stream.read().split("\n")
+
+readme = readfile("README.rst")[5:] # skip title and badges
+requires = readfile("requirements.txt")
+version = readfile("VERSION")[0].strip()
+
+setup(
+ name='latexcodec',
+ version=version,
+ url='https://github.com/mcmtroffaes/latexcodec',
+ download_url='http://pypi.python.org/pypi/latexcodec',
+ license='MIT',
+ author='Matthias C. M. Troffaes',
+ author_email='matthias.troffaes at gmail.com',
+ description=readme[0],
+ long_description="\n".join(readme[2:]),
+ zip_safe=True,
+ classifiers=[
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Console',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: MIT License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 2.6',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
+ 'Topic :: Text Processing :: Markup :: LaTeX',
+ 'Topic :: Text Processing :: Filters',
+ ],
+ platforms='any',
+ packages=find_packages(),
+ install_requires=requires,
+)
diff --git a/test/test_install_example.py b/test/test_install_example.py
new file mode 100644
index 0000000..b732d4b
--- /dev/null
+++ b/test/test_install_example.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+
+
+def test_install_example_1():
+ import latexcodec # noqa
+ text_latex = br"\'el\`eve"
+ assert text_latex.decode("latex") == u"élève"
+ text_unicode = u"ångström"
+ assert text_unicode.encode("latex") == br'\aa ngstr\"om'
+
+
+def test_install_example_2():
+ import latexcodec # noqa
+ text_latex = b"\xfe"
+ assert text_latex.decode("latex+latin1") == u"þ"
+ assert text_latex.decode("latex+latin2") == u"ţ"
+ text_unicode = u"ţ"
+ assert text_unicode.encode("latex+latin1") == b'\\c t' # ţ is not latin1
+ assert text_unicode.encode("latex+latin2") == b'\xfe' # but it is latin2
diff --git a/test/test_latex_codec.py b/test/test_latex_codec.py
new file mode 100644
index 0000000..d1a843e
--- /dev/null
+++ b/test/test_latex_codec.py
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+"""Tests for the latex codec."""
+
+from __future__ import print_function
+
+import codecs
+import nose.tools
+from six import text_type, binary_type, BytesIO, PY2
+from unittest import TestCase
+
+import latexcodec
+
+
+def test_getregentry():
+ assert latexcodec.codec.getregentry() is not None
+
+
+def test_find_latex():
+ assert latexcodec.codec.find_latex('hello') is None
+
+
+def test_latex_incremental_decoder_getstate():
+ encoder = codecs.getincrementaldecoder('latex')()
+ nose.tools.assert_raises(NotImplementedError, lambda: encoder.getstate())
+
+
+def test_latex_incremental_decoder_setstate():
+ encoder = codecs.getincrementaldecoder('latex')()
+ state = (u'', 0)
+ nose.tools.assert_raises(
+ NotImplementedError,
+ lambda: encoder.setstate(state))
+
+
+def split_input(input_):
+ """Helper function for testing the incremental encoder and decoder."""
+ if not isinstance(input_, (text_type, binary_type)):
+ raise TypeError("expected unicode or bytes input")
+ if input_:
+ for i in range(len(input_)):
+ if i + 1 < len(input_):
+ yield input_[i:i + 1], False
+ else:
+ yield input_[i:i + 1], True
+ else:
+ yield input_, True
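+
+# For example, split_input(b'abc') yields (b'a', False), (b'b', False),
+# (b'c', True), driving the incremental codecs one unit at a time with
+# final=True only on the last piece.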
+
+
+class TestDecoder(TestCase):
+
+ """Stateless decoder tests."""
+ maxDiff = None
+
+ def decode(self, text_utf8, text_latex, inputenc=None):
+ """Main test function."""
+ encoding = 'latex+' + inputenc if inputenc else 'latex'
+ decoded, n = codecs.getdecoder(encoding)(text_latex)
+ self.assertEqual((decoded, n), (text_utf8, len(text_latex)))
+
+ @nose.tools.raises(TypeError)
+ def test_invalid_type(self):
+ self.decode(object(), object())
+
+ @nose.tools.raises(ValueError)
+ def test_invalid_code(self):
+ # b'\xe9' is invalid utf-8 code
+ self.decode(u'', b'\xe9 ', 'utf-8')
+
+ def test_null(self):
+ self.decode(u'', b'')
+
+ def test_maelstrom(self):
+ self.decode(u"mælström", br'm\ae lstr\"om')
+
+ def test_maelstrom_latin1(self):
+ self.decode(u"mælström", b'm\\ae lstr\xf6m', 'latin1')
+
+ def test_laren(self):
+ self.decode(
+ u"© låren av björn",
+ br'\copyright\ l\aa ren av bj\"orn')
+
+ def test_laren_brackets(self):
+ self.decode(
+ u"© l{å}ren av bj{ö}rn",
+ br'\copyright\ l{\aa}ren av bj{\"o}rn')
+
+ def test_laren_latin1(self):
+ self.decode(
+ u"© låren av björn",
+ b'\\copyright\\ l\xe5ren av bj\xf6rn',
+ 'latin1')
+
+ def test_droitcivil(self):
+ self.decode(
+ u"Même s'il a fait l'objet d'adaptations suite à l'évolution, "
+ u"la transformation sociale, économique et politique du pays, "
+ u"le code civil fran{ç}ais est aujourd'hui encore le texte "
+ u"fondateur "
+ u"du droit civil français mais aussi du droit civil belge "
+ u"ainsi que "
+ u"de plusieurs autres droits civils.",
+ b"M\\^eme s'il a fait l'objet d'adaptations suite "
+ b"\\`a l'\\'evolution, \nla transformation sociale, "
+ b"\\'economique et politique du pays, \nle code civil "
+ b"fran\\c{c}ais est aujourd'hui encore le texte fondateur \n"
+ b"du droit civil fran\\c cais mais aussi du droit civil "
+ b"belge ainsi que \nde plusieurs autres droits civils.",
+ )
+
+ def test_oeuf(self):
+ self.decode(
+ u"D'un point de vue diététique, l'œuf apaise la faim.",
+ br"D'un point de vue di\'et\'etique, l'\oe uf apaise la faim.",
+ )
+
+ def test_oeuf_latin1(self):
+ self.decode(
+ u"D'un point de vue diététique, l'œuf apaise la faim.",
+ b"D'un point de vue di\xe9t\xe9tique, l'\\oe uf apaise la faim.",
+ 'latin1'
+ )
+
+ def test_alpha(self):
+ self.decode(u"α", b"$\\alpha$")
+
+ def test_maelstrom_multibyte_encoding(self):
+ self.decode(u"\\c öké", b'\\c \xc3\xb6k\xc3\xa9', 'utf8')
+
+ def test_serafin(self):
+ self.decode(u"Seraf{\xed}n", b"Seraf{\\'i}n")
+
+ def test_astrom(self):
+ self.decode(u"{\xc5}str{\xf6}m", b'{\\AA}str{\\"o}m')
+
+ def test_space_1(self):
+ self.decode(u"ææ", br'\ae \ae')
+
+ def test_space_2(self):
+ self.decode(u"æ æ", br'\ae\ \ae')
+
+ def test_number_sign_1(self):
+ self.decode(u"# hello", br'\#\ hello')
+
+ def test_number_sign_2(self):
+ # LaTeX does not absorb the space following '\#':
+ # check decoding is correct
+ self.decode(u"# hello", br'\# hello')
+
+ def test_number_sign_3(self):
+ # a single '#' is not valid LaTeX:
+ # for the moment we ignore this error and return # unchanged
+ self.decode(u"# hello", br'# hello')
+
+ def test_underscore(self):
+ self.decode(u"_", br'\_')
+
+ def test_dz(self):
+ self.decode(u"DZ", br'DZ')
+
+ def test_newline(self):
+ self.decode(u"hello world", b"hello\nworld")
+
+ def test_par1(self):
+ self.decode(u"hello\n\nworld", b"hello\n\nworld")
+
+ def test_par2(self):
+ self.decode(u"hello\n\nworld", b"hello\\par world")
+
+ def test_par3(self):
+ self.decode(u"hello\n\nworld", b"hello \\par world")
+
+ def test_ogonek1(self):
+ self.decode(u"ĄąĘęĮįǪǫŲų",
+ br'\k A\k a\k E\k e\k I\k i\k O\k o\k U\k u')
+
+ def test_ogonek2(self):
+ # note: should decode into u"Ǭǭ" but can't support this yet...
+ self.decode(u"\\textogonekcentered {Ō}\\textogonekcentered {ō}",
+ br'\textogonekcentered{\=O}\textogonekcentered{\=o}')
+
+
+class TestStreamDecoder(TestDecoder):
+
+ """Stream decoder tests."""
+
+ def decode(self, text_utf8, text_latex, inputenc=None):
+ encoding = 'latex+' + inputenc if inputenc else 'latex'
+ stream = BytesIO(text_latex)
+ reader = codecs.getreader(encoding)(stream)
+ self.assertEqual(text_utf8, reader.read())
+
+ # in this test, BytesIO(object()) is eventually called
+ # this is valid on Python 2, so we skip this test there
+ def test_invalid_type(self):
+ if PY2:
+ raise nose.plugins.skip.SkipTest
+ else:
+ TestDecoder.test_invalid_type(self)
+
+
+class TestIncrementalDecoder(TestDecoder):
+
+ """Incremental decoder tests."""
+
+ def decode(self, text_utf8, text_latex, inputenc=None):
+ encoding = 'latex+' + inputenc if inputenc else 'latex'
+ decoder = codecs.getincrementaldecoder(encoding)()
+ decoded_parts = (
+ decoder.decode(text_latex_part, final)
+ for text_latex_part, final in split_input(text_latex))
+ self.assertEqual(text_utf8, u''.join(decoded_parts))
+
+
+class TestEncoder(TestCase):
+
+ """Stateless encoder tests."""
+
+ def encode(self, text_utf8, text_latex, inputenc=None, errors='strict'):
+ """Main test function."""
+ encoding = 'latex+' + inputenc if inputenc else 'latex'
+ encoded, n = codecs.getencoder(encoding)(text_utf8, errors=errors)
+ self.assertEqual((encoded, n), (text_latex, len(text_utf8)))
+
+ @nose.tools.raises(TypeError)
+ def test_invalid_type(self):
+ self.encode(object(), object())
+
+ # note concerning test_invalid_code_* methods:
+ # u'\u2328' (0x2328 = 9000) is unicode for keyboard symbol
+ # we currently provide no translation for this into LaTeX code
+
+ @nose.tools.raises(ValueError)
+ def test_invalid_code_strict(self):
+ self.encode(u'\u2328', b'', 'ascii', 'strict')
+
+ def test_invalid_code_ignore(self):
+ self.encode(u'\u2328', b'', 'ascii', 'ignore')
+
+ def test_invalid_code_replace(self):
+ self.encode(u'\u2328', b'{\\char9000}', 'ascii', 'replace')
+
+ @nose.tools.raises(ValueError)
+ def test_invalid_code_baderror(self):
+ self.encode(u'\u2328', b'', 'ascii', '**baderror**')
+
+ def test_null(self):
+ self.encode(u'', b'')
+
+ def test_maelstrom(self):
+ self.encode(u"mælström", br'm\ae lstr\"om')
+
+ def test_maelstrom_latin1(self):
+ self.encode(u"mælström", b'm\xe6lstr\xf6m', 'latin1')
+
+ def test_laren(self):
+ self.encode(
+ u"© låren av björn",
+ br'\copyright\ l\aa ren av bj\"orn')
+
+ def test_laren_latin1(self):
+ self.encode(
+ u"© låren av björn",
+ b'\xa9 l\xe5ren av bj\xf6rn',
+ 'latin1')
+
+ def test_droitcivil(self):
+ self.encode(
+ u"Même s'il a fait l'objet d'adaptations suite à l'évolution, \n"
+ u"la transformation sociale, économique et politique du pays, \n"
+ u"le code civil fran{ç}ais est aujourd'hui encore le texte "
+ u"fondateur \n"
+ u"du droit civil français mais aussi du droit civil belge "
+ u"ainsi que \n"
+ u"de plusieurs autres droits civils.",
+ b"M\\^eme s'il a fait l'objet d'adaptations suite "
+ b"\\`a l'\\'evolution, \nla transformation sociale, "
+ b"\\'economique et politique du pays, \nle code civil "
+ b"fran{\\c c}ais est aujourd'hui encore le texte fondateur \n"
+ b"du droit civil fran\\c cais mais aussi du droit civil "
+ b"belge ainsi que \nde plusieurs autres droits civils.",
+ )
+
+ def test_oeuf(self):
+ self.encode(
+ u"D'un point de vue diététique, l'œuf apaise la faim.",
+ br"D'un point de vue di\'et\'etique, l'\oe uf apaise la faim.",
+ )
+
+ def test_oeuf_latin1(self):
+ self.encode(
+ u"D'un point de vue diététique, l'œuf apaise la faim.",
+ b"D'un point de vue di\xe9t\xe9tique, l'\\oe uf apaise la faim.",
+ 'latin1'
+ )
+
+ def test_alpha(self):
+ self.encode(u"α", b"$\\alpha$")
+
+ def test_serafin(self):
+ self.encode(u"Seraf{\xed}n", b"Seraf{\\'\\i }n")
+
+ def test_space_1(self):
+ self.encode(u"ææ", br'\ae \ae')
+
+ def test_space_2(self):
+ self.encode(u"æ æ", br'\ae\ \ae')
+
+ def test_number_sign(self):
+ # note: no need for control space after \#
+ self.encode(u"# hello", br'\# hello')
+
+ def test_underscore(self):
+ self.encode(u"_", br'\_')
+
+ def test_dz1(self):
+ self.encode(u"DZ", br'DZ')
+
+ def test_dz2(self):
+ self.encode(u"DZ", br'DZ')
+
+ def test_newline(self):
+ self.encode(u"hello\nworld", b"hello\nworld")
+
+ def test_par1(self):
+ self.encode(u"hello\n\nworld", b"hello\n\nworld")
+
+ def test_par2(self):
+ self.encode(u"hello\\par world", b"hello\\par world")
+
+ def test_ogonek1(self):
+ self.encode(u"ĄąĘęĮįǪǫŲų",
+ br'\k A\k a\k E\k e\k I\k i\k O\k o\k U\k u')
+
+ def test_ogonek2(self):
+ self.encode(u"Ǭǭ",
+ br'\textogonekcentered{\=O}\textogonekcentered{\=o}')
+
+
+class TestStreamEncoder(TestEncoder):
+
+ """Stream encoder tests."""
+
+ def encode(self, text_utf8, text_latex, inputenc=None, errors='strict'):
+ encoding = 'latex+' + inputenc if inputenc else 'latex'
+ stream = BytesIO()
+ writer = codecs.getwriter(encoding)(stream, errors=errors)
+ writer.write(text_utf8)
+ self.assertEqual(text_latex, stream.getvalue())
+
+
+class TestIncrementalEncoder(TestEncoder):
+
+ """Incremental encoder tests."""
+
+ def encode(self, text_utf8, text_latex, inputenc=None, errors='strict'):
+ encoding = 'latex+' + inputenc if inputenc else 'latex'
+ encoder = codecs.getincrementalencoder(encoding)(errors=errors)
+ encoded_parts = (
+ encoder.encode(text_utf8_part, final)
+ for text_utf8_part, final in split_input(text_utf8))
+ self.assertEqual(text_latex, b''.join(encoded_parts))
diff --git a/test/test_latex_lexer.py b/test/test_latex_lexer.py
new file mode 100644
index 0000000..924171c
--- /dev/null
+++ b/test/test_latex_lexer.py
@@ -0,0 +1,442 @@
+"""Tests for the tex lexer."""
+
+import nose.tools
+from unittest import TestCase
+
+from latexcodec.lexer import (
+ LatexLexer, LatexIncrementalLexer, LatexIncrementalDecoder,
+ LatexIncrementalEncoder, Token)
+
+
+def test_token_create():
+ t = Token()
+ nose.tools.assert_equal(t.name, 'unknown')
+ nose.tools.assert_equal(t.text, b'')
+
+
+def test_token_create_with_args():
+ t = Token('hello', b'world')
+ nose.tools.assert_equal(t.name, 'hello')
+ nose.tools.assert_equal(t.text, b'world')
+
+
+@nose.tools.raises(AttributeError)
+def test_token_assign_name():
+ t = Token()
+ t.name = 'test'
+
+
+@nose.tools.raises(AttributeError)
+def test_token_assign_text():
+ t = Token()
+ t.text = 'test'
+
+
+@nose.tools.raises(AttributeError)
+def test_token_assign_other():
+ t = Token()
+ t.blabla = 'test'
+
+
+class BaseLatexLexerTest(TestCase):
+
+ errors = 'strict'
+
+ def setUp(self):
+ self.lexer = LatexLexer(errors=self.errors)
+
+ def lex_it(self, latex_code, latex_tokens, final=False):
+ tokens = self.lexer.get_raw_tokens(latex_code, final=final)
+ self.assertEqual(
+ list(token.text for token in tokens),
+ latex_tokens)
+
+ def tearDown(self):
+ del self.lexer
+
+
+class LatexLexerTest(BaseLatexLexerTest):
+
+ def test_null(self):
+ self.lex_it(b'', [], final=True)
+
+ def test_hello(self):
+ self.lex_it(
+ b'hello! [#1] This \\is\\ \\^ a \ntest.\n'
+ b' \nHey.\n\n\# x \#x',
+ br'h|e|l|l|o|!| | |[|#1|]| |T|h|i|s| |\is|\ | | |\^| |a| '
+ b'|\n|t|e|s|t|.|\n| | | | |\n|H|e|y|.|\n|\n'
+ br'|\#| |x| |\#|x'.split(b'|'),
+ final=True
+ )
+
+ def test_comment(self):
+ self.lex_it(
+ b'test% some comment\ntest',
+ b't|e|s|t|% some comment\n|t|e|s|t'.split(b'|'),
+ final=True
+ )
+
+ def test_comment_newline(self):
+ self.lex_it(
+ b'test% some comment\n\ntest',
+ b't|e|s|t|% some comment\n|\n|t|e|s|t'.split(b'|'),
+ final=True
+ )
+
+ def test_control(self):
+ self.lex_it(
+ b'\\hello\\world',
+ b'\\hello|\\world'.split(b'|'),
+ final=True
+ )
+
+ def test_control_whitespace(self):
+ self.lex_it(
+ b'\\hello \\world ',
+ b'\\hello| | | |\\world| | | '.split(b'|'),
+ final=True
+ )
+
+ def test_controlx(self):
+ self.lex_it(
+ b'\\#\\&',
+ b'\\#|\\&'.split(b'|'),
+ final=True
+ )
+
+ def test_controlx_whitespace(self):
+ self.lex_it(
+ b'\\# \\& ',
+ b'\\#| | | | |\\&| | | '.split(b'|'),
+ final=True
+ )
+
+ def test_buffer(self):
+ self.lex_it(
+ b'hi\\t',
+ b'h|i'.split(b'|'),
+ )
+ self.lex_it(
+ b'here',
+ [b'\\there'],
+ final=True,
+ )
+
+ def test_state(self):
+ self.lex_it(
+ b'hi\\t',
+ b'h|i'.split(b'|'),
+ )
+ state = self.lexer.getstate()
+ self.lexer.reset()
+ self.lex_it(
+ b'here',
+ b'h|e|r|e'.split(b'|'),
+ final=True,
+ )
+ self.lexer.setstate(state)
+ self.lex_it(
+ b'here',
+ [b'\\there'],
+ final=True,
+ )
+
+ @nose.tools.raises(NotImplementedError)
+ def test_decode(self):
+ self.lexer.decode(b'')
+
+ def test_final_backslash(self):
+ self.lex_it(
+ b'notsogood\\',
+ b'n|o|t|s|o|g|o|o|d|\\'.split(b'|'),
+ final=True
+ )
+
+ def test_final_comment(self):
+ self.lex_it(
+ b'hello%',
+ b'h|e|l|l|o|%'.split(b'|'),
+ final=True
+ )
+
+ def test_hash(self):
+ self.lex_it(b'#', [b'#'], final=True)
+
+
+class BaseTexLexerTest(TestCase):
+
+ """Tex lexer fixture."""
+
+ errors = 'strict'
+
+ def setUp(self):
+ self.lexer = LatexIncrementalDecoder(self.errors)
+
+ def lex_it(self, latex_code, latex_tokens, final=False):
+ tokens = self.lexer.get_tokens(latex_code, final=final)
+ self.assertEqual(
+ list(token.text for token in tokens),
+ latex_tokens)
+
+ def tearDown(self):
+ del self.lexer
+
+
+class TexLexerTest(BaseTexLexerTest):
+
+ def test_null(self):
+ self.lex_it(b'', [], final=True)
+
+ def test_hello(self):
+ self.lex_it(
+ b'hello! [#1] This \\is\\ \\^ a \ntest.\n'
+ b' \nHey.\n\n\# x \#x',
+ br'h|e|l|l|o|!| |[|#1|]| |T|h|i|s| |\is|\ |\^|a| '
+ br'|t|e|s|t|.| |\par|H|e|y|.| '
+ br'|\par|\#| |x| |\#|x'.split(b'|'),
+ final=True
+ )
+
+ def test_comment(self):
+ self.lex_it(
+ b'test% some comment\ntest',
+ b't|e|s|t|t|e|s|t'.split(b'|'),
+ final=True
+ )
+
+ def test_comment_newline(self):
+ self.lex_it(
+ b'test% some comment\n\ntest',
+ b't|e|s|t|\\par|t|e|s|t'.split(b'|'),
+ final=True
+ )
+
+ def test_control(self):
+ self.lex_it(
+ b'\\hello\\world',
+ b'\\hello|\\world'.split(b'|'),
+ final=True
+ )
+
+ def test_control_whitespace(self):
+ self.lex_it(
+ b'\\hello \\world ',
+ b'\\hello|\\world'.split(b'|'),
+ final=True
+ )
+
+ def test_controlx(self):
+ self.lex_it(
+ b'\\#\\&',
+ b'\\#|\\&'.split(b'|'),
+ final=True
+ )
+
+ def test_controlx_whitespace(self):
+ self.lex_it(
+ b'\\# \\& ',
+ b'\\#| |\\&| '.split(b'|'),
+ final=True
+ )
+
+ def test_buffer(self):
+ self.lex_it(
+ b'hi\\t',
+ b'h|i'.split(b'|'),
+ )
+ self.lex_it(
+ b'here',
+ [b'\\there'],
+ final=True,
+ )
+
+ def test_buffer_decode(self):
+ self.assertEqual(
+ self.lexer.decode(b'hello! [#1] This \\i'),
+ u'hello! [#1] This ',
+ )
+ self.assertEqual(
+ self.lexer.decode(b's\\ \\^ a \ntest.\n'),
+ u'\\is \\ \\^a test.',
+ )
+ self.assertEqual(
+ self.lexer.decode(b' \nHey.\n\n\# x \#x', final=True),
+ u' \\par Hey. \\par \\# x \\#x',
+ )
+
+ def test_state_middle(self):
+ self.lex_it(
+ b'hi\\t',
+ b'h|i'.split(b'|'),
+ )
+ state = self.lexer.getstate()
+ self.assertEqual(self.lexer.state, 'M')
+ self.assertEqual(self.lexer.raw_buffer.name, 'control_word')
+ self.assertEqual(self.lexer.raw_buffer.text, b'\\t')
+ self.lexer.reset()
+ self.assertEqual(self.lexer.state, 'N')
+ self.assertEqual(self.lexer.raw_buffer.name, 'unknown')
+ self.assertEqual(self.lexer.raw_buffer.text, b'')
+ self.lex_it(
+ b'here',
+ b'h|e|r|e'.split(b'|'),
+ final=True,
+ )
+ self.lexer.setstate(state)
+ self.assertEqual(self.lexer.state, 'M')
+ self.assertEqual(self.lexer.raw_buffer.name, 'control_word')
+ self.assertEqual(self.lexer.raw_buffer.text, b'\\t')
+ self.lex_it(
+ b'here',
+ [b'\\there'],
+ final=True,
+ )
+
+ def test_state_inline_math(self):
+ self.lex_it(
+ b'hi$t',
+ b'h|i|$'.split(b'|'),
+ )
+ assert self.lexer.inline_math
+ self.lex_it(
+ b'here$',
+ b't|h|e|r|e|$'.split(b'|'),
+ final=True,
+ )
+ assert not self.lexer.inline_math
+
+ # counterintuitive?
+ @nose.tools.raises(UnicodeDecodeError)
+ def test_final_backslash(self):
+ self.lex_it(
+ b'notsogood\\',
+ [b'notsogood'],
+ final=True
+ )
+
+ # counterintuitive?
+ @nose.tools.raises(UnicodeDecodeError)
+ def test_final_comment(self):
+ self.lex_it(
+ b'hello%',
+ [b'hello'],
+ final=True
+ )
+
+ def test_hash(self):
+ self.lex_it(b'#', [b'#'], final=True)
+
+
+class TexLexerReplaceTest(BaseTexLexerTest):
+
+ errors = 'replace'
+
+ def test_errors_replace(self):
+ self.lex_it(
+ b'hello%',
+ b'h|e|l|l|o|?'.split(b'|'),
+ final=True
+ )
+
+
+class TexLexerIgnoreTest(BaseTexLexerTest):
+
+ errors = 'ignore'
+
+ def test_errors_ignore(self):
+ self.lex_it(
+ b'hello%',
+ b'h|e|l|l|o'.split(b'|'),
+ final=True
+ )
+
+
+class TexLexerInvalidErrorTest(BaseTexLexerTest):
+
+ errors = '**baderror**'
+
+ @nose.tools.raises(NotImplementedError)
+ def test_errors_invalid(self):
+ self.lex_it(
+ b'hello%',
+ b'h|e|l|l|o'.split(b'|'),
+ final=True
+ )
+
+
+def invalid_token_test():
+ lexer = LatexIncrementalDecoder()
+ # piggyback an implementation which results in invalid tokens
+ lexer.get_raw_tokens = lambda bytes_, final: [Token('**invalid**', bytes_)]
+ nose.tools.assert_raises(AssertionError, lambda: lexer.decode(b'hello'))
+
+
+def invalid_state_test_1():
+ lexer = LatexIncrementalDecoder()
+ # piggyback invalid state
+ lexer.state = '**invalid**'
+ nose.tools.assert_raises(AssertionError, lambda: lexer.decode(b'\n\n\n'))
+
+
+def invalid_state_test_2():
+ lexer = LatexIncrementalDecoder()
+ # piggyback invalid state
+ lexer.state = '**invalid**'
+ nose.tools.assert_raises(AssertionError, lambda: lexer.decode(b' '))
+
+
+class LatexIncrementalLexerTest(TestCase):
+
+ errors = 'strict'
+
+ def setUp(self):
+ self.lexer = LatexIncrementalLexer(errors=self.errors)
+
+ def lex_it(self, latex_code, latex_tokens, final=False):
+ tokens = self.lexer.get_tokens(latex_code, final=final)
+ self.assertEqual(
+ list(token.text for token in tokens),
+ latex_tokens)
+
+ def tearDown(self):
+ del self.lexer
+
+ def test_newline(self):
+ self.lex_it(
+ b"hello\nworld", b"h|e|l|l|o| |w|o|r|l|d".split(b'|'),
+ final=True)
+
+ def test_par(self):
+ self.lex_it(
+ b"hello\n\nworld", b"h|e|l|l|o| |\\par|w|o|r|l|d".split(b'|'),
+ final=True)
+
+
+class LatexIncrementalEncoderTest(TestCase):
+
+ """Encoder test fixture."""
+
+ errors = 'strict'
+
+ def setUp(self):
+ self.encoder = LatexIncrementalEncoder(self.errors)
+
+ def encode(self, latex_code, latex_bytes, final=False):
+ result = self.encoder.encode(latex_code, final=final)
+ self.assertEqual(result, latex_bytes)
+
+ def tearDown(self):
+ del self.encoder
+
+ @nose.tools.raises(TypeError)
+ def test_invalid_type(self):
+ self.encoder.encode(object())
+
+ @nose.tools.raises(ValueError)
+ def test_invalid_code(self):
+ # default encoding is ascii, \u00ff is not ascii translatable
+ self.encoder.encode(u"\u00ff")
+
+ def test_hello(self):
+ self.encode(u'hello', b'hello')
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-latexcodec.git