[Python-modules-commits] [pypandoc] 01/05: import pypandoc_1.1.3+ds0.orig.tar.xz
Elena Grandi
valhalla-guest at moszumanska.debian.org
Wed Jun 22 15:45:10 UTC 2016
This is an automated email from the git hooks/post-receive script.
valhalla-guest pushed a commit to branch master
in repository pypandoc.
commit 262c7e93b83a7f6720af2768e6d5ff006e7ade74
Author: Elena Grandi <valhalla-d at trueelena.org>
Date: Wed Jun 22 17:07:22 2016 +0200
import pypandoc_1.1.3+ds0.orig.tar.xz
---
LICENSE | 58 +++++++++
MANIFEST.in | 6 +
README.md | 203 +++++++++++++++++++++++++++++
examples/services.py | 69 ++++++++++
filter_test.md | 27 ++++
pypandoc/__init__.py | 354 ++++++++++++++++++++++++++++++++++++++++++++++++++
pypandoc/py3compat.py | 52 ++++++++
setup.cfg | 5 +
setup.py | 239 ++++++++++++++++++++++++++++++++++
tests.py | 278 +++++++++++++++++++++++++++++++++++++++
10 files changed, 1291 insertions(+)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..6c4c148
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,58 @@
+Copyright (c) 2011 Juho Vepsäläinen
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+compat code from IPython py3compat.py and encoding.py, which is licensed under
+the terms of the Modified BSD License (also known as New or
+Revised or 3-Clause BSD)
+
+- Copyright (c) 2008-2014, IPython Development Team
+- Copyright (c) 2001-2007, Fernando Perez <fernando.perez at colorado.edu>
+- Copyright (c) 2001, Janko Hauser <jhauser at zscout.de>
+- Copyright (c) 2001, Nathaniel Gray <n8gray at caltech.edu>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+Neither the name of the IPython Development Team nor the names of its
+contributors may be used to endorse or promote products derived from this
+software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..e0a54b3
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+include README.md
+include LICENSE
+include tests.py
+include filter_test.md
+include examples/*
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e0cee5d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,203 @@
+# pypandoc
+
+[Build status](https://travis-ci.org/bebraw/pypandoc)
+[PyPI package](https://pypi.python.org/pypi/pypandoc/)
+[conda package](https://anaconda.org/janschulz/pypandoc/)
+
+pypandoc provides a thin wrapper for [pandoc](http://johnmacfarlane.net/pandoc/), a universal
+document converter.
+
+## Installation
+
+pypandoc uses pandoc, so it needs an available installation of pandoc. For some common cases
+(wheels, conda packages), pypandoc already includes pandoc (and pandoc_citeproc) in its
+prebuilt package.
+
+If pandoc is already installed (`pandoc` is in the PATH), pypandoc uses the version with the
+higher version number and if both are the same, the already installed version. You can point
+to a specific version by setting the environment variable `PYPANDOC_PANDOC` to the full path to the pandoc binary (`PYPANDOC_PANDOC=/home/x/whatever/pandoc` or `PYPANDOC_PANDOC=c:\pandoc\pandoc.exe`). If this environment variable is set, this is the only
+place where pandoc is searched for.
+
+To use pandoc filters, you must have the relevant filter installed on your machine.
+
+### Installing via pip
+
+Install via `pip install pypandoc`
+
+Prebuilt [wheels for Windows and Mac OS X](https://pypi.python.org/pypi/pypandoc/) include
+pandoc. If there is no prebuilt binary available, you have to
+[install pandoc yourself](#installing-pandoc).
+
+If you use Linux and have [your own wheelhouse](http://wheel.readthedocs.org/en/latest/#usage),
+you can build a wheel which includes pandoc with
+`python setup.py download_pandoc; python setup.py bdist_wheel`. Be aware that this works only
+on 64bit intel systems, as we only download it from the
+[official source](https://github.com/jgm/pandoc/releases).
+
+### Installing via conda
+
+Install via `conda install -c https://conda.anaconda.org/janschulz pypandoc`.
+
+You can also add the channel to your conda config via
+`conda config --add channels https://conda.anaconda.org/janschulz`. This makes it possible to
+use `conda install pypandoc` directly and also lets you update via `conda update pypandoc`.
+
+Conda packages include pandoc and are available for py2.7, py3.4 and py3.5,
+for Windows (32bit and 64bit), Mac OS X (64bit) and Linux (64bit).
+
+### Installing pandoc
+
+pandoc is available for many different platforms:
+
+- Ubuntu/Debian: `sudo apt-get install pandoc`
+- Fedora/Red Hat: `sudo yum install pandoc`
+- Arch: `sudo pacman -S pandoc`
+- Mac OS X with Homebrew: `brew install pandoc`
+- Machine with Haskell: `cabal install pandoc`
+- Windows: There is an installer available
+ [here](http://johnmacfarlane.net/pandoc/installing.html)
+- [FreeBSD port](http://www.freshports.org/textproc/pandoc/)
+ - Or see http://johnmacfarlane.net/pandoc/installing.html
+
+## Usage
+
+The basic invocation looks like this: `pypandoc.convert('input', 'output format')`. `pypandoc`
+tries to infer the type of the input automatically. If it's a file, it will load it. In case you
+pass a string, you can define the `format` using the parameter. The example below should clarify
+the usage:
+
+```python
+import pypandoc
+
+output = pypandoc.convert('somefile.md', 'rst')
+
+# alternatively you could just pass some string to it and define its format
+output = pypandoc.convert('#some title', 'rst', format='md')
+# output == 'some title\r\n==========\r\n\r\n'
+```
+
+If you pass in a string (and not a filename), `convert` expects this string to be unicode or
+utf-8 encoded bytes. `convert` will always return a unicode string.
+
+It's also possible to directly let pandoc write the output to a file. This is the only way to
+convert to some output formats (e.g. odt, docx, epub, epub3, pdf). In that case `convert()` will
+return an empty string.
+
+```python
+import pypandoc
+
+output = pypandoc.convert('somefile.md', 'docx', outputfile="somefile.docx")
+assert output == ""
+```
+
+In addition to `format`, it is possible to pass `extra_args`.
+That makes it possible to access various pandoc options easily.
+
+```python
+output = pypandoc.convert(
+ '<h1>Primary Heading</h1>',
+ 'md', format='html',
+ extra_args=['--atx-headers'])
+# output == '# Primary Heading\r\n'
+output = pypandoc.convert(
+ '# Primary Heading',
+ 'html', format='md',
+ extra_args=['--base-header-level=2'])
+# output == '<h2 id="primary-heading">Primary Heading</h2>\r\n'
+```
+pypandoc now supports easy addition of
+[pandoc filters](http://johnmacfarlane.net/pandoc/scripting.html).
+
+```python
+filters = ['pandoc-citeproc']
+pdoc_args = ['--mathjax',
+ '--smart']
+output = pd.convert(source=filename,
+ to='html5',
+ format='md',
+ extra_args=pdoc_args,
+ filters=filters)
+```
+Please pass any filters in as a list and not a string.
+
+Please refer to `pandoc -h` and the
+[official documentation](http://johnmacfarlane.net/pandoc/README.html) for further details.
+
+## Dealing with Formatting Arguments
+
+Pandoc supports custom formatting through the `-V` parameter. In order to use it through pypandoc, use code such as this:
+
+```python
+output = pypandoc.convert('demo.md', 'pdf', outputfile='demo.pdf',
+ extra_args=['-V', 'geometry:margin=1.5cm'])
+```
+
+Note that it's important to separate `-V` and its argument within a list like that or else it won't work. This gotcha has to do with the way `subprocess.Popen` works.
+
+## Getting Pandoc Version
+
+Since it is sometimes useful to check which Pandoc version is available on your system, `pypandoc` provides a utility for this. Example:
+
+```
+version = pypandoc.get_pandoc_version()
+```
+
+## Related
+
+[pydocverter](https://github.com/msabramo/pydocverter) is a client for a service called
+[Docverter](http://www.docverter.com/), which offers pandoc as a service (plus some extra goodies).
+It has the same API as pypandoc, so you can easily write code that uses one and falls back to the
+other. E.g.:
+
+```python
+try:
+ import pypandoc as converter
+except ImportError:
+ import pydocverter as converter
+
+converter.convert('somefile.md', 'rst')
+```
+
+See [pyandoc](http://pypi.python.org/pypi/pyandoc/) for an alternative implementation of a pandoc
+wrapper from Kenneth Reitz. This one hasn't been active in a while though.
+
+## Contributing
+
+Contributions are welcome. When opening a PR, please keep the following guidelines in mind:
+
+1. Before implementing, please open an issue for discussion.
+2. Make sure you have tests for the new logic.
+3. Make sure your code passes `flake8 pypandoc.py tests.py`
+4. Add yourself to contributors at `README.md` unless you are already there. In that case tweak your contributions.
+
+Note that for citeproc tests to pass you'll need to have [pandoc-citeproc](https://github.com/jgm/pandoc-citeproc) installed. If you installed a prebuilt wheel or conda package, it is already included.
+
+## Contributors
+
+* [Valentin Haenel](https://github.com/esc) - String conversion fix
+* [Daniel Sanchez](https://github.com/ErunamoJAZZ) - Automatic parsing of input/output formats
+* [Thomas G.](https://github.com/coldfix) - Python 3 support
+* [Ben Jao Ming](https://github.com/benjaoming) - Fail gracefully if `pandoc` is missing
+* [Ross Crawford-d'Heureuse](http://github.com/rosscdh) - Encode input in UTF-8 and add Django
+ example
+* [Michael Chow](https://github.com/machow) - Decode output in UTF-8
+* [Janusz Skonieczny](https://github.com/wooyek) - Support Windows newlines and allow encoding to
+ be specified.
+* [gabeos](https://github.com/gabeos) - Fix help parsing
+* [Marc Abramowitz](https://github.com/msabramo) - Make `setup.py` fail hard if `pandoc` is
+ missing, Travis, Dockerfile, PyPI badge, Tox, PEP-8, improved documentation
+* [Daniel L.](https://github.com/mcktrtl) - Add `extra_args` example to README
+* [Amy Guy](https://github.com/rhiaro) - Exception handling for unicode errors
+* [Florian Eßer](https://github.com/flesser) - Allow Markdown extensions in output format
+* [Philipp Wendler](https://github.com/PhilippWendler) - Allow Markdown extensions in input format
+* [Jan Schulz](https://github.com/JanSchulz) - Handling output to a file, Travis to work on newer version of Pandoc, return code checking, get_pandoc_version. Helped to fix the Travis build.
+* [Aaron Gonzales](https://github.com/xysmas) - Added better filter handling
+* [David Lukes](https://github.com/dlukes) - Enabled input from non-plain-text files and made sure tests clean up template files correctly if they fail
+* [valholl](https://github.com/valholl) - Set up licensing information correctly and include examples to distribution version
+* [Cyrille Rossant](https://github.com/rossant) - Fixed bug by trimming out stars in the list of pandoc formats. Helped to fix the Travis build.
+* [Paul Osborne](https://github.com/posborne) - Don't require pandoc to install pypandoc.
+* [Felix Yan](https://github.com/felixonmars) - Added installation instructions for Arch Linux.
+
+## License
+
+`pypandoc` is available under MIT license. See LICENSE for more details. `pandoc` itself is [available under the GPL2 license](https://github.com/jgm/pandoc/blob/master/COPYING).
diff --git a/examples/services.py b/examples/services.py
new file mode 100644
index 0000000..718e286
--- /dev/null
+++ b/examples/services.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""
+Example Services for using pypandoc
+"""
+from tempfile import NamedTemporaryFile
+
+import pypandoc
+
+
+class BasePandocService(object):
+    """
+    Common base for the example services that convert HTML through pypandoc.
+    Subclasses provide the actual conversion in `generate`.
+    """
+    # The subclasses in this file store their output temp file here.
+    file_object = None
+
+    def __init__(self):
+        # Resolve the conversion backend once, through the overridable hook.
+        backend = self.get_service()
+        self.service = backend
+
+    def get_service(self):
+        """Return the object providing `convert`; by default the pypandoc module."""
+        return pypandoc
+
+    def generate(self, **kwargs):
+        """Produce the output document; has to be implemented by subclasses."""
+        raise NotImplementedError
+
+
+class PandocPDFService(BasePandocService):
+    """
+    Convert HTML into a PDF file
+    """
+    def generate(self, html, **kwargs):
+        """
+        Write `html` as PDF into a temporary file and return that file object.
+
+        The target format defaults to 'tex' so that pandoc handles the pdf
+        output correctly, see docs: http://johnmacfarlane.net/pandoc/ #search pdf
+
+        Optional keyword arguments: `from_format` (default 'html') and
+        `to_format` (default 'tex').
+        """
+        source_format = kwargs.get('from_format', 'html')
+        target_format = kwargs.get('to_format', 'tex')
+
+        # pandoc writes into this temp file, selected through the `-o` option
+        pdf_file = NamedTemporaryFile(suffix='.pdf')
+        self.file_object = pdf_file
+
+        pandoc_options = ('--smart', '--standalone', '-o', pdf_file.name)
+        self.service.convert(html, target_format, format=source_format,
+                             extra_args=pandoc_options)
+        # hand back the temp file that pandoc has filled with the pdf
+        return pdf_file
+
+
+class PandocDocxService(BasePandocService):
+    """
+    Generate html to docx format
+    """
+    def generate(self, html, **kwargs):
+        """
+        Write `html` as docx into a temporary file and return that file object.
+
+        Optional keyword arguments: `from_format` (default 'html') and
+        `to_format` (default 'docx').
+        """
+        from_format = kwargs.get('from_format', 'html')
+        to_format = kwargs.get('to_format', 'docx')
+        # create temp file
+        self.file_object = NamedTemporaryFile(suffix='.docx')
+
+        extra_args = (
+            '--smart',
+            '--standalone',
+        )
+        # generate it using pandoc; the target has to be passed as `outputfile`
+        # (not as a raw `-o` argument) because pypandoc refuses docx output
+        # when no `outputfile` is given
+        self.service.convert(html, to_format, format=from_format, extra_args=extra_args,
+                             outputfile=self.file_object.name)
+        # return the file which is now populated with the docx content
+        return self.file_object
diff --git a/filter_test.md b/filter_test.md
new file mode 100644
index 0000000..f30b9f3
--- /dev/null
+++ b/filter_test.md
@@ -0,0 +1,27 @@
+---
+## This is an in-header citation in bibyaml format
+## Example taken from pandoc website
+references:
+- id: fenner2012a
+ title: One-click science marketing
+ author:
+ - family: Fenner
+ given: Martin
+ container-title: Nature Materials
+ volume: 11
+ URL: 'http://dx.doi.org/10.1038/nmat3283'
+ DOI: 10.1038/nmat3283
+ issue: 4
+ publisher: Nature Publishing Group
+ page: 261-263
+ type: article-journal
+ issued:
+ year: 2012
+ month: 3
+---
+
+## Section ##
+
+A reference [@fenner2012a].
+
+# References
diff --git a/pypandoc/__init__.py b/pypandoc/__init__.py
new file mode 100644
index 0000000..e8816d0
--- /dev/null
+++ b/pypandoc/__init__.py
@@ -0,0 +1,354 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement, absolute_import
+
+import subprocess
+import sys
+import textwrap
+import os
+import re
+
+from .py3compat import string_types, cast_bytes, cast_unicode
+
+__author__ = u'Juho Vepsäläinen'
+__version__ = '1.1.3'
+__license__ = 'MIT'
+__all__ = ['convert', 'get_pandoc_formats', 'get_pandoc_version', 'get_pandoc_path']
+
+
+def convert(source, to, format=None, extra_args=(), encoding='utf-8',
+            outputfile=None, filters=None):
+    """Convert `source` from the format `format` into the format `to`.
+
+    :param str source: unicode string, bytes or a file path (see `encoding`)
+
+    :param str to: format into which the input should be converted; can be one of
+            `pypandoc.get_pandoc_formats()[1]`
+
+    :param str format: format of the input; inferred from the filename extension if
+            `source` is an existing file; can be one of `pypandoc.get_pandoc_formats()[0]`
+            (Default value = None)
+
+    :param list extra_args: additional arguments (list of strings) handed to pandoc
+            (Default value = ())
+
+    :param str encoding: encoding of the file or of the input bytes
+            (Default value = 'utf-8')
+
+    :param str outputfile: file that pandoc writes the result to; if None the converted
+            content is returned instead (Default value = None)
+
+    :param list filters: pandoc filters, e.g. filters=['pandoc-citeproc']
+
+    :returns: converted string (unicode) or an empty string if an outputfile was given
+    :rtype: unicode
+
+    :raises RuntimeError: if any of the inputs are not valid or if pandoc fails with an error
+    :raises OSError: if pandoc is not found; make sure it has been installed and is
+            available on the path.
+    """
+    converted = _convert(_read_file, _process_file, source, to,
+                         format=format, extra_args=extra_args, encoding=encoding,
+                         outputfile=outputfile, filters=filters)
+    return converted
+
+
+def _convert(reader, processor, source, to, format=None, extra_args=(), encoding=None,
+             outputfile=None, filters=None):
+    """Validate the requested formats and delegate the conversion to `processor`.
+
+    `reader` is called as `reader(source, format, encoding=encoding)` and has to
+    return the tuple `(source, format, input_type)`. `processor` receives the
+    validated values and its result is returned unchanged.
+
+    :raises RuntimeError: if no input format is known, if a format is not offered by
+            pandoc, or if a binary output format is requested without `outputfile`
+    """
+    source, format, input_type = reader(source, format, encoding=encoding)
+
+    # short names accepted in addition to pandoc's own format names
+    aliases = {
+        'dbk': 'docbook',
+        'md': 'markdown',
+        'rest': 'rst',
+        'tex': 'latex',
+    }
+    format = aliases.get(format, format)
+    to = aliases.get(to, to)
+
+    if not format:
+        raise RuntimeError('Missing format!')
+
+    from_formats, to_formats = get_pandoc_formats()
+
+    base_from_format = _get_base_format(format)
+    if base_from_format not in from_formats:
+        raise RuntimeError(
+            'Invalid input format! Got "%s" but expected one of these: %s' % (
+                base_from_format, ', '.join(from_formats)))
+
+    base_to_format = _get_base_format(to)
+    if base_to_format not in to_formats:
+        raise RuntimeError(
+            'Invalid output format! Expected one of these: ' +
+            ', '.join(to_formats))
+
+    # list from https://github.com/jgm/pandoc/blob/master/pandoc.hs
+    # `[...] where binaries = ["odt","docx","epub","epub3"] [...]`
+    binary_formats = ("odt", "docx", "epub", "epub3")
+    if not outputfile and base_to_format in binary_formats:
+        raise RuntimeError(
+            'Output to %s only works by using a outputfile.' % base_to_format
+        )
+
+    return processor(source, input_type, to, format, extra_args,
+                     outputfile=outputfile, filters=filters)
+
+
+def _read_file(source, format, encoding='utf-8'):
+    """Classify `source` as an existing file path or as literal content.
+
+    :returns: tuple `(source, format, input_type)` where `input_type` is 'path' or
+            'string'; for a path without explicit `format` the filename extension
+            (without the dot) is used as format
+    """
+    try:
+        is_file = os.path.exists(source)
+    except UnicodeEncodeError:
+        is_file = os.path.exists(source.encode('utf-8'))
+    except ValueError:
+        is_file = ''
+
+    if is_file:
+        extension = os.path.splitext(source)[1].strip('.')
+        return source, format or extension, 'path'
+
+    if encoding != 'utf-8':
+        # a string source in a different encoding was given: try to decode it into a
+        # unicode string and keep it unchanged if that is not possible
+        try:
+            source = cast_unicode(source, encoding=encoding)
+        except (UnicodeDecodeError, UnicodeEncodeError):
+            pass
+    return source, format, 'string'
+
+
+def _process_file(source, input_type, to, format, extra_args, outputfile=None,
+                  filters=None):
+    """Run pandoc on `source` and return what it printed to stdout.
+
+    :param source: file path (if `input_type` is 'path') or the content itself, which is
+            then sent to pandoc on stdin
+    :param str input_type: 'string' if `source` is the content, anything else for a path
+    :param str to: output format; not passed to pandoc if it is 'pdf'
+    :param str format: input format
+    :param extra_args: additional command line arguments for pandoc
+    :param str outputfile: passed to pandoc as `--output` if given
+    :param filters: list of filter names (or one whitespace separated string), each
+            passed as `--filter`
+
+    :returns: pandoc's stdout decoded as utf-8
+    :raises RuntimeError: if pandoc fails or its output is not utf-8
+    """
+    _ensure_pandoc_path()
+    string_input = input_type == 'string'
+    input_file = [source] if not string_input else []
+    args = [__pandoc_path, '--from=' + format]
+
+    # #59 - pdf output won't work with `--to` set!
+    # Compare by value: `is not` against a string literal tests object identity,
+    # which is not guaranteed for equal strings.
+    if to != 'pdf':
+        args.append('--to=' + to)
+
+    args += input_file
+
+    if outputfile:
+        args.append("--output=" + outputfile)
+
+    args.extend(extra_args)
+
+    # adds the proper filter syntax for each item in the filters list
+    if filters is not None:
+        if isinstance(filters, string_types):
+            filters = filters.split()
+        f = ['--filter=' + x for x in filters]
+        args.extend(f)
+
+    p = subprocess.Popen(
+        args,
+        stdin=subprocess.PIPE if string_input else None,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE)
+
+    # something else than 'None' indicates that the process already terminated
+    if not (p.returncode is None):
+        raise RuntimeError(
+            'Pandoc died with exitcode "%s" before receiving input: %s' % (p.returncode,
+                                                                           p.stderr.read())
+        )
+
+    try:
+        source = cast_bytes(source, encoding='utf-8')
+    except (UnicodeDecodeError, UnicodeEncodeError):
+        # assume that it is already a utf-8 encoded string
+        pass
+    try:
+        stdout, stderr = p.communicate(source if string_input else None)
+    except OSError:
+        # this is happening only on Py2.6 when pandoc dies before reading all
+        # the input. We treat that the same as when we exit with an error...
+        raise RuntimeError('Pandoc died with exitcode "%s" during conversion.' % (p.returncode))
+
+    try:
+        stdout = stdout.decode('utf-8')
+    except UnicodeDecodeError:
+        # this shouldn't happen: pandoc more or less guarantees that the output is utf-8!
+        raise RuntimeError('Pandoc output was not utf-8.')
+
+    # check that pandoc returned successfully
+    if p.returncode != 0:
+        raise RuntimeError(
+            'Pandoc died with exitcode "%s" during conversion: %s' % (p.returncode, stderr)
+        )
+
+    # if there is an outputfile, then stdout is likely empty!
+    return stdout
+
+
+def _get_base_format(format):
+    '''
+    According to http://johnmacfarlane.net/pandoc/README.html#general-options,
+    syntax extensions for markdown can be individually enabled or disabled by
+    appending +EXTENSION or -EXTENSION to the format name.
+    Return the base format without any extensions, i.e. everything before the
+    first `+` or `-`.
+    '''
+    # raw string: `\+` is a regex escape and not a valid string escape
+    return re.split(r'\+|-', format)[0]
+
+
+def get_pandoc_formats():
+    '''
+    Dynamic preprocessor for Pandoc formats.
+    Return 2 lists. "from_formats" and "to_formats".
+
+    Both lists are parsed from the text which `pandoc -h` prints before its
+    "Options:" line.
+
+    :raises RuntimeError: if pandoc exits with an error or its help text contains no
+            "Options:" line
+    '''
+    _ensure_pandoc_path()
+    p = subprocess.Popen(
+        [__pandoc_path, '-h'],
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE)
+
+    comm = p.communicate()
+    help_text = comm[0].decode().splitlines(False)
+    if p.returncode != 0 or 'Options:' not in help_text:
+        raise RuntimeError("Couldn't call pandoc to get output formats. Output from pandoc:\n%s" %
+                           str(comm))
+    txt = ' '.join(help_text[1:help_text.index('Options:')])
+
+    aux = txt.split('Output formats: ')
+    # raw strings: `\s`, `\*` and `\[` are regex escapes and not valid string escapes
+    in_ = re.sub(r'Input\sformats:\s|\*|\[.*?\]', '', aux[0]).split(',')
+    out = re.sub(r'\*|\[.*?\]', '', aux[1]).split(',')
+
+    return [f.strip() for f in in_], [f.strip() for f in out]
+
+
+# copied and adapted from jupyter_nbconvert/utils/pandoc.py, Modified BSD License
+
+def _get_pandoc_version(pandoc_path):
+    """Call `pandoc_path --version` and return the version number it reports.
+
+    The version is the first whitespace separated token on the first output line
+    which consists only of dot separated numbers (e.g. `1.15.1`).
+
+    :param str pandoc_path: the executable to call
+    :returns: the version as string
+    :raises RuntimeError: if the call fails, prints nothing or prints no version number
+    :raises OSError: if the executable can't be started
+    """
+    p = subprocess.Popen(
+        [pandoc_path, '--version'],
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE)
+    comm = p.communicate()
+    out_lines = comm[0].decode().splitlines(False)
+    if p.returncode != 0 or len(out_lines) == 0:
+        raise RuntimeError("Couldn't call pandoc to get version information. Output from "
+                           "pandoc:\n%s" % str(comm))
+
+    version_pattern = re.compile(r"^\d+(\.\d+){1,}$")
+    for tok in out_lines[0].split():
+        if version_pattern.match(tok):
+            return tok
+    # Without this the function used to fail with an UnboundLocalError.
+    raise RuntimeError("Couldn't find a version number in the output from "
+                       "pandoc:\n%s" % str(comm))
+
+
+def get_pandoc_version():
+    """Return the version of the pandoc binary used by pypandoc.
+
+    The version is probed once and then cached; later calls return the cached value
+    without calling pandoc again (unless :func:`clean_version_cache()` is called).
+
+    :raises OSError: if pandoc is not found; make sure it has been installed and is
+            available on the path.
+    """
+    global __version
+
+    if __version is not None:
+        return __version
+
+    _ensure_pandoc_path()
+    __version = _get_pandoc_version(__pandoc_path)
+    return __version
+
+
+def get_pandoc_path():
+    """Return the path of the pandoc binary used by pypandoc.
+
+    This is either a full path or, if pandoc is on PATH, simply `pandoc`. It is
+    guaranteed to be callable (i.e. `pandoc --version` reported a version).
+    If `PYPANDOC_PANDOC` is set and valid, that value is returned. If the environment
+    variable is not set, either the full path to the included pandoc or the pandoc on
+    `PATH` (whichever has the higher version) is returned.
+
+    The path is cached: once found, it is returned without probing pandoc again
+    (unless :func:`clean_pandocpath_cache()` is called).
+
+    :raises OSError: if pandoc is not found
+    """
+    _ensure_pandoc_path()
+    found_path = __pandoc_path
+    return found_path
+
+
+def _ensure_pandoc_path():
+    """Find a usable pandoc binary and cache its path in `__pandoc_path`.
+
+    Does nothing if a path is already cached. Otherwise the candidates are probed
+    with `_get_pandoc_version`: only the value of the `PYPANDOC_PANDOC` environment
+    variable if it is set, else `pandoc` (on PATH) and the binary in the `files`
+    folder next to this module. The candidate with the highest version is used; if
+    versions are equal, the earlier candidate (the installed pandoc) is kept.
+
+    :raises OSError: if no candidate could be called; installation hints are written
+            to stderr before
+    """
+    global __pandoc_path
+
+    if __pandoc_path is None:
+        included_pandoc = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                       "files", "pandoc")
+        search_paths = ["pandoc", included_pandoc]
+        # If a user added the complete path to pandoc to an env, use that as the
+        # only way to get pandoc so that a user can overwrite even a higher
+        # version in some other places.
+        if os.getenv('PYPANDOC_PANDOC', None):
+            search_paths = [os.getenv('PYPANDOC_PANDOC')]
+        # Best version seen so far. It has to live outside of the loop, else every
+        # callable candidate would replace the previous one regardless of version.
+        curr_version = [0, 0, 0]
+        for path in search_paths:
+            try:
+                version_string = _get_pandoc_version(path)
+            except Exception:
+                # we can't use that path...
+                continue
+            version = [int(x) for x in version_string.split(".")]
+            while len(version) < len(curr_version):
+                version.append(0)
+            # Lists compare position by position, so 2.0 beats 1.9. Only a strictly
+            # higher version replaces the current choice.
+            if version > curr_version:
+                __pandoc_path = path
+                curr_version = version
+
+        if __pandoc_path is None:
+            if os.path.exists('/usr/local/bin/brew'):
+                sys.stderr.write(textwrap.dedent("""\
+                    Maybe try:
+
+                        brew install pandoc
+                """))
+            elif os.path.exists('/usr/bin/apt-get'):
+                sys.stderr.write(textwrap.dedent("""\
+                    Maybe try:
+
+                        sudo apt-get install pandoc
+                """))
+            elif os.path.exists('/usr/bin/yum'):
+                sys.stderr.write(textwrap.dedent("""\
+                    Maybe try:
+
+                        sudo yum install pandoc
+                """))
+            sys.stderr.write(textwrap.dedent("""\
+                See http://johnmacfarlane.net/pandoc/installing.html
+                for installation options
+            """))
+            sys.stderr.write(textwrap.dedent("""\
+                ---------------------------------------------------------------
+
+            """))
+            raise OSError("No pandoc was found: either install pandoc and add it\n"
+                          "to your PATH or install pypandoc wheels with included pandoc.")
+
+
+# -----------------------------------------------------------------------------
+# Internal state management
+# -----------------------------------------------------------------------------
+def clean_version_cache():
+    """Forget the cached pandoc version so that it is probed again on next use."""
+    global __version
+    __version = None
+
+
+def clean_pandocpath_cache():
+    """Forget the cached pandoc path so that pandoc is searched again on next use."""
+    global __pandoc_path
+    __pandoc_path = None
+
+
+__version = None
+__pandoc_path = None
diff --git a/pypandoc/py3compat.py b/pypandoc/py3compat.py
new file mode 100644
index 0000000..05c72be
--- /dev/null
+++ b/pypandoc/py3compat.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+import sys
+import locale
+
+# compat code from IPython py3compat.py and encoding.py, which is licensed under the terms of the
+# Modified BSD License (also known as New or Revised or 3-Clause BSD)
+_DEFAULT_ENCODING = None
+try:
+ # There are reports of getpreferredencoding raising errors
+ # in some cases, which may well be fixed, but let's be conservative here.
+ _DEFAULT_ENCODING = locale.getpreferredencoding()
+except Exception:
+ pass
+
+_DEFAULT_ENCODING = _DEFAULT_ENCODING or sys.getdefaultencoding()
+
+
+def _decode(s, encoding=None):
+    """Decode `s` with `encoding`, or with the module default if none is given."""
+    return s.decode(encoding or _DEFAULT_ENCODING)
+
+
+def _encode(u, encoding=None):
+    """Encode `u` with `encoding`, or with the module default if none is given."""
+    return u.encode(encoding or _DEFAULT_ENCODING)
+
+
+def cast_unicode(s, encoding=None):
+    """Return `s` decoded if it is a bytes object, else return it unchanged."""
+    return _decode(s, encoding) if isinstance(s, bytes) else s
+
+
+def cast_bytes(s, encoding=None):
+    """Return `s` encoded unless it already is a bytes object."""
+    # bytes == str on py2.7 -> always encode on py2
+    if isinstance(s, bytes):
+        return s
+    return _encode(s, encoding)
+
+
+if sys.version_info[0] >= 3:
+ PY3 = True
+
+ string_types = (str,)
+ unicode_type = str
+else:
+ PY3 = False
+
+ string_types = (str, unicode)
+ unicode_type = unicode
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..b14b0bc
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,5 @@
+[egg_info]
+tag_build =
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f260fea
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import pypandoc
+from setuptools import setup, Command
+
+import sys
+import os
+import shutil
+import tempfile
+import os.path
+import subprocess
+import platform
+
+try:
+ from urllib.request import urlopen
+except ImportError:
+ from urllib import urlopen
+
+try:
+ long_description = pypandoc.convert('README.md', 'rst')
+ long_description = long_description.replace("\r","")
+except OSError:
+ print("\n\n!!! pandoc not found, long_description is bad, don't upload this to PyPI !!!\n\n")
+ import io
+ # pandoc is not installed, fallback to using raw contents
+ with io.open('README.md', encoding="utf-8") as f:
+ long_description = f.read()
+
+# Uses sys.platform keys, but removes the 2 from linux2
+# Adding a new platform means implementing unpacking in "DownloadPandocCommand" and adding the URL here
+PANDOC_URLS = {
+ "win32": "https://github.com/jgm/pandoc/releases/download/1.15.1.1/pandoc-1.15.1.1-windows.msi",
+ "linux": "https://github.com/jgm/pandoc/releases/download/1.15.1/pandoc-1.15.1-1-amd64.deb",
+ "darwin": "https://github.com/jgm/pandoc/releases/download/1.15.1/pandoc-1.15.1-osx.pkg"
+}
+
+class DownloadPandocCommand(Command):
+
+ """Download pandoc"""
+
+ description = "downloads a pandoc release and adds it to the package"
+
+ user_options = []
+
+ def initialize_options(self):
+ pass
+
+ def finalize_options(self):
+ pass
+
+    def _unpack_linux(self, filename, targetfolder):
+        """Extract pandoc, pandoc-citeproc and the copyright file from the .deb package.
+
+        The package `filename` is unpacked with `ar` and `tar` in a temporary folder
+        and the files are copied into `targetfolder`. The temporary folder is removed
+        again, even if unpacking fails.
+        """
+
+        assert platform.architecture()[0] == "64bit", "Downloaded linux pandoc is only compiled for 64bit."
+
+        print("* Unpacking %s to tempfolder..." % (filename))
+
+        tempfolder = tempfile.mkdtemp()
+        cur_wd = os.getcwd()
+        filename = os.path.abspath(filename)
+        try:
+            os.chdir(tempfolder)
+            cmd = ["ar", "x", filename]
+            # if only 3.5 is supported, should be `run(..., check=True)`
+            subprocess.check_call(cmd)
+            cmd = ["tar", "xzf", "data.tar.gz"]
+            subprocess.check_call(cmd)
+            # pandoc and pandoc-citeproc are in ./usr/bin subfolder
+            for exe in ["pandoc", "pandoc-citeproc"]:
+                src = os.path.join(tempfolder, "usr", "bin", exe)
+                dst = os.path.join(targetfolder, exe)
+                print("* Copying %s to %s ..." % (exe, targetfolder))
+                # `copy` also copies the permission bits, `copyfile` would drop the
+                # executable bit of the binaries
+                shutil.copy(src, dst)
+            src = os.path.join(tempfolder, "usr", "share", "doc", "pandoc", "copyright")
+            dst = os.path.join(targetfolder, "copyright")
+            print("* Copying copyright to %s ..." % (targetfolder))
+            shutil.copyfile(src, dst)
+        finally:
+            os.chdir(cur_wd)
+            shutil.rmtree(tempfolder)
+
+    def _unpack_darwin(self, filename, targetfolder):
+        """Extract pandoc and pandoc-citeproc from the OS X .pkg installer.
+
+        The package `filename` is expanded with `pkgutil` and `tar` in a temporary
+        folder and the binaries are copied into `targetfolder`. The temporary folder
+        is removed again, even if unpacking fails.
+        """
+        print("* Unpacking %s to tempfolder..." % (filename))
+
+        tempfolder = tempfile.mkdtemp()
+        try:
+            pkgutilfolder = os.path.join(tempfolder, 'tmp')
+            cmd = ["pkgutil", "--expand", filename, pkgutilfolder]
+            # if only 3.5 is supported, should be `run(..., check=True)`
+            subprocess.check_call(cmd)
+
+            # this will generate usr/local/bin below the dir
+            cmd = ["tar", "xvf", os.path.join(pkgutilfolder, "pandoc.pkg", "Payload"),
+                   "-C", pkgutilfolder]
+            subprocess.check_call(cmd)
+
+            # pandoc and pandoc-citeproc are in the ./usr/local/bin subfolder
+            for exe in ["pandoc", "pandoc-citeproc"]:
+                src = os.path.join(pkgutilfolder, "usr", "local", "bin", exe)
+                dst = os.path.join(targetfolder, exe)
+                print("* Copying %s to %s ..." % (exe, targetfolder))
+                # `copy` also copies the permission bits, `copyfile` would drop the
+                # executable bit of the binaries
+                shutil.copy(src, dst)
+        finally:
+            # remove temporary dir, also if one of the steps above failed
+            shutil.rmtree(tempfolder)
+        print("* Done.")
+
+    def _unpack_win32(self, filename, targetfolder):
+        """Extract pandoc.exe, pandoc-citeproc.exe and COPYRIGHT.txt from the .msi installer.
+
+        The installer `filename` is unpacked with `msiexec /a` into a temporary folder
+        and the files are copied into `targetfolder`. The temporary folder is removed
+        again, even if unpacking fails.
+        """
+        print("* Unpacking %s to tempfolder..." % (filename))
+
+        tempfolder = tempfile.mkdtemp()
+        try:
+            cmd = ["msiexec", "/a", filename, "/qb", "TARGETDIR=%s" % (tempfolder)]
+            # if only 3.5 is supported, should be `run(..., check=True)`
+            subprocess.check_call(cmd)
+
+            # pandoc.exe, pandoc-citeproc.exe, and the COPYRIGHT are in the Pandoc subfolder
+            for exe in ["pandoc.exe", "pandoc-citeproc.exe", "COPYRIGHT.txt"]:
+                src = os.path.join(tempfolder, "Pandoc", exe)
+                dst = os.path.join(targetfolder, exe)
+                print("* Copying %s to %s ..." % (exe, targetfolder))
+                shutil.copyfile(src, dst)
+        finally:
+            # remove temporary dir, also if one of the steps above failed
+            shutil.rmtree(tempfolder)
+        print("* Done.")
+
+
+ def run(self):
+ pf = sys.platform
+ # compatibility with py3
+ if pf.startswith("linux"):
+ pf = "linux"
+
+ try:
+ url = PANDOC_URLS[pf]
+ except:
+ raise Exception("No prebuilt pandoc available or not yet implemented for your platform")
+
+ filename = url.split("/")[-1]
+ if os.path.isfile(filename):
+ print("* Using already downloaded file %s" % (filename))
+ else:
+ print("* Downloading pandoc from %s ..." % url)
+ # https://stackoverflow.com/questions/30627937/tracebaclk-attributeerroraddinfourl-instance-has-no-attribute-exit
+ response = urlopen(url)
+ with open(filename, 'wb') as out_file:
+ shutil.copyfileobj(response, out_file)
+
+ targetfolder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pypandoc", "files")
+
+ # Make sure target folder exists...
+ try:
+ os.makedirs(targetfolder)
+ except OSError:
... 369 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/pypandoc.git
More information about the Python-modules-commits
mailing list