.coveragerc | 5 +
.gitignore | 38 +
.travis.yml | 29 +
COPYING | 197 ++
MANIFEST.in | 1 +
README.md | 40 +
README.rst | 44 +
RELEASE | 13 +
TODO | 24 +
bibtexparser/__init__.py | 91 +
bibtexparser/bibdatabase.py | 57 +
bibtexparser/bparser.py | 422 ++++
bibtexparser/bwriter.py | 114 +
bibtexparser/customization.py | 252 ++
bibtexparser/latexenc.py | 2643 ++++++++++++++++++++
bibtexparser/tests/data/article.bib | 13 +
bibtexparser/tests/data/article_comma_first.bib | 18 +
bibtexparser/tests/data/article_missing_coma.bib | 18 +
bibtexparser/tests/data/article_output.bib | 14 +
.../tests/data/article_start_with_whitespace.bib | 15 +
bibtexparser/tests/data/book.bib | 8 +
bibtexparser/tests/data/book_comma_first.bib | 9 +
bibtexparser/tests/data/book_output.bib | 9 +
bibtexparser/tests/data/comments_only.bib | 3 +
bibtexparser/tests/data/comments_only_output.bib | 6 +
bibtexparser/tests/data/encoding.bib | 13 +
bibtexparser/tests/data/features.bib | 18 +
bibtexparser/tests/data/features2.bib | 27 +
bibtexparser/tests/data/features_output.bib | 25 +
bibtexparser/tests/data/multiline_comments.bib | 22 +
bibtexparser/tests/data/multiple_entries.bib | 24 +
.../tests/data/multiple_entries_and_comments.bib | 29 +
.../data/multiple_entries_and_comments_output.bib | 29 +
.../tests/data/multiple_entries_output.bib | 25 +
bibtexparser/tests/data/traps.bib | 14 +
bibtexparser/tests/data/website.bib | 6 +
bibtexparser/tests/data/wrong.bib | 9 +
bibtexparser/tests/test_bibdatabase.py | 28 +
bibtexparser/tests/test_bibtex_strings.py | 47 +
bibtexparser/tests/test_bibtexparser.py | 83 +
bibtexparser/tests/test_bibtexwriter.py | 147 ++
bibtexparser/tests/test_bparser.py | 413 +++
bibtexparser/tests/test_bwriter.py | 83 +
bibtexparser/tests/test_comments.py | 78 +
bibtexparser/tests/test_customization.py | 109 +
bibtexparser/tests/test_homogenise_fields.py | 25 +
bibtexparser/tests/test_latexenc.py | 71 +
bibtexparser/tests/test_preambles.py | 44 +
docs/Makefile | 153 ++
docs/source/bibtex_conv.rst | 45 +
docs/source/bibtexparser.rst | 37 +
docs/source/conf.py | 248 ++
docs/source/index.rst | 47 +
docs/source/install.rst | 36 +
docs/source/logging.rst | 67 +
docs/source/tutorial.rst | 267 ++
docs/source/who.rst | 10 +
setup.py | 19 +
tox.ini | 5 +
61 files changed, 6504 insertions(+)
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..651eb51
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,5 @@
+source = bibtexparser
+omit = bibtexparser/test*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..61ec8e4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+# C extensions
+# Packages
+# Installer logs
+# Unit test / coverage reports
+# Translations
+# Mr Developer
+# Pycharm
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..b026bd5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,29 @@
+language: python
+ include:
+ - python: "2.7"
+ env: TEST_SUITE=suite_2_7
+ - python: "3.2"
+ env: TEST_SUITE=suite_3_2
+ - python: "3.3"
+ env: TEST_SUITE=suite_3_3
+ - python: "3.4"
+ env: TEST_SUITE=suite_3_4
+ - python: "pypy"
+ env: TEST_SUITE=suite_pypy
+ - if [[ $TEST_SUITE == suite_3_4 ]]; then
+ pip install sphinx;
+ fi;
+ - pip install coverage
+ - python setup.py install
+ - nosetests --with-coverage --cover-erase --cover-package=bibtexparser
+ - if [[ $TEST_SUITE == suite_3_4 ]]; then
+ cd docs;
+ make html;
+ fi;
+ - pip install coveralls
+ - coveralls
new file mode 100644
index 0000000..c5689bc
--- /dev/null
@@ -0,0 +1,89 @@
+* API: Previous type and id keywords which are automatically added to
+the dictionary are now ENTRYTYPE and ID, respectively (#42).
+* ENH: comma first syntax support (#49) by Michal Grochmal.
+* DOC: clarify version number
+* ENH: support for bibtex with leading spaces (#34)
+* FIX: if title contained multiple words in braces
+* ENH: code refactoring (#33)
+* ENH: support for comment blocks (#32)
+* ENH: Removed comma after last key-value pair by faph (#28)
+* ENH: optional keys sanitising by faph (#29)
+* FIX: missing comma at the end of a record (#24)
+* DOC: clarify the usecase of to_bibtex
+* FIX: raise exception for TypeError in to_bibtex (#22)
+* ENH: json output
+* ENH: Add (optional) support for non-standard entry types by Georg C. Brückmann
+* FIX: protect uppercase only on unprotected characters. #18
+* ENH: string replacement by Uwe Schmidt (#13 #20)
+* ENH: json output
+* API: enhance the naming choice for bwriter
+* ENH: add writer (#16), thanks to Lucas Verney
+* MAINT: Remove non-standard --BREAK-- command detection
+* FIX: missing strip() (#14) by Sebastien Diemer
+* API breakage: the parser takes data instead of a filehandler
+* ENH: fix tests latex encoding
+* ENH: support @comment @preamble (escaped)
+* ENH: check that bibtype belongs to a known type
+* ENH: split keywords with various separators
+* ENH: get_entry_dict make the dict once
+* ENH: add messages with logging
+* FIX: fix unittest related to braces detection
+* Permission from original authors and OKFN to use LGPLv3
+* ENH: Python 2.7 support
+* FIX: issue related to accents
+* ENH: Transformations on characters are now considered as a customization
+* ENH: New customization: clean latex style
+* FIX: issue related to name processing
+* DOC: moved to readthedocs
+* DOC: several improvements
+* MAINT: separate customizations
+* TEST: initialized
+* DOC: initialized
+* First preliminary release
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
new file mode 100644
index 0000000..0b3bcf3
--- /dev/null
@@ -0,0 +1,29 @@
+- François Boulogne
+ Project coordinator
+- bibserver's contributors
+ for the parser's core and the permission to release this project under LGPLv3 and BSD
+- Shuen-Huei (Drake) Guan
+ Python 2.7 porting
+- Sebastien Diemer
+ Bugfix
+- Georg C. Brückmann
+ Support for non-standard entry types
+- Uwe Schmidt
+ String replacement
+- faph
+ comma fixes, optional keys sanitising, refactoring and other improvements
+- Steven M. Bellovin
+ Fix braces detection
+- Sven Goossens
+ Support for bibtex with leading spaces
+- Michal Grochmal
+ Comma first syntax support
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..9bec424
--- /dev/null
@@ -0,0 +1,197 @@
+The code is distributed under a dual license (at your choice).
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+ (1) Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ (2) Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ (3)The name of the author may not be used to
+ endorse or promote products derived from this software without
+ specific prior written permission.
+ Version 3, 29 June 2007
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+ 0. Additional Definitions.
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+ 1. Exception to Section 3 of the GNU GPL.
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+ 2. Conveying Modified Versions.
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+ 3. Object Code Incorporating Material from Library Header Files.
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+ 4. Combined Works.
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+ d) Do one of the following:
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+ 5. Combined Libraries.
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+ 6. Revised Versions of the GNU Lesser General Public License.
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..efa752e
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include *.md
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f00e171
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+Bibtex parser in Python 2.7 and 3.
+The original source code was part of bibserver from okfn
+This project is released under the AGPLv3. Okfn and the original authors
+kindly provided the permission to use a subpart of their project
+(ie the bibtex parser) under LGPLv3. Many thanks to them!
+The aim of this project is to provide a standalone library in python.
+Documentation including installation procedure and archives.
+Please, read the changelog before upgrading for API modifications.
+Dual license (at your choice):
+* LGPLv3.
+* BSD
+See COPYING for details.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..41802d5
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,44 @@
+.. image:: http://api.flattr.com/button/flattr-badge-large.png
+ :target: https://flattr.com/submit/auto?user_id=fboulogne&url=https://github.com/sciunto/python-bibtexparser&title=python-bibtexparser&language=&tags=github&category=software
+ :alt: Flattr this git repo
+.. image:: https://secure.travis-ci.org/sciunto/python-bibtexparser.png
+ :target: http://travis-ci.org/sciunto/python-bibtexparser
+ :alt: Build Status
+Bibtex parser in Python 2.7 and 3.
+The original source code was part of bibserver from okfn
+This project is released under the AGPLv3. Okfn and the original authors
+kindly provided the permission to use a subpart of their project
+(ie the bibtex parser) under LGPLv3. Many thanks to them!
+The aim of this project is to provide a standalone library in python.
+Documentation including installation procedure and archives.
+Please, read the changelog before upgrading for API modifications.
+Dual license (at your choice):
+* LGPLv3.
+* BSD
+See COPYING for details.
diff --git a/RELEASE b/RELEASE
new file mode 100644
index 0000000..d52cb2e
--- /dev/null
@@ -0,0 +1,13 @@
+How to release
+* Update version in __init__.py
+* git tag -a 'vX'
+* merge in branch latest
+* Create a tarball and upload it on the server
+ git archive master --prefix 'bibtexparser/' | bzip2 > bibtexparser-x.y.tar.bz2
+* Send the package on pypi
+ python setup.py sdist upload
+* tick the doc version on readthedocs
+* Update version in __init__.py
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..ffea767
--- /dev/null
+++ b/TODO
@@ -0,0 +1,24 @@
+* add unittests
+* Satisfy bibjson: http://www.bibjson.org/
+* String replacement, some cases are implemented, but there is still todos. See #20.
+* getnames: missing cases
+* docstrings
+* doctests
+* add examples in docs/
diff --git a/bibtexparser/__init__.py b/bibtexparser/__init__.py
new file mode 100644
index 0000000..69fdf4d
--- /dev/null
+++ b/bibtexparser/__init__.py
@@ -0,0 +1,91 @@
+BibTeX <http://en.wikipedia.org/wiki/BibTeX> is a bibliographic data file format.
+The :mod:`bibtexparser` module provides functionality for parsing and writing BibTeX files. The API is similar to the
+:mod:`json` module. The parsed data is returned as a simple :class:`BibDatabase` object with the main attribute being
+:attr:`entries` representing bibliographic sources such as books and journal articles.
+Parsing is as simple as::
+ >>>> import bibtexparser
+ >>>> with open('bibtex.bib') as bibtex_file:
+ >>>> bibtex_database = bibtexparser.load(bibtex_file)
+And writing::
+ >>>> import bibtexparser
+ >>>> with open('bibtex.bib', 'w') as bibtex_file:
+ >>>> bibtexparser.dump(bibtex_database, bibtex_file)
+__all__ = [
+ 'loads', 'load', 'dumps', 'dump', 'bibdatabase',
+ 'bparser', 'bwriter', 'latexenc', 'customization',
+__version__ = '0.6.1'
+from . import bibdatabase, bparser, bwriter, latexenc, customization
+def loads(bibtex_str, parser=None):
+ """
+ Load :class:`BibDatabase` object from a string
+ :param bibtex_str: input BibTeX string to be parsed
+ :type bibtex_str: str or unicode
+ :param parser: custom parser to use (optional)
+ :type parser: BibTexParser
+ :return: bibliographic database object
+ :rtype: BibDatabase
+ """
+ if parser is None:
+ parser = bparser.BibTexParser()
+ return parser.parse(bibtex_str)
+def load(bibtex_file, parser=None):
+ """
+ Load :class:`BibDatabase` object from a file
+ :param bibtex_file: input file to be parsed
+ :type bibtex_file: file
+ :param parser: custom parser to use (optional)
+ :type parser: BibTexParser
+ :return: bibliographic database object
+ :rtype: BibDatabase
+ """
+ if parser is None:
+ parser = bparser.BibTexParser()
+ return parser.parse_file(bibtex_file)
+def dumps(bib_database, writer=None):
+ """
+ Dump :class:`BibDatabase` object to a BibTeX string
+ :param bib_database: bibliographic database object
+ :type bib_database: BibDatabase
+ :param writer: custom writer to use (optional) (not yet implemented)
+ :type writer: BibTexWriter
+ :return: BibTeX string
+ :rtype: unicode
+ """
+ if writer is None:
+ writer = bwriter.BibTexWriter()
+ return writer.write(bib_database)
+def dump(bib_database, bibtex_file, writer=None):
+ """
+ Save :class:`BibDatabase` object as a BibTeX text file
+ :param bib_database: bibliographic database object
+ :type bib_database: BibDatabase
+ :param bibtex_file: file to write to
+ :type bibtex_file: file
+ :param writer: custom writer to use (optional) (not yet implemented)
+ :type writer: BibTexWriter
+ """
+ if writer is None:
+ writer = bwriter.BibTexWriter()
+ bibtex_file.write(writer.write(bib_database))
diff --git a/bibtexparser/bibdatabase.py b/bibtexparser/bibdatabase.py
new file mode 100644
index 0000000..6f71b4f
--- /dev/null
+++ b/bibtexparser/bibdatabase.py
@@ -0,0 +1,57 @@
+from collections import OrderedDict
+import sys
+if sys.version_info.major == 2:
+ TEXT_TYPE = unicode
+ TEXT_TYPE = str
+class BibDatabase(object):
+ """
+ A bibliographic database object following the data structure of a BibTeX file.
+ """
+ def __init__(self):
+ #: List of BibTeX entries, for example `@book{...}`, `@article{...}`, etc. Each entry is a simple dict with
+ #: BibTeX field-value pairs, for example `'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'` Each
+ #: entry will always have the following dict keys (in addition to other BibTeX fields):
+ #: - `ID` (BibTeX key)
+ #: - `ENTRYTYPE` (entry type in lowercase, e.g. `book`, `article` etc.)
+ self.entries = []
+ self._entries_dict = {}
+ #: List of BibTeX comment (`@comment{...}`) blocks.
+ self.comments = []
+ #: OrderedDict of BibTeX string definitions (`@string{...}`). In order of definition.
+ self.strings = OrderedDict() # Not sure if order is important, keep order just in case
+ #: List of BibTeX preamble (`@preamble{...}`) blocks.
+ self.preambles = []
+ def get_entry_list(self):
+ """Get a list of bibtex entries.
+ :returns: BibTeX entries
+ :rtype: list
+ .. deprecated:: 0.5.6
+ Use :attr:`entries` instead.
+ """
+ return self.entries
+ @staticmethod
+ def entry_sort_key(entry, fields):
+ result = []
+ for field in fields:
+ result.append(TEXT_TYPE(entry.get(field, '')).lower()) # Sorting always as string
+ return tuple(result)
+ def get_entry_dict(self):
+ """Return a dictionary of BibTeX entries.
+ The dict key is the BibTeX entry key
+ """
+ # If the hash has never been made, make it
+ if not self._entries_dict:
+ for entry in self.entries:
+ self._entries_dict[entry['ID']] = entry
+ return self._entries_dict
+ entries_dict = property(get_entry_dict)
diff --git a/bibtexparser/bparser.py b/bibtexparser/bparser.py
new file mode 100644
index 0000000..bd622a9
--- /dev/null
+++ b/bibtexparser/bparser.py
@@ -0,0 +1,422 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Original source: github.com/okfn/bibserver
+# Authors:
+# markmacgillivray
+# Etienne Posthumus (epoz)
+# Francois Boulogne <fboulogne at april dot org>
+import sys
+import logging
+import io
+import re
+from .bibdatabase import BibDatabase
+logger = logging.getLogger(__name__)
+__all__ = ['BibTexParser']
+if sys.version_info >= (3, 0):
+ from io import StringIO
+ ustr = str
+ from StringIO import StringIO
+ ustr = unicode
+class BibTexParser(object):
+ """
+ A parser for reading BibTeX bibliographic data files.
+ Example::
+ from bibtexparser.bparser import BibTexParser
+ bibtex_str = ...
+ parser = BibTexParser()
+ parser.ignore_nonstandard_types = False
+ parser.homogenise_fields = False
+ bib_database = bibtexparser.loads(bibtex_str, parser)
+ """
+ def __new__(cls, data=None,
+ customization=None,
+ ignore_nonstandard_types=True,
+ homogenise_fields=True):
+ """
+ To catch the old API structure in which creating the parser would immediately parse and return data.
+ """
+ if data is None:
+ return super(BibTexParser, cls).__new__(cls)
+ else:
+ # For backwards compatibility: if data is given, parse and return the `BibDatabase` object instead of the
+ # parser.
+ parser = BibTexParser()
+ parser.customization = customization
+ parser.ignore_nonstandard_types = ignore_nonstandard_types
+ parser.homogenise_fields = homogenise_fields
+ return parser.parse(data)
+ def __init__(self):
+ """
+ Creates a parser for reading BibTeX files
+ :return: parser
+ :rtype: `BibTexParser`
+ """
+ self.bib_database = BibDatabase()
+ #: Callback function to process BibTeX entries after parsing, for example to create a list from a string with
+ #: multiple values. By default all BibTeX values are treated as simple strings. Default: `None`.
+ self.customization = None
+ #: Ignore non-standard BibTeX types (`book`, `article`, etc). Default: `True`.
+ self.ignore_nonstandard_types = True
+ #: Sanitise BibTeX field names, for example change `url` to `link` etc. Field names are always converted to
+ #: lowercase names. Default: `True`.
+ self.homogenise_fields = True
+ # On some sample data files, the character encoding detection simply
+ # hangs. We are going to default to utf8, and mandate it.
+ self.encoding = 'utf8'
+ # pre-defined set of key changes
+ self.alt_dict = {
+ 'keyw': 'keyword',
+ 'keywords': 'keyword',
+ 'authors': 'author',
+ 'editors': 'editor',
+ 'url': 'link',
+ 'urls': 'link',
+ 'links': 'link',
+ 'subjects': 'subject'
+ }
+ self.replace_all_re = re.compile(r'((?P<pre>"?)\s*(#|^)\s*(?P<id>[^\d\W]\w*)\s*(#|$)\s*(?P<post>"?))', re.UNICODE)
+ def _bibtex_file_obj(self, bibtex_str):
+ # Some files have Byte-order marks inserted at the start
+ byte = '\xef\xbb\xbf'
+ if not isinstance(byte, ustr):
+ byte = ustr('\xef\xbb\xbf', self.encoding, 'ignore')
+ if bibtex_str[:3] == byte:
+ bibtex_str = bibtex_str[3:]
+ return StringIO(bibtex_str)
+ def parse(self, bibtex_str):
+ """Parse a BibTeX string into an object
+ :param bibtex_str: BibTeX string
+ :type: str or unicode
+ :return: bibliographic database
+ :rtype: BibDatabase
+ """
+ self.bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
+ self._parse_records(customization=self.customization)
+ return self.bib_database
+ def parse_file(self, file):
+ """Parse a BibTeX file into an object
+ :param file: BibTeX file or file-like object
+ :type: file
+ :return: bibliographic database
+ :rtype: BibDatabase
+ """
+ return self.parse(file.read())
+ def _parse_records(self, customization=None):
+ """Parse the bibtex into a list of records.
+ :param customization: a function
+ """
+ def _add_parsed_record(record, records):
+ """
+ Atomic function to parse a record
+ and append the result in records
+ """
+ if record != "":
+ logger.debug('The record is not empty. Let\'s parse it.')
+ parsed = self._parse_record(record, customization=customization)
+ if parsed:
+ logger.debug('Store the result of the parsed record')
+ records.append(parsed)
+ else:
+ logger.debug('Nothing returned from the parsed record!')
+ else:
+ logger.debug('The record is empty')
+ records = []
+ record = ""
+ # read each line, bundle them up until they form an object, then send for parsing
+ for linenumber, line in enumerate(self.bibtex_file_obj):
+ logger.debug('Inspect line %s', linenumber)
+ if line.strip().startswith('@'):
+ # Remove leading whitespaces
+ line = line.lstrip()
+ logger.debug('Line starts with @')
+ # Parse previous record
+ _add_parsed_record(record, records)
+ # Start new record
+ logger.debug('The record is set to empty')
+ record = ""
+ # Keep adding lines to the record
+ record += line
+ # catch any remaining record and send it for parsing
+ _add_parsed_record(record, records)
+ logger.debug('Set the list of entries')
+ self.bib_database.entries = records
+ def _parse_record(self, record, customization=None):
+ """Parse a record.
+ * tidy whitespace and other rubbish
+ * parse out the bibtype and citekey
+ * find all the key-value pairs it contains
+ :param record: a record
+ :param customization: a function
+ :returns: dict --
+ """
+ d = {}
+ if not record.startswith('@'):
+ logger.debug('The record does not start with @. Return empty dict.')
... 5942 lines suppressed ...
