<DKIM> [PATCH,review]: add lmdb folder backend
Nicolas Sebrecht
nicolas.s-dev at laposte.net
Mon Dec 19 12:31:12 GMT 2016
Hi,
I did a quick review.
This feature would obviously require documentation in offlineimap.conf
and this must be marked EXPERIMENTAL.
On Mon, Dec 19, 2016 at 09:29:29AM +0000, lkcl wrote:
> diff --git a/offlineimap/folder/LocalStatusLMDB.py b/offlineimap/folder/LocalStatusLMDB.py
> index e69de29..3108595 100644
> --- a/offlineimap/folder/LocalStatusLMDB.py
> +++ b/offlineimap/folder/LocalStatusLMDB.py
> @@ -0,0 +1,299 @@
> +# Local status cache virtual folder: LMDB backend
> +# Copyright (C) 2009-2016 Stewart Smith and contributors.
> +# Copyright (C) 2016 Luke Kenneth Casson Leighton <lkcl at lkcl.net>
We decided to use "and contributors" to avoid adding new copyright
lines.
If this new file is yours, I'd suggest:
# Copyright (C) 2016 Luke Kenneth Casson Leighton and contributors.
to keep things simple. If you really want to keep Stewart's copyright,
# Copyright (C) 2009-2016 Stewart Smith and contributors.
is good, too.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
> +
> +import os
> +import lmdb
Python 2.7.10 (default, Nov 9 2016, 23:16:09)
[GCC 4.9.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import lmdb
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: No module named lmdb
>>>
We need to make this dependency optional. Note that this is likely
easier to achieve at the point where LocalStatusLMDB is imported/used,
in repository/LocalStatus.py.
> +from sys import exc_info
> +
> +import six
> +try:
> + # Use ultra-fast json library if available (much faster,
> + # see https://blog.hartleybrody.com/python-serialize/)
> + import ujson as json
> +except ImportError:
> + # not available, fall back to standard python json library (slower)
> + import json
> +
> +from .Base import BaseFolder
> +
> +
> +class LocalStatusLMDBFolder(BaseFolder):
> + """ LocalStatus backend implemented with an LMDB database
> + """
"""LocalStatus backend implemented with an LMDB database."""
(style)
> +
> + # Current version of our db format.
> + cur_version = 1
> +
> + def __init__(self, name, repository):
> + self.sep = '.' # Needs to be set before super().__init__().
> + super(LocalStatusLMDBFolder, self).__init__(name, repository)
> + self.root = repository.root
> + self.filename = os.path.join(self.getroot(), self.getfolderbasename())
> +
> + self._newfolder = False # Flag if the folder is new.
> +
> + dirname = os.path.dirname(self.filename)
> + if not os.path.exists(dirname):
> + os.makedirs(dirname)
> + if not os.path.isdir(dirname):
> + raise UserWarning("LMDB database path '%s' is not a directory."%
> + dirname)
Race condition. Not significant, though.
> +
> + self._env = None
> +
> + def openfiles(self):
> + """ Open database, check it, upgrade if needed
> + """
> +
> + # Try to establish connection
> + try:
> + self._env = lmdb.open(self.filename, max_dbs=10)
> + except lmdb.Error as e:
> + # Operation had failed.
> + six.reraise(UserWarning,
> + UserWarning(
> + "cannot open database file '%s': %s.\nYou might"
> + " want to check the rights to that file and if "
> + "it cleanly opens with the 'lmdb<3>' command"%
> + (self.filename, e)),
> + exc_info()[2])
> +
> + with self._env.begin() as txn:
I wonder whether locks are missing here. This class will be *instantiated*
and used more than once in different threads.
In order to avoid repeating the same code/patterns in the backends, I
guess the best would be to add a new layer (class) to serialize the I/O
on top of them. Not sure about that, though.
> + # Test if db version is current enough and if db is readable.
> + try:
> + db = self.env.open_db('metadata')
> + with self._env.begin(db=db) as txn:
> + cursor = txn.cursor()
> + version = int(cursor.get('db_version'))
> + except:
> + # db file missing or corrupt, recreate it.
> + self.__create_db()
> + else:
> + # Fetch db version and upgrade if needed.
> + if version < LocalStatusLMDBFolder.cur_version:
> + self.__upgrade_db(version)
> +
> + def purge(self):
> + """ Remove any pre-existing database. Do not call in dry-run mode.
> + """
> +
> + try:
> + os.unlink(self.filename)
> + except OSError as e:
> + self.ui.debug('', "could not remove file %s: %s"%
> + (self.filename, e))
> +
> + def storesmessages(self):
> + return False
> +
> + def getfullname(self):
> + return self.filename
> +
> + # Interface from LocalStatusFolder
> + def isnewfolder(self):
> + return self._newfolder
> +
> + def __upgrade_db(self, from_ver):
> + """ Upgrade the lmdb format from version 'from_ver' to current
> + """
> +
> + # Future version upgrades come here...
> + # if from_ver <= 1: ... #upgrade from 1 to 2
> + # if from_ver <= 2: ... #upgrade from 2 to 3
> + # if from_ver <= 3: ... #upgrade from 3 to 4
> +
> +
> + def __create_db(self):
> + """Create a new db file.
> + """
> + self.ui._msg('Creating new Local Status db for %s:%s'%
> + (self.repository, self))
> + self._metadata_db = self._env.open_db('metadata')
> + self._status_db = self._env.open_db('status')
> + with self._env.begin(write=True) as txn:
> + txn.put('db_version', str(LocalStatusLMDBFolder.cur_version),
> + db=self._metadata_db)
> + self._newfolder = True
> +
> + # Interface from BaseFolder
> + def msglist_item_initializer(self, uid):
> + # XXX not used (there's no point)
> + return { 'uid': uid,
> + 'flags': set(),
> + 'labels': set(),
Trailing spaces for above two lines.
> + 'time': 0,
> + 'mtime': 0
> + }
> +
> + # Interface from BaseFolder
> + def cachemessagelist(self):
> + """ caches in memory all messages in the lmdb status_db
> + """
> + self.dropmessagelistcache()
> + with self._env.begin(db=self._status_db) as txn:
> + for key, val in txn.cursor():
> + uid = int(key)
> + print "cachemsglist", uid, val
> + # if flags or labels are empty they're stored as null
> + # so subst an empty tuple, converts to empty set
> + (flags, labels, _time, mtime) = json.loads(val)
> + msg = { 'time': _time,
> + 'mtime': mtime,
> + 'flags': set(flags or () ),
^
> + 'labels': set(labels or () )
^
'labels': set(labels or ())
> + }
> + self.messagelist[uid] = msg
> +
> + def closefiles(self):
> + self._env.close()
> + self._env = None
> + self._metadata_db = None
> + self._status_db = None
> +
> + # Interface from LocalStatusFolder
> + def save(self):
> + pass
> + # Noop. every transaction commits to database!
> +
> + def _save_msg(self, txn, uid, msg):
> + # take relevant stuff from msg, convert to tuple.
> + # empty sets are saved as null
> + msg = (msg['flags'] or None, # empty set evaluates True => save space
> + msg['labels'] or None, # ditto
> + msg['time'],
> + msg['mtime'])
> + txn.put(str(uid), json.dumps(msg), db=self._status_db)
> +
> + def saveall(self):
> + """ Saves the entire messagelist to the database.
> + """
> + with self._env.begin(write=True) as txn:
> + for uid in self.messagelist:
> + self._save_msg(txn, uid, self.messagelist[uid])
> +
> + # Interface from BaseFolder
> + def savemessage(self, uid, content, flags, rtime, mtime=0, labels=None):
> + """ Writes a new message, with the specified uid.
> +
> + See folder/Base for detail. Note that savemessage() does not
> + check against dryrun settings, so you need to ensure that
> + savemessage is never called in a dryrun mode.
> + """
> +
> + if uid < 0:
> + # We cannot assign a uid.
> + return uid
> +
> + if self.uidexists(uid): # Already have it.
> + self.savemessageflags(uid, flags)
> + return uid
> +
> + msg = {'uid': uid,
> + 'flags': flags,
> + 'time': rtime,
> + 'mtime': mtime,
Trailing spaces.
> + 'labels': labels or set()
> + }
> + self.messagelist[uid] = msg
> +
> + with self._env.begin(write=True) as txn:
> + self._save_msg(txn, uid, msg)
> +
> + return uid
> +
> + # Interface from BaseFolder
> + def savemessageflags(self, uid, flags):
> + assert self.uidexists(uid)
> + msg = self.messagelist[uid]
> + msg['flags'] = flags
> +
> + with self._env.begin(write=True) as txn:
> + self._save_msg(txn, uid, msg)
> +
> + def getmessageflags(self, uid):
> + return self.messagelist[uid]['flags']
> +
> + def savemessagelabels(self, uid, labels, mtime=None):
> + msg = self.messagelist[uid]
> + msg['labels'] = labels
> + if mtime:
> + msg['mtime'] = mtime
> +
> + with self._env.begin(write=True) as txn:
> + self._save_msg(txn, uid, msg)
> +
> + def _save_by_uids(self, uids):
> + with self._env.begin(write=True) as txn:
> + for uid in uids: # dict iterates keys, list iterates members
> + self._save_msg(txn, uid, self.messagelist[uid])
> +
> + def savemessageslabelsbulk(self, labels):
> + """ Saves labels from a dictionary in a single database operation.
> + """
> + for uid in labels:
> + self.messagelist[uid]['labels'] = l
> + self._save_by_uids(labels) # use for on dict to get uids as keys
> +
> + def addmessageslabels(self, uids, labels):
> + for uid in uids:
> + self.messagelist[uid]['labels'].update(labels)
> + self._save_by_uids(uids)
> +
> + def deletemessageslabels(self, uids, labels):
> + for uid in uids:
> + self.messagelist[uid]['labels'] -= labels
> + self._save_by_uids(uids) # use for on dict to get uids as keys
> +
> + def getmessagelabels(self, uid):
> + return self.messagelist[uid]['labels']
> +
> + def savemessagesmtimebulk(self, mtimes):
> + """ Saves mtimes from a dictionary in a single database operation.
> + """
> +
> + for uid in mtimes:
> + mt = mtimes[uid]
> + self.messagelist[uid]['mtime'] = mt
> + self._save_by_uids(mtimes) # use for on dict to get uids as keys
> +
> + def getmessagemtime(self, uid):
> + return self.messagelist[uid]['mtime']
> +
> + # Interface from BaseFolder
> + def deletemessage(self, uid):
> + if not uid in self.messagelist:
> + return
> + with self._env.begin(write=True) as txn:
> + txn.drop(self._status_db, str(uid))
> + del self.messagelist[uid]
> +
> + # Interface from BaseFolder
> + def deletemessages(self, uidlist):
> + """ Delete list of UIDs from status cache
> + """
> +
> + # Weed out ones not in self.messagelist
> + uidlist = [uid for uid in uidlist if uid in self.messagelist]
> + if len(uidlist) == 0:
> + return
> + with self._env.begin(write=True) as txn:
> + for uid in uidlist:
> + txn.drop(self._status_db, str(uid))
> + for uid in uidlist:
> + del self.messagelist[uid]
> +
> diff --git a/offlineimap/repository/LocalStatus.py b/offlineimap/repository/LocalStatus.py
> index f23020f..b21f07b 100644
> --- a/offlineimap/repository/LocalStatus.py
> +++ b/offlineimap/repository/LocalStatus.py
> @@ -19,6 +19,7 @@ import os
>
> from offlineimap.folder.LocalStatus import LocalStatusFolder
> from offlineimap.folder.LocalStatusSQLite import LocalStatusSQLiteFolder
> +from offlineimap.folder.LocalStatusLMDB import LocalStatusLMDBFolder
> from offlineimap.repository.Base import BaseRepository
>
> class LocalStatusRepository(BaseRepository):
> @@ -32,6 +33,11 @@ class LocalStatusRepository(BaseRepository):
> 'root': os.path.join(account.getaccountmeta(), 'LocalStatus-sqlite')
> }
>
> + self.backends['lmdb'] = {
> + 'class': LocalStatusLMDBFolder,
> + 'root': os.path.join(account.getaccountmeta(), 'LocalStatus-lmdb')
> + }
> +
> self.backends['plain'] = {
> 'class': LocalStatusFolder,
> 'root': os.path.join(account.getaccountmeta(), 'LocalStatus')
>
--
Nicolas Sebrecht
More information about the OfflineIMAP-project
mailing list