<DKIM> [PATCH,review]: add lmdb folder backend

Nicolas Sebrecht nicolas.s-dev at laposte.net
Mon Dec 19 12:31:12 GMT 2016


  Hi,

I did a quick review.

This feature would obviously require documentation in offlineimap.conf
and this must be marked EXPERIMENTAL.

On Mon, Dec 19, 2016 at 09:29:29AM +0000, lkcl wrote:

> diff --git a/offlineimap/folder/LocalStatusLMDB.py b/offlineimap/folder/LocalStatusLMDB.py
> index e69de29..3108595 100644
> --- a/offlineimap/folder/LocalStatusLMDB.py
> +++ b/offlineimap/folder/LocalStatusLMDB.py
> @@ -0,0 +1,299 @@
> +# Local status cache virtual folder: LMDB backend
> +# Copyright (C) 2009-2016 Stewart Smith and contributors.
> +# Copyright (C) 2016 Luke Kenneth Casson Leighton <lkcl at lkcl.net>

We decided to use "and contributors" to avoid adding new copyright
lines.

If this new file is yours, I'd suggest:

  # Copyright (C) 2016 Luke Kenneth Casson Leighton and contributors.

to keep things simple. If you really want to keep Stewart copyright

  # Copyright (C) 2009-2016 Stewart Smith and contributors.

is good, too.

> +#
> +#    This program is free software; you can redistribute it and/or modify
> +#    it under the terms of the GNU General Public License as published by
> +#    the Free Software Foundation; either version 2 of the License, or
> +#    (at your option) any later version.
> +#
> +#    This program is distributed in the hope that it will be useful,
> +#    but WITHOUT ANY WARRANTY; without even the implied warranty of
> +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +#    GNU General Public License for more details.
> +#
> +#    You should have received a copy of the GNU General Public License
> +#    along with this program; if not, write to the Free Software
> +#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
> +
> +import os
> +import lmdb


Python 2.7.10 (default, Nov  9 2016, 23:16:09) 
[GCC 4.9.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import lmdb
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  ImportError: No module named lmdb
>>>

We need to make this dependency optional. Note that this is likely
easier to achieve when importing/using LocalStatusLMDB in
repository/LocalStatus.py.

> +from sys import exc_info
> +
> +import six
> +try:
> +    # Use ultra-fast json library if available (much faster,
> +    # see https://blog.hartleybrody.com/python-serialize/)
> +    import ujson as json
> +except ImportError:
> +    # not available, fall back to standard python json library (slower)
> +    import json
> +
> +from .Base import BaseFolder
> +
> +
> +class LocalStatusLMDBFolder(BaseFolder):
> +    """ LocalStatus backend implemented with an LMDB database
> +    """

    """LocalStatus backend implemented with an LMDB database."""

(style)


> +
> +    # Current version of our db format.
> +    cur_version = 1
> +
> +    def __init__(self, name, repository):
> +        self.sep = '.' # Needs to be set before super().__init__().
> +        super(LocalStatusLMDBFolder, self).__init__(name, repository)
> +        self.root = repository.root
> +        self.filename = os.path.join(self.getroot(), self.getfolderbasename())
> +
> +        self._newfolder = False # Flag if the folder is new.
> +
> +        dirname = os.path.dirname(self.filename)
> +        if not os.path.exists(dirname):
> +            os.makedirs(dirname)
> +        if not os.path.isdir(dirname):
> +            raise UserWarning("LMDB database path '%s' is not a directory."%
> +                               dirname)

Race condition. Not significant, though.

> +
> +        self._env = None
> +
> +    def openfiles(self):
> +        """ Open database, check it, upgrade if needed
> +        """
> +
> +        # Try to establish connection
> +        try:
> +            self._env = lmdb.open(self.filename, max_dbs=10)
> +        except lmdb.Error as e:
> +            # Operation had failed.
> +            six.reraise(UserWarning,
> +                        UserWarning(
> +                            "cannot open database file '%s': %s.\nYou might"
> +                            " want to check the rights to that file and if "
> +                            "it cleanly opens with the 'lmdb<3>' command"%
> +                            (self.filename, e)),
> +                        exc_info()[2])
> +
> +        with self._env.begin() as txn:

I wonder whether locks are missing here. This class will be
*instantiated* and used more than once in different threads.

In order to avoid repeating the same code/patterns in the backends, I
guess the best approach would be to add a new layer (class) on top of
them to serialize the I/O. Not sure about that, though.

> +            # Test if db version is current enough and if db is readable.
> +            try:
> +                db = self.env.open_db('metadata')
> +                with self._env.begin(db=db) as txn:
> +                    cursor = txn.cursor()
> +                    version = int(cursor.get('db_version'))
> +            except:
> +                # db file missing or corrupt, recreate it.
> +                self.__create_db()
> +            else:
> +                # Fetch db version and upgrade if needed.
> +                if version < LocalStatusLMDBFolder.cur_version:
> +                    self.__upgrade_db(version)
> +
> +    def purge(self):
> +        """ Remove any pre-existing database. Do not call in dry-run mode.
> +        """
> +
> +        try:
> +            os.unlink(self.filename)
> +        except OSError as e:
> +            self.ui.debug('', "could not remove file %s: %s"%
> +                (self.filename, e))
> +
> +    def storesmessages(self):
> +        return False
> +
> +    def getfullname(self):
> +        return self.filename
> +
> +    # Interface from LocalStatusFolder
> +    def isnewfolder(self):
> +        return self._newfolder
> +
> +    def __upgrade_db(self, from_ver):
> +        """ Upgrade the lmdb format from version 'from_ver' to current
> +        """
> +
> +        # Future version upgrades come here...
> +        # if from_ver <= 1: ... #upgrade from 1 to 2
> +        # if from_ver <= 2: ... #upgrade from 2 to 3
> +        # if from_ver <= 3: ... #upgrade from 3 to 4
> +
> +
> +    def __create_db(self):
> +        """Create a new db file.
> +        """
> +        self.ui._msg('Creating new Local Status db for %s:%s'%
> +                     (self.repository, self))
> +        self._metadata_db = self._env.open_db('metadata')
> +        self._status_db = self._env.open_db('status')
> +        with self._env.begin(write=True) as txn:
> +            txn.put('db_version', str(LocalStatusLMDBFolder.cur_version),
> +                    db=self._metadata_db)
> +        self._newfolder = True
> +
> +    # Interface from BaseFolder
> +    def msglist_item_initializer(self, uid):
> +        # XXX not used (there's no point)
> +        return { 'uid': uid,
> +                 'flags': set(), 
> +                 'labels': set(), 

Trailing spaces on the two lines above.

> +                 'time': 0,
> +                 'mtime': 0
> +               }
> +
> +    # Interface from BaseFolder
> +    def cachemessagelist(self):
> +        """ caches in memory all messages in the lmdb status_db
> +        """
> +        self.dropmessagelistcache()
> +        with self._env.begin(db=self._status_db) as txn:
> +            for key, val in txn.cursor():
> +                uid = int(key)
> +                print "cachemsglist", uid, val
> +                # if flags or labels are empty they're stored as null
> +                # so subst an empty tuple, converts to empty set
> +                (flags, labels, _time, mtime) = json.loads(val)
> +                msg = { 'time': _time,
> +                        'mtime': mtime,
> +                        'flags': set(flags or () ),
                                                   ^

> +                        'labels': set(labels or () )
                                                     ^

                        'labels': set(labels or ())

> +                      }
> +                self.messagelist[uid] = msg
> +
> +    def closefiles(self):
> +        self._env.close()
> +        self._env = None
> +        self._metadata_db = None
> +        self._status_db = None
> +
> +    # Interface from LocalStatusFolder
> +    def save(self):
> +        pass
> +        # Noop. every transaction commits to database!
> +
> +    def _save_msg(self, txn, uid, msg):
> +        # take relevant stuff from msg, convert to tuple.
> +        # empty sets are saved as null
> +        msg = (msg['flags'] or None, # empty set evaluates True => save space
> +               msg['labels'] or None, # ditto
> +               msg['time'],
> +               msg['mtime'])
> +        txn.put(str(uid), json.dumps(msg), db=self._status_db)
> +
> +    def saveall(self):
> +        """ Saves the entire messagelist to the database.
> +        """
> +        with self._env.begin(write=True) as txn:
> +            for uid in self.messagelist:
> +                self._save_msg(txn, uid, self.messagelist[uid])
> +
> +    # Interface from BaseFolder
> +    def savemessage(self, uid, content, flags, rtime, mtime=0, labels=None):
> +        """ Writes a new message, with the specified uid.
> +
> +            See folder/Base for detail. Note that savemessage() does not
> +            check against dryrun settings, so you need to ensure that
> +            savemessage is never called in a dryrun mode.
> +        """
> +
> +        if uid < 0:
> +            # We cannot assign a uid.
> +            return uid
> +
> +        if self.uidexists(uid): # Already have it.
> +            self.savemessageflags(uid, flags)
> +            return uid
> +
> +        msg = {'uid': uid,
> +               'flags': flags, 
> +               'time': rtime, 
> +               'mtime': mtime, 

Trailing spaces.

> +               'labels': labels or set()
> +              }
> +        self.messagelist[uid] = msg
> +
> +        with self._env.begin(write=True) as txn:
> +            self._save_msg(txn, uid, msg)
> +
> +        return uid
> +
> +    # Interface from BaseFolder
> +    def savemessageflags(self, uid, flags):
> +        assert self.uidexists(uid)
> +        msg = self.messagelist[uid]
> +        msg['flags'] = flags
> +
> +        with self._env.begin(write=True) as txn:
> +            self._save_msg(txn, uid, msg)
> +
> +    def getmessageflags(self, uid):
> +        return self.messagelist[uid]['flags']
> +
> +    def savemessagelabels(self, uid, labels, mtime=None):
> +        msg = self.messagelist[uid]
> +        msg['labels'] = labels
> +        if mtime:
> +            msg['mtime'] = mtime
> +
> +        with self._env.begin(write=True) as txn:
> +            self._save_msg(txn, uid, msg)
> +
> +    def _save_by_uids(self, uids):
> +        with self._env.begin(write=True) as txn:
> +            for uid in uids: # dict iterates keys, list iterates members
> +                self._save_msg(txn, uid, self.messagelist[uid])
> +
> +    def savemessageslabelsbulk(self, labels):
> +        """ Saves labels from a dictionary in a single database operation.
> +        """
> +        for uid in labels:
> +            self.messagelist[uid]['labels'] = l
> +        self._save_by_uids(labels) # use for on dict to get uids as keys
> +
> +    def addmessageslabels(self, uids, labels):
> +        for uid in uids:
> +            self.messagelist[uid]['labels'].update(labels)
> +        self._save_by_uids(uids)
> +
> +    def deletemessageslabels(self, uids, labels):
> +        for uid in uids:
> +            self.messagelist[uid]['labels'] -= labels
> +        self._save_by_uids(uids) # use for on dict to get uids as keys
> +
> +    def getmessagelabels(self, uid):
> +        return self.messagelist[uid]['labels']
> +
> +    def savemessagesmtimebulk(self, mtimes):
> +        """ Saves mtimes from a dictionary in a single database operation.
> +        """
> +
> +        for uid in mtimes:
> +            mt = mtimes[uid]
> +            self.messagelist[uid]['mtime'] = mt
> +        self._save_by_uids(mtimes) # use for on dict to get uids as keys
> +
> +    def getmessagemtime(self, uid):
> +        return self.messagelist[uid]['mtime']
> +
> +    # Interface from BaseFolder
> +    def deletemessage(self, uid):
> +        if not uid in self.messagelist:
> +            return
> +        with self._env.begin(write=True) as txn:
> +            txn.drop(self._status_db, str(uid))
> +        del self.messagelist[uid]
> +
> +    # Interface from BaseFolder
> +    def deletemessages(self, uidlist):
> +        """ Delete list of UIDs from status cache
> +        """
> +
> +        # Weed out ones not in self.messagelist
> +        uidlist = [uid for uid in uidlist if uid in self.messagelist]
> +        if len(uidlist) == 0:
> +            return
> +        with self._env.begin(write=True) as txn:
> +            for uid in uidlist:
> +                txn.drop(self._status_db, str(uid))
> +        for uid in uidlist:
> +            del self.messagelist[uid]
> +
> diff --git a/offlineimap/repository/LocalStatus.py b/offlineimap/repository/LocalStatus.py
> index f23020f..b21f07b 100644
> --- a/offlineimap/repository/LocalStatus.py
> +++ b/offlineimap/repository/LocalStatus.py
> @@ -19,6 +19,7 @@ import os
>  
>  from offlineimap.folder.LocalStatus import LocalStatusFolder
>  from offlineimap.folder.LocalStatusSQLite import LocalStatusSQLiteFolder
> +from offlineimap.folder.LocalStatusLMDB import LocalStatusLMDBFolder
>  from offlineimap.repository.Base import BaseRepository
>  
>  class LocalStatusRepository(BaseRepository):
> @@ -32,6 +33,11 @@ class LocalStatusRepository(BaseRepository):
>              'root': os.path.join(account.getaccountmeta(), 'LocalStatus-sqlite')
>          }
>  
> +        self.backends['lmdb'] = {
> +            'class': LocalStatusLMDBFolder,
> +            'root': os.path.join(account.getaccountmeta(), 'LocalStatus-lmdb')
> +        }
> +
>          self.backends['plain'] = {
>              'class': LocalStatusFolder,
>              'root': os.path.join(account.getaccountmeta(), 'LocalStatus')
> 

-- 
Nicolas Sebrecht




More information about the OfflineIMAP-project mailing list