[PATCH] Re: make maxage use UIDs to avoid timezone issues

Nicolas Sebrecht nicolas.s-dev at laposte.net
Wed Mar 25 04:34:05 GMT 2015


On Tue, Mar 24, 2015 at 07:43:03PM -0400, Janna Martl wrote:

> When using maxage, local and remote messagelists are supposed to only
> contain messages from at most maxage days ago. But local and remote used
> different timezones to calculate what "maxage days ago" means, resulting
> in loss of mail. Now, we ask the local folder for maxage days' worth of
> mail, find the lowest UID, and then ask the remote folder for all UID's
> starting with that lowest one.

Nice commit message.

My comments are below and some are nitpicking.  But this patch was sent
signed-off while obviously not tested which is something important to
note. ,-)

I've fixed what I've seen but this would require a review and some
tests.  Patches will follow.

> Signed-off-by: Janna Martl <janna.martl109 at gmail.com>
> ---
>  docs/offlineimap.txt               | 14 +++++++-
>  offlineimap/accounts.py            | 41 +++++++++++++++++++-----
>  offlineimap/folder/Base.py         |  8 +++++
>  offlineimap/folder/Gmail.py        |  5 +--
>  offlineimap/folder/GmailMaildir.py |  4 +--
>  offlineimap/folder/IMAP.py         | 62 +++++++++++++++++++++++++++++-------
>  offlineimap/folder/Maildir.py      | 65 ++++++++++++++++++++++++++++----------
>  7 files changed, 157 insertions(+), 42 deletions(-)
> 
> diff --git a/docs/offlineimap.txt b/docs/offlineimap.txt
> index 858fc0b..54939ad 100644
> --- a/docs/offlineimap.txt
> +++ b/docs/offlineimap.txt
> @@ -135,7 +135,8 @@ Ignore any autorefresh setting in the configuration file.
>    Run only quick synchronizations.
>  +
>  Ignore any flag updates on IMAP servers. If a flag on the remote IMAP changes,
> -and we have the message locally, it will be left untouched in a quick run.
> +and we have the message locally, it will be left untouched in a quick run. This
> +option is ignored if maxage is set in offlineimap.conf.
>  
>  
>  -u <UI>::
> @@ -400,6 +401,17 @@ If you then point your local mutt, or whatever MUA you use to `~/mail/`
>  as root, it should still recognize all folders.
>  
>  
> +* Edge cases with maxage causing too many messages to be synced.
> ++
> +All messages from at most maxage days ago (+/- a few hours, depending on
> +timezones) are synced, but there are cases in which older messages can also be
> +synced. This happens when a message's UID is significantly higher than those of
> +other messages with similar dates, e.g. when messages are added to the local
> +folder behind offlineimap's back, causing them to get assigned a new UID, or
> +when offlineimap first syncs a pre-existing Maildir. In the latter case, it
> +could appear as if a noticeable and random subset of old messages are synced.
> +
> +
>  Authors
>  -------
>  
> diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
> index cac4d88..19fff23 100644
> --- a/offlineimap/accounts.py
> +++ b/offlineimap/accounts.py
> @@ -429,19 +429,28 @@ def syncfolder(account, remotefolder, quick):
>  
>          statusfolder.cachemessagelist()
>  
> -        if quick:
> -            if (not localfolder.quickchanged(statusfolder) and
> -                not remotefolder.quickchanged(statusfolder)):
> -                ui.skippingfolder(remotefolder)
> -                localrepos.restore_atime()
> -                return
> +        maxage = localfolder.getmaxage()
>  
>          # Load local folder.
>          ui.syncingfolder(remoterepos, remotefolder, localrepos, localfolder)
>          ui.loadmessagelist(localrepos, localfolder)
> -        localfolder.cachemessagelist()
> +        # For maxage, returns messages with uid > min(uids of within-maxage messages).
> +        localfolder.cachemessagelist(maxage=maxage)
>          ui.messagelistloaded(localrepos, localfolder, localfolder.getmessagecount())
>  
> +        if quick:
> +            # IMAP quickchanged isn't compatible with maxage, since the "quick"
> +            # check can only retrieve a full list of UIDs in the folder.
> +            if maxage:
> +                ui.warn("Quick syncs (-q) not supported in conjunction "\
> +                    "with maxage; ignoring -q.")
> +            else:
> +                if (not localfolder.quickchanged(statusfolder) and
> +                    not remotefolder.quickchanged(statusfolder)):
> +                    ui.skippingfolder(remotefolder)
> +                    localrepos.restore_atime()
> +                    return
> +
>          # If either the local or the status folder has messages and
>          # there is a UID validity problem, warn and abort.  If there are
>          # no messages, UW IMAPd loses UIDVALIDITY.  But we don't really
> @@ -463,10 +472,26 @@ def syncfolder(account, remotefolder, quick):
>  
>          # Load remote folder.
>          ui.loadmessagelist(remoterepos, remotefolder)
> -        remotefolder.cachemessagelist()
> +        # Enforce maxage on remote folder by restricting to the lowest UID in
> +        # the local messagelist (which has already been restricted using maxage)
> +        if maxage:
> +            # local messagelist might contain new messages (with uid's < 0)
> +            positive_uids = filter(lambda uid: uid > 0,
> +                localfolder.getmessageuidlist())
> +            if positive_uids:
> +                remotefolder.cachemessagelist(min_uid=min(positive_uids))
> +            else: # no messages with UID > 0 in range in localfolder
> +                # maxage is calculated with respect to folder timezone, and
> +                # remote folder timezone might be different from local, so be
> +                # safe and make sure the range isn't bigger than in local
> +                remotefolder.cachemessagelist(maxage=maxage - 1)
> +        else:
> +            remotefolder.cachemessagelist()
>          ui.messagelistloaded(remoterepos, remotefolder,
>                               remotefolder.getmessagecount())
>  
> +
> +
>          # Synchronize remote changes.
>          if not localrepos.getconfboolean('readonly', False):
>              ui.syncingmessages(remoterepos, remotefolder, localrepos, localfolder)
> diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py
> index e3c80b7..123508c 100644
> --- a/offlineimap/folder/Base.py
> +++ b/offlineimap/folder/Base.py
> @@ -298,6 +298,14 @@ class BaseFolder(object):
>  
>          raise NotImplementedError
>  
> +    def getmaxage(self):
> +        return self.config.getdefaultint("Account %s"%
> +            self.accountname, "maxage", None)
> +
> +    def getmaxsize(self):
> +        return self.config.getdefaultint("Account %s"%
> +            self.accountname, "maxsize", None)
> +
>      def savemessage(self, uid, content, flags, rtime):
>          """Writes a new message, with the specified uid.
>  
> diff --git a/offlineimap/folder/Gmail.py b/offlineimap/folder/Gmail.py
> index 1afbe47..93e8eee 100644
> --- a/offlineimap/folder/Gmail.py
> +++ b/offlineimap/folder/Gmail.py
> @@ -121,9 +121,10 @@ class GmailFolder(IMAPFolder):
>  
>      # TODO: merge this code with the parent's cachemessagelist:
>      # TODO: they have too much common logics.
> -    def cachemessagelist(self):
> +    def cachemessagelist(self, maxage=None, min_uid=None):
>          if not self.synclabels:
> -            return super(GmailFolder, self).cachemessagelist()
> +            return super(GmailFolder, self).cachemessagelist(maxage=maxage,
> +                min_uid=min_uid)
>  
>          self.messagelist = {}
>  
> diff --git a/offlineimap/folder/GmailMaildir.py b/offlineimap/folder/GmailMaildir.py
> index 894792d..0ae00bf 100644
> --- a/offlineimap/folder/GmailMaildir.py
> +++ b/offlineimap/folder/GmailMaildir.py
> @@ -64,9 +64,9 @@ class GmailMaildirFolder(MaildirFolder):
>                  'filename': '/no-dir/no-such-file/', 'mtime': 0}
>  
>  
> -    def cachemessagelist(self):
> +    def cachemessagelist(self, maxage=None, min_uid=None):
>          if self.ismessagelistempty():
> -            self.messagelist = self._scanfolder()
> +            self.messagelist = self._scanfolder(maxage=maxage, min_uid=min_uid)
>  
>          # Get mtimes
>          if self.synclabels:
> diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py
> index 4b470a2..bf61ebe 100644
> --- a/offlineimap/folder/IMAP.py
> +++ b/offlineimap/folder/IMAP.py
> @@ -144,7 +144,7 @@ class IMAPFolder(BaseFolder):
>          return False
>  
>  
> -    def _msgs_to_fetch(self, imapobj):
> +    def _msgs_to_fetch(self, imapobj, maxage=None, min_uid=None):
>          """Determines sequence numbers of messages to be fetched.
>  
>          Message sequence numbers (MSNs) are more easily compacted
> @@ -152,6 +152,18 @@ class IMAPFolder(BaseFolder):
>  
>          Arguments:
>          - imapobj: instance of IMAPlib
> +        - maxage (optional): only fetch messages up to maxage days ago
> +        - min_uid (optional): only fetch messages with uid > min_uid

s/>/>=/

> +
> +        This function should be called with at most one of maxage or
> +        min_uid.
> +
> +        If using maxage, this finds the min uid of all messages within
> +        maxage, and returns all messages with uid > this min. As maxage
> +        is only set if this folder is the local folder,

It can also have maxage if no min(UIDs) was not found on local.

> +                                                        this ensures
> +        consistency with remote (which only restricts to uid > this min)
> +        in edge cases where the date is much earlier than messages with
> +        similar UID's (e.g. the UID was reassigned much later).
>  
>          Returns: range(s) for messages or None if no messages
>          are to be fetched."""
> @@ -164,16 +176,13 @@ class IMAPFolder(BaseFolder):
>          # By default examine all messages in this folder
>          msgsToFetch = '1:*'
>  
> -        maxage = self.config.getdefaultint(
> -            "Account %s"% self.accountname, "maxage", -1)
> -        maxsize = self.config.getdefaultint(
> -            "Account %s"% self.accountname, "maxsize", -1)
> +        maxsize = self.getmaxsize()
>  
>          # Build search condition
> -        if (maxage != -1) | (maxsize != -1):
> +        if maxage or min_uid != None or maxsize != None:
>              search_cond = "(";
>  
> -            if(maxage != -1):
> +            if maxage:

maxage != None

>                  #find out what the oldest message is that we should look at
>                  oldest_struct = time.gmtime(time.time() - (60*60*24*maxage))
>                  if oldest_struct[0] < 1900:
> @@ -184,9 +193,14 @@ class IMAPFolder(BaseFolder):
>                      oldest_struct[2],
>                      MonthNames[oldest_struct[1]],
>                      oldest_struct[0])
> +            if min_uid != None:
> +                if maxage: # There are two conditions, add space

maxage != None

> +                    search_cond += " "
> +                search_cond += "UID %d:*"% min_uid
>  
> -            if(maxsize != -1):
> -                if(maxage != -1): # There are two conditions, add space
> +            if maxsize != None:
> +                if maxage or min_uid != None:
> +                    # There are at least two conditions, add space
>                      search_cond += " "
>                  search_cond += "SMALLER %d"% maxsize
>  
> @@ -199,6 +213,30 @@ class IMAPFolder(BaseFolder):
>                      self.getrepository(), self, search_cond, res_type, res_data),
>                      OfflineImapError.ERROR.FOLDER)
>  
> +            if maxage:
> +                # Found messages within maxage, but want all messages with UID
> +                # > the min uid of the within-maxage messages. Ordering by UID
> +                # is the same as ordering by MSN, so we get the messages with
> +                # MSN > the min MSN of the within-maxage messages.
> +                msg_seq_numbers = map(lambda s : int(s), res_data[0].split())
> +                if msg_seq_numbers:
> +                    min_msn = min(msg_seq_numbers)
> +                else:
> +                    return None
> +                if maxsize == None:
> +                    # If no maxsize, can just ask for all messages with MSN > min_msn
> +                    return "%d:*"% min_msn
> +                else:
> +                    # Restrict the range min_msn:* to those with acceptable size.
> +                    # Single-quotes prevent imaplib2 from quoting the sequence.
> +                    search_cond = "'%d:* (SMALLER %d)'"% (min_msn, maxsize)
> +                    res_type, res_data = imapobj.search(None, search_cond)
> +                    if res_type != 'OK':
> +                        raise OfflineImapError("SEARCH in folder [%s]%s failed. "
> +                            "Search string was '%s'. Server responded '[%s] %s'"%
> +                            (self.getrepository(), self, search_cond, res_type,
> +                            res_data), OfflineImapError.ERROR.FOLDER)

Why would we need this entire block?

> +
>              # Resulting MSN are separated by space, coalesce into ranges
>              msgsToFetch = imaputil.uid_sequence(res_data[0].split())
>  
> @@ -210,19 +248,19 @@ class IMAPFolder(BaseFolder):
>  
>  
>      # Interface from BaseFolder
> -    def cachemessagelist(self):
> +    def cachemessagelist(self, maxage=None, min_uid=None):
>          self.messagelist = {}
>  
>          imapobj = self.imapserver.acquireconnection()
>          try:
> -            msgsToFetch = self._msgs_to_fetch(imapobj)
> +            msgsToFetch = self._msgs_to_fetch(imapobj, maxage=maxage, min_uid=min_uid)
>              if not msgsToFetch:
>                  return # No messages to sync
>  
>              # Get the flags and UIDs for these. single-quotes prevent
>              # imaplib2 from quoting the sequence.
>              res_type, response = imapobj.fetch("'%s'"%
> -                msgsToFetch, '(FLAGS UID)')
> +                msgsToFetch, '(FLAGS UID INTERNALDATE)')
>              if res_type != 'OK':
>                  raise OfflineImapError("FETCHING UIDs in folder [%s]%s failed. "
>                      "Server responded '[%s] %s'"% (self.getrepository(), self,
> diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
> index 6df8180..c0ad516 100644
> --- a/offlineimap/folder/Maildir.py
> +++ b/offlineimap/folder/Maildir.py
> @@ -150,18 +150,23 @@ class MaildirFolder(BaseFolder):
>              flags = set((c for c in flagmatch.group(1) if not c.islower()))
>          return prefix, uid, fmd5, flags
>  
> -    def _scanfolder(self):
> +    def _scanfolder(self, maxage=None, min_uid=None):

This hunk badly diverge from before.  Unless I'm mistaken from the
beginning, cachemessagelist() is called once per side for each folder in
order to know what messages we have to sync.

The "localfolder" might counter-intuitively be a remote IMAP server but
Maildir can only be a localfolder. Maildir is processed first, so
min_uid is never known.

>          """Cache the message list from a Maildir.
>  
> +        If using maxage, this finds the min uid of all messages within
> +        maxage, and returns all messages with uid > this min. This
> +        ensures consistency with remote (which only restricts to uid >

s/>/>=/

> +        this min) in edge cases where the date is much earlier than
> +        messages with similar UID's (e.g. the UID was reassigned much
> +        later).
> +
>          Maildir flags are: R (replied) S (seen) T (trashed) D (draft) F
>          (flagged).
>          :returns: dict that can be used as self.messagelist.
>          """
>  
> -        maxage = self.config.getdefaultint("Account " + self.accountname,
> -                                           "maxage", None)
> -        maxsize = self.config.getdefaultint("Account " + self.accountname,
> -                                            "maxsize", None)
> +        maxsize = self.getmaxsize()
> +
>          retval = {}
>          files = []
>          nouidcounter = -1          # Messages without UIDs get negative UIDs.
> @@ -170,12 +175,12 @@ class MaildirFolder(BaseFolder):
>              files.extend((dirannex, filename) for
>                           filename in os.listdir(fulldirname))
>  
> +        maxage_excludees = {}
> +
>          for dirannex, filename in files:
>              # We store just dirannex and filename, ie 'cur/123...'
>              filepath = os.path.join(dirannex, filename)
> -            # Check maxage/maxsize if this message should be considered.
> -            if maxage and not self._iswithinmaxage(filename, maxage):
> -                continue
> +            # Check maxsize if this message should be considered.
>              if maxsize and (os.path.getsize(os.path.join(
>                          self.getfullname(), filepath)) > maxsize):
>                  continue
> @@ -185,6 +190,9 @@ class MaildirFolder(BaseFolder):
>                  uid = nouidcounter
>                  nouidcounter -= 1
>              else:                       # It comes from our folder.
> +                # Check min_uid if this message should be considered.
> +                if min_uid != None and uid < min_uid:
> +                    continue
>                  uidmatch = re_uidmatch.search(filename)
>                  uid = None
>                  if not uidmatch:
> @@ -192,16 +200,36 @@ class MaildirFolder(BaseFolder):
>                      nouidcounter -= 1
>                  else:
>                      uid = long(uidmatch.group(1))
> -            # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
> -            retval[uid] = self.msglist_item_initializer(uid)
> -            retval[uid]['flags'] = flags
> -            retval[uid]['filename'] = filepath
> +            if maxage and not self._iswithinmaxage(filename, maxage):
> +                # Keep track of messages outside of maxage, because the ones
> +                # with uid > min(uids of within-maxage messages) will get
> +                # re-included.

Reworded.

> +                maxage_excludees[uid] = self.msglist_item_initializer(uid)
> +                maxage_excludees[uid]['flags'] = flags
> +                maxage_excludees[uid]['filename'] = filepath
> +            else:
> +                # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
> +                retval[uid] = self.msglist_item_initializer(uid)
> +                retval[uid]['flags'] = flags
> +                retval[uid]['filename'] = filepath
> +        if maxage:
> +            # Re-include messages with high enough uid's.
> +            positive_uids = filter(lambda uid: uid > 0, retval)
> +            if positive_uids:
> +                min_uid = min(positive_uids)
> +                for uid in maxage_excludees.keys():
> +                    if uid > min_uid:
> +                        # This message was originally excluded because of
> +                        # maxage, but is included now because we want all
> +                        # messages with uid > min_uid
> +                        retval[uid] = maxage_excludees[uid]
>          return retval
>  
>      # Interface from BaseFolder
>      def quickchanged(self, statusfolder):
> -        """Returns True if the Maildir has changed"""
> -        self.cachemessagelist()
> +        """Returns True if the Maildir has changed
> +
> +        Assumes cachemessagelist() has already been called """
>          # Folder has different uids than statusfolder => TRUE.
>          if sorted(self.getmessageuidlist()) != \
>                  sorted(statusfolder.getmessageuidlist()):
> @@ -218,9 +246,9 @@ class MaildirFolder(BaseFolder):
>          return {'flags': set(), 'filename': '/no-dir/no-such-file/'}
>  
>      # Interface from BaseFolder
> -    def cachemessagelist(self):
> +    def cachemessagelist(self, maxage=None, min_uid=None):
>          if self.ismessagelistempty():
> -            self.messagelist = self._scanfolder()
> +            self.messagelist = self._scanfolder(maxage=maxage, min_uid=min_uid)
>  
>      # Interface from BaseFolder
>      def getmessagelist(self):
> @@ -445,7 +473,10 @@ class MaildirFolder(BaseFolder):
>              os.unlink(filepath)
>          except OSError:
>              # Can't find the file -- maybe already deleted?
> -            newmsglist = self._scanfolder()
> +            maxage = localfolder.getmaxage()

GLURP!

> +            # get more than enough messages to be sure we captured the min UID
> +            # expected on the other side
> +            newmsglist = self._scanfolder(maxage=maxage + 1)
>              if uid in newmsglist:       # Nope, try new filename.
>                  filename = newmsglist[uid]['filename']
>                  filepath = os.path.join(self.getfullname(), filename)
-- 
Nicolas Sebrecht




More information about the OfflineIMAP-project mailing list