[PATCH] make maxage use UIDs to avoid timezone issues

Janna Martl janna.martl109 at gmail.com
Tue Mar 24 23:43:03 GMT 2015


When using maxage, local and remote messagelists are supposed to only
contain messages from at most maxage days ago. But local and remote used
different timezones to calculate what "maxage days ago" means, resulting
in loss of mail. Now, we ask the local folder for maxage days' worth of
mail, find the lowest UID, and then ask the remote folder for all UID's
starting with that lowest one.

Signed-off-by: Janna Martl <janna.martl109 at gmail.com>
---
 docs/offlineimap.txt               | 14 +++++++-
 offlineimap/accounts.py            | 41 +++++++++++++++++++-----
 offlineimap/folder/Base.py         |  8 +++++
 offlineimap/folder/Gmail.py        |  5 +--
 offlineimap/folder/GmailMaildir.py |  4 +--
 offlineimap/folder/IMAP.py         | 62 +++++++++++++++++++++++++++++-------
 offlineimap/folder/Maildir.py      | 65 ++++++++++++++++++++++++++++----------
 7 files changed, 157 insertions(+), 42 deletions(-)

diff --git a/docs/offlineimap.txt b/docs/offlineimap.txt
index 858fc0b..54939ad 100644
--- a/docs/offlineimap.txt
+++ b/docs/offlineimap.txt
@@ -135,7 +135,8 @@ Ignore any autorefresh setting in the configuration file.
   Run only quick synchronizations.
 +
 Ignore any flag updates on IMAP servers. If a flag on the remote IMAP changes,
-and we have the message locally, it will be left untouched in a quick run.
+and we have the message locally, it will be left untouched in a quick run. This
+option is ignored if maxage is set in offlineimap.conf.
 
 
 -u <UI>::
@@ -400,6 +401,17 @@ If you then point your local mutt, or whatever MUA you use to `~/mail/`
 as root, it should still recognize all folders.
 
 
+* Edge cases with maxage causing too many messages to be synced.
++
+All messages from at most maxage days ago (+/- a few hours, depending on
+timezones) are synced, but there are cases in which older messages can also be
+synced. This happens when a message's UID is significantly higher than those of
+other messages with similar dates, e.g. when messages are added to the local
+folder behind offlineimap's back, causing them to get assigned a new UID, or
+when offlineimap first syncs a pre-existing Maildir. In the latter case, it
+could appear as if a noticeable and random subset of old messages are synced.
+
+
 Authors
 -------
 
diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
index cac4d88..19fff23 100644
--- a/offlineimap/accounts.py
+++ b/offlineimap/accounts.py
@@ -429,19 +429,28 @@ def syncfolder(account, remotefolder, quick):
 
         statusfolder.cachemessagelist()
 
-        if quick:
-            if (not localfolder.quickchanged(statusfolder) and
-                not remotefolder.quickchanged(statusfolder)):
-                ui.skippingfolder(remotefolder)
-                localrepos.restore_atime()
-                return
+        maxage = localfolder.getmaxage()
 
         # Load local folder.
         ui.syncingfolder(remoterepos, remotefolder, localrepos, localfolder)
         ui.loadmessagelist(localrepos, localfolder)
-        localfolder.cachemessagelist()
+        # For maxage, returns messages with uid > min(uids of within-maxage messages).
+        localfolder.cachemessagelist(maxage=maxage)
         ui.messagelistloaded(localrepos, localfolder, localfolder.getmessagecount())
 
+        if quick:
+            # IMAP quickchanged isn't compatible with maxage, since the "quick"
+            # check can only retrieve a full list of UIDs in the folder.
+            if maxage:
+                ui.warn("Quick syncs (-q) not supported in conjunction "\
+                    "with maxage; ignoring -q.")
+            else:
+                if (not localfolder.quickchanged(statusfolder) and
+                    not remotefolder.quickchanged(statusfolder)):
+                    ui.skippingfolder(remotefolder)
+                    localrepos.restore_atime()
+                    return
+
         # If either the local or the status folder has messages and
         # there is a UID validity problem, warn and abort.  If there are
         # no messages, UW IMAPd loses UIDVALIDITY.  But we don't really
@@ -463,10 +472,26 @@ def syncfolder(account, remotefolder, quick):
 
         # Load remote folder.
         ui.loadmessagelist(remoterepos, remotefolder)
-        remotefolder.cachemessagelist()
+        # Enforce maxage on remote folder by restricting to the lowest UID in
+        # the local messagelist (which has already been restricted using maxage)
+        if maxage:
+            # local messagelist might contain new messages (with uid's < 0)
+            positive_uids = filter(lambda uid: uid > 0,
+                localfolder.getmessageuidlist())
+            if positive_uids:
+                remotefolder.cachemessagelist(min_uid=min(positive_uids))
+            else: # no messages with UID > 0 in range in localfolder
+                # maxage is calculated with respect to folder timezone, and
+                # remote folder timezone might be different from local, so be
+                # safe and make sure the range isn't bigger than in local
+                remotefolder.cachemessagelist(maxage=maxage - 1)
+        else:
+            remotefolder.cachemessagelist()
         ui.messagelistloaded(remoterepos, remotefolder,
                              remotefolder.getmessagecount())
 
+
+
         # Synchronize remote changes.
         if not localrepos.getconfboolean('readonly', False):
             ui.syncingmessages(remoterepos, remotefolder, localrepos, localfolder)
diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py
index e3c80b7..123508c 100644
--- a/offlineimap/folder/Base.py
+++ b/offlineimap/folder/Base.py
@@ -298,6 +298,14 @@ class BaseFolder(object):
 
         raise NotImplementedError
 
+    def getmaxage(self):
+        return self.config.getdefaultint("Account %s"%
+            self.accountname, "maxage", None)
+
+    def getmaxsize(self):
+        return self.config.getdefaultint("Account %s"%
+            self.accountname, "maxsize", None)
+
     def savemessage(self, uid, content, flags, rtime):
         """Writes a new message, with the specified uid.
 
diff --git a/offlineimap/folder/Gmail.py b/offlineimap/folder/Gmail.py
index 1afbe47..93e8eee 100644
--- a/offlineimap/folder/Gmail.py
+++ b/offlineimap/folder/Gmail.py
@@ -121,9 +121,10 @@ class GmailFolder(IMAPFolder):
 
     # TODO: merge this code with the parent's cachemessagelist:
     # TODO: they have too much common logics.
-    def cachemessagelist(self):
+    def cachemessagelist(self, maxage=None, min_uid=None):
         if not self.synclabels:
-            return super(GmailFolder, self).cachemessagelist()
+            return super(GmailFolder, self).cachemessagelist(maxage=maxage,
+                min_uid=min_uid)
 
         self.messagelist = {}
 
diff --git a/offlineimap/folder/GmailMaildir.py b/offlineimap/folder/GmailMaildir.py
index 894792d..0ae00bf 100644
--- a/offlineimap/folder/GmailMaildir.py
+++ b/offlineimap/folder/GmailMaildir.py
@@ -64,9 +64,9 @@ class GmailMaildirFolder(MaildirFolder):
                 'filename': '/no-dir/no-such-file/', 'mtime': 0}
 
 
-    def cachemessagelist(self):
+    def cachemessagelist(self, maxage=None, min_uid=None):
         if self.ismessagelistempty():
-            self.messagelist = self._scanfolder()
+            self.messagelist = self._scanfolder(maxage=maxage, min_uid=min_uid)
 
         # Get mtimes
         if self.synclabels:
diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py
index 4b470a2..bf61ebe 100644
--- a/offlineimap/folder/IMAP.py
+++ b/offlineimap/folder/IMAP.py
@@ -144,7 +144,7 @@ class IMAPFolder(BaseFolder):
         return False
 
 
-    def _msgs_to_fetch(self, imapobj):
+    def _msgs_to_fetch(self, imapobj, maxage=None, min_uid=None):
         """Determines sequence numbers of messages to be fetched.
 
         Message sequence numbers (MSNs) are more easily compacted
@@ -152,6 +152,18 @@ class IMAPFolder(BaseFolder):
 
         Arguments:
         - imapobj: instance of IMAPlib
+        - maxage (optional): only fetch messages up to maxage days ago
+        - min_uid (optional): only fetch messages with uid > min_uid
+
+        This function should be called with at most one of maxage or
+        min_uid.
+
+        If using maxage, this finds the min uid of all messages within
+        maxage, and returns all messages with uid > this min. As maxage
+        is only set if this folder is the local folder, this ensures
+        consistency with remote (which only restricts to uid > this min)
+        in edge cases where the date is much earlier than messages with
+        similar UID's (e.g. the UID was reassigned much later).
 
         Returns: range(s) for messages or None if no messages
         are to be fetched."""
@@ -164,16 +176,13 @@ class IMAPFolder(BaseFolder):
         # By default examine all messages in this folder
         msgsToFetch = '1:*'
 
-        maxage = self.config.getdefaultint(
-            "Account %s"% self.accountname, "maxage", -1)
-        maxsize = self.config.getdefaultint(
-            "Account %s"% self.accountname, "maxsize", -1)
+        maxsize = self.getmaxsize()
 
         # Build search condition
-        if (maxage != -1) | (maxsize != -1):
+        if maxage or min_uid != None or maxsize != None:
             search_cond = "(";
 
-            if(maxage != -1):
+            if maxage:
                 #find out what the oldest message is that we should look at
                 oldest_struct = time.gmtime(time.time() - (60*60*24*maxage))
                 if oldest_struct[0] < 1900:
@@ -184,9 +193,14 @@ class IMAPFolder(BaseFolder):
                     oldest_struct[2],
                     MonthNames[oldest_struct[1]],
                     oldest_struct[0])
+            if min_uid != None:
+                if maxage: # There are two conditions, add space
+                    search_cond += " "
+                search_cond += "UID %d:*"% min_uid
 
-            if(maxsize != -1):
-                if(maxage != -1): # There are two conditions, add space
+            if maxsize != None:
+                if maxage or min_uid != None:
+                    # There are at least two conditions, add space
                     search_cond += " "
                 search_cond += "SMALLER %d"% maxsize
 
@@ -199,6 +213,30 @@ class IMAPFolder(BaseFolder):
                     self.getrepository(), self, search_cond, res_type, res_data),
                     OfflineImapError.ERROR.FOLDER)
 
+            if maxage:
+                # Found messages within maxage, but want all messages with UID
+                # > the min uid of the within-maxage messages. Ordering by UID
+                # is the same as ordering by MSN, so we get the messages with
+                # MSN > the min MSN of the within-maxage messages.
+                msg_seq_numbers = map(lambda s : int(s), res_data[0].split())
+                if msg_seq_numbers:
+                    min_msn = min(msg_seq_numbers)
+                else:
+                    return None
+                if maxsize == None:
+                    # If no maxsize, can just ask for all messages with MSN > min_msn
+                    return "%d:*"% min_msn
+                else:
+                    # Restrict the range min_msn:* to those with acceptable size.
+                    # Single-quotes prevent imaplib2 from quoting the sequence.
+                    search_cond = "'%d:* (SMALLER %d)'"% (min_msn, maxsize)
+                    res_type, res_data = imapobj.search(None, search_cond)
+                    if res_type != 'OK':
+                        raise OfflineImapError("SEARCH in folder [%s]%s failed. "
+                            "Search string was '%s'. Server responded '[%s] %s'"%
+                            (self.getrepository(), self, search_cond, res_type,
+                            res_data), OfflineImapError.ERROR.FOLDER)
+
             # Resulting MSN are separated by space, coalesce into ranges
             msgsToFetch = imaputil.uid_sequence(res_data[0].split())
 
@@ -210,19 +248,19 @@ class IMAPFolder(BaseFolder):
 
 
     # Interface from BaseFolder
-    def cachemessagelist(self):
+    def cachemessagelist(self, maxage=None, min_uid=None):
         self.messagelist = {}
 
         imapobj = self.imapserver.acquireconnection()
         try:
-            msgsToFetch = self._msgs_to_fetch(imapobj)
+            msgsToFetch = self._msgs_to_fetch(imapobj, maxage=maxage, min_uid=min_uid)
             if not msgsToFetch:
                 return # No messages to sync
 
             # Get the flags and UIDs for these. single-quotes prevent
             # imaplib2 from quoting the sequence.
             res_type, response = imapobj.fetch("'%s'"%
-                msgsToFetch, '(FLAGS UID)')
+                msgsToFetch, '(FLAGS UID INTERNALDATE)')
             if res_type != 'OK':
                 raise OfflineImapError("FETCHING UIDs in folder [%s]%s failed. "
                     "Server responded '[%s] %s'"% (self.getrepository(), self,
diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index 6df8180..c0ad516 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -150,18 +150,23 @@ class MaildirFolder(BaseFolder):
             flags = set((c for c in flagmatch.group(1) if not c.islower()))
         return prefix, uid, fmd5, flags
 
-    def _scanfolder(self):
+    def _scanfolder(self, maxage=None, min_uid=None):
         """Cache the message list from a Maildir.
 
+        If using maxage, this finds the min uid of all messages within
+        maxage, and returns all messages with uid > this min. This
+        ensures consistency with remote (which only restricts to uid >
+        this min) in edge cases where the date is much earlier than
+        messages with similar UID's (e.g. the UID was reassigned much
+        later).
+
         Maildir flags are: R (replied) S (seen) T (trashed) D (draft) F
         (flagged).
         :returns: dict that can be used as self.messagelist.
         """
 
-        maxage = self.config.getdefaultint("Account " + self.accountname,
-                                           "maxage", None)
-        maxsize = self.config.getdefaultint("Account " + self.accountname,
-                                            "maxsize", None)
+        maxsize = self.getmaxsize()
+
         retval = {}
         files = []
         nouidcounter = -1          # Messages without UIDs get negative UIDs.
@@ -170,12 +175,12 @@ class MaildirFolder(BaseFolder):
             files.extend((dirannex, filename) for
                          filename in os.listdir(fulldirname))
 
+        maxage_excludees = {}
+
         for dirannex, filename in files:
             # We store just dirannex and filename, ie 'cur/123...'
             filepath = os.path.join(dirannex, filename)
-            # Check maxage/maxsize if this message should be considered.
-            if maxage and not self._iswithinmaxage(filename, maxage):
-                continue
+            # Check maxsize if this message should be considered.
             if maxsize and (os.path.getsize(os.path.join(
                         self.getfullname(), filepath)) > maxsize):
                 continue
@@ -185,6 +190,9 @@ class MaildirFolder(BaseFolder):
                 uid = nouidcounter
                 nouidcounter -= 1
             else:                       # It comes from our folder.
+                # Check min_uid if this message should be considered.
+                if min_uid != None and uid < min_uid:
+                    continue
                 uidmatch = re_uidmatch.search(filename)
                 uid = None
                 if not uidmatch:
@@ -192,16 +200,36 @@ class MaildirFolder(BaseFolder):
                     nouidcounter -= 1
                 else:
                     uid = long(uidmatch.group(1))
-            # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
-            retval[uid] = self.msglist_item_initializer(uid)
-            retval[uid]['flags'] = flags
-            retval[uid]['filename'] = filepath
+            if maxage and not self._iswithinmaxage(filename, maxage):
+                # Keep track of messages outside of maxage, because the ones
+                # with uid > min(uids of within-maxage messages) will get
+                # re-included.
+                maxage_excludees[uid] = self.msglist_item_initializer(uid)
+                maxage_excludees[uid]['flags'] = flags
+                maxage_excludees[uid]['filename'] = filepath
+            else:
+                # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
+                retval[uid] = self.msglist_item_initializer(uid)
+                retval[uid]['flags'] = flags
+                retval[uid]['filename'] = filepath
+        if maxage:
+            # Re-include messages with high enough uid's.
+            positive_uids = filter(lambda uid: uid > 0, retval)
+            if positive_uids:
+                min_uid = min(positive_uids)
+                for uid in maxage_excludees.keys():
+                    if uid > min_uid:
+                        # This message was originally excluded because of
+                        # maxage, but is included now because we want all
+                        # messages with uid > min_uid
+                        retval[uid] = maxage_excludees[uid]
         return retval
 
     # Interface from BaseFolder
     def quickchanged(self, statusfolder):
-        """Returns True if the Maildir has changed"""
-        self.cachemessagelist()
+        """Returns True if the Maildir has changed
+
+        Assumes cachemessagelist() has already been called """
         # Folder has different uids than statusfolder => TRUE.
         if sorted(self.getmessageuidlist()) != \
                 sorted(statusfolder.getmessageuidlist()):
@@ -218,9 +246,9 @@ class MaildirFolder(BaseFolder):
         return {'flags': set(), 'filename': '/no-dir/no-such-file/'}
 
     # Interface from BaseFolder
-    def cachemessagelist(self):
+    def cachemessagelist(self, maxage=None, min_uid=None):
         if self.ismessagelistempty():
-            self.messagelist = self._scanfolder()
+            self.messagelist = self._scanfolder(maxage=maxage, min_uid=min_uid)
 
     # Interface from BaseFolder
     def getmessagelist(self):
@@ -445,7 +473,10 @@ class MaildirFolder(BaseFolder):
             os.unlink(filepath)
         except OSError:
             # Can't find the file -- maybe already deleted?
-            newmsglist = self._scanfolder()
+            maxage = localfolder.getmaxage()
+            # get more than enough messages to be sure we captured the min UID
+            # expected on the other side
+            newmsglist = self._scanfolder(maxage=maxage + 1)
             if uid in newmsglist:       # Nope, try new filename.
                 filename = newmsglist[uid]['filename']
                 filepath = os.path.join(self.getfullname(), filename)
-- 
2.3.4





More information about the OfflineIMAP-project mailing list