Moving messages redux

Edward Z. Yang ezyang at MIT.EDU
Thu Aug 23 14:30:44 BST 2012


Here is a horrible proof-of-concept pair of patches implementing this idea: no error
checking, no cleanup, and no respect for encapsulation.  (It works, though!)

Cheers,
Edward

commit 8318b824159227ba7c898caa6ba896001d5671dc
Author: Edward Z. Yang <ezyang at mit.edu>
Date:   Wed Aug 22 20:20:51 2012 -0400

    Factor out parsefilename functionality of Maildir.
    
    Signed-off-by: Edward Z. Yang <ezyang at mit.edu>

diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index df5dd2e..f826ba5 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -64,6 +64,8 @@ class MaildirFolder(BaseFolder):
         self.root = root
         self.sep = sep
         self.messagelist = None
+        self.nouidcounter = -1 # Messages without UIDs get
+                               # negative UID numbers.
 
         self.wincompatible = self.config.getdefaultboolean(
             "Account "+self.accountname, "maildir-windows-compatible", False)
@@ -116,10 +118,6 @@ class MaildirFolder(BaseFolder):
         and must occur in ASCII order."""
         retval = {}
         files = []
-        nouidcounter = -1               # Messages without UIDs get
-                                        # negative UID numbers.
-        foldermd5 = md5(self.getvisiblename()).hexdigest()
-        folderstr = ',FMD5=' + foldermd5
         for dirannex in ['new', 'cur']:
             fulldirname = os.path.join(self.getfullname(), dirannex)
             files.extend(os.path.join(dirannex, filename) for
@@ -144,31 +142,39 @@ class MaildirFolder(BaseFolder):
                 if(size > maxsize):
                     continue
 
-            foldermatch = messagename.find(folderstr) != -1
-            if not foldermatch:
-                # If there is no folder MD5 specified, or if it mismatches,
-                # assume it is a foreign (new) message and generate a
-                # negative uid for it
-                uid = nouidcounter
-                nouidcounter -= 1
-            else:                       # It comes from our folder.
-                uidmatch = uidmatchre.search(messagename)
-                uid = None
-                if not uidmatch:
-                    uid = nouidcounter
-                    nouidcounter -= 1
-                else:
-                    uid = long(uidmatch.group(1))
-            #identify flags in the path name
-            flagmatch = self.flagmatchre.search(messagename)
-            if flagmatch:
-                flags = set(flagmatch.group(1))
-            else:
-                flags = set()
+            uid, flags = self.parsefilename(file)
+
             # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
             retval[uid] = {'flags': flags, 'filename': file}
         return retval
 
+    def parsefilename(self, file):
+        foldermd5 = md5(self.getvisiblename()).hexdigest()
+        folderstr = ',FMD5=' + foldermd5
+        messagename = os.path.basename(file)
+        foldermatch = messagename.find(folderstr) != -1
+        if not foldermatch:
+            # If there is no folder MD5 specified, or if it mismatches,
+            # assume it is a foreign (new) message and generate a
+            # negative uid for it
+            uid = self.nouidcounter
+            self.nouidcounter -= 1
+        else:                       # It comes from our folder.
+            uidmatch = uidmatchre.search(messagename)
+            uid = None
+            if not uidmatch:
+                uid = self.nouidcounter
+                self.nouidcounter -= 1
+            else:
+                uid = long(uidmatch.group(1))
+        #identify flags in the path name
+        flagmatch = self.flagmatchre.search(messagename)
+        if flagmatch:
+            flags = set(flagmatch.group(1))
+        else:
+            flags = set()
+        return uid, flags
+
     def quickchanged(self, statusfolder):
         """Returns True if the Maildir has changed"""
         self.cachemessagelist()

commit 40412d42ec58716da7ca93928b71c30566e6bab2
Author: Edward Z. Yang <ezyang at mit.edu>
Date:   Wed Aug 22 23:54:04 2012 -0400

    Short circuit moves implementation.
    
    Signed-off-by: Edward Z. Yang <ezyang at mit.edu>

diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
index 71f08fe..880fc2a 100644
--- a/offlineimap/accounts.py
+++ b/offlineimap/accounts.py
@@ -270,6 +270,9 @@ class SyncableAccount(Account):
                 self.ui.syncfolders(remoterepos, localrepos)
                 remoterepos.syncfoldersto(localrepos, statusrepos)
 
+            # try to short circuit moves
+            localrepos.syncmoves(remoterepos, statusrepos)
+
             # iterate through all folders on the remote repo and sync
             for remotefolder in remoterepos.getfolders():
                 if not remotefolder.sync_this:
diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py
index 91ee08a..c8ba819 100644
--- a/offlineimap/folder/Base.py
+++ b/offlineimap/folder/Base.py
@@ -240,6 +240,9 @@ class BaseFolder(object):
         for uid in uidlist:
             self.deletemessage(uid)
 
+    def remotecopymessage(self, uid, remote_newfolder, local_newfolder=None, status_newfolder=None):
+        raise NotImplementedError  # base stub: the syncmoves caller catches this and skips the move
+
     def copymessageto(self, uid, dstfolder, statusfolder, register = 1):
         """Copies a message from self to dst if needed, updating the status
 
diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py
index 35a0942..9d79ece 100644
--- a/offlineimap/folder/IMAP.py
+++ b/offlineimap/folder/IMAP.py
@@ -619,6 +619,47 @@ class IMAPFolder(BaseFolder):
             flags = imaputil.flags2hash(imaputil.imapsplit(result)[1])['FLAGS']
             self.messagelist[uid]['flags'] = imaputil.flagsimap2maildir(flags)
 
+    def remotecopymessage(self, uid, remote_newfolder):
+        assert isinstance(remote_newfolder, IMAPFolder)
+        # XXX debugging
+        # XXX make sure uid is actually present in folder
+        imapobj = self.imapserver.acquireconnection()
+        try:
+            use_uidplus = 'UIDPLUS' in imapobj.capabilities
+            try:
+                imapobj.select(self.getfullname(), True, True)
+                (typ, dat) = imapobj.uid("copy", "%d" % uid, remote_newfolder.getfullname())
+                if typ == 'NO':
+                    return 0
+            except imapobj.error, e:
+                self.ui.warn('When copying message with UID %s, got error %s' % (uid, str(e)))
+                return 0
+            (typ,dat) = imapobj.check()
+            assert(typ == 'OK')
+            if use_uidplus or imapobj._get_untagged_response('COPYUID', True):
+                if not imapobj._get_untagged_response('COPYUID', True):
+                    self.ui.warn("Server supports UIDPLUS but got no COPYUID "
+                                 "copying a message.")
+                    return 0
+                resp = imapobj._get_untagged_response('COPYUID', True)
+                newuid = long(resp[-1].split(' ')[2])
+            else:
+                assert False # XXX do something better here (the X-OfflineIMAP header still exists)
+
+        finally:
+            self.imapserver.releaseconnection(imapobj)
+
+        # flags are nonsense to either become irrelevant (trashed) or
+        # overwritten (by a savemessageflags)
+        self.messagelist = {}
+        remote_newfolder.messagelist = {}
+        if uid:
+            self.messagelist[uid] = {'uid': uid, 'flags': set()}
+        if newuid:
+            remote_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': set()}
+
+        return newuid
+
     def addmessageflags(self, uid, flags):
         self.addmessagesflags([uid], flags)
 
@@ -712,6 +753,15 @@ class IMAPFolder(BaseFolder):
         for uid in uidlist:
             del self.messagelist[uid]
 
+    def doexpunge(self):
+        imapobj = self.imapserver.acquireconnection()
+        try:
+            imapobj.select(self.getfullname())
+            r = imapobj.expunge()[0]
+            assert r == 'OK'
+        finally:
+            self.imapserver.releaseconnection(imapobj)
+
     def syncmessagesto_delete(self, dest, applyto):
         """Pass 3 of folder synchronization.
 
diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index f826ba5..19418de 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -269,6 +269,21 @@ class MaildirFolder(BaseFolder):
     def getmessageflags(self, uid):
         return self.messagelist[uid]['flags']
 
+    def _move_file(self, oldfilename, uid, flags):
+        timeval, timeseq = gettimeseq()
+        messagename = '%d_%d.%d.%s,U=%d,FMD5=%s' % \
+            (timeval,
+             timeseq,
+             os.getpid(),
+             socket.gethostname(),
+             uid,
+             md5(self.getvisiblename()).hexdigest())
+        os.rename(os.path.join(self.getfullname(), oldfilename),
+                  os.path.join(self.getfullname(), os.path.join('tmp', messagename)))
+        self.messagelist = {}
+        self.messagelist[uid] = {'flags': set(), 'filename': os.path.join('tmp', messagename)}
+        self.savemessageflags(uid, flags)
+
     def savemessageflags(self, uid, flags):
         oldfilename = self.messagelist[uid]['filename']
         dir_prefix, newname = os.path.split(oldfilename)
diff --git a/offlineimap/repository/Base.py b/offlineimap/repository/Base.py
index 972aefb..ca9fe34 100644
--- a/offlineimap/repository/Base.py
+++ b/offlineimap/repository/Base.py
@@ -132,6 +132,17 @@ class BaseRepository(object, CustomConfig.ConfigHelperMixin):
 
     def getfolder(self, foldername):
         raise NotImplementedError
+
+    def syncmoves(self, remoterepos, statusrepos):
+        """Optimization pass for synchronizing moved messages.
+
+        In some cases, it may be possible to more efficiently synchronize
+        messages that moved (contents and uid stayed the same, but
+        the message is in a different folder) than deleting and reuploading.
+        Any local repository may optionally provide an implementation for
+        detecting and carrying out moves.  Classes are not required to implement
+        this."""
+        pass
     
     def syncfoldersto(self, dst_repo, status_repo):
         """Syncs the folders in this repository to those in dest.
diff --git a/offlineimap/repository/Maildir.py b/offlineimap/repository/Maildir.py
index ae09486..6ec9b05 100644
--- a/offlineimap/repository/Maildir.py
+++ b/offlineimap/repository/Maildir.py
@@ -116,6 +116,9 @@ class MaildirRepository(BaseRepository):
     def deletefolder(self, foldername):
         self.ui.warn("NOT YET IMPLEMENTED: DELETE FOLDER %s" % foldername)
 
+    def forgetfolders(self):
+        self.folders = None
+
     def getfolder(self, foldername):
         """Return a Folder instance of this Maildir
 
@@ -192,4 +195,134 @@ class MaildirRepository(BaseRepository):
         if self.folders == None:
             self.folders = self._getfolders_scandir(self.root)
         return self.folders
+
+    def _getmoves(self):
+        for folder in self.getfolders():
+            try:
+                for entry in os.listdir(os.path.join(folder.getfullname(), "mv")):
+                    fn = os.path.join(folder.getfullname(), "mv", entry)
+                    newdest = open(fn).read().strip()
+                    newfolder = os.path.dirname(os.path.dirname(os.path.relpath(newdest, self.root)))
+                    yield fn,\
+                          folder.getname(),\
+                          newfolder,\
+                          entry,\
+                          os.path.join(os.path.basename(os.path.dirname(newdest)), os.path.basename(newdest)),\
+                          newdest
+            except OSError:
+                continue
+
+    def _getmoves_postdelete(self):
+        for x in self._getmoves():
+            try:
+                yield x
+            finally:
+                # Remove entries after processing, since it's harmless
+                # to double-process except from a performance
+                # perspective
+                os.unlink(x[0])
     
+    def syncmoves(self, remoterepos, statusrepos):
+        # TODO thread me!
+        # TODO does not respect do not sync (this is actually kind of useful)
+
+        if not next(self._getmoves(), False):
+            return
+
+        # Initialize by instantiating cache for all status folders
+        # We setup cache for access because we need messagelist to
+        # persist over invocations to 'getfolder' (why doesn't
+        # caching give us that already?)
+        cache = {}
+        for statusfolder in statusrepos.getfolders():
+            statusfolder.cachemessagelist()
+            cache[statusfolder.getname()] = statusfolder
+
+        for fn, oldfolder, newfolder, old_filename, filename, fullname in self._getmoves_postdelete():
+            # example data:
+            #
+            # fn = /home/ezyang/Mail/MIT/INBOX/mv/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
+            # oldfolder = INBOX
+            # newfolder = INBOX.Archive
+            # old_filename = 1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
+            # filename = new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
+            # fullname = /home/ezyang/Mail/MIT/INBOX.Archive/new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
+            #
+            # old_filename doesn't have leading cur/ or new/, we only
+            # care about it for the old flags value (XXX the value is
+            # inaccurate if Sup first changed the flag (move 1) and then
+            # moved source (move 2); we do OK if the change is atomic
+            # e.g. you did 'A')
+
+            if oldfolder == newfolder:
+                continue
+
+            if not os.path.exists(fullname):
+                continue
+
+            # XXX uid validity check (twice)
+            # XXX readonly
+            # XXX error handling and UI
+            # XXX restoreatime
+
+            # If we fail, we bail out and just ask the later phases
+            # to do it the slow way.
+
+            # Note: newfolder/filename == access in Maildir
+
+            # - Lookup old and new folders both local and IMAP
+            local_oldfolder = self.getfolder(oldfolder) # Must be Maildir
+            local_newfolder = self.getfolder(newfolder) # ditto
+            remote_oldfolder = remoterepos.getfolder(oldfolder.replace(self.getsep(), remoterepos.getsep()))
+            remote_newfolder = remoterepos.getfolder(newfolder.replace(self.getsep(), remoterepos.getsep()))
+            status_oldfolder = cache[oldfolder.replace(self.getsep(), statusrepos.getsep())]
+            status_newfolder = cache[newfolder.replace(self.getsep(), statusrepos.getsep())]
+            # We don't read out the message lists for local/remote, since the
+            # filenames *give us the information we need*.
+
+            # - Parse filename into uid and flags AND
+            #   (Note: Use old folder so that the directory is correct)
+            # XXX I think old_flags is strictly unnecessary
+            _, old_flags = local_oldfolder.parsefilename(old_filename) # data will get overwritten
+            uid, flags = local_oldfolder.parsefilename(filename)
+
+            if uid < 0:
+                continue
+
+            # In IMAP land (this is new function)
+            # - Check if uid exists in old folder on server
+            # - Initiate IMAP COPY from old folder to new folder on server,
+            #   retrieving the new assigned ID
+            # - Rename the maildir copy according to new ID (changeuid)
+            # - Delete old ID
+            try:
+                newuid = remote_oldfolder.remotecopymessage(uid, remote_newfolder)
+            except NotImplementedError:
+                continue
+            if newuid <= 0:
+                continue
+
+            local_newfolder._move_file(filename, newuid, flags) # if they don't match, this will trigger an update
+            # not using methods because they are too slow (and it is
+            # safe not to update status)
+            # XXX Maybe save periodically
+            status_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': old_flags }
+            if uid in status_oldfolder.messagelist:
+                del status_oldfolder.messagelist[uid]
+            # Explicit delete is necessary
+            remote_oldfolder.expunge = False # XXX speed hack
+            remote_oldfolder.deletemessage(uid)
+
+            print "Moved %s" % newuid
+
+        for remotefolder in remoterepos.getfolders():
+            remotefolder.doexpunge()
+        for statusfolder in cache.values():
+            statusfolder.save()
+
+        # critical, since we've polluted the message cache
+        # (especially for self and remote; probably status
+        # can get away with not doing this)
+        self.forgetfolders()
+        remoterepos.forgetfolders()
+        statusrepos.forgetfolders()




More information about the OfflineIMAP-project mailing list