Moving messages redux

Nicolas Sebrecht nicolas.s-dev at laposte.net
Sat Jan 26 16:52:52 UTC 2013


Hi Edward,

Since the recent call for patches, I guess you might like to resend the
patch.

On Thu, Aug 23, 2012 at 09:30:44AM -0400, Edward Z. Yang wrote:

> Here is a horrible, non-error checking, non-cleaned up, non-encapsulation respecting
> proof of concept pair of patches which implement this concept.  (It works though!)
> 
> Cheers,
> Edward
> 
> commit 8318b824159227ba7c898caa6ba896001d5671dc
> Author: Edward Z. Yang <ezyang at mit.edu>
> Date:   Wed Aug 22 20:20:51 2012 -0400
> 
>     Factor out parsefilename functionality of Maildir.
>     
>     Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
>
>
> diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
> index df5dd2e..f826ba5 100644
> --- a/offlineimap/folder/Maildir.py
> +++ b/offlineimap/folder/Maildir.py
> @@ -64,6 +64,8 @@ class MaildirFolder(BaseFolder):
>          self.root = root
>          self.sep = sep
>          self.messagelist = None
> +        self.nouidcounter = -1 # Messages without UIDs get
> +                               # negative UID numbers.
>  
>          self.wincompatible = self.config.getdefaultboolean(
>              "Account "+self.accountname, "maildir-windows-compatible", False)
> @@ -116,10 +118,6 @@ class MaildirFolder(BaseFolder):
>          and must occur in ASCII order."""
>          retval = {}
>          files = []
> -        nouidcounter = -1               # Messages without UIDs get
> -                                        # negative UID numbers.
> -        foldermd5 = md5(self.getvisiblename()).hexdigest()
> -        folderstr = ',FMD5=' + foldermd5
>          for dirannex in ['new', 'cur']:
>              fulldirname = os.path.join(self.getfullname(), dirannex)
>              files.extend(os.path.join(dirannex, filename) for
> @@ -144,31 +142,39 @@ class MaildirFolder(BaseFolder):
>                  if(size > maxsize):
>                      continue
>  
> -            foldermatch = messagename.find(folderstr) != -1
> -            if not foldermatch:
> -                # If there is no folder MD5 specified, or if it mismatches,
> -                # assume it is a foreign (new) message and generate a
> -                # negative uid for it
> -                uid = nouidcounter
> -                nouidcounter -= 1
> -            else:                       # It comes from our folder.
> -                uidmatch = uidmatchre.search(messagename)
> -                uid = None
> -                if not uidmatch:
> -                    uid = nouidcounter
> -                    nouidcounter -= 1
> -                else:
> -                    uid = long(uidmatch.group(1))
> -            #identify flags in the path name
> -            flagmatch = self.flagmatchre.search(messagename)
> -            if flagmatch:
> -                flags = set(flagmatch.group(1))
> -            else:
> -                flags = set()
> +            uid, flags = self.parsefilename(file)
> +
>              # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
>              retval[uid] = {'flags': flags, 'filename': file}
>          return retval
>  
> +    def parsefilename(self, file):
> +        foldermd5 = md5(self.getvisiblename()).hexdigest()
> +        folderstr = ',FMD5=' + foldermd5
> +        messagename = os.path.basename(file)
> +        foldermatch = messagename.find(folderstr) != -1
> +        if not foldermatch:
> +            # If there is no folder MD5 specified, or if it mismatches,
> +            # assume it is a foreign (new) message and generate a
> +            # negative uid for it
> +            uid = self.nouidcounter
> +            self.nouidcounter -= 1
> +        else:                       # It comes from our folder.
> +            uidmatch = uidmatchre.search(messagename)
> +            uid = None
> +            if not uidmatch:
> +                uid = self.nouidcounter
> +                self.nouidcounter -= 1
> +            else:
> +                uid = long(uidmatch.group(1))
> +        #identify flags in the path name
> +        flagmatch = self.flagmatchre.search(messagename)
> +        if flagmatch:
> +            flags = set(flagmatch.group(1))
> +        else:
> +            flags = set()
> +        return uid, flags
> +
>      def quickchanged(self, statusfolder):
>          """Returns True if the Maildir has changed"""
>          self.cachemessagelist()
> 
> commit 40412d42ec58716da7ca93928b71c30566e6bab2
> Author: Edward Z. Yang <ezyang at mit.edu>
> Date:   Wed Aug 22 23:54:04 2012 -0400
> 
>     Short circuit moves implementation.
>     
>     Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
> 
> diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
> index 71f08fe..880fc2a 100644
> --- a/offlineimap/accounts.py
> +++ b/offlineimap/accounts.py
> @@ -270,6 +270,9 @@ class SyncableAccount(Account):
>                  self.ui.syncfolders(remoterepos, localrepos)
>                  remoterepos.syncfoldersto(localrepos, statusrepos)
>  
> +            # try to short circuit moves
> +            localrepos.syncmoves(remoterepos, statusrepos)
> +
>              # iterate through all folders on the remote repo and sync
>              for remotefolder in remoterepos.getfolders():
>                  if not remotefolder.sync_this:
> diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py
> index 91ee08a..c8ba819 100644
> --- a/offlineimap/folder/Base.py
> +++ b/offlineimap/folder/Base.py
> @@ -240,6 +240,9 @@ class BaseFolder(object):
>          for uid in uidlist:
>              self.deletemessage(uid)
>  
> +    def remotecopymessage(self, uid, remote_newfolder, local_newfolder, status_newfolder):
> +        raise NotImplementedException
> +
>      def copymessageto(self, uid, dstfolder, statusfolder, register = 1):
>          """Copies a message from self to dst if needed, updating the status
>  
> diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py
> index 35a0942..9d79ece 100644
> --- a/offlineimap/folder/IMAP.py
> +++ b/offlineimap/folder/IMAP.py
> @@ -619,6 +619,47 @@ class IMAPFolder(BaseFolder):
>              flags = imaputil.flags2hash(imaputil.imapsplit(result)[1])['FLAGS']
>              self.messagelist[uid]['flags'] = imaputil.flagsimap2maildir(flags)
>  
> +    def remotecopymessage(self, uid, remote_newfolder):
> +        assert isinstance(remote_newfolder, IMAPFolder)
> +        # XXX debugging
> +        # XXX make sure uid is actually present in folder
> +        imapobj = self.imapserver.acquireconnection()
> +        try:
> +            use_uidplus = 'UIDPLUS' in imapobj.capabilities
> +            try:
> +                imapobj.select(self.getfullname(), True, True)
> +                (typ, dat) = imapobj.uid("copy", "%d" % uid, remote_newfolder.getfullname())
> +                if typ == 'NO':
> +                    return 0
> +            except imapobj.error, e:
> +                self.ui.warn('When copying message with UID %s, got error %s' % (uid, str(e)))
> +                return 0
> +            (typ,dat) = imapobj.check()
> +            assert(typ == 'OK')
> +            if use_uidplus or imapobj._get_untagged_response('COPYUID', True):
> +                if not imapobj._get_untagged_response('COPYUID', True):
> +                    self.ui.warn("Server supports UIDPLUS but got no COPYUID "
> +                                 "copying a message.")
> +                    return 0
> +                resp = imapobj._get_untagged_response('COPYUID', True)
> +                newuid = long(resp[-1].split(' ')[2])
> +            else:
> +                assert False # XXX do something better here (the X-OfflineIMAP header still exists)
> +
> +        finally:
> +            self.imapserver.releaseconnection(imapobj)
> +
> +        # flags are nonsense to either become irrelevant (trashed) or
> +        # overwritten (by a savemessageflags)
> +        self.messagelist = {}
> +        remote_newfolder.messagelist = {}
> +        if uid:
> +            self.messagelist[uid] = {'uid': uid, 'flags': set()}
> +        if newuid:
> +            remote_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': set()}
> +
> +        return newuid
> +
>      def addmessageflags(self, uid, flags):
>          self.addmessagesflags([uid], flags)
>  
> @@ -712,6 +753,15 @@ class IMAPFolder(BaseFolder):
>          for uid in uidlist:
>              del self.messagelist[uid]
>  
> +    def doexpunge(self):
> +        imapobj = self.imapserver.acquireconnection()
> +        try:
> +            imapobj.select(self.getfullname())
> +            r = imapobj.expunge()[0]
> +            assert r == 'OK'
> +        finally:
> +            self.imapserver.releaseconnection(imapobj)
> +
>      def syncmessagesto_delete(self, dest, applyto):
>          """Pass 3 of folder synchronization.
>  
> diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
> index f826ba5..19418de 100644
> --- a/offlineimap/folder/Maildir.py
> +++ b/offlineimap/folder/Maildir.py
> @@ -269,6 +269,21 @@ class MaildirFolder(BaseFolder):
>      def getmessageflags(self, uid):
>          return self.messagelist[uid]['flags']
>  
> +    def _move_file(self, oldfilename, uid, flags):
> +        timeval, timeseq = gettimeseq()
> +        messagename = '%d_%d.%d.%s,U=%d,FMD5=%s' % \
> +            (timeval,
> +             timeseq,
> +             os.getpid(),
> +             socket.gethostname(),
> +             uid,
> +             md5(self.getvisiblename()).hexdigest())
> +        os.rename(os.path.join(self.getfullname(), oldfilename),
> +                  os.path.join(self.getfullname(), os.path.join('tmp', messagename)))
> +        self.messagelist = {}
> +        self.messagelist[uid] = {'flags': set(), 'filename': os.path.join('tmp', messagename)}
> +        self.savemessageflags(uid, flags)
> +
>      def savemessageflags(self, uid, flags):
>          oldfilename = self.messagelist[uid]['filename']
>          dir_prefix, newname = os.path.split(oldfilename)
> diff --git a/offlineimap/repository/Base.py b/offlineimap/repository/Base.py
> index 972aefb..ca9fe34 100644
> --- a/offlineimap/repository/Base.py
> +++ b/offlineimap/repository/Base.py
> @@ -132,6 +132,17 @@ class BaseRepository(object, CustomConfig.ConfigHelperMixin):
>  
>      def getfolder(self, foldername):
>          raise NotImplementedError
> +
> +    def syncmoves(self, remoterepos, statusrepos):
> +        """Optimization pass for synchronizing moved messages.
> +
> +        In some cases, it may be possible to more efficiently synchronize
> +        messages that moved (contents and uid stayed the same, but
> +        the message is in a different folder) than deleting and reuploading.
> +        Any local repository may optionally provide an implementation for
> +        detecting and carrying out moves.  Classes are not required to implement
> +        this."""
> +        pass
>      
>      def syncfoldersto(self, dst_repo, status_repo):
>          """Syncs the folders in this repository to those in dest.
> diff --git a/offlineimap/repository/Maildir.py b/offlineimap/repository/Maildir.py
> index ae09486..6ec9b05 100644
> --- a/offlineimap/repository/Maildir.py
> +++ b/offlineimap/repository/Maildir.py
> @@ -116,6 +116,9 @@ class MaildirRepository(BaseRepository):
>      def deletefolder(self, foldername):
>          self.ui.warn("NOT YET IMPLEMENTED: DELETE FOLDER %s" % foldername)
>  
> +    def forgetfolders(self):
> +        self.folders = None
> +
>      def getfolder(self, foldername):
>          """Return a Folder instance of this Maildir
>  
> @@ -192,4 +195,134 @@ class MaildirRepository(BaseRepository):
>          if self.folders == None:
>              self.folders = self._getfolders_scandir(self.root)
>          return self.folders
> +
> +    def _getmoves(self):
> +        for folder in self.getfolders():
> +            try:
> +                for entry in os.listdir(os.path.join(folder.getfullname(), "mv")):
> +                    fn = os.path.join(folder.getfullname(), "mv", entry)
> +                    newdest = open(fn).read().strip()
> +                    newfolder = os.path.dirname(os.path.dirname(os.path.relpath(newdest, self.root)))
> +                    yield fn,\
> +                          folder.getname(),\
> +                          newfolder,\
> +                          entry,\
> +                          os.path.join(os.path.basename(os.path.dirname(newdest)), os.path.basename(newdest)),\
> +                          newdest
> +            except OSError:
> +                continue
> +
> +    def _getmoves_postdelete(self):
> +        for x in self._getmoves():
> +            try:
> +                yield x
> +            finally:
> +                # Remove entries after processing, since it's harmless
> +                # to double-process except from a performance
> +                # perspective
> +                os.unlink(x[0])
>      
> +    def syncmoves(self, remoterepos, statusrepos):
> +        # TODO thread me!
> +        # TODO does not respect do not sync (this is actually kind of useful)
> +
> +        if not next(self._getmoves(), False):
> +            return
> +
> +        # Initialize by instantiating cache for all status folders
> +        # We setup cache for access because we need messagelist to
> +        # persist over invocations to 'getfolder' (why doesn't
> +        # caching give us that already?)
> +        cache = {}
> +        for statusfolder in statusrepos.getfolders():
> +            statusfolder.cachemessagelist()
> +            cache[statusfolder.getname()] = statusfolder
> +
> +        for fn, oldfolder, newfolder, old_filename, filename, fullname in self._getmoves_postdelete():
> +            # example data:
> +            #
> +            # fn = /home/ezyang/Mail/MIT/INBOX/mv/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
> +            # oldfolder = INBOX
> +            # newfolder = INBOX.Archive
> +            # old_filename = 1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
> +            # filename = new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
> +            # fullname = /home/ezyang/Mail/MIT/INBOX.Archive/new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
> +            #
> +            # old_filename doesn't have leading cur/ or new/, we only
> +            # care about it for the old flags value (XXX the value is
> +            # inaccurate if Sup first changed the flag (move 1) and then
> +            # moved source (move 2); we do OK if the change is atomic
> +            # e.g. you did 'A')
> +
> +            if oldfolder == newfolder:
> +                continue
> +
> +            if not os.path.exists(fullname):
> +                continue
> +
> +            # XXX uid validity check (twice)
> +            # XXX readonly
> +            # XXX error handling and UI
> +            # XXX restoreatime
> +
> +            # If we fail, we bail out and just ask the later phases
> +            # to do it the slow way.
> +
> +            # Note: newfolder/filename == access in Maildir
> +
> +            # - Lookup old and new folders both local and IMAP
> +            local_oldfolder = self.getfolder(oldfolder) # Must be Maildir
> +            local_newfolder = self.getfolder(newfolder) # ditto
> +            remote_oldfolder = remoterepos.getfolder(oldfolder.replace(self.getsep(), remoterepos.getsep()))
> +            remote_newfolder = remoterepos.getfolder(newfolder.replace(self.getsep(), remoterepos.getsep()))
> +            status_oldfolder = cache[oldfolder.replace(self.getsep(), statusrepos.getsep())]
> +            status_newfolder = cache[newfolder.replace(self.getsep(), statusrepos.getsep())]
> +            # We don't read out the message lists for local/remote, since the
> +            # filenames *give us the information we need*.
> +
> +            # - Parse filename into uid and flags AND
> +            #   (Note: Use old folder so that the directory is correct)
> +            # XXX I think old_flags is strictly unnecessary
> +            _, old_flags = local_oldfolder.parsefilename(old_filename) # data will get overwritten
> +            uid, flags = local_oldfolder.parsefilename(filename)
> +
> +            if uid < 0:
> +                continue
> +
> +            # In IMAP land (this is new function)
> +            # - Check if uid exists in old folder on server
> +            # - Initiate IMAP COPY from old folder to new folder on server,
> +            #   retrieving the new assigned ID
> +            # - Rename the maildir copy according to new ID (changeuid)
> +            # - Delete old ID
> +            try:
> +                newuid = remote_oldfolder.remotecopymessage(uid, remote_newfolder)
> +            except NotImplementedError:
> +                continue
> +            if newuid <= 0:
> +                continue
> +
> +            local_newfolder._move_file(filename, newuid, flags) # if they don't match, this will trigger an update
> +            # not using methods because they are too slow (and it is
> +            # safe not to update status)
> +            # XXX Maybe save periodically
> +            status_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': old_flags }
> +            if uid in status_oldfolder.messagelist:
> +                del status_oldfolder.messagelist[uid]
> +            # Explicit delete is necessary
> +            remote_oldfolder.expunge = False # XXX speed hack
> +            remote_oldfolder.deletemessage(uid)
> +
> +            print "Moved %s" % newuid
> +
> +        for remotefolder in remoterepos.getfolders():
> +            remotefolder.doexpunge()
> +        for statusfolder in cache.values():
> +            statusfolder.save()
> +
> +        # critical, since we've polluted the message cache
> +        # (especially for self and remote; probably status
> +        # can get away with not doing this)
> +        self.forgetfolders()
> +        remoterepos.forgetfolders()
> +        statusrepos.forgetfolders()
> 
> _______________________________________________
> OfflineIMAP-project mailing list
> OfflineIMAP-project at lists.alioth.debian.org
> http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/offlineimap-project
> 
> OfflineIMAP homepage: http://software.complete.org/offlineimap

-- 
Nicolas Sebrecht



More information about the OfflineIMAP-project mailing list