Moving messages redux
Nicolas Sebrecht
nicolas.s-dev at laposte.net
Sat Jan 26 16:52:52 GMT 2013
Hi Edward,
Since the recent call for patches, I guess you might like to resend the
patch.
On Thu, Aug 23, 2012 at 09:30:44AM -0400, Edward Z. Yang wrote:
> Here is a horrible, non-error checking, non-cleaned up, non-encapsulation respecting
> proof of concept pair of patches which implement this concept. (It works though!)
>
> Cheers,
> Edward
>
> commit 8318b824159227ba7c898caa6ba896001d5671dc
> Author: Edward Z. Yang <ezyang at mit.edu>
> Date: Wed Aug 22 20:20:51 2012 -0400
>
> Factor out parsefilename functionality of Maildir.
>
> Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
>
>
> diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
> index df5dd2e..f826ba5 100644
> --- a/offlineimap/folder/Maildir.py
> +++ b/offlineimap/folder/Maildir.py
> @@ -64,6 +64,8 @@ class MaildirFolder(BaseFolder):
> self.root = root
> self.sep = sep
> self.messagelist = None
> + self.nouidcounter = -1 # Messages without UIDs get
> + # negative UID numbers.
>
> self.wincompatible = self.config.getdefaultboolean(
> "Account "+self.accountname, "maildir-windows-compatible", False)
> @@ -116,10 +118,6 @@ class MaildirFolder(BaseFolder):
> and must occur in ASCII order."""
> retval = {}
> files = []
> - nouidcounter = -1 # Messages without UIDs get
> - # negative UID numbers.
> - foldermd5 = md5(self.getvisiblename()).hexdigest()
> - folderstr = ',FMD5=' + foldermd5
> for dirannex in ['new', 'cur']:
> fulldirname = os.path.join(self.getfullname(), dirannex)
> files.extend(os.path.join(dirannex, filename) for
> @@ -144,31 +142,39 @@ class MaildirFolder(BaseFolder):
> if(size > maxsize):
> continue
>
> - foldermatch = messagename.find(folderstr) != -1
> - if not foldermatch:
> - # If there is no folder MD5 specified, or if it mismatches,
> - # assume it is a foreign (new) message and generate a
> - # negative uid for it
> - uid = nouidcounter
> - nouidcounter -= 1
> - else: # It comes from our folder.
> - uidmatch = uidmatchre.search(messagename)
> - uid = None
> - if not uidmatch:
> - uid = nouidcounter
> - nouidcounter -= 1
> - else:
> - uid = long(uidmatch.group(1))
> - #identify flags in the path name
> - flagmatch = self.flagmatchre.search(messagename)
> - if flagmatch:
> - flags = set(flagmatch.group(1))
> - else:
> - flags = set()
> + uid, flags = self.parsefilename(file)
> +
> # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
> retval[uid] = {'flags': flags, 'filename': file}
> return retval
>
> + def parsefilename(self, file):
> + foldermd5 = md5(self.getvisiblename()).hexdigest()
> + folderstr = ',FMD5=' + foldermd5
> + messagename = os.path.basename(file)
> + foldermatch = messagename.find(folderstr) != -1
> + if not foldermatch:
> + # If there is no folder MD5 specified, or if it mismatches,
> + # assume it is a foreign (new) message and generate a
> + # negative uid for it
> + uid = self.nouidcounter
> + self.nouidcounter -= 1
> + else: # It comes from our folder.
> + uidmatch = uidmatchre.search(messagename)
> + uid = None
> + if not uidmatch:
> + uid = self.nouidcounter
> + self.nouidcounter -= 1
> + else:
> + uid = long(uidmatch.group(1))
> + #identify flags in the path name
> + flagmatch = self.flagmatchre.search(messagename)
> + if flagmatch:
> + flags = set(flagmatch.group(1))
> + else:
> + flags = set()
> + return uid, flags
> +
> def quickchanged(self, statusfolder):
> """Returns True if the Maildir has changed"""
> self.cachemessagelist()
>
> commit 40412d42ec58716da7ca93928b71c30566e6bab2
> Author: Edward Z. Yang <ezyang at mit.edu>
> Date: Wed Aug 22 23:54:04 2012 -0400
>
> Short circuit moves implementation.
>
> Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
>
> diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
> index 71f08fe..880fc2a 100644
> --- a/offlineimap/accounts.py
> +++ b/offlineimap/accounts.py
> @@ -270,6 +270,9 @@ class SyncableAccount(Account):
> self.ui.syncfolders(remoterepos, localrepos)
> remoterepos.syncfoldersto(localrepos, statusrepos)
>
> + # try to short circuit moves
> + localrepos.syncmoves(remoterepos, statusrepos)
> +
> # iterate through all folders on the remote repo and sync
> for remotefolder in remoterepos.getfolders():
> if not remotefolder.sync_this:
> diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py
> index 91ee08a..c8ba819 100644
> --- a/offlineimap/folder/Base.py
> +++ b/offlineimap/folder/Base.py
> @@ -240,6 +240,9 @@ class BaseFolder(object):
> for uid in uidlist:
> self.deletemessage(uid)
>
> + def remotecopymessage(self, uid, remote_newfolder, local_newfolder, status_newfolder):
> + raise NotImplementedException
> +
> def copymessageto(self, uid, dstfolder, statusfolder, register = 1):
> """Copies a message from self to dst if needed, updating the status
>
> diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py
> index 35a0942..9d79ece 100644
> --- a/offlineimap/folder/IMAP.py
> +++ b/offlineimap/folder/IMAP.py
> @@ -619,6 +619,47 @@ class IMAPFolder(BaseFolder):
> flags = imaputil.flags2hash(imaputil.imapsplit(result)[1])['FLAGS']
> self.messagelist[uid]['flags'] = imaputil.flagsimap2maildir(flags)
>
> + def remotecopymessage(self, uid, remote_newfolder):
> + assert isinstance(remote_newfolder, IMAPFolder)
> + # XXX debugging
> + # XXX make sure uid is actually present in folder
> + imapobj = self.imapserver.acquireconnection()
> + try:
> + use_uidplus = 'UIDPLUS' in imapobj.capabilities
> + try:
> + imapobj.select(self.getfullname(), True, True)
> + (typ, dat) = imapobj.uid("copy", "%d" % uid, remote_newfolder.getfullname())
> + if typ == 'NO':
> + return 0
> + except imapobj.error, e:
> + self.ui.warn('When copying message with UID %s, got error %s' % (uid, str(e)))
> + return 0
> + (typ,dat) = imapobj.check()
> + assert(typ == 'OK')
> + if use_uidplus or imapobj._get_untagged_response('COPYUID', True):
> + if not imapobj._get_untagged_response('COPYUID', True):
> + self.ui.warn("Server supports UIDPLUS but got no COPYUID "
> + "copying a message.")
> + return 0
> + resp = imapobj._get_untagged_response('COPYUID', True)
> + newuid = long(resp[-1].split(' ')[2])
> + else:
> + assert False # XXX do something better here (the X-OfflineIMAP header still exists)
> +
> + finally:
> + self.imapserver.releaseconnection(imapobj)
> +
> + # flags are nonsense to either become irrelevant (trashed) or
> + # overwritten (by a savemessageflags)
> + self.messagelist = {}
> + remote_newfolder.messagelist = {}
> + if uid:
> + self.messagelist[uid] = {'uid': uid, 'flags': set()}
> + if newuid:
> + remote_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': set()}
> +
> + return newuid
> +
> def addmessageflags(self, uid, flags):
> self.addmessagesflags([uid], flags)
>
> @@ -712,6 +753,15 @@ class IMAPFolder(BaseFolder):
> for uid in uidlist:
> del self.messagelist[uid]
>
> + def doexpunge(self):
> + imapobj = self.imapserver.acquireconnection()
> + try:
> + imapobj.select(self.getfullname())
> + r = imapobj.expunge()[0]
> + assert r == 'OK'
> + finally:
> + self.imapserver.releaseconnection(imapobj)
> +
> def syncmessagesto_delete(self, dest, applyto):
> """Pass 3 of folder synchronization.
>
> diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
> index f826ba5..19418de 100644
> --- a/offlineimap/folder/Maildir.py
> +++ b/offlineimap/folder/Maildir.py
> @@ -269,6 +269,21 @@ class MaildirFolder(BaseFolder):
> def getmessageflags(self, uid):
> return self.messagelist[uid]['flags']
>
> + def _move_file(self, oldfilename, uid, flags):
> + timeval, timeseq = gettimeseq()
> + messagename = '%d_%d.%d.%s,U=%d,FMD5=%s' % \
> + (timeval,
> + timeseq,
> + os.getpid(),
> + socket.gethostname(),
> + uid,
> + md5(self.getvisiblename()).hexdigest())
> + os.rename(os.path.join(self.getfullname(), oldfilename),
> + os.path.join(self.getfullname(), os.path.join('tmp', messagename)))
> + self.messagelist = {}
> + self.messagelist[uid] = {'flags': set(), 'filename': os.path.join('tmp', messagename)}
> + self.savemessageflags(uid, flags)
> +
> def savemessageflags(self, uid, flags):
> oldfilename = self.messagelist[uid]['filename']
> dir_prefix, newname = os.path.split(oldfilename)
> diff --git a/offlineimap/repository/Base.py b/offlineimap/repository/Base.py
> index 972aefb..ca9fe34 100644
> --- a/offlineimap/repository/Base.py
> +++ b/offlineimap/repository/Base.py
> @@ -132,6 +132,17 @@ class BaseRepository(object, CustomConfig.ConfigHelperMixin):
>
> def getfolder(self, foldername):
> raise NotImplementedError
> +
> + def syncmoves(self, remoterepos, statusrepos):
> + """Optimization pass for synchronizing moved messages.
> +
> + In some cases, it may be possible to more efficiently synchronize
> + messages that moved (contents and uid stayed the same, but
> + the message is in a different folder) than deleting and reuploading.
> + Any local repository may optionally provide an implementation for
> + detecting and carrying out moves. Classes are not required to implement
> + this."""
> + pass
>
> def syncfoldersto(self, dst_repo, status_repo):
> """Syncs the folders in this repository to those in dest.
> diff --git a/offlineimap/repository/Maildir.py b/offlineimap/repository/Maildir.py
> index ae09486..6ec9b05 100644
> --- a/offlineimap/repository/Maildir.py
> +++ b/offlineimap/repository/Maildir.py
> @@ -116,6 +116,9 @@ class MaildirRepository(BaseRepository):
> def deletefolder(self, foldername):
> self.ui.warn("NOT YET IMPLEMENTED: DELETE FOLDER %s" % foldername)
>
> + def forgetfolders(self):
> + self.folders = None
> +
> def getfolder(self, foldername):
> """Return a Folder instance of this Maildir
>
> @@ -192,4 +195,134 @@ class MaildirRepository(BaseRepository):
> if self.folders == None:
> self.folders = self._getfolders_scandir(self.root)
> return self.folders
> +
> + def _getmoves(self):
> + for folder in self.getfolders():
> + try:
> + for entry in os.listdir(os.path.join(folder.getfullname(), "mv")):
> + fn = os.path.join(folder.getfullname(), "mv", entry)
> + newdest = open(fn).read().strip()
> + newfolder = os.path.dirname(os.path.dirname(os.path.relpath(newdest, self.root)))
> + yield fn,\
> + folder.getname(),\
> + newfolder,\
> + entry,\
> + os.path.join(os.path.basename(os.path.dirname(newdest)), os.path.basename(newdest)),\
> + newdest
> + except OSError:
> + continue
> +
> + def _getmoves_postdelete(self):
> + for x in self._getmoves():
> + try:
> + yield x
> + finally:
> + # Remove entries after processing, since it's harmless
> + # to double-process except from a performance
> + # perspective
> + os.unlink(x[0])
>
> + def syncmoves(self, remoterepos, statusrepos):
> + # TODO thread me!
> + # TODO does not respect do not sync (this is actually kind of useful)
> +
> + if not next(self._getmoves(), False):
> + return
> +
> + # Initialize by instantiating cache for all status folders
> + # We setup cache for access because we need messagelist to
> + # persist over invocations to 'getfolder' (why doesn't
> + # caching give us that already?)
> + cache = {}
> + for statusfolder in statusrepos.getfolders():
> + statusfolder.cachemessagelist()
> + cache[statusfolder.getname()] = statusfolder
> +
> + for fn, oldfolder, newfolder, old_filename, filename, fullname in self._getmoves_postdelete():
> + # example data:
> + #
> + # fn = /home/ezyang/Mail/MIT/INBOX/mv/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
> + # oldfolder = INBOX
> + # newfolder = INBOX.Archive
> + # old_filename = 1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
> + # filename = new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
> + # fullname = /home/ezyang/Mail/MIT/INBOX.Archive/new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
> + #
> + # old_filename doesn't have leading cur/ or new/, we only
> + # care about it for the old flags value (XXX the value is
> + # inaccurate if Sup first changed the flag (move 1) and then
> + # moved source (move 2); we do OK if the change is atomic
> + # e.g. you did 'A')
> +
> + if oldfolder == newfolder:
> + continue
> +
> + if not os.path.exists(fullname):
> + continue
> +
> + # XXX uid validity check (twice)
> + # XXX readonly
> + # XXX error handling and UI
> + # XXX restoreatime
> +
> + # If we fail, we bail out and just ask the later phases
> + # to do it the slow way.
> +
> + # Note: newfolder/filename == access in Maildir
> +
> + # - Lookup old and new folders both local and IMAP
> + local_oldfolder = self.getfolder(oldfolder) # Must be Maildir
> + local_newfolder = self.getfolder(newfolder) # ditto
> + remote_oldfolder = remoterepos.getfolder(oldfolder.replace(self.getsep(), remoterepos.getsep()))
> + remote_newfolder = remoterepos.getfolder(newfolder.replace(self.getsep(), remoterepos.getsep()))
> + status_oldfolder = cache[oldfolder.replace(self.getsep(), statusrepos.getsep())]
> + status_newfolder = cache[newfolder.replace(self.getsep(), statusrepos.getsep())]
> + # We don't read out the message lists for local/remote, since the
> + # filenames *give us the information we need*.
> +
> + # - Parse filename into uid and flags AND
> + # (Note: Use old folder so that the directory is correct)
> + # XXX I think old_flags is strictly unnecessary
> + _, old_flags = local_oldfolder.parsefilename(old_filename) # data will get overwritten
> + uid, flags = local_oldfolder.parsefilename(filename)
> +
> + if uid < 0:
> + continue
> +
> + # In IMAP land (this is new function)
> + # - Check if uid exists in old folder on server
> + # - Initiate IMAP COPY from old folder to new folder on server,
> + # retrieving the new assigned ID
> + # - Rename the maildir copy according to new ID (changeuid)
> + # - Delete old ID
> + try:
> + newuid = remote_oldfolder.remotecopymessage(uid, remote_newfolder)
> + except NotImplementedError:
> + continue
> + if newuid <= 0:
> + continue
> +
> + local_newfolder._move_file(filename, newuid, flags) # if they don't match, this will trigger an update
> + # not using methods because they are too slow (and it is
> + # safe not to update status)
> + # XXX Maybe save periodically
> + status_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': old_flags }
> + if uid in status_oldfolder.messagelist:
> + del status_oldfolder.messagelist[uid]
> + # Explicit delete is necessary
> + remote_oldfolder.expunge = False # XXX speed hack
> + remote_oldfolder.deletemessage(uid)
> +
> + print "Moved %s" % newuid
> +
> + for remotefolder in remoterepos.getfolders():
> + remotefolder.doexpunge()
> + for statusfolder in cache.values():
> + statusfolder.save()
> +
> + # critical, since we've polluted the message cache
> + # (especially for self and remote; probably status
> + # can get away with not doing this)
> + self.forgetfolders()
> + remoterepos.forgetfolders()
> + statusrepos.forgetfolders()
>
> _______________________________________________
> OfflineIMAP-project mailing list
> OfflineIMAP-project at lists.alioth.debian.org
> http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/offlineimap-project
>
> OfflineIMAP homepage: http://software.complete.org/offlineimap
--
Nicolas Sebrecht
More information about the OfflineIMAP-project
mailing list