Moving messages redux
Edward Z. Yang
ezyang at MIT.EDU
Thu Aug 23 14:30:44 BST 2012
Here is a horrible proof-of-concept pair of patches implementing this idea: no error
checking, no cleanup, and no respect for encapsulation. (It works, though!)
Cheers,
Edward
commit 8318b824159227ba7c898caa6ba896001d5671dc
Author: Edward Z. Yang <ezyang at mit.edu>
Date: Wed Aug 22 20:20:51 2012 -0400
Factor out parsefilename functionality of Maildir.
Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index df5dd2e..f826ba5 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -64,6 +64,8 @@ class MaildirFolder(BaseFolder):
self.root = root
self.sep = sep
self.messagelist = None
+ self.nouidcounter = -1 # Messages without UIDs get
+ # negative UID numbers.
self.wincompatible = self.config.getdefaultboolean(
"Account "+self.accountname, "maildir-windows-compatible", False)
@@ -116,10 +118,6 @@ class MaildirFolder(BaseFolder):
and must occur in ASCII order."""
retval = {}
files = []
- nouidcounter = -1 # Messages without UIDs get
- # negative UID numbers.
- foldermd5 = md5(self.getvisiblename()).hexdigest()
- folderstr = ',FMD5=' + foldermd5
for dirannex in ['new', 'cur']:
fulldirname = os.path.join(self.getfullname(), dirannex)
files.extend(os.path.join(dirannex, filename) for
@@ -144,31 +142,39 @@ class MaildirFolder(BaseFolder):
if(size > maxsize):
continue
- foldermatch = messagename.find(folderstr) != -1
- if not foldermatch:
- # If there is no folder MD5 specified, or if it mismatches,
- # assume it is a foreign (new) message and generate a
- # negative uid for it
- uid = nouidcounter
- nouidcounter -= 1
- else: # It comes from our folder.
- uidmatch = uidmatchre.search(messagename)
- uid = None
- if not uidmatch:
- uid = nouidcounter
- nouidcounter -= 1
- else:
- uid = long(uidmatch.group(1))
- #identify flags in the path name
- flagmatch = self.flagmatchre.search(messagename)
- if flagmatch:
- flags = set(flagmatch.group(1))
- else:
- flags = set()
+ uid, flags = self.parsefilename(file)
+
# 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
retval[uid] = {'flags': flags, 'filename': file}
return retval
+ def parsefilename(self, file):
+ foldermd5 = md5(self.getvisiblename()).hexdigest()
+ folderstr = ',FMD5=' + foldermd5
+ messagename = os.path.basename(file)
+ foldermatch = messagename.find(folderstr) != -1
+ if not foldermatch:
+ # If there is no folder MD5 specified, or if it mismatches,
+ # assume it is a foreign (new) message and generate a
+ # negative uid for it
+ uid = self.nouidcounter
+ self.nouidcounter -= 1
+ else: # It comes from our folder.
+ uidmatch = uidmatchre.search(messagename)
+ uid = None
+ if not uidmatch:
+ uid = self.nouidcounter
+ self.nouidcounter -= 1
+ else:
+ uid = long(uidmatch.group(1))
+ #identify flags in the path name
+ flagmatch = self.flagmatchre.search(messagename)
+ if flagmatch:
+ flags = set(flagmatch.group(1))
+ else:
+ flags = set()
+ return uid, flags
+
def quickchanged(self, statusfolder):
"""Returns True if the Maildir has changed"""
self.cachemessagelist()
commit 40412d42ec58716da7ca93928b71c30566e6bab2
Author: Edward Z. Yang <ezyang at mit.edu>
Date: Wed Aug 22 23:54:04 2012 -0400
Short circuit moves implementation.
Signed-off-by: Edward Z. Yang <ezyang at mit.edu>
diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
index 71f08fe..880fc2a 100644
--- a/offlineimap/accounts.py
+++ b/offlineimap/accounts.py
@@ -270,6 +270,9 @@ class SyncableAccount(Account):
self.ui.syncfolders(remoterepos, localrepos)
remoterepos.syncfoldersto(localrepos, statusrepos)
+ # try to short circuit moves
+ localrepos.syncmoves(remoterepos, statusrepos)
+
# iterate through all folders on the remote repo and sync
for remotefolder in remoterepos.getfolders():
if not remotefolder.sync_this:
diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py
index 91ee08a..c8ba819 100644
--- a/offlineimap/folder/Base.py
+++ b/offlineimap/folder/Base.py
@@ -240,6 +240,9 @@ class BaseFolder(object):
for uid in uidlist:
self.deletemessage(uid)
+ def remotecopymessage(self, uid, remote_newfolder, local_newfolder=None, status_newfolder=None):
+ raise NotImplementedError # folder types that cannot copy server-side; syncmoves catches this and skips the move
+
def copymessageto(self, uid, dstfolder, statusfolder, register = 1):
"""Copies a message from self to dst if needed, updating the status
diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py
index 35a0942..9d79ece 100644
--- a/offlineimap/folder/IMAP.py
+++ b/offlineimap/folder/IMAP.py
@@ -619,6 +619,47 @@ class IMAPFolder(BaseFolder):
flags = imaputil.flags2hash(imaputil.imapsplit(result)[1])['FLAGS']
self.messagelist[uid]['flags'] = imaputil.flagsimap2maildir(flags)
+ def remotecopymessage(self, uid, remote_newfolder):
+ assert isinstance(remote_newfolder, IMAPFolder)
+ # XXX debugging
+ # XXX make sure uid is actually present in folder
+ imapobj = self.imapserver.acquireconnection()
+ try:
+ use_uidplus = 'UIDPLUS' in imapobj.capabilities
+ try:
+ imapobj.select(self.getfullname(), True, True)
+ (typ, dat) = imapobj.uid("copy", "%d" % uid, remote_newfolder.getfullname())
+ if typ == 'NO':
+ return 0
+ except imapobj.error, e:
+ self.ui.warn('When copying message with UID %s, got error %s' % (uid, str(e)))
+ return 0
+ (typ,dat) = imapobj.check()
+ assert(typ == 'OK')
+ if use_uidplus or imapobj._get_untagged_response('COPYUID', True):
+ if not imapobj._get_untagged_response('COPYUID', True):
+ self.ui.warn("Server supports UIDPLUS but got no COPYUID "
+ "copying a message.")
+ return 0
+ resp = imapobj._get_untagged_response('COPYUID', True)
+ newuid = long(resp[-1].split(' ')[2])
+ else:
+ assert False # XXX do something better here (the X-OfflineIMAP header still exists)
+
+ finally:
+ self.imapserver.releaseconnection(imapobj)
+
+ # flags are nonsense to either become irrelevant (trashed) or
+ # overwritten (by a savemessageflags)
+ self.messagelist = {}
+ remote_newfolder.messagelist = {}
+ if uid:
+ self.messagelist[uid] = {'uid': uid, 'flags': set()}
+ if newuid:
+ remote_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': set()}
+
+ return newuid
+
def addmessageflags(self, uid, flags):
self.addmessagesflags([uid], flags)
@@ -712,6 +753,15 @@ class IMAPFolder(BaseFolder):
for uid in uidlist:
del self.messagelist[uid]
+ def doexpunge(self):
+ imapobj = self.imapserver.acquireconnection()
+ try:
+ imapobj.select(self.getfullname())
+ r = imapobj.expunge()[0]
+ assert r == 'OK'
+ finally:
+ self.imapserver.releaseconnection(imapobj)
+
def syncmessagesto_delete(self, dest, applyto):
"""Pass 3 of folder synchronization.
diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index f826ba5..19418de 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -269,6 +269,21 @@ class MaildirFolder(BaseFolder):
def getmessageflags(self, uid):
return self.messagelist[uid]['flags']
+ def _move_file(self, oldfilename, uid, flags):
+ timeval, timeseq = gettimeseq()
+ messagename = '%d_%d.%d.%s,U=%d,FMD5=%s' % \
+ (timeval,
+ timeseq,
+ os.getpid(),
+ socket.gethostname(),
+ uid,
+ md5(self.getvisiblename()).hexdigest())
+ os.rename(os.path.join(self.getfullname(), oldfilename),
+ os.path.join(self.getfullname(), os.path.join('tmp', messagename)))
+ self.messagelist = {}
+ self.messagelist[uid] = {'flags': set(), 'filename': os.path.join('tmp', messagename)}
+ self.savemessageflags(uid, flags)
+
def savemessageflags(self, uid, flags):
oldfilename = self.messagelist[uid]['filename']
dir_prefix, newname = os.path.split(oldfilename)
diff --git a/offlineimap/repository/Base.py b/offlineimap/repository/Base.py
index 972aefb..ca9fe34 100644
--- a/offlineimap/repository/Base.py
+++ b/offlineimap/repository/Base.py
@@ -132,6 +132,17 @@ class BaseRepository(object, CustomConfig.ConfigHelperMixin):
def getfolder(self, foldername):
raise NotImplementedError
+
+ def syncmoves(self, remoterepos, statusrepos):
+ """Optimization pass for synchronizing moved messages.
+
+ In some cases, it may be possible to more efficiently synchronize
+ messages that moved (contents and uid stayed the same, but
+ the message is in a different folder) than deleting and reuploading.
+ Any local repository may optionally provide an implementation for
+ detecting and carrying out moves. Classes are not required to implement
+ this."""
+ pass
def syncfoldersto(self, dst_repo, status_repo):
"""Syncs the folders in this repository to those in dest.
diff --git a/offlineimap/repository/Maildir.py b/offlineimap/repository/Maildir.py
index ae09486..6ec9b05 100644
--- a/offlineimap/repository/Maildir.py
+++ b/offlineimap/repository/Maildir.py
@@ -116,6 +116,9 @@ class MaildirRepository(BaseRepository):
def deletefolder(self, foldername):
self.ui.warn("NOT YET IMPLEMENTED: DELETE FOLDER %s" % foldername)
+ def forgetfolders(self):
+ self.folders = None
+
def getfolder(self, foldername):
"""Return a Folder instance of this Maildir
@@ -192,4 +195,134 @@ class MaildirRepository(BaseRepository):
if self.folders == None:
self.folders = self._getfolders_scandir(self.root)
return self.folders
+
+ def _getmoves(self):
+ for folder in self.getfolders():
+ try:
+ for entry in os.listdir(os.path.join(folder.getfullname(), "mv")):
+ fn = os.path.join(folder.getfullname(), "mv", entry)
+ newdest = open(fn).read().strip()
+ newfolder = os.path.dirname(os.path.dirname(os.path.relpath(newdest, self.root)))
+ yield fn,\
+ folder.getname(),\
+ newfolder,\
+ entry,\
+ os.path.join(os.path.basename(os.path.dirname(newdest)), os.path.basename(newdest)),\
+ newdest
+ except OSError:
+ continue
+
+ def _getmoves_postdelete(self):
+ for x in self._getmoves():
+ try:
+ yield x
+ finally:
+ # Remove entries after processing, since it's harmless
+ # to double-process except from a performance
+ # perspective
+ os.unlink(x[0])
+ def syncmoves(self, remoterepos, statusrepos):
+ # TODO thread me!
+ # TODO does not respect do not sync (this is actually kind of useful)
+
+ if not next(self._getmoves(), False):
+ return
+
+ # Initialize by instantiating cache for all status folders
+ # We setup cache for access because we need messagelist to
+ # persist over invocations to 'getfolder' (why doesn't
+ # caching give us that already?)
+ cache = {}
+ for statusfolder in statusrepos.getfolders():
+ statusfolder.cachemessagelist()
+ cache[statusfolder.getname()] = statusfolder
+
+ for fn, oldfolder, newfolder, old_filename, filename, fullname in self._getmoves_postdelete():
+ # example data:
+ #
+ # fn = /home/ezyang/Mail/MIT/INBOX/mv/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
+ # oldfolder = INBOX
+ # newfolder = INBOX.Archive
+ # old_filename = 1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
+ # filename = new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
+ # fullname = /home/ezyang/Mail/MIT/INBOX.Archive/new/1345706203_9.22226.javelin,U=400917,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,S
+ #
+ # old_filename doesn't have leading cur/ or new/, we only
+ # care about it for the old flags value (XXX the value is
+ # inaccurate if Sup first changed the flag (move 1) and then
+ # moved source (move 2); we do OK if the change is atomic
+ # e.g. you did 'A')
+
+ if oldfolder == newfolder:
+ continue
+
+ if not os.path.exists(fullname):
+ continue
+
+ # XXX uid validity check (twice)
+ # XXX readonly
+ # XXX error handling and UI
+ # XXX restoreatime
+
+ # If we fail, we bail out and just ask the later phases
+ # to do it the slow way.
+
+ # Note: newfolder/filename == access in Maildir
+
+ # - Lookup old and new folders both local and IMAP
+ local_oldfolder = self.getfolder(oldfolder) # Must be Maildir
+ local_newfolder = self.getfolder(newfolder) # ditto
+ remote_oldfolder = remoterepos.getfolder(oldfolder.replace(self.getsep(), remoterepos.getsep()))
+ remote_newfolder = remoterepos.getfolder(newfolder.replace(self.getsep(), remoterepos.getsep()))
+ status_oldfolder = cache[oldfolder.replace(self.getsep(), statusrepos.getsep())]
+ status_newfolder = cache[newfolder.replace(self.getsep(), statusrepos.getsep())]
+ # We don't read out the message lists for local/remote, since the
+ # filenames *give us the information we need*.
+
+ # - Parse filename into uid and flags AND
+ # (Note: Use old folder so that the directory is correct)
+ # XXX I think old_flags is strictly unnecessary
+ _, old_flags = local_oldfolder.parsefilename(old_filename) # data will get overwritten
+ uid, flags = local_oldfolder.parsefilename(filename)
+
+ if uid < 0:
+ continue
+
+ # In IMAP land (this is new function)
+ # - Check if uid exists in old folder on server
+ # - Initiate IMAP COPY from old folder to new folder on server,
+ # retrieving the new assigned ID
+ # - Rename the maildir copy according to new ID (changeuid)
+ # - Delete old ID
+ try:
+ newuid = remote_oldfolder.remotecopymessage(uid, remote_newfolder)
+ except NotImplementedError:
+ continue
+ if newuid <= 0:
+ continue
+
+ local_newfolder._move_file(filename, newuid, flags) # if they don't match, this will trigger an update
+ # not using methods because they are too slow (and it is
+ # safe not to update status)
+ # XXX Maybe save periodically
+ status_newfolder.messagelist[newuid] = {'uid': newuid, 'flags': old_flags }
+ if uid in status_oldfolder.messagelist:
+ del status_oldfolder.messagelist[uid]
+ # Explicit delete is necessary
+ remote_oldfolder.expunge = False # XXX speed hack
+ remote_oldfolder.deletemessage(uid)
+
+ print "Moved %s" % newuid
+
+ for remotefolder in remoterepos.getfolders():
+ remotefolder.doexpunge()
+ for statusfolder in cache.values():
+ statusfolder.save()
+
+ # critical, since we've polluted the message cache
+ # (especially for self and remote; probably status
+ # can get away with not doing this)
+ self.forgetfolders()
+ remoterepos.forgetfolders()
+ statusrepos.forgetfolders()
More information about the OfflineIMAP-project
mailing list