[PATCH/RFC 7/7] Get content from old folder structure and copy it to new structure

Hubert Pineault hpineault at riseup.net
Sun Mar 3 08:57:03 GMT 2019


Two new methods are added. This is where the actual update is done.

SyncableAccount.get_content_from_account(oldaccount):

  Called by the newaccount to get content from oldaccount. Loops
  through remote folders, skipping ignored folders and folders not yet
  synced in the oldaccount maildir structure. For each folder, gets the
  old and new local folder names, then calls self.__get_folder_content().

SyncableAccount.__get_folder_content(oldaccount, old_localfolder,
                                     new_localfolder, new_remotefolder):

  Initialize the message lists of both local folders (old and new) and
  run folder.Base.syncmessagesto().  Unfortunately, this is not much
  faster than downloading the whole account from IMAP. I've tried
  writing a new method for moving files instead of copying messages by
  adapting __syncmessagesto_copy(), but it wasn't much faster
  either. I need to know more about mbnames to improve performance. A
  little help on this subject would be greatly appreciated.

Signed-off-by: Hubert Pineault <hpineault at riseup.net>
---
 offlineimap/accounts.py       | 134 ++++++++++++++++++++++++++++++++++
 offlineimap/folder/Maildir.py | 165 +++++++++++++++++++++++++++---------------
 2 files changed, 239 insertions(+), 60 deletions(-)

diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py
index 0ae8b0d..6544b98 100644
--- a/offlineimap/accounts.py
+++ b/offlineimap/accounts.py
@@ -559,6 +559,140 @@ class SyncableAccount(Account):
         hook = self.getconf('postupdateconfhook', '')  # Is this right?
         self.callhook(hook)
 
+    def get_content_from_account(self, oldaccount):
+        newaccount = self
+
+        old_remote_hash, new_remote_hash = {}, {}
+        old_local_hash, new_local_hash = {}, {}
+
+        for folder in oldaccount.remoterepos.getfolders():
+            old_remote_hash[folder.getimapname()] = folder
+
+        for folder in oldaccount.localrepos.getfolders():
+            old_local_hash[folder.getname()] = folder
+
+        for folder in newaccount.remoterepos.getfolders():
+            new_remote_hash[folder.getimapname()] = folder
+
+        for folder in newaccount.localrepos.getfolders():
+            new_local_hash[folder.getname()] = folder
+
+        # Loop through remote folder and get local folder correspondance
+        for new_remote_imapname, new_remote_folder in new_remote_hash.items():
+            if not new_remote_folder.sync_this:
+                self.ui.debug('', "Ignoring filtered folder in new config '%s'"
+                              "[%s]"% (new_remote_folder.getname(),
+                                       newaccount.remoterepos))
+                continue  # Ignore filtered folder.
+            if not new_remote_imapname in old_remote_hash.keys():
+                self.ui.debug('', "Ignoring filtered folder in old config '%s'"
+                              "[%s]"% (new_remote_folder.getname(),
+                                       newaccount.remoterepos))
+                continue  # Ignore filtered folder.
+
+            # Apply old remote nametrans and fix serparator.
+            old_remote_folder = old_remote_hash[new_remote_imapname]
+            old_local_name = old_remote_folder.getvisiblename().replace(
+                oldaccount.remoterepos.getsep(),
+                oldaccount.localrepos.getsep())
+            if old_local_name not in old_local_hash.keys():
+                self.ui.debug('', "Ignoring unsynced folder '%s'"
+                              "[%s]"% (new_remote_folder.getname(),
+                                       newaccount.remoterepos))
+                continue  # Ignore unsynced folder.
+            old_local_folder = oldaccount.get_local_folder(old_remote_folder)
+
+            # Check for CTRL-C or SIGTERM (not sure if it's ok).
+            if (oldaccount.abort_NOW_signal.is_set()
+              or newaccount.abort_NOW_signal.is_set()):
+                break
+
+            if not newaccount.localrepos.getconfboolean('readonly', False):
+                if not newaccount.dryrun:
+                    new_local_folder = newaccount.get_local_folder(new_remote_folder)
+                else:
+                    new_local_folder = new_remote_folder.getvisiblename().replace(
+                        newaccount.remoterepos.getsep(),
+                        newaccount.localrepos.getsep())
+                self.__get_folder_content(oldaccount, old_local_folder,
+                                          new_local_folder, new_remote_folder)
+
+        newaccount.localrepos.restore_atime()
+        mbnames.writeIntermediateFile(self.name) # Write out mailbox names.
+
+    def __get_folder_content(self, oldaccount, old_localfolder,
+                             new_localfolder, new_remotefolder):
+        """Get the content from old_local_folder"""
+
+        newaccount = self
+        dststatusrepos = newaccount.statusrepos
+        srcstatusrepos = oldaccount.statusrepos
+        remoterepos = newaccount.remoterepos
+        movecontent = self.config.getboolean('general', 'movecontent')
+        old_localfolder_name = old_localfolder.name
+        new_localfolder_name = new_localfolder if self.dryrun \
+                               else new_localfolder.name
+
+        newaccount.ui.getfoldercontent(old_localfolder_name,
+                                       new_localfolder_name,
+                                       movecontent)
+
+        if self.dryrun:
+            newaccount.ui.getfoldercontentdone(old_localfolder_name)
+            return
+
+        # Load status folders.
+        srcstatusfolder = srcstatusrepos.getfolder(old_localfolder_name.
+                                                   replace(old_localfolder.getsep(),
+                                                           srcstatusrepos.getsep()))
+        srcstatusfolder.openfiles()
+        dststatusfolder = dststatusrepos.getfolder(new_remotefolder.getvisiblename().
+                                             replace(remoterepos.getsep(), dststatusrepos.getsep()))
+        dststatusfolder.openfiles()
+
+        #TODO: check that local folder does not contain local sep
+        ''' # The remote folder names must not have the local sep char in
+            # their names since this would cause troubles while converting
+            # the name back (from local to remote).
+            sep = localrepos.getsep()
+            if (sep != os.path.sep and
+                sep != remoterepos.getsep() and
+                sep in remotefolder.getname()):
+                self.ui.warn('', "Ignoring folder '%s' due to unsupported "
+                    "'%s' character serving as local separator."%
+                    (remotefolder.getname(), localrepos.getsep()))
+                continue # Ignore unsupported folder name.'''
+
+        try:
+            # Add the folder to the mbnames mailboxes.
+            mbnames.add(newaccount.name, newaccount.localrepos.root,
+                        new_localfolder.getname())
+
+            # At this point, is this test necessary?
+            if not newaccount.localrepos.getconfboolean('readonly', False):
+                old_localfolder.sendcontentto(new_localfolder, srcstatusfolder, dststatusfolder, movecontent)
+            else:
+                self.ui.debug('', "Not sending content to read-only repository '%s'"%
+                              newaccount.localrepos.getname())
+
+            newaccount.localrepos.restore_atime()
+
+        except (KeyboardInterrupt, SystemExit):
+            raise
+        except Exception as e:
+            self.ui.error(e, msg="ERROR while getting folder content for %s: %s"%
+                          (new_localfolder.getvisiblename(), traceback.format_exc()))
+            raise  # Raise unknown Exceptions so we can fix them.
+        else:
+            newaccount.ui.getfoldercontentdone()
+        finally:
+            for folder in ["old_localfolder", "new_localfolder"]:
+                if folder in locals():
+                    locals()[folder].dropmessagelistcache()
+            for folder in ["srcstatusfolder", "dststatusfolder"]:
+                if folder in locals():
+                    locals()[folder].closefiles()
+
 
 #XXX: This function should likely be refactored. This should not be passed the
 # account instance.
diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index bab8284..b54879d 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -552,69 +552,114 @@ class MaildirFolder(BaseFolder):
                               " Neither `%s' nor `%s' found")
                              % (filename, oldfmd5, self._foldermd5))
 
-    def sendcontentto(self, dstfolder, movecontent=False, dryrun=False):
-        from shutil import move, copy2
+    def sendcontentto(self, dstfolder, srcstatusfolder, dststatusfolder,
+                      movecontent=False, dryrun=False):
+        '''We avoid using Maildir.cachemessagelist() in order to drastically
+        improve performance. Instead, we use statusrepos and
+        listdir().'''
+        from shutil import move, copy2, rmtree
 
         srcfolder = self
-        folderpath = os.path.join(srcfolder.root, srcfolder.name)
-
-        for d in os.listdir(folderpath):
-            if os.path.isdir(os.path.join(folderpath, d)):
-                messages = os.listdir(os.path.join(folderpath, d))
-                num = 0
-                totalnum = len(messages)
-                self.ui.nbmessagestosend(os.path.join(dstfolder.name, d),
-                                         totalnum, movecontent)
-
-                for f in messages:
-                    #TODO: Show progression
-
-                    if os.path.isdir(os.path.join(folderpath, d, f)):
-                        '''There should not be any folder in here. If there
-                        is, should we also deal with it? In a sane folder
-                        structure, this should not happen. Or am I wrong?'''
-
-                        self.ui.info("A folder was found in source folder '{0}'"
-                                     "while trying to move its content. Ignoring"
-                                     "'{1}'. This indicate that the original"
-                                     "folder structure is probably corrupt in"
-                                     "some way. Please investigate.",
-                                     format(os.path.join(srcfolder.name, d), f))
-                        continue
+        srcstatusfolder.cachemessagelist()
+        dststatusfolder.cachemessagelist()
+        srcfolderpath = os.path.join(srcfolder.root, srcfolder.name)
+        dstfolderpath = os.path.join(dstfolder.root, dstfolder.name)
+        savemsgtostatusfolder = True
+
+        # To speed up process, when dststatusfolder is empty, we copy status
+        # file and test on maildir folder for existing messages. If
+        # dststatusfolder is not empty, we procede to savemessage.
+        if len(dststatusfolder.getmessageuidlist()) == 0 \
+           and len(srcstatusfolder.getmessageuidlist()) > 0:
+            savemsgtostatusfolder = False
+
+        if savemsgtostatusfolder:
+            sendmsglist = [uid for uid in srcstatusfolder.getmessageuidlist()
+                           if not dststatusfolder.uidexists(uid)]
+        else:
+            sendmsglist = srcstatusfolder.getmessageuidlist()
+        num_of_msg = len(sendmsglist)
 
-                    dstfile = os.path.join(dstfolder.root, dstfolder.name, d, f)
-                    if movecontent:
-                        #TODO: Prevent or ask file overwrite
-                        move(os.path.join(folderpath, d, f),
-                             dstfile)
-                    else:
-                        num += 1
-                        if os.path.exists(dstfile):
-                            self.ui.ignorecopyingmessage(f, srcfolder, dstfolder)
-                            continue
-                        if not dryrun:
-                            copy2(os.path.join(folderpath, d, f),
-                                  dstfile)
-
-                if (movecontent
-                    and len(os.listdir(os.path.join(folderpath, d))) == 0
-                        and not dryrun):
-                    os.rmdir(os.path.join(folderpath, d))
-                elif movecontent and not dryrun:
-                    self.ui.info("Folder {0} is not empty. Some files were not moved".
-                                 format(os.path.join(folderpath, d)))
+        if not savemsgtostatusfolder and num_of_msg > 0:
+            try:
+                dststatusfolder.closefiles()
+                copy2(srcstatusfolder.filename, dststatusfolder.filename)
+            except OSError:
+                savemsgtostatusfolder = True
+                pass
 
-            else:
-                '''If there is a file in srcfolder, should we also deal with it? In a 
-                sane folder structure, this should not happen. Or am I wrong?
-                move(os.path.join(folderpath, d),
-                     os.join(dstfolder.root, dstfolder.name, f))'''
-                self.ui.ignorecopyingmessage(self._parse_filename(d)['UID'],
-                                             srcfolder, dstfolder)
-
-        if movecontent and len(os.listdir(folderpath)) == 0 \
+        self.ui.nbmessagestosend(dstfolder.name, num_of_msg, movecontent)
+
+        filelist = {}
+        try:
+            for f in os.listdir(os.path.join(srcfolderpath, 'cur')) \
+                   + os.listdir(os.path.join(srcfolderpath, 'new')):
+                uidmatch = re_uidmatch.search(f)
+                if uidmatch:
+                    filelist[int(uidmatch.group(1))] = f
+        except OSError:
+            pass
+
+        #TODO :
+        # -Ignore UIDs???
+
+        for num, uid in enumerate(sendmsglist):
+            #TODO: Bail out on CTRL-C or SIGTERM.
+            '''if offlineimap.accounts.Account.abort_NOW_signal.is_set():
+                break'''
+            try:
+                # Should we check that UID > 0?
+                # With Maildir, there shouldn't be any UID = 0, or am I wrong?
+                num += 1
+                filename = filelist[uid]
+                flags = srcstatusfolder.getmessageflags(uid)
+                dir_prefix = 'cur' if 'S' in flags else 'new'
+                srcfilepath = os.path.join(srcfolderpath, dir_prefix, filename)
+                dstfilepath = os.path.join(dstfolderpath, dir_prefix, filename)
+
+                self.ui.sendingmessage(uid, num, num_of_msg,
+                                       srcfolder, dstfolder, movecontent)
+
+                if dryrun:
+                    continue
+                if movecontent:
+                    #TODO: Prevent or ask file overwrite
+                    move(srcfilepath, dstfilepath)
+                else:
+                    if os.path.exists(dstfilepath):
+                        self.ui.ignorecopyingmessage(filename, srcfolder, dstfolder)
+                    elif not dryrun:
+                        copy2(srcfilepath, dstfilepath)
+            except Exception as e:
+                self.ui.info(
+                    "Error while sending content of folder {0}, to {1} : '{2}'".
+                    format(srcfolder.name, dstfolder.name, e))
+                raise
+
+            if savemsgtostatusfolder:
+                labels = srcstatusfolder.getmessagelabels(uid)
+                # Should we save rtime and mtime?
+                dststatusfolder.savemessage(uid, None, flags, 0, 0, labels)
+                dststatusfolder.save()
+
+        for folder in ["srcfolder", "srcstatusfolder", "dststatusfolder"]:
+            if folder in locals():
+                locals()[folder].dropmessagelistcache()
+
+        if num_of_msg == 0:
+            num = 0
+
+        if movecontent \
+           and num == num_of_msg \
            and not dryrun:
-            os.rmdir(folderpath)
+            try:
+                rmtree(srcfolderpath)
+            except OSError:
+                self.ui.warn(
+                    "Error while removing source folder {0}, but no \
+                    messages were left in it.".format(srcfolder.root))
+                pass
         elif movecontent and not dryrun:
-            self.ui.info("Folder {0} is not empty. Some files were not moved.".
-                         format(folderpath))
+            self.ui.warn(
+                "Folder {0} is not empty. Some files were not moved. \
+                Please investigate source statusfolder.".format(srcfolder.root))
-- 
2.11.0




More information about the OfflineIMAP-project mailing list