[PATCH 1/2] Refactor parsing out maildirs filename components

Sebastian Spaeth Sebastian at SSpaeth.de
Mon Aug 22 16:10:53 BST 2011


Create a helper function that gets the UID, folder MD5, and Flags from a
maildir message filename.

We need these items when we simply want to rename (=new UID) a Maildir
message file later. The new function can give us these components.

Also compute the folder's MD5 value only once and cache it: it never
changes, and we use it a lot.

Signed-off-by: Sebastian Spaeth <Sebastian at SSpaeth.de>
---
 offlineimap/folder/Maildir.py |  105 +++++++++++++++++++++++++----------------
 1 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py
index f72f4ea..05d5ee7 100644
--- a/offlineimap/folder/Maildir.py
+++ b/offlineimap/folder/Maildir.py
@@ -35,6 +35,8 @@ except NameError:
 
 from offlineimap import OfflineImapError
 
+#everything up to the first colon or comma:
+prefixmatchre = re.compile('([^:,]*)')
 uidmatchre = re.compile(',U=(\d+)')
 timestampmatchre = re.compile('(\d+)');
 
@@ -79,6 +81,8 @@ class MaildirFolder(BaseFolder):
         self.flagmatchre = re.compile(self.infosep + '.*2,([A-Z]+)')
 
         BaseFolder.__init__(self)
+        #folder's md5, so we can match with recorded file md5 for validity
+        self._foldermd5 = md5(self.getvisiblename()).hexdigest()
         #self.ui is set in BaseFolder.init()
         # Cache the full folder path, as we use getfullname() very often
         self._fullname = os.path.join(self.getroot(), self.getname())
@@ -116,55 +120,73 @@ class MaildirFolder(BaseFolder):
         else:
             return True
 
+    def _parse_filename(self, filename):
+        """Returns the components of a message's file name
+
+        Receives the file name (without path) of a msg.  Usual format is
+        '<%d_%d.%d.%s>,U=<%d>,FMD5=<%s>:2,<FLAGS>' (pointy brackets
+        denoting the various components).
+
+        If FMD5 does not correspond with the current folder MD5, we will
+        return None for the UID & FMD5 (as it is not valid in this
+        folder).  If UID or FMD5 can not be detected, we return `None`
+        for the respective element.  If flags are empty or cannot be
+        detected, we return an empty flags list.
+
+        :returns: (prefix, UID, FMD5, flags)"""
+        prefix, uid, fmd5, flags = None, None, None, []
+
+        prefixmatch = prefixmatchre.match(filename)
+        if prefixmatch:
+            prefix = prefixmatch.group(1)
+        folderstr = ',FMD5=%s' % self._foldermd5
+        foldermatch = folderstr in filename
+        # If there was no folder MD5 specified, or if it mismatches,
+        # assume it is a foreign (new) message and ret: uid, fmd5 = None, None
+        if foldermatch:
+            uidmatch = uidmatchre.search(filename)
+            if uidmatch:
+                uid = long(uidmatch.group(1))
+        flagmatch = self.flagmatchre.search(filename)
+        if flagmatch:
+            flags = [x for x in flagmatch.group(1)]
+            flags.sort()
+        return prefix, uid, fmd5, flags
 
     def _scanfolder(self):
-        """Cache the message list.  Maildir flags are:
-        R (replied)
-        S (seen)
-        T (trashed)
-        D (draft)
-        F (flagged)
-        and must occur in ASCII order."""
+        """Cache the message list from a Maildir.
+
+        Maildir flags are: R (replied) S (seen) T (trashed) D (draft) F
+        (flagged).
+        :returns: dict that can be used as self.messagelist"""
+        maxage = self.config.getdefaultint("Account " + self.accountname,
+                                           "maxage", None)
+        maxsize = self.config.getdefaultint("Account " + self.accountname,
+                                            "maxsize", None)
         retval = {}
         files = []
-        nouidcounter = -1               # Messages without UIDs get
-                                        # negative UID numbers.
-        foldermd5 = md5(self.getvisiblename()).hexdigest()
-        folderstr = ',FMD5=' + foldermd5
+        nouidcounter = -1          # Messages without UIDs get negative UIDs.
         for dirannex in ['new', 'cur']:
             fulldirname = os.path.join(self.getfullname(), dirannex)
-            files.extend(os.path.join(fulldirname, filename) for
+            files.extend((dirannex, filename) for
                          filename in os.listdir(fulldirname))
-        for file in files:
-            messagename = os.path.basename(file)
-
-            #check if there is a parameter for maxage / maxsize - then see if this
-            #message should be considered or not
-            maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1)
-            maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1)
-
-            if(maxage != -1):
-                isnewenough = self._iswithinmaxage(messagename, maxage)
-                if(isnewenough != True):
-                    #this message is older than we should consider....
-                    continue
-
-            #Check and see if the message is too big if the maxsize for this account is set
-            if(maxsize != -1):
-                filesize = os.path.getsize(file)
-                if(filesize > maxsize):
-                    continue
-            
 
-            foldermatch = messagename.find(folderstr) != -1
-            if not foldermatch:
-                # If there is no folder MD5 specified, or if it mismatches,
-                # assume it is a foreign (new) message and generate a
-                # negative uid for it
+        for dirannex, filename in files:
+            #we store just dirannex and filename, ie 'cur/123...'
+            filepath = os.path.join(dirannex, filename)
+            # check maxage/maxsize if this message should be considered
+            if maxage and not self._iswithinmaxage(filename, maxage):
+                continue
+            if maxsize and (os.path.getsize(os.path.join(
+                        self.getfullname(), filepath)) > maxsize):
+                continue
+
+            (prefix, uid, fmd5, flags) = self._parse_filename(filename)
+            if uid is None: # assign negative uid to upload it.
                 uid = nouidcounter
                 nouidcounter -= 1
             else:                       # It comes from our folder.
-                uidmatch = uidmatchre.search(messagename)
+                uidmatch = uidmatchre.search(filename)
                 uid = None
                 if not uidmatch:
                     uid = nouidcounter
@@ -172,14 +194,15 @@ class MaildirFolder(BaseFolder):
                 else:
                     uid = long(uidmatch.group(1))
             #identify flags in the path name
-            flagmatch = self.flagmatchre.search(messagename)
+            flagmatch = self.flagmatchre.search(filename)
             if flagmatch:
                 flags = set(flagmatch.group(1))
             else:
                 flags = set()
+            # 'filepath' is e.g. 'cur/123_U=1_FMD5=2:2,S'
             retval[uid] = {'uid': uid,
                            'flags': flags,
-                           'filename': file}
+                           'filename': filepath}
         return retval
 
     def quickchanged(self, statusfolder):
-- 
1.7.4.1





More information about the OfflineIMAP-project mailing list