[PATCH] fix: support Gmail labels with non-ascii characters

Nicolas Sebrecht nicolas.s-dev at laposte.net
Tue Dec 23 02:16:02 UTC 2014

Blame commit which introduced support for labels: 09556d645ea8.

Signed-off-by: Nicolas Sebrecht <nicolas.s-dev at laposte.net>

WARNING: Never tested. Testers are welcome. Remember to make a full
backup of your mails and cache before each sync while testing.

 offlineimap/folder/LocalStatus.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/offlineimap/folder/LocalStatus.py b/offlineimap/folder/LocalStatus.py
index 1dccf90..3455314 100644
--- a/offlineimap/folder/LocalStatus.py
+++ b/offlineimap/folder/LocalStatus.py
@@ -98,8 +98,18 @@ class LocalStatusFolder(BaseFolder):
                 uid, flags, mtime, labels = line.split('|')
                 uid = long(uid)
-                flags = set(flags)
+                # It must be safe to blindly convert flags to str type.
+                # We do this conversion because we have a lot of str type
+                # assertions in the code.
+                flags = set(flags.encode('ascii', errors='ignore'))
                 mtime = long(mtime)
+                # NOTE: remember that each element of labels might be either of
+                # type unicode or str.
+                # TODO: get clear knowledge from Google of what is the supported
+                # encoding. Then, we should fix the encoding of the cache file
+                # accordingly. If multiple encodings are supported or if we
+                # assert it can change over time, we must handle encoding
+                # conversions nicely.
                 labels = set([lb.strip() for lb in labels.split(',') if len(lb.strip()) > 0])
             except ValueError as e:
                 errstr = "Corrupt line '%s' in cache file '%s'" % \
@@ -120,9 +130,14 @@ class LocalStatusFolder(BaseFolder):
         # loop as many times as version, and update format
         for i in range(1, self.cur_version+1):
+            # NOTE: Due to Gmail labels written in this cache file, python might
+            # open() with a codec other than ascii.
             file = open(self.filename, "rt")
+            # It is safe to expect that the magic line contains only ascii
+            # characters. So, we can encode it to ascii to ensure we work with
+            # the expected str type.
+            line = file.readline().strip().encode('ascii', errors='ignore')
             self.messagelist = {}
-            line = file.readline().strip()
             # convert from format v1
             if line == (self.magicline % 1):
Nicolas Sebrecht

More information about the OfflineIMAP-project mailing list