[PATCH 3/4] introduce utils/uni.py module

Nicolas Sebrecht nicolas.s-dev at laposte.net
Wed Feb 11 15:09:27 GMT 2015


This module gets all unicode-related stuff.

Signed-off-by: Nicolas Sebrecht <nicolas.s-dev at laposte.net>
---
 offlineimap/utils/uni.py | 1125 ++++++++++++++++++++++++++++++++++++++++++++++
 uni-tests.py             |  737 ++++++++++++++++++++++++++++++
 2 files changed, 1862 insertions(+)
 create mode 100644 offlineimap/utils/uni.py
 create mode 100644 uni-tests.py

diff --git a/offlineimap/utils/uni.py b/offlineimap/utils/uni.py
new file mode 100644
index 0000000..6d9cb19
--- /dev/null
+++ b/offlineimap/utils/uni.py
@@ -0,0 +1,1125 @@
+# Copyright (C) 2015 Nicolas Sebrecht
+#
+#    This work is free. You can redistribute it and/or modify it under the
+#    terms of the Do What The Fuck You Want To Public License, Version 2,
+#    as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+
+# Low-level functions to work with unicode.
+#
+# Do our best to prevent double encoding/decoding. For this, we rely on
+# the types of the variables.
+
+#
+# Unicode in OfflineIMAP is documented in API.rst.
+#
+
+import sys
+import shutil
+import os
+import logging
+from functools import total_ordering
+
+from .IMAPClient import imap_utf7
+
+#
+# TODO: move out, add CLI argument for -d.
+#
+# Each flag is assigned twice and the second assignment wins; presumably
+# debugging is switched on by dropping the 'False' line.
+_DEBUG = True
+_DEBUG = False
+if _DEBUG:
+    # Only needed by the debugging code paths (stack inspection).
+    import inspect
+_DEBUG_OBJS = True    # For 'BaseString' objects. This is VERY verbose.
+_DEBUG_OBJS = False   # For 'BaseString' objects. This is VERY verbose.
+
+# Expected encoding to work with, outside specific requirements. This encoding
+# can be used as a transition encoding.
+# There are assumptions that the ASCII charset is a subset
+# of this encoding.
+# Default encoding must support ALL characters of Unicode.
+ENCODING = 'UTF-8'
+
+# Depends on the LANG or LC_CTYPE environment variables.
+FS_ENCODING = sys.getfilesystemencoding()
+
+# Standard legacy encoding is fixed. Mostly used to encode exception
+# messages.
+STD_ENCODING = 'ascii'
+
+
+#
+# Store context.
+#
+# None until set_unicode_context() is called, then True or False.
+_use_unicode = None
+
+#
+# Exceptions.
+#
+class UniError(Exception):
+    """Namespace exception: raised for any error detected in this module."""
+
+
+"""
+#############################################################
+
+# Helpers.
+
+#############################################################
+"""
+
+
+def _debug_info(msg=''):
+    """Print msg prefixed with "DEBUG INFO" when _DEBUG_OBJS is enabled."""
+
+    if not _DEBUG_OBJS:
+        return
+    # Python 2 print statement: the trailing comma keeps the prefix and the
+    # message on the same output line.
+    print("DEBUG INFO"),
+    print(": %s"% msg)
+
+def _warn(msg):
+    """Log msg at WARNING level on the root logger with a module prefix."""
+
+    # logging.warn() is a deprecated alias of logging.warning().
+    logging.warning(u"UNICODE WARNING: %s"% msg)
+
+#
+# Don't be arrogant, for now. We will be stricter over time. ,-)
+#
+def _setter_helpers():
+    """Return the callables used to coerce a value of unexpected type.
+
+    The list holds the native string type of the current context: unicode
+    when Unicode support is enabled, str otherwise. Raises UniError (from
+    use_unicode) if the context is not set."""
+
+    native_type = unicode if use_unicode() else str
+    return [native_type]
+# Don't forget this might fail and raise an exception.
+def _assert(condition, friendly_funcs=[], data=None, stack_level=4, msg=''):
+    """Soft assertion: return data untouched when condition holds.
+
+    Otherwise log a warning, then pass data through each callable of
+    friendly_funcs in turn (any of them may raise) and return the result.
+    The stack_level argument is accepted but not used."""
+
+    if not condition:
+        _warn("Assertion error detected: %s"% (msg))
+        for helper in friendly_funcs:
+            data = helper(data)
+    return data
+
+
+#
+# XXX: might need more love to make it more eyes-friendly and easier to use.
+#
+def _text_for_msg(obj):
+    """Return obj as a basestring so that uni2std() can process it.
+
+    None becomes the empty string; other non-string objects (exceptions,
+    integers...) are converted with unicode(), falling back to repr()."""
+
+    if obj is None:
+        return ''
+    if isinstance(obj, basestring):
+        return obj
+    try:
+        return unicode(obj)
+    except UnicodeError:
+        return repr(obj)
+
+
+def _format_exception_msg(original_msg, exception_msg=None, add_msg=[]):
+    """Format exception message to be raised with UniError.
+
+    Meant for internal use only (functions on this module).
+
+    Understanding Python Unicode exceptions might be hard. Improve basic
+    exception by adding surrounding information about context.
+
+    :params: handles 3 kinds of information passed as arguments:
+    - original_msg: message from the original Python Unicode exception (the
+      exception object itself is accepted);
+    - exception_msg: optional context message about the caller's context (from
+      outside this module), None means no message;
+    - add_msg: additional optional message from the function in this module
+      where the Python Unicode exception is raised.
+
+    Third argument is a list of lines nicely printed.
+
+    Returns the formatted message, ASCII only."""
+
+    # Callers pass the caught exception object and may leave exception_msg to
+    # None: turn both into strings first since uni2std() works on basestring
+    # instances only. This also covers other types like int.
+    original_msg = _text_for_msg(original_msg)
+    exception_msg = uni2std(_text_for_msg(exception_msg))
+
+    msg = "Module 'uni:"
+    msg += "\nException message\n  (from original exception): %s"% \
+        uni2std(original_msg)
+    if exception_msg:
+        msg += "\nException message"
+        for line in exception_msg.split('\n'):
+            msg += "\n  (from caller): %s"% uni2std(line)
+    if add_msg:
+        msg += "\nException message"
+        if isinstance(add_msg, list):
+            for line in add_msg:
+                msg += "\n  (from function in module uni): %s"% \
+                    uni2std(line)
+        else:
+            # Single message: appended as is, without the per-line prefix.
+            msg += uni2std(add_msg)
+    return msg
+
+
+def set_unicode_context(use_unicode):
+    """Record, once and for all, whether Unicode support is enabled.
+
+    Raises UniError if the context was already set or if use_unicode is
+    neither True nor False."""
+
+    global _use_unicode
+
+    # Set context once for all.
+    already_set = _use_unicode is not None
+    if already_set:
+        raise UniError("Trying to change of context!")
+    if not (use_unicode in [True, False]):
+        reason = "unicode context must be True or False (got: %s)"% \
+            use_unicode
+        raise UniError(reason)
+
+    _debug_info("setting 'use unicode' context to: %s"% use_unicode)
+    _use_unicode = use_unicode
+
+def use_unicode():
+    """Return the Unicode context flag; raise UniError if it was never set."""
+
+    if _use_unicode is not None:
+        return _use_unicode
+    raise UniError("asking for a unicode job while context is not set")
+
+
+"""
+#############################################################
+
+# Actually work on encodings.
+
+# Preventing from double encoding/decoding is not an easy task. We try to avoid
+# that mess by relying on variable types.
+
+# Most functions here accept the unusual 'exception_msg' parameter, optional,
+# allowing better information on Unicode exceptions.
+
+#############################################################
+"""
+
+def isASCII(s, exception_msg=None):
+    """Tell whether the string s is made of ASCII characters only.
+
+    Returns True or False when exception_msg is None. When exception_msg is
+    given, a non-ASCII string raises UniError with that message instead of
+    returning False. Raises UniError if s is not a basestring."""
+
+    if not isinstance(s, basestring):
+        raise UniError("isASCII cannot work on %s"% repr(s))
+    try:
+        s.encode('ascii')
+    except UnicodeError:
+        # Only Unicode failures mean "not ASCII": unicode input fails to
+        # encode, str input fails on the implicit decode done by Python 2.
+        if exception_msg is None:
+            return False
+        raise UniError("%s, got '%s'"% (exception_msg, uni2fs(s)))
+    return True
+
+
+def convert(direction, s, encoding, errors, exception_msg=''):
+    """Encode or decode s; lowest level function of the module.
+
+    direction is 'uni2bytes' (s is encoded) or 'bytes2uni' (s is decoded);
+    with any other direction s is returned as is. Raises UniError if s is not
+    a basestring or if Python raises a Unicode error, in which case the
+    original traceback is kept."""
+
+    if not isinstance(s, basestring):
+        raise UniError("cannot encode/decode on %s"% repr(s))
+    try:
+        if direction == 'uni2bytes':
+            return s.encode(encoding, errors=errors)
+        if direction == 'bytes2uni':
+            return s.decode(encoding, errors=errors)
+        return s
+    except (UnicodeDecodeError, UnicodeEncodeError):
+        ex, tb = sys.exc_info()[1:]
+        caller_msg = uni2std(exception_msg)
+        details = [
+            "direction=%s"% uni2std(direction),
+            "type(s): %s"% str(type(s)),
+            "de/encoding=%s"% uni2std(encoding),
+            "errors=%s"% uni2std(errors)]
+        msg = _format_exception_msg(ex, caller_msg, details)
+        raise UniError(msg), None, tb
+
+
+def uni2bytes(u, encoding=ENCODING, errors='strict', exception_msg=''):
+    """Encode the unicode string u back to a string of bytes.
+
+    Thin wrapper around convert() which:
+    - tunes the error raised to make it more user-friendly;
+    - keeps callers compact: no extra try/except required."""
+
+    return convert(direction='uni2bytes', s=u, encoding=encoding,
+        errors=errors, exception_msg=exception_msg)
+
+
+def bytes2uni(b, encoding=ENCODING, errors='strict', exception_msg=''):
+    """Decode the string of bytes b to unicode; thin wrapper around
+    convert()."""
+
+    return convert(direction='bytes2uni', s=b, encoding=encoding,
+        errors=errors, exception_msg=exception_msg)
+
+
+def uni2str(u, exception_msg=''):
+    """Cast a unicode value to the str type the hard way.
+
+    Values which are not of type unicode are returned untouched. Raises
+    UniError if the string has any character outside of the ASCII subset of
+    Unicode."""
+
+    if type(u) != unicode:
+        return u
+    try:
+        return str(u)
+    except (UnicodeDecodeError, UnicodeEncodeError):
+        ex, tb = sys.exc_info()[1:]
+        caller_msg = uni2std(exception_msg)
+        msg = _format_exception_msg(ex, caller_msg, [
+            "unsupported character in input '%s'"% uni2std(u)])
+        raise UniError(msg), None, tb
+
+
+def uni2std(s, exception_msg=''):
+    """Convert string of bytes or unicode to a string of ASCII characters only.
+
+    Must always work without error so that it can safely be called whether
+    Unicode support is enabled or not. Only takes exception_msg argument for
+    symmetry.
+
+    Non-string values (exceptions, integers, None...) are first turned into
+    their text representation. Characters which cannot be represented are
+    replaced.
+
+    Useful to encode text in exceptions if you're not sure encoding will work,
+    for example."""
+
+    if not isinstance(s, basestring):
+        # The lower layers only accept basestring instances.
+        try:
+            s = unicode(s)
+        except UnicodeError:
+            s = repr(s)
+
+    # Handle string of bytes since it could already be encoded.
+    # In this case, we assume encoding to be ENCODING.
+    if type(s) == str:
+        try:
+            s = bytes2uni(s)
+        except UniError:
+            # This is the best we can do.
+            _warn("WARNING: a string could not be decoded from default "
+                "ENCODING to Unicode, ignoring")
+            # Keep going with a lossy decode: encoding the raw str below
+            # would trigger an implicit ASCII decode and fail again.
+            s = bytes2uni(s, errors='replace')
+    return uni2bytes(s, encoding=STD_ENCODING, errors='replace')
+
+
+def fs2uni(s, errors='strict', exception_msg=''):
+    """Decode the filesystem encoded string of bytes s (type str).
+
+    Returns s in Unicode (type unicode). Values which are not of type str are
+    returned untouched."""
+
+    if type(s) != str:
+        return s
+    try:
+        return bytes2uni(s, encoding=FS_ENCODING, errors=errors)
+    except (UnicodeDecodeError, UnicodeEncodeError):
+        ex, tb = sys.exc_info()[1:]
+        caller_msg = uni2std(exception_msg)
+        msg = _format_exception_msg(ex, caller_msg, [
+            "unsupported character in '%s'"% uni2std(s)])
+        raise UniError(msg), None, tb
+
+
+def uni2fs(u, errors='strict', exception_msg=''):
+    """Encode the unicode string u with the filesystem encoding.
+
+    Expected argument u is unicode or str encoded with ENCODING encoding.
+    Returns u filesystem encoded (type str); values which are not of type
+    unicode are returned untouched."""
+
+    if type(u) != unicode:
+        return u
+    try:
+        return uni2bytes(u, encoding=FS_ENCODING, errors=errors)
+    except UnicodeEncodeError:
+        ex, tb = sys.exc_info()[1:]
+        caller_msg = uni2std(exception_msg)
+        msg = _format_exception_msg(ex, caller_msg, [
+            "unsupported character in '%s'"% uni2std(u)])
+        raise UniError(msg), None, tb
+
+
+"""
+# IMAP modified UTF-7 charset has to be handled differently.
+#
+# IMAP charset is a charset allowing to encode non-ASCII characters with only
+# ASCII characters. Encoded characters are variable-length.  E.g. lowercase
+# e-acute is encoded to '&AOk-'.
+#
+# UTF-7 is NOT a Unicode standard but it is more efficient on the internet and
+# legacy compatible with the expectations of the server-side softwares running
+# Usenet, SMTP, etc.
+#
+# IMAP use a modified version of UTF-7. See http://tools.ietf.org/html/rfc2060
+#
+# Anyway, this pure-ASCII encoding means that the encoded string can either be
+# in bytes or unicode types in Python.
+#
+# Have fun! ,-)
+"""
+
+def imap2uni(b, exception_msg=None):
+    """Decode b from the IMAP modified UTF-7 charset.
+
+    Input may still be a unicode string. Returned value is Unicode. Any
+    failure of the decoder is raised again as UniError, keeping the original
+    traceback."""
+
+    try:
+        decoded = imap_utf7.decode(b)
+    except Exception:
+        ex, tb = sys.exc_info()[1:]
+        msg = _format_exception_msg(ex, exception_msg, [
+            "unsupported character in input '%s'"% uni2std(b)])
+        raise UniError(msg), None, tb
+    return decoded
+
+
+def uni2imap(u, exception_msg=None):
+    """Encode u to the IMAP modified UTF-7 charset.
+
+    Output is still a unicode string. Any failure of the encoder is raised
+    again as UniError, keeping the original traceback."""
+
+    try:
+        encoded = imap_utf7.encode(u)
+    except Exception:
+        ex, tb = sys.exc_info()[1:]
+        msg = _format_exception_msg(ex, exception_msg, [
+            "unsupported character in input '%s'"% uni2std(u)])
+        raise UniError(msg), None, tb
+    return encoded
+
+
+"""
+#############################################################
+
+# Core uni objects and factories.
+
+# Read the API documentation about the design.
+
+#############################################################
+"""
+
+#
+# BaseString (parent base object).
+#
+@total_ordering # Implement all the methods for comparisons.
+class BaseString(object):
+    """Base Object for StrObject and UnicodeObject.
+
+    It's a composition for the unicode/str types. Encode and decode must be done
+    by children. BaseString might coerce strings in order to get expected types
+    to not let Python do it itself at random times. This helps catching
+    impossible coercing as soon as possible.
+
+    Special methods are supported as far as possible, as long as mixing
+    encodings can be avoided. E.g. any call to the implicit __str__,
+    __unicode__ and __len__ methods raises an AttributeError."""
+
+    def __init__(self, expected_type, value):
+        """Bundle value, which must be None or a string.
+
+        expected_type is the string type (str or unicode) the bundled value is
+        coerced to by the value setter."""
+
+        _debug_info(u"in BaseString:__init__")
+        self._expected_type = expected_type
+
+        # Avoid loop recursion, value setter expects None.
+        self.__value = None
+
+        # This line is using value setter...
+        self.value = value
+
+    def __repr__(self):
+        # Tune output a bit.
+        type_name = str(type(self)).split('.').pop().rstrip("'>")
+        if self.__value is None:
+            return "<%s (empty)>"% type_name
+        else:
+            try:
+                return "<%s %s>"% (type_name, repr(uni2fs(self.__value)))
+            except Exception:
+                pass
+        # Just in case... we don't want to fail on a call to __repr__!
+        return "<%s %s>"% (type_name, repr(uni2std(self.__value)))
+
+    def __getattr__(self, name):
+        """Bind all undefined method calls to the value.
+
+        Returns a callable applying the requested method to the bundled
+        value; strings returned by that method are bundled again into uni
+        objects."""
+
+        _debug_info("in %s:__getattr__, asked for '%s'"% (repr(self), name))
+        # Can't remember why. Probably avoid loop recursion.
+        if name == '_expected_type':
+            raise AttributeError, name
+
+        # Python will fully namespace the attributes on some attempts to work on
+        # non-existing attributes from a child with such prefix. Strip
+        # such prefix to fallback on asked BaseString attribute.
+        for child_attr_prefix in ['_UnicodeObject', '_StrObject']:
+            if name.startswith(child_attr_prefix):
+                # Remove the prefix.
+                name = name[len(child_attr_prefix):]
+                # The format string is evaluated even when debugging is off,
+                # so it must be valid.
+                _debug_info("in %s:__getattr__, stripped child "
+                    "prefix, now: %s"% (repr(self), name))
+        # Special case: a child is reading self.__value.
+        # Also, avoid loop recursion.
+        if name == '__value':
+            _debug_info("in %s:__getattr__, returning BaseString.value"%
+                repr(self))
+            name = 'value'
+
+        _debug_info("in %s:__getattr__, applying method %s to "
+            "BaseString.__value"% (repr(self), name))
+
+        def callable(*args, **kwargs):
+            """Re-bundle basestring objects into uni objects."""
+            retval = getattr(self.__value, name)(*args, **kwargs)
+            if isinstance(retval, basestring):
+                retval = valueString(retval)
+            return retval
+
+        return callable
+    #
+    # With new style objects, python bypass the __getattr__ method for special
+    # methods... They are implemented later.
+    #
+
+    #
+    # Forbid calls to these methods. They are used with wrong assumptions.
+    # Users should apply these methods if they know on what they are working on,
+    # so they should explicitly ask for getter.
+    #
+    def _raise_forbidden(self, attr, helper=''):
+        raise AttributeError("'%s' has implicit encoding usage, requested method "
+            "%s is forbidden%s"% (repr(self), attr, helper))
+    def __str__(self):
+        self._raise_forbidden('__str__')
+    def __unicode__(self):
+        self._raise_forbidden('__unicode__')
+    def __len__(self):
+        self._raise_forbidden('__len__', ", perhaps forgot the getter "
+            "in the statement")
+    #
+    # Making these special methods smart is a bit awkward. We want to allow
+    # implicit conversions while we do our best to support most unexpected types.
+    #
+    def __explicit_special(self, other, attr, op=None, reverse=None):
+        """Apply requested attribute.
+
+        other is either a uni object or a raw string; a raw string is first
+        bundled into a uni object. attr is the special method to apply to our
+        value, op its symbol for error messages. When reverse is given, that
+        method is applied to other's value instead (reflected operations) and
+        the result is returned with the original type of other.
+
+        Raises AttributeError when other is of an unsupported type."""
+
+        other_basestring_type=None
+        _debug_info("in %s, got %s request with %s"%
+            (repr(self), attr, repr(other)))
+
+        if isinstance(other, basestring):
+            try:
+                # This is not ideal and might fail; catch callers.
+                _warn("got unexpected type %s while applying '%s', "%
+                    (type(other), attr))
+
+                other_basestring_type = type(other)
+                # Other type should match our.
+                if self._expected_type != other_basestring_type:
+                    # Encode both sides directly to expected type. Basically
+                    # checks if both are plain ASCII to avoid mixing
+                    # incompatible encodings. We might want to improve
+                    # this check: if _expected_type is unicode, latin-1 encoded
+                    # strings should be allowed.
+                    # NOTE(review): uni2str() reports failures as UniError,
+                    # which is not caught by the except clause below.
+                    uni2str(other, exception_msg=
+                        "asked to mix incompatible strings for uni object %s"
+                        ", other is not plain ASCII"% repr(other))
+                    uni2str(self.value, exception_msg=
+                        "asked to mix incompatible strings for uni object %s"
+                        ", we are not plain ASCII"% repr(self))
+                other = valueString(self._expected_type(other))
+            except (UnicodeDecodeError, UnicodeEncodeError):
+                # Prevent from mixing with implicit encoding in Unicode context.
+                if op is None:
+                    op = ''
+                else:
+                    op = ' (%s operator)'% op
+                raise AttributeError("'%s' is not of expected %s or is not plain "
+                    "ASCII, requested operation '%s'%s is "
+                    "forbidden when encoding is implicit"%
+                    (str(type(other)), self._expected_type, attr, op))
+        if isinstance(other, BaseString):
+            if reverse:
+                return other_basestring_type(
+                    getattr(other.value, reverse)(self.value))
+            retval = getattr(self.value, attr)(other.value)
+            if isinstance(retval, basestring):
+                return valueString(retval)
+            return retval
+
+        raise AttributeError, attr
+
+    # TODO: implement __i*__ special methods if it worth.
+    def __lt__(self, other):
+        return self.__explicit_special(other, '__lt__')
+    def __eq__(self, other):
+        # None requires special treatments.
+        if other is None:
+            if self.value is None:
+                return True
+            return self.value.__eq__(None)
+        return self.__explicit_special(other, '__eq__', '==')
+    def __add__(self, other):
+        return self.__explicit_special(other, '__add__', '+')
+    def __radd__(self, other):
+        return self.__explicit_special(other, '__radd__', reverse='__add__')
+    def __mul__(self, other):
+        return self.__explicit_special(other, '__mul__')
+    def __rmul__(self, other):
+        return self.__explicit_special(other, '__rmul__', reverse='__mul__')
+    def __mod__(self, other):
+        return self.__explicit_special(other, '__mod__', '%')
+    def __rmod__(self, other):
+        return self.__explicit_special(other, '__rmod__', '%', reverse='__mod__')
+    def __contains__(self, other):
+        return self.__explicit_special(other, '__contains__', "'in'")
+    #
+    # Blindly apply following special methods to the embedded value is fine.
+    #
+    def __iter__(self):
+        _debug_info("in %s:__iter__"% repr(self))
+        return iter(self.value)
+    def __format__(self, *args, **kwargs):
+        _debug_info("in %s:__format__"% repr(self))
+        return valueString(self.value.format(*args, **kwargs))
+    def __getitem__(self, key):
+        _debug_info("in %s:__getitem__"% repr(self))
+        return valueString(self.value.__getitem__(key))
+    # __eq__() is defined on self.value.
+    def __hash__(self):
+        _debug_info("in %s:__hash__"% repr(self))
+        return self.value.__hash__()
+    def __nonzero__(self):
+        _debug_info("in %s:__nonzero__"% repr(self))
+        if self.value is None:
+            return False
+        return self.value.__len__()
+    #
+    # Not a special method but it is so common.
+    #
+    def split(self, *args, **kwargs):
+        """Implement the split() method. Return a list of uni objects."""
+
+        # FIXME: implement support of uni objects as splitter.
+        return [valueString(obj) for obj in self.value.split(*args, **kwargs)]
+
+    #
+    # Factorized helpers.
+    #
+    def _prevent_double_composing(self, value):
+        """Return the bundled string if value is itself a uni object."""
+
+        if isinstance(value, BaseString):
+            _warn("preventing from double composing, got %s"% (repr(value)))
+            return value.value
+        else:
+            return value
+    def _raise_if_empty(self):
+        if self.__value is None:
+            raise UniError("asked for a value while empty")
+    def _coerce_string(self, value, where):
+        """Return value with the expected type, casting it if required.
+
+        where is a description of the caller used in the warning logged on
+        type mismatch. Raises UniError if the cast fails."""
+
+        try:
+            return _assert(self._expected_type == type(value),
+                _setter_helpers(), value, stack_level=4, msg=
+                "%s, value has unexpected type %s"% (where, type(value)))
+        except Exception as e:
+            raise UniError("in %svalue(setter), helper failed to 'cast' "
+                "value %s to %s:\n %s"%
+                (repr(self), repr(value), self._expected_type, str(e))), \
+                None, sys.exc_info()[2]
+
+    #
+    # Implement common properties.
+    #
+    @property
+    def value(self):
+        _debug_info("in %s:value (getter)"% repr(self))
+        return self.__value
+
+    @value.setter
+    def value(self, value):
+        info = "in %s:value (setter)"% repr(self)
+        _debug_info(info)
+        value = self._prevent_double_composing(value)
+        # Sanity check.
+        if value is None:
+            self.__value = None
+            return
+        if not isinstance(value, basestring):
+            raise UniError("a uni object setter requires a basestring "
+                "instance or None to work properly, got %s"% repr(value))
+        value = self._coerce_string(value, info)
+        self.__value = value
+
+#
+# UnicodeObject (object used in unicode context).
+#
+class UnicodeObject(BaseString):
+    """High level object used with Unicode support enabled.
+
+    The correct encoded string relies on the context and it's easy to mix
+    encodings in practice. We avoid wrong expectations by requiring explicit
+    encoding through the accessors.
+
+    To prevent from most basic errors on writes, there are (gentle) type
+    assertions on setters.
+
+    The encode/decode operations are done at the first read/get access from the
+    bundled/external value. The requested encoding is immediately cached.
+
+    See BaseString for more details on low internals."""
+
+    def __init__(self, value=None):
+        # This object is broken if the context is wrong.
+        if not _DEBUG_OBJS and not use_unicode():
+            raise UniError("cannot instanciate UnicodeObject in this context")
+        _debug_info(u"in UnicodeObject:__init__")
+        BaseString.__init__(self, unicode, value)
+
+        self._empty_caches()
+
+    def _empty_caches(self):
+        """Forget every cached encoded form of the bundled value."""
+
+        self._dbytes = None
+        self._imap = None
+        self._fs = None
+        self._std = None
+
+    #
+    # Factorize getters.
+    #
+    def _getter(self, name, encode_funcs):
+        """Return the bundled value encoded for the accessor name.
+
+        The value goes through each function of encode_funcs in turn; the
+        result is stored in the '_<name>' cache and served from there on next
+        accesses. Returns None when the object is empty."""
+
+        info = "in %s:%s (getter)"% (repr(self), name)
+        _debug_info(info)
+        cache = '_%s'% name
+        # Use cache.
+        if self.__dict__[cache]:
+            _debug_info("%s, returning cached value"% info)
+            return self.__dict__[cache]
+        # Need to set the cache.
+        if self.value is None:
+            self.__dict__[cache] = None
+            return self.__dict__[cache]
+        # Cache string for further use.
+        value = self.value
+        _debug_info("%s, encoding"% info)
+        #XXX do not default to encode_func!
+        for encode_func in encode_funcs:
+            value = encode_func(value, exception_msg=
+                "%s, cannot convert bundled string"% info)
+        self.__dict__[cache] = value
+        return self.__dict__[cache]
+    #
+    # Factorize setters.
+    #
+    def _setter(self, name, decode_funcs, value, assert_type):
+        """Bundle value, given encoded as expected by the accessor name.
+
+        The value goes through each function of decode_funcs in turn to get
+        the Unicode string to bundle; the given string is kept in the
+        '_<name>' cache. None empties the object. The assert_type argument is
+        accepted but not used."""
+
+        info = "in %s:%s (setter)"% (repr(self), name)
+        _debug_info(info)
+        cache = '_%s'% name
+        self._empty_caches()
+        if value is None:
+            # Empty the bundled value too: writing to self.__dict__[name]
+            # would be shadowed by the property of the same name and would
+            # leave the previous string in place.
+            self.value = None
+            return
+        if isinstance(value, BaseString):
+            _warn("preventing from double composing, got %s"% repr(value))
+            value = value.value
+        self.__dict__[cache] = value # Cache given string for further use.
+        for func in decode_funcs:
+            value = func(value, exception_msg=
+                "%s, cannot convert bundled string"% info)
+        value = self._coerce_string(value, info)
+        # Store Unicode string.
+        self.value = value
+
+    @property
+    def uni(self):
+        _debug_info(u"in UnicodeObject:uni (getter)")
+        return self.value
+
+    @property
+    def dbytes(self):
+        return self._getter('dbytes', [uni2bytes])
+
+    @property
+    def imap(self):
+        return self._getter('imap', [uni2imap, uni2str])
+
+    @property
+    def fs(self):
+        return self._getter('fs', [uni2fs])
+
+    @property
+    def std(self):
+        return self._getter('std', [uni2std])
+
+    @uni.setter
+    def uni(self, u):
+        return self._setter('uni', [], u, unicode)
+
+    @dbytes.setter
+    def dbytes(self, s):
+        return self._setter('dbytes', [bytes2uni], s, str)
+
+    @imap.setter
+    def imap(self, s):
+        return self._setter('imap', [imap2uni], s, str)
+
+    @fs.setter
+    def fs(self, s):
+        return self._setter('fs', [fs2uni], s, str)
+
+    @std.setter
+    def std(self, s):
+        _debug_info(u"in %s:std (setter)"% repr(self))
+        raise UniError("std is non conventionnal encoding,\n it's not "
+            "possible to recover the original string because the "
+            "encode operation is destructive.")
+
+#
+# StrObject (object used in str legacy context).
+#
+class StrObject(BaseString):
+    """Legacy context counterpart of UnicodeObject.
+
+    Bundles a str value and exposes the same accessors as UnicodeObject so
+    that both can be used in a context-free way. See BaseString and
+    UnicodeObject for more details."""
+
+    def __init__(self, value=None):
+        # Instances are broken in Unicode context: refuse to build one.
+        # They are useful for debugging, though.
+        if use_unicode():
+            raise UniError("cannot instanciate StrObject in this context")
+        _debug_info(u"in StrObject:__init__")
+        super(StrObject, self).__init__(str, value)
+
+    #
+    # 'uni' accessor.
+    #
+    @property
+    def uni(self):
+        _debug_info(u"in %s:uni (getter)"% repr(self))
+        return self.value
+
+    @uni.setter
+    def uni(self, u):
+        _debug_info(u"in %s:uni (setter)"% repr(self))
+        if type(u) != unicode:
+            self.value = u
+            return
+        # Raise if unicode is not plain ASCII.
+        self.value = uni2str(u, exception_msg=
+            "StrObject:uni (setter) could not convert string from "
+            "unicode to str the hard way, provided string is not full "
+            "ASCII")
+
+    #
+    # 'std' accessor. Not a true encoding, here for ease of use.
+    #
+    @property
+    def std(self):
+        _debug_info(u"in %s:std (getter)"% repr(self))
+        # Not cached: std values are mainly meant for exception messages, so
+        # reading them more than once is unlikely and caching is not worth
+        # the extra code.
+        return uni2std(self.value)
+
+    @std.setter
+    def std(self, s):
+        _debug_info(u"in %s:std (setter)"% repr(self))
+        raise UniError("std is non conventionnal encoding, it's not "
+            "possible to recover the original string because the "
+            "encode operation is destructive.")
+
+    # The remaining accessors of UnicodeObject are plain aliases of the value
+    # property: in legacy context no encoding/decoding happens. This only
+    # matches UnicodeObject semantics if the setter gets a str value.
+    imap = fs = dbytes = BaseString.value
+
+
+#
+# RawObject
+#
+class RawObject(object):
+    """Raw bundler.
+
+    This object is a bit special. Currently, StrObject and UnicodeObject can
+    cast types internally in order to be friendly with current code base.
+    RawObject is meant for variables needing to *forbid* any attempt to
+    encode/decode.
+
+    RawObject ensures no such thing will happen. The semantic is intentionally
+    broken to ensure no encoding/decoding is badly assumed while using this
+    object.
+
+    In short, this helps not taking a simple variable for a uni object while
+    forcing about bad encoding expectations. The intent of the developer is made
+    clear and explicit: "not forgot to use an uni object and zero
+    encodings/coercing on it".
+
+    This is useful in very rare cases like password handling."""
+
+    def __init__(self, value=None):
+        _debug_info(u"in RawObject:__init__")
+        # Bundle the given value as is, without any check or coercion.
+        self.__value = value
+
+    def __getattr__(self, attr, *args, **kwargs):
+        # Looks up attr on the bundled value and calls it right away.
+        # NOTE(review): Python only passes the attribute name here, so args
+        # and kwargs are presumably always empty -- confirm this is intended.
+        return getattr(self.__value, attr)(*args, **kwargs)
+
+    @property
+    def raw(self):
+        """The bundled value, exactly as it was stored."""
+
+        return self.__value
+
+    @raw.setter
+    def raw(self, value):
+        self.__value = value
+
+#
+# _String factory (uniq entry point whatever context).
+#
+def _String(value=None):
+    """Build the uni object matching the current context.
+
+    Returns a UnicodeObject if Unicode support is enabled, otherwise returns a
+    StrObject.
+
+    Both objects share the same semantics, so callers can use the result the
+    same way without knowing the context and without having to care about
+    what they work with."""
+
+    _debug_info("in _String")
+
+    factory = UnicodeObject if use_unicode() else StrObject
+    return factory(value)
+
+# TODO: implement function to check identical semantics.
+
+"""
+#############################################################
+
+# Public factories.
+
+# Returns an uni String object to work with strings.
+
+# The argument encoding must match the prefix of the function name. This is how
+# we make the encoding explicit when taking a string.
+
+#############################################################
+"""
+
+def noneString():
+    """Build a 'String' object bundling no value at all."""
+
+    _debug_info("in noneString factory")
+    empty = _String()
+    return empty
+
+def valueString(s):
+    """Build a 'String' object from s through the plain 'value' setter.
+
+    No decoding is applied: s is expected to already be in the native string
+    form of the current context."""
+
+    _debug_info("in valueString factory for %s"% repr(s))
+    bundle = _String()
+    bundle.value = s
+    return bundle
+
+def uniString(s):
+    """Build a 'String' object from the unicode string s ('uni' setter)."""
+
+    _debug_info("in uniString factory for %s"% repr(s))
+    bundle = _String()
+    bundle.uni = s
+    return bundle
+
+def dbytesString(s):
+    """Build a 'String' object from a default encoded string of bytes.
+
+    Read the name as "from default encoded string of bytes to String
+    object"; s goes through the 'dbytes' setter."""
+
+    _debug_info("in dbytesString factory for %s"% repr(s))
+    bundle = _String()
+    bundle.dbytes = s
+    return bundle
+
+def imapString(s):
+    """Build a 'String' object from the imap encoded string s (type str or
+    unicode); s goes through the 'imap' setter."""
+
+    _debug_info("in imapString factory for %s"% repr(s))
+    bundle = _String()
+    bundle.imap = s
+    return bundle
+
+def fsString(s):
+    """Build a 'String' object from a filesystem encoded string.
+
+    The string is handed to the 'fs' setter of a fresh object."""
+
+    _debug_info("in fsString factory for %s"% repr(s))
+    obj = _String()
+    obj.fs = s
+    return obj
+
+
+"""
+#############################################################
+
+# Factorized stuff.
+
+# From here, add the factorized functions/classes. They are useful for us only
+# when dealing with Unicode, that's why they stand in this module.
+
+# On the other hand, they are not purely related to Unicode in the sense that
+# they suppose knowledge of OfflineIMAP logic. No other kind of software would
+# make use of them. That's why they are considered as outside functions
+# regarding the exception_msg handling point of view of this module.
+
+#############################################################
+"""
+
+class UnicodeFormatter(logging.Formatter):
+    """Wrap logging.Formatter to handle Unicode.
+
+    We have to do this because each Handler handles Unicode in its own way.
+    Some handlers might not handle Unicode at all.
+    On top of that, the encoding varies with the Handler."""
+
+    def __init__(self, fmt, datefmt=None, encode_function=None):
+        """Set up the formatter.
+
+        :param fmt: format string, handed to logging.Formatter.
+        :param datefmt: date format, handed to logging.Formatter.
+        :param encode_function: optional callable applied by format() to the
+            formatted message. When unset, format() returns what
+            logging.Formatter.format() produced.
+        """
+        logging.Formatter.__init__(self, fmt, datefmt)
+        self.encode_function = encode_function
+
+    def format(self, record):
+        """Format the record with logging.Formatter, then pass the result
+        through the encode_function given to the constructor, if any."""
+
+        result = logging.Formatter.format(self, record)
+        if self.encode_function:
+            # Debugging aid: report loggers called with a str message.
+            if _DEBUG and type(record.msg) == str:
+                # NOTE(review): frame 9 is presumably the caller of the logging
+                # call; this index depends on the call depth -- to confirm.
+                infos = inspect.stack()[9]
+                _warn(u"logger '%s' called with str type in: %s:%s\n"
+                    "  in function %s() \"%s\""% (record.name,
+                    record.pathname, record.lineno, infos[3], record.msg))
+            result = self.encode_function(result)
+        return result
+
+
+def diverged_foldernames(foldername):
+    """Compare foldernames between the expected and unexpected encodings.
+
+    Support of Unicode for foldernames has to do more than just encoding strings
+    right. If previously run without Unicode support, the folder might exist on
+    disk with the wrong encoding.
+
+    Above statement is also true for the opposite: if currently running with
+    Unicode support disabled while previously run with it enabled.
+
+    :param foldername: uni object.
+        It MUST be the basename to avoid mixing encodings with the dirname part.
+
+    Returns 3 values:
+    - True if foldernames diverged, False otherwise (bool)
+    - string with the unexpected encoding (bytes)
+    - string with the expected encoding (bytes)
+    """
+
+    # Work on (str) UTF-8 decoded strings to compare results.
+
+    assert type(foldername) == type(noneString()), (
+        "diverged_foldernames: expects uni object argument, got %s"%
+        type(foldername))
+    # From here, foldername is the filesystem representation of the uni object.
+    foldername = foldername.fs
+
+    if use_unicode():
+        assert type(foldername) == str
+        expected = foldername
+        # If unicode support were disabled we would have worked with an ASCII
+        # string of bytes encoded with IMAP UTF-7.  Bundled foldername is
+        # currently standard Unicode code points.
+        #
+        # Encoding chain to redress from current Unicode is:
+        #   filesystem -> Unicode -> IMAP -> str
+        unexpected = uni2str(uni2imap(fs2uni(foldername)))
+    else:
+        assert type(foldername) == str
+        # Currently, bundled foldername is an ASCII string of bytes encoded with
+        # IMAP UTF-7.
+        expected = foldername
+        # If unicode support were enabled we would have worked with a filesystem
+        # encoded string of bytes built from the Unicode code points of the
+        # IMAP _decoded_ string. Again, bundled foldername is currently an ASCII
+        # string of bytes encoded with IMAP UTF-7.
+        #
+        # Encoding chain to redress from current IMAP encoded string is:
+        #   IMAP -> Unicode -> filesystem
+        unexpected = uni2fs(imap2uni(foldername))
+
+    diverged = ( expected != unexpected )
+
+    if diverged and _DEBUG:
+        _warn(u"diverged_foldernames: got: %s"% repr(foldername))
+        _warn(u"diverged_foldernames: unexpected: %s"% fs2uni(unexpected))
+        _warn(u"diverged_foldernames: expected: %s"% fs2uni(expected))
+
+    return diverged, unexpected, expected
+
+
+def rename_diverged(root, old, new):
+    """Move 'old' to 'new', both standing in 'root'.
+
+    We require root to avoid mixing encodings.
+
+    :param:
+    - root: the dirname (as opposed to basename) (uni object, as required by
+      diverged_foldernames())
+    - old: old basename (str)
+    - new: new basename (str)
+
+    Returns True once moved, False if the move failed with ENOENT (no such
+    file or directory). Any other IOError is raised again.
+    """
+
+    # Local import: errno is only needed here.
+    import errno
+
+    assert diverged_foldernames(root)[0] == False, ("won't rename a "
+        "folder if dirname is diverging")
+    assert type(old) == str
+    assert type(new) == str
+
+    # NOTE(review): root is joined as is while it must be a uni object to pass
+    # the first assertion; this relies on uni objects being accepted by
+    # str.join() -- to confirm.
+    fs_old = os.path.sep.join([root, old])
+    fs_new = os.path.sep.join([root, new])
+
+    try:
+        if _DEBUG:
+            _warn(u"rename_diverged (old): %s"% fsString(fs_old).uni)
+            _warn(u"rename_diverged (new): %s"% fsString(fs_new).uni)
+        shutil.move(fs_old, fs_new)
+        return True
+    except IOError as e:
+        # A missing path is not an error for us: there is nothing to rename.
+        if e.errno == errno.ENOENT:
+            if _DEBUG:
+                _warn(u"rename_diverged: not renaming folder '%s'"%
+                    fsString(root).uni)
+            return False
+        else:
+            raise
+
+
+def help_message():
+    """Print the warning and guidance shown about the experimental Unicode
+    support."""
+
+    print("""
+Welcome to the Unicode world with OfflineIMAP. :-)
+
+Unicode is still an EXPERIMENTAL feature. Toying with it is very welcome because
+I can't test all possible options but you're advised to make good backups of
+both your mails and the cache. I aim to make Unicode the default but it won't
+happen without your help. So, here is a good way to play with this new feature.
+
+Some configuration options support UTF-8, some not. First, check the
+'offlineimap.conf' coming with your version for details. The very last WIP
+version (standing in the "next" branch) can be found online at
+
+ https://github.com/OfflineIMAP/offlineimap/blob/next/offlineimap.conf
+
+but it might not match your local version of OfflineIMAP.
+
+Do keep your current configuration file intact. The best approach is to copy
+your 'offlineimaprc' to 'offlineimaprc.utf-8' and update the latter with UTF-8
+in mind.  Then the correct configuration file can be set (with the -c CLI
+option), according to the unicode CLI option you use.
+
+It's a good thing to also copy the content of your current 'metadata' and
+'localfolders'. Then, you'll have free hands to play on the copy (don't forget
+to update the paths in your 'offlineimaprc.utf-8' accordingly).
+
+Working on a copy does not mean you should bypass the backups steps. Something
+might go very bad and delete all your mails from the server. Make REGULAR
+backups.
+
+Now that you are warned, I can tell you: the true option is --enable-unicode.
+Please, keep the existence of this option for you (don't communicate it to
+others) so that new comers will fall on this warning message, too.
+
+Not afraid? Good, I need you!
+
+Python 2 is not really consistent when it comes to Unicode and I expect unicode
+to come with subtle bugs. Subtle bugs require meticulous bug reports. This is
+not something hard to do, it just asks to be a bit rigorous. If you have to
+report bugs, follow the procedure at
+
+ https://github.com/OfflineIMAP/offlineimap/wiki/Unicode:-Reporting-bugs-about-Unicode-issues
+
+I intend to REJECT all the bug reports not following this procedure. I'm not a
+strong guy. I'd just like to keep both your life and mine as easy as possible
+while communicating about such bugs. I'm providing you all the resources you
+might need to do so. It asked me a significant amount of time. Please, take the
+10 minutes to read the doc and follow the steps!
+
+I'm also requesting for POSITIVE feedbacks. For them to be useful, read the
+link page above. Positive feedbacks will help to know when it will be suitable
+to turn Unicode support from EXPERIMENTAL to TESTING, remove this message, and
+finally make it the default.
+
+Last but not least, as soon as Unicode is used once it might not be possible to
+come back to --no-unicode safely (e.g. if any Unicode character was written to
+the cache). I've tried hard to make it not happen but I can't be categorical.
+This is code. Well, you actually took my advices into account and made a copy of
+your mails, metadata and configuration... Good! You're not concerned by this
+issue anymore.
+
+If you intend to hack on Unicode, you should read both the API documentation and
+the utils/uni.py module. Last online versions can be found here:
+
+ http://docs.offlineimap.org/en/latest/API.html
+ https://github.com/OfflineIMAP/offlineimap/blob/next/offlineimap/utils/uni.py
+
+
+Have fun!
+
+--
+Nicolas Sebrecht """)
diff --git a/uni-tests.py b/uni-tests.py
new file mode 100644
index 0000000..7c7623e
--- /dev/null
+++ b/uni-tests.py
@@ -0,0 +1,737 @@
+# Copyright (C) 2015 Nicolas Sebrecht
+#
+#    This work is free. You can redistribute it and/or modify it under the
+#    terms of the Do What The Fuck You Want To Public License, Version 2,
+#    as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+
+import sys
+import traceback
+
+from offlineimap.utils import uni
+
+_DEBUG_OBJS = False
+_DEBUG_OBJS = True
+
+_STOP_ON_FAILURE = False
+_STOP_ON_FAILURE = True
+
+USE_UNICODE = False
+USE_UNICODE = True
+
+# Avoid mixed output.
+sys.stderr = sys.stdout
+
+def output(msg=''):
+    print(msg)
+
+class ExceptionExpected(Exception):
+    pass
+
+class TestMalformed(Exception):
+    pass
+
+
+class UnitTestEnv(object):
+    pass
+
+#
+# UnitTest
+#
+class UnitTest(object):
+    """One registered test: a callable and the result expected from it."""
+
+    # Marker used as expected_result when the test has no result to check.
+    no_result = 'no result expected for this UnitTest'
+
+    def __init__(self, test_func, title, expected_result, debug_objs):
+        """
+        :param test_func: callable run without argument.
+        :param title: label printed in the banners.
+        :param expected_result: value the callable must return, or
+            ExceptionExpected, or UnitTest.no_result.
+        :param debug_objs: value given to uni._DEBUG_OBJS while running.
+        """
+        self.test_func = test_func
+        self.title = title
+        self.expected_result = expected_result
+        self.debug_objs = debug_objs
+
+    def run(self):
+        """Run the test and return what test_func returned.
+
+        Raises ExceptionExpected if test_func raised while an exception was
+        expected; otherwise the failure is raised again as Exception with the
+        original traceback."""
+
+        # Initialise test environment for uni module.
+        uni._DEBUG_OBJS = self.debug_objs
+        # Run the test.
+        try:
+            return self.test_func()
+        except Exception as e:
+            if self.expected_result == ExceptionExpected:
+                raise ExceptionExpected("%s %s"% (str(type(e)), e))
+            raise Exception(e), None, sys.exc_info()[2]
+        finally:
+            # Restore uni module environment, even when the test raised.
+            uni._DEBUG_OBJS = False
+
+#
+# TestSuites
+#
+class TestSuites(object):
+    """Collect UnitTest objects, run them in order and print a report."""
+
+    def __init__(self, enable_unicode=True):
+        self._unicode = enable_unicode
+
+        self._tests = []
+        self._win = '--- Test %i: PASSED ---'
+        self._lost = '--- Test %i: FAILED!'
+        # Marker meaning "nothing recorded" for errors, expected and result.
+        self._magic_flag = 'magic 123 unexpected 456 flag'
+        self._failed_test = []
+
+        # All about test being run.
+        self._count = 0
+        self._unittest = None
+        self._errors = self._magic_flag
+        self._expected = self._magic_flag
+        self._result = self._magic_flag
+        self._stack = None
+        self._finished = False
+
+    def context(self):
+        """Return a string telling if Unicode is enabled for this suite."""
+        msg = "(context: "
+        if self._unicode:
+            return msg + 'Unicode is enabled)'
+        return msg + 'Unicode is disabled)'
+
+    def restore_env(self):
+        """Forget everything recorded about the previous test."""
+        self._unittest = None
+        self._errors = self._magic_flag
+        self._expected = self._magic_flag
+        self._result = self._magic_flag
+        self._stack = None
+        self._finished = False
+
+    def print_start_banner(self, title=''):
+        output("\n--- Starting test %i: --- (%s)"% (self._count, title))
+
+    def print_end_banner(self):
+        output("--- Finished test %i: ---"% self._count)
+
+    def print_results(self):
+        """Print the result we got and the one expected, when recorded."""
+        # What we got.
+        if self._result != self._magic_flag:
+            output("Result  : %s"% repr(self._result))
+
+        # What we expected.
+        if self._expected != self._magic_flag:
+            # Did we expect a result at all?
+            if self._expected != UnitTest.no_result:
+                output("Expected: %s"% repr(self._expected))
+            else:
+                output("expecting no result")
+
+    def test_succeed(self, msg=None):
+        """Report the current test as passed."""
+        if msg:
+            output(msg)
+        # results
+        self.print_results()
+        # win banner
+        output(self._win % self._count)
+        self._finished = True
+
+    def test_failed(self, msg=None):
+        """Report the current test as failed and record it.
+
+        Raises Exception if _STOP_ON_FAILURE is set."""
+        banner = self._lost % self._count
+        banner = "%s (%s)"% (banner, self._unittest.title)
+
+        self._failed_test.append(banner)
+
+        if msg:
+            output(msg)
+        # results
+        self.print_results()
+        # print traceback
+        if self._stack:
+            output(self._stack)
+        output(self.context())
+        # lost banner
+        output(banner)
+        self._finished = True
+        if _STOP_ON_FAILURE:
+            raise Exception('test failed')
+
+    def add_test(self, test_func, title=None,
+        expected_result=UnitTest.no_result,
+        debug_objs=_DEBUG_OBJS):
+        """Add a test to the suites."""
+
+        unittest = UnitTest(test_func, title, expected_result, debug_objs)
+        self._tests.append(unittest)
+
+    def run(self):
+        """Run the test suites."""
+
+        for unittest in self._tests:
+            self._count += 1    # Starting new test.
+            self.restore_env()
+            self.print_start_banner(unittest.title)
+            # True only once the test raised the exception it had to raise.
+            got_expected_exception = False
+
+            try:
+                # Initialise test unit.
+                try:
+                    self._unittest = unittest
+                    self._expected = self._unittest.expected_result
+                except Exception:
+                    self._stack = traceback.format_exc()
+                    raise TestMalformed("cound not initialise test "), \
+                        None, sys.exc_info()[2]
+
+                # Actually run the test.
+                self._result = self._unittest.run()
+
+                self._errors = False
+                self._stack = None
+            except TestMalformed as e:
+                raise   # Might want to handle this, later.
+            except ExceptionExpected as e:
+                # Test raised exception as expected.
+                got_expected_exception = True
+                self._errors = False
+                self._stack = str(e)
+                # Fix results.
+                self._result = self._magic_flag
+                self._expected = ExceptionExpected
+            except Exception as e:
+                # Test failed.
+                self._errors = True
+                self._stack = traceback.format_exc()
+
+            finally:
+                self.print_end_banner()
+
+                # Did we expect an exception?
+                if self._expected == ExceptionExpected:
+                    # Fix this to not get it printed later.
+                    self._expected = self._magic_flag
+                    if got_expected_exception:
+                        self.test_succeed(
+                            "! EXPECTED EXCEPTION, got '%s'"% self._stack)
+                    else:
+                        # The test returned normally: this is a failure.
+                        self.test_failed(
+                            "! EXPECTED EXCEPTION, but none was raised")
+
+                if not self._finished:
+                    # Did test failed during the run? Must be checked first so
+                    # that a test expecting no result can't hide an exception.
+                    if self._errors:
+                        self.test_failed()
+
+                if not self._finished:
+                    # Did we expected a result?
+                    if self._unittest.expected_result == UnitTest.no_result:
+                        self.test_succeed()
+
+                if not self._finished:
+                    # Are results corrects? Types must match, too: equal values
+                    # of different types (e.g. str and unicode) do diverge.
+                    if (self._result == self._expected and
+                        type(self._result) == type(self._expected)):
+                        self.test_succeed()
+                    else:
+                        # Results diverge!
+                        self.test_failed()
+
+                if not self._finished:
+                    assert False, 'should never have reach this point!'
+
+
+        # All tests have run. Print final results.
+        print('\n')
+        if len(self._failed_test) < 1:
+            output("All (%s) tests passed!"% self._count)
+        else:
+            output("Tests FAILED:")
+        for failed in self._failed_test:
+            output("%s"% failed)
+        print('\n')
+
+##############
+#
+# The tests.
+#
+##############
+
+# Make debugging of uni module easier.
+def ___(tag=None):
+    """Print a separator (the tag, or an empty line) in the debug output.
+
+    Does nothing unless debugging of uni objects is enabled."""
+    if not uni._DEBUG_OBJS:
+        return
+    if tag is None:
+        print
+    else:
+        print(tag)
+
+# Define environment.
+class Env(UnitTestEnv):
+    """Values used by the tests; they depend on USE_UNICODE."""
+
+    def __init__(self):
+        if USE_UNICODE:
+            self.x = u'x'
+            self.y = u'y'
+            self.e = u'\xe9' # lowercase e-acute
+            self.e_imap = str(uni.uni2imap(self.e))
+            self.e_dbytes = uni.uni2fs(self.e)
+            self.e_fs = uni.uni2fs(self.e)
+            self.substitution = u"substitution %s"
+            self.unexpected_encoded = unicode(self.e)
+            # Class that must not be instantiated in this context.
+            self.wrong_obj = uni.StrObject
+            self.e_diverging = self.e_fs
+            self.e_unexpected = uni.uni2fs(uni.uni2imap(self.e))
+        else:
+            self.x = 'x'
+            self.y = 'y'
+            self.e = 'e'
+            self.e_uni = u'\xe9' # lowercase e-acute
+            self.e_imap = self.e
+            self.e_dbytes = self.e
+            self.e_fs = self.e
+            self.substitution = "substitution %s"
+            self.unexpected_encoded = unicode(self.e_uni)
+            # Class that must not be instantiated in this context.
+            self.wrong_obj = uni.UnicodeObject
+            self.e_diverging = str(uni.uni2imap(self.e_uni))
+            self.e_unexpected = uni.uni2fs(self.e_uni)
+
+env = Env()
+suites = TestSuites()
+
+# Erase function using logging.
+def uni_test_warn(msg):
+    output("UNI WARN: %s"% msg)
+uni._warn = uni_test_warn
+
+def context():
+    uni.set_unicode_context(USE_UNICODE)
+
+suites.add_test(context,
+    "set context twice --- uni.use_unicode(USE_UNICODE)",
+    ExceptionExpected)
+
+def convert():
+    env = Env()
+    uni.convert('bytes2uni', env.unexpected_encoded,
+        uni.ENCODING, 'strict')
+suites.add_test(convert,
+    "convert strict with double encoded character",
+    ExceptionExpected)
+
+def uni2bytes():
+    env = Env()
+    uni.uni2bytes(env.unexpected_encoded)
+suites.add_test(uni2bytes,
+    "uni2bytes strict with double encoded character",
+    ExceptionExpected)
+
+def bytes2uni():
+    env = Env()
+    uni.bytes2uni(env.unexpected_encoded)
+suites.add_test(bytes2uni,
+    "bytes2uni strict with double encoded character",
+    ExceptionExpected)
+
+def uni2str():
+    env = Env()
+    uni.uni2str(env.unexpected_encoded)
+suites.add_test(uni2str,
+    "uni2str strict with double encoded character",
+    ExceptionExpected)
+
+def uni2std():
+    env = Env()
+    return uni.uni2std(env.unexpected_encoded)
+suites.add_test(uni2std,
+    "uni2std strict with double encoded character",
+    '?')
+
+def fs2uni():
+    """Call uni.fs2uni() on the unexpectedly encoded value."""
+    env = Env()
+    # Was calling uni.uni2str(), duplicating the uni2str test.
+    uni.fs2uni(env.unexpected_encoded)
+suites.add_test(fs2uni,
+    "fs2uni strict with double encoded character",
+    ExceptionExpected)
+
+def uni2fs():
+    env = Env()
+    uni.uni2fs(env.unexpected_encoded)
+suites.add_test(uni2fs,
+    "uni2fs strict with double encoded character",
+    ExceptionExpected)
+
+def isASCII():
+    env = Env()
+    return uni.isASCII(env.unexpected_encoded)
+suites.add_test(isASCII,
+    "isASCII, simple",
+    False)
+
+def isASCII_raise():
+    env = Env()
+    return uni.isASCII(env.unexpected_encoded, exception_msg='raise')
+suites.add_test(isASCII_raise,
+    "isASCII, must raise exception",
+    ExceptionExpected)
+
+def imap2uni():
+    env = Env()
+    return uni.imap2uni(env.unexpected_encoded)
+suites.add_test(imap2uni,
+    "imap2uni with double encoded character",
+    ExceptionExpected)
+
+def uni2imap():
+    env = Env()
+    return uni.uni2imap(env.unexpected_encoded)
+suites.add_test(uni2imap,
+    "uni2imap with double encoded character",
+    ExceptionExpected)
+
+def repr_BaseString():
+    env = Env()
+    repr(uni.BaseString(unicode, env.unexpected_encoded))
+suites.add_test(repr_BaseString,
+    "repr_String with double encoded character",)
+
+def replace_BaseString():
+    """Call replace() on a BaseString bundling the unexpectedly encoded
+    value."""
+    env = Env()
+    s = uni.BaseString(unicode, env.unexpected_encoded)
+    return s.replace('e', '')
+# Title used to be the one of the repr test, making reports ambiguous.
+suites.add_test(replace_BaseString,
+    "replace_BaseString with double encoded character",
+    ExceptionExpected)
+
+
+def string_factory():
+    env = Env()
+
+    if USE_UNICODE:
+        expected_type = uni.UnicodeObject
+    else:
+        expected_type = uni.StrObject
+    s = uni._String(env.x)
+    return type(s) == expected_type
+suites.add_test(string_factory,
+    "uni._String factory",
+    True)
+
+def value_getter():
+    env = Env()
+    return uni._String(env.e).value
+suites.add_test(value_getter,
+    "value getter",
+    env.e)
+
+def value_setter():
+    """Assign through the 'value' setter, then read the value back."""
+    env = Env()
+    # Build the object empty so that the setter is what stores the value;
+    # passing it to the factory would only duplicate the value getter test.
+    s = uni._String()
+    s.value = env.e
+    return s.value
+suites.add_test(value_setter,
+    "value setter",
+    env.e)
+
+def none_factory():
+    return uni.noneString().value
+suites.add_test(none_factory,
+    "noneString factory",
+    None)
+
+def uni_getter_setter():
+    env = Env()
+    s = uni._String(env.e)
+    ___()
+    s.uni = env.e
+    ___()
+    s = s.uni
+    return s
+suites.add_test(uni_getter_setter,
+    "uni (setter, getter)",
+    env.e)
+
+def uni_factory():
+    return uni.uniString(env.e).uni
+suites.add_test(uni_factory,
+    "uni factory",
+    env.e)
+
+def value_factory():
+    env = Env()
+    return uni.valueString(env.e).value
+suites.add_test(value_factory,
+    "value factory",
+    env.e)
+
+def dbytes_factory():
+    env = Env()
+    return uni.dbytesString(env.e_dbytes).dbytes
+suites.add_test(dbytes_factory,
+    "dbytes factory",
+    env.e_dbytes)
+
+def imap_factory():
+    env = Env()
+    return uni.imapString(env.e_imap).imap
+suites.add_test(imap_factory,
+    "imap factory",
+    env.e_imap)
+
+def fs_factory():
+    env = Env()
+    return uni.fsString(env.e_fs).fs
+suites.add_test(fs_factory,
+    "fs factory",
+    env.e_fs)
+
+def erase():
+    env = Env()
+    s = uni.fsString(env.e_fs)
+    s.uni = env.x
+    return s.uni
+suites.add_test(erase,
+    "erase",
+    env.x)
+
+def substitution():
+    env = Env()
+    s = uni.uniString(env.substitution)
+    e = uni.imapString(env.e_imap)
+    s = s % e
+    return s.uni
+suites.add_test(substitution,
+    "substitution",
+    env.substitution % env.e)
+
+def substitution_string():
+    env = Env()
+    return (uni.uniString(env.substitution) % env.e_imap).uni
+suites.add_test(substitution_string,
+    "substitution with string",
+    env.substitution % env.e_imap)
+
+def concatenation():
+    env = Env()
+    one = uni.uniString(env.e)
+    two = uni.imapString(env.e_imap)
+    concat = one + two
+    return concat.uni
+suites.add_test(concatenation,
+    "string concatenation",
+    env.e + env.e)
+
+def concatenation_string():
+    env = Env()
+    one = uni.uniString(env.e)
+    two = env.e
+    concat = one + two
+    return concat.uni
+suites.add_test(concatenation_string,
+    "string concatenation with string",
+    env.e + env.e)
+
+def concatenation_string_mix():
+    env = Env()
+    one = uni.uniString(env.e)
+    two = env.e_imap
+    concat = one + two
+    return concat.uni
+suites.add_test(concatenation_string_mix,
+    "string concatenation with incompatible string",
+    ExceptionExpected)
+
+def concatenation_string_left():
+    env = Env()
+    one = uni.uniString(env.e)
+    two = env.e
+    concat = two + one
+    return concat
+suites.add_test(concatenation_string_left,
+    "string concatenation with string at left",
+    env.e + env.e)
+
+def double_composing():
+    env = Env()
+    return uni.uniString(uni.uniString(env.e)).uni
+suites.add_test(double_composing,
+    "double composing",
+    env.e)
+
+def in_iterable():
+    env = Env()
+    e = uni.uniString(env.e)
+    x = uni.uniString(env.x)
+    return e in [e, x]
+suites.add_test(in_iterable,
+    "in iterable",
+    True)
+
+def equal():
+    env = Env()
+    e = uni.uniString(env.e)
+    x = uni.uniString(env.e)
+    return e == x
+suites.add_test(equal,
+    "equal comparison",
+    True)
+
+def not_equal():
+    env = Env()
+    e = uni.uniString(env.e)
+    x = uni.uniString(env.x)
+    return e != x
+suites.add_test(not_equal,
+    "not equal comparison",
+    True)
+
+def not_equal_f():
+    """Compare (!=) a uni object returned by a function with another one."""
+    env = Env()
+    def a():
+        return uni.uniString(env.e)
+    x = uni.uniString(env.x)
+    return a() != x
+# Used to register not_equal a second time, so this test was never run.
+suites.add_test(not_equal_f,
+    "not equal comparison with function return value",
+    True)
+
+def lesser_than():
+    env = Env()
+    y = uni.uniString(env.y)
+    x = uni.uniString(env.x)
+    return x < y
+suites.add_test(lesser_than,
+    "lesser than comparison",
+    True)
+
+def sort_uni_obj_list():
+    # Mix sort creation to avoid comparing objects id.
+    env = Env()
+    y = uni.uniString(env.y)
+    x = uni.uniString(env.x)
+    i = uni.uniString(env.x)
+    j = uni.uniString(env.y)
+    return sorted([y, x]) == [i, j]
+suites.add_test(sort_uni_obj_list,
+    "sort list of uni objects (somewhat duplicata)",
+    True)
+
+def in_dict_keys():
+    env = Env()
+    e = uni.uniString(env.e)
+    x = uni.uniString(env.x)
+    return e in {x: 1, e: 2}
+suites.add_test(in_dict_keys,
+    "uni object in dict with uni object keys (somewhat duplicata)",
+    True)
+
+def split():
+    env = Env()
+    e = uni.uniString(env.e)
+    x = uni.uniString(env.x)
+    exe = e + x + e
+    # WARNING: objects in list must be uni objects, hence this little bit more
+    # complex test.
+    return exe.split(x.uni)[1].uni
+suites.add_test(split,
+    "split with string",
+    env.e)
+
+def rawobject():
+    env = Env()
+    raw = uni.RawObject(env.x)
+    raw.value = env.e
+    return raw.value
+suites.add_test(rawobject,
+    "RawObject.value, getter and setter",
+    env.e)
+
+def wrong_obj():
+    env = Env()
+    env.wrong_obj(env.x)
+suites.add_test(wrong_obj,
+    "instanciate wrong object",
+    ExceptionExpected,
+    debug_objs=False)   # Required for the correct exception.
+
+def unexpected_type_setter():
+    env = Env()
+    s = uni.uniString(Env())
+suites.add_test(unexpected_type_setter,
+    "unexpected type for value setter",
+    ExceptionExpected)
+
+def std_setter():
+    env = Env()
+    s = uni.uniString(env.e)
+    s.std = env.e
+suites.add_test(std_setter,
+    "use std setter",
+    ExceptionExpected)
+
+def diverging_folders():
+    env = Env()
+    return uni.diverged_foldernames(uni.fsString(env.e_diverging))
+suites.add_test(diverging_folders,
+    "diverging foldernames",
+    (True, env.e_unexpected, env.e_diverging))
+
+def non_diverging_folders():
+    env = Env()
+    return uni.diverged_foldernames(uni.uniString(env.x))
+suites.add_test(non_diverging_folders,
+    "non diverging foldernames",
+    (False, env.x, env.x))
+
+def root_diverging_rename_diverged():
+    env = Env()
+    return uni.rename_diverged(
+        uni.uniString(env.e_unexpected), env.x, env.x)
+suites.add_test(root_diverging_rename_diverged,
+    "root of rename is diverging",
+    ExceptionExpected)
+
+def nonzero_nonzero():
+    env = Env()
+    if uni.uniString(env.e):
+        return True
+    return False
+suites.add_test(nonzero_nonzero,
+    "nonzero on non zero value ('if uni_obj:' pattern)",
+    True)
+
+def nonzero_emptystring():
+    env = Env()
+    if uni.uniString(''):
+        return True
+    return False
+suites.add_test(nonzero_emptystring,
+    "nonzero on empty string ('if uni_obj:' pattern)",
+    False)
+
+def nonzero_None():
+    env = Env()
+    if uni.noneString():
+        return True
+    return False
+suites.add_test(nonzero_None,
+    "nonzero on None value ('if uni_obj:' pattern)",
+    False)
+
+def equal_None():
+    env = Env()
+    if uni.noneString() == None:
+        return True
+    return False
+suites.add_test(equal_None,
+    "equal None value ('if uni_obj == None' pattern)",
+    True)
+
+def equal_None_string():
+    env = Env()
+    if uni.uniString(env.e) == None:
+        return True
+    return False
+suites.add_test(equal_None_string,
+    "equal None on bundled non-empty string ('if uni_obj == None' pattern)",
+    False)
+
+def equal_None_empty_string():
+    env = Env()
+    if uni.uniString('') == None:
+        return True
+    return False
+suites.add_test(equal_None_empty_string,
+    "equal None on bundled empty string ('if uni_obj == None' pattern)",
+    False)
+
+suites.run()
+
-- 
2.2.2





More information about the OfflineIMAP-project mailing list