[PATCH 3/4] introduce utils/uni.py module
Nicolas Sebrecht
nicolas.s-dev at laposte.net
Wed Feb 11 15:09:27 GMT 2015
This module gets all unicode-related stuff.
Signed-off-by: Nicolas Sebrecht <nicolas.s-dev at laposte.net>
---
offlineimap/utils/uni.py | 1125 ++++++++++++++++++++++++++++++++++++++++++++++
uni-tests.py | 737 ++++++++++++++++++++++++++++++
2 files changed, 1862 insertions(+)
create mode 100644 offlineimap/utils/uni.py
create mode 100644 uni-tests.py
diff --git a/offlineimap/utils/uni.py b/offlineimap/utils/uni.py
new file mode 100644
index 0000000..6d9cb19
--- /dev/null
+++ b/offlineimap/utils/uni.py
@@ -0,0 +1,1125 @@
+# Copyright (C) 2015 Nicolas Sebrecht
+#
+# This work is free. You can redistribute it and/or modify it under the
+# terms of the Do What The Fuck You Want To Public License, Version 2,
+# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# Low-level functions to work with unicode.
+#
+# Do our best to prevent double encoding/decoding. For this, we rely on
+# the types of the variables.
+
+#
+# Unicode in OfflineIMAP is documented in API.rst.
+#
+
+import sys
+import shutil
+import os
+import logging
+from functools import total_ordering
+
+from .IMAPClient import imap_utf7
+
+#
+# TODO: move out, add CLI argument for -d.
+#
+# Debug switches. Each flag is assigned twice and the second assignment wins;
+# swap or comment out the lines to toggle a flag by hand.
+_DEBUG = True
+_DEBUG = False
+if _DEBUG:
+ import inspect
+_DEBUG_OBJS = True # For 'BaseString' objects. This is VERY verbose.
+_DEBUG_OBJS = False # For 'BaseString' objects. This is VERY verbose.
+
+# Expected encoding to work with, outside specific requirements. This encoding
+# can be used as a transition encoding.
+# There are assumptions that the ASCII charset is a subset
+# of this encoding.
+# Default encoding must support ALL characters of Unicode.
+ENCODING = 'UTF-8'
+
+# On Unix, depends on the locale (LANG, LC_CTYPE or LC_ALL environment variables).
+FS_ENCODING = sys.getfilesystemencoding()
+
+# Standard legacy encoding is fixed. Mostly used to encode exception
+# messages.
+STD_ENCODING = 'ascii'
+
+
+#
+# Store context.
+#
+_use_unicode = None
+
+#
+# Exceptions.
+#
+class UniError(Exception):
+ """Raised on errors in this module. Just namespace."""
+ pass
+
+
+"""
+#############################################################
+
+# Helpers.
+
+#############################################################
+"""
+
+
+def _debug_info(msg=''):
+ if _DEBUG_OBJS:
+ print("DEBUG INFO"),
+ print(": %s"% msg)
+
+def _warn(msg):
+ logging.warn(u"UNICODE WARNING: %s"% msg)
+
+#
+# Don't be arrogant, for now. We will be stricter over time. ,-)
+#
+def _setter_helpers():
+ if use_unicode():
+ return [unicode]
+ else:
+ return [str]
+# Don't forget this might fail and raise an exception.
+def _assert(condition, friendly_funcs=[], data=None, stack_level=4, msg=''):
+ if condition:
+ return data
+ else:
+ msg = "Assertion error detected: %s"% (msg)
+ _warn(msg)
+ for friendly_func in friendly_funcs:
+ data = friendly_func(data)
+ return data
+
+
+#
+# XXX: might need more love to make it more eyes-friendly and easier to use.
+#
+def _format_exception_msg(original_msg, exception_msg=None, add_msg=[]):
+ """Format exception message to be raised with UniError.
+
+ Meant for internal use only (functions on this module).
+
+ Understanding Python Unicode exceptions might be hard. Improve basic
+ exception by adding surrounding informations about context.
+
+ :params: handles 3 kinds of informations passed as arguments:
+ - original_msg: message from the original Python Unicode exception;
+ - exception_msg: optional context message about the caller's context (from
+ outside this module);
+ - add_msg: additional optional message from the function in this module
+ where the Python Unicode exception is raised.
+
+ Third argument is a list of lines nicely printed."""
+
+ # str() call ensure correct casting from other types like int.
+ exception_msg = str(uni2std(exception_msg))
+
+ msg = "Module 'uni:"
+ msg += "\nException message\n (from original exception): %s"% \
+ uni2std(original_msg)
+ if exception_msg:
+ msg += "\nException message"
+ for line in exception_msg.split('\n'):
+ msg += "\n (from caller): %s"% uni2std(line)
+ if add_msg:
+ msg += "\nException message"
+ if type(add_msg) == list:
+ for line in add_msg:
+ msg += "\n (from function in module uni): %s"% \
+ uni2std(line)
+ else:
+ msg += uni2std(add_msg)
+ return msg
+
+
+def set_unicode_context(use_unicode):
+ global _use_unicode
+
+ if _use_unicode is not None:
+ # Set context once for all.
+ raise UniError("Trying to change of context!")
+ if use_unicode not in [True, False]:
+ raise UniError("unicode context must be True or False (got: %s)"%
+ use_unicode)
+
+ _debug_info("setting 'use unicode' context to: %s"% use_unicode)
+ _use_unicode = use_unicode
+
+def use_unicode():
+ global _use_unicode
+ if _use_unicode is None:
+ raise UniError("asking for a unicode job while context is not set")
+ return _use_unicode
+
+
+"""
+#############################################################
+
+# Actually work on encodings.
+
+# Preventing double encoding/decoding is not an easy task. We try to avoid
+# that mess by relying on variable types.
+
+# Most functions here accept the unusual 'exception_msg' parameter, optional,
+# allowing better information on Unicode exceptions.
+
+#############################################################
+"""
+
+def isASCII(s, exception_msg=None):
+ if not isinstance(s, basestring):
+ raise UniError("isASCII cannot work on %s"% repr(s))
+ try:
+ s.encode('ascii')
+ return True
+ except:
+ if exception_msg is not None:
+ exception_msg += ", got '%s'"% uni2fs(s)
+ raise UniError(exception_msg)
+ return False
+
+
+def convert(direction, s, encoding, errors, exception_msg=''):
+ """Lowest level function of the module to encode/decode."""
+
+ if not isinstance(s, basestring):
+ raise UniError("cannot encode/decode on %s"% repr(s))
+ try:
+ if direction == 'uni2bytes': #and type(s) == unicode:
+ target = s.encode(encoding, errors=errors)
+ elif direction == 'bytes2uni': #and type(s) == str:
+ target = s.decode(encoding, errors=errors)
+ else:
+ target = s
+ return target
+ except (UnicodeDecodeError, UnicodeEncodeError):
+ eclass, ex, tb = sys.exc_info()
+ msg = _format_exception_msg(ex, uni2std(exception_msg), [
+ "direction=%s"% uni2std(direction),
+ "type(s): %s"% str(type(s)),
+ "de/encoding=%s"% uni2std(encoding),
+ "errors=%s"% uni2std(errors)])
+ raise UniError(msg), None, tb
+
+
+def uni2bytes(u, encoding=ENCODING, errors='strict', exception_msg=''):
+ """Wrapper to encode unicode types back to string of bytes.
+
+ The advantages are:
+ - tune the error raised to make it more user-friendly
+ - simplify/compact the code: do not require extra try/expect"""
+
+ return convert('uni2bytes', u, encoding, errors, exception_msg)
+
+
+def bytes2uni(b, encoding=ENCODING, errors='strict', exception_msg=''):
+ """Wrapper to decode a string of bytes to unicode."""
+
+ return convert('bytes2uni', b, encoding, errors, exception_msg)
+
+
+def uni2str(u, exception_msg=''):
+ """Convert unicode to str type the hard way.
+
+ Will raise an exception if the string has any character outside of the ASCII
+ subset of Unicode."""
+
+ try:
+ if type(u) == unicode:
+ target = str(u)
+ else:
+ target = u
+ return target
+ except (UnicodeDecodeError, UnicodeEncodeError):
+ eclass, ex, tb = sys.exc_info()
+ msg = _format_exception_msg(ex, uni2std(exception_msg), [
+ "unsupported character in input '%s'"% uni2std(u)])
+ raise UniError(msg), None, tb
+
+
+def uni2std(s, exception_msg=''):
+ """Convert string of bytes or unicode to a string of ASCII characters only.
+
+ Must always work without error so that it can safely be called whatever
+ Unicode support is enabled or not. Only takes exception_msg argument for
+ symmetry.
+
+ Usefull to encode text in exceptions if you're not sure encoding will work,
+ for example."""
+
+ # Handle string of bytes since it could already by encoded.
+ # In this case, we assume encoding to be ENCODING.
+ if type(s) == str:
+ try:
+ s = bytes2uni(s)
+ except UniError:
+ # This is the best we can do.
+ _warn("WARNING: a string could not be decoded from default "
+ "ENCODING to Unicode, ignoring")
+ return uni2bytes(s, encoding=STD_ENCODING, errors='replace')
+
+
+def fs2uni(s, errors='strict', exception_msg=''):
+ """Expected argument s is type str.
+
+ Returns s in Unicode from a filesystem encoded string of bytes
+ (type unicode)."""
+
+ try:
+ if type(s) == str:
+ target = bytes2uni(s, encoding=FS_ENCODING, errors=errors)
+ else:
+ target = s
+ return target
+ except (UnicodeDecodeError, UnicodeEncodeError):
+ eclass, ex, tb = sys.exc_info()
+ msg = _format_exception_msg(ex, uni2std(exception_msg), [
+ "unsupported character in '%s'"% uni2std(s)])
+ raise UniError(msg), None, tb
+
+
+def uni2fs(u, errors='strict', exception_msg=''):
+ """Expected argument u is unicode or str encoded with ENCODING encoding.
+
+ Returns u filesystem encoded (type str)."""
+
+ try:
+ if type(u) == unicode:
+ target = uni2bytes(u, encoding=FS_ENCODING, errors=errors)
+ else:
+ target = u
+ return target
+ except UnicodeEncodeError:
+ eclass, ex, tb = sys.exc_info()
+ msg = _format_exception_msg(ex, uni2std(exception_msg), [
+ "unsupported character in '%s'"% uni2std(u)])
+ raise UniError(msg), None, tb
+
+
+"""
+# IMAP modified UTF-7 charset has to be handled differently.
+#
+# The IMAP charset allows encoding non-ASCII characters with only ASCII
+# characters. Encoded characters are variable-length. E.g. lowercase e-acute
+# is encoded to '&AOk-'.
+#
+# UTF-7 is NOT part of the Unicode standard but it is more efficient on the
+# internet and compatible with the expectations of the legacy server-side
+# software running Usenet, SMTP, etc.
+#
+# IMAP uses a modified version of UTF-7. See http://tools.ietf.org/html/rfc2060
+#
+# Anyway, this pure-ASCII encoding means that the encoded string can either be
+# in bytes or unicode types in Python.
+#
+# Have fun! ,-)
+"""
+
+def imap2uni(b, exception_msg=None):
+ """Input may still be a unicode string.
+
+ Returned value is Unicode."""
+
+ try:
+ return imap_utf7.decode(b)
+ except Exception as e:
+ eclass, ex, tb = sys.exc_info()
+ msg = _format_exception_msg(ex, exception_msg, [
+ "unsupported character in input '%s'"% uni2std(b)])
+ raise UniError(msg), None, tb
+
+
+def uni2imap(u, exception_msg=None):
+ """Output is still a unicode string."""
+
+ try:
+ return imap_utf7.encode(u)
+ except Exception as e:
+ eclass, ex, tb = sys.exc_info()
+ msg = _format_exception_msg(ex, exception_msg, [
+ "unsupported character in input '%s'"% uni2std(u)])
+ raise UniError(msg), None, tb
+
+
+"""
+#############################################################
+
+# Core uni objects and factories.
+
+# Read the API documentation about the design.
+
+#############################################################
+"""
+
+#
+# BaseString (parent base object).
+#
+@total_ordering # Implement all the methods for comparisons.
+class BaseString(object):
+ """Base Object for StrObject and UnicodeObject.
+
+ It's a composition for the unicode/str types. Encode and decode must be done
+ by childs. BaseString might coerce strings in order to get expected types to
+ not let Python do it itself at random times. This helps catching impossible
+ coercing as soon as possible.
+
+ Any attempt to get the bundled value will raise an UniError if not
+ previously set.
+
+ Special methods are supported as far as possible while until avoiding mixing
+ encodings is not possible. E.g. Any call to the implicit __str__ and
+ __unicode__ methods raise a UniError."""
+
+ def __init__(self, expected_type, value):
+ _debug_info(u"in BaseString:__init__")
+ self._expected_type = expected_type
+
+ # Avoid loop recursion, value setter expects None.
+ self.__value = None
+
+ # This line is using value setter...
+ self.value = value
+
+ def __repr__(self):
+ # Tune output a bit.
+ type_name = str(type(self)).split('.').pop().rstrip("'>")
+ if self.__value is None:
+ return "<%s (empty)>"% type_name
+ else:
+ try:
+ return "<%s %s>"% (type_name, repr(uni2fs(self.__value)))
+ except:
+ pass
+ # Just in case... we don't want to fail on a call to __repr__!
+ return "<%s %s>"% (type_name, repr(uni2std(self.__value)))
+
+ def __getattr__(self, name):
+ """Bind all undefined method calls to the value."""
+
+ _debug_info("in %s:__getattr__, asked for '%s'"% (repr(self), name))
+ # Can't remember why. Probably avoid loop recursion.
+ if name == '_expected_type':
+ raise AttributeError, name
+
+ # Python will fully namespace the attributes on some attempts to work on
+ # non-existing attributes from a child with such prefix prefix. Strip
+ # such prefix to fallback on asked BaseString attribute.
+ for child_attr_prefix in ['_UnicodeObject', '_StrObject']:
+ if name.startswith(child_attr_prefix):
+ # Remove the prefix.
+ name = name[len(child_attr_prefix):]
+ _debug_info("in %:__getattr__, stripped child "
+ "prefix, now: %s"% (repr(self), name))
+ # Special case: a child is reading self.__value.
+ # Also, avoid loop recursion.
+ if name == '__value':
+ _debug_info("in %s:__getattr__, returning BaseString.value"%
+ repr(self))
+ name = 'value'
+
+ _debug_info("in %s:__getattr__, applying method %s to "
+ "BaseString.__value"% (repr(self), name))
+
+ def callable(*args, **kwargs):
+ """Re-bundle basestring objects into uni objects."""
+ retval = getattr(self.__value, name)(*args, **kwargs)
+ if isinstance(retval, basestring):
+ retval = valueString(retval)
+ return retval
+
+ return callable
+ #
+ # With new style objects, python bypass the __getattr__ method for special
+ # methods... They are implemented later.
+ #
+
+ #
+ # Forbid calls to these methods. They are used with wrong assumptions.
+ # Users should apply these methods if they know on what they are working on,
+ # so they should explicitly ask for getter.
+ #
+ def _raise_forbidden(self, attr, helper=''):
+ raise AttributeError("'%s' has implicit encoding usage, requested method "
+ "%s is forbidden%s"% (repr(self), attr, helper))
+ def __str__(self):
+ self._raise_forbidden('__str__')
+ def __unicode__(self):
+ self._raise_forbidden('__unicode__')
+ def __len__(self):
+ self._raise_forbidden('__len__', ", perhaps forgot the getter "
+ "in the statement")
+ #
+ # Making these special methods smart is a bit awkard. We want to allow
+ # implicit conversions while we do our best to support most unexpected types.
+ #
+ def __explicit_special(self, other, attr, op=None, reverse=None):
+ """Apply requested attribute."""
+
+ other_basestring_type=None
+ _debug_info("in %s, got %s request with %s"%
+ (repr(self), attr, repr(other)))
+
+ if isinstance(other, basestring):
+ try:
+ # This is not ideal and might fail; catch callers.
+ _warn("got unexpected type %s while applying '%s', "%
+ (type(other), attr))
+
+ other_basestring_type = type(other)
+ # Other type should match our.
+ if self._expected_type != other_basestring_type:
+ # Encode both sides directly to expected type. Basically
+ # checks if both are plain ASCII to avoid mixing
+ # incompatible encodings. We might want to improve
+ # this check: if _expected_type is unicode, latin-1 encoded
+ # strings should be allowed.
+ uni2str(other, exception_msg=
+ "asked to mix incompatible strings for uni object %s"
+ ", other is not plain ASCII"% repr(other))
+ uni2str(self.value, exception_msg=
+ "asked to mix incompatible strings for uni object %s"
+ ", we are not plain ASCII"% repr(self))
+ other = valueString(self._expected_type(other))
+ except (UnicodeDecodeError, UnicodeEncodeError):
+ # Prevent from mixing with implicit encoding in Unicode context.
+ if op is None:
+ op = ''
+ else:
+ op = ' (%s operator)'% op
+ raise AttributeError("'%s' is not of expected %s or is not plain "
+ "ASCII, requested operation '%s'%s is "
+ "forbidden when encoding is implicit"%
+ (str(type(other)), self._expected_type, attr, op))
+ if isinstance(other, BaseString):
+ if reverse:
+ return other_basestring_type(
+ getattr(other.value, reverse)(self.value))
+ retval = getattr(self.value, attr)(other.value)
+ if isinstance(retval, basestring):
+ return valueString(retval)
+ return retval
+
+ raise AttributeError, attr
+
+ # TODO: implement __i*__ special methods if it worth.
+ def __lt__(self, other):
+ return self.__explicit_special(other, '__lt__')
+ def __eq__(self, other):
+ # None requires special treatments.
+ if other is None:
+ if self.value is None:
+ return True
+ return self.value.__eq__(None)
+ return self.__explicit_special(other, '__eq__', '==')
+ def __add__(self, other):
+ return self.__explicit_special(other, '__add__', '+')
+ def __radd__(self, other):
+ return self.__explicit_special(other, '__radd__', reverse='__add__')
+ def __mul__(self, other):
+ return self.__explicit_special(other, '__mul__')
+ def __rmul__(self, other):
+ return self.__explicit_special(other, '__rmul__', reverse='__mul__')
+ def __mod__(self, other):
+ return self.__explicit_special(other, '__mod__', '%')
+ def __rmod__(self, other):
+ return self.__explicit_special(other, '__rmod__', '%', reverse='__mod__')
+ def __contains__(self, other):
+ return self.__explicit_special(other, '__contains__', "'in'")
+ #
+ # Blindly apply following special methods to the embedded value is fine.
+ #
+ def __iter__(self):
+ _debug_info("in %s:__iter__"% repr(self))
+ return iter(self.value)
+ def __format__(self, *args, **kwargs):
+ _debug_info("in %s:__format__"% repr(self))
+ return valueString(self.value.format(*args, **kwargs))
+ def __getitem__(self, key):
+ _debug_info("in %s:__getitem__"% repr(self))
+ return valueString(self.value.__getitem__(key))
+ # __eq__() is defined on self.value.
+ def __hash__(self):
+ _debug_info("in %s:__hash__"% repr(self))
+ return self.value.__hash__()
+ def __nonzero__(self):
+ _debug_info("in %s:__nonzero__"% repr(self))
+ if self.value is None:
+ return False
+ return self.value.__len__()
+ #
+ # Not a special method but it is so common.
+ #
+ def split(self, *args, **kwargs):
+ """Implement the split() method. Return a list of uni objects."""
+
+ # FIXME: implement support of uni objects as splitter.
+ return [valueString(obj) for obj in self.value.split(*args, **kwargs)]
+
+ #
+ # Factorized helpers.
+ #
+ def _prevent_double_composing(self, value):
+ if isinstance(value, BaseString):
+ _warn("preventing from double composing, got %s"% (repr(value)))
+ return value.value
+ else:
+ return value
+ def _raise_if_empty(self):
+ if self.__value is None:
+ raise UniError("asked for a value while empty")
+ def _coerce_string(self, value, where):
+ try:
+ return _assert(self._expected_type == type(value),
+ _setter_helpers(), value, stack_level=4, msg=
+ "%s, value has unexpected type %s"% (where, type(value)))
+ except Exception as e:
+ raise UniError("in %svalue(setter), helper failed to 'cast' "
+ "value %s to %s:\n %s"%
+ (repr(self), repr(value), self._expected_type, str(e))), \
+ None, sys.exc_info()[2]
+
+ #
+ # Implement common properties.
+ #
+ @property
+ def value(self):
+ _debug_info("in %s:value (getter)"% repr(self))
+ return self.__value
+
+ @value.setter
+ def value(self, value):
+ info = "in %s:value (setter)"% repr(self)
+ _debug_info(info)
+ value = self._prevent_double_composing(value)
+ # Sanity check.
+ if value == None:
+ self.__value = None
+ return
+ if not isinstance(value, basestring):
+ raise UniError("a uni object setter requires a basestring "
+ "instance or None to work properly, got %s"% repr(value))
+ value = self._coerce_string(value, info)
+ self.__value = value
+
+#
+# UnicodeObject (object used in unicode context).
+#
+class UnicodeObject(BaseString):
+ """High level object used with Unicode support enabled.
+
+ The correct encoded string relies on the context and it's easy to mix
+ encodings in practice. We avoid wrong expectations by requiring explicit
+ encoding through the accessors.
+
+ To prevent from most basic errors on writes, there are (gentle) type
+ assertions on setters.
+
+ The encode/decode operations are done at the first read/get access from the
+ bundled/external value. The requested encoding is immediately cached.
+
+ See BaseString for more details on low internals."""
+
+ def __init__(self, value=None):
+ # This object is broken if the context is wrong.
+ if not _DEBUG_OBJS and not use_unicode():
+ raise UniError("cannot instanciate UnicodeObject in this context")
+ _debug_info(u"in UnicodeObject:__init__")
+ BaseString.__init__(self, unicode, value)
+
+ self._empty_caches()
+
+ def _empty_caches(self):
+ self._dbytes = None
+ self._imap = None
+ self._fs = None
+ self._std = None
+
+ #
+ # Factorize getters.
+ #
+ def _getter(self, name, encode_funcs):
+ info = "in %s:%s (getter)"% (repr(self), name)
+ _debug_info(info)
+ cache = '_%s'% name
+ # Use cache.
+ if self.__dict__[cache]:
+ _debug_info("%s, returning cached value"% info)
+ return self.__dict__[cache]
+ # Need to set the cache.
+ if self.value is None:
+ self.__dict__[cache] = None
+ return self.__dict__[cache]
+ # Cache string for further use.
+ value = self.value
+ _debug_info("%s, encoding"% info)
+ #XXX do not default to encode_func!
+ for encode_func in encode_funcs:
+ value = encode_func(value, exception_msg=
+ "%s, cannot convert bundled string"% info)
+ self.__dict__[cache] = value
+ return self.__dict__[cache]
+ #
+ # Factorize setters.
+ #
+ def _setter(self, name, decode_funcs, value, assert_type):
+ info = "in %s:%s (setter)"% (repr(self), name)
+ _debug_info(info)
+ cache = '_%s'% name
+ self._empty_caches()
+ if value is None:
+ self.__dict__[name] = None
+ return
+ if isinstance(value, BaseString):
+ _warn("preventing from double composing, got %s"% repr(value))
+ value = value.value
+ self.__dict__[cache] = value # Cache given string for further use.
+ for func in decode_funcs:
+ value = func(value, exception_msg=
+ "%s, cannot convert bundled string"% info)
+ value = self._coerce_string(value, info)
+ # Store Unicode string.
+ self.value = value
+
+ @property
+ def uni(self):
+ _debug_info(u"in UnicodeObject:uni (getter)")
+ return self.value
+
+ @property
+ def dbytes(self):
+ return self._getter('dbytes', [uni2bytes])
+
+ @property
+ def imap(self):
+ return self._getter('imap', [uni2imap, uni2str])
+
+ @property
+ def fs(self):
+ return self._getter('fs', [uni2fs])
+
+ @property
+ def std(self):
+ return self._getter('std', [uni2std])
+
+ @uni.setter
+ def uni(self, u):
+ return self._setter('uni', [], u, unicode)
+
+ @dbytes.setter
+ def dbytes(self, s):
+ return self._setter('dbytes', [bytes2uni], s, str)
+
+ @imap.setter
+ def imap(self, s):
+ return self._setter('imap', [imap2uni], s, str)
+
+ @fs.setter
+ def fs(self, s):
+ return self._setter('fs', [fs2uni], s, str)
+
+ @std.setter
+ def std(self, s):
+ _debug_info(u"in %s:std (setter)"% repr(self))
+ raise UniError("std is non conventionnal encoding,\n it's not "
+ "possible to recover the original string because the "
+ "encode operation is destructive.")
+
+#
+# StrObject (object used in str legacy context).
+#
+class StrObject(BaseString):
+ """High level object in legacy context.
+
+ Semantic match UnicodeObject. See BaseString and UnicodeObject for more
+ details."""
+
+ def __init__(self, value=None):
+ # This object is broken if the context is wrong. Instances are usefull
+ # for debugging, though.
+ if use_unicode():
+ raise UniError("cannot instanciate StrObject in this context")
+ _debug_info(u"in StrObject:__init__")
+ BaseString.__init__(self, str, value)
+
+ @property
+ def uni(self):
+ _debug_info(u"in %s:uni (getter)"% repr(self))
+ return self.value
+
+ # Not a true encoding, here for ease of use.
+ @property
+ def std(self):
+ _debug_info(u"in %s:std (getter)"% repr(self))
+ # Don't cache this one. It would be too much code complexity for too few
+ # use cases. Getting std values more than once is unlikely because it's
+ # mainly intended for exception message.
+ return uni2std(self.value)
+
+ @uni.setter
+ def uni(self, u):
+ _debug_info(u"in %s:uni (setter)"% repr(self))
+ if type(u) == unicode:
+ # Raise if unicode is not plain ASCII.
+ u = uni2str(u, exception_msg=
+ "StrObject:uni (setter) could not convert string from "
+ "unicode to str the hard way, provided string is not full "
+ "ASCII")
+ self.value = u
+
+ # Not a true encoding, here for ease of use.
+ @std.setter
+ def std(self, s):
+ _debug_info(u"in %s:std (setter)"% repr(self))
+ raise UniError("std is non conventionnal encoding, it's not "
+ "possible to recover the original string because the "
+ "encode operation is destructive.")
+
+ # Aliasing expected methods where we can: StrObject methods must match
+ # UnicodeObject to be used in a context-free way. It's only possible if
+ # the setter gets a str value.
+ imap = BaseString.value
+ fs = BaseString.value
+ dbytes = BaseString.value
+
+
+#
+# RawObject
+#
+class RawObject(object):
+ """Raw bundler.
+
+ This object is a bit special. Currently, StrObject and UniObject can cast
+ types internally in order to be friendly with current code base. Also, used
+ for variable needing to *forbid* any attempt to encode/decode.
+
+ RawObject ensure no such thing will happen. The semantic is intentionally
+ broken to ensure no encoding/decoding is badly assumed while using this
+ object.
+
+ In short, this helps not taking a simple variable for a uni object while
+ forcing about bad encoding expectations. The intend of the developer is made
+ clear and explicit: "not forgot to use an uni object and zero
+ encodings/coercing on it".
+
+ This is usefull in very rare cases like password handling."""
+
+ def __init__(self, value=None):
+ _debug_info(u"in RawObject:__init__")
+ self.__value = None
+
+ def __getattr__(self, attr, *args, **kwargs):
+ return getattr(self.__value, attr)(*args, **kwargs)
+
+ @property
+ def raw(self):
+ return self.__value
+
+ @raw.setter
+ def raw(self, value):
+ self.__value = value
+
+#
+# _String factory (uniq entry point whatever context).
+#
+def _String(value=None):
+ """Main object to instanciate objects matching the context.
+
+ Returns a UnicodeObject if Unicode support is enabled, otherwise returns a
+ StrObject.
+
+ Both StrObject and UnicodeObject semantics match each other. This allows the
+ same usage without requiring to know the context and without having to care
+ about what we work with."""
+
+ _debug_info("in _String")
+
+ if use_unicode():
+ inst = UnicodeObject(value)
+ else:
+ inst = StrObject(value)
+ return inst
+
+# TODO: implement function to check identical semantics.
+
+"""
+#############################################################
+
+# Public factories.
+
+# Returns an uni String object to work with strings.
+
+# The argument encoding must match the prefix of the function name. This is how
+# we make the encoding explicit when taking a string.
+
+#############################################################
+"""
+
+def noneString():
+ """Returns an empty 'String' object."""
+
+ _debug_info("in noneString factory")
+ return _String()
+
+def valueString(s):
+ """Returns 'String' object from unicode encoded strings."""
+
+ _debug_info("in valueString factory for %s"% repr(s))
+ stringobj = _String()
+ stringobj.value = s
+ return stringobj
+
+def uniString(s):
+ """Returns 'String' object from unicode encoded strings."""
+
+ _debug_info("in uniString factory for %s"% repr(s))
+ stringobj = _String()
+ stringobj.uni = s
+ return stringobj
+
+def dbytesString(s):
+ """Read as "from default encoded string of bytes to String object".
+
+ Returns 'String' object from default encoded strings."""
+
+ _debug_info("in dbytesString factory for %s"% repr(s))
+ stringobj = _String()
+ stringobj.dbytes = s
+ return stringobj
+
+def imapString(s):
+ """Returns 'String' object from imap encoded strings (type str or
+ unicode)."""
+
+ _debug_info("in imapString factory for %s"% repr(s))
+ stringobj = _String()
+ stringobj.imap = s
+ return stringobj
+
+def fsString(s):
+ """Returns 'String' object from filesystem encoded strings."""
+
+ _debug_info("in fsString factory for %s"% repr(s))
+ stringobj = _String()
+ stringobj.fs = s
+ return stringobj
+
+
+"""
+#############################################################
+
+# Factorized stuff.
+
+# From here, add the factorized functions/classes. They are useful for us only
+# when dealing with Unicode, that's why they stand in this module.
+
+# On the other hand, they are not purely related to Unicode in the sense that
+# they suppose knowledge of OfflineIMAP logic. No other kind of software would
+# make use of them. That's why they are considered as outside functions
+# regarding the exception_msg handling point of view of this module.
+
+#############################################################
+"""
+
+class UnicodeFormatter(logging.Formatter):
+ """Wrap logging.Formatter to handle Unicode.
+
+ We have to to this because each Handler handles Unicode in its own way.
+ Some handlers might do not handle Unicode at all.
+ On top of that, encoding vary with the Handler."""
+
+ def __init__(self, fmt, datefmt=None, encode_function=None):
+ logging.Formatter.__init__(self, fmt, datefmt)
+ self.encode_function = encode_function
+
+ def format(self, record):
+ """Give precedence to the method defined encode_function argument over
+ the class attribute."""
+
+ result = logging.Formatter.format(self, record)
+ if self.encode_function:
+ if _DEBUG and type(record.msg) == str:
+ infos = inspect.stack()[9]
+ _warn(u"logger '%s' called with str type in: %s:%s\n"
+ " in function %s() \"%s\""% (record.name,
+ record.pathname, record.lineno, infos[3], record.msg))
+ result = self.encode_function(result)
+ return result
+
+
+def diverged_foldernames(foldername):
+ """Compare foldernames between the expected and unexpected encodings.
+
+ Support of Unicode for foldernames has to do more than just encoding strings
+ right. If previously run without Unicode support, the folder might exist on
+ disk with the wrong encoding.
+
+ Above statement is also true for the opposite: if currently running with
+ Unicode support disabled while previously run with it enabled.
+
+ :param foldername: uni object.
+ It MUST be the basename to avoid mixing encodings with the dirname part.
+
+ Returns 3 values:
+ - True if fodernames diverged, False otherwise (bool)
+ - string with the unexpected encoding (bytes)
+ - string with the expected encoding (bytes)
+ """
+
+ # Work on (str) UTF-8 decoded strings to compare results.
+
+ assert type(foldername) == type(noneString()), (
+ "diverged_foldernames: expects uni object argument, got %s"%
+ type(foldername))
+ foldername = foldername.fs
+
+ if use_unicode():
+ assert type(foldername) == str
+ expected = foldername
+ # If unicode support were disabled we would have work with a ASCII
+ # string of bytes encoded with IMAP UTF-7. Bundled foldername is
+ # currently standard Unicode code points.
+ #
+ # Encoding chain to redress from current Unicode is:
+ # filesystem -> Unicode -> IMAP -> str
+ unexpected = uni2str(uni2imap(fs2uni(foldername)))
+ else:
+ assert type(foldername) == str
+ # Currently, bundled foldername is an ASCII string of bytes encoded with
+ # IMAP UTF-7.
+ expected = foldername
+ # If unicode support were enabled we would have worked with a filesystem
+ # encoded string of bytes from the Unicode of the IMAP _decoded_ string
+ # points). Again, bundled foldername is currently an ASCII string of
+ # bytes encoded with IMAP UTF-7.
+ #
+ # Encoding chain to redress from current IMAP encoded string is:
+ # IMAP -> Unicode -> filesystem
+ unexpected = uni2fs(imap2uni(foldername))
+
+ diverged = ( expected != unexpected )
+
+ if diverged and _DEBUG:
+ _warn(u"diverged_foldernames: got: %s"% repr(foldername))
+ _warn(u"diverged_foldernames: unexpected: %s"% fs2uni(unexpected))
+ _warn(u"diverged_foldernames: expected: %s"% fs2uni(expected))
+
+ return diverged, unexpected, expected
+
+
+def rename_diverged(root, old, new):
+    """Move the folder old to new, both living under root.
+
+    root is passed separately so that the dirname and the basenames are never
+    joined while being in different encodings.
+
+    :param root: the dirname (as opposed to basename) (uni object). It must
+        not be diverging itself.
+    :param old: current basename of the folder (str).
+    :param new: wanted basename of the folder (str).
+
+    Returns True if the folder was moved, False if the move failed with
+    errno ENOENT (nothing to rename). Any other IOError is re-raised.
+    """
+    import errno
+
+    assert not diverged_foldernames(root)[0], ("won't rename a "
+        "folder if dirname is diverging")
+    assert type(old) == str
+    assert type(new) == str
+
+    # diverged_foldernames() only accepts uni objects, so root is one here:
+    # join its filesystem representation rather than the object itself.
+    fs_root = root.fs
+    fs_old = os.path.sep.join([fs_root, old])
+    fs_new = os.path.sep.join([fs_root, new])
+
+    try:
+        if _DEBUG:
+            _warn(u"rename_diverged (old): %s"% fsString(fs_old).uni)
+            _warn(u"rename_diverged (new): %s"% fsString(fs_new).uni)
+        shutil.move(fs_old, fs_new)
+    except IOError as e:
+        if e.errno != errno.ENOENT:
+            raise
+        # Source does not exist: nothing to rename.
+        if _DEBUG:
+            _warn(u"rename_diverged: not renaming folder '%s'"%
+                fsString(root).uni)
+        return False
+    return True
+
+
+def help_message():
+    """Print the warning and how-to text about experimental Unicode support."""
+    print("""
+Welcome to the Unicode world with OfflineIMAP. :-)
+
+Unicode is still an EXPERIMENTAL feature. Toying with it is very welcome because
+I can't test all possible options but you're advised to make good backups of
+both your mails and the cache. I aim to make Unicode the default but it won't
+happen without your help. So, here is a good way to play with this new feature.
+
+Some configuration options support UTF-8, some not. First, check the
+'offlineimap.conf' coming with your version for details. The very last WIP
+version (standing in the "next" branch) can be found online at
+
+ https://github.com/OfflineIMAP/offlineimap/blob/next/offlineimap.conf
+
+but it might not match your local version of OfflineIMAP.
+
+Do keep your current configuration file intact. The best approach is to copy
+your 'offlineimaprc' to 'offlineimaprc.utf-8' and update the latter with UTF-8
+in mind. Then the correct configuration file can be set (with the -c CLI
+option), according to the unicode CLI option you use.
+
+It's a good thing to also copy the content of your current 'metadata' and
+'localfolders'. Then, you'll have free hands to play on the copy (don't forget
+to update the paths in your 'offlineimaprc.utf-8' accordingly).
+
+Working on a copy does not mean you should bypass the backups steps. Something
+might go very bad and delete all your mails from the server. Make REGULAR
+backups.
+
+Now that you are warned, I can tell you: the true option is --enable-unicode.
+Please, keep the existence of this option to yourself (don't communicate it to
+others) so that newcomers will fall on this warning message, too.
+
+Not afraid? Good, I need you!
+
+Python 2 is not really consistent when it comes to Unicode and I expect unicode
+to come with subtle bugs. Subtle bugs require meticulous bug reports. This is
+not something hard to do, it just asks to be a bit rigorous. If you have to
+report bugs, follow the procedure at
+
+ https://github.com/OfflineIMAP/offlineimap/wiki/Unicode:-Reporting-bugs-about-Unicode-issues
+
+I intend to REJECT all the bug reports not following this procedure. I'm not a
+strong guy. I'd just like to keep both your life and mine as easy as possible
+while communicating about such bugs. I'm providing you all the resources you
+might need to do so. It took me a significant amount of time. Please, take the
+10 minutes to read the doc and follow the steps!
+
+I'm also asking for POSITIVE feedback. For it to be useful, read the
+linked page above. Positive feedback will help to know when it will be suitable
+to turn Unicode support from EXPERIMENTAL to TESTING, remove this message, and
+finally make it the default.
+
+Last but not least, as soon as Unicode is used once it might not be possible to
+come back to --no-unicode safely (e.g. if any Unicode character was written to
+the cache). I've tried hard to make it not happen but I can't be categorical.
+This is code. Well, you actually took my advice into account and made a copy of
+your mails, metadata and configuration... Good! You're not concerned by this
+issue anymore.
+
+If you intend to hack on Unicode, you should read both the API documentation and
+the utils/uni.py module. Last online versions can be found here:
+
+ http://docs.offlineimap.org/en/latest/API.html
+ https://github.com/OfflineIMAP/offlineimap/blob/next/offlineimap/utils/uni.py
+
+
+Have fun!
+
+--
+Nicolas Sebrecht """)
diff --git a/uni-tests.py b/uni-tests.py
new file mode 100644
index 0000000..7c7623e
--- /dev/null
+++ b/uni-tests.py
@@ -0,0 +1,737 @@
+# Copyright (C) 2015 Nicolas Sebrecht
+#
+# This work is free. You can redistribute it and/or modify it under the
+# terms of the Do What The Fuck You Want To Public License, Version 2,
+# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+import sys
+import traceback
+
+from offlineimap.utils import uni
+
+# Toggles. Each flag is assigned twice in a row and only the last assignment
+# takes effect: swap or drop the second line to flip a flag.
+# Default value for the debug_objs argument of TestSuites.add_test().
+_DEBUG_OBJS = False
+_DEBUG_OBJS = True
+
+# When true, TestSuites.test_failed() raises once the failure is reported.
+_STOP_ON_FAILURE = False
+_STOP_ON_FAILURE = True
+
+# Selects the fixtures built by Env and the value handed to
+# uni.set_unicode_context().
+USE_UNICODE = False
+USE_UNICODE = True
+
+# Avoid mixed output.
+sys.stderr = sys.stdout
+
+def output(msg=''):
+    """Print msg; with no argument, print an empty line."""
+    print(msg)
+
+class ExceptionExpected(Exception):
+    """Expected result of a test which must raise; also raised to report it."""
+
+class TestMalformed(Exception):
+    """Raised when a test could not be initialised."""
+
+
+class UnitTestEnv(object):
+    """Base class for the objects holding test fixtures."""
+
+#
+# UnitTest
+#
+class UnitTest(object):
+    """One test: the callable to run, its title and the expected result."""
+
+    # Sentinel: the test is not expected to return any particular value.
+    no_result = 'no result expected for this UnitTest'
+
+    def __init__(self, test_func, title, expected_result, debug_objs):
+        """Record the definition of the test.
+
+        :param test_func: callable taking no argument.
+        :param title: description printed in the banners.
+        :param expected_result: value test_func must return, UnitTest.no_result
+            or ExceptionExpected.
+        :param debug_objs: value of uni._DEBUG_OBJS while the test runs.
+        """
+        self.test_func = test_func
+        self.title = title
+        self.expected_result = expected_result
+        self.debug_objs = debug_objs
+
+    def run(self):
+        """Call the test function and return what it returns.
+
+        An exception raised by the test is re-raised as ExceptionExpected when
+        the test is expected to raise, as a plain Exception keeping the
+        original traceback otherwise.
+        """
+        # Initialise test environment for uni module.
+        uni._DEBUG_OBJS = self.debug_objs
+        try:
+            # Run the test and return its result.
+            return self.test_func()
+        except Exception as e:
+            if self.expected_result is ExceptionExpected:
+                raise ExceptionExpected("%s %s"% (str(type(e)), e))
+            raise Exception(e), None, sys.exc_info()[2]
+        finally:
+            # Restore uni module environment, also when the test raised.
+            uni._DEBUG_OBJS = False
+
+#
+# TestSuites
+#
+class TestSuites(object):
+    """Collect UnitTest objects, run them in order and print a report."""
+
+    def __init__(self, enable_unicode=True):
+        """:param enable_unicode: only used to describe the context of a
+        failure, see context()."""
+        self._unicode = enable_unicode
+
+        self._tests = []
+        self._win = '--- Test %i: PASSED ---'
+        self._lost = '--- Test %i: FAILED!'
+        # Marks "no value": never a legitimate result or expectation.
+        self._magic_flag = 'magic 123 unexpected 456 flag'
+        self._failed_test = []
+
+        # All about test being run.
+        self._count = 0
+        self._unittest = None
+        self._errors = self._magic_flag
+        self._expected = self._magic_flag
+        self._result = self._magic_flag
+        self._stack = None
+        self._finished = False
+
+    def context(self):
+        """Return a one-line description of the Unicode context."""
+        msg = "(context: "
+        if self._unicode:
+            return msg + 'Unicode is enabled)'
+        return msg + 'Unicode is disabled)'
+
+    def restore_env(self):
+        """Reset the per-test state before a new test starts."""
+        self._unittest = None
+        self._errors = self._magic_flag
+        self._expected = self._magic_flag
+        self._result = self._magic_flag
+        self._stack = None
+        self._finished = False
+
+    def print_start_banner(self, title=''):
+        output("\n--- Starting test %i: --- (%s)"% (self._count, title))
+
+    def print_end_banner(self):
+        output("--- Finished test %i: ---"% self._count)
+
+    def print_results(self):
+        """Print the result and the expectation, when there are any."""
+        # What we got.
+        if self._result != self._magic_flag:
+            output("Result  : %s"% repr(self._result))
+
+        # What we expected.
+        if self._expected != self._magic_flag:
+            # Did we expect a result at all?
+            if self._expected != UnitTest.no_result:
+                output("Expected: %s"% repr(self._expected))
+            else:
+                output("expecting no result")
+
+    def test_succeed(self, msg=None):
+        """Report the current test as passed."""
+        if msg:
+            output(msg)
+        # results
+        self.print_results()
+        # win banner
+        output(self._win % self._count)
+        self._finished = True
+
+    def test_failed(self, msg=None):
+        """Report the current test as failed and remember it.
+
+        Raises Exception when _STOP_ON_FAILURE is set.
+        """
+        banner = self._lost % self._count
+        banner = "%s (%s)"% (banner, self._unittest.title)
+
+        self._failed_test.append(banner)
+
+        if msg:
+            output(msg)
+        # results
+        self.print_results()
+        # print traceback
+        if self._stack:
+            output(self._stack)
+        output(self.context())
+        # lost banner
+        output(banner)
+        self._finished = True
+        if _STOP_ON_FAILURE:
+            raise Exception('test failed')
+
+    def add_test(self, test_func, title=None,
+            expected_result=UnitTest.no_result,
+            debug_objs=_DEBUG_OBJS):
+        """Add a test to the suites."""
+
+        unittest = UnitTest(test_func, title, expected_result, debug_objs)
+        self._tests.append(unittest)
+
+    def _report(self, raised_expected):
+        """Decide if the test which has just run passed, and report it.
+
+        :param raised_expected: True if the test raised ExceptionExpected.
+        """
+        if self._unittest.expected_result is ExceptionExpected:
+            # Fix this to not get it printed later.
+            self._expected = self._magic_flag
+            if raised_expected:
+                self.test_succeed(
+                    "! EXPECTED EXCEPTION, got '%s'"% self._stack)
+            else:
+                # The test returned normally while it had to raise.
+                self.test_failed("! EXPECTED EXCEPTION, none was raised")
+        elif self._errors:
+            # The test raised while it was not expected to. Checked before
+            # the "no result" case so that such a test can't pass.
+            self.test_failed()
+        elif self._unittest.expected_result == UnitTest.no_result:
+            self.test_succeed()
+        elif (self._result == self._expected
+                and type(self._result) == type(self._expected)):
+            self.test_succeed()
+        else:
+            # Results diverge, in value or in type.
+            self.test_failed()
+
+    def run(self):
+        """Run the test suites."""
+
+        for unittest in self._tests:
+            self._count += 1    # Starting new test.
+            self.restore_env()
+            self.print_start_banner(unittest.title)
+
+            raised_expected = False
+            try:
+                # Initialise test unit.
+                try:
+                    self._unittest = unittest
+                    self._expected = self._unittest.expected_result
+                except Exception:
+                    self._stack = traceback.format_exc()
+                    raise TestMalformed("could not initialise test "), \
+                        None, sys.exc_info()[2]
+
+                # Actually run the test.
+                self._result = self._unittest.run()
+
+                self._errors = False
+                self._stack = None
+            except TestMalformed:
+                raise   # Might want to handle this, later.
+            except ExceptionExpected as e:
+                # Test raised exception as expected.
+                raised_expected = True
+                self._errors = False
+                self._stack = str(e)
+                # Fix results.
+                self._result = self._magic_flag
+                self._expected = ExceptionExpected
+            except Exception:
+                # Test failed.
+                self._errors = True
+                self._stack = traceback.format_exc()
+            finally:
+                self.print_end_banner()
+
+            self._report(raised_expected)
+
+        # All tests have run. Print final results.
+        print('\n')
+        if not self._failed_test:
+            output("All (%s) tests passed!"% self._count)
+        else:
+            output("Tests FAILED:")
+            for failed in self._failed_test:
+                output("%s"% failed)
+        print('\n')
+
+##############
+#
+# The tests.
+#
+##############
+
+# Make debugging of uni module easier.
+def ___(tag=None):
+    """Print tag, or an empty line, while uni._DEBUG_OBJS is set.
+
+    Used inside the tests to separate the debug output of the uni module.
+    """
+    if uni._DEBUG_OBJS:
+        if tag is not None:
+            print(tag)
+        else:
+            # Explicit call: a bare "print" only prints as a Python 2
+            # statement, while the rest of this file uses the call form.
+            print('')
+
+# Define environment.
+class Env(UnitTestEnv):
+    """Fixture values for the tests, built according to USE_UNICODE."""
+
+    def __init__(self):
+        if USE_UNICODE:
+            self._build_unicode()
+        else:
+            self._build_plain()
+
+    def _build_unicode(self):
+        """Fixtures used when USE_UNICODE is true."""
+        self.x = u'x'
+        self.y = u'y'
+        self.e = u'\xe9' # lowercase e-acute
+        self.e_imap = str(uni.uni2imap(self.e))
+        self.e_dbytes = uni.uni2fs(self.e)
+        self.e_fs = uni.uni2fs(self.e)
+        self.substitution = u"substitution %s"
+        self.unexpected_encoded = unicode(self.e)
+        self.wrong_obj = uni.StrObject
+        self.e_diverging = self.e_fs
+        self.e_unexpected = uni.uni2fs(uni.uni2imap(self.e))
+
+    def _build_plain(self):
+        """Fixtures used when USE_UNICODE is false."""
+        self.x = 'x'
+        self.y = 'y'
+        self.e = 'e'
+        self.e_uni = u'\xe9' # lowercase e-acute
+        self.e_imap = self.e
+        self.e_dbytes = self.e
+        self.e_fs = self.e
+        self.substitution = "substitution %s"
+        self.unexpected_encoded = unicode(self.e_uni)
+        self.wrong_obj = uni.UnicodeObject
+        self.e_diverging = str(uni.uni2imap(self.e_uni))
+        self.e_unexpected = uni.uni2fs(self.e_uni)
+
+# Module-level fixtures, used for the expected results given to add_test().
+env = Env()
+# Hand the real context over so that failure reports don't always claim
+# that Unicode is enabled.
+suites = TestSuites(USE_UNICODE)
+
+# Replace the warning function of the uni module (which uses logging) so
+# that its messages go through output().
+def uni_test_warn(msg):
+    """Print msg with a 'UNI WARN: ' prefix."""
+    line = "UNI WARN: %s"% msg
+    output(line)
+uni._warn = uni_test_warn
+
+def context():
+    """Set the unicode context of the uni module from USE_UNICODE.
+
+    Registered below as a test which must raise.
+    """
+    uni.set_unicode_context(USE_UNICODE)
+
+suites.add_test(
+    context,
+    "set context twice --- uni.use_unicode(USE_UNICODE)",
+    ExceptionExpected)
+
+def convert():
+    """uni.convert() in strict mode on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    uni.convert('bytes2uni', fixtures.unexpected_encoded,
+        uni.ENCODING, 'strict')
+suites.add_test(
+    convert,
+    "convert strict with double encoded character",
+    ExceptionExpected)
+
+def uni2bytes():
+    """uni.uni2bytes() on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    uni.uni2bytes(fixtures.unexpected_encoded)
+suites.add_test(
+    uni2bytes,
+    "uni2bytes strict with double encoded character",
+    ExceptionExpected)
+
+def bytes2uni():
+    """uni.bytes2uni() on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    uni.bytes2uni(fixtures.unexpected_encoded)
+suites.add_test(
+    bytes2uni,
+    "bytes2uni strict with double encoded character",
+    ExceptionExpected)
+
+def uni2str():
+    """uni.uni2str() on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    uni.uni2str(fixtures.unexpected_encoded)
+suites.add_test(
+    uni2str,
+    "uni2str strict with double encoded character",
+    ExceptionExpected)
+
+def uni2std():
+    """uni.uni2std() on env.unexpected_encoded: must return '?'."""
+    fixtures = Env()
+    converted = uni.uni2std(fixtures.unexpected_encoded)
+    return converted
+suites.add_test(
+    uni2std,
+    "uni2std strict with double encoded character",
+    '?')
+
+def fs2uni():
+    """uni.fs2uni() on env.unexpected_encoded: must raise.
+
+    This used to call uni.uni2str(), duplicating the uni2str test and
+    leaving fs2uni untested.
+    """
+    env = Env()
+    uni.fs2uni(env.unexpected_encoded)
+suites.add_test(fs2uni,
+    "fs2uni strict with double encoded character",
+    ExceptionExpected)
+
+def uni2fs():
+    """uni.uni2fs() on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    uni.uni2fs(fixtures.unexpected_encoded)
+suites.add_test(
+    uni2fs,
+    "uni2fs strict with double encoded character",
+    ExceptionExpected)
+
+def isASCII():
+    """uni.isASCII() on env.unexpected_encoded: must return False."""
+    fixtures = Env()
+    answer = uni.isASCII(fixtures.unexpected_encoded)
+    return answer
+suites.add_test(
+    isASCII,
+    "isASCII, simple",
+    False)
+
+def isASCII_raise():
+    """uni.isASCII() given an exception_msg: must raise."""
+    fixtures = Env()
+    answer = uni.isASCII(fixtures.unexpected_encoded, exception_msg='raise')
+    return answer
+suites.add_test(
+    isASCII_raise,
+    "isASCII, must raise exception",
+    ExceptionExpected)
+
+def imap2uni():
+    """uni.imap2uni() on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    decoded = uni.imap2uni(fixtures.unexpected_encoded)
+    return decoded
+suites.add_test(
+    imap2uni,
+    "imap2uni with double encoded character",
+    ExceptionExpected)
+
+def uni2imap():
+    """uni.uni2imap() on env.unexpected_encoded: must raise."""
+    fixtures = Env()
+    encoded = uni.uni2imap(fixtures.unexpected_encoded)
+    return encoded
+suites.add_test(
+    uni2imap,
+    "uni2imap with double encoded character",
+    ExceptionExpected)
+
+def repr_BaseString():
+    """repr() of a BaseString built on env.unexpected_encoded.
+
+    No result is expected for this test.
+    """
+    env = Env()
+    repr(uni.BaseString(unicode, env.unexpected_encoded))
+suites.add_test(repr_BaseString,
+    "repr_String with double encoded character",)
+
+def replace_BaseString():
+    """replace() on a BaseString built on env.unexpected_encoded: must raise."""
+    env = Env()
+    s = uni.BaseString(unicode, env.unexpected_encoded)
+    return s.replace('e', '')
+# The title was a copy of the repr test's one, making both reports
+# indistinguishable.
+suites.add_test(replace_BaseString,
+    "replace_String with double encoded character",
+    ExceptionExpected)
+
+
+def string_factory():
+    """uni._String() must build the object type matching USE_UNICODE."""
+    fixtures = Env()
+
+    expected_type = uni.UnicodeObject if USE_UNICODE else uni.StrObject
+    built = uni._String(fixtures.x)
+    return type(built) == expected_type
+suites.add_test(
+    string_factory,
+    "uni._String factory",
+    True)
+
+def value_getter():
+    """The value attribute must give back what uni._String() was given."""
+    fixtures = Env()
+    wrapped = uni._String(fixtures.e)
+    return wrapped.value
+suites.add_test(
+    value_getter,
+    "value getter",
+    env.e)
+
+def value_setter():
+    """Read back the value attribute of a uni._String() object."""
+    # NOTE(review): same body as value_getter, nothing is assigned to the
+    # value attribute here -- confirm what this test is meant to cover.
+    fixtures = Env()
+    wrapped = uni._String(fixtures.e)
+    return wrapped.value
+suites.add_test(
+    value_setter,
+    "value setter",
+    env.e)
+
+def none_factory():
+    """The value of a uni.noneString() object must be None."""
+    empty = uni.noneString()
+    return empty.value
+suites.add_test(
+    none_factory,
+    "noneString factory",
+    None)
+
+def uni_getter_setter():
+    """Assign the uni attribute, then read it back."""
+    fixtures = Env()
+    wrapped = uni._String(fixtures.e)
+    ___()
+    wrapped.uni = fixtures.e
+    ___()
+    read_back = wrapped.uni
+    return read_back
+suites.add_test(
+    uni_getter_setter,
+    "uni (setter, getter)",
+    env.e)
+
+def uni_factory():
+    """uni.uniString() round trip through the uni attribute."""
+    # Relies on the module-level env, unlike the other tests.
+    wrapped = uni.uniString(env.e)
+    return wrapped.uni
+suites.add_test(
+    uni_factory,
+    "uni factory",
+    env.e)
+
+def value_factory():
+    """uni.valueString() round trip through the value attribute."""
+    fixtures = Env()
+    wrapped = uni.valueString(fixtures.e)
+    return wrapped.value
+suites.add_test(
+    value_factory,
+    "value factory",
+    env.e)
+
+def dbytes_factory():
+    """uni.dbytesString() round trip through the dbytes attribute."""
+    fixtures = Env()
+    wrapped = uni.dbytesString(fixtures.e_dbytes)
+    return wrapped.dbytes
+suites.add_test(
+    dbytes_factory,
+    "dbytes factory",
+    env.e_dbytes)
+
+def imap_factory():
+    """uni.imapString() round trip through the imap attribute."""
+    fixtures = Env()
+    wrapped = uni.imapString(fixtures.e_imap)
+    return wrapped.imap
+suites.add_test(
+    imap_factory,
+    "imap factory",
+    env.e_imap)
+
+def fs_factory():
+    """uni.fsString() round trip through the fs attribute."""
+    fixtures = Env()
+    wrapped = uni.fsString(fixtures.e_fs)
+    return wrapped.fs
+suites.add_test(
+    fs_factory,
+    "fs factory",
+    env.e_fs)
+
+def erase():
+    """Assigning the uni attribute must replace the previous content."""
+    fixtures = Env()
+    wrapped = uni.fsString(fixtures.e_fs)
+    wrapped.uni = fixtures.x
+    return wrapped.uni
+suites.add_test(
+    erase,
+    "erase",
+    env.x)
+
+def substitution():
+    """'%' between a uni object and an imapString object."""
+    fixtures = Env()
+    template = uni.uniString(fixtures.substitution)
+    argument = uni.imapString(fixtures.e_imap)
+    formatted = template % argument
+    return formatted.uni
+suites.add_test(
+    substitution,
+    "substitution",
+    env.substitution % env.e)
+
+def substitution_string():
+    """'%' between a uni object and a bare string."""
+    fixtures = Env()
+    template = uni.uniString(fixtures.substitution)
+    formatted = template % fixtures.e_imap
+    return formatted.uni
+suites.add_test(
+    substitution_string,
+    "substitution with string",
+    env.substitution % env.e_imap)
+
+def concatenation():
+    """'+' between a uniString object and an imapString object."""
+    fixtures = Env()
+    left = uni.uniString(fixtures.e)
+    right = uni.imapString(fixtures.e_imap)
+    joined = left + right
+    return joined.uni
+suites.add_test(
+    concatenation,
+    "string concatenation",
+    env.e + env.e)
+
+def concatenation_string():
+    """'+' between a uni object (left) and a bare string (right)."""
+    fixtures = Env()
+    left = uni.uniString(fixtures.e)
+    right = fixtures.e
+    joined = left + right
+    return joined.uni
+suites.add_test(
+    concatenation_string,
+    "string concatenation with string",
+    env.e + env.e)
+
+def concatenation_string_mix():
+    """'+' between a uni object and env.e_imap: must raise."""
+    fixtures = Env()
+    left = uni.uniString(fixtures.e)
+    right = fixtures.e_imap
+    joined = left + right
+    return joined.uni
+suites.add_test(
+    concatenation_string_mix,
+    "string concatenation with incompatible string",
+    ExceptionExpected)
+
+def concatenation_string_left():
+    """'+' between a bare string (left) and a uni object (right)."""
+    fixtures = Env()
+    right = uni.uniString(fixtures.e)
+    left = fixtures.e
+    joined = left + right
+    return joined
+suites.add_test(
+    concatenation_string_left,
+    "string concatenation with string at left",
+    env.e + env.e)
+
+def double_composing():
+    """uni.uniString() given an object it built itself."""
+    fixtures = Env()
+    inner = uni.uniString(fixtures.e)
+    outer = uni.uniString(inner)
+    return outer.uni
+suites.add_test(
+    double_composing,
+    "double composing",
+    env.e)
+
+def in_iterable():
+    """'in' on a list of uni objects."""
+    fixtures = Env()
+    first = uni.uniString(fixtures.e)
+    second = uni.uniString(fixtures.x)
+    candidates = [first, second]
+    return first in candidates
+suites.add_test(
+    in_iterable,
+    "in iterable",
+    True)
+
+def equal():
+    """'==' on two uni objects built from the same value."""
+    fixtures = Env()
+    left = uni.uniString(fixtures.e)
+    right = uni.uniString(fixtures.e)
+    return left == right
+suites.add_test(
+    equal,
+    "equal comparison",
+    True)
+
+def not_equal():
+    """'!=' on two uni objects built from different values."""
+    fixtures = Env()
+    left = uni.uniString(fixtures.e)
+    right = uni.uniString(fixtures.x)
+    return left != right
+suites.add_test(
+    not_equal,
+    "not equal comparison",
+    True)
+
+def not_equal_f():
+    """'!=' where the left operand is a uni object returned by a function."""
+    env = Env()
+    def a():
+        return uni.uniString(env.e)
+    x = uni.uniString(env.x)
+    return a() != x
+# Register this function: not_equal used to be registered a second time
+# here, so not_equal_f never ran.
+suites.add_test(not_equal_f,
+    "not equal comparison (left operand returned by a function)",
+    True)
+
+def lesser_than():
+    """Less-than comparison between two uni objects."""
+    fixtures = Env()
+    bigger = uni.uniString(fixtures.y)
+    smaller = uni.uniString(fixtures.x)
+    return smaller < bigger
+suites.add_test(
+    lesser_than,
+    "lesser than comparison",
+    True)
+
+def sort_uni_obj_list():
+    """sorted() on a list of uni objects."""
+    # Mix sort creation to avoid comparing objects id.
+    fixtures = Env()
+    y = uni.uniString(fixtures.y)
+    x = uni.uniString(fixtures.x)
+    i = uni.uniString(fixtures.x)
+    j = uni.uniString(fixtures.y)
+    ordered = sorted([y, x])
+    return ordered == [i, j]
+suites.add_test(
+    sort_uni_obj_list,
+    "sort list of uni objects (somewhat duplicata)",
+    True)
+
+def in_dict_keys():
+    """'in' on a dict whose keys are uni objects."""
+    fixtures = Env()
+    e = uni.uniString(fixtures.e)
+    x = uni.uniString(fixtures.x)
+    mapping = {x: 1, e: 2}
+    return e in mapping
+suites.add_test(
+    in_dict_keys,
+    "uni object in dict with uni object keys (somewhat duplicata)",
+    True)
+
+def split():
+    """split() of a uni object, the separator being a bare string."""
+    fixtures = Env()
+    e = uni.uniString(fixtures.e)
+    x = uni.uniString(fixtures.x)
+    exe = e + x + e
+    # WARNING: objects in list must be uni objects, hence this little bit more
+    # complex test.
+    parts = exe.split(x.uni)
+    return parts[1].uni
+suites.add_test(
+    split,
+    "split with string",
+    env.e)
+
+def rawobject():
+    """Assign, then read, the value attribute of a uni.RawObject."""
+    fixtures = Env()
+    raw = uni.RawObject(fixtures.x)
+    raw.value = fixtures.e
+    return raw.value
+suites.add_test(
+    rawobject,
+    "RawObject.value, getter and setter",
+    env.e)
+
+def wrong_obj():
+    """Instantiate the class stored in env.wrong_obj: must raise."""
+    fixtures = Env()
+    fixtures.wrong_obj(fixtures.x)
+suites.add_test(
+    wrong_obj,
+    "instanciate wrong object",
+    ExceptionExpected,
+    debug_objs=False) # Required for the correct exception.
+
+def unexpected_type_setter():
+    """uni.uniString() given an Env instance: must raise."""
+    env = Env()
+    s = uni.uniString(Env())
+suites.add_test(
+    unexpected_type_setter,
+    "unexpected type for value setter",
+    ExceptionExpected)
+
+def std_setter():
+    """Assigning the std attribute of a uni object: must raise."""
+    fixtures = Env()
+    wrapped = uni.uniString(fixtures.e)
+    wrapped.std = fixtures.e
+suites.add_test(
+    std_setter,
+    "use std setter",
+    ExceptionExpected)
+
+def diverging_folders():
+    """uni.diverged_foldernames() on env.e_diverging."""
+    fixtures = Env()
+    folder = uni.fsString(fixtures.e_diverging)
+    return uni.diverged_foldernames(folder)
+suites.add_test(
+    diverging_folders,
+    "diverging foldernames",
+    (True, env.e_unexpected, env.e_diverging))
+
+def non_diverging_folders():
+    """uni.diverged_foldernames() on env.x."""
+    fixtures = Env()
+    folder = uni.uniString(fixtures.x)
+    return uni.diverged_foldernames(folder)
+suites.add_test(
+    non_diverging_folders,
+    "non diverging foldernames",
+    (False, env.x, env.x))
+
+def root_diverging_rename_diverged():
+    """uni.rename_diverged() with env.e_unexpected as root: must raise."""
+    fixtures = Env()
+    root = uni.uniString(fixtures.e_unexpected)
+    return uni.rename_diverged(root, fixtures.x, fixtures.x)
+suites.add_test(
+    root_diverging_rename_diverged,
+    "root of rename is diverging",
+    ExceptionExpected)
+
+def nonzero_nonzero():
+    """Truth value of a uni object built from env.e."""
+    fixtures = Env()
+    return bool(uni.uniString(fixtures.e))
+suites.add_test(
+    nonzero_nonzero,
+    "nonzero on non zero value ('if uni_obj:' pattern)",
+    True)
+
+def nonzero_emptystring():
+    """Truth value of a uni object built from the empty string."""
+    fixtures = Env()
+    return bool(uni.uniString(''))
+suites.add_test(
+    nonzero_emptystring,
+    "nonzero on empty string ('if uni_obj:' pattern)",
+    False)
+
+def nonzero_None():
+    """Truth value of a uni.noneString() object."""
+    fixtures = Env()
+    return bool(uni.noneString())
+suites.add_test(
+    nonzero_None,
+    "nonzero on None value ('if uni_obj:' pattern)",
+    False)
+
+def equal_None():
+    """'== None' on a uni.noneString() object."""
+    fixtures = Env()
+    # "==" is what is under test here: do not turn it into "is".
+    return bool(uni.noneString() == None)
+suites.add_test(
+    equal_None,
+    "equal None value ('if uni_obj == None' pattern)",
+    True)
+
+def equal_None_string():
+    """'== None' on a uni object built from env.e."""
+    fixtures = Env()
+    # "==" is what is under test here: do not turn it into "is".
+    return bool(uni.uniString(fixtures.e) == None)
+suites.add_test(
+    equal_None_string,
+    "equal None on bundled non-empty string ('if uni_obj == None' pattern)",
+    False)
+
+def equal_None_empty_string():
+    """'== None' on a uni object built from the empty string."""
+    fixtures = Env()
+    # "==" is what is under test here: do not turn it into "is".
+    return bool(uni.uniString('') == None)
+suites.add_test(
+    equal_None_empty_string,
+    "equal None on bundled empty string ('if uni_obj == None' pattern)",
+    False)
+
+# Every test is registered: run them all and print the report.
+suites.run()
+
--
2.2.2
More information about the OfflineIMAP-project
mailing list