+                           default=False, 
+                           help='Switch the installer in serenity mode. Everythings are installed in a virtualenv')
+    argparser.add_argument('--virtualenv',
+                           dest='virtual', 
+                           type=str,
+                           action='store',
+                           default="%s-%s" % (package,version), 
+                           help='Specify the name of the virtualenv used by the serenity mode [default: %s-%s]' % (package,version))    
+    args, unknown = argparser.parse_known_args()
+    sys.argv = [sys.argv[0]] + unknown
+    if args.serenity:
+        local_serenity.append(True)
+        serenity_snake(args.virtual,package,version)
+    else:
+        local_serenity.append(False)       
+    log.set_threshold(old)
+    return args.serenity
diff --git a/distutils.ext/obidistutils/serenity/checkpackage.py b/distutils.ext/obidistutils/serenity/checkpackage.py
new file mode 100644
index 0000000..7bf9c1e
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/checkpackage.py
@@ -0,0 +1,184 @@
+Created on 2 oct. 2014
+ at author: coissac
+import re
+import sys
+import os
+from distutils.version import StrictVersion          # @UnusedImport
+from distutils.errors import DistutilsError
+from distutils import log
+from obidistutils.serenity.checkpip import get_a_pip_module
+def is_installed(requirement,pip=None):
+    '''
+    Checks whether a requirement is satisfied by an installed distribution.
+
+        @param requirement: a requirement specification such as
+                            'Cython>=0.20' or simply 'virtualenv'
+        @type requirement: str
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @return: True if exactly one installed distribution has the
+                 required project name and, when the requirement carries
+                 a version constraint, if its version satisfies it
+        @rtype: bool
+        @raise DistutilsError: if the comparison operator of the
+                               requirement is not a supported one
+    '''
+    import operator
+    # Explicit dispatch table instead of eval(): the requirement text is
+    # read from requirements.txt and must never be executed as python code.
+    comparators = {'==': operator.eq,
+                   '>=': operator.ge,
+                   '<=': operator.le,
+                   '>' : operator.gt,
+                   '<' : operator.lt}
+    if pip is None:
+        pip = get_a_pip_module()
+    get_installed_distributions=pip.util.get_installed_distributions
+    requirement_project,requirement_relation,requirement_version = parse_package_requirement(requirement)
+    constrained = requirement_version is not None and requirement_relation is not None
+    package = [x for x in get_installed_distributions() if x.project_name==requirement_project]
+    installed = len(package)==1
+    if installed and constrained:
+        if requirement_relation not in comparators:
+            raise DistutilsError("Requirement : %s not correctly formated" % requirement)
+        compare = comparators[requirement_relation]
+        rep = compare(StrictVersion(package[0].version),
+                      StrictVersion(requirement_version))
+    else:
+        # Without version constraint, being installed is enough
+        rep = installed
+    if rep:
+        if constrained:
+            log.info("Look for package %s (%s%s) : ok version %s installed" % (requirement_project,
+                                                                               requirement_relation,
+                                                                               requirement_version,
+                                                                               package[0].version))
+        else:
+            log.info("Look for package %s : ok version %s installed" % (requirement_project,
+                                                                        package[0].version))
+    elif not installed:
+        log.info("Look for package %s (%s%s) : not installed" % (requirement_project,
+                                                                 requirement_relation,
+                                                                 requirement_version))
+    else:
+        log.info("Look for package %s (%s%s) : failed only version %s installed" % (requirement_project,
+                                                                                    requirement_relation,
+                                                                                    requirement_version,
+                                                                                    package[0].version))
+    return rep
+def get_requirements(pip=None):
+    '''
+    Reads the requirement lines of the requirements.txt file located
+    in the current directory.
+
+    Blank lines, comment lines (starting with '#') and pip option
+    lines (starting with '-') are ignored.
+
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is loaded
+        @return: the stripped requirement lines, or an empty list if
+                 the file cannot be read
+        @rtype: list of str
+    '''
+    if pip is None:
+        pip = get_a_pip_module()
+    try:
+        # The with statement closes the file even if reading fails
+        with open('requirements.txt') as reqfile:
+            requirements = [x.strip() for x in reqfile]
+    except IOError:
+        return []
+    # Testing x before x[0] protects against the IndexError raised
+    # by blank lines
+    return [x for x in requirements if x and x[0] not in '-#']
+def install_requirements(skip_virtualenv=True,pip=None):
+    '''
+    Installs with pip every requirement listed in the requirements.txt
+    file of the current directory that is not already satisfied.
+
+    Blank lines, comment lines (starting with '#') and pip option
+    lines (starting with '-') are ignored.
+
+        @param skip_virtualenv: if True the requirements starting with
+                                'virtualenv' are not considered
+        @type skip_virtualenv: bool
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @return: True if at least one installation was launched,
+                 False otherwise or if the file cannot be read
+        @rtype: bool
+    '''
+    if pip is None:
+        pip = get_a_pip_module()
+    # Only the reading of the file is protected: an IOError raised while
+    # installing a package must not be silently ignored.
+    try:
+        with open('requirements.txt') as reqfile:
+            requirements = [x.strip() for x in reqfile]
+    except IOError:
+        return False
+    # Testing x before x[0] protects against the IndexError raised
+    # by blank lines
+    requirements = [x for x in requirements if x and x[0] not in '-#']
+    install_something=False
+    log.info("Required packages for the installation :")
+    for x in requirements:
+        if skip_virtualenv and x.startswith('virtualenv'):
+            continue
+        if not is_installed(x,pip):
+            log.info("  Installing requirement : %s" % x)
+            # NOTE(review): the status returned by pip is not checked here
+            pip_install_package(x,pip=pip)
+            install_something=True
+    return install_something
+def check_requirements(skip_virtualenv=True,pip=None):
+    '''
+    Checks that every requirement listed in the requirements.txt file
+    of the current directory is installed, and stops the program at the
+    first missing one.
+
+    Blank lines, comment lines (starting with '#') and pip option
+    lines (starting with '-') are ignored. Nothing is checked if the
+    file cannot be read.
+
+        @param skip_virtualenv: if True the requirements starting with
+                                'virtualenv' are not considered
+        @type skip_virtualenv: bool
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @raise SystemExit: with a non null status if a requirement
+                           is missing
+    '''
+    if pip is None:
+        pip = get_a_pip_module()
+    try:
+        with open('requirements.txt') as reqfile:
+            requirements = [x.strip() for x in reqfile]
+    except IOError:
+        return
+    # Testing x before x[0] protects against the IndexError raised
+    # by blank lines
+    requirements = [x for x in requirements if x and x[0] not in '-#']
+    log.info("Required packages for the installation :")
+    for x in requirements:
+        if skip_virtualenv and x.startswith('virtualenv'):
+            continue
+        if not is_installed(x,pip):
+            log.error("  Missing requirement : %s -- Package installation stopped" % x)
+            # A missing requirement is a failure: the exit status must
+            # not be the one of a successful run
+            sys.exit(1)
+def parse_package_requirement(requirement):
+    '''
+    Splits a requirement specification into its three components.
+
+    For example 'Cython>=0.20' gives ('Cython','>=','0.20') and
+    'virtualenv' gives ('virtualenv',None,None). Spaces around the
+    comparison operator are removed from the project name and from
+    the version.
+
+        @param requirement: the requirement specification
+        @type requirement: str
+        @return: a tuple (project, relation, version) where relation
+                 and version are None if the requirement has no
+                 comparison operator
+        @rtype: tuple
+        @raise DistutilsError: if no project name can be extracted
+    '''
+    version_pattern = re.compile('[=><]+(.*)$')
+    project_pattern  = re.compile('[^=><]+')
+    relationship_pattern = re.compile('[=><]+')
+    try:
+        # search() returns None (AttributeError on .group) when the
+        # requirement has no project part, and raises a TypeError when
+        # the requirement is not a string
+        requirement_project = project_pattern.search(requirement).group(0).strip()
+        requirement_version = version_pattern.search(requirement)
+        requirement_relation= relationship_pattern.search(requirement)
+    except (AttributeError, TypeError):
+        raise DistutilsError("Requirement : %s not correctly formated" % requirement)
+    if requirement_version is not None:
+        requirement_version=requirement_version.group(1).strip()
+    if requirement_relation is not None:
+        requirement_relation=requirement_relation.group(0)
+    return requirement_project,requirement_relation,requirement_version
+def get_package_requirement(package,pip=None):
+    '''
+    Looks in the requirement list for the line concerning a package.
+
+        @param package: the beginning of the requirement line looked for,
+                        usually the project name
+        @type package: str
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @return: the requirement line starting with package if exactly
+                 one line matches, None otherwise
+        @rtype: str or None
+    '''
+    if pip is None:
+        pip = get_a_pip_module()
+    matching = [line for line in get_requirements(pip)
+                if line.startswith(package)]
+    if len(matching)!=1:
+        return None
+    return matching[0]
+def pip_install_package(package,directory=None,pip=None):
+    '''
+    Runs the pip install command for a package.
+
+    If the http_proxy environment variable is set, it is passed to pip
+    with the --proxy option and it is copied to https_proxy when this
+    last variable is not defined.
+
+        @param package: the requirement to install
+        @type package: str
+        @param directory: if not None, the directory given to the
+                          --target option of pip
+        @type directory: str or None
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @return: the value returned by pip.main
+    '''
+    log.info('installing %s in directory %s' % (package,str(directory)))
+    if 'http_proxy' in os.environ:
+        os.environ.setdefault('https_proxy',os.environ['http_proxy'])
+    if pip is None:
+        pip = get_a_pip_module()
+    command = ['install']
+    proxy = os.environ.get('http_proxy')
+    if proxy is not None:
+        command += ['--proxy=%s' % proxy]
+    if directory is not None:
+        command += ['--target=%s' % directory]
+    command += [package]
+    return pip.main(command)
diff --git a/distutils.ext/obidistutils/serenity/checkpip.py b/distutils.ext/obidistutils/serenity/checkpip.py
new file mode 100644
index 0000000..b5f5834
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/checkpip.py
@@ -0,0 +1,82 @@
+Created on 2 oct. 2014
+ at author: coissac
+#import urllib2
+import os
+#import imp
+#import base64
+#import zipimport
+import importlib
+from distutils.version import StrictVersion
+#from distutils.errors import DistutilsError
+from distutils import log
+from obidistutils.serenity.globals import PIP_MINVERSION, \
+                                          local_pip                       # @UnusedImport
+from obidistutils.serenity.util import get_serenity_dir
+import sys
+import pkgutil
+def is_pip_installed(minversion=PIP_MINVERSION):
+    '''
+    Checks that a pip module at least as recent as minversion can
+    be imported.
+
+    The pip modules loaded by the check are removed from sys.modules
+    once the version has been tested.
+
+        @param minversion: the minimal version of pip accepted
+        @return: True if pip can be imported and is recent enough,
+                 False otherwise
+        @rtype: bool
+    '''
+    try:
+        log.info("Try to load pip module...")
+        pipmodule = importlib.import_module('pip')
+        if hasattr(pipmodule,'__version__'):
+            ok = StrictVersion(pipmodule.__version__) >= StrictVersion(minversion)
+            log.info("Pip installed version %s" % pipmodule.__version__)
+        else:
+            ok = False
+            log.info("A too old version of pip is installed on your system")
+        # We clean up the imported pip module for test purpose
+        for m in [x for x in sys.modules if x.startswith('pip.')]:
+            del sys.modules[m]
+        del sys.modules['pip']
+    except Exception:
+        # Every failure of the import or of the version test is reported
+        # as "no pip", but KeyboardInterrupt and SystemExit are no more
+        # trapped here and stop the program as expected.
+        ok = False
+        log.info("No pip installed on your system")
+    return ok
+def get_a_pip_module(minversion=PIP_MINVERSION):
+    '''
+    Returns a pip module, importing it at the first call and keeping
+    it in the local_pip list for the following ones.
+
+    At each call the cacert.pem file of pip._vendor.requests is written
+    in the serenity directory, and the PIP_CERT environment variable is
+    set to this file if it is not already defined.
+
+        @param minversion: the minimal version of pip accepted
+        @return: the first pip module stored in local_pip
+        @raise AssertionError: if the pip module has no __version__ or
+                               a version lower than minversion
+    '''
+    global local_pip
+    tmpdir = get_serenity_dir()
+    if not local_pip:    
+        # First call: the directory of the obidistutils.serenity package is
+        # put at the head of sys.path before importing pip -- presumably to
+        # find first a pip stored in this directory (TODO confirm).
+        serenity = importlib.import_module('obidistutils.serenity')
+        sys.path.insert(0, os.path.dirname(serenity.__file__))
+        pip = importlib.import_module('pip')
+        local_pip.append(pip)
+    else:
+        # Following calls: reuse the last module stored in the cache
+        pip = local_pip[-1]
+    # Prepare the CA certificate file used for the https downloads
+    cert_path = os.path.join(tmpdir, "cacert.pem")
+    certificate = pkgutil.get_data("pip._vendor.requests", "cacert.pem")
+    with open(cert_path, "wb") as cert:
+        cert.write(certificate)
+    # setdefault keeps a PIP_CERT value already defined by the user
+    os.environ.setdefault("PIP_CERT", cert_path)
+    # NOTE(review): this check is an assert, it is not executed when python
+    # runs with the -O option
+    assert hasattr(pip,'__version__') and StrictVersion(pip.__version__) >= StrictVersion(minversion), \
+               "Unable to find suitable version of pip"
+    return local_pip[0]
diff --git a/distutils.ext/obidistutils/serenity/checkpython.py b/distutils.ext/obidistutils/serenity/checkpython.py
new file mode 100644
index 0000000..9503b00
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/checkpython.py
@@ -0,0 +1,170 @@
+Created on 2 oct. 2014
+ at author: coissac
+import subprocess
+import sys
+import os
+import glob
+from distutils.version import StrictVersion
+from distutils import sysconfig
+from obidistutils.serenity.checksystem import is_mac_system, \
+                                              is_windows_system
+def is_python27(path=None):
+    '''
+    Checks that the python is a python2.7 
+        @param path: if None consider the running python
+                     otherwise the python pointed by the path
+        @type path: str or None
+        @return: True if the python is a 2.7, False for every other
+                 version or if the interpreter cannot be run
+        @rtype: bool
+    '''
+    if path is None:
+        pythonversion = StrictVersion(sysconfig.get_python_version())
+    else:
+        # The command is given as a list of arguments, without shell, so
+        # a path containing quotes or spaces is correctly managed. The
+        # probe uses print() to get an answer from a python 3 interpreter
+        # too, instead of a syntax error.
+        probe = 'import sys; print("%d.%d" % sys.version_info[:2])'
+        try:
+            p = subprocess.Popen([path, '-c', probe],
+                                 stdout=subprocess.PIPE)
+            answer = p.communicate()[0]
+        except OSError:
+            # The path does not exist or cannot be executed
+            return False
+        if not isinstance(answer, str):
+            answer = answer.decode('ascii', 'replace')
+        try:
+            pythonversion = StrictVersion(answer.strip())
+        except ValueError:
+            # The interpreter did not print a version number
+            return False
+    return     pythonversion >=StrictVersion("2.7") \
+           and pythonversion < StrictVersion("2.8") 
+def is_mac_system_python(path=None):
+    '''
+    Tells if the path of a python executable starts with one of the
+    locations of the python provided with a mac system:
+    /System/Library/Frameworks/Python.framework or /usr/bin
+        @param path: the path of the python executable, or None
+                     to consider the running python
+        @type path: str or None
+        @return: True if the path starts with one of these locations
+        @rtype: bool
+    '''
+    if path is None:
+        path = sys.executable
+    system_locations = ('/System/Library/Frameworks/Python.framework',
+                        '/usr/bin')
+    return path.startswith(system_locations)
+def is_a_virtualenv_python(path=None):
+    '''
+    Check if the python is belonging a virtualenv, i.e. if its sys
+    module has a real_prefix attribute.
+        @param path: the path pointing to the python executable.
+                     if path is None then the running python is
+                     considered.
+        @type path: str or None 
+        @return: True if the python belongs a virtualenv
+                 False otherwise or if the interpreter cannot be run
+        @rtype: bool
+    '''
+    if path is None:
+        rep = hasattr(sys, 'real_prefix')
+    else:
+        # The command is given as a list of arguments, without shell, so
+        # a path containing quotes or spaces is correctly managed. The
+        # probe uses print() to be accepted by python 2 and python 3.
+        probe = 'import sys; print(hasattr(sys,"real_prefix"))'
+        try:
+            p = subprocess.Popen([path, '-c', probe],
+                                 stdout=subprocess.PIPE)
+            answer = p.communicate()[0]
+        except OSError:
+            # The path does not exist or cannot be executed
+            return False
+        if not isinstance(answer, str):
+            answer = answer.decode('ascii', 'replace')
+        # The answer is compared as a text and not evaluated: the output
+        # of an external program must not be executed as python code.
+        rep = answer.strip()=='True'
+    return rep
+def which_virtualenv(path=None,full=False):
+    '''
+    Gives the virtualenv owning a python executable. The virtualenv
+    is the directory containing the 'bin' component of the path.
+        @param path: the path of a python executable, or None to
+                     consider the running python
+        @type path: str or None
+        @param full: if True the real absolute path of the virtualenv
+                     is returned, otherwise only its directory name
+        @type full: bool
+        @return: the virtualenv, or None if the python does not belong
+                 a virtualenv or if its path has no 'bin' component
+        @rtype: str or None
+    '''
+    if path is None:
+        path = sys.executable
+    if not is_a_virtualenv_python(path):
+        return None
+    components = path.split(os.sep)
+    try:
+        bin_index = components.index('bin')
+    except ValueError:
+        return None
+    if not full:
+        return components[bin_index-1]
+    return os.path.realpath(os.sep.join(components[0:bin_index]))
+def is_good_python27(path = None):    
+    '''
+    Tells if a python can be used to install the package: it has to
+    be a python 2.7 and, on a mac, it must not be the python provided
+    with the system.
+        @param path: the path of a python executable, or None to
+                     consider the running python
+        @type path: str or None
+        @return: True if the python is usable, False otherwise
+        @rtype: bool
+    '''
+    if not is_python27(path):
+        return False
+    if is_mac_system() and is_mac_system_python(path):
+        return False
+    return True
+def lookfor_good_python27():
+    '''
+    Looks in the directories of the PATH for the python executables
+    usable to install the package.
+
+    In each directory the names python2.7, python2 and python are
+    tested in this order. A python is kept if it is an executable
+    file accepted by is_good_python27 and not belonging a virtualenv.
+
+        @return: the paths of the usable pythons (symbolic links are
+                 resolved), in the order of their discovery and without
+                 duplicates. The list is always empty on windows
+        @rtype: list of str
+    '''
+    exe = []
+    if not is_windows_system():
+        # PATH can be undefined: in this case no directory is explored
+        path_variable = os.environ.get('PATH','')
+        if path_variable:
+            paths = path_variable.split(os.pathsep)
+        else:
+            paths = []
+        for p in paths:
+            for name in ('python2.7','python2','python'):
+                e = os.path.join(p,name)
+                if os.path.islink(e):
+                    e = os.path.realpath(e)
+                # The membership test comes first to avoid running twice
+                # the same interpreter, and it keeps the order of discovery
+                if e not in exe and \
+                   os.path.isfile(e) and \
+                   os.access(e, os.X_OK) and \
+                   is_good_python27(e) and \
+                   not is_a_virtualenv_python(e):
+                    exe.append(e)
+    return exe
diff --git a/distutils.ext/obidistutils/serenity/checksystem.py b/distutils.ext/obidistutils/serenity/checksystem.py
new file mode 100644
index 0000000..a60a4e1
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/checksystem.py
@@ -0,0 +1,19 @@
+Created on 2 oct. 2014
+ at author: coissac
+from distutils import util
+from distutils import log
+def is_mac_system():
+    '''
+    Tells if the platform name returned by distutils starts with
+    the 'macosx' component, and logs a message in this case.
+        @return: True on a macosx platform
+        @rtype: bool
+    '''
+    on_mac = util.get_platform().split('-')[0]=='macosx'
+    if on_mac:
+        log.info('You are running on a Mac platform')
+    return on_mac
+def is_windows_system():
+    '''
+    Tells if the program is running on a windows platform.
+        @return: True if the platform name returned by distutils
+                 starts with 'win'
+        @rtype: bool
+    '''
+    # On windows get_platform() returns names like 'win32' or 'win-amd64',
+    # never 'Windows', so the name is tested on its 'win' prefix.
+    platform = util.get_platform()
+    return platform.startswith('win')
diff --git a/distutils.ext/obidistutils/serenity/getcython.py b/distutils.ext/obidistutils/serenity/getcython.py
new file mode 100644
index 0000000..6453ddf
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/getcython.py
@@ -0,0 +1,72 @@
+Created on 2 oct. 2014
+ at author: coissac
+import imp
+import importlib
+from distutils.errors import DistutilsError
+from distutils.version import StrictVersion  
+from distutils import log       
+from obidistutils.serenity.globals import local_cython  # @UnusedImport
+from obidistutils.serenity.checkpip import get_a_pip_module
+from obidistutils.serenity.checkpackage import get_package_requirement
+from obidistutils.serenity.checkpackage import parse_package_requirement
+from obidistutils.serenity.checkpackage import is_installed
+from obidistutils.serenity.checkpackage import pip_install_package
+from obidistutils.serenity.util import get_serenity_dir
+def get_a_cython_module(pip=None):
+    '''
+    Returns a Cython module, loading it at the first call and keeping
+    it in the local_cython list for the following ones.
+
+    The requirement used is the 'Cython' line of the requirement list,
+    or just 'Cython' if there is no such line. If this requirement is
+    not installed, Cython is installed with pip in the serenity
+    directory and loaded from there.
+
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @return: the first Cython module stored in local_cython
+        @raise DistutilsError: if pip does not return 0 for the install
+        @raise AssertionError: if the freshly installed Cython is older
+                               than the version of the requirement
+    '''
+    global local_cython
+    if not local_cython:
+        if pip is None:
+            pip = get_a_pip_module()
+        cython_req = get_package_requirement('Cython',pip)
+        if cython_req is None:
+            cython_req='Cython'
+        requirement_project,requirement_relation,minversion = parse_package_requirement(cython_req)  # @UnusedVariable
+        # cython_req cannot be None here (see the default set above), so
+        # only the is_installed test decides of the installation
+        if cython_req is None or not is_installed(cython_req, pip):
+            tmpdir = get_serenity_dir()
+            ok = pip_install_package(cython_req,directory=tmpdir,pip=pip)
+            log.debug('temp install dir : %s' % tmpdir)
+            if ok!=0:
+                raise DistutilsError, "I cannot install a cython package"
+            # Load the module from the install directory only
+            f, filename, description = imp.find_module('Cython', [tmpdir])
+            cythonmodule = imp.load_module('Cython', f, filename, description)
+            if minversion is not None:
+                # NOTE(review): the version is compared with >= whatever the
+                # relation of the requirement is
+                assert StrictVersion(cythonmodule.__version__) >= minversion, \
+                       "Unable to find suitable version of cython get %s instead of %s" % (cythonmodule.__version__,
+                                                                                        minversion)
+        else:
+            cythonmodule = importlib.import_module('Cython') 
+        local_cython.append(cythonmodule)
+    return local_cython[0]
diff --git a/distutils.ext/obidistutils/serenity/globals.py b/distutils.ext/obidistutils/serenity/globals.py
new file mode 100644
index 0000000..39ad3cd
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/globals.py
@@ -0,0 +1,15 @@
+Created on 2 oct. 2014
+ at author: coissac
diff --git a/distutils.ext/obidistutils/serenity/rerun.py b/distutils.ext/obidistutils/serenity/rerun.py
new file mode 100644
index 0000000..d33a7de
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/rerun.py
@@ -0,0 +1,60 @@
+Created on 2 oct. 2014
+ at author: coissac
+import sys
+import os
+from distutils import log
+from distutils.errors import DistutilsError
+from obidistutils.serenity.globals import saved_args
+from obidistutils.serenity.checkpython import is_good_python27, \
+                                              lookfor_good_python27
+def rerun_with_anothe_python(path, fork=False):
+    '''
+    Runs again the current command line with another python.
+
+    The command line is the one stored in saved_args, or sys.argv if
+    nothing was saved. This function never returns normally: the
+    current process is replaced by the new one, or exits once the
+    forked process ended.
+
+        @param path: the path of the python executable to use
+        @type path: str
+        @param fork: if True the command runs in a child process and
+                     the current process exits with status 0 afterwards,
+                     otherwise the current process is replaced (execv)
+        @type fork: bool
+        @raise AssertionError: if path is not accepted by is_good_python27
+    '''
+    import subprocess
+    # Work on a copy: inserting the python path directly in saved_args
+    # would modify the saved command line itself.
+    if saved_args:
+        args = list(saved_args)
+    else:
+        args = list(sys.argv)
+    assert is_good_python27(path), \
+           'the selected python is not adapted to the installation of this package'
+    args.insert(0, path)
+    # Flush before handing over, to keep the outputs in the right order
+    sys.stderr.flush()
+    sys.stdout.flush()
+    if fork:
+        log.info('Forking a new install process')
+        # The arguments are given as a list, without shell: an argument
+        # containing spaces or shell special characters is transmitted
+        # unchanged to the new process.
+        subprocess.call(args)
+        log.info('External process ended')
+        sys.exit(0)
+    else:
+        os.execv(path,args)
+def enforce_good_python():
+    '''
+    Makes sure that the install runs with a usable python.
+
+    If the running python is not accepted by is_good_python27, the
+    command is restarted by rerun_with_anothe_python with the first
+    python returned by lookfor_good_python27.
+
+        @return: True if the running python is usable
+        @rtype: bool
+        @raise DistutilsError: if no usable python is found
+    '''
+    if not is_good_python27():
+        candidates = lookfor_good_python27()
+        if not candidates:
+            raise DistutilsError('No good python identified on your system')
+        chosen = candidates[0]
+        separator = "========================================"
+        for line in (separator,
+                     "",
+                     "    Switching to python : %s" % chosen,
+                     "",
+                     separator):
+            log.warn(line)
+        rerun_with_anothe_python(chosen)
+    else:
+        return True
diff --git a/distutils.ext/obidistutils/serenity/snake.py b/distutils.ext/obidistutils/serenity/snake.py
new file mode 100644
index 0000000..79c1f39
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/snake.py
@@ -0,0 +1,35 @@
+Created on 2 oct. 2014
+ at author: coissac
+snake ="""
+        ___
+      ,'._,`.
+     (-.___.-)
+     (-.___.-)
+     `-.___.-'
+      ((  @ @|              .            __
+       \   ` |         ,\   |`.    @|   |  |      _.-._
+      __`.`=-=mm===mm:: |   | |`.   |   |  |    ,'=` '=`.
+     (    `-'|:/  /:/  `/  @| | |   |, @| @|   /---)W(---\ 
+      \ \   / /  / /         @| |   '         (----| |----) ,~
+      |\ \ / /| / /            @|              \---| |---/  |
+      | \ V /||/ /                              `.-| |-,'   |
+      |  `-' |V /                                 \| |/    @'
+      |    , |-'                                 __| |__
+      |    .;: _,-.                         ,--""..| |..""--.
+      ;;:::' "    )                        (`--::__|_|__::--')
+    ,-"      _,  /                          \`--...___...--'/
+   (    -:--'/  /                           /`--...___...--'\  
+    "-._  `"'._/                           /`---...___...---'\  
+        "-._   "---.                      (`---....___....---')
+         .' ",._ ,' )                     |`---....___....---'|
+         /`._|  `|  |                     (`---....___....---')
+        (   \    |  /                      \`---...___...---'/
+         `.  `,  ^""                        `:--...___...--;'
+           `.,'               hh              `-._______.-'
\ No newline at end of file
diff --git a/distutils.ext/obidistutils/serenity/util.py b/distutils.ext/obidistutils/serenity/util.py
new file mode 100644
index 0000000..14c0283
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/util.py
@@ -0,0 +1,27 @@
+Created on 2 oct. 2014
+ at author: coissac
+import sys 
+import tempfile
+from obidistutils.serenity.globals import tmpdir         # @UnusedImport
+from obidistutils.serenity.globals import saved_args     # @UnusedImport
+def get_serenity_dir():
+    '''
+    Returns the working directory of the serenity mode. The directory
+    is created with tempfile.mkdtemp() at the first call and stored in
+    the tmpdir list.
+        @return: the first path stored in tmpdir
+        @rtype: str
+    '''
+    global tmpdir
+    if len(tmpdir)==0:
+        workdir = tempfile.mkdtemp()
+        tmpdir.append(workdir)
+    return tmpdir[0]
+def save_argv():
+    '''
+    Stores a copy of the current sys.argv in the saved_args list,
+    replacing its previous content.
+    '''
+    global saved_args
+    saved_args[:] = sys.argv
diff --git a/distutils.ext/obidistutils/serenity/virtual.py b/distutils.ext/obidistutils/serenity/virtual.py
new file mode 100644
index 0000000..e8e95fc
--- /dev/null
+++ b/distutils.ext/obidistutils/serenity/virtual.py
@@ -0,0 +1,133 @@
+Created on 2 oct. 2014
+ at author: coissac
+import imp
+import importlib
+import os
+import sys
+from distutils.errors import DistutilsError
+from distutils.version import StrictVersion  
+from distutils import log       
+from obidistutils.serenity.globals import PIP_MINVERSION, \
+                                          local_virtualenv  # @UnusedImport
+from obidistutils.serenity.checkpip import get_a_pip_module
+from obidistutils.serenity.checkpackage import get_package_requirement,\
+    install_requirements
+from obidistutils.serenity.checkpackage import parse_package_requirement
+from obidistutils.serenity.checkpackage import is_installed
+from obidistutils.serenity.checkpackage import pip_install_package
+from obidistutils.serenity.checkpython import is_a_virtualenv_python
+from obidistutils.serenity.checkpython import which_virtualenv
+from obidistutils.serenity.checkpython import is_good_python27
+from obidistutils.serenity.util import get_serenity_dir
+def get_a_virtualenv_module(pip=None):
+    '''
+    Returns a virtualenv module, loading it at the first call and
+    keeping it in the local_virtualenv list for the following ones.
+
+    The requirement used is the 'virtualenv' line of the requirement
+    list, or just 'virtualenv' if there is no such line. If this
+    requirement is not installed, virtualenv is installed with pip in
+    the serenity directory and loaded from there.
+
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module() is used
+        @return: the first virtualenv module stored in local_virtualenv
+        @raise DistutilsError: if pip does not return 0 for the install
+        @raise AssertionError: if the freshly installed virtualenv is
+                               older than the version of the requirement
+    '''
+    global local_virtualenv
+    if not local_virtualenv:
+        if pip is None:
+            pip = get_a_pip_module()
+        virtualenv_req = get_package_requirement('virtualenv',pip)
+        if virtualenv_req is None:
+            virtualenv_req='virtualenv'
+        requirement_project,requirement_relation,minversion = parse_package_requirement(virtualenv_req)  # @UnusedVariable
+        # virtualenv_req cannot be None here (see the default set above), so
+        # only the is_installed test decides of the installation
+        if virtualenv_req is None or not is_installed(virtualenv_req, pip):
+            tmpdir = get_serenity_dir()
+            ok = pip_install_package(virtualenv_req,directory=tmpdir,pip=pip)
+            log.debug('temp install dir : %s' % tmpdir)
+            if ok!=0:
+                raise DistutilsError, "I cannot install a virtualenv package"
+            # Load the module from the install directory only
+            f, filename, description = imp.find_module('virtualenv', [tmpdir])
+            vitualenvmodule = imp.load_module('virtualenv', f, filename, description)
+            if minversion is not None:
+                # NOTE(review): the version is compared with >= whatever the
+                # relation of the requirement is
+                assert StrictVersion(vitualenvmodule.__version__) >= minversion, \
+                       "Unable to find suitable version of virtualenv get %s instead of %s" % (vitualenvmodule.__version__,
+                                                                                        minversion)
+        else:
+            vitualenvmodule = importlib.import_module('virtualenv') 
+        local_virtualenv.append(vitualenvmodule)
+    return local_virtualenv[0]
+def serenity_virtualenv(envname,package,version,minversion=PIP_MINVERSION,pip=None):
+    '''
+    Gives the python executable of the virtualenv used by the serenity
+    mode, creating the virtualenv if it does not exist.
+
+        @param envname: the path of the virtualenv directory
+        @type envname: str
+        @param package: not used by this function, only transmitted
+                        to the recursive call
+        @param version: not used by this function, only transmitted
+                        to the recursive call
+        @param minversion: the minimal version of pip accepted
+        @param pip: the pip module to use; when None the module returned
+                    by get_a_pip_module(minversion) is used
+        @return: sys.executable if the running python already belongs
+                 the virtualenv, otherwise the real path of the python
+                 of the virtualenv
+        @rtype: str
+        @raise DistutilsError: if the virtualenv directory exists but
+                               its python is not usable
+    '''
+    #
+    # Checks if we are already running under the good virtualenv
+    #
+    if is_a_virtualenv_python():
+        ve = which_virtualenv(full=True)
+        if ve == os.path.realpath(envname) and is_good_python27():
+            return sys.executable
+    #
+    # We are not in the good virtualenv
+    #
+    if pip is None:
+        pip = get_a_pip_module(minversion)
+    #
+    # Check if the virtualenv exist
+    #
+    python = None
+    if os.path.isdir(envname):
+        python = os.path.join(envname,'bin','python')
+        ok = (is_good_python27(python) and
+              is_a_virtualenv_python(python))
+        #
+        # The virtualenv already exist but it is not ok
+        #
+        if not ok:
+            # The name of the virtualenv is now inserted in the message,
+            # which previously kept its raw %s placeholder
+            raise DistutilsError("A virtualenv %s already exists but not with the required python" % envname)
+    else:
+        ok = False
+    #
+    # Creates a new virtualenv
+    #
+    if not ok:
+        virtualenv = get_a_virtualenv_module(pip)
+        if virtualenv is not None:
+            virtualenv.create_environment(envname)
+            # check the newly created virtualenv
+            return serenity_virtualenv(envname,package,version,minversion,pip)
+    return os.path.realpath(python)
\ No newline at end of file
diff --git a/distutils.ext/src/littlebigman.c b/distutils.ext/src/littlebigman.c
new file mode 100644
index 0000000..5e2ea2a
--- /dev/null
+++ b/distutils.ext/src/littlebigman.c
@@ -0,0 +1,24 @@
+ * littlebigman.c
+ *
+ *  Created on: 11 juil. 2012
+ *      Author: coissac
+ */
+/*
+ * Prints on the standard output the compiler flag describing the byte
+ * order of the machine: -DLITTLE_END or -DBIG_END. Always returns 0.
+ */
+int main(int argc, char *argv[])
+    /* The same memory is seen as an int and as an array of 4 chars
+     * (assumes that an int is stored on 4 bytes -- TODO confirm) */
+    union { int entier;
+            char caractere[4] ;
+    } test;
+    test.entier=0x01020304;
+    /* The most significant byte (0x01) is the last one in memory only
+     * on a little endian machine */
+    if (test.caractere[3] == 1)
+       printf("-DLITTLE_END");
+    else
+        printf("-DBIG_END");
+	return 0;
diff --git a/distutils.ext/src/pidname.c b/distutils.ext/src/pidname.c
new file mode 100644
index 0000000..ff61eb4
--- /dev/null
+++ b/distutils.ext/src/pidname.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <libproc.h>
+/*
+ * Prints the path returned by proc_pidpath() for the process id given as
+ * first argument. Does nothing without argument. Always returns 0.
+ * NOTE(review): the declaration of pathbuf is not visible in this extract.
+ */
+int main (int argc, char* argv[])
+	pid_t pid; int ret;
+	if ( argc > 1 ) {
+		/* atoi() returns 0 when the argument is not a number */
+		pid = (pid_t) atoi(argv[1]);
+		ret = proc_pidpath (pid, pathbuf, sizeof(pathbuf));
+		/* A null or negative value is reported as an error using errno */
+		if ( ret <= 0 ) {
+			fprintf(stderr, "PID %d: proc_pidpath ();\n", pid);
+			fprintf(stderr,	"    %s\n", strerror(errno));
+		} else {
+			printf("proc %d: %s\n", pid, pathbuf);
+		}
+	}
+	return 0;
diff --git a/doc/sphinx/Makefile b/doc/sphinx/Makefile
new file mode 100644
index 0000000..1464560
--- /dev/null
+++ b/doc/sphinx/Makefile
@@ -0,0 +1,100 @@
+# Makefile for Sphinx documentation
+# You can set these variables from the command line.
+SPHINXBUILD   = sphinx-build
+#SPHINXBUILD   = /Library/Frameworks/Python.framework/Versions/2.7/bin/sphinx-build
+PAPER         =
+BUILDDIR      = build
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html      to make standalone HTML files"
+	@echo "  dirhtml   to make HTML files named index.html in directories"
+	@echo "  pickle    to make pickle files"
+	@echo "  json      to make JSON files"
+	@echo "  htmlhelp  to make HTML files and a HTML help project"
+	@echo "  qthelp    to make HTML files and a qthelp project"
+	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  changes   to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck to check all external links for integrity"
+	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
+	-rm -rf $(BUILDDIR)/*
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+	@echo
+	@echo "Build finished. The e-Pub pages are in $(BUILDDIR)/epub."
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+	@echo
+	@echo "Build finished."
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OBITools.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OBITools.qhc"
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+	      "run these through (pdf)latex."
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/doc/sphinx/make.bat b/doc/sphinx/make.bat
new file mode 100644
index 0000000..55bbd89
--- /dev/null
+++ b/doc/sphinx/make.bat
@@ -0,0 +1,113 @@
+REM Command file for Sphinx documentation
+set SPHINXBUILD=sphinx-build
+set BUILDDIR=build
+if NOT "%PAPER%" == "" (
+if "%1" == "" goto help
+if "%1" == "help" (
+	:help
+	echo.Please use `make ^<target^>` where ^<target^> is one of
+	echo.  html      to make standalone HTML files
+	echo.  dirhtml   to make HTML files named index.html in directories
+	echo.  pickle    to make pickle files
+	echo.  json      to make JSON files
+	echo.  htmlhelp  to make HTML files and a HTML help project
+	echo.  qthelp    to make HTML files and a qthelp project
+	echo.  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+	echo.  changes   to make an overview over all changed/added/deprecated items
+	echo.  linkcheck to check all external links for integrity
+	echo.  doctest   to run all doctests embedded in the documentation if enabled
+	goto end
+if "%1" == "clean" (
+	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+	del /q /s %BUILDDIR%\*
+	goto end
+if "%1" == "html" (
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+	goto end
+if "%1" == "dirhtml" (
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+	goto end
+if "%1" == "pickle" (
+	echo.
+	echo.Build finished; now you can process the pickle files.
+	goto end
+if "%1" == "json" (
+	echo.
+	echo.Build finished; now you can process the JSON files.
+	goto end
+if "%1" == "htmlhelp" (
+	echo.
+	echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+	goto end
+if "%1" == "qthelp" (
+	echo.
+	echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\OBITools.qhcp
+	echo.To view the help file:
+	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\OBITools.ghc
+	goto end
+if "%1" == "latex" (
+	echo.
+	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+	goto end
+if "%1" == "changes" (
+	echo.
+	echo.The overview file is in %BUILDDIR%/changes.
+	goto end
+if "%1" == "linkcheck" (
+	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+	echo.
+	echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+	goto end
+if "%1" == "doctest" (
+	echo.
+	echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+	goto end
diff --git a/doc/sphinx/source/annotations.rst b/doc/sphinx/source/annotations.rst
new file mode 100644
index 0000000..b07bf10
--- /dev/null
+++ b/doc/sphinx/source/annotations.rst
@@ -0,0 +1,11 @@
+Sequence annotations
+.. toctree::
+   :maxdepth: 2
+   scripts/ecotag
+   scripts/obiannotate
+   scripts/obiaddtaxids
diff --git a/doc/sphinx/source/attributes.rst b/doc/sphinx/source/attributes.rst
new file mode 100644
index 0000000..595224f
--- /dev/null
+++ b/doc/sphinx/source/attributes.rst
@@ -0,0 +1,128 @@
+The extended OBITools fasta and fastq format
+.. _obitools-fasta:
+The *extended OBITools Fasta format* is a strict :doc:`fasta format file <fasta>`.
+A file in *extended OBITools Fasta format* can be read by all programs
+reading fasta files.
+The only difference between standard and extended fasta is the structure of the title
+line. For OBITools, the title line is divided into three parts:
+        - Seqid : the sequence identifier
+        - key=value; : a set of key/value pairs
+        - the sequence definition
+    >my_sequence taxid=3456; direct=True; sample=A354; this is my pretty sequence
+Following these rules, the title line can be parsed :
+        - The sequence identifier of this sequence is : *my_sequence* 
+        - Three keys are assigned to this sequence :
+              - Key *taxid* with value *3456*
+              - Key *direct* with value *True*
+              - Key *sample* with value *A354*
+        - The definition of this sequence is *this is my pretty sequence*
+Values can be any valid python expression. If a key value cannot be evaluated as
+a python expression, it is then treated as a simple string. Following this rule,
+taxid value is considered as an integer value, direct value as a boolean and sample
+value is not a valid python expression so it is considered as a string value.
+Names reserved for attributes
+The following attribute names are created by some obitools programs and used by others.
+They have a special meaning, so we recommend not using them with different semantics.
+.. toctree::
+   :maxdepth: 2
+   attributes/ali_dir
+   attributes/ali_length
+   attributes/avg_quality 
+   attributes/best_match 
+   attributes/best_identity 
+   attributes/class 
+   attributes/cluster 
+   attributes/complemented 
+   attributes/count 
+   attributes/cut 
+   attributes/direction 
+   attributes/distance 
+   attributes/error 
+   attributes/experiment 
+   attributes/family 
+   attributes/family_name 
+   attributes/forward_error 
+   attributes/forward_match 
+   attributes/forward_primer 
+   attributes/forward_score 
+   attributes/forward_tag 
+   attributes/forward_tm 
+   attributes/genus 
+   attributes/genus_name 
+   attributes/head_quality 
+   attributes/id_status 
+   attributes/merged_star 
+   attributes/merged 
+   attributes/mid_quality 
+   attributes/mode 
+   attributes/obiclean_cluster 
+   attributes/obiclean_count 
+   attributes/obiclean_head 
+   attributes/obiclean_headcount 
+   attributes/obiclean_internalcount 
+   attributes/obiclean_samplecount 
+   attributes/obiclean_singletoncount
+   attributes/obiclean_status 
+   attributes/occurrence 
+   attributes/order 
+   attributes/order_name 
+   attributes/pairend_limit  
+   attributes/partial  
+   attributes/rank 
+   attributes/reverse_error 
+   attributes/reverse_match 
+   attributes/reverse_primer 
+   attributes/reverse_score 
+   attributes/reverse_tag 
+   attributes/reverse_tm 
+   attributes/sample 
+   attributes/scientific_name 
+   attributes/score
+   attributes/score_norm
+   attributes/select 
+   attributes/seq_ab_match
+   attributes/seq_a_single
+   attributes/seq_a_mismatch
+   attributes/seq_a_deletion
+   attributes/seq_a_insertion
+   attributes/seq_b_single
+   attributes/seq_b_mismatch
+   attributes/seq_b_deletion
+   attributes/seq_b_insertion
+   attributes/seq_length 
+   attributes/seq_length_ori
+   attributes/seq_rank 
+   attributes/sminL 
+   attributes/sminR 
+   attributes/species 
+   attributes/species_list 
+   attributes/species_name 
+   attributes/status 
+   attributes/strand 
+   attributes/tail_quality
+   attributes/taxid
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/ali_dir.rst b/doc/sphinx/source/attributes/ali_dir.rst
new file mode 100644
index 0000000..99c8fde
--- /dev/null
+++ b/doc/sphinx/source/attributes/ali_dir.rst
@@ -0,0 +1,9 @@
+    Either 'left' or 'right'. Indicates the way the alignment has been done, and especially where
+    the overlapping part is located on the forward read (either its 'right' part, or its
+    'left' part).
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/ali_length.rst b/doc/sphinx/source/attributes/ali_length.rst
new file mode 100644
index 0000000..1d98f28
--- /dev/null
+++ b/doc/sphinx/source/attributes/ali_length.rst
@@ -0,0 +1,9 @@
+    An integer value indicating the length of the alignment between the two 
+    paired-end reads.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/avg_quality.rst b/doc/sphinx/source/attributes/avg_quality.rst
new file mode 100644
index 0000000..60ec6eb
--- /dev/null
+++ b/doc/sphinx/source/attributes/avg_quality.rst
@@ -0,0 +1,18 @@
+    A float value indicating the average quality of the raw sequence.
+    .. note:: 
+       This tag can be used to investigate why sequences have not been assigned to any sample by 
+       :doc:`ngsfilter <../scripts/ngsfilter>`
+    .. seealso:: 
+       - :doc:`head_quality <./head_quality>`
+       - :doc:`mid_quality <./mid_quality>`
+       - :doc:`tail_quality <./tail_quality>`
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/best_identity.rst b/doc/sphinx/source/attributes/best_identity.rst
new file mode 100644
index 0000000..0bfb9dd
--- /dev/null
+++ b/doc/sphinx/source/attributes/best_identity.rst
@@ -0,0 +1,11 @@
+    A float value indicating the alignment score of the best match in the reference database.
+    .. seealso:: 
+       - :doc:`best_match <./best_match>`
+    Attribute added by the program:
+        - :doc:`ecotag <../scripts/ecotag>`
diff --git a/doc/sphinx/source/attributes/best_match.rst b/doc/sphinx/source/attributes/best_match.rst
new file mode 100644
index 0000000..cc847a0
--- /dev/null
+++ b/doc/sphinx/source/attributes/best_match.rst
@@ -0,0 +1,11 @@
+    The sequence *id* of the best match in the reference database.
+    .. seealso:: 
+       - :doc:`best_identity <./best_identity>`
+    Attribute added by the program:
+        - :doc:`ecotag <../scripts/ecotag>`
diff --git a/doc/sphinx/source/attributes/class.rst b/doc/sphinx/source/attributes/class.rst
new file mode 100644
index 0000000..ef718c4
--- /dev/null
+++ b/doc/sphinx/source/attributes/class.rst
@@ -0,0 +1,11 @@
+    A string value indicating the group (more exactly :doc:`sample <./sample>` or 
+    :doc:`taxid <./taxid>`) in which the :doc:`obiselect <../scripts/obiselect>` 
+    program will select sequences. 
+    Attribute added by the programs:
+        - :doc:`obiselect <../scripts/obiselect>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/cluster.rst b/doc/sphinx/source/attributes/cluster.rst
new file mode 100644
index 0000000..55e196f
--- /dev/null
+++ b/doc/sphinx/source/attributes/cluster.rst
@@ -0,0 +1,8 @@
+    An integer value indicating the cluster this sequence belongs to.
+    Attribute added by the programs:
+        - :doc:`obiannotate <../scripts/obiannotate>`
diff --git a/doc/sphinx/source/attributes/complemented.rst b/doc/sphinx/source/attributes/complemented.rst
new file mode 100644
index 0000000..1788000
--- /dev/null
+++ b/doc/sphinx/source/attributes/complemented.rst
@@ -0,0 +1,9 @@
+    A boolean value indicating whether the sequence has been complemented before
+    tag and primer identification.
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/count.rst b/doc/sphinx/source/attributes/count.rst
new file mode 100644
index 0000000..1ed31a2
--- /dev/null
+++ b/doc/sphinx/source/attributes/count.rst
@@ -0,0 +1,16 @@
+    An integer value indicating how many times this sequence occurs in the dataset.
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obisample <../scripts/obisample>`
+    Attribute used by the programs:
+        - :doc:`ecotag <../scripts/ecotag>`
+        - :doc:`ecotaxspecificity <../scripts/ecotaxspecificity>`
+        - :doc:`obiclean <../scripts/obiclean>`
+        - :doc:`obicount <../scripts/obicount>`
+        - :doc:`obisample <../scripts/obisample>`
+        - :doc:`obistat <../scripts/obistat>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/cut.rst b/doc/sphinx/source/attributes/cut.rst
new file mode 100644
index 0000000..33287de
--- /dev/null
+++ b/doc/sphinx/source/attributes/cut.rst
@@ -0,0 +1,11 @@
+    A list with two integers indicating the beginning and end of the barcode 
+    sequence itself within the raw sequence.
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/direction.rst b/doc/sphinx/source/attributes/direction.rst
new file mode 100644
index 0000000..2093750
--- /dev/null
+++ b/doc/sphinx/source/attributes/direction.rst
@@ -0,0 +1,8 @@
+    Either 'forward' or 'reverse'. Indicates if the primers have been identified on the 'forward' or 
+    'reverse' strand.  
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/distance.rst b/doc/sphinx/source/attributes/distance.rst
new file mode 100644
index 0000000..d20161a
--- /dev/null
+++ b/doc/sphinx/source/attributes/distance.rst
@@ -0,0 +1,11 @@
+    The distance between the optimal value and the value computed for this sequence record.
+    .. seealso::
+       - :doc:`select <./select>`
+    Attribute added by the programs:
+        - :doc:`obiselect <../scripts/obiselect>`
diff --git a/doc/sphinx/source/attributes/error.rst b/doc/sphinx/source/attributes/error.rst
new file mode 100644
index 0000000..da8d6a6
--- /dev/null
+++ b/doc/sphinx/source/attributes/error.rst
@@ -0,0 +1,11 @@
+    An integer value corresponding to the number of mismatches between each 
+    primer and its match in the sequence. 
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/experiment.rst b/doc/sphinx/source/attributes/experiment.rst
new file mode 100644
index 0000000..df49872
--- /dev/null
+++ b/doc/sphinx/source/attributes/experiment.rst
@@ -0,0 +1,9 @@
+    A string value indicating the name of the experiment the sequence and sample
+    belong to. This name is mentioned in the first column of the :doc:`ngsfilter <../scripts/ngsfilter>` samples description file.
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/family.rst b/doc/sphinx/source/attributes/family.rst
new file mode 100644
index 0000000..c02ddf1
--- /dev/null
+++ b/doc/sphinx/source/attributes/family.rst
@@ -0,0 +1,30 @@
+    An integer value corresponding to the family of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>` attribute. If the family is not defined for this :doc:`taxid <./taxid>`, 
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/family_name.rst b/doc/sphinx/source/attributes/family_name.rst
new file mode 100644
index 0000000..27bcd57
--- /dev/null
+++ b/doc/sphinx/source/attributes/family_name.rst
@@ -0,0 +1,29 @@
+    A string value indicating the family name of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>`. If the family is not defined for this :doc:`taxid <./taxid>`,
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/forward_error.rst b/doc/sphinx/source/attributes/forward_error.rst
new file mode 100644
index 0000000..28e0c3b
--- /dev/null
+++ b/doc/sphinx/source/attributes/forward_error.rst
@@ -0,0 +1,13 @@
+    An integer value indicating the number of mismatches between the forward 
+    primer and its match on the sequence under consideration.
+    .. seealso:: 
+       - :doc:`reverse_error <./reverse_error>`
+    Attribute added by the program:
+        - :doc:`obiconvert <../scripts/obiconvert>`
diff --git a/doc/sphinx/source/attributes/forward_match.rst b/doc/sphinx/source/attributes/forward_match.rst
new file mode 100644
index 0000000..1c046e0
--- /dev/null
+++ b/doc/sphinx/source/attributes/forward_match.rst
@@ -0,0 +1,15 @@
+    A string value corresponding to the forward primer match used to identify 
+    the sequence.
+    .. seealso:: 
+       - :doc:`reverse_match <./reverse_match>`
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
+        - :doc:`obiconvert <../scripts/obiconvert>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/forward_primer.rst b/doc/sphinx/source/attributes/forward_primer.rst
new file mode 100644
index 0000000..4855d35
--- /dev/null
+++ b/doc/sphinx/source/attributes/forward_primer.rst
@@ -0,0 +1,12 @@
+    A string value indicating the forward primer used to obtain the sequence.
+    .. seealso:: 
+       - :doc:`reverse_primer <./reverse_primer>`
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/forward_score.rst b/doc/sphinx/source/attributes/forward_score.rst
new file mode 100644
index 0000000..0c334a6
--- /dev/null
+++ b/doc/sphinx/source/attributes/forward_score.rst
@@ -0,0 +1,11 @@
+    A real value indicating the score of the alignment of the 5' primer against the sequence.
+    .. seealso:: 
+       - :doc:`reverse_score <./reverse_score>`
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/forward_tag.rst b/doc/sphinx/source/attributes/forward_tag.rst
new file mode 100644
index 0000000..fe28513
--- /dev/null
+++ b/doc/sphinx/source/attributes/forward_tag.rst
@@ -0,0 +1,14 @@
+    A string value corresponding to the individual tag attached in 5' of the 
+    forward primer and used to assign the sequence to a sample. 
+    .. seealso:: 
+       - :doc:`reverse_tag <./reverse_tag>`
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/forward_tm.rst b/doc/sphinx/source/attributes/forward_tm.rst
new file mode 100644
index 0000000..5ac68fe
--- /dev/null
+++ b/doc/sphinx/source/attributes/forward_tm.rst
@@ -0,0 +1,13 @@
+    A float value indicating the *Tm* of the forward primer match on the 
+    sequence under consideration.
+    .. seealso:: 
+       - :doc:`reverse_tm <./reverse_tm>`
+    Attribute added by the program:
+        - :doc:`obiconvert <../scripts/obiconvert>`
diff --git a/doc/sphinx/source/attributes/genus.rst b/doc/sphinx/source/attributes/genus.rst
new file mode 100644
index 0000000..2c7929b
--- /dev/null
+++ b/doc/sphinx/source/attributes/genus.rst
@@ -0,0 +1,33 @@
+    An integer value corresponding to the genus of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>` attribute. If the genus is not defined for this :doc:`taxid <./taxid>`, 
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/genus_name.rst b/doc/sphinx/source/attributes/genus_name.rst
new file mode 100644
index 0000000..f9eb238
--- /dev/null
+++ b/doc/sphinx/source/attributes/genus_name.rst
@@ -0,0 +1,29 @@
+    A string value indicating the genus name of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>`. If the genus is not defined for this :doc:`taxid <./taxid>`,
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
diff --git a/doc/sphinx/source/attributes/head_quality.rst b/doc/sphinx/source/attributes/head_quality.rst
new file mode 100644
index 0000000..0004c63
--- /dev/null
+++ b/doc/sphinx/source/attributes/head_quality.rst
@@ -0,0 +1,18 @@
+    A float value indicating the average quality of the first 10 nucleotides of the raw sequence.
+    .. note:: 
+       This tag can be used to investigate why sequences have not been assigned to any sample by 
+       :doc:`ngsfilter <../scripts/ngsfilter>`
+    .. seealso:: 
+       - :doc:`avg_quality <./avg_quality>`
+       - :doc:`mid_quality <./mid_quality>`
+       - :doc:`tail_quality <./tail_quality>`
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/id_status.rst b/doc/sphinx/source/attributes/id_status.rst
new file mode 100644
index 0000000..4d189a4
--- /dev/null
+++ b/doc/sphinx/source/attributes/id_status.rst
@@ -0,0 +1,8 @@
+    A boolean indicating whether a sequence match above the minimum threshold score has been found in
+    the reference database.
+    Attribute added by the program:
+        - :doc:`ecotag <../scripts/ecotag>`
diff --git a/doc/sphinx/source/attributes/merged.rst b/doc/sphinx/source/attributes/merged.rst
new file mode 100644
index 0000000..8e4be0a
--- /dev/null
+++ b/doc/sphinx/source/attributes/merged.rst
@@ -0,0 +1,9 @@
+    The `merged` key contains all *ids* of a group of sequences.
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiselect <../scripts/obiselect>`
diff --git a/doc/sphinx/source/attributes/merged_star.rst b/doc/sphinx/source/attributes/merged_star.rst
new file mode 100644
index 0000000..eeafe40
--- /dev/null
+++ b/doc/sphinx/source/attributes/merged_star.rst
@@ -0,0 +1,14 @@
+    The `merged_*` attribute is built based on another attribute `*` (for example, 
+    `sample`) by the :doc:`obiuniq <../scripts/obiuniq>` program. The value associated to the `merged_*` 
+    attribute is a contingency table summarizing modality frequencies associated to the `*` attribute.
+    For instance, `merged_sample={'X1': 12, 'X2': 10}` means that among the 22 identical sequences merged 
+    by the :doc:`obiuniq <../scripts/obiuniq>`, the `sample` attribute was set 12 and 10 times to the modality 'X1' 
+    and 'X2', respectively.
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiselect <../scripts/obiselect>`
diff --git a/doc/sphinx/source/attributes/mid_quality.rst b/doc/sphinx/source/attributes/mid_quality.rst
new file mode 100644
index 0000000..6d58fb1
--- /dev/null
+++ b/doc/sphinx/source/attributes/mid_quality.rst
@@ -0,0 +1,19 @@
+    A float value indicating the average quality of the raw sequence except its first and 
+    last 10 nucleotides.
+    .. note:: 
+       This tag can be used to investigate why sequences have not been assigned to any sample by 
+       :doc:`ngsfilter <../scripts/ngsfilter>`
+    .. seealso:: 
+       - :doc:`avg_quality <./avg_quality>`
+       - :doc:`head_quality <./head_quality>`
+       - :doc:`tail_quality <./tail_quality>`
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/mode.rst b/doc/sphinx/source/attributes/mode.rst
new file mode 100644
index 0000000..b767e56
--- /dev/null
+++ b/doc/sphinx/source/attributes/mode.rst
@@ -0,0 +1,11 @@
+    Either 'alignment' or 'joined'. Indicates whether the reported sequence is the consensus of the 
+    aligned reads ('alignment') or just the concatenation of the two reads ('joined').
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/obiclean_cluster.rst b/doc/sphinx/source/attributes/obiclean_cluster.rst
new file mode 100644
index 0000000..a614a4a
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_cluster.rst
@@ -0,0 +1,11 @@
+    An associative array indicating to which cluster each sequence is associated in each sample.
+    .. seealso:: 
+       - :doc:`obiclean_count <./obiclean_count>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_count.rst b/doc/sphinx/source/attributes/obiclean_count.rst
new file mode 100644
index 0000000..3f9cabd
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_count.rst
@@ -0,0 +1,13 @@
+    This attribute is added by :doc:`obiclean <../scripts/obiclean>` only to the sequences corresponding to
+    a cluster centre. It is an associative array indicating for each sample the abundance of the cluster.
+    .. seealso:: 
+       - :doc:`obiclean_cluster <./obiclean_cluster>`
+       - :doc:`obiclean_head <./obiclean_head>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_head.rst b/doc/sphinx/source/attributes/obiclean_head.rst
new file mode 100644
index 0000000..b42b1b9
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_head.rst
@@ -0,0 +1,12 @@
+    A boolean value set to True if the sequence has the `head` status in at least one sample. 
+    .. seealso:: 
+       - :doc:`obiclean_cluster <./obiclean_cluster>`
+       - :doc:`obiclean_count <./obiclean_count>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_headcount.rst b/doc/sphinx/source/attributes/obiclean_headcount.rst
new file mode 100644
index 0000000..cc1ab9f
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_headcount.rst
@@ -0,0 +1,14 @@
+    An integer value indicating the number of samples in which the sequence has the `head` status.
+    .. seealso:: 
+       - :doc:`obiclean_singletoncount <./obiclean_singletoncount>`
+       - :doc:`obiclean_internalcount <./obiclean_internalcount>`
+       - :doc:`obiclean_samplecount <./obiclean_samplecount>`
+       - :doc:`obiclean_status <./obiclean_status>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_internalcount.rst b/doc/sphinx/source/attributes/obiclean_internalcount.rst
new file mode 100644
index 0000000..04e6c12
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_internalcount.rst
@@ -0,0 +1,14 @@
+    An integer value indicating the number of samples in which the sequence has the `internal` status.
+    .. seealso:: 
+       - :doc:`obiclean_headcount <./obiclean_headcount>`
+       - :doc:`obiclean_singletoncount <./obiclean_singletoncount>`
+       - :doc:`obiclean_samplecount <./obiclean_samplecount>`
+       - :doc:`obiclean_status <./obiclean_status>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_samplecount.rst b/doc/sphinx/source/attributes/obiclean_samplecount.rst
new file mode 100644
index 0000000..6979b95
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_samplecount.rst
@@ -0,0 +1,13 @@
+    An integer value indicating the number of samples in which the sequence is observed.
+    .. seealso:: 
+       - :doc:`obiclean_headcount <./obiclean_headcount>`
+       - :doc:`obiclean_internalcount <./obiclean_internalcount>`
+       - :doc:`obiclean_singletoncount <./obiclean_singletoncount>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_singletoncount.rst b/doc/sphinx/source/attributes/obiclean_singletoncount.rst
new file mode 100644
index 0000000..f979887
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_singletoncount.rst
@@ -0,0 +1,14 @@
+    An integer value indicating the number of samples in which the sequence has the `singleton` status.
+    .. seealso:: 
+       - :doc:`obiclean_headcount <./obiclean_headcount>`
+       - :doc:`obiclean_internalcount <./obiclean_internalcount>`
+       - :doc:`obiclean_samplecount <./obiclean_samplecount>`
+       - :doc:`obiclean_status <./obiclean_status>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/obiclean_status.rst b/doc/sphinx/source/attributes/obiclean_status.rst
new file mode 100644
index 0000000..e66bd42
--- /dev/null
+++ b/doc/sphinx/source/attributes/obiclean_status.rst
@@ -0,0 +1,14 @@
+    An associative array storing the status of the sequence `h` (head), `i` (internal) or
+    `s` (singleton) in each sample.
+    .. seealso:: 
+       - :doc:`obiclean_headcount <./obiclean_headcount>`
+       - :doc:`obiclean_singletoncount <./obiclean_singletoncount>`
+       - :doc:`obiclean_internalcount <./obiclean_internalcount>`
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/occurrence.rst b/doc/sphinx/source/attributes/occurrence.rst
new file mode 100644
index 0000000..14cd1fe
--- /dev/null
+++ b/doc/sphinx/source/attributes/occurrence.rst
@@ -0,0 +1,9 @@
+    An integer value indicating the number of samples in which the sequence has 
+    been observed at least once.
+    Attribute added by the program:
+        - :doc:`obiclean <../scripts/obiclean>`
diff --git a/doc/sphinx/source/attributes/order.rst b/doc/sphinx/source/attributes/order.rst
new file mode 100644
index 0000000..f5bb882
--- /dev/null
+++ b/doc/sphinx/source/attributes/order.rst
@@ -0,0 +1,30 @@
+    An integer value corresponding to the order of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>` attribute. If the order is not defined for this :doc:`taxid <./taxid>`, 
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
diff --git a/doc/sphinx/source/attributes/order_name.rst b/doc/sphinx/source/attributes/order_name.rst
new file mode 100644
index 0000000..85493bc
--- /dev/null
+++ b/doc/sphinx/source/attributes/order_name.rst
@@ -0,0 +1,29 @@
+    A string value indicating the order name of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>`. If the order is not defined for this :doc:`taxid <./taxid>`,
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
diff --git a/doc/sphinx/source/attributes/pairend_limit.rst b/doc/sphinx/source/attributes/pairend_limit.rst
new file mode 100644
index 0000000..7829224
--- /dev/null
+++ b/doc/sphinx/source/attributes/pairend_limit.rst
@@ -0,0 +1,13 @@
+    Indicates, when the reported sequence is the concatenation of the two reads (mode='joined'), 
+    the position in the reported sequence where the second read starts.
+    .. seealso:: 
+       - :doc:`mode <./mode>`
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/partial.rst b/doc/sphinx/source/attributes/partial.rst
new file mode 100644
index 0000000..069ceeb
--- /dev/null
+++ b/doc/sphinx/source/attributes/partial.rst
@@ -0,0 +1,11 @@
+    A boolean value indicating whether both sample tags and both primers have 
+    been identified on both extremities of the sequence, more exactly whether we expect 
+    the sequence to be partially sequenced (**partial=True;**) or completely sequenced 
+    (**partial=False;**).
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/rank.rst b/doc/sphinx/source/attributes/rank.rst
new file mode 100644
index 0000000..3ee8221
--- /dev/null
+++ b/doc/sphinx/source/attributes/rank.rst
@@ -0,0 +1,15 @@
+    A string value corresponding to the lowest taxonomic rank of :doc:`taxid <./taxid>` stored 
+    into the :doc:`taxid <taxid>` attribute.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+          convenience and not used by other ``OBITools`` programs as taxonomic information.
+          Only the taxonomic information included in the :doc:`taxid <../attributes/taxid>`
+          attribute is used as taxonomic annotation.
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
diff --git a/doc/sphinx/source/attributes/reverse_error.rst b/doc/sphinx/source/attributes/reverse_error.rst
new file mode 100644
index 0000000..7efc2e5
--- /dev/null
+++ b/doc/sphinx/source/attributes/reverse_error.rst
@@ -0,0 +1,13 @@
+    An integer value indicating the number of mismatches between the reverse 
+    primer and its match on the sequence under consideration.
+    .. seealso:: 
+       - :doc:`forward_error <./forward_error>`
+    Attribute added by the programs:
+       - :doc:`obiconvert <../scripts/obiconvert>`
diff --git a/doc/sphinx/source/attributes/reverse_match.rst b/doc/sphinx/source/attributes/reverse_match.rst
new file mode 100644
index 0000000..10154d4
--- /dev/null
+++ b/doc/sphinx/source/attributes/reverse_match.rst
@@ -0,0 +1,15 @@
+    A string value corresponding to the reverse primer match used to identify 
+    the sequence.
+    .. seealso:: 
+       - :doc:`forward_match <./forward_match>`
+    Attribute added by the programs:
+       - :doc:`ngsfilter <../scripts/ngsfilter>`
+       - :doc:`obiconvert <../scripts/obiconvert>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/reverse_primer.rst b/doc/sphinx/source/attributes/reverse_primer.rst
new file mode 100644
index 0000000..962eae2
--- /dev/null
+++ b/doc/sphinx/source/attributes/reverse_primer.rst
@@ -0,0 +1,13 @@
+    A string value indicating the reverse primer used to obtain the sequence.
+    .. seealso:: 
+       - :doc:`forward_primer <./forward_primer>`
+    Attribute added by the program:
+       - :doc:`ngsfilter <../scripts/ngsfilter>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/reverse_score.rst b/doc/sphinx/source/attributes/reverse_score.rst
new file mode 100644
index 0000000..6dd9b62
--- /dev/null
+++ b/doc/sphinx/source/attributes/reverse_score.rst
@@ -0,0 +1,11 @@
+    A real value indicating the score of the alignment of the 3' primer against the sequence.
+    .. seealso:: 
+       - :doc:`forward_score <./forward_score>`
+    Attribute added by the programs:
+       - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/reverse_tag.rst b/doc/sphinx/source/attributes/reverse_tag.rst
new file mode 100644
index 0000000..aec33b6
--- /dev/null
+++ b/doc/sphinx/source/attributes/reverse_tag.rst
@@ -0,0 +1,13 @@
+    A string value corresponding to the individual tag attached in 5' of the 
+    reverse primer and used to assign the sequence to a sample. 
+    .. seealso:: 
+       - :doc:`forward_tag <./forward_tag>`
+    Attribute added by the program:
+       - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/reverse_tm.rst b/doc/sphinx/source/attributes/reverse_tm.rst
new file mode 100644
index 0000000..3292690
--- /dev/null
+++ b/doc/sphinx/source/attributes/reverse_tm.rst
@@ -0,0 +1,13 @@
+    A float value indicating the *Tm* of the reverse primer match on the 
+    sequence under consideration.
+    .. seealso:: 
+       - :doc:`forward_tm <./forward_tm>`
+    Attribute added by the program:
+       - :doc:`obiconvert <../scripts/obiconvert>`
diff --git a/doc/sphinx/source/attributes/sample.rst b/doc/sphinx/source/attributes/sample.rst
new file mode 100644
index 0000000..11f50c8
--- /dev/null
+++ b/doc/sphinx/source/attributes/sample.rst
@@ -0,0 +1,10 @@
+    A string value indicating the name of the sample the sequence belongs to. 
+    This name is mentioned in the second column of the :doc:`ngsfilter <../scripts/ngsfilter>` 
+    samples description file.
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/scientific_name.rst b/doc/sphinx/source/attributes/scientific_name.rst
new file mode 100644
index 0000000..53fd77a
--- /dev/null
+++ b/doc/sphinx/source/attributes/scientific_name.rst
@@ -0,0 +1,28 @@
+    A string value indicating the scientific name corresponding to the :doc:`taxid <./taxid>` stored
+    into the :doc:`taxid <taxid>` attribute. 
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <../attributes/taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
diff --git a/doc/sphinx/source/attributes/score.rst b/doc/sphinx/source/attributes/score.rst
new file mode 100644
index 0000000..ffdd488
--- /dev/null
+++ b/doc/sphinx/source/attributes/score.rst
@@ -0,0 +1,8 @@
+    A real value computed based on the alignment of two paired-end reads.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/score_norm.rst b/doc/sphinx/source/attributes/score_norm.rst
new file mode 100644
index 0000000..3be2b9c
--- /dev/null
+++ b/doc/sphinx/source/attributes/score_norm.rst
@@ -0,0 +1,8 @@
+    A real value computed based on the alignment score divided by the alignment length.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/select.rst b/doc/sphinx/source/attributes/select.rst
new file mode 100644
index 0000000..f18fac3
--- /dev/null
+++ b/doc/sphinx/source/attributes/select.rst
@@ -0,0 +1,11 @@
+    The value evaluated for this sequence record.
+    .. seealso::
+       - :doc:`distance <./distance>`
+    Attribute added by the programs:
+       - :doc:`obiselect <../scripts/obiselect>`
diff --git a/doc/sphinx/source/attributes/seq_a_deletion.rst b/doc/sphinx/source/attributes/seq_a_deletion.rst
new file mode 100644
index 0000000..b8e334e
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_a_deletion.rst
@@ -0,0 +1,8 @@
+    Integer value indicating the number of deletions between the first 
+    read and the consensus sequence in the aligned part.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_a_insertion.rst b/doc/sphinx/source/attributes/seq_a_insertion.rst
new file mode 100644
index 0000000..9864fe7
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_a_insertion.rst
@@ -0,0 +1,8 @@
+    Integer value indicating the number of insertions between the first 
+    read and the consensus sequence in the aligned part.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_a_mismatch.rst b/doc/sphinx/source/attributes/seq_a_mismatch.rst
new file mode 100644
index 0000000..e230750
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_a_mismatch.rst
@@ -0,0 +1,8 @@
+    Integer value indicating the number of mismatches between the first 
+    read and the consensus sequence in the aligned part.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_a_single.rst b/doc/sphinx/source/attributes/seq_a_single.rst
new file mode 100644
index 0000000..add9b42
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_a_single.rst
@@ -0,0 +1,9 @@
+    Integer value indicating the number of nucleotides of the first read 
+    that belong to the consensus sequence and were not aligned with the 
+    second read.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_ab_match.rst b/doc/sphinx/source/attributes/seq_ab_match.rst
new file mode 100644
index 0000000..63db8b9
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_ab_match.rst
@@ -0,0 +1,7 @@
+    Integer value indicating the number of matches in the aligned part. 
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_b_deletion.rst b/doc/sphinx/source/attributes/seq_b_deletion.rst
new file mode 100644
index 0000000..ae48c14
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_b_deletion.rst
@@ -0,0 +1,8 @@
+    Integer value indicating the number of deletions between the second 
+    read and the consensus sequence in the aligned part.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_b_insertion.rst b/doc/sphinx/source/attributes/seq_b_insertion.rst
new file mode 100644
index 0000000..1db9bec
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_b_insertion.rst
@@ -0,0 +1,8 @@
+    Integer value indicating the number of insertions between the second 
+    read and the consensus sequence in the aligned part.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_b_mismatch.rst b/doc/sphinx/source/attributes/seq_b_mismatch.rst
new file mode 100644
index 0000000..8c3f8ff
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_b_mismatch.rst
@@ -0,0 +1,8 @@
+    Integer value indicating the number of mismatches between the second 
+    read and the consensus sequence in the aligned part.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_b_single.rst b/doc/sphinx/source/attributes/seq_b_single.rst
new file mode 100644
index 0000000..4a4d5f9
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_b_single.rst
@@ -0,0 +1,9 @@
+    Integer value indicating the number of nucleotides of the second read 
+    that belong to the consensus sequence and were not aligned with the 
+    first read.
+    Attribute added by the program:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/seq_length.rst b/doc/sphinx/source/attributes/seq_length.rst
new file mode 100644
index 0000000..76c3ffa
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_length.rst
@@ -0,0 +1,10 @@
+    An integer value indicating the length of the sequence.
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/seq_length_ori.rst b/doc/sphinx/source/attributes/seq_length_ori.rst
new file mode 100644
index 0000000..57fdbec
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_length_ori.rst
@@ -0,0 +1,9 @@
+    An integer value indicating the length of the sequence before tag and primer
+    removal.
+    Attribute added by the program:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/seq_rank.rst b/doc/sphinx/source/attributes/seq_rank.rst
new file mode 100644
index 0000000..c35933c
--- /dev/null
+++ b/doc/sphinx/source/attributes/seq_rank.rst
@@ -0,0 +1,9 @@
+    An integer value indicating the rank of the sequence in the file.
+    Attribute added by the programs:
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/sminL.rst b/doc/sphinx/source/attributes/sminL.rst
new file mode 100644
index 0000000..b7e6616
--- /dev/null
+++ b/doc/sphinx/source/attributes/sminL.rst
@@ -0,0 +1,8 @@
+    A real value corresponding to the minimum score (specified with the ``--score-min`` option) above 
+    which ``left`` alignments are discarded.
+    Attribute added by the programs:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/sminR.rst b/doc/sphinx/source/attributes/sminR.rst
new file mode 100644
index 0000000..775b45c
--- /dev/null
+++ b/doc/sphinx/source/attributes/sminR.rst
@@ -0,0 +1,8 @@
+    A real value corresponding to the minimum score (specified with the ``--score-min`` option) above 
+    which ``right`` alignments are discarded.
+    Attribute added by the programs:
+        - :doc:`illuminapairedend <../scripts/illuminapairedend>`
diff --git a/doc/sphinx/source/attributes/species.rst b/doc/sphinx/source/attributes/species.rst
new file mode 100644
index 0000000..3cea8bd
--- /dev/null
+++ b/doc/sphinx/source/attributes/species.rst
@@ -0,0 +1,32 @@
+    An integer value corresponding to the species of the :doc:`taxid <./taxid>` stored into the 
+    :doc:`taxid <taxid>` attribute. If the species is not defined for this :doc:`taxid <./taxid>`, 
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species_name <./species_name>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/species_list.rst b/doc/sphinx/source/attributes/species_list.rst
new file mode 100644
index 0000000..35f22f3
--- /dev/null
+++ b/doc/sphinx/source/attributes/species_list.rst
@@ -0,0 +1,12 @@
+    A list of strings corresponding to the species scientific names which are under the 
+    assigned :doc:`taxid <./taxid>` (when the list becomes too long, the list is empty).
+    .. warning:: 
+       This list should not be used for assignment purposes.
+    Attribute added by the program:
+        - :doc:`ecotag <../scripts/ecotag>`
diff --git a/doc/sphinx/source/attributes/species_name.rst b/doc/sphinx/source/attributes/species_name.rst
new file mode 100644
index 0000000..3e442d8
--- /dev/null
+++ b/doc/sphinx/source/attributes/species_name.rst
@@ -0,0 +1,31 @@
+    A string value indicating the species scientific name of the :doc:`taxid <./taxid>` stored 
+    into the :doc:`taxid <taxid>` attribute. If the species is not defined for this :doc:`taxid <./taxid>`,
+    this value is *None*.
+    .. warning::  This taxonomic information is just added to the sequence for the end-user
+                  convenience and not used by other ``OBITools`` programs as taxonomic information.
+                  Only the taxonomic information included in the :doc:`taxid <taxid>`
+                  attribute is used as taxonomic annotation.
+    .. seealso:: 
+       - :doc:`taxid <./taxid>`
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+    Attribute added by the programs:
+        - :doc:`obiuniq <../scripts/obiuniq>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/status.rst b/doc/sphinx/source/attributes/status.rst
new file mode 100644
index 0000000..add86ac
--- /dev/null
+++ b/doc/sphinx/source/attributes/status.rst
@@ -0,0 +1,10 @@
+    Either *full* if the amplicon has been sequenced entirely, or *partial* if not.
+    Attribute added by the programs:
+        - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/strand.rst b/doc/sphinx/source/attributes/strand.rst
new file mode 100644
index 0000000..8831fd4
--- /dev/null
+++ b/doc/sphinx/source/attributes/strand.rst
@@ -0,0 +1,10 @@
+    A string value indicating whether the sequence was amplified directly from 
+    the reference (strand=D) or from its reverse complement (strand=R).
+    Attribute added by the programs:
+        - :doc:`obiconvert <../scripts/obiconvert>`
\ No newline at end of file
diff --git a/doc/sphinx/source/attributes/tail_quality.rst b/doc/sphinx/source/attributes/tail_quality.rst
new file mode 100644
index 0000000..4cc944c
--- /dev/null
+++ b/doc/sphinx/source/attributes/tail_quality.rst
@@ -0,0 +1,18 @@
+    A float value indicating the average quality of the 10 last nucleotides of the barcode.
+    .. note:: 
+       This tag can be used to investigate why sequences have not been assigned to any sample by 
+       :doc:`ngsfilter <../scripts/ngsfilter>`.
+    .. seealso:: 
+       - :doc:`avg_quality <./avg_quality>`
+       - :doc:`head_quality <./head_quality>`
+       - :doc:`mid_quality <./mid_quality>`
+    Attribute added by the programs:
+       - :doc:`ngsfilter <../scripts/ngsfilter>`
diff --git a/doc/sphinx/source/attributes/taxid.rst b/doc/sphinx/source/attributes/taxid.rst
new file mode 100644
index 0000000..067d907
--- /dev/null
+++ b/doc/sphinx/source/attributes/taxid.rst
@@ -0,0 +1,30 @@
+    An integer referring unambiguously to one taxon in the associated taxonomic database.
+    Attribute added by the programs:
+        - :doc:`ecotag <../scripts/ecotag>`
+        - :doc:`ecopcr <../scripts/ecoPCR>`
+        - :doc:`obiaddtaxids <../scripts/obiaddtaxids>`
+    .. seealso:: 
+       - :doc:`scientific_name <./scientific_name>`
+       - :doc:`family <./family>`
+       - :doc:`family_name <./family_name>`
+       - :doc:`genus <./genus>`
+       - :doc:`genus_name <./genus_name>`
+       - :doc:`order <./order>`
+       - :doc:`order_name <./order_name>`
+       - :doc:`species <./species>`
+       - :doc:`species_name <./species_name>`
+    Attribute used by the programs:
+        - :doc:`obiselect <../scripts/obiselect>`
+        - :doc:`obiannotate <../scripts/obiannotate>`
+        - :doc:`ecodbtaxstat <../scripts/ecodbtaxstat>`
+        - :doc:`ecotaxspecificity <../scripts/ecotaxspecificity>`
+        - :doc:`obiuniq <../scripts/obiuniq>`
\ No newline at end of file
diff --git a/doc/sphinx/source/barcodes.rst b/doc/sphinx/source/barcodes.rst
new file mode 100644
index 0000000..eacd41b
--- /dev/null
+++ b/doc/sphinx/source/barcodes.rst
@@ -0,0 +1,11 @@
+Metabarcode design and quality assessment
+.. toctree::
+   :maxdepth: 2
+   scripts/ecoPCR
+   scripts/ecoPrimers
+   scripts/ecotaxstat
+   scripts/ecotaxspecificity
diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py
new file mode 100644
index 0000000..5afacf8
--- /dev/null
+++ b/doc/sphinx/source/conf.py
@@ -0,0 +1,262 @@
+# -*- coding: utf-8 -*-
+# OBITools documentation build configuration file, created by
+# sphinx-quickstart on Tue Dec  8 21:30:02 2009.
+# This file is execfile()d with the current directory set to its containing dir.
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+import sys, os
+import glob
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('../../../%s' % build_dir))
+sys.path.insert(0, os.path.abspath('../../../build/raw_scripts'))
+sys.path.insert(0, os.path.abspath('../sphinxext'))
+import obitools
+import obitools.version
+# Add any Sphinx extension module names here, as strings. They can
+# be extensions coming with Sphinx (named 'sphinx.ext.*') or your
+# custom ones.
+# -- General configuration -----------------------------------------------------
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 
+              'sphinx.ext.coverage', 'sphinx.ext.viewcode',
+              'sphinx.ext.graphviz', 'sphinx.ext.inheritance_diagram',
+              'sphinx.ext.pngmath',
+#              'matplotlib.sphinxext.mathmpl',
+#              'matplotlib.sphinxext.only_directives',
+#              'matplotlib.sphinxext.plot_directive',
+#              'matplotlib.sphinxext.ipython_directive',
+              'sphinx.ext.doctest',
+              'ipython_console_highlighting',
+              'numpydoc']
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+# The suffix of source filenames.
+source_suffix = '.rst'
+# The encoding of source files.
+#source_encoding = 'utf-8'
+source_encoding = 'latin1'
+# The master toctree document.
+master_doc = 'index'
+# General information about the project.
+project = u'OBITools'
+copyright = u'2009 - 2015, OBITool Development Team'
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+# The short X.Y version.
+version = obitools.version.version
+# The full version, including alpha/beta/rc tags.
+release = obitools.version.version
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = []
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = ['obitools.']
+# -- Options for HTML output ---------------------------------------------------
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'nature'
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+# If false, no module index is generated.
+#html_use_modindex = True
+# If false, no index is generated.
+#html_use_index = True
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'OBIToolsdoc'
+# -- Options for LaTeX output --------------------------------------------------
+latex_elements = {'papersize' : 'a4paper',
+                  'pointsize' : '11pts'}
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'OBITools.tex', u'OBITools Documentation',
+   u'OBITools Development Team', 'manual'),
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+latex_logo = 'OBITools.png'
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+latex_use_parts = True
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+# If false, no module index is generated.
+#latex_use_modindex = True
+inheritance_graph_attrs = dict(rankdir="TB", size='"5.0, 6.0"',
+                               fontsize=12, ratio='compress')
+inheritance_node_attrs = dict(fontsize=12, height=0.5,
+                              color='dodgerblue1', style='filled')
+# Subclass the latex formatter to reduce font size of code examples
+from sphinx.highlighting import PygmentsBridge
+from pygments.formatters.latex import LatexFormatter
+class CustomLatexFormatter(LatexFormatter):  # LatexFormatter subclass; assigned to PygmentsBridge.latex_formatter right after this class
+    def __init__(self, **options):  # takes arbitrary keyword options and forwards them unchanged to LatexFormatter
+        super(CustomLatexFormatter, self).__init__(**options)
+        self.verboptions = r"formatcom=\footnotesize"  # set after the base init so this value replaces whatever the base class stored; shrinks code examples
+PygmentsBridge.latex_formatter = CustomLatexFormatter
+#--option for epub format ----------------------------------
+epub_author=u"OBITools Development Team"
+#--options for the man format -------------------------------
+man_pages = []  # filled below with one 5-item tuple per file matching scripts/*.rst
+for f in glob.glob('scripts/*.rst'):  # relative pattern: matched against the current working directory
+    man_pages.append(( 
+        f[:-4],                    # source file: matched path minus its last 4 characters (the '.rst' suffix)
+        os.path.split(f[:-4])[1],  # output file (under output dir): last path component of the source name
+        'description of %s' % os.path.split(f[:-4])[1],   # description: generic text built from the same base name
+        'The OBITools Development Team - LECA',               # author
+        1,                         # manual section number
+    )) 
diff --git a/doc/sphinx/source/conversions.rst b/doc/sphinx/source/conversions.rst
new file mode 100644
index 0000000..14141a1
--- /dev/null
+++ b/doc/sphinx/source/conversions.rst
@@ -0,0 +1,11 @@
+File format conversions
+.. toctree::
+   :maxdepth: 2
+   scripts/obiconvert
+   scripts/obipr2
+   scripts/obisilva
+   scripts/obitaxonomy
+   scripts/obitab
\ No newline at end of file
diff --git a/doc/sphinx/source/embl.rst b/doc/sphinx/source/embl.rst
new file mode 100644
index 0000000..baf3910
--- /dev/null
+++ b/doc/sphinx/source/embl.rst
@@ -0,0 +1,2 @@
+The EMBL sequence format
\ No newline at end of file
diff --git a/doc/sphinx/source/fasta.rst b/doc/sphinx/source/fasta.rst
new file mode 100644
index 0000000..f17de3b
--- /dev/null
+++ b/doc/sphinx/source/fasta.rst
@@ -0,0 +1,47 @@
+The *fasta* format
+.. _classical-fasta:
+The *fasta* format is certainly the most widely used sequence file format. 
+This is certainly due to its great simplicity. It was originally created 
+for the Lipman and Pearson `FASTA program`_. In addition to the classical
+:ref:`fasta <classical-fasta>` format, OBITools use an
+:ref:`extended version <obitools-fasta>` of this format where structured 
+data are included in the title line.
+In *fasta* format a sequence is represented by a title line beginning with a **>** character and
+the sequence itself following the :doc:`iupac <iupac>` code. The sequence is usually split over 
+several lines of the same length (except for the last one) ::
+    >my_sequence this is my pretty sequence
+There is no special format for the title line except that this line should be unique.
+Usually the first word following the **>** character is considered as the sequence identifier.
+The end of the title line corresponds to a description of the sequence.
+Several sequences can be concatenated in the same file. The description of the next sequence
+is just pasted at the end of the record of the previous one ::
+    >sequence_A this is my first pretty sequence
+    >sequence_B this is my second pretty sequence
+    >sequence_C this is my third pretty sequence
+.. _`FASTA program`: http://www.ncbi.nlm.nih.gov/pubmed/3162770?dopt=Citation
\ No newline at end of file
diff --git a/doc/sphinx/source/fastq.rst b/doc/sphinx/source/fastq.rst
new file mode 100644
index 0000000..a4f974a
--- /dev/null
+++ b/doc/sphinx/source/fastq.rst
@@ -0,0 +1,163 @@
+The *fastq* sequence format
+.. _classical-fastq:
+.. note::
+    This article uses material from the Wikipedia article 
+    `FASTQ format <http://en.wikipedia.org/wiki/FASTQ_format>`_
+    which is released under the 
+    `Creative Commons Attribution-Share-Alike License 3.0 <http://creativecommons.org/licenses/by-sa/3.0/>`_
+**fastq format** is a text-based format  for storing both a biological sequence 
+(usually nucleotide sequence) and its corresponding quality scores.
+Both the sequence letter and quality score are encoded with a single
+ASCII character for brevity. It was originally developed at the `Wellcome Trust Sanger
+Institute` to bundle a
+:ref:`fasta <classical-fasta>` sequence and its quality data, but has recently
+become the *de facto* standard for storing the output of high throughput
+sequencing instruments such as the Illumina Genome
+Analyzer. [1]_
+A fastq file normally uses four lines per sequence.
+-  Line 1 begins with a '@' character and is followed by a sequence
+   identifier and an *optional* description (like a
+   :ref:`fasta <classical-fasta>` title line).
+-  Line 2 is the raw sequence letters.
+-  Line 3 begins with a '+' character and is *optionally* followed by
+   the same sequence identifier (and any description) again.
+-  Line 4 encodes the quality values for the sequence in Line 2, and
+   must contain the same number of symbols as letters in the sequence.
+A fastq file containing a single sequence might look like this:
+    @SEQ_ID
+    +
+    !''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+The character '!' represents the lowest quality while '~' is the
+highest. Here are the quality value characters in left-to-right
+increasing order of quality (`ASCII`):
+     !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+The original Sanger FASTQ files also allowed the sequence and quality
+strings to be wrapped (split over multiple lines), but this is generally
+discouraged as it can make parsing complicated due to the unfortunate
+choice of "@" and "+" as markers (these characters can also occur in the
+quality string).
+A quality value *Q* is an integer mapping of *p* (i.e., the probability
+that the corresponding base call is incorrect). Two different equations
+have been in use. The first is the standard Sanger variant to assess
+reliability of a base call, otherwise known as Phred quality score:
+:math:`Q_\text{sanger} = -10 \, \log_{10} p`
+The Solexa pipeline (i.e., the software delivered with the Illumina
+Genome Analyzer) earlier used a different mapping, encoding the
+odds *p*/(1-*p*) instead of the probability *p*:
+:math:`Q_\text{solexa-prior to v.1.3} = -10 \, \log_{10} \frac{p}{1-p}`
+Although both mappings are asymptotically identical at higher quality
+values, they differ at lower quality levels (i.e., approximately *p* >
+0.05, or equivalently, *Q* < 13).
+|Relationship between *Q* and *p* using the Sanger (red) and Solexa
+(black) equations (described above). The vertical dotted line indicates
+*p* = 0.05, or equivalently, *Q* � 13.|
+-  Sanger format can encode a Phred quality
+   score from 0 to 93 using ASCII 33 to 126
+   (although in raw read data the Phred quality score rarely exceeds 60,
+   higher scores are possible in assemblies or read maps). 
+-  Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score
+   from -5 to 62 using ASCII 59 to 126 (although in raw read
+   data Solexa scores from -5 to 40 only are expected)
+-  Starting with Illumina 1.3 and before Illumina 1.8, the format
+   encoded a Phred quality score from 0 to 62
+   using ASCII 64 to 126 (although in raw read data Phred
+   scores from 0 to 40 only are expected).
+-  Starting in Illumina 1.5 and before Illumina 1.8, the Phred scores 0
+   to 2 have a slightly different meaning. The values 0 and 1 are no
+   longer used and the value 2, encoded by ASCII 66 "B", is used as a
+   quality control indicator for the end of a read (see below).
+The Illumina manual [Sequencing Control Software, Version 2.6,
+Catalog # SY-960-2601, Part # 15009921 Rev. A, November 2009,
+http://watson.nci.nih.gov/solexa/Using_SCSv2.6_15009921_A.pdf]
+(page 30) states the following: *If a read ends with a segment of mostly
+low quality (Q15 or below), then all of the quality values in the
+segment are replaced with a value of 2 (encoded as the letter B in
+Illumina's text-based encoding of quality scores)... This Q2 indicator
+does not predict a specific error rate, but rather indicates that a
+specific final portion of the read should not be used in further
+analyses.* Also, the quality score encoded as "B" letter may occur
+internally within reads at least as late as pipeline version 1.6, as
+shown in the following example:
+    @HWI-EAS209_0006_FC706VJ:5:58:5894:21141#ATCACG/1
+    +HWI-EAS209_0006_FC706VJ:5:58:5894:21141#ATCACG/1
+    efcfffffcfeefffcffffffddf`feed]`]_Ba_^__[YBBBBBBBBBBRTT\]][]dddd`ddd^dddadd^BBBBBBBBBBBBBBBBBBBBBBBB
+An alternative interpretation of this ASCII encoding has been
+proposed.  Also, in Illumina runs using PhiX controls, the character
+'B' was observed to represent an "unknown quality score". The error rate
+of 'B' reads was roughly 3 phred scores lower than the mean observed score of
+a given run.
+-  Starting in Illumina 1.8, the quality scores have basically returned
+   to the use of the Sanger format (Phred+33).
+File extension
+There is no standard file extension for a FASTQ
+file, but .fq and .fastq are commonly used.
+See also
+-  :ref:`fasta <classical-fasta>`
+.. [1]
+   Cock et al (2009) The Sanger FASTQ file format for sequences with
+   quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids
+   Research, 
+.. [2]
+   Illumina Quality Scores, Tobias Mann, Bioinformatics, San Diego,
+   Illumina `1 <http://seqanswers.com/forums/showthread.php?t=4721>`__
+.. |Relationship between *Q* and *p* using the Sanger (red) and Solexa (black) equations (described above). The vertical dotted line indicates *p* = 0.05, or equivalently, *Q* � 13.| image:: Probability metrics.png
+See http://en.wikipedia.org/wiki/FASTQ_format
diff --git a/doc/sphinx/source/filtering.rst b/doc/sphinx/source/filtering.rst
new file mode 100644
index 0000000..6d9e0a4
--- /dev/null
+++ b/doc/sphinx/source/filtering.rst
@@ -0,0 +1,14 @@
+Sequence sampling and filtering
+.. toctree::
+   :maxdepth: 2
+   scripts/obiextract
+   scripts/obigrep
+   scripts/obihead
+   scripts/obisample
+   scripts/obiselect
+   scripts/obisplit
+   scripts/obisubset
+   scripts/obitail
diff --git a/doc/sphinx/source/formats.rst b/doc/sphinx/source/formats.rst
new file mode 100644
index 0000000..f734d1c
--- /dev/null
+++ b/doc/sphinx/source/formats.rst
@@ -0,0 +1,59 @@
+File formats usable with OBITools
+.. _the-sequence-files:
+The sequence files
+Sequences can be stored following various formats. OBITools knows
+some of them. The central format for sequence files manipulated by OBITools scripts 
+is the :doc:`fasta format <fasta>`. OBITools extends the fasta format by specifying 
+a syntax to include in the definition line data qualifying the sequence.
+All file formats use the :doc:`IUPAC <iupac>` code for encoding nucleotides and amino acids.
+.. toctree::
+   :maxdepth: 2
+   iupac
+   fasta
+   fastq
+   attributes
+   genbank
+   embl
+The taxonomy files
+Many OBITools are able to take into account taxonomic data. This is done in general by specifying 
+either a directory containing all :doc:`NCBI taxonomy dump files <./taxdump>` or an 
+:doc:`obitaxonomy <./obitaxonomy>` formatted database.
+.. toctree::
+   :maxdepth: 2
+   taxdump
+   obitaxonomy 
+	The ecoPCR files
+	----------------
+	ecoPCR_ simulates a PCR experiment by selecting in a sequence database, sequences matching 
+	simultaneously two primers sequences in a way allowing a PCR amplification of a DNA region.
+	The ecoPrimers files
+	--------------------
+	The OBITools files
+	------------------
+.. _ecoPCR: http://www.grenoble.prabi.fr/trac/ecoPCR
+.. _LECA: http://www-leca.ujf-grenoble.fr
+.. _`NCBI taxonomy`: http://www.ncbi.nlm.nih.gov/taxonomy
\ No newline at end of file
diff --git a/doc/sphinx/source/genbank.rst b/doc/sphinx/source/genbank.rst
new file mode 100644
index 0000000..fe64560
--- /dev/null
+++ b/doc/sphinx/source/genbank.rst
@@ -0,0 +1,2 @@
+The genbank sequence format
\ No newline at end of file
diff --git a/doc/sphinx/source/index.rst b/doc/sphinx/source/index.rst
new file mode 100644
index 0000000..c8c2ded
--- /dev/null
+++ b/doc/sphinx/source/index.rst
@@ -0,0 +1,23 @@
+.. OBITools documentation master file, created by
+   sphinx-quickstart on Tue Dec  8 21:30:02 2009.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+.. role:: latex(raw)
+   :format: latex
+.. toctree::
+   :maxdepth: 2
+   Welcome to OBITools's <introduction>
+   The OBITools scripts <scripts>
+   Sample tutorials <tutorials>
+Indices and tables
+* :ref:`genindex`
+* :ref:`search`
diff --git a/doc/sphinx/source/introduction.rst b/doc/sphinx/source/introduction.rst
new file mode 100644
index 0000000..648d615
--- /dev/null
+++ b/doc/sphinx/source/introduction.rst
@@ -0,0 +1,8 @@
+.. toctree::
+   :maxdepth: 2
+   Welcome to OBITools's <welcome>
+   The file formats <formats>
diff --git a/doc/sphinx/source/iupac.rst b/doc/sphinx/source/iupac.rst
new file mode 100644
index 0000000..c3e00a0
--- /dev/null
+++ b/doc/sphinx/source/iupac.rst
@@ -0,0 +1,63 @@
+The IUPAC code
+The International Union of Pure and Applied Chemistry (IUPAC_) defined
+the standard code for representing protein or DNA sequences.
+Nucleic IUPAC Code
+======== =================================
+**Code**     **Nucleotide**
+======== =================================
+   A       Adenine
+   C       Cytosine
+   G       Guanine
+   T       Thymine
+   U       Uracil
+   R       Purine (A or G)
+   Y       Pyrimidine (C, T, or U)
+   M       C or A
+   K       T, U, or G
+   W       T, U, or A
+   S       C or G
+   B       C, T, U, or G (not A)
+   D       A, T, U, or G (not C)
+   H       A, T, U, or C (not G)
+   V       A, C, or G (not T, not U)
+   N       Any base (A, C, G, T, or U)
+======== =================================
+Peptidic one- and three-letter IUPAC code
+============  =============  =======================================
+**1-letter**  **3-letters**          **Amino acid**    
+============  =============  =======================================
+   A             Ala                   Alanine
+   R             Arg                   Arginine
+   N             Asn                   Asparagine
+   D             Asp                   Aspartic acid
+   C             Cys                   Cysteine
+   Q             Gln                   Glutamine
+   E             Glu                   Glutamic acid
+   G             Gly                   Glycine
+   H             His                   Histidine
+   I             Ile                   Isoleucine
+   L             Leu                   Leucine
+   K             Lys                   Lysine
+   M             Met                   Methionine
+   F             Phe                   Phenylalanine
+   P             Pro                   Proline
+   S             Ser                   Serine
+   T             Thr                   Threonine
+   W             Trp                   Tryptophan
+   Y             Tyr                   Tyrosine
+   V             Val                   Valine
+   B             Asx                   Aspartic acid or Asparagine
+   Z             Glx                   Glutamine or Glutamic acid
+   X             Xaa                   Any amino acid
+============  =============  =======================================
+.. _IUPAC: http://www.iupac.org/
\ No newline at end of file
diff --git a/doc/sphinx/source/manipulations.rst b/doc/sphinx/source/manipulations.rst
new file mode 100644
index 0000000..04cc398
--- /dev/null
+++ b/doc/sphinx/source/manipulations.rst
@@ -0,0 +1,15 @@
+Computations on sequences
+.. toctree::
+   :maxdepth: 2
+   scripts/illuminapairedend
+   scripts/ngsfilter
+   scripts/obicomplement
+   scripts/obiclean
+   scripts/obicut
+   scripts/obijoinpairedend
+   scripts/obiuniq
\ No newline at end of file
diff --git a/doc/sphinx/source/obitaxonomy.rst b/doc/sphinx/source/obitaxonomy.rst
new file mode 100644
index 0000000..7dee5a0
--- /dev/null
+++ b/doc/sphinx/source/obitaxonomy.rst
@@ -0,0 +1,26 @@
+The OBITools formatted taxonomy
+Management of the taxonomy
+Filtering and annotation steps in the processing of DNA metabarcoding sequence data are greatly 
+eased by the explicit association of taxonomic information to sequences together with an easy 
+access to the taxonomy. Taxonomic information, including a taxonomic identifier, can thus be 
+stored in the set of attributes of each sequence record. Specifically, the `taxid` attribute 
+is used by the OBITools when querying taxonomic information of a sequence record, nevertheless 
+several OBITools commands can annotate sequence records with taxonomy-related attributes for 
+the user's convenience. The value of the `taxid` attribute must be a unique integer referring 
+unambiguously to one taxon in the associated taxonomic database (note that a taxon can be any node 
+in the taxonomic tree). Although this is not mandatory, the NCBI taxonomy is a preferred source of 
+taxonomic information as the OBITools provide commands to easily extract the full taxonomic 
+information from it. The command `obitaxonomy` is useful to build a taxonomic database in the 
+OBITools format from a dump of the NCBI taxonomic database (downloadable at the following 
+URL: ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz). Moreover, the `obitaxonomy` command can 
+enrich an existing taxonomy with private taxa, thereby making it possible to associate sequence records with 
+taxa not initially present in the reference taxonomic database. As the OBITools have access to the 
+full taxonomic tree topology, they are able to inform higher taxonomic levels from a taxon identifier 
+(e.g. the family, order, class, phylum, etc. corresponding to a genus) leading to efficient and 
+simple annotation and querying of taxonomic information. 
diff --git a/doc/sphinx/source/optionsSet/defaultoptions.txt b/doc/sphinx/source/optionsSet/defaultoptions.txt
new file mode 100644
index 0000000..20b6f3a
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/defaultoptions.txt
@@ -0,0 +1,13 @@
+Common options
+.. program:: obitools
+.. cmdoption::    -h, --help  
+      Shows this help message and exits.
+.. cmdoption::    --DEBUG     
+      Sets logging in debug mode.
diff --git a/doc/sphinx/source/optionsSet/inputformat.txt b/doc/sphinx/source/optionsSet/inputformat.txt
new file mode 100644
index 0000000..8a8d181
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/inputformat.txt
@@ -0,0 +1,79 @@
+Options to specify input format
+.. program:: obitools
+Restrict the analysis to a sub-part of the input file
+.. cmdoption::  --skip <N>
+      The first N sequence records of the file are discarded from the analysis and 
+      not reported to the output file
+.. cmdoption::  --only <N>
+      Only the next N sequence records of the file are analyzed. The following sequences
+      in the file are neither analyzed nor reported to the output file.
+      This option can be used conjointly with the `--skip` option.
+Sequence annotated format
+.. cmdoption::  --genbank 
+      Input file is in :doc:`genbank <../genbank>` format.
+.. cmdoption::    --embl     
+      Input file is in :doc:`embl <../embl>` format.
+:doc:`fasta <../fasta>` related format
+.. cmdoption::    --fasta               
+      Input file is in :doc:`fasta  <../fasta>` format (including
+      OBITools :doc:`fasta  <../fasta>` extensions).
+:doc:`fastq <../fastq>` related format
+.. cmdoption::      --sanger              
+       Input file is in Sanger :doc:`fastq <../fastq>` format (standard
+       :doc:`fastq <../fastq>` used by HiSeq/MiSeq sequencers).
+.. cmdoption::      --solexa              
+       Input file is in :doc:`fastq <../fastq>` format produced by
+       Solexa (Ga IIx) sequencers.
+ecoPCR related format
+.. cmdoption::      --ecopcr              
+       Input file is in :doc:`ecoPCR <../formats>` format.
+.. cmdoption::      --ecopcrdb              
+       Input is an :doc:`ecoPCR <../formats>` database.
+Specifying the sequence type
+.. cmdoption::    --nuc       
+      Input file contains nucleic sequences.
+.. cmdoption::    --prot      
+      Input file contains protein sequences.
\ No newline at end of file
diff --git a/doc/sphinx/source/optionsSet/outputformat.txt b/doc/sphinx/source/optionsSet/outputformat.txt
new file mode 100644
index 0000000..4e58cc8
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/outputformat.txt
@@ -0,0 +1,30 @@
+Options to specify output format
+.. program:: obitools
+Standard output format
+.. cmdoption::  --fasta-output 
+      Output sequences in ``OBITools`` :doc:`fasta <../fasta>` format
+.. cmdoption::  --fastq-output 
+      Output sequences in Sanger :doc:`fastq <../fastq>` format
+Generating an ecoPCR database
+.. cmdoption::  --ecopcrdb-output=<PREFIX_FILENAME> 
+      Creates an ecoPCR database from sequence records results
+Miscellaneous option
+.. cmdoption::  --uppercase 
+      Print sequences in upper case (default is lower case)
diff --git a/doc/sphinx/source/optionsSet/sequenceEdit.txt b/doc/sphinx/source/optionsSet/sequenceEdit.txt
new file mode 100644
index 0000000..1952f6d
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/sequenceEdit.txt
@@ -0,0 +1,83 @@
+Sequence record editing options
+.. cmdoption::  --seq-rank
+                        Adds a new attribute named ``seq_rank`` to the sequence record indicating 
+                        its entry number in the sequence file.
+.. cmdoption::  -R <OLD_NAME>:<NEW_NAME>, --rename-tag=<OLD_NAME>:<NEW_NAME>
+                        Changes attribute name <OLD_NAME> to <NEW_NAME>. When attribute 
+                        named <OLD_NAME> is missing, the sequence record is 
+                        skipped and the next one is examined.
+.. cmdoption::  --delete-tag=<KEY>
+                        Deletes attribute named <KEY>. When this attribute
+                        is missing, the sequence record is skipped and the 
+                        next one is examined.
+.. cmdoption::  -S <KEY>:<PYTHON_EXPRESSION>, --set-tag=<KEY>:<PYTHON_EXPRESSION>
+                        Creates a new attribute named with a key <KEY> and a 
+                        value computed from <PYTHON_EXPRESSION>. 
+.. cmdoption::  --tag-list=<FILENAME>
+                        <FILENAME> points to a file containing attribute 
+                        names and values to modify for specified sequence records.
+.. cmdoption::  --set-identifier=<PYTHON_EXPRESSION>
+                        Sets sequence record identifier with a value computed 
+                        from <PYTHON_EXPRESSION>. 
+.. cmdoption::  --run=<PYTHON_EXPRESSION>
+                        Runs a python expression on each selected sequence.
+.. cmdoption::  --set-sequence=<PYTHON_EXPRESSION>
+                        Changes the sequence itself with a value computed from 
+                        <PYTHON_EXPRESSION>.
+.. cmdoption::  -T, --set-definition=<PYTHON_EXPRESSION>
+                        Sets sequence definition with a value computed from 
+                        <PYTHON_EXPRESSION>.
+.. cmdoption::  -O, --only-valid-python
+                        Allows only valid python expressions.
+.. cmdoption::  -C, --clear
+                        Clears all attributes associated to the sequence records.
+.. cmdoption::  -k <KEY>, --keep=<KEY>
+                        Keeps only attribute with key <KEY>. Several ``-k`` 
+                        options can be combined.
+.. cmdoption::  --length
+                        Adds attribute with ``seq_length`` as a key and sequence length as a value.
+.. cmdoption::  --with-taxon-at-rank=<RANK_NAME>
+                        Adds taxonomic annotation at taxonomic rank 
+                        <RANK_NAME>.
+.. cmdoption::  -m <MCLFILE>, --mcl=<MCLFILE>
+                        Creates a new attribute containing the number of the 
+                        cluster the sequence record was assigned to, as 
+                        indicated in file <MCLFILE>.
+.. cmdoption::  --uniq-id
+                        Forces sequence record ids to be unique.
diff --git a/doc/sphinx/source/optionsSet/sequenceFilter.txt b/doc/sphinx/source/optionsSet/sequenceFilter.txt
new file mode 100644
index 0000000..c56e29d
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/sequenceFilter.txt
@@ -0,0 +1,174 @@
+Sequence record selection options
+.. cmdoption::  -s <REGULAR_PATTERN>, --sequence=<REGULAR_PATTERN>
+                        Regular expression pattern to be tested against the
+                        sequence itself. The pattern is case insensitive.
+    *Examples:*
+        .. code-block:: bash
+                > obigrep -s 'GAATTC' seq1.fasta > seq2.fasta
+        Selects only the sequence records that contain an *EcoRI* restriction site.
+        .. code-block:: bash
+                > obigrep -s 'A{10,}' seq1.fasta > seq2.fasta
+        Selects only the sequence records that contain a stretch of at least 10 ``A``.
+        .. code-block:: bash
+                > obigrep -s '^[ACGT]+$' seq1.fasta > seq2.fasta
+        Selects only the sequence records that do not contain ambiguous nucleotides.
+.. cmdoption::  -D <REGULAR_PATTERN>, --definition=<REGULAR_PATTERN>
+                        Regular expression pattern to be tested against the
+                        definition of the sequence record. The pattern is case
+                        sensitive.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -D '[Cc]hloroplast' seq1.fasta > seq2.fasta
+        Selects only the sequence records whose definition contains ``chloroplast`` or
+        ``Chloroplast``.
+.. cmdoption::  -I <REGULAR_PATTERN>, --identifier=<REGULAR_PATTERN>
+                        Regular expression pattern to be tested against the
+                        identifier of the sequence record. The pattern is case
+                        sensitive.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -I '^GH' seq1.fasta > seq2.fasta
+        Selects only the sequence records whose identifier begins with ``GH``.
+.. cmdoption::  --id-list=<FILENAME>
+                        ``<FILENAME>`` points to a text file containing the list of sequence 
+                        record identifiers to be selected. 
+                        The file format consists in a single identifier per line.
+    *Example:*
+        .. code-block:: bash
+               > obigrep --id-list=my_id_list.txt seq1.fasta > seq2.fasta
+        Selects only the sequence records whose identifier is present in the
+        ``my_id_list.txt`` file.
+.. cmdoption::  -a <KEY>:<REGULAR_PATTERN>, 
+.. cmdoption::  --attribute=<KEY>:<REGULAR_PATTERN>
+                        Regular expression pattern matched against the
+                        :doc:`attributes of the sequence record <../fasta>`. The value of this option
+                        is of the form ``key:regular_pattern``. The
+                        pattern is case sensitive. Several ``-a`` options can be
+                        used on the same command line and in this last case, 
+                        the selected sequence records will match all constraints.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -a 'family_name:Asteraceae' seq1.fasta > seq2.fasta
+        Selects the sequence records containing an attribute whose key is ``family_name`` and value
+        is ``Asteraceae``.
+.. cmdoption::  -A <KEY>, --has-attribute=<KEY>
+                        Selects sequence records having an attribute whose key = <KEY>.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -A taxid seq1.fasta > seq2.fasta
+        Selects only the sequence records having a *taxid* attribute defined.
+.. cmdoption::  -p <PYTHON_EXPRESSION>, --predicat=<PYTHON_EXPRESSION>
+                        Python boolean expression to be evaluated for each
+                        sequence record. The attribute keys defined for each sequence record
+                        can be used in the expression as variable names. 
+                        An extra variable named 'sequence' refers to the 
+                        sequence record itself.
+                        Several -p options can be used on the same command
+                        line and in this last case, 
+                        the selected sequence records will match all constraints.
+    *Example:*
+        .. code-block:: bash
+                >  obigrep -p '(forward_error<2) and (reverse_error<2)' \
+                   seq1.fasta > seq2.fasta
+        Selects only the sequence records whose ``forward_error`` and ``reverse_error``
+        attributes have a value smaller than two.
+.. cmdoption::  -L <##>, --lmax=<##>  
+                        Keeps sequence records whose sequence length is 
+                        equal or shorter than ``lmax``.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -L 100 seq1.fasta > seq2.fasta
+        Selects only the sequence records that have a sequence 
+        length equal or shorter than 100bp.
+.. cmdoption::  -l <##>, --lmin=<##>  
+                        Selects sequence records whose sequence length is 
+                        equal or longer than ``lmin``.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -l 100 seq1.fasta > seq2.fasta
+        Selects only the sequence records that have a sequence length 
+        equal or longer than 100bp.
+.. cmdoption::  -v, --inverse-match   
+                        Inverts the sequence record selection.
+    *Example:*
+        .. code-block:: bash
+                > obigrep -v -l 100 seq1.fasta > seq2.fasta
+        Selects only the sequence records that have a sequence length shorter than 100bp.
\ No newline at end of file
diff --git a/doc/sphinx/source/optionsSet/taxonomyDB.txt b/doc/sphinx/source/optionsSet/taxonomyDB.txt
new file mode 100644
index 0000000..5e7a417
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/taxonomyDB.txt
@@ -0,0 +1,13 @@
+Taxonomy related options
+.. program:: taxonomy
+.. cmdoption::      -d <FILENAME>, --database=<FILENAME>
+      ecoPCR taxonomy Database name
+.. cmdoption::      -t <FILENAME>, --taxonomy-dump=<FILENAME>
+      NCBI Taxonomy dump repository name
diff --git a/doc/sphinx/source/optionsSet/taxonomyFilter.txt b/doc/sphinx/source/optionsSet/taxonomyFilter.txt
new file mode 100644
index 0000000..568be9b
--- /dev/null
+++ b/doc/sphinx/source/optionsSet/taxonomyFilter.txt
@@ -0,0 +1,14 @@
+.. include:: ../optionsSet/taxonomyDB.txt
+.. cmdoption::  --require-rank=<RANK_NAME>
+                        select sequence with taxid tag containing a parent of
+                        rank <RANK_NAME>
+.. cmdoption::  -r <TAXID>, --required=<TAXID>
+                        required taxid
+.. cmdoption::  -i <TAXID>, --ignore=<TAXID>
+                        ignored taxid
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts.rst b/doc/sphinx/source/scripts.rst
new file mode 100644
index 0000000..3e85117
--- /dev/null
+++ b/doc/sphinx/source/scripts.rst
@@ -0,0 +1,13 @@
+OBITools scripts
+.. toctree::
+   :maxdepth: 2
+   barcodes
+   conversions
+   annotations
+   manipulations
+   filtering
+   statistics
+   utilities
diff --git a/doc/sphinx/source/scripts/ecoPCR.rst b/doc/sphinx/source/scripts/ecoPCR.rst
new file mode 100644
index 0000000..720cf6d
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecoPCR.rst
@@ -0,0 +1,193 @@
+:py:mod:`ecoPCR`: *in silico* PCR
+:py:mod:`ecoPCR` *in silico* PCR preserves the taxonomic information 
+of the selected sequences, and allows various specified conditions for the
+*in silico* amplification.
+In addition to the different options, the command requires two arguments corresponding 
+to the two primers.
+    Bellemain E, Carlsen T, Brochmann C, Coissac E, Taberlet P, Kauserud H (2010) ITS as an environmental DNA barcode for fungi: an *in silico* approach reveals potential PCR biases. BMC Microbiology, 10, 189.
+    Ficetola GF, Coissac E, Zundel S, Riaz T, Shehzad W, Bessiere J, Taberlet P, Pompanon F (2010) An *in silico* approach for the evaluation of DNA barcodes. BMC Genomics, 11, 434.
+:py:mod:`ecoPCR` specific options
+   .. cmdoption::   -d <filename>   
+        Filename containing the database used for the *in silico* PCR. The database
+        must be in the ``ecoPCR format`` (see :doc:`obiconvert <./obiconvert>`). 
+        .. WARNING:: This option is compulsory.
+   .. cmdoption::  -e <INTEGER>  
+         Maximum number of errors (mismatches) allowed per primer (default: 0).
+         See example 2 for avoiding errors on the 3' end of the primers.
+   .. cmdoption::  -l <INTEGER>   
+			Minimum length of the *in silico* amplified DNA fragment, excluding primers.
+   .. cmdoption::  -L <INTEGER>   
+			Maximum length of the *in silico* amplified DNA fragment, excluding primers.
+   .. cmdoption::  -r <TAXID>   
+            Only the sequence records corresponding to the taxonomic group identified by its 
+            ``TAXID`` are considered for the *in silico* PCR. The ``TAXID`` is an integer that 
+            can be found either in the NCBI taxonomic database, or using the :doc:`ecofind <./ecofind>` program. 
+   .. cmdoption::  -i <TAXID>   
+			The sequences of the taxonomic group identified by its ``TAXID`` are not considered for 
+			the *in silico* PCR.
+   .. cmdoption::  -c   
+			Considers that the sequences of the database are circular (e.g. mitochondrial
+			or chloroplast DNA).
+   .. cmdoption::  -D <INTEGER>   
+			Keeps the specified number of nucleotides on each side of the *in silico* 
+			amplified sequences, (including the amplified DNA fragment plus the two target 
+			sequences of the primers).
+   .. cmdoption::  -k   
+			Print in the programme output the kingdom of the *in silico* amplified 
+			sequences (default: print the superkingdom).
+   .. cmdoption::  -m <1|2>  
+			Defines the method used for estimating the Tm (melting temperature) between 
+			the primers and their corresponding target sequences (default: 1).
+				1 SantaLucia method (SantaLucia J (1998) A unified view of polymer, dumbbell, and oligonucleotide DNA nearest-neighbor thermodynamics. PNAS, 95, 1460-1465).
+				2 Owczarzy method (Owczarzy R, Vallone PM, Gallo FJ *et al.* (1997) Predicting sequence-dependent melting stability of short duplex DNA oligomers. Biopolymers, 44, 217-239).
+   .. cmdoption::  -a <FLOAT>
+			Salt concentration used for estimating the *Tm* (default: 0.05).
+   .. cmdoption::  -h   
+            Print help.
+Output file
+		The output file contains several columns, with '|' as separator, and describes 
+		the properties of the *in silico* amplified sequences.
+		column 1: sequence identification in the reference database (= accession number when using EMBL or GenBank for building the reference database)
+		column 2: length of the original sequence
+		column 3: scientific name as indicated in the reference database
+		column 4: taxonomic rank as indicated in the reference database
+		column 5: *taxid* of the species
+		column 6: scientific name of the species
+		column 7: *taxid* of the genus
+		column 8: genus name
+		column 9: *taxid* of the family
+		column 10: family name
+		column 11: *taxid* of the super kingdom (or of the kingdom if the ``-k`` option is set)
+		column 12: super kingdom name (or kingdom name if the ``-k`` option is set)
+		column 13: strand (D or R, corresponding to direct or reverse, respectively)
+		column 14: target sequence of the first primer
+		column 15: number of mismatches for the first primer
+		column 16: target sequence of the second primer
+		column 17: number of mismatches for the second primer
+		column 18: length of the amplified fragment (excluding primers)
+		column 19: sequence
+		column 20: definition  
+	*Example 1:*
+	    		.. code-block:: bash
+	       			>  ecoPCR -d mydatabase -e 3 -l 50 -L 500 \
+	       			   TCACAGACCTGTTATTGC TYTGTCTGSTTRATTSCG > mysequences.ecopcr 
+			Launches an *in silico* PCR on mydatabase (see :doc:`obiconvert <./obiconvert>` for a description
+			of the database format), with a maximum of three mismatches for each primer. The minimum and 
+			maximum amplified sequence lengths (excluding primers) are 50 bp and 500 bp, respectively. The 
+			primers used are TCACAGACCTGTTATTGC and TYTGTCTGSTTRATTSCG (possibility to use 
+			:doc:`IUPAC codes <../iupac>`). They amplify a short portion of the nuclear 18S gene. The 
+			results are saved in the *mysequences.ecopcr* file.
+	*Example 2:*
+	    		.. code-block:: bash
+	       			> ecoPCR -d mydatabase -e 2  -l 80 -L 120 -D 50 -r 7742 \
+	       			  TTAGATACCCCACTATG#C# TAGAACAGGCTCCTCTA#G# > mysequences.ecopcr
+            Launches an *in silico* PCR on mydatabase (see :doc:`obiconvert <./obiconvert>` for a description
+            of the database format), with a maximum of two mismatches for each primer, but with a perfect match 
+            on the last two nucleotides	of the 3' end of each primer (a perfect match can be enforced by adding 
+            a '#' after the considered nucleotide). The minimum and maximum amplified sequence lengths (excluding 
+            primers) are 80 bp and 120 bp, respectively. The ``-D`` option keeps 50 nucleotides on each side of 
+            the *in silico* amplified sequences, (including the amplified DNA fragment plus the two target 
+            sequences of the primers). The primers used	are TTAGATACCCCACTATGC and TAGAACAGGCTCCTCTAG. They 
+            amplify a short portion of the mitochondrial 12S gene. The ``-r`` option restricts the search to 
+            vertebrates (7742 is the :doc:`taxid <../attributes/taxid>` of vertebrates). The results are saved 
+            in the ``mysequences.ecopcr`` file.
+:py:mod:`ecoPCR` used sequence attributes
+           - :doc:`taxid <../attributes/taxid>`
diff --git a/doc/sphinx/source/scripts/ecoPrimers.rst b/doc/sphinx/source/scripts/ecoPrimers.rst
new file mode 100644
index 0000000..313c084
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecoPrimers.rst
@@ -0,0 +1,253 @@
+:py:mod:`ecoPrimers`: new barcode markers and primers
+Authors: 	Eric Coissac <eric.coissac at metabarcoding.org> and Tiayyba Riaz <tiayyba.riaz at metabarcoding.org>
+:py:mod:`ecoPrimers` designs the most efficient barcode markers and primers, based 
+on a set of reference sequence records, and according to specified parameters.
+    Riaz T, Shehzad W, Viari A, Pompanon F, Taberlet P, Coissac E (2011) ecoPrimers: inference of new DNA 
+    barcode markers from whole genome sequence analysis. Nucleic Acids Research, 39, e145.
+:py:mod:`ecoPrimers` specific options
+   .. cmdoption:: -d <filename>   
+        Filename containing the reference sequence records used for designing the barcode 
+        markers and primers (see :doc:`obiconvert <./obiconvert>` for a description
+        of the database format).
+   .. WARNING:: This option is compulsory.
+   .. cmdoption:: -e <INTEGER>  
+         Maximum number of errors (mismatches) allowed per primer (default: 0).
+   .. cmdoption::  -l <INTEGER>   
+		 Minimum length of the barcode, excluding primers.
+   .. cmdoption::  -L <INTEGER>   
+		 Maximum length of the barcode, excluding primers.
+   .. cmdoption::  -r <TAXID>   
+         Defines the example sequence records (example dataset). Only the sequences of the corresponding 
+         taxonomic group identified by its ``TAXID`` are taken into account for designing the barcodes and 
+         the primers. The ``TAXID`` is an integer that can be found either in the NCBI taxonomic database, 
+         or using the :doc:`ecofind <ecofind>` program.
+   .. cmdoption::  -i <TAXID>   
+		 Defines the counterexample sequence records (counterexample dataset). The barcodes and primers 
+		 will be selected in order to avoid the counterexample taxonomic group identified by its ``TAXID``.
+   .. cmdoption::  -E <TAXID>   
+		 Defines a counterexample taxonomic group (identified by its ``TAXID``) within the example
+		 dataset.
+   .. cmdoption::  -c   
+			Considers that the sequences of the database are circular (e.g. mitochondrial
+			or chloroplast DNA).
+   .. cmdoption::  -3 <INTEGER>   
+			Defines the number of nucleotides on the 3' end of the primers that must have a strict match
+			with their target sequences.
+   .. cmdoption::  -q <FLOAT>   
+			Defines the strict matching quorum, i.e. the proportion of the sequence records in which a 
+			strict match between the primers and their targets occurs (default: 0.7)
+   .. cmdoption::  -s <FLOAT>  
+			Defines the sensitivity quorum, i.e. the proportion of the example sequence records that
+			must fulfill the specified parameters for designing the barcodes and the primers.
+   .. cmdoption::  -x <FLOAT>   
+			Defines the false positive quorum, i.e. the maximum proportion of the counterexample 
+			sequence records that fulfill the specified parameters for designing the barcodes and 
+			the primers.
+   .. cmdoption::  -t <TAXONOMIC_LEVEL>
+			Defines the taxonomic level that is considered for evaluating the barcodes and primers in 
+			the output of :py:mod:`ecoPrimers`. The default taxonomic level is the species level. When 
+			using a taxonomic database built from :doc:`NCBI taxonomy dump files <../taxdump>`, the 
+			other possible taxonomic levels are genus, family, order, class, phylum, kingdom, and 
+			superkingdom.
+   .. cmdoption::  -D   
+			Sets the double strand mode.
+   .. cmdoption::  -S   
+			Sets the single strand mode.
+   .. cmdoption::  -O <INTEGER>
+			Sets the primer length (default: 18).
+   .. cmdoption::  -m <1|2>  
+			Defines the method used for estimating the *Tm* (melting temperature) between 
+			the primers and their corresponding target sequences (default: 1).
+				1 SantaLucia method (SantaLucia J (1998) A unified view of polymer, dumbbell, and oligonucleotide DNA nearest-neighbor thermodynamics. PNAS, 95, 1460-1465).
+				2 Owczarzy method (Owczarzy R, Vallone PM, Gallo FJ *et al.* (1997) Predicting sequence-dependent melting stability of short duplex DNA oligomers. Biopolymers, 44, 217-239).
+   .. cmdoption::  -a <FLOAT>
+			Salt concentration used for estimating the *Tm* (default: 0.05).
+   .. cmdoption::  -U
+			No multi match of a primer on the same sequence record.
+   .. cmdoption::  -R <TEXT>
+			Defines the reference sequence by indicating its identifier in the database.
+   .. cmdoption::  -A
+			Prints the list of all identifiers of sequence records in the database.
+   .. cmdoption::  -f
+			Remove data mining step during strict primer identification.
+   .. cmdoption::  -v
+			Stores statistic file about memory usage during strict primer identification.
+   .. cmdoption::  -h   
+            Print help.
+Output file
+		The output file contains several columns, with '|' as separator, and describes 
+		the characteristics of each barcode and its associated primers.
+		column 1: serial number
+		column 2: sequence of primer 1
+		column 3: sequence of primer 2
+		column 4: *Tm* (melting temperature) of primer 1, without mismatch
+		column 5: lowest *Tm* of primer 1 against example sequence records
+		column 6: *Tm* of primer 2, without mismatch
+		column 7: lowest *Tm* of primer 2 against example sequence records
+		column 8: number of C or G in primer 1
+		column 9: number of C or G in primer 2
+		column 10: GG (*Good-Good*) means that both primers are specific to the example dataset,
+		           GB or BG (*Good-Bad* or *Bad-Good*) means that only one of the two primers
+		           is specific to the example dataset
+		column 11: number of sequence records of the example dataset that are properly amplified according to the specified parameters
+		column 12: proportion of sequence records of the example dataset that are properly amplified according to the specified parameters
+		column 13: yule-like output 
+		column 14: number of taxa of the example dataset that are properly amplified according to the specified parameters
+		column 15: number of taxa of the counterexample dataset that are properly amplified according to the specified parameters
+		column 16: proportion of taxa of the example dataset that are properly amplified according to the specified parameters (*Bc* index)
+		column 17: number of taxa of the example dataset that are properly identified
+		column 18: proportion of taxa of the example dataset that are properly identified (*Bs* index)
+		column 19: minimum length of the barcode in base pairs for the example sequence records (excluding primers)
+		column 20: maximum length of the barcode in base pairs for the example sequence records (excluding primers)
+		column 21: average length of the barcode in base pairs for the example sequence records (excluding primers)
+	*Example 1:*
+	    		.. code-block:: bash
+	       			>  ecoPrimers -d mydatabase -e 3 -l 50 \
+	       			   -L 800 -r 2759 -3 2 > mybarcodes.ecoprimers
+			Launches a search for barcodes and corresponding primers on mydatabase (see 
+			:doc:`obiconvert <./obiconvert>` for a description of the database format), with a maximum
+			of three mismatches for each primer. The minimum and maximum barcode lengths (excluding 
+			primers) are 50 bp and 800 bp, respectively. The search is restricted to the taxonomic 
+			group identified by its *taxid* (2759 corresponds to the Eukaryota). The last two 
+			nucleotides on the 3' end of the primers must have a perfect match with their target sequences. 
+			The results are saved in the mybarcodes.ecoprimers file.
+	*Example 2:*
+	    		.. code-block:: bash
+	       			> ecoPrimers -d mydatabase -e 2 -l 30 -L 120 \
+	       			  -r 7742 -i 2 -E 9604 -3 2 > mybarcodes.ecoprimers
+			Launches a search for barcodes and corresponding primers on mydatabase (see :doc:`obiconvert <./obiconvert>` 
+			for a description of the database format), with a maximum of two mismatches for each primer. The minimum and 
+			maximum barcode lengths (excluding primers) are 30 bp and 120 bp, respectively. The search is 
+			restricted to the Vertebrates, excluding Bacteria and Hominidae (7742, 2, and 9604 correspond to 
+			the ``TAXID`` of Vertebrates, Bacteria, and Hominidae, respectively). The last two nucleotides on 
+			the 3' end of the primers must have a perfect match with their target sequences. The results 
+			are saved in the mybarcodes.ecoprimers file.
diff --git a/doc/sphinx/source/scripts/ecodbtaxstat.rst b/doc/sphinx/source/scripts/ecodbtaxstat.rst
new file mode 100644
index 0000000..4b89815
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecodbtaxstat.rst
@@ -0,0 +1,49 @@
+.. automodule:: ecodbtaxstat
+   :py:mod:`ecodbtaxstat` specific option
+   --------------------------------------   
+   .. cmdoption::  --rank=<TAXONOMIC_RANK>   
+        The taxonomic rank at which frequencies have to be computed. 
+        Possible values are:
+            -  class
+            -  family 
+            -  forma 
+            -  genus 
+            -  infraclass 
+            -  infraorder 
+            -  kingdom 
+            -  order 
+            -  parvorder 
+            -  phylum 
+            -  species           (default)
+            -  species group 
+            -  species subgroup 
+            -  subclass 
+            -  subfamily 
+            -  subgenus 
+            -  subkingdom 
+            -  suborder 
+            -  subphylum 
+            -  subspecies 
+            -  subtribe 
+            -  superclass 
+            -  superfamily 
+            -  superkingdom 
+            -  superorder 
+            -  superphylum 
+            -  tribe 
+            -  varietas 
+   .. include:: ../optionsSet/taxonomyFilter.txt
+   :py:mod:`ecodbtaxstat` used sequence attributes
+   -----------------------------------------------
+           - :doc:`taxid <../attributes/taxid>`
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/ecofind.rst b/doc/sphinx/source/scripts/ecofind.rst
new file mode 100644
index 0000000..9fcc032
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecofind.rst
@@ -0,0 +1,85 @@
+:py:mod:`ecofind`: querying a taxonomic database
+:py:mod:`ecofind` retrieves taxonomic information from a taxonomic database 
+given either a *taxid* or a regular expression pattern.
+:py:mod:`ecofind` specific options
+   .. cmdoption::   -d <filename>   
+        Filename containing the database used for the *in silico* PCR. The database
+        must be in the ``ecoPCR format`` (see :doc:`obiconvert <./obiconvert>`). 
+        .. WARNING:: This option is compulsory.
+   .. cmdoption::   -a
+        Enable the search on all alternative names and not only scientific names.
+   .. cmdoption::   -L
+        Lists all taxonomic ranks available for the -r option and exits.
+   .. cmdoption::  -r
+        Restrict to given taxonomic rank.
+   .. cmdoption::  -s
+        Displays all subtree's information for the given taxid.
+   .. cmdoption::  -p 
+        Displays all parental tree's information for the given taxid.
+   .. cmdoption::  -P
+        Displays the taxonomic path as a supplementary column in the output.
+   .. cmdoption::  -h   
+            Print help.
+Output file
+		The output file contains several columns, with '|' as separator, and describes 
+		the properties of the retrieved *taxids*.
+		column 1: the *taxid*
+		column 2: the taxonomic rank
+		column 3: the name (not only scientific)
+		column 4: class name
+		column 5: the scientific name
+		column 6 (optional): the full taxonomic path of the *taxid*
+	*Example 1:*
+	    		.. code-block:: bash
+	       			>  ecofind -d mydatabase 'homo ' > homo_.tax 
+			Retrieves all *taxids* whose associated names contain 'homo '.
+	*Example 2:*
+	    		.. code-block:: bash
+	       			> ecofind -d mydatabase  -p 9606 -P > sapiens.info.tax
+			Retrieves all parent taxa of the 9606 *taxid*. The -P option adds a supplementary column
+			with the full path for each *taxid*.  
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/ecotag.rst b/doc/sphinx/source/scripts/ecotag.rst
new file mode 100644
index 0000000..88f8a86
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecotag.rst
@@ -0,0 +1,91 @@
+.. automodule:: ecotag
+   :py:mod:`ecotag` specific options
+   ---------------------------------
+   .. cmdoption::  -R <FILENAME>, --ref-database=<FILENAME>
+        <FILENAME> is the fasta file containing the reference sequences
+   .. cmdoption::  -m FLOAT, --minimum-identity=FLOAT
+        When the best match with the reference database presents an identity
+        level below FLOAT, the taxonomic assignment for the sequence record
+        is not computed. The sequence record is nevertheless included in the
+        output file. FLOAT is included in a [0,1] interval.
+   .. cmdoption::    --minimum-circle=FLOAT
+        minimum identity considered for the assignment circle.
+        FLOAT is included in a [0,1] interval.
+   .. cmdoption::  -x RANK, --explain=RANK
+   .. cmdoption::  -u, --uniq
+        When this option is specified, the program first dereplicates the sequence
+        records to work on unique sequences only. This option greatly improves
+        the program's speed, especially for highly redundant datasets.
+   .. cmdoption::  --sort=<KEY>
+        The output is sorted based on the values of the relevant attribute.
+   .. cmdoption::  -r, --reverse
+        The output is sorted in reverse order (should be used with the --sort option).
+        (Works even if the --sort option is not set, but could not find on what
+        the output is sorted).
+   .. cmdoption::  -E FLOAT, --errors=FLOAT
+        FLOAT is the fraction of reference sequences that will
+        be ignored when looking for the lowest common ancestor. This
+        option is useful when a non-negligible proportion of reference sequences
+        is expected to be assigned to the wrong taxon, for example because of
+        taxonomic misidentification. FLOAT is included in a [0,1] interval.
+   .. cmdoption::  -M INTEGER, --min-matches=INTEGER
+        Define the minimum congruent assignation. If this minimum is reached and
+        the -E option is activated, the lowest common ancestor algorithm tolerates
+        that some sequences do not provide the same taxonomic annotation (see the
+        -E option).
+   .. cmdoption::  --cache-size=INTEGER
+        A cache for computed similarities is maintained by `ecotag`. The default
+        size for this cache is 1,000,000 scores. This option allows changing
+        the cache size.
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`ecotag` added sequence attributes
+   ------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`best_identity <../attributes/best_identity>`
+           - :doc:`best_match <../attributes/best_match>`
+           - :doc:`family <../attributes/family>`
+           - :doc:`family_name <../attributes/family_name>`
+           - :doc:`genus <../attributes/genus>`
+           - :doc:`genus_name <../attributes/genus_name>`
+           - :doc:`id_status <../attributes/id_status>`
+           - :doc:`order <../attributes/order>`
+           - :doc:`order_name <../attributes/order_name>`
+           - :doc:`rank <../attributes/rank>`
+           - :doc:`scientific_name <../attributes/scientific_name>`
+           - :doc:`species <../attributes/species>`
+           - :doc:`species_list <../attributes/species_list>`
+           - :doc:`species_name <../attributes/species_name>`
+           - :doc:`taxid <../attributes/taxid>`
diff --git a/doc/sphinx/source/scripts/ecotaxspecificity.rst b/doc/sphinx/source/scripts/ecotaxspecificity.rst
new file mode 100644
index 0000000..47a6e8d
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecotaxspecificity.rst
@@ -0,0 +1,31 @@
+.. automodule:: ecotaxspecificity
+   :py:mod:`ecotaxspecificity` specific options
+   --------------------------------------------
+   .. cmdoption::  -e INT, --errors=<INT>   
+        Two sequences are considered as different if they have INT or more
+        differences (default: 1).
+    *Example:*
+            .. code-block:: bash
+                > ecotaxspecificity -d my_ecopcr_database -e 5 seq.fasta
+        This command considers that two sequences with less than 5 differences 
+        correspond to the same barcode.
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`ecotaxspecificity` used sequence attribute
+   ---------------------------------------------------
+           - :doc:`taxid <../attributes/taxid>`
diff --git a/doc/sphinx/source/scripts/ecotaxstat.rst b/doc/sphinx/source/scripts/ecotaxstat.rst
new file mode 100644
index 0000000..c2c2497
--- /dev/null
+++ b/doc/sphinx/source/scripts/ecotaxstat.rst
@@ -0,0 +1,21 @@
+.. automodule:: ecotaxstat
+   :py:mod:`ecotaxstat` specific options
+   --------------------------------------------
+   .. cmdoption::  -r TAXID, --required=<TAXID>   
+        Taxids can be specified to focus the coverage on a smaller part of the taxonomy.
+    *Example:*
+            .. code-block:: bash
+                > ecotaxstat -d my_ecopcr_database seq.ecopcr
+        This command will print taxonomy coverage for the considered primer pair
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/illuminapairedend.rst b/doc/sphinx/source/scripts/illuminapairedend.rst
new file mode 100644
index 0000000..bdf3b28
--- /dev/null
+++ b/doc/sphinx/source/scripts/illuminapairedend.rst
@@ -0,0 +1,61 @@
+.. automodule:: illuminapairedend
+    :py:mod:`illuminapairedend` specific options
+    -------------------------------------------- 
+    .. cmdoption::      -r <FILENAME>, --reverse-reads=<FILENAME>
+        Filename points to the file containing the reverse reads.
+    .. cmdoption::      --index-file=<FILENAME>
+        Filename points to the file containing the Illumina index reads.
+    .. cmdoption::      --score-min=<FLOAT>    
+        minimum score for keeping alignment. If the alignment score is
+        below this threshold both the sequences are just concatenated.
+        The ``mode`` attribute is set to the value ``joined``.
+    Options to specify input format
+    -------------------------------
+    .. program:: obitools
+    Fastq related format
+    ....................
+    .. cmdoption::      --sanger              
+           Input file is in :doc:`Sanger fastq nucleic format <../fastq>`  (standard
+           fastq used by HiSeq/MiSeq sequencers).
+    .. cmdoption::      --solexa              
+           Input file is in :doc:`fastq nucleic format <../fastq>` produced by
+           Solexa (Ga IIx) sequencers.
+    .. include:: ../optionsSet/outputformat.txt
+    .. include:: ../optionsSet/defaultoptions.txt
+    :py:mod:`illuminapairedend` added sequence attributes
+    -----------------------------------------------------
+           - :doc:`ali_dir <../attributes/ali_dir>`
+           - :doc:`ali_length <../attributes/ali_length>`
+           - :doc:`score <../attributes/score>`
+           - :doc:`score_norm <../attributes/score_norm>`
+           - :doc:`mode <../attributes/mode>`
+           - :doc:`pairend_limit <../attributes/pairend_limit>`
+           - :doc:`sminL <../attributes/sminL>`
+           - :doc:`sminR <../attributes/sminR>`
+           - :doc:`seq_ab_match <../attributes/seq_ab_match>`
+           - :doc:`seq_a_single <../attributes/seq_a_single>`
+           - :doc:`seq_b_single <../attributes/seq_b_single>`
+           - :doc:`seq_a_mismatch <../attributes/seq_a_mismatch>`
+           - :doc:`seq_b_mismatch <../attributes/seq_b_mismatch>`
+           - :doc:`seq_a_deletion <../attributes/seq_a_deletion>`
+           - :doc:`seq_b_deletion <../attributes/seq_b_deletion>`
+           - :doc:`seq_b_insertion <../attributes/seq_b_insertion>`
+           - :doc:`seq_a_insertion <../attributes/seq_a_insertion>`
diff --git a/doc/sphinx/source/scripts/ngsfilter.rst b/doc/sphinx/source/scripts/ngsfilter.rst
new file mode 100644
index 0000000..74abfda
--- /dev/null
+++ b/doc/sphinx/source/scripts/ngsfilter.rst
@@ -0,0 +1,54 @@
+.. automodule:: ngsfilter
+   :py:mod:`ngsfilter` specific options
+   ------------------------------------
+   .. cmdoption::  -t, --tag-list   
+                   Used to specify the file containing the samples description (with tags, primers, sample names,...) 
+   .. cmdoption::  -u, --unidentified  
+                   Filename used to store the sequences unassigned to any sample
+   .. cmdoption::  -e, --error  
+                   Used to specify the number of errors allowed for matching primers [default = 2]
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`ngsfilter` added sequence attributes
+   ---------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`avg_quality <../attributes/avg_quality>`
+           - :doc:`complemented <../attributes/complemented>`
+           - :doc:`cut <../attributes/cut>`
+           - :doc:`direction <../attributes/direction>`
+           - :doc:`error <../attributes/error>`
+           - :doc:`experiment <../attributes/experiment>`
+           - :doc:`forward_match <../attributes/forward_match>`
+           - :doc:`forward_primer <../attributes/forward_primer>`
+           - :doc:`forward_score <../attributes/forward_score>`
+           - :doc:`forward_tag <../attributes/forward_tag>`
+           - :doc:`head_quality <../attributes/head_quality>`
+           - :doc:`mid_quality <../attributes/mid_quality>`
+           - :doc:`partial <../attributes/partial>`
+           - :doc:`reverse_match <../attributes/reverse_match>`
+           - :doc:`reverse_primer <../attributes/reverse_primer>`
+           - :doc:`reverse_score <../attributes/reverse_score>`
+           - :doc:`reverse_tag <../attributes/reverse_tag>`
+           - :doc:`sample <../attributes/sample>`
+           - :doc:`seq_length <../attributes/seq_length>`
+           - :doc:`seq_length_ori <../attributes/seq_length_ori>`
+           - :doc:`status <../attributes/status>`
+           - :doc:`tail_quality <../attributes/tail_quality>`
diff --git a/doc/sphinx/source/scripts/obiaddtaxids.rst b/doc/sphinx/source/scripts/obiaddtaxids.rst
new file mode 100644
index 0000000..bce8402
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiaddtaxids.rst
@@ -0,0 +1,57 @@
+.. automodule:: obiaddtaxids 
+    :py:mod:`obiaddtaxids` specific options
+    --------------------------------------- 
+    .. cmdoption::  -f <FORMAT>, --format=<FORMAT>
+                        Format of the sequence file. Possible formats are: 
+                            - ``raw``: for regular ``OBITools`` extended :doc:`fasta <../fasta>` files (default value).
+                            - ``UNITE``: for :doc:`fasta <../fasta>` files downloaded from the `UNITE web site <http://unite.ut.ee/>`_.
+                            - ``SILVA``: for :doc:`fasta <../fasta>` files downloaded from the `SILVA web site <http://www.arb-silva.de/>`_.
+    .. cmdoption::  -k <KEY>, --key-name=<KEY>
+                        Key of the attribute containing the taxon name in sequence files in the ``OBITools`` extended
+                        :doc:`fasta <../fasta>` format. 
+    .. cmdoption::  -a <ANCESTOR>, --restricting_ancestor=<ANCESTOR>
+                        Restricts the search of *taxids* to those under a specified ancestor.
+                        ``<ANCESTOR>`` can be a *taxid* (integer) or a key (string). 
+                            - If it is a *taxid*, this *taxid* is used to restrict the search for all the sequence
+                              records.
+                            - If it is a key, :py:mod:`obiaddtaxids` looks for the ancestor *taxid* in the
+                              corresponding attribute. This allows having a different ancestor restriction
+                              for each sequence record.
+    .. cmdoption::  -g <FILENAME>, --genus_found=<FILENAME>
+                        File used to store sequences with a match found for the genus.
+                        .. CAUTION:: this option is not valid with the UNITE format.
+    .. cmdoption::  -u <FILENAME>, --unidentified=<FILENAME>
+                        File used to store sequences with no taxonomic match found.
+    .. include:: ../optionsSet/taxonomyDB.txt
+    .. include:: ../optionsSet/defaultoptions.txt
+    :py:mod:`obiaddtaxids` added sequence attribute
+    -----------------------------------------------
+           - :doc:`taxid <../attributes/taxid>`
diff --git a/doc/sphinx/source/scripts/obiannotate.rst b/doc/sphinx/source/scripts/obiannotate.rst
new file mode 100644
index 0000000..8c81373
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiannotate.rst
@@ -0,0 +1,36 @@
+.. automodule:: obiannotate
+   .. include:: ../optionsSet/sequenceEdit.txt
+   .. include:: ../optionsSet/sequenceFilter.txt
+   .. include:: ../optionsSet/taxonomyFilter.txt
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obiannotate` added sequence attributes
+   -----------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`seq_length <../attributes/seq_length>`
+           - :doc:`seq_rank <../attributes/seq_rank>`
+           - :doc:`cluster <../attributes/cluster>`
+           - :doc:`scientific_name <../attributes/scientific_name>`
+           - :doc:`taxid <../attributes/taxid>`
+           - :doc:`rank <../attributes/rank>`
+           - :doc:`family <../attributes/family>`
+           - :doc:`family_name <../attributes/family_name>`
+           - :doc:`genus <../attributes/genus>`
+           - :doc:`genus_name <../attributes/genus_name>`       
+           - :doc:`order <../attributes/order>`
+           - :doc:`order_name <../attributes/order_name>`
+           - :doc:`species <../attributes/species>`
+           - :doc:`species_name <../attributes/species_name>`
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obiclean.rst b/doc/sphinx/source/scripts/obiclean.rst
new file mode 100644
index 0000000..79a2f89
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiclean.rst
@@ -0,0 +1,63 @@
+.. automodule:: obiclean
+   :py:mod:`obiclean` specific options
+   -----------------------------------  
+   .. cmdoption::  -d <INTEGER>, --distance=<INTEGER>   
+                   Maximum number of differences between two variant sequences (default: 1).
+   .. cmdoption::  -s <KEY>, --sample=<KEY>  
+                   Attribute containing sample descriptions.
+   .. cmdoption::  -r <FLOAT>, --ratio=<FLOAT>  
+                   Threshold ratio between counts (rare/abundant counts) of two sequence records 
+                   so that the less abundant one is a variant of the more abundant
+                   (default: 1, i.e. all less abundant sequences are variants).
+   .. cmdoption::  -C, --cluster  
+                   Switch :py:mod:`obiclean` into its clustering mode. This adds information
+                   to each sequence about the true.
+   .. cmdoption::  -H, --head  
+                   Select only sequences with the head status in at least one sample.
+   .. cmdoption::  -g, --graph  
+                   Creates a file containing the set of DAG used by the obiclean clustering algorithm.
+                   The graph file follows the `dot` format
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obiclean` used sequence attributes
+   -----------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`count <../attributes/count>`
+   :py:mod:`obiclean` added sequence attributes
+   -----------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`obiclean_cluster <../attributes/obiclean_cluster>`
+           - :doc:`obiclean_count <../attributes/obiclean_count>`
+           - :doc:`obiclean_head <../attributes/obiclean_head>`
+           - :doc:`obiclean_headcount <../attributes/obiclean_headcount>`
+           - :doc:`obiclean_internalcount <../attributes/obiclean_internalcount>`
+           - :doc:`obiclean_samplecount <../attributes/obiclean_samplecount>`
+           - :doc:`obiclean_singletoncount <../attributes/obiclean_singletoncount>`
+           - :doc:`obiclean_status <../attributes/obiclean_status>`
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obicomplement.rst b/doc/sphinx/source/scripts/obicomplement.rst
new file mode 100644
index 0000000..851f7e7
--- /dev/null
+++ b/doc/sphinx/source/scripts/obicomplement.rst
@@ -0,0 +1,7 @@
+.. automodule:: obicomplement
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/obiconvert.rst b/doc/sphinx/source/scripts/obiconvert.rst
new file mode 100644
index 0000000..b9d88cd
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiconvert.rst
@@ -0,0 +1,21 @@
+.. automodule:: obiconvert
+   *Examples:*
+        .. code-block:: bash
+                > obiconvert --ecopcrdb --fasta-output \
+                  'my_ecopcr_database' > sequences.fasta
+        Converts an ecoPCR database into a sequence file in *extended OBITools fasta* format.
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obicount.rst b/doc/sphinx/source/scripts/obicount.rst
new file mode 100644
index 0000000..077f190
--- /dev/null
+++ b/doc/sphinx/source/scripts/obicount.rst
@@ -0,0 +1,42 @@
+.. automodule:: obicount
+   :py:mod:`obicount` specific options
+   -----------------------------------  
+   .. cmdoption::  -a, --all   
+                   Prints only the sum of ``count`` attributes.
+                   If a sequence has no ``count`` attribute, its default count is 1.
+      *Example:*
+         .. code-block:: bash
+           > obicount -a seq.fasta
+        For all sequence records contained in the ``seq.fasta`` file, prints only 
+        the sum of ``count`` attributes.
+   .. cmdoption::  -s, --sequence  
+                   Prints only the number of sequence records.
+      *Example:*
+        .. code-block:: bash
+           > obicount -s seq.fasta
+        Prints only the number of sequence records contained in the ``seq.fasta`` file.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obicount` added sequence attribute
+   -------------------------------------------  
+           - :doc:`count <../attributes/count>`
diff --git a/doc/sphinx/source/scripts/obicut.rst b/doc/sphinx/source/scripts/obicut.rst
new file mode 100644
index 0000000..72d3420
--- /dev/null
+++ b/doc/sphinx/source/scripts/obicut.rst
@@ -0,0 +1,21 @@
+.. automodule:: obicut
+   :py:mod:`obicut` specific options
+   ---------------------------------   
+   .. cmdoption:: -b <INTEGER>, --begin=<INTEGER>
+         Integer value (possibly calculated using a python expression) 
+         indicating the first position of the sequence to be kept.
+   .. cmdoption::  -e <INTEGER>, --end=<INTEGER>
+         Integer value (possibly calculated using a python expression) 
+         indicating the last position of the sequence to be kept.
+   .. include:: ../optionsSet/sequenceFilter.txt
+   .. include:: ../optionsSet/taxonomyFilter.txt
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obidistribute.rst b/doc/sphinx/source/scripts/obidistribute.rst
new file mode 100644
index 0000000..e418f17
--- /dev/null
+++ b/doc/sphinx/source/scripts/obidistribute.rst
@@ -0,0 +1,20 @@
+.. automodule:: obidistribute
+   :py:mod:`obidistribute` specific options
+   ----------------------------------------   
+   .. cmdoption::  -n <INT>, --number=<INT>   
+                   Number of files to distribute over.
+   .. cmdoption::  -p <STRING>, --prefix=<STRING>   
+                   Prefix added at each file name.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/obiextract.rst b/doc/sphinx/source/scripts/obiextract.rst
new file mode 100644
index 0000000..9d45f51
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiextract.rst
@@ -0,0 +1,61 @@
+.. automodule:: obiextract
+   :py:mod:`obiextract` specific options
+   -------------------------------------  
+   .. cmdoption::  -s <KEY>, --sample=<KEY>  
+                   Attribute containing sample descriptions. By default the attribute
+                   name used for describing samples is set to ``merged_sample``.
+   .. cmdoption::  -e <SAMPLE_NAME>, --extract=<KEY>   
+        Attribute indicating which <SAMPLE_NAME> have to be extracted. 
+        Several ``-e`` options can be added for specifying several samples.
+        If you want to extract a large number of samples, please refer to the ``-E``
+        option described below 
+        .. TIP:: The ``<KEY>`` can be simply the key of an attribute, or a *Python* expression
+                 similarly to the ``-p`` option of :py:mod:`obigrep`.
+    *Example:*
+    		.. code-block:: bash
+       			> obiextract -e sampleA -e sampleB allseqs.fasta > samplesAB.fasta
+		This command extracts from the ``allseqs.fasta`` file data related to samples ``A`` and ``B``.
+   .. cmdoption::  -E <FILENAME>, --extract-list=<FILENAME>  
+		Allows for indicating a file name where a list of samples is stored. The file must be a simple
+		text file with a sample name per line.
+   	*Example:*
+    		.. code-block:: bash
+       			> obiextract -E subset.txt allseqs.fasta > subset_samples.fasta
+		This command extracts from the ``allseqs.fasta`` file data related to samples listed in the ``subset.txt`` file.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obiextract` modified sequence attributes
+   -------------------------------------------------
+		- :doc:`count <../attributes/count>`
+   :py:mod:`obiextract` used sequence attribute
+   --------------------------------------------
+		- :doc:`count <../attributes/count>`
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obigrep.rst b/doc/sphinx/source/scripts/obigrep.rst
new file mode 100644
index 0000000..a16fdf3
--- /dev/null
+++ b/doc/sphinx/source/scripts/obigrep.rst
@@ -0,0 +1,11 @@
+.. automodule:: obigrep
+   .. include:: ../optionsSet/sequenceFilter.txt
+   .. include:: ../optionsSet/taxonomyFilter.txt
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obihead.rst b/doc/sphinx/source/scripts/obihead.rst
new file mode 100644
index 0000000..f9c6d64
--- /dev/null
+++ b/doc/sphinx/source/scripts/obihead.rst
@@ -0,0 +1,14 @@
+.. automodule:: obihead
+   :py:mod:`obihead` specific options
+   ----------------------------------   
+   .. cmdoption::  -n <INTEGER>, --sequence-count=<INTEGER>   
+                   Number of sequence records to be selected (default value : 10).
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obijoinpairedend.rst b/doc/sphinx/source/scripts/obijoinpairedend.rst
new file mode 100644
index 0000000..b521023
--- /dev/null
+++ b/doc/sphinx/source/scripts/obijoinpairedend.rst
@@ -0,0 +1,15 @@
+.. automodule:: obijoinpairedend
+    :py:mod:`obijoinpairedend` specific options
+    -------------------------------------------
+    .. cmdoption::      -r <FILENAME>, --reverse-reads=<FILENAME>
+        Filename points to the file containing the reverse reads.
+    .. include:: ../optionsSet/inputformat.txt
+    .. include:: ../optionsSet/outputformat.txt
+    .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/obipr2.rst b/doc/sphinx/source/scripts/obipr2.rst
new file mode 100644
index 0000000..3c0bfb6
--- /dev/null
+++ b/doc/sphinx/source/scripts/obipr2.rst
@@ -0,0 +1,22 @@
+.. automodule:: obipr2
+   :py:mod:`obipr2` specific options
+   -------------------------------------  
+   .. cmdoption::  --local=<DIRNAME> 
+		Specify you have already downloaded a copy of the PR2 database located at the following URL
+		`<>`_
+    *Example:*
+    		.. code-block:: bash
+       			> obipr2 --local=PR2Dir
+       		This formats the **PR2** database pre-downloaded in the `PR2Dir` directory.
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obisample.rst b/doc/sphinx/source/scripts/obisample.rst
new file mode 100644
index 0000000..af4b062
--- /dev/null
+++ b/doc/sphinx/source/scripts/obisample.rst
@@ -0,0 +1,67 @@
+.. automodule:: obisample
+   :py:mod:`obisample` specific options
+   ------------------------------------   
+   .. cmdoption::  -s ###, --sample-size ###   
+        Specifies the size of the generated sample.
+            - without the ``-a`` option, sample size is expressed as the exact number of sequence 
+              records to be sampled (default: number of sequence records in the input file). 
+            - with the ``-a`` option, sample size is expressed as a fraction of the
+              sequence record numbers in the input file 
+              (expressed as a number between 0 and 1).
+    *Example:*
+    		.. code-block:: bash
+       			> obisample -s 1000 seq1.fasta > seq2.fasta
+		Samples randomly 1000 sequence records from the ``seq1.fasta`` file, with replacement, 
+		and saves them in the ``seq2.fasta`` file.
+   .. cmdoption::  -a, --approx-sampling   
+                   Switches the resampling algorithm to an approximate one, 
+                   useful for large files.
+                   The default algorithm selects exactly the number of sequence records
+                   specified with the ``-s`` option. When the ``-a`` option is set, 
+                   each sequence record has a probability to be selected related to the
+                   ``count`` attribute of the sequence record and the ``-s`` fraction. 
+   	*Example:*
+    		.. code-block:: bash
+       			> obisample -s 0.5 -a seq1.fastq > seq2.fastq
+		Samples randomly half of the sequence records of the ``seq1.fastq`` file, 
+		without replacement, 
+		and saves them in the ``seq2.fastq`` file.
+   .. cmdoption::  -w, --without-replacement   
+                   Asks for sampling without replacement.
+   	*Example:*
+    		.. code-block:: bash
+       			> obisample -s 1000 -w seq1.fasta > seq2.fasta
+   		Samples randomly 1000 sequence records from the ``seq1.fasta`` file, without replacement 
+   		(the input file must contain at least 1000 sequences), and saves them in the ``seq2.fasta`` file.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obisample` used sequence attribute
+   -------------------------------------------   
+           - :doc:`count <../attributes/count>`
diff --git a/doc/sphinx/source/scripts/obiselect.rst b/doc/sphinx/source/scripts/obiselect.rst
new file mode 100644
index 0000000..06c214d
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiselect.rst
@@ -0,0 +1,126 @@
+.. automodule:: obiselect
+   In each group as defined by a set of `-c` options, sequence records are ordered according
+   to a score function. The `N` first sequences (`N` is selected using the `-n` option) are kept
+   in the result subset of sequence records.
+   By default the score function is a random function and one sequence record is retrieved per
+   group. This leads to select randomly one sequence per group.
+   :py:mod:`obiselect` specific options
+   ------------------------------------   
+   .. cmdoption::  -c <KEY>, --category-attribute=<KEY>   
+        Attribute used to categorize the sequence records. Several ``-c`` options can be combined. 
+        .. TIP:: The ``<KEY>`` can be simply the key of an attribute, or a *Python* expression
+                 similarly to the ``-p`` option of :py:mod:`obigrep`.
+    *Example:*
+            .. code-block:: bash
+                > obiselect -c sample -c seq_length seq.fasta
+        This command selects randomly one sequence record per sample and sequence length from
+        the sequence records included in the `seq.fasta` file.
+        The selected sequence records are printed on the screen.
+   .. cmdoption:: -n <INTEGER>, --number=<INTEGER>
+        Indicates how many sequence records per group have to be retrieved.
+        If the size of the group is smaller than this `NUMBER`, the whole group
+        is retrieved.
+    *Example:*
+            .. code-block:: bash
+                > obiselect -n 2 -c sample -c seq_length seq.fasta
+        This command has the same effect as the previous example except that two
+        sequences are retrieved by class of sample/length.
+   .. cmdoption:: --merge=<KEY>   
+     Attribute to merge.
+     *Example:*
+        .. code-block:: bash
+            > obiselect -c seq_length -n 2 --merge=sample seq1.fasta > seq2.fasta
+        This command keeps two sequences per sequence length, and records how 
+        many times they were observed for each sample in the new attribute 
+        ``merged_sample``.
+   .. cmdoption::  --merge-ids
+     Adds a ``merged`` attribute containing the list of sequence record ids merged
+     within this group.
+   .. cmdoption:: -m, --min             
+     Sets the function used for scoring sequence records into a group to the minimum function. 
+     The minimum function is applied to the values used to define categories (see option `-c`).
+     Sequences will be ordered according to the distance of their values to the minimum value.
+   .. cmdoption::    -M, --max 
+     Sets the function used for scoring sequence records into a group to the maximum function. 
+     The maximum function is applied to the values used to define categories (see option `-c`).
+     Sequences will be ordered according to the distance of their values to the maximum value.
+   .. cmdoption::    -a, --mean  
+     Sets the function used for scoring sequence records into a group to the mean function. 
+     The mean function is applied to the values used to define categories (see option `-c`).
+     Sequences will be ordered according to the distance of their values to the mean value.
+   .. cmdoption::    --median  
+     Sets the function used for scoring sequence records into a group to the median function. 
+     The median function is applied to the values used to define categories (see option `-c`).
+     Sequences will be ordered according to the distance of their values to the median value.
+   .. cmdoption::    -f FUNCTION, --function=FUNCTION
+     Sets the function used for scoring sequence records into a group to a user-defined function. 
+     The user-defined function is declared using `Python` syntax. Attribute keys can be used as variables.
+     An extra `sequence` variable representing the full sequence record is available. If option for
+     loading a taxonomy database is provided, a `taxonomy` variable is also available.
+     The function is evaluated for each sequence record, and its minimum value is computed
+     for each group.
+     Sequences will be ordered in each group according to the distance of their function estimation
+     to the minimum value of their group.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obiselect` added sequence attributes
+   ---------------------------------------------
+           - :doc:`class <../attributes/class>`
+           - :doc:`distance <../attributes/distance>`
+           - :doc:`merged <../attributes/merged>`
+           - :doc:`class <../attributes/class>`
+           - :doc:`merged_* <../attributes/merged_star>`
+           - :doc:`select <../attributes/select>`
+   :py:mod:`obiselect` used sequence attribute
+   -------------------------------------------
+           - :doc:`taxid <../attributes/taxid>`
diff --git a/doc/sphinx/source/scripts/obisilva.rst b/doc/sphinx/source/scripts/obisilva.rst
new file mode 100644
index 0000000..615dfe0
--- /dev/null
+++ b/doc/sphinx/source/scripts/obisilva.rst
@@ -0,0 +1,71 @@
+.. automodule:: obisilva
+   :py:mod:`obisilva` specific options
+   -------------------------------------  
+   .. cmdoption::  -s , --ssu  
+        Specify that you are interested in the **SSU** database.
+    *Example:*
+    		.. code-block:: bash
+       			> obisilva --ssu --parc
+		This downloads and formats into an ecoPCR database the latest version of the **SSUParc** database of **Silva**.
+   .. cmdoption::  -l, --lsu   
+        Specify that you are interested in the **LSU** database.
+    *Example:*
+    		.. code-block:: bash
+       			> obisilva --lsu --parc
+		This downloads and formats into an ecoPCR database the latest version of the **LSUParc** database of **Silva**.
+   .. cmdoption::  -p , --parc  
+		Specify that you are interested in the **Parc** (complete) version of the **Silva** database.
+   .. cmdoption::  -r , --ref  
+		Specify that you are interested in the **Reference** (cleaned to keep only high quality sequences) 
+		version of the **Silva** database.
+   .. cmdoption::  -n , --nr  
+		Specify that you are interested in the **Non redundant** version of the **Silva** database.
+		Only one sequence of each group of closely related sequences is kept in this version of the database.
+		.. warning::
+			Non redundant version of **Silva** exists only for the SSU sequences 
+			in its **Reference** and **Truncated** versions.
+   .. cmdoption::  -t , --trunc 
+		Specify that you are interested in the **Truncated** (limited to the rDNA element without flanked regions) 
+		version of the **Silva** database.
+   .. cmdoption::  --local=<DIRNAME> 
+		Specify that you have already downloaded a copy of the **Silva** database located at the following URL
+		`<http://www.arb-silva.de/no_cache/download/archive/current/Exports/>`_
+    *Example:*
+    		.. code-block:: bash
+       			> obisilva --ssu --parc --local=SilvaDir
+       		This formats the **SSUParc** version of the **Silva** database pre-downloaded in the `SilvaDir` directory.
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obisort.rst b/doc/sphinx/source/scripts/obisort.rst
new file mode 100644
index 0000000..c742c93
--- /dev/null
+++ b/doc/sphinx/source/scripts/obisort.rst
@@ -0,0 +1,35 @@
+.. automodule:: obisort
+   :py:mod:`obisort` specific options
+   ---------------------------------- 
+   .. cmdoption::  -k <KEY>, --key=<KEY>   
+        Attribute used to sort the sequence records. 
+    *Example:*
+    		.. code-block:: bash
+       			> obisort -k count seq1.fasta > seq2.fasta
+		Sorts the sequence records of file ``seq1.fasta`` according to their `count` 
+		(numeric order) and prints the results in the ``seq2.fasta`` file.
+   .. cmdoption::  -r, --reverse   
+		Sorts in reverse order. 
+   	*Example:*
+    		.. code-block:: bash
+       			> obisort -r -k count seq1.fastq > seq2.fastq
+		Sorts the sequence records of file ``seq1.fastq`` according to their `count` 
+		(reverse numeric order) and prints the results in the ``seq2.fastq`` file.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/obisplit.rst b/doc/sphinx/source/scripts/obisplit.rst
new file mode 100644
index 0000000..6f0dfa1
--- /dev/null
+++ b/doc/sphinx/source/scripts/obisplit.rst
@@ -0,0 +1,21 @@
+.. automodule:: obisplit
+   :py:mod:`obisplit` specific options
+   -----------------------------------   
+   .. cmdoption::  -p <PREFIX FILENAME>, --prefix=<PREFIX FILENAME>
+         Prefix added to each subfile name.
+   .. cmdoption::  -t <KEY>, --tag-name=<KEY>
+         Attribute key used to split the sequence file.
+   .. cmdoption::  -u <FILENAME>, --undefined=<FILENAME>
+         Name of the file where sequence records without attribute ``<KEY>`` are stored.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/obistat.rst b/doc/sphinx/source/scripts/obistat.rst
new file mode 100644
index 0000000..46670ea
--- /dev/null
+++ b/doc/sphinx/source/scripts/obistat.rst
@@ -0,0 +1,95 @@
+.. automodule:: obistat
+   :py:mod:`obistat` specific options
+   ----------------------------------  
+   .. cmdoption::  -c <KEY>, --category-attribute=<KEY>   
+        Attribute used to categorize the sequence records. Several ``-c`` options can be combined. 
+        .. TIP:: The ``<KEY>`` can be simply the key of an attribute, or a *Python* expression
+                 similarly to the ``-p`` option of :py:mod:`obigrep`.
+    *Example:*
+    		.. code-block:: bash
+       			> obistat -c sample -c seq_length seq.fasta
+		This command prints the number of sequence records and total count for each combination of
+		sample and sequence length.
+   .. cmdoption::  -m <KEY>, --min=<KEY>  
+		Computes the minimum value of attribute <KEY> for each category. 
+   	*Example:*
+    		.. code-block:: bash
+       			> obistat -c sample -m seq_length seq.fastq
+		This command computes the minimum sequence length observed for each sample.
+   .. cmdoption::  -M <KEY>, --max=<KEY>  
+		Computes the maximum value of attribute <KEY> for each category. 
+   	*Example:*
+    		.. code-block:: bash
+       			> obistat -c sample -M seq_length seq.fastq
+		This command computes the maximum sequence length observed for each sample.
+   .. cmdoption::  -a <KEY>, --mean=<KEY>  
+		Computes the mean value of attribute <KEY> for each category. 
+	*Example:*
+    		.. code-block:: bash
+       			> obistat -c sample -a seq_length seq.fastq
+		This command computes the mean sequence length observed for each sample.
+	.. cmdoption::  -v <KEY>, --variance=<KEY>  
+		Computes the variance of attribute <KEY> for each category. 
+	*Example:*
+    		.. code-block:: bash
+       			> obistat -c genus_name -v reverse_error seq.fastq
+		This command computes the variance of the number of errors observed in the reverse primer for each genus.
+	.. cmdoption::  -s <KEY>, --std-dev=<KEY>  
+		Computes the standard deviation of attribute <KEY> for each category. 
+	*Example:*
+    		.. code-block:: bash
+       			> obistat -c genus_name -s reverse_error seq.fastq
+		This command computes the standard deviation of the number of errors observed in the reverse primer for each genus.
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obistat` used sequence attribute
+   -----------------------------------------
+              - :doc:`count <../attributes/count>`
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obisubset.rst b/doc/sphinx/source/scripts/obisubset.rst
new file mode 100644
index 0000000..7218a89
--- /dev/null
+++ b/doc/sphinx/source/scripts/obisubset.rst
@@ -0,0 +1,81 @@
+.. automodule:: obisubset
+   :py:mod:`obisubset` specific options
+   ------------------------------------
+   .. cmdoption::  -s <TAGNAME>,  --sample=<TAGNAME>,
+     The option ``-s`` specifies the tag containing sample descriptions,
+     the default value is set to *merged_sample*.
+     *Example:*
+         .. code-block:: bash
+             > obiuniq -m sample seq1.fasta > seq2.fasta
+             > obisubset -s merged_sample -n sample1 seq2.fasta > seq3.fasta
+         After the dereplication of the sequences using the ``-m sample`` option of :py:mod:`obiuniq`,
+         the sample distribution of each sequence is kept in the new attribute ``merged_sample``.
+   .. cmdoption::  -o <TAGNAME>,  --other-tag=<TAGNAME>,
+     Another tag to clean according to the sample subset
+     *Example:*
+          .. code-block:: bash
+              > obisubset -s merged_sample -o -n sample1 seq2.fasta > seq3.fasta
+   .. cmdoption::  -l <FILENAME>,  --sample-list=<FILENAME>,
+     File containing the samples names (one sample id per line).
+     *Example:*
+          .. code-block:: bash
+              > obisubset -s merged_sample -o -l ids.txt seq2.fasta > seq3.fasta
+   .. cmdoption::  -p <REGEX>,  --sample-pattern=<REGEX>,
+     A regular expression pattern matching the sample ids to extract.
+     *Example:*
+         .. code-block:: bash
+             > obisubset -s merged_sample -o -p "negative_.*" seq2.fasta > seq3.fasta
+   .. cmdoption::  -n <SAMPLEIDS>,  --sample-name=<SAMPLEIDS>,
+     A sample id to extract
+     *Example:*
+          .. code-block:: bash
+              > obisubset -s merged_sample -o -n sample1 seq2.fasta > seq3.fasta
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/outputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obisubset` modifies sequence attributes
+   ------------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`count <../attributes/count>`
+           - :doc:`merged_* <../attributes/merged_star>`
+   :py:mod:`obisubset` used sequence attribute
+   -------------------------------------------
+           - :doc:`count <../attributes/count>`
+           - :doc:`merged_* <../attributes/merged_star>`
diff --git a/doc/sphinx/source/scripts/obitab.rst b/doc/sphinx/source/scripts/obitab.rst
new file mode 100644
index 0000000..4011e24
--- /dev/null
+++ b/doc/sphinx/source/scripts/obitab.rst
@@ -0,0 +1,48 @@
+.. automodule:: obitab
+    :py:mod:`obitab` specific options
+    ---------------------------------   
+    .. cmdoption::   -n <NOT AVAILABLE STRING>, --na-string=<NOT AVAILABLE STRING>
+                   String written in the table for the not available values 
+                   (default value ``NA``).
+    .. cmdoption::    --output-field-separator=<STRING>
+                   Field separator for the tabular file 
+                   (default value ``TAB``).
+    .. cmdoption::    -o, --output-seq      
+                   Adds an extra column at the end of the table for 
+                   the sequence itself.
+    .. cmdoption::    -d, --no-definition   
+                   Removes column containing the sequence definition in
+                   the output tab file.
+    .. cmdoption::    -a <KEY>, --omit-attribute=<KEY>
+                   Attributes whose key is in this list will not be printed in 
+                   the output tab file.  
+    .. include:: ../optionsSet/inputformat.txt
+    .. include:: ../optionsSet/defaultoptions.txt
+    Example
+    -------
+        .. code-block:: bash
+              > obitab -d -o seq1.fasta > seq1.txt
+        Reformats all sequence records present in the ``seq1.fasta`` file 
+        into a tabular file without outputting the sequence definition but
+        with an extra column containing the sequence itself. The result is
+        stored in the ``seq1.txt`` file.
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obitail.rst b/doc/sphinx/source/scripts/obitail.rst
new file mode 100644
index 0000000..0ca2392
--- /dev/null
+++ b/doc/sphinx/source/scripts/obitail.rst
@@ -0,0 +1,14 @@
+.. automodule:: obitail
+   :py:mod:`obitail` specific options
+   ----------------------------------   
+   .. cmdoption::  -n <INTEGER>, --sequence-count <INTEGER>   
+        Number of sequence records to be selected (default value : 10).
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/obitaxonomy.rst b/doc/sphinx/source/scripts/obitaxonomy.rst
new file mode 100644
index 0000000..b621110
--- /dev/null
+++ b/doc/sphinx/source/scripts/obitaxonomy.rst
@@ -0,0 +1,160 @@
+.. automodule:: obitaxonomy 
+    :py:mod:`obitaxonomy` specific options
+    --------------------------------------- 
+    .. cmdoption::  -a <TAXON_INFOS>, --add-taxon=<TAXON_INFOS>
+                        Adds a new taxon to the taxonomy. The new taxon 
+                        is described by three values separated by colons: 
+                        its scientific name, its taxonomic rank, and the 
+                        taxid of its first ancestor.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database \
+                      -a 'Gentiana alpina':'species':49934
+            Adds a taxon with the scientific name *Gentiana alpina* and the rank *species* under
+            the taxon whose taxid is 49934.
+    .. cmdoption::  -m <####>, --min-taxid=<####>
+                        Minimum *taxid* for the newly added *taxid(s)*.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database -m 1000000000 \
+                      -a 'Gentiana alpina':'species':49934
+            Adds a taxon with the scientific name *Gentiana alpina* and the rank *species* under
+            the taxon whose *taxid* is 49934, with a *taxid* greater than or equal to 1000000000.
+    .. cmdoption::  -D <TAXID>, --delete-local-taxon=<TAXID>
+                        Deletes the local taxon with the *taxid* <TAXID> from the 
+                        taxonomic database. 
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database -D 10000832
+            Deletes the local taxon with the taxid 10000832 from the taxonomic database.
+    .. cmdoption::  -s <SPECIES_NAME>, --add-species=<SPECIES_NAME>
+                        Adds a new species to the taxonomy. The new species 
+                        is described by its scientific name. The genus of the 
+                        species must already exist in the database. 
+                        The species will be added under its genus.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database -s 'Gentiana alpina'
+            Adds the species with the scientific name *Gentiana alpina* under the genus *Gentiana*.
+    .. cmdoption::  -f <TAXON_NAME>:<TAXID>, --add-favorite-name=<TAXON_NAME>:<TAXID>
+                        Adds a new favorite scientific name to the taxonomy. 
+                        The new name is described by two values separated by 
+                        a colon: the new favorite name and the *taxid* of the taxon.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database \
+                      -f 'Gentiana algida':50748
+            Adds the favorite scientific name *Gentiana algida* for the *taxid* 50748 in the taxonomic database.
+    .. cmdoption::  -F <FILE_NAME>, --file-name=<FILE_NAME>
+                        Adds all the taxa from a sequence file in ``OBITools`` extended 
+                        :doc:`fasta <../fasta>` format, and possibly their ancestors to the database 
+                        (see documentation). Each sequence record must contain the 
+                        attribute specified by the ``-k`` option.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database \
+                      -k my_taxon_name_key -F my_sequences.fasta
+            Adds the taxon of each sequence record from the file ``my_sequences.fasta`` in the taxonomic 
+            database, based on the scientific name contained in the ``my_taxon_name_key`` attribute.
+    .. cmdoption::  -k <KEY_NAME>, --key-name=<KEY_NAME>
+                        Works with the ``-F`` option. Defines the key of the 
+                        attribute that contains the scientific name of 
+                        the taxon to be added. See example above.
+    .. cmdoption::  -A <ANCESTOR>, --restricting_ancestor=<ANCESTOR>
+                        Works with the ``-F`` option. Can be a *taxid* (integer) or 
+                        a key (string). If it is a *taxid*, this *taxid* is the 
+                        default *taxid* under which the new taxon is added if 
+                        none of its ancestors are specified or can be found. 
+                        If it is a key, :py:mod:`obitaxonomy` looks for the 
+                        ancestor *taxid* in the corresponding attribute, and the 
+                        new taxon is *systematically* added under this ancestor. 
+                        By default, the restricting ancestor is the root of the 
+                        taxonomic tree for all the new taxa.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database -A 33090 \
+                      -k my_taxon_name_key -F my_sequences.fasta
+            Adds the taxon of each sequence record from the file ``my_sequences.fasta`` in the taxonomic 
+            database, based on the scientific name contained in the ``my_taxon_name_key`` attribute. If
+            the genus of the new taxon cannot be found, the new taxon is added under the taxon whose 
+            *taxid* is 33090.
+    .. cmdoption::  -p <PATH>, --path=<PATH>
+                        Works with the ``-F`` option. Key of the attribute containing 
+                        the taxonomic paths of the taxa if they are in the headers of 
+                        the sequence records. The value contained in this attribute 
+                        must be of the form 'Fungi, Agaricomycetes, Thelephorales, 
+                        Thelephoraceae' with the highest ancestors first and commas 
+                        between ancestors.
+        *Example:*
+            .. code-block:: bash
+                    > obitaxonomy -d my_ecopcr_database -p my_taxonomic_path_key \
+                      -k my_taxon_name_key -F my_sequences.fasta
+            Adds the taxon of each sequence record from the file ``my_sequences.fasta`` in the taxonomic 
+            database, based on the scientific name contained in the ``my_taxon_name_key`` attribute. 
+            Each ancestor contained in the ``my_taxonomic_path_key`` attribute is added if it does not 
+            already exist, and the new taxon is added under the latest ancestor of the path.            
+    .. include:: ../optionsSet/taxonomyDB.txt
+    .. include:: ../optionsSet/defaultoptions.txt
diff --git a/doc/sphinx/source/scripts/obiuniq.rst b/doc/sphinx/source/scripts/obiuniq.rst
new file mode 100644
index 0000000..37d594a
--- /dev/null
+++ b/doc/sphinx/source/scripts/obiuniq.rst
@@ -0,0 +1,79 @@
+.. automodule:: obiuniq
+   :py:mod:`obiuniq` specific options
+   ----------------------------------
+   .. cmdoption::  -m <KEY>, --merge=<KEY>   
+     Attribute to merge.
+     *Example:*
+        .. code-block:: bash
+            > obiuniq -m sample seq1.fasta > seq2.fasta
+        Dereplicates sequences and keeps the value distribution of the ``sample`` attribute
+        in the new attribute ``merged_sample``.
+   .. cmdoption::  -i , --merge-ids
+     Adds a ``merged`` attribute containing the list of sequence record ids merged
+     within this group.
+   .. cmdoption::  -c <KEY>, --category-attribute=<KEY>
+     Adds one attribute to the list of attributes used to define sequence groups
+     (this option can be used several times).
+     *Example:*
+        .. code-block:: bash
+            > obiuniq -c sample seq1.fasta > seq2.fasta
+        Dereplicates sequences within each sample.
+   .. cmdoption::  -p, --prefix
+     Dereplication is done based on prefix matching:
+            1. The shortest sequence of each group is a prefix of any sequence of its group
+            2. The shortest sequence of a group is the prefix of only the sequences belonging
+               to its group 
+   .. include:: ../optionsSet/taxonomyDB.txt
+   .. include:: ../optionsSet/inputformat.txt
+   .. include:: ../optionsSet/defaultoptions.txt
+   :py:mod:`obiuniq` added sequence attributes
+   -------------------------------------------
+      .. hlist::
+           :columns: 3
+           - :doc:`count <../attributes/count>`
+           - :doc:`merged_* <../attributes/merged_star>`
+           - :doc:`merged <../attributes/merged>`
+           - :doc:`scientific_name <../attributes/scientific_name>`
+           - :doc:`rank <../attributes/rank>`
+           - :doc:`family <../attributes/family>`
+           - :doc:`family_name <../attributes/family_name>`
+           - :doc:`genus <../attributes/genus>`
+           - :doc:`genus_name <../attributes/genus_name>`       
+           - :doc:`order <../attributes/order>`
+           - :doc:`order_name <../attributes/order_name>`
+           - :doc:`species <../attributes/species>`
+           - :doc:`species_name <../attributes/species_name>`
+   :py:mod:`obiuniq` used sequence attribute
+   -----------------------------------------
+           - :doc:`taxid <../attributes/taxid>`
\ No newline at end of file
diff --git a/doc/sphinx/source/scripts/oligotag.rst b/doc/sphinx/source/scripts/oligotag.rst
new file mode 100644
index 0000000..a3c4d01
--- /dev/null
+++ b/doc/sphinx/source/scripts/oligotag.rst
@@ -0,0 +1,134 @@
+.. automodule:: oligotag
+   :py:mod:`oligotag` specific options
+   -----------------------------------  
+   .. cmdoption::   -L <filename>, --oligo-list=<filename>   
+        Filename containing a list of oligonucleotides. `oligotag` selects within this list 
+        the oligonucleotides that match the specified options.
+        .. CAUTION:: Cannot be used with the ``-s`` option.
+   .. cmdoption::  -s ###, --oligo-size=###   
+        Size of oligonucleotides to be generated.
+        .. CAUTION:: Cannot be used with the ``-L`` option.
+        .. WARNING:: A size equal to or greater than eight often leads to a very long 
+                     computing time and requires a large amount of memory.
+   .. cmdoption::  -f ###, --family-size=###   
+            Minimal size of the oligonucleotide family to be generated.
+   .. cmdoption::  -d ###, --distance=###   
+			Minimal Hamming distance (number of differences) 
+			between two oligonucleotides.
+   .. cmdoption::  -g ###, --gc-max=###   
+			Maximum number of G or C in the oligonucleotides.
+   .. cmdoption::  -a <IUPAC_PATTERN>, --accepted=<IUPAC_PATTERN>   
+			Selected oligonucleotides are constrained by the given pattern 
+			(only :doc:`IUPAC <../iupac>` symbols are allowed).
+         .. CAUTION:: the pattern must have the same length as the oligonucleotides.
+   .. cmdoption::  -r <IUPAC_PATTERN>, --rejected=<IUPAC_PATTERN>   
+            Selected oligonucleotides do not match the given pattern 
+            (only :doc:`IUPAC <../iupac>` symbols are allowed).
+         .. CAUTION:: the pattern must have the same length as the oligonucleotides.
+   .. cmdoption::  -p ###, --homopolymer=###   
+			Selected oligonucleotides do not contain any homopolymer 
+			longer than the specified length.
+   .. cmdoption::  -P ###, --homopolymer-min=###   
+			Selected oligonucleotides contain at least one homopolymer longer than 
+			or equal to the specified length.
+   .. cmdoption::  -T <seconds>, --timeout=<seconds>   
+			Timeout to identify a set of oligonucleotides of required size, 
+			as defined by the ``-f`` option.
+   .. include:: ../optionsSet/defaultoptions.txt
+   Examples
+   --------
+	*Example 1:*
+	    		.. code-block:: bash
+	       			> oligotag -s 5 -f 24 -d 3 -g 3 -p 2 > mytags.txt 
+			Searches for a family of at least 24 oligonucleotides of a length of 5 nucleotides,
+			with at least 3 differences among them, with a maximum of 3 C/G, and without
+			homopolymers longer than 2. The resulting list of oligonucleotides is saved in
+			the ``mytags.txt`` file. 
+	*Example 2:*
+	    		.. code-block:: bash
+	       			>  oligotag -d 5 -L my_oligos.txt -f 10 -p 1 
+			Searches for a subset of at least 10 oligonucleotides listed in the ``my_oligos.txt`` file, with 
+			at least 5 differences among them, and without homopolymers. The ``my_oligos.txt`` file must 
+			contain a set of oligonucleotides of the same length, with only one oligonucleotide per line.
+			The resulting list of oligonucleotides is printed on the terminal window.
+	*Example 3:*
+	    		.. code-block:: bash
+	       			> oligotag -s 7 -f 96 -d 3 -p 1 -r cnnnnnn > mytags.txt 
+			Searches for a family of at least 96 oligonucleotides of a length of 7 nucleotides,
+			with at least 3 differences among them, without homopolymers, and without a ``C`` in 
+			the first position. The resulting list is saved in the ``mytags.txt`` file.
+	*Example 4:*
+	    		.. code-block:: bash
+	       			> oligotag -s 9 -f 24 -d 3 -a yryryryry > mytags.txt 
+			Searches for a family of at least 24 oligonucleotides of a length of 9 nucleotides,
+			with at least 3 differences among them, and an alternation of pyrimidines and purines. 
+			The resulting list is saved in the ``mytags.txt`` file. Because of the 
+			constraints imposed by the ``-a`` option, it is possible to compute longer oligonucleotides 
+			in a reasonable time.
+   Reference
+   ---------
+   E. Coissac. Oligotag: a program for designing sets of tags for next-generation sequencing of multiplexed samples. Methods Mol Biol, 888:13-31, 2012.
\ No newline at end of file
diff --git a/doc/sphinx/source/statistics.rst b/doc/sphinx/source/statistics.rst
new file mode 100644
index 0000000..b29337a
--- /dev/null
+++ b/doc/sphinx/source/statistics.rst
@@ -0,0 +1,9 @@
+Statistics over sequence file
+.. toctree::
+   :maxdepth: 2
+   scripts/ecodbtaxstat
+   scripts/obicount
+   scripts/obistat
diff --git a/doc/sphinx/source/taxdump.rst b/doc/sphinx/source/taxdump.rst
new file mode 100644
index 0000000..bd951e7
--- /dev/null
+++ b/doc/sphinx/source/taxdump.rst
@@ -0,0 +1,2 @@
+The NCBI taxonomy dump files
diff --git a/doc/sphinx/source/tutorials.rst b/doc/sphinx/source/tutorials.rst
new file mode 100644
index 0000000..0bf7269
--- /dev/null
+++ b/doc/sphinx/source/tutorials.rst
@@ -0,0 +1,12 @@
+OBITools tutorials
+Some basic tutorials.
+.. toctree::
+   :maxdepth: 2
+   wolves
diff --git a/doc/sphinx/source/utilities.rst b/doc/sphinx/source/utilities.rst
new file mode 100644
index 0000000..c559716
--- /dev/null
+++ b/doc/sphinx/source/utilities.rst
@@ -0,0 +1,13 @@
+.. toctree::
+   :maxdepth: 2
+   scripts/oligotag
+   scripts/obidistribute
+   scripts/obisort
+   scripts/obitaxonomy
+   scripts/ecofind
\ No newline at end of file
diff --git a/doc/sphinx/source/welcome.rst b/doc/sphinx/source/welcome.rst
new file mode 100644
index 0000000..eeb051e
--- /dev/null
+++ b/doc/sphinx/source/welcome.rst
@@ -0,0 +1,405 @@
+.. role:: latex(raw)
+   :format: latex
+Welcome to the ``OBITools`` 
+The ``OBITools`` package is a set of programs specifically designed for analyzing
+NGS data in a DNA metabarcoding context, taking into account taxonomic
+information. It is distributed as an open source software available on the
+following website: http://metabarcoding.org/obitools.
+Citation: Boyer F., Mercier C., Bonin A., Taberlet P., Coissac E. (2014)
+OBITools: a Unix-inspired software package for DNA metabarcoding. *Molecular
+Ecology Resources*, submitted.
+Installing the ``OBITools``
+Availability of the ``OBITools``
+The ``OBITools`` are open source and protected by the CeCILL 2.1 license
+(`http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.html <http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.html>`_). 
+The ``OBITools`` are deposited on the Python Package Index (PyPI : `https://pypi.python.org/pypi/obitools <https://pypi.python.org/pypi/obitools>`_)
+and all the sources can be downloaded from our subversion server
+(`http://www.grenoble.prabi.fr/public-svn/OBISofts/OBITools <http://www.grenoble.prabi.fr/public-svn/OBISofts/OBITools>`_). 
+To install the ``OBITools``, the following software must be installed on your system:
+* Python 2.7 (installed by default on most ``Unix`` systems, available from 
+  `the Python website <http://www.python.org/>`_)
+* ``gcc`` (installed by default on most ``Unix`` systems, available from the 
+  GNU sites dedicated to `GCC <https://www.gnu.org/software/gcc/>`_ and 
+  `GMake <https://www.gnu.org/software/make/>`_)
+On a linux system 
+You have to take care that the Python-dev packages are installed.
+On MacOSX
+The C compiler and all the other compilation tools are included in the `XCode <https://itunes.apple.com/fr/app/xcode/id497799835?mt=12>`_
+application, which is not installed by default. The Python included in the system is not
+suitable for running the ``OBITools``. You have to install a complete distribution
+of Python that you can download as a `MacOSX package from the Python website <https://www.python.org/downloads/>`_.
+Downloading and installing the ``OBITools``
+The ``OBITools`` are downloaded and installed using the :download:`get-obitools.py <../../../get_obitools/get-obitools.py>` script.
+This is a user level installation that does not need administrator privilege.
+Once downloaded, move the file :download:`get-obitools.py <../../../get_obitools/get-obitools.py>` in the directory where you want to install
+the ``OBITools``. From a Unix terminal you must now run the command :
+  .. code-block:: bash
+      > python get-obitools.py
+The script will create a new directory, in the place where you run it, in which all the
+``OBITools`` will be installed. No system privileges are required, and your system will not
+be altered in any way by the obitools installation.
+The newly created directory is named OBITools-VERSION where version is substituted by the
+latest version number available.
+Inside the newly created directory all the ``OBITools`` are installed. Close to this directory
+there is a shell script named ``obitools``. Running this script activates the ``OBITools``
+by reconfiguring your Unix environment. 
+  .. code-block:: bash
+	> ./obitools
+Once activated, you can deactivate the ``OBITools`` by typing the command ``exit``.
+  .. code-block:: bash
+	> exit
+	OBITools are no more activated, Bye...
+	======================================
+System level installation
+To install the ``OBITools`` at the system level you can follow two options :
+	- copy the ``obitools`` script in a usual directory for installing programs like ``/usr/local/bin``
+	  but never move the ``OBITools`` directory itself after the installation by the 
+	  :download:`get-obitools.py <../../../get_obitools/get-obitools.py>` script.
+	- The other solution is to add the ``export/bin`` directory located in the ``OBITools`` directory
+	  to the ``PATH`` environment variable.
+Retrieving the sources of the OBITools
+If you want to compile the ``OBITools`` yourself, you will need to install the same
+prerequisites as above, together with the following Python packages:
+  .. code-block:: bash
+	> pip install -U virtualenv
+	> pip install -U sphinx
+	> pip install -U cython
+Moreover, you need to install a Subversion client (a list of clients is available from `Wikipedia <http://en.wikipedia.org/wiki/Comparison_of_Subversion_clients>`_).
+Then you can download the sources:
+  .. code-block:: bash
+      > svn co http://www.grenoble.prabi.fr/public-svn/OBISofts/OBITools/branches/OBITools-1.00/ OBITools
+This command will create a new directory called ``OBITools``.
+Compiling and installing the ``OBITools``
+From the directory where you retrieved the sources, execute the following commands:
+  .. code-block:: bash
+      > cd OBITools
+      > python setup.py --serenity install
+Once installed, you can test your installation by running the commands of the 
+:doc:`tutorials <./tutorials>`.
+DNA metabarcoding is an emerging approach for biodiversity studies (Taberlet et
+al. 2012). Originally mainly developed by microbiologists (e.g. Sogin et al.
+2006), it is now widely used for plants (e.g. Sonstebo et al. 2010, Parducci et
+al. 2012, Yoccoz et al. 2012) and animals from meiofauna (e.g. Chariton et al.
+2010, Baldwin et al. 2013) to larger organisms (e.g. Andersen et al. 2012,
+Thomsen et al. 2012). Interestingly, this method is not limited to *sensu
+stricto* biodiversity surveys, but it can also be implemented in other
+ecological contexts such as for herbivore (e.g. Valentini et al. 2009, Kowalczyk
+et al. 2011) or carnivore (e.g. Deagle et al. 2009, Shehzad et al. 2012) diet
+analyses.
+Whatever the biological question under consideration, the DNA metabarcoding
+methodology relies heavily on next-generation sequencing (NGS), and generates
+considerable numbers of DNA sequence reads (typically millions of reads).
+Manipulation of such large datasets requires dedicated programs usually running
+on a Unix system. Unix is an operating system, whose first version was created 
+during the sixties. Since its early stages, it has been dedicated to scientific
+computing and includes a large set of simple tools to efficiently process text
+files. Most of those programs can be viewed as filters extracting information
+from a text file to create a new text file. These programs process text files as
+streams, line per line, therefore allowing computation on a huge dataset without
+requiring a large memory. Unix programs usually print their results to their
+standard output (*stdout*), which by default is the terminal, so the results can
+be examined on screen. The main philosophy of the Unix environment is to allow
+easy redirection of the *stdout* either to a file, for saving the results, or to
+the standard input (*stdin*) of a second program thus allowing to easily create
+complex processing from simple base commands. Access to Unix computers is
+increasingly easy for scientists nowadays. Indeed, the Linux operating system,
+an open source version of Unix, can be freely installed on every PC machine and
+the MacOSX operating system, running on Apple computers, is also a Unix system. 
+The ``OBITools`` programs imitate Unix standard programs because they usually act as
+filters, reading their data from text files or the *stdin* and writing their
+results to the *stdout*. The main difference with classical Unix programs is that
+text files are not analyzed line per line but sequence record per sequence
+record (see below for a detailed description of a sequence record).
+Compared to packages for similar purposes like mothur (Schloss et al. 2009) or
+QIIME (Caporaso et al. 2010), the ``OBITools`` mainly rely on filtering and sorting
+algorithms. This allows users to set up versatile data analysis pipelines
+(Figure 1), adjustable to the broad range of DNA metabarcoding applications. 
+The innovation of the ``OBITools`` is their ability to take into account the
+taxonomic annotations, ultimately allowing sorting and filtering of sequence
+records based on the taxonomy. 
+|Pipeline example for a standard biodiversity survey|
+Andersen K, Bird KL, Rasmussen M, Haile J, Breuning-Madsen H, Kjær KH, Orlando
+L, Gilbert MTP, Willerslev E (2012) Meta-barcoding of "dirt" DNA from soil
+reflects vertebrate biodiversity. Molecular Ecology, 21, 1966-1979.
+Baldwin DS, Colloff MJ, Rees GN, Chariton AA, Watson GO, Court LN, Hartley DM,
+Morgan MJ, King AJ, Wilson JS, Hodda M, Hardy CM (2013) Impacts of inundation
+and drought on eukaryote biodiversity in semi-arid floodplain soils. Molecular
+Ecology, 22, 1746-1758.
+Caporaso JG, Kuczynski J, Stombaugh J, Bittinger K, Bushman FD, Costello EK,
+Fierer N, Pena AG, Goodrich JK, Gordon JI, Huttley GA, Kelley ST, Knights D,
+Koenig JE, Ley RE, Lozupone CA, McDonald D, Muegge BD, Pirrung M, Reeder J,
+Sevinsky JR, Turnbaugh PJ, Walters WA, Widmann J, Yatsunenko T, Zaneveld J,
+Knight R (2010) QIIME allows analysis of high-throughput community sequencing
+data. Nature Methods, 7, 335-336.
+Chariton AA, Court LN, Hartley DM, Colloff MJ, Hardy CM (2010) Ecological
+assessment of estuarine sediments by pyrosequencing eukaryotic ribosomal DNA.
+Frontiers in Ecology and the Environment, 8, 233-238.
+Deagle BE, Kirkwood R, Jarman SN (2009) Analysis of Australian fur seal diet by
+pyrosequencing prey DNA in faeces. Molecular Ecology, 18, 2022-2038.
+Kowalczyk R, Taberlet P, Coissac E, Valentini A, Miquel C, Kaminski T, Wójcik JM
+(2011) Influence of management practices on large herbivore diet - case of
+European bison in Bialowieza Primeval Forest (Poland). Forest Ecology and
+Management, 261, 821-828.
+Parducci L, Jorgensen T, Tollefsrud MM, Elverland E, Alm T, Fontana SL, Bennett
+KD, Haile J, Matetovici I, Suyama Y, Edwards ME, Andersen K, Rasmussen M,
+Boessenkool S, Coissac E, Brochmann C, Taberlet P, Houmark-Nielsen M, Larsen NK,
+Orlando L, Gilbert MTP, Kjaer KH, Alsos IG, Willerslev E (2012) Glacial Survival
+of Boreal Trees in Northern Scandinavia. Science, 335, 1083-1086.
+Schloss PD, Westcott SL, Ryabin T, Hall JR, Hartmann M, Hollister EB, Lesniewski
+RA, Oakley BB, Parks DH, Robinson CJ, Sahl JW, Stres B, Thallinger GG, Van Horn
+DJ, Weber CF (2009) Introducing mothur: open-source, platform-independent,
+community-supported software for describing and comparing microbial communities.
+Applied and Environmental Microbiology, 75, 7537-7541.
+Shehzad W, Riaz T, Nawaz MA, Miquel C, Poillot C, Shah SA, Pompanon F, Coissac
+E, Taberlet P (2012) Carnivore diet analysis based on next generation
+sequencing: application to the leopard cat (*Prionailurus bengalensis*) in
+Pakistan. Molecular Ecology, 21, 1951-1965.
+Sogin ML, Morrison HG, Huber JA, Welch DM, Huse SM, Neal PR, Arrieta JM, Herndl
+GJ (2006) Microbial diversity in the deep sea and the underexplored "rare
+biosphere". Proceedings of the National Academy of Sciences of the United States
+of America, 103, 12115-12120.
+Sønstebø JH, Gielly L, Brysting A, Reidar E, Edwards M, Haile J, Willerslev E,
+Coissac E, Rioux D, Sannier J, Taberlet P, Brochmann C (2010) Using
+next-generation sequencing for molecular reconstruction of past Arctic
+vegetation and climate. Molecular Ecology Resources, 10, 1009-1018.
+Taberlet P, Coissac E, Hajibabaei M, Rieseberg LH (2012) Environmental DNA.
+Molecular Ecology, 21, 1789-1793.
+Thomsen PF, Kielgast J, Iversen LL, Wiuf C, Rasmussen M, Gilbert MTP, Orlando L,
+Willerslev E (2012) Monitoring endangered freshwater biodiversity using
+environmental DNA. Molecular Ecology, 21, 2565-2573.
+Valentini A, Miquel C, Nawaz MA, Bellemain E, Coissac E, Pompanon F, Gielly L,
+Cruaud C, Nascetti G, Wincker P, Swenson JE, Taberlet P (2009) New perspectives
+in diet analysis based on DNA barcoding and parallel pyrosequencing: the trnL
+approach. Molecular Ecology Resources, 9, 51-60.
+Yoccoz NG, Bråthen KA, Gielly L, Haile J, Edwards ME, Goslar T, von Stedingk H,
+Brysting AK, Coissac E, Pompanon F, Sønstebø JH, Miquel C, Valentini A, de Bello
+F, Chave J, Thuiller W, Wincker P, Cruaud C, Gavory F, Rasmussen M, Gilbert MTP,
+Orlando L, Brochmann C, Willerslev E, Taberlet P (2012) DNA from soil mirrors
+plant taxonomic and growth form diversity. Molecular Ecology, 21, 3647-3655.
+Basic concepts of the ``OBITools``
+Once installed, the ``OBITools`` enrich the Unix command line interface with a set
+of new commands dedicated to NGS data processing. Most of them have a name
+starting with the `obi` prefix. They automatically recognize the input file
+format amongst most of the standard sequence file formats (i.e. :doc:`fasta <fasta>`, :doc:`fastq <fastq>`,
+:doc:`EMBL <embl>`, and :doc:`GenBank <genbank>` formats). Nevertheless, options are available to enforce some
+format specificity such as the encoding system used in :doc:`fastq <fastq>` files for quality
+codes. Most of the basic Unix commands have their ``OBITools`` equivalent (e.g.
+`obihead` *vs* `head`, `obitail` *vs* `tail`, `obigrep` *vs* `grep`), which is
+convenient for scientists familiar with Unix. The main difference between any
+standard Unix command and its ``OBITools`` counterpart is that the treatment unit is
+no longer the text line but the sequence record. As a sequence record is more
+complex than a single text line, the ``OBITools`` programs have many supplementary
+options compared to their Unix equivalents.
+The structure of a sequence record
+The ``OBITools`` commands consider a sequence record as an entity composed of five
+distinct elements. Two of them are mandatory, the identifier (*id*) and the DNA or
+protein sequence itself. The *id* is a single word composed of characters, digits,
+and other symbols like dots or underscores excluding spaces. Formally, the *ids*
+should be unique within a dataset and should identify each sequence record
+unambiguously, but only a few ``OBITools`` actually rely on this property. The
+sequence is an ordered set of characters corresponding to nucleotides or
+amino-acids according to the International Union of Pure and Applied Chemistry
+(IUPAC) nomenclature (Cornish-Bowden 1985). The three other elements composing a
+sequence record are optional. They consist of a sequence definition, a quality
+vector, and a set of attributes. The sequence definition is a free text
+describing the sequence briefly. The quality vector associates a quality score
+to each nucleotide or amino-acid. Usually this quality score is the result of
+the base-calling process by the sequencer. The last element is a set of
+attributes qualifying the sequence, each attribute being described by a
+`key=value` pair. The set of attributes is the central concept of the ``OBITools``
+system. When an ``OBITools`` command is run on the sequence records included in a
+dataset, the result of the computation often consists in the addition of new
+attributes completing the annotation of each sequence record. This strategy of
+sequence annotation allows the ``OBITools`` to return their results as a new
+sequence record file that can be used as the input of another ``OBITools`` program,
+ultimately creating complex pipelines. 
+Managed sequence file formats
+Most of the ``OBITools`` commands read sequence records from a file or from the
+*stdin*, make some computations on the sequence records and output annotated
+sequence records. As inputs, the ``OBITools`` are able to automatically recognize
+the most common sequence file formats (i.e. :doc:`fasta <fasta>`, :doc:`fastq <fastq>`, :doc:`EMBL <embl>`, and :doc:`GenBank <genbank>`).
+They are also able to read `ecoPCR` (Ficetola et al. 2010) result files and
+`ecoPCR`/`ecoPrimers` formatted sequence databases (Riaz et al. 2011) as
+ordinary sequence files. File format outputs are more limited. By default,
+sequences without and with quality information are written in :doc:`fasta <fasta>` and Sanger
+:doc:`fastq <fastq>` formats, respectively. However, dedicated options allow enforcing the
+output format, and the ``OBITools`` are also able to write sequences in the
+`ecoPCR`/`ecoPrimers` database format, to produce reference databases for these
+programs. In the :doc:`fasta <fasta>` or :doc:`fastq <fastq>` format, the attributes are written in the header
+line just after the *id*, following a `key=value;` format (Figure 2).
+|The structure of an OBITools sequence record and its representation in fasta and fastq formats|
+Taxonomical aspects
+Filtering and annotation steps in the processing of DNA metabarcoding sequence
+data are greatly eased by the explicit association of taxonomic information to
+sequences together with an easy access to the taxonomy. Taxonomic information,
+including a taxonomic identifier, can thus be stored in the set of attributes of
+each sequence record. Specifically, the `taxid` attribute is used by the
+``OBITools`` when querying taxonomic information of a sequence record, nevertheless
+several ``OBITools`` commands can annotate sequence records with taxonomy-related
+attributes for the user's convenience. The value of the `taxid` attribute must
+be a unique integer referring unambiguously to one taxon in the taxonomic
+associated database. Although this is not mandatory, the NCBI taxonomy is a
+preferred source of taxonomic information as the ``OBITools`` provide commands to
+easily extract the full taxonomic information from it. The command `obitaxonomy`
+is useful to build a taxonomic database in the ``OBITools`` format from a dump of
+the NCBI taxonomic database (downloadable at the following URL:
+ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz).
+Implemented algorithms
+Most of the algorithms implemented in the ``OBITools`` are basic algorithms allowing
+sampling, filtering and annotation of sequence records based on their associated
+attribute set or sequence (e.g. `obisample`, `obigrep`, `obiannotate`). Some
+others implement algorithms directly related to NGS or to DNA metabarcoding
+(e.g. `illuminapairedend`, `ngsfilter`, `ecotag`). Finally, a few of them do not
+run on sequence records and/or do not provide their results as sequence records.
+Amongst them, `oligotag` (Coissac 2012) generates a set of short oligonucleotide
+sequences (hereafter referred to as `tags`) useful to uniquely identify
+individual samples within a single NGS library containing many samples. Hereby,
+we will describe some of the implemented algorithms pertaining directly to DNA
+metabarcoding, as well as the corresponding programs. A full description of all
+programs included in the ``OBITools`` suite is available on the web
+(http://metabarcoding.org/obitools).
+Implementation of the ``OBITools``
+The ``OBITools`` are a set of Python programs relying on an eponymous Python library.
+The ``OBITools`` library is mainly developed in Python (version 2.7, see
+http://www.python.org). For increasing the speed of execution, many parts of
+the ``OBITools`` library are developed using `cython` (http://cython.org/, a Python
+to C compiler) or the C language directly. The ``OBITools`` compile on Unix systems
+including Linux and MacOSX. 
+Coissac E (2012) Oligotag: a program for designing sets of tags for
+next-generation sequencing of multiplexed samples. In: Data Production and
+Analysis in Population Genomics: Methods and Protocols (eds. Pompanon F, Bonin
+A), pp. 13-31. Springer Science+Business Media, New York.
+Cornish-Bowden A (1985) Nomenclature for incompletely specified bases in nucleic
+acid sequences: recommendations 1984. Nucleic Acids Research, 13, 3021-3030.
+Ficetola GF, Coissac E, Zundel S, Riaz T, Shehzad W, Bessière J, Taberlet P,
+Pompanon F (2010) An in silico approach for the evaluation of DNA barcodes. BMC
+Genomics, 11, 434.
+Riaz T, Shehzad W, Viari A, Pompanon F, Taberlet P, Coissac E (2011) ecoPrimers:
+inference of new DNA barcode markers from whole genome sequence analysis.
+Nucleic Acids Research, 39, e145.
+.. |Pipeline example for a standard biodiversity survey| image:: fig-Pipeline.*
+.. |The structure of an OBITools sequence record and its representation in fasta and fastq formats| image:: fig-Record.*
diff --git a/doc/sphinx/source/wolves.rst b/doc/sphinx/source/wolves.rst
new file mode 100644
index 0000000..34aa594
--- /dev/null
+++ b/doc/sphinx/source/wolves.rst
@@ -0,0 +1,648 @@
+Wolves' diet based on DNA metabarcoding
+Here is a tutorial on how to analyze DNA metabarcoding data produced on Illumina 
+sequencers using:
+    - the *OBITools*
+    - some basic *Unix* commands
+The data used in this tutorial correspond to the analysis of four wolf scats, using the 
+protocol published in Shehzad et al. (2012) for assessing carnivore diet.
+After extracting DNA from the faeces, the DNA amplifications were carried out using the 
+primers TTAGATACCCCACTATGC and TAGAACAGGCTCCTCTAG amplifying the 12S-V5 region 
+(Riaz et al. 2011), together with a wolf blocking oligonucleotide. 
+The complete data set can be downloaded here: :download:`the tutorial dataset<../../../wolf_tutorial.zip>`
+| Good to remember: I am working with tons of sequences       |
+| It is always a good idea to have a look at the intermediate |
+| results or to evaluate the best parameter for each step.    |
+| Some commands are designed for that purpose, for example    |
+| you can use :                                               |
+|                                                             |
+| - :doc:`obicount <scripts/obicount>` to count the number    |
+|   of sequence records in a file                             |
+| - :doc:`obihead <scripts/obihead>` and                      |
+|   :doc:`obitail <scripts/obitail>` to view the first        |
+|   or last sequence records of a file                        |
+| - :doc:`obistat <scripts/obistat>` to get some basic        |
+|   statistics (count, mean, standard deviation) on the       |
+|   attributes (key=value combinations) in the header of each |
+|   sequence record (see The `extended OBITools fasta format` |
+|   in the :doc:`fasta format <fasta>` description)           |
+| - any *Unix* command such as ``less``, ``awk``, ``sort``,   |
+|   ``wc`` to check your files                                |
+The data needed to run the tutorial are the following:
+- :doc:`fastq <fastq>` files resulting from a GA IIx (Illumina) paired-end (2 x 108 bp) 
+  sequencing assay of DNA extracted and amplified from 
+  four wolf faeces:
+    * ``wolf_F.fastq``
+    * ``wolf_R.fastq``
+- the file describing the primers and tags used for all samples sequenced:
+    * ``wolf_diet_ngsfilter.txt``
+      The tags correspond to short and specific sequences added on the 5' end of each 
+      primer to distinguish the different samples
+- the file containing the reference database in a fasta format:
+    * ``db_v05_r117.fasta``
+      This reference database has been extracted from the release 117 of EMBL using 
+      :doc:`ecoPCR <scripts/ecoPCR>`
+- the NCBI taxonomy formatted in the :doc:`ecoPCR <scripts/ecoPCR>` format (see the 
+  :doc:`obiconvert <scripts/obiconvert>` utility for details) :
+    * ``embl_r117.ndx`` 
+    * ``embl_r117.rdx`` 
+    * ``embl_r117.tdx`` 
+Step by step analysis
+Recover full sequence reads from forward and reverse partial reads
+When using the result of a paired-end sequencing assay with supposedly overlapping forward
+and reverse reads, the first step is to recover the assembled sequence.
+The forward and reverse reads of the same fragment are *at the same line position* in the 
+two fastq files obtained after sequencing. 
+Based on these two files, the assembly of the forward and reverse reads is done with the 
+:doc:`illuminapairedend <scripts/illuminapairedend>` utility that aligns the two reads 
+and returns the reconstructed sequence.
+In our case, the command is: 
+.. code-block:: bash
+   > illuminapairedend --score-min=40 -r wolf_R.fastq wolf_F.fastq > wolf.fastq
+The :py:mod:`--score-min` option allows discarding sequences with low alignment quality. 
+If the alignment score is below 40, the forward and reverse reads are not aligned but 
+concatenated, and the value of the :py:mod:`mode` attribute in the sequence header is set 
+to :py:mod:`joined` instead of :py:mod:`alignment`.
+Remove unaligned sequence records
+Unaligned sequences (:py:mod:`mode=joined`) cannot be used. The following command allows 
+removing them from the dataset:
+.. code-block:: bash
+   > obigrep -p 'mode!="joined"' wolf.fastq > wolf.ali.fastq
+The :py:mod:`-p` option requires a *python* expression. :py:mod:`mode!="joined"` means that if 
+the value of the :py:mod:`mode` attribute is different from :py:mod:`joined`, the 
+corresponding sequence record will be kept. 
+The first sequence record of ``wolf.ali.fastq`` can be obtained using the following 
+command line:
+.. code-block:: bash
+   > obihead --without-progress-bar -n 1 wolf.ali.fastq
+And the result is:
+.. code-block:: bash
+   @HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/1_CONS ali_length=61; 
+   direction=left; seq_ab_match=47; sminR=40.0; seq_a_mismatch=7; seq_b_deletion=1; 
+   seq_b_mismatch=7; seq_a_deletion=1; score_norm=1.89772607661; 
+   score=115.761290673; seq_a_insertion=0; mode=alignment; sminL=40.0; 
+   seq_a_single=46; seq_b_single=46; seq_b_insertion=0;
+   ccgcctcctttagataccccactatgcttagccctaaacacaagtaattattataacaaaatcattcgccagagtgtagc
+   gggagtaggttaaaactcaaaggacttggcggtgctttatacccttctagaggagcctgttctaaggaggcgg
+   +
+   ddddddddddddddddddddddcddddcacdddddddddddddc\d~b~~~b~~~~~~b`ryK~|uxyXk`}~ccBccBc
+   ccBcBcccBcBccccccc~~~~b|~~xdbaddaaWcccdaaddddadacddddddcddadbbddddddddddd
+Assign each sequence record to the corresponding sample/marker combination
+Each sequence record is assigned to its corresponding sample and marker using the data
+provided in a text file (here ``wolf_diet_ngsfilter.txt``). This text file contains one 
+line per sample, with the name of the experiment (several experiments can be included in 
+the same file), the name of the tags (for example: ``aattaac`` if the same tag has been 
+used on each extremity of the PCR products, or ``aattaac:gaagtag`` if the tags were 
+different), the sequence of the forward primer, the sequence of the reverse primer, the 
+letter ``T`` or ``F`` for sample identification using the forward primer and tag only or 
+using both primers and both tags, respectively (see :doc:`ngsfilter  <scripts/ngsfilter>` 
+for details).
+.. code-block:: bash
+   > ngsfilter -t wolf_diet_ngsfilter.txt -u unidentified.fastq wolf.ali.fastq > \
+     wolf.ali.assigned.fastq
+This command creates two files:
+- ``unidentified.fastq`` containing all the sequence records that were not assigned to a 
+  sample/marker combination
+- ``wolf.ali.assigned.fastq`` containing all the sequence records that were properly 
+  assigned to a sample/marker combination
+Note that each sequence record of the ``wolf.ali.assigned.fastq`` file contains only the 
+barcode sequence as the sequences of primers and tags are removed by the 
+:doc:`ngsfilter <scripts/ngsfilter>` program. Information concerning the experiment, 
+sample, primers and tags is added as attributes in the sequence header.
+For instance, the first sequence record of ``wolf.ali.assigned.fastq`` is:
+.. code-block:: bash
+   @HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/1_CONS_SUB_SUB status=full; 
+   seq_ab_match=47; sminR=40.0; ali_length=61; tail_quality=67.0; 
+   reverse_match=tagaacaggctcctctag; seq_a_deletion=1; sample=29a_F260619; 
+   forward_match=ttagataccccactatgc; forward_primer=ttagataccccactatgc; 
+   reverse_primer=tagaacaggctcctctag; sminL=40.0; forward_score=72.0; 
+   score=115.761290673; seq_a_mismatch=7; forward_tag=gcctcct; seq_b_mismatch=7; 
+   experiment=wolf_diet; mid_quality=69.4210526316; avg_quality=69.1045751634; 
+   seq_a_single=46; score_norm=1.89772607661; reverse_score=72.0; 
+   direction=forward; seq_b_insertion=0; seq_b_deletion=1; seq_a_insertion=0; 
+   seq_length_ori=153; reverse_tag=gcctcct; seq_length=99; mode=alignment; 
+   head_quality=67.0; seq_b_single=46; 
+   ttagccctaaacacaagtaattattataacaaaatcattcgccagagtgtagcgggagtaggttaaaactcaaaggact
+   tggcggtgctttataccctt
+   +
+   cacdddddddddddddc\d~b~~~b~~~~~~b`ryK~|uxyXk`}~ccBccBcccBcBcccBcBccccccc~~~~b|~~
+   xdbaddaaWcccdaadddda
+Dereplicate reads into uniq sequences
+The same DNA molecule can be sequenced several times. In order to reduce both file size 
+and computation time, and to get more easily interpretable results, 
+it is convenient to work with unique *sequences* instead of *reads*. To *dereplicate* such 
+*reads* into unique *sequences*, we use the :doc:`obiuniq <scripts/obiuniq>` command.
+| Definition: Dereplicate reads into unique sequences         |
+| 1. compare all the reads in a data set to each other        |
+| 2. group strictly identical reads together                  |
+| 3. output the sequence for each group and its count in the  |
+|    original dataset (in this way, all duplicated reads are  |
+|    removed)                                                 |
+|                                                             |
+| Definition adapted from Seguritan and Rohwer (2001)         |
+For dereplication, we use the :doc:`obiuniq <scripts/obiuniq>` command with the 
+`-m sample` option. The `-m sample` option is used to keep the information about the 
+samples of origin for each unique sequence.
+.. code-block:: bash
+   > obiuniq -m sample wolf.ali.assigned.fastq > wolf.ali.assigned.uniq.fasta
+Note that :doc:`obiuniq <scripts/obiuniq>` returns a fasta file.
+The first sequence record of ``wolf.ali.assigned.uniq.fasta`` is:
+.. code-block:: bash
+   >HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/1_CONS_SUB_SUB_CMP ali_length=61; 
+   seq_ab_match=47; sminR=40.0; tail_quality=67.0; reverse_match=ttagataccccactatgc; 
+   seq_a_deletion=1; forward_match=tagaacaggctcctctag; forward_primer=tagaacaggctcctctag; 
+   reverse_primer=ttagataccccactatgc; sminL=40.0; merged_sample={'29a_F260619': 1}; 
+   forward_score=72.0; seq_a_mismatch=7; forward_tag=gcctcct; seq_b_mismatch=7; 
+   score=115.761290673; mid_quality=69.4210526316; avg_quality=69.1045751634; 
+   seq_a_single=46; score_norm=1.89772607661; reverse_score=72.0; direction=reverse; 
+   seq_b_insertion=0; experiment=wolf_diet; seq_b_deletion=1; seq_a_insertion=0; 
+   seq_length_ori=153; reverse_tag=gcctcct; count=1; seq_length=99; status=full; 
+   mode=alignment; head_quality=67.0; seq_b_single=46; 
+   aagggtataaagcaccgccaagtcctttgagttttaacctactcccgctacactctggcg
+   aatgattttgttataataattacttgtgtttagggctaa
+The run of :doc:`obiuniq <scripts/obiuniq>` has added two ``key=value`` entries in the header
+of the fasta sequence:
+   - :py:mod:`merged_sample={'29a_F260619': 1}`: this sequence has been found once in a 
+     single sample called 29a_F260619
+   - :py:mod:`count=1` : the total count for this sequence is 1 
+To keep only these two ``key=value`` attributes, we can use the 
+:doc:`obiannotate <scripts/obiannotate>` command:
+.. code-block:: bash
+   > obiannotate -k count -k merged_sample \
+     wolf.ali.assigned.uniq.fasta > $$ ; mv $$ wolf.ali.assigned.uniq.fasta
+The first five sequence records of ``wolf.ali.assigned.uniq.fasta`` become:
+.. code-block:: bash
+   >HELIUM_000100422_612GNAAXX:7:119:14871:19157#0/1_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 1}; count=1; 
+   aagggtataaagcaccgccaagtcctttgagttttaacctactcccgctacactctggcg
+   aatgattttgttataataattacttgtgtttagggctaa
+   >HELIUM_000100422_612GNAAXX:7:108:5640:3823#0/1_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 7, '15a_F730814': 2}; count=9; 
+   aagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggc
+   gaacaattttgttatattaattacttgtgtttagggctaa
+   >HELIUM_000100422_612GNAAXX:7:97:14311:19299#0/1_CONS_SUB_SUB_CMP merged_sample={'29a_F260619': 5, '15a_F730814': 4}; count=9; 
+   aagggtataaagcaccgccaagtcctttgagttttaagctcttgccggtagtactctggc
+   gaataattttgttatattaattacttgtgtttagggctaa
+   >HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB merged_sample={'29a_F260619': 4697, '15a_F730814': 7638}; count=12335; 
+   aagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggc
+   gaataattttgttatattaattacttgtgtttagggctaa
+   >HELIUM_000100422_612GNAAXX:7:57:18459:16145#0/1_CONS_SUB_SUB_CMP merged_sample={'26a_F040644': 10490}; count=10490; 
+   agggatgtaaagcaccgccaagtcctttgagtttcaggctgttgctagtagtactctggc
+   gaacattcttgtttattgaatgtttatgtttagggctaa
+Denoise the sequence dataset
+To have a set of sequences assigned to their corresponding samples does not mean that all 
+sequences are *biologically* meaningful i.e. some of these sequences can contain PCR 
+and/or sequencing errors, or chimeras. To remove such sequences as much as possible, we 
+first discard rare sequences and then sequence variants that likely correspond to 
+Get the count statistics
+In that case, we use :doc:`obistat <scripts/obistat>` to get the counting statistics on 
+the 'count' attribute (the count attribute has been added by the :doc:`obiuniq 
+<scripts/obiuniq>` command). By piping the result in the *Unix* commands ``sort`` and 
+``head``, we keep only the count statistics for the 20 lowest values of the 'count' 
+.. code-block:: bash
+   > obistat -c count wolf.ali.assigned.uniq.fasta |  \
+     sort -nk1 | head -20
+This prints the following output:
+.. code-block:: bash
+    count      count     total
+    1          3504      3504
+    2           228       456
+    3           136       408
+    4            73       292
+    5            61       305
+    6            47       282
+    7            34       238
+    8            27       216
+    9            26       234
+    10           25       250
+    11           13       143
+    12           14       168
+    13           10       130
+    14            5        70
+    15            9       135
+    16            8       128
+    17            4        68
+    18            9       162
+    19            5        95
+The dataset contains 3504 sequences occurring only once.  
+Keep only the sequences having a count greater or equal to 10 and a length of at least 80 bp
+Based on the previous observation, we set the cut-off for keeping sequences for further 
+analysis to a count of 10. To do this, we use the :doc:`obigrep <scripts/obigrep>` 
+The ``-p 'count>=10'`` option means that the ``python`` expression :py:mod:`count>=10` 
+must evaluate to :py:mod:`True` for each sequence to be kept. Based on previous 
+knowledge we also remove sequences with a length shorter than 80 bp (option -l) as we know 
+that the amplified 12S-V5 barcode for vertebrates must have a length around 100bp.
+.. code-block:: bash
+   > obigrep -l 80 -p 'count>=10' wolf.ali.assigned.uniq.fasta \
+       > wolf.ali.assigned.uniq.c10.l80.fasta
+The first sequence record of ``wolf.ali.assigned.uniq.c10.l80.fasta`` is:
+.. code-block:: bash    
+   >HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB count=12335; merged_sample={'29a_F260619': 4697, '15a_F730814': 7638}; 
+   aagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggc
+   gaataattttgttatattaattacttgtgtttagggctaa
+Clean the sequences for PCR/sequencing errors (sequence variants)
+As a final denoising step, using the :doc:`obiclean <scripts/obiclean>` program, we keep 
+the `head` sequences (``-H`` option) that are sequences with no variants with a count 
+greater than 5% of their own count  (``-r 0.05`` option).
+.. code-block:: bash
+   > obiclean -s merged_sample -r 0.05 -H \
+     wolf.ali.assigned.uniq.c10.l80.fasta > wolf.ali.assigned.uniq.c10.l80.clean.fasta 
+The first sequence record of ``wolf.ali.assigned.uniq.c10.l80.clean.fasta`` is:
+.. code-block:: bash
+   >HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB 
+   merged_sample={'29a_F260619': 4697, '15a_F730814': 7638}; 
+   obiclean_count={'29a_F260619': 5438, '15a_F730814': 8642}; obiclean_head=True; 
+   obiclean_cluster={'29a_F260619': 
+   'HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB', '15a_F730814': 
+   'HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB'}; 
+   count=12335; obiclean_internalcount=0; obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; 
+   obiclean_samplecount=2; obiclean_headcount=2; obiclean_singletoncount=0; 
+   aagggtataaagcaccgccaagtcctttgagttttaagctattgccggtagtactctggc
+   gaataattttgttatattaattacttgtgtttagggctaa
+Taxonomic assignment of sequences
+Once denoising has been done, the next step in diet analysis is to assign the barcodes to 
+the corresponding species in order to get the complete list of species associated to each 
+Taxonomic assignment of sequences requires a reference database compiling all possible 
+species to be identified in the sample. Assignment is then done based on sequence 
+comparison between sample sequences and reference sequences.
+Build a reference database
+One way to build the reference database is to use the :doc:`ecoPCR <scripts/ecoPCR>` 
+program to simulate a PCR and to extract all sequences from the EMBL that may be amplified 
+`in silico` by the two primers (`TTAGATACCCCACTATGC` and `TAGAACAGGCTCCTCTAG`) used for 
+PCR amplification. 
+The full list of steps for building this reference database would then be:
+1. Download the whole set of EMBL sequences (available from: 
+   ftp://ftp.ebi.ac.uk/pub/databases/embl/release/)
+2. Download the NCBI taxonomy (available from: 
+   ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz)
+3. Format them into the ecoPCR format (see :doc:`obiconvert <scripts/obiconvert>` for how 
+   you can produce ecoPCR compatible files)
+4. Use ecoPCR to simulate amplification and build a reference database based on putatively
+   amplified barcodes together with their recorded taxonomic information  
+As step 1 and step 3 can be really time-consuming (about one day), we already provide the 
+reference database produced by the following commands so that you can skip its 
+construction. Note that as the EMBL database and taxonomic data can evolve daily, if you 
+run the following commands you may end up with quite different results.
+Any utility allowing file downloading from a ftp site can be used. In the following 
+commands, we use the commonly used ``wget`` *Unix* command.
+Download the sequences
+.. code-block:: bash
+   > mkdir EMBL
+   > cd EMBL
+   > wget -nH --cut-dirs=4 -Arel_std_\*.dat.gz -m ftp://ftp.ebi.ac.uk/pub/databases/embl/release/
+   > cd ..
+Download the taxonomy
+.. code-block:: bash
+   > mkdir TAXO
+   > cd TAXO
+   > wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
+   > tar -zxvf taxdump.tar.gz
+   > cd ..
+Format the data
+.. code-block:: bash
+   > obiconvert --embl -t ./TAXO --ecopcrDB-output=embl_last ./EMBL/*.dat
+Use ecoPCR to simulate an in silico PCR
+.. code-block:: bash
+   > ecoPCR -d ./ECODB/embl_last -e 3 -l 50 -L 150 \
+Note that the primers must be in the same order both in ``wolf_diet_ngsfilter.txt`` and in 
+the :doc:`ecoPCR <scripts/ecoPCR>` command.
+Clean the database
+    1. filter sequences so that they have a good taxonomic description at the species, 
+       genus, and family levels (:doc:`obigrep <scripts/obigrep>` command below).
+    2. remove redundant sequences (:doc:`obiuniq <scripts/obiuniq>` command below).
+    3. ensure that the dereplicated sequences have a taxid at the family level 
+       (:doc:`obigrep <scripts/obigrep>` command below).
+    4. ensure that sequences each have a unique identification 
+       (:doc:`obiannotate <scripts/obiannotate>` command below)
+.. code-block:: bash
+   > obigrep -d embl_last --require-rank=species \
+     --require-rank=genus --require-rank=family v05.ecopcr > v05_clean.fasta
+   > obiuniq -d embl_last \
+     v05_clean.fasta > v05_clean_uniq.fasta
+   > obigrep -d embl_last --require-rank=family \
+     v05_clean_uniq.fasta > v05_clean_uniq_clean.fasta
+   > obiannotate --uniq-id v05_clean_uniq_clean.fasta > db_v05.fasta
+.. warning::
+   From now on, for the sake of clarity, the following commands will use the filenames of 
+   the files provided with the tutorial. If you decided to run the last steps and use the 
+   files you have produced, you'll have to use ``db_v05.fasta`` instead of 
+   ``db_v05_r117.fasta`` and ``embl_last`` instead of ``embl_r117``
+Assign each sequence to a taxon
+Once the reference database is built, taxonomic assignment can be carried out using
+the :doc:`ecotag <scripts/ecotag>` command.
+.. code-block:: bash
+   > ecotag -d embl_r117 -R db_v05_r117.fasta wolf.ali.assigned.uniq.c10.l80.clean.fasta > \
+     wolf.ali.assigned.uniq.c10.l80.clean.tag.fasta
+The :doc:`ecotag <scripts/ecotag>` adds several `key=value` attributes in the sequence 
+record header, among them:
+- best_match=ACCESSION where ACCESSION is the id of the sequence in the reference database 
+  that best aligns to the query sequence;
+- best_identity=FLOAT where FLOAT*100 is the percentage of identity between the best match 
+  sequence and the query sequence;
+- taxid=TAXID where TAXID is the final assignment of the sequence by 
+  :doc:`ecotag <scripts/ecotag>`;
+- scientific_name=NAME where NAME is the scientific name of the assigned taxid.
+The first sequence record of ``wolf.ali.assigned.uniq.c10.l80.clean.tag.fasta`` is:
+.. code-block:: bash
+   >HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB_CMP 
+   species_name=Capreolus capreolus; family=9850; scientific_name=Capreolus 
+   capreolus; rank=species; taxid=9858; best_identity={'db_v05_r117': 1.0}; 
+   scientific_name_by_db={'db_v05_r117': 'Capreolus capreolus'}; 
+   obiclean_samplecount=2; species=9858; merged_sample={'29a_F260619': 4697, 
+   '15a_F730814': 7638}; obiclean_count={'29a_F260619': 5438, '15a_F730814': 8642}; 
+   obiclean_singletoncount=0; obiclean_cluster={'29a_F260619': 
+   'HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB_CMP', 
+   '15a_F730814': 
+   'HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB_CMP'}; 
+   species_list={'db_v05_r117': ['Capreolus capreolus']}; obiclean_internalcount=0; 
+   match_count={'db_v05_r117': 1}; obiclean_head=True; taxid_by_db={'db_v05_r117': 
+   9858}; family_name=Cervidae; genus_name=Capreolus; 
+   obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; obiclean_headcount=2; 
+   count=12335; id_status={'db_v05_r117': True}; best_match={'db_v05_r117': 
+   'AJ885202'}; order_name=None; rank_by_db={'db_v05_r117': 'species'}; genus=9857; 
+   order=None; 
+   ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
+   agcttaaaactcaaaggacttggcggtgctttataccctt
+Generate the final result table
+Some attributes that are no longer useful can be removed at this stage. 
+.. code-block:: bash
+   > obiannotate  --delete-tag=scientific_name_by_db --delete-tag=obiclean_samplecount \
+     --delete-tag=obiclean_count --delete-tag=obiclean_singletoncount \
+     --delete-tag=obiclean_cluster --delete-tag=obiclean_internalcount \
+     --delete-tag=obiclean_head --delete-tag=taxid_by_db --delete-tag=obiclean_headcount \
+     --delete-tag=id_status --delete-tag=rank_by_db --delete-tag=order_name \
+     --delete-tag=order wolf.ali.assigned.uniq.c10.l80.clean.tag.fasta > \
+     wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.fasta
+The first sequence record of ``wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.fasta`` is:
+.. code-block:: bash
+   >HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB_CMP 
+   match_count={'db_v05_r117': 1}; count=12335; species_name=Capreolus capreolus; 
+   best_match={'db_v05_r117': 'AJ885202'}; family=9850; family_name=Cervidae; 
+   scientific_name=Capreolus capreolus; taxid=9858; rank=species; 
+   obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; 
+   best_identity={'db_v05_r117': 1.0}; merged_sample={'29a_F260619': 4697, 
+   '15a_F730814': 7638}; genus_name=Capreolus; genus=9857; species=9858; 
+   species_list={'db_v05_r117': ['Capreolus capreolus']}; 
+   ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
+   agcttaaaactcaaaggacttggcggtgctttataccctt
+The sequences can be sorted by decreasing order of `count`.
+.. code-block:: bash
+   > obisort -k count -r wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.fasta >  \
+     wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.sort.fasta 
+The first sequence record of ``wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.sort.fasta`` is then:
+.. code-block:: bash
+   >HELIUM_000100422_612GNAAXX:7:22:8540:14708#0/1_CONS_SUB_SUB_CMP count=12335; 
+   match_count={'db_v05_r117': 1}; species_name=Capreolus capreolus; 
+   best_match={'db_v05_r117': 'AJ885202'}; family=9850; family_name=Cervidae; 
+   scientific_name=Capreolus capreolus; taxid=9858; rank=species; 
+   obiclean_status={'29a_F260619': 'h', '15a_F730814': 'h'}; 
+   best_identity={'db_v05_r117': 1.0}; merged_sample={'29a_F260619': 4697, 
+   '15a_F730814': 7638}; genus_name=Capreolus; genus=9857; species=9858; 
+   species_list={'db_v05_r117': ['Capreolus capreolus']}; 
+   ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
+   agcttaaaactcaaaggacttggcggtgctttataccctt
+Finally, a tab-delimited file that can be opened by Excel or R is generated. 
+.. code-block:: bash
+   > obitab -o wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.sort.fasta > \
+     wolf.ali.assigned.uniq.c10.l80.clean.tag.ann.sort.tab
+This file contains 26 sequences. You can deduce the diet of each sample:
+ - 13a_F730603: Cervus elaphus
+ - 15a_F730814: Capreolus capreolus
+ - 26a_F040644: Marmota sp. (according to the location, it is Marmota marmota)
+ - 29a_F260619: Capreolus capreolus
+Note that we also obtained a few wolf sequences although a wolf-blocking oligonucleotide 
+was used.
+ - Shehzad W, Riaz T, Nawaz MA, Miquel C, Poillot C, Shah SA, Pompanon F, Coissac E, 
+   Taberlet P (2012) Carnivore diet analysis based on next generation sequencing: 
+   application to the leopard cat (Prionailurus bengalensis) in Pakistan. Molecular 
+   Ecology, 21, 1951-1965.
+ - Riaz T, Shehzad W, Viari A, Pompanon F, Taberlet P, Coissac E (2011) ecoPrimers: 
+   inference of new DNA barcode markers from whole genome sequence analysis. Nucleic 
+   Acids Research, 39, e145.
+ - Seguritan V, Rohwer F. (2001) FastGroup: a program to dereplicate libraries of 
+   16S rDNA sequences. BMC Bioinformatics. 2001;2:9. Epub 2001 Oct 16.
+For any suggestion or improvement, please contact :
+    - eric.coissac at metabarcoding.org
+    - frederic.boyer at metabarcoding.org
diff --git a/doc/sphinx/sphinxext/apigen.py b/doc/sphinx/sphinxext/apigen.py
new file mode 100644
index 0000000..1237409
--- /dev/null
+++ b/doc/sphinx/sphinxext/apigen.py
@@ -0,0 +1,427 @@
+"""Attempt to generate templates for module reference with Sphinx
+XXX - we exclude extension modules
+To include extension modules, first identify them as valid in the
+``_uri2path`` method, then handle them in the ``_parse_module`` script.
+We get functions and classes by parsing the text of .py files.
+Alternatively we could import the modules for discovery, and we'd have
+to do that for extension modules.  This would involve changing the
+``_parse_module`` method to work via import and introspection, and
+might involve changing ``discover_modules`` (which determines which
+files are modules, and therefore which module URIs will be passed to
+NOTE: this is a modified version of a script originally shipped with the
+PyMVPA project, which we've adapted for NIPY use.  PyMVPA is an MIT-licensed
+# Stdlib imports
+import os
+import re
+# Functions and classes
+class ApiDocWriter(object):
+    ''' Class for automatic detection and parsing of API docs
+    to Sphinx-parsable reST format'''
+    # only separating first two levels
+    rst_section_levels = ['*', '=', '-', '~', '^']
+    def __init__(self,
+                 package_name,
+                 rst_extension='.rst',
+                 package_skip_patterns=None,
+                 module_skip_patterns=None,
+                 ):
+        ''' Initialize package for parsing
+        Parameters
+        ----------
+        package_name : string
+            Name of the top-level package.  *package_name* must be the
+            name of an importable package
+        rst_extension : string, optional
+            Extension for reST files, default '.rst'
+        package_skip_patterns : None or sequence of {strings, regexps}
+            Sequence of strings giving URIs of packages to be excluded
+            Operates on the package path, starting at (including) the
+            first dot in the package path, after *package_name* - so,
+            if *package_name* is ``sphinx``, then ``sphinx.util`` will
+            result in ``.util`` being passed for earching by these
+            regexps.  If is None, gives default. Default is:
+            ['\.tests$']
+        module_skip_patterns : None or sequence
+            Sequence of strings giving URIs of modules to be excluded
+            Operates on the module name including preceding URI path,
+            back to the first dot after *package_name*.  For example
+            ``sphinx.util.console`` results in the string to search of
+            ``.util.console``
+            If is None, gives default. Default is:
+            ['\.setup$', '\._']
+        '''
+        if package_skip_patterns is None:
+            package_skip_patterns = ['\\.tests$']
+        if module_skip_patterns is None:
+            module_skip_patterns = ['\\.setup$', '\\._']
+        self.package_name = package_name
+        self.rst_extension = rst_extension
+        self.package_skip_patterns = package_skip_patterns
+        self.module_skip_patterns = module_skip_patterns
+    def get_package_name(self):
+        return self._package_name
+    def set_package_name(self, package_name):
+        ''' Set package_name
+        >>> docwriter = ApiDocWriter('sphinx')
+        >>> import sphinx
+        >>> docwriter.root_path == sphinx.__path__[0]
+        True
+        >>> docwriter.package_name = 'docutils'
+        >>> import docutils
+        >>> docwriter.root_path == docutils.__path__[0]
+        True
+        '''
+        # It's also possible to imagine caching the module parsing here
+        self._package_name = package_name
+        self.root_module = __import__(package_name)
+        self.root_path = self.root_module.__path__[0]
+        self.written_modules = None
+    package_name = property(get_package_name, set_package_name, None,
+                            'get/set package_name')
+    def _get_object_name(self, line):
+        ''' Get second token in line
+        >>> docwriter = ApiDocWriter('sphinx')
+        >>> docwriter._get_object_name("  def func():  ")
+        'func'
+        >>> docwriter._get_object_name("  class Klass(object):  ")
+        'Klass'
+        >>> docwriter._get_object_name("  class Klass:  ")
+        'Klass'
+        '''
+        name = line.split()[1].split('(')[0].strip()
+        # in case we have classes which are not derived from object
+        # ie. old style classes
+        return name.rstrip(':')
+    def _uri2path(self, uri):
+        ''' Convert uri to absolute filepath
+        Parameters
+        ----------
+        uri : string
+            URI of python module to return path for
+        Returns
+        -------
+        path : None or string
+            Returns None if there is no valid path for this URI
+            Otherwise returns absolute file system path for URI
+        Examples
+        --------
+        >>> docwriter = ApiDocWriter('sphinx')
+        >>> import sphinx
+        >>> modpath = sphinx.__path__[0]
+        >>> res = docwriter._uri2path('sphinx.builder')
+        >>> res == os.path.join(modpath, 'builder.py')
+        True
+        >>> res = docwriter._uri2path('sphinx')
+        >>> res == os.path.join(modpath, '__init__.py')
+        True
+        >>> docwriter._uri2path('sphinx.does_not_exist')
+        '''
+        if uri == self.package_name:
+            return os.path.join(self.root_path, '__init__.py')
+        path = uri.replace('.', os.path.sep)
+        path = path.replace(self.package_name + os.path.sep, '')
+        path = os.path.join(self.root_path, path)
+        # XXX maybe check for extensions as well?
+        if os.path.exists(path + '.py'): # file
+            path += '.py'
+        elif os.path.exists(os.path.join(path, '__init__.py')):
+            path = os.path.join(path, '__init__.py')
+        else:
+            return None
+        return path
+    def _path2uri(self, dirpath):
+        ''' Convert directory path to uri '''
+        relpath = dirpath.replace(self.root_path, self.package_name)
+        if relpath.startswith(os.path.sep):
+            relpath = relpath[1:]
+        return relpath.replace(os.path.sep, '.')
+    def _parse_module(self, uri):
+        ''' Parse module defined in *uri* '''
+        filename = self._uri2path(uri)
+        if filename is None:
+            # nothing that we could handle here.
+            return ([],[])
+        f = open(filename, 'rt')
+        functions, classes = self._parse_lines(f)
+        f.close()
+        return functions, classes
+    def _parse_lines(self, linesource):
+        ''' Parse lines of text for functions and classes '''
+        functions = []
+        classes = []
+        for line in linesource:
+            if line.startswith('def ') and line.count('('):
+                # exclude private stuff
+                name = self._get_object_name(line)
+                if not name.startswith('_'):
+                    functions.append(name)
+            elif line.startswith('class '):
+                # exclude private stuff
+                name = self._get_object_name(line)
+                if not name.startswith('_'):
+                    classes.append(name)
+            else:
+                pass
+        functions.sort()
+        classes.sort()
+        return functions, classes
+    def generate_api_doc(self, uri):
+        '''Make autodoc documentation template string for a module
+        Parameters
+        ----------
+        uri : string
+            python location of module - e.g 'sphinx.builder'
+        Returns
+        -------
+        S : string
+            Contents of API doc
+        '''
+        # get the names of all classes and functions
+        functions, classes = self._parse_module(uri)
+        if not len(functions) and not len(classes):
+            print 'WARNING: Empty -',uri  # dbg
+            return ''
+        # Make a shorter version of the uri that omits the package name for
+        # titles 
+        uri_short = re.sub(r'^%s\.' % self.package_name,'',uri)
+        ad = '.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n'
+        chap_title = uri_short
+        ad += (chap_title+'\n'+ self.rst_section_levels[1] * len(chap_title)
+               + '\n\n')
+        # Set the chapter title to read 'module' for all modules except for the
+        # main packages
+        if '.' in uri:
+            title = 'Module: :mod:`' + uri_short + '`'
+        else:
+            title = ':mod:`' + uri_short + '`'
+        ad += title + '\n' + self.rst_section_levels[2] * len(title)
+        if len(classes):
+            ad += '\nInheritance diagram for ``%s``:\n\n' % uri
+            ad += '.. inheritance-diagram:: %s \n' % uri
+            ad += '   :parts: 3\n'
+        ad += '\n.. automodule:: ' + uri + '\n'
+        ad += '\n.. currentmodule:: ' + uri + '\n'
+        multi_class = len(classes) > 1
+        multi_fx = len(functions) > 1
+        if multi_class:
+            ad += '\n' + 'Classes' + '\n' + \
+                  self.rst_section_levels[2] * 7 + '\n'
+        elif len(classes) and multi_fx:
+            ad += '\n' + 'Class' + '\n' + \
+                  self.rst_section_levels[2] * 5 + '\n'
+        for c in classes:
+            ad += '\n:class:`' + c + '`\n' \
+                  + self.rst_section_levels[multi_class + 2 ] * \
+                  (len(c)+9) + '\n\n'
+            ad += '\n.. autoclass:: ' + c + '\n'
+            # must NOT exclude from index to keep cross-refs working
+            ad += '  :members:\n' \
+                  '  :undoc-members:\n' \
+                  '  :show-inheritance:\n' \
+                  '  :inherited-members:\n' \
+                  '\n' \
+                  '  .. automethod:: __init__\n'
+        if multi_fx:
+            ad += '\n' + 'Functions' + '\n' + \
+                  self.rst_section_levels[2] * 9 + '\n\n'
+        elif len(functions) and multi_class:
+            ad += '\n' + 'Function' + '\n' + \
+                  self.rst_section_levels[2] * 8 + '\n\n'
+        for f in functions:
+            # must NOT exclude from index to keep cross-refs working
+            ad += '\n.. autofunction:: ' + uri + '.' + f + '\n\n'
+        return ad
+    def _survives_exclude(self, matchstr, match_type):
+        ''' Returns True if *matchstr* does not match patterns
+        ``self.package_name`` removed from front of string if present
+        Examples
+        --------
+        >>> dw = ApiDocWriter('sphinx')
+        >>> dw._survives_exclude('sphinx.okpkg', 'package')
+        True
+        >>> dw.package_skip_patterns.append('^\\.badpkg$')
+        >>> dw._survives_exclude('sphinx.badpkg', 'package')
+        False
+        >>> dw._survives_exclude('sphinx.badpkg', 'module')
+        True
+        >>> dw._survives_exclude('sphinx.badmod', 'module')
+        True
+        >>> dw.module_skip_patterns.append('^\\.badmod$')
+        >>> dw._survives_exclude('sphinx.badmod', 'module')
+        False
+        '''
+        if match_type == 'module':
+            patterns = self.module_skip_patterns
+        elif match_type == 'package':
+            patterns = self.package_skip_patterns
+        else:
+            raise ValueError('Cannot interpret match type "%s"' 
+                             % match_type)
+        # Match to URI without package name
+        L = len(self.package_name)
+        if matchstr[:L] == self.package_name:
+            matchstr = matchstr[L:]
+        for pat in patterns:
+            try:
+                pat.search
+            except AttributeError:
+                pat = re.compile(pat)
+            if pat.search(matchstr):
+                return False
+        return True
+    def discover_modules(self):
+        ''' Return module sequence discovered from ``self.package_name`` 
+        Parameters
+        ----------
+        None
+        Returns
+        -------
+        mods : sequence
+            Sequence of module names within ``self.package_name``
+        Examples
+        --------
+        >>> dw = ApiDocWriter('sphinx')
+        >>> mods = dw.discover_modules()
+        >>> 'sphinx.util' in mods
+        True
+        >>> dw.package_skip_patterns.append('\.util$')
+        >>> 'sphinx.util' in dw.discover_modules()
+        False
+        >>> 
+        '''
+        modules = [self.package_name]
+        # raw directory parsing
+        for dirpath, dirnames, filenames in os.walk(self.root_path):
+            # Check directory names for packages
+            root_uri = self._path2uri(os.path.join(self.root_path,
+                                                   dirpath))
+            for dirname in dirnames[:]: # copy list - we modify inplace
+                package_uri = '.'.join((root_uri, dirname))
+                if (self._uri2path(package_uri) and
+                    self._survives_exclude(package_uri, 'package')):
+                    modules.append(package_uri)
+                else:
+                    dirnames.remove(dirname)
+            # Check filenames for modules
+            for filename in filenames:
+                module_name = filename[:-3]
+                module_uri = '.'.join((root_uri, module_name))
+                if (self._uri2path(module_uri) and
+                    self._survives_exclude(module_uri, 'module')):
+                    modules.append(module_uri)
+        return sorted(modules)
+    def write_modules_api(self, modules,outdir):
+        # write the list
+        written_modules = []
+        for m in modules:
+            api_str = self.generate_api_doc(m)
+            if not api_str:
+                continue
+            # write out to file
+            outfile = os.path.join(outdir,
+                                   m + self.rst_extension)
+            fileobj = open(outfile, 'wt')
+            fileobj.write(api_str)
+            fileobj.close()
+            written_modules.append(m)
+        self.written_modules = written_modules
+    def write_api_docs(self, outdir):
+        """Generate API reST files.
+        Parameters
+        ----------
+        outdir : string
+            Directory name in which to store files
+            We create automatic filenames for each module
+        Returns
+        -------
+        None
+        Notes
+        -----
+        Sets self.written_modules to list of written modules
+        """
+        if not os.path.exists(outdir):
+            os.mkdir(outdir)
+        # compose list of modules
+        modules = self.discover_modules()
+        self.write_modules_api(modules,outdir)
+    def write_index(self, outdir, froot='gen', relative_to=None):
+        """Make a reST API index file from written files
+        Parameters
+        ----------
+        path : string
+            Filename to write index to
+        outdir : string
+            Directory to which to write generated index file
+        froot : string, optional
+            root (filename without extension) of filename to write to
+            Defaults to 'gen'.  We add ``self.rst_extension``.
+        relative_to : string
+            path to which written filenames are relative.  This
+            component of the written file path will be removed from
+            outdir, in the generated index.  Default is None, meaning,
+            leave path as it is.
+        """
+        if self.written_modules is None:
+            raise ValueError('No modules written')
+        # Get full filename path
+        path = os.path.join(outdir, froot+self.rst_extension)
+        # Path written into index is relative to rootpath
+        if relative_to is not None:
+            relpath = outdir.replace(relative_to + os.path.sep, '')
+        else:
+            relpath = outdir
+        idx = open(path,'wt')
+        w = idx.write
+        w('.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n')
+        w('.. toctree::\n\n')
+        for f in self.written_modules:
+            w('   %s\n' % os.path.join(relpath,f))
+        idx.close()
diff --git a/doc/sphinx/sphinxext/docscrape.py b/doc/sphinx/sphinxext/docscrape.py
new file mode 100644
index 0000000..f374b3d
--- /dev/null
+++ b/doc/sphinx/sphinxext/docscrape.py
@@ -0,0 +1,497 @@
+"""Extract reference documentation from the NumPy source tree.
+import inspect
+import textwrap
+import re
+import pydoc
+from StringIO import StringIO
+from warnings import warn
+class Reader(object):
+    """A line-based string reader.
+    """
+    def __init__(self, data):
+        """
+        Parameters
+        ----------
+        data : str
+           String with lines separated by '\n'.
+        """
+        if isinstance(data,list):
+            self._str = data
+        else:
+            self._str = data.split('\n') # store string as list of lines
+        self.reset()
+    def __getitem__(self, n):
+        return self._str[n]
+    def reset(self):
+        self._l = 0 # current line nr
+    def read(self):
+        if not self.eof():
+            out = self[self._l]
+            self._l += 1
+            return out
+        else:
+            return ''
+    def seek_next_non_empty_line(self):
+        for l in self[self._l:]:
+            if l.strip():
+                break
+            else:
+                self._l += 1
+    def eof(self):
+        return self._l >= len(self._str)
+    def read_to_condition(self, condition_func):
+        start = self._l
+        for line in self[start:]:
+            if condition_func(line):
+                return self[start:self._l]
+            self._l += 1
+            if self.eof():
+                return self[start:self._l+1]
+        return []
+    def read_to_next_empty_line(self):
+        self.seek_next_non_empty_line()
+        def is_empty(line):
+            return not line.strip()
+        return self.read_to_condition(is_empty)
+    def read_to_next_unindented_line(self):
+        def is_unindented(line):
+            return (line.strip() and (len(line.lstrip()) == len(line)))
+        return self.read_to_condition(is_unindented)
+    def peek(self,n=0):
+        if self._l + n < len(self._str):
+            return self[self._l + n]
+        else:
+            return ''
+    def is_empty(self):
+        return not ''.join(self._str).strip()
+class NumpyDocString(object):
+    def __init__(self,docstring):
+        docstring = textwrap.dedent(docstring).split('\n')
+        self._doc = Reader(docstring)
+        self._parsed_data = {
+            'Signature': '',
+            'Summary': [''],
+            'Extended Summary': [],
+            'Parameters': [],
+            'Returns': [],
+            'Raises': [],
+            'Warns': [],
+            'Other Parameters': [],
+            'Attributes': [],
+            'Methods': [],
+            'See Also': [],
+            'Notes': [],
+            'Warnings': [],
+            'References': '',
+            'Examples': '',
+            'index': {}
+            }
+        self._parse()
+    def __getitem__(self,key):
+        return self._parsed_data[key]
+    def __setitem__(self,key,val):
+        if not self._parsed_data.has_key(key):
+            warn("Unknown section %s" % key)
+        else:
+            self._parsed_data[key] = val
+    def _is_at_section(self):
+        self._doc.seek_next_non_empty_line()
+        if self._doc.eof():
+            return False
+        l1 = self._doc.peek().strip()  # e.g. Parameters
+        if l1.startswith('.. index::'):
+            return True
+        l2 = self._doc.peek(1).strip() #    ---------- or ==========
+        return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
+    def _strip(self,doc):
+        i = 0
+        j = 0
+        for i,line in enumerate(doc):
+            if line.strip(): break
+        for j,line in enumerate(doc[::-1]):
+            if line.strip(): break
+        return doc[i:len(doc)-j]
+    def _read_to_next_section(self):
+        section = self._doc.read_to_next_empty_line()
+        while not self._is_at_section() and not self._doc.eof():
+            if not self._doc.peek(-1).strip(): # previous line was empty
+                section += ['']
+            section += self._doc.read_to_next_empty_line()
+        return section
+    def _read_sections(self):
+        while not self._doc.eof():
+            data = self._read_to_next_section()
+            name = data[0].strip()
+            if name.startswith('..'): # index section
+                yield name, data[1:]
+            elif len(data) < 2:
+                yield StopIteration
+            else:
+                yield name, self._strip(data[2:])
+    def _parse_param_list(self,content):
+        r = Reader(content)
+        params = []
+        while not r.eof():
+            header = r.read().strip()
+            if ' : ' in header:
+                arg_name, arg_type = header.split(' : ')[:2]
+            else:
+                arg_name, arg_type = header, ''
+            desc = r.read_to_next_unindented_line()
+            desc = dedent_lines(desc)
+            params.append((arg_name,arg_type,desc))
+        return params
+    _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
+                           r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)
+    def _parse_see_also(self, content):
+        """
+        func_name : Descriptive text
+            continued text
+        another_func_name : Descriptive text
+        func_name1, func_name2, :meth:`func_name`, func_name3
+        """
+        items = []
+        def parse_item_name(text):
+            """Match ':role:`name`' or 'name'"""
+            m = self._name_rgx.match(text)
+            if m:
+                g = m.groups()
+                if g[1] is None:
+                    return g[3], None
+                else:
+                    return g[2], g[1]
+            raise ValueError("%s is not a item name" % text)
+        def push_item(name, rest):
+            if not name:
+                return
+            name, role = parse_item_name(name)
+            items.append((name, list(rest), role))
+            del rest[:]
+        current_func = None
+        rest = []
+        for line in content:
+            if not line.strip(): continue
+            m = self._name_rgx.match(line)
+            if m and line[m.end():].strip().startswith(':'):
+                push_item(current_func, rest)
+                current_func, line = line[:m.end()], line[m.end():]
+                rest = [line.split(':', 1)[1].strip()]
+                if not rest[0]:
+                    rest = []
+            elif not line.startswith(' '):
+                push_item(current_func, rest)
+                current_func = None
+                if ',' in line:
+                    for func in line.split(','):
+                        push_item(func, [])
+                elif line.strip():
+                    current_func = line
+            elif current_func is not None:
+                rest.append(line.strip())
+        push_item(current_func, rest)
+        return items
+    def _parse_index(self, section, content):
+        """
+        .. index: default
+           :refguide: something, else, and more
+        """
+        def strip_each_in(lst):
+            return [s.strip() for s in lst]
+        out = {}
+        section = section.split('::')
+        if len(section) > 1:
+            out['default'] = strip_each_in(section[1].split(','))[0]
+        for line in content:
+            line = line.split(':')
+            if len(line) > 2:
+                out[line[1]] = strip_each_in(line[2].split(','))
+        return out
+    def _parse_summary(self):
+        """Grab signature (if given) and summary"""
+        if self._is_at_section():
+            return
+        summary = self._doc.read_to_next_empty_line()
+        summary_str = " ".join([s.strip() for s in summary]).strip()
+        if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
+            self['Signature'] = summary_str
+            if not self._is_at_section():
+                self['Summary'] = self._doc.read_to_next_empty_line()
+        else:
+            self['Summary'] = summary
+        if not self._is_at_section():
+            self['Extended Summary'] = self._read_to_next_section()
+    def _parse(self):
+        self._doc.reset()
+        self._parse_summary()
+        for (section,content) in self._read_sections():
+            if not section.startswith('..'):
+                section = ' '.join([s.capitalize() for s in section.split(' ')])
+            if section in ('Parameters', 'Attributes', 'Methods',
+                           'Returns', 'Raises', 'Warns'):
+                self[section] = self._parse_param_list(content)
+            elif section.startswith('.. index::'):
+                self['index'] = self._parse_index(section, content)
+            elif section == 'See Also':
+                self['See Also'] = self._parse_see_also(content)
+            else:
+                self[section] = content
+    # string conversion routines
+    def _str_header(self, name, symbol='-'):
+        return [name, len(name)*symbol]
+    def _str_indent(self, doc, indent=4):
+        out = []
+        for line in doc:
+            out += [' '*indent + line]
+        return out
+    def _str_signature(self):
+        if self['Signature']:
+            return [self['Signature'].replace('*','\*')] + ['']
+        else:
+            return ['']
+    def _str_summary(self):
+        if self['Summary']:
+            return self['Summary'] + ['']
+        else:
+            return []
+    def _str_extended_summary(self):
+        if self['Extended Summary']:
+            return self['Extended Summary'] + ['']
+        else:
+            return []
+    def _str_param_list(self, name):
+        out = []
+        if self[name]:
+            out += self._str_header(name)
+            for param,param_type,desc in self[name]:
+                out += ['%s : %s' % (param, param_type)]
+                out += self._str_indent(desc)
+            out += ['']
+        return out
+    def _str_section(self, name):
+        out = []
+        if self[name]:
+            out += self._str_header(name)
+            out += self[name]
+            out += ['']
+        return out
+    def _str_see_also(self, func_role):
+        if not self['See Also']: return []
+        out = []
+        out += self._str_header("See Also")
+        last_had_desc = True
+        for func, desc, role in self['See Also']:
+            if role:
+                link = ':%s:`%s`' % (role, func)
+            elif func_role:
+                link = ':%s:`%s`' % (func_role, func)
+            else:
+                link = "`%s`_" % func
+            if desc or last_had_desc:
+                out += ['']
+                out += [link]
+            else:
+                out[-1] += ", %s" % link
+            if desc:
+                out += self._str_indent([' '.join(desc)])
+                last_had_desc = True
+            else:
+                last_had_desc = False
+        out += ['']
+        return out
+    def _str_index(self):
+        idx = self['index']
+        out = []
+        out += ['.. index:: %s' % idx.get('default','')]
+        for section, references in idx.iteritems():
+            if section == 'default':
+                continue
+            out += ['   :%s: %s' % (section, ', '.join(references))]
+        return out
+    def __str__(self, func_role=''):
+        out = []
+        out += self._str_signature()
+        out += self._str_summary()
+        out += self._str_extended_summary()
+        for param_list in ('Parameters','Returns','Raises'):
+            out += self._str_param_list(param_list)
+        out += self._str_section('Warnings')
+        out += self._str_see_also(func_role)
+        for s in ('Notes','References','Examples'):
+            out += self._str_section(s)
+        out += self._str_index()
+        return '\n'.join(out)
+def indent(str,indent=4):
+    indent_str = ' '*indent
+    if str is None:
+        return indent_str
+    lines = str.split('\n')
+    return '\n'.join(indent_str + l for l in lines)
+def dedent_lines(lines):
+    """Deindent a list of lines maximally"""
+    return textwrap.dedent("\n".join(lines)).split("\n")
+def header(text, style='-'):
+    return text + '\n' + style*len(text) + '\n'
+class FunctionDoc(NumpyDocString):
+    def __init__(self, func, role='func', doc=None):
+        self._f = func
+        self._role = role # e.g. "func" or "meth"
+        if doc is None:
+            doc = inspect.getdoc(func) or ''
+        try:
+            NumpyDocString.__init__(self, doc)
+        except ValueError, e:
+            print '*'*78
+            print "ERROR: '%s' while parsing `%s`" % (e, self._f)
+            print '*'*78
+            #print "Docstring follows:"
+            #print doclines
+            #print '='*78
+        if not self['Signature']:
+            func, func_name = self.get_func()
+            try:
+                # try to read signature
+                argspec = inspect.getargspec(func)
+                argspec = inspect.formatargspec(*argspec)
+                argspec = argspec.replace('*','\*')
+                signature = '%s%s' % (func_name, argspec)
+            except TypeError, e:
+                signature = '%s()' % func_name
+            self['Signature'] = signature
+    def get_func(self):
+        func_name = getattr(self._f, '__name__', self.__class__.__name__)
+        if inspect.isclass(self._f):
+            func = getattr(self._f, '__call__', self._f.__init__)
+        else:
+            func = self._f
+        return func, func_name
+    def __str__(self):
+        out = ''
+        func, func_name = self.get_func()
+        signature = self['Signature'].replace('*', '\*')
+        roles = {'func': 'function',
+                 'meth': 'method'}
+        if self._role:
+            if not roles.has_key(self._role):
+                print "Warning: invalid role %s" % self._role
+            out += '.. %s:: %s\n    \n\n' % (roles.get(self._role,''),
+                                             func_name)
+        out += super(FunctionDoc, self).__str__(func_role=self._role)
+        return out
+class ClassDoc(NumpyDocString):
+    def __init__(self,cls,modulename='',func_doc=FunctionDoc,doc=None):
+        if not inspect.isclass(cls):
+            raise ValueError("Initialise using a class. Got %r" % cls)
+        self._cls = cls
+        if modulename and not modulename.endswith('.'):
+            modulename += '.'
+        self._mod = modulename
+        self._name = cls.__name__
+        self._func_doc = func_doc
+        if doc is None:
+            doc = pydoc.getdoc(cls)
+        NumpyDocString.__init__(self, doc)
+    @property
+    def methods(self):
+        return [name for name,func in inspect.getmembers(self._cls)
+                if not name.startswith('_') and callable(func)]
+    def __str__(self):
+        out = ''
+        out += super(ClassDoc, self).__str__()
+        out += "\n\n"
+        #for m in self.methods:
+        #    print "Parsing `%s`" % m
+        #    out += str(self._func_doc(getattr(self._cls,m), 'meth')) + '\n\n'
+        #    out += '.. index::\n   single: %s; %s\n\n' % (self._name, m)
+        return out
diff --git a/doc/sphinx/sphinxext/docscrape_sphinx.py b/doc/sphinx/sphinxext/docscrape_sphinx.py
new file mode 100644
index 0000000..77ed271
--- /dev/null
+++ b/doc/sphinx/sphinxext/docscrape_sphinx.py
@@ -0,0 +1,136 @@
+import re, inspect, textwrap, pydoc
+from docscrape import NumpyDocString, FunctionDoc, ClassDoc
+class SphinxDocString(NumpyDocString):
+    """Render a parsed Numpy-style docstring as Sphinx-flavoured reST.
+
+    Every ``_str_*`` helper returns a list of lines; ``__str__`` concatenates
+    them and joins the result with newlines.
+    """
+    # string conversion routines
+    def _str_header(self, name, symbol='`'):
+        """Return a ``.. rubric::`` heading for *name* (*symbol* is unused)."""
+        return ['.. rubric:: ' + name, '']
+    def _str_field_list(self, name):
+        """Return the field-list marker line ``:name:``."""
+        return [':' + name + ':']
+    def _str_indent(self, doc, indent=4):
+        """Return the items of *doc*, each prefixed by *indent* spaces."""
+        pad = ' '*indent
+        return [pad + line for line in doc]
+    def _str_signature(self):
+        """Return a single empty line; the 'Signature' entry is not rendered."""
+        # The code that used to format self['Signature'] followed an
+        # unconditional return and could never execute.
+        return ['']
+    def _str_summary(self):
+        """Return the 'Summary' lines followed by an empty line."""
+        return self['Summary'] + ['']
+    def _str_extended_summary(self):
+        """Return the 'Extended Summary' lines followed by an empty line."""
+        return self['Extended Summary'] + ['']
+    def _str_param_list(self, name):
+        """Render section *name* (triples of name, type, description) as a field list."""
+        out = []
+        if self[name]:
+            out += self._str_field_list(name)
+            out += ['']
+            for param,param_type,desc in self[name]:
+                out += self._str_indent(['**%s** : %s' % (param.strip(),
+                                                          param_type)])
+                out += ['']
+                # Descriptions are indented one level deeper than the name.
+                out += self._str_indent(desc,8)
+                out += ['']
+        return out
+    def _str_section(self, name):
+        """Render free-text section *name* under a rubric, dedenting its body."""
+        out = []
+        if self[name]:
+            out += self._str_header(name)
+            out += ['']
+            content = textwrap.dedent("\n".join(self[name])).split("\n")
+            out += content
+            out += ['']
+        return out
+    def _str_see_also(self, func_role):
+        """Wrap the parent 'See Also' rendering in a ``.. seealso::`` directive."""
+        out = []
+        if self['See Also']:
+            see_also = super(SphinxDocString, self)._str_see_also(func_role)
+            out = ['.. seealso::', '']
+            # The first two lines of the parent output are dropped and
+            # replaced by the directive above.
+            out += self._str_indent(see_also[2:])
+        return out
+    def _str_warnings(self):
+        """Render the 'Warnings' section as a ``.. warning::`` directive."""
+        out = []
+        if self['Warnings']:
+            out = ['.. warning::', '']
+            out += self._str_indent(self['Warnings'])
+        return out
+    def _str_index(self):
+        """Render the ``index`` mapping as a ``.. index::`` directive."""
+        idx = self['index']
+        out = []
+        if len(idx) == 0:
+            return out
+        out += ['.. index:: %s' % idx.get('default','')]
+        # items() behaves the same as iteritems() here and also exists on
+        # Python 3 dictionaries.
+        for section, references in idx.items():
+            if section == 'default':
+                continue
+            elif section == 'refguide':
+                out += ['   single: %s' % (', '.join(references))]
+            else:
+                out += ['   %s: %s' % (section, ','.join(references))]
+        return out
+    def _str_references(self):
+        """Render the 'References' section under a rubric."""
+        out = []
+        if self['References']:
+            out += self._str_header('References')
+            # A bare string is stored back as a one-element list so that it
+            # can be extended line by line.
+            if isinstance(self['References'], str):
+                self['References'] = [self['References']]
+            out.extend(self['References'])
+            out += ['']
+        return out
+    def __str__(self, indent=0, func_role="obj"):
+        """Assemble all sections, indent them by *indent* and join with newlines."""
+        out = []
+        out += self._str_signature()
+        out += self._str_index() + ['']
+        out += self._str_summary()
+        out += self._str_extended_summary()
+        for param_list in ('Parameters', 'Attributes', 'Methods',
+                           'Returns','Raises'):
+            out += self._str_param_list(param_list)
+        out += self._str_warnings()
+        out += self._str_see_also(func_role)
+        out += self._str_section('Notes')
+        out += self._str_references()
+        out += self._str_section('Examples')
+        out = self._str_indent(out,indent)
+        return '\n'.join(out)
+class SphinxFunctionDoc(SphinxDocString, FunctionDoc):
+    # Combines SphinxDocString with FunctionDoc; defines nothing of its own.
+    pass
+class SphinxClassDoc(SphinxDocString, ClassDoc):
+    # Combines SphinxDocString with ClassDoc; defines nothing of its own.
+    pass
+def get_doc_object(obj, what=None, doc=None):
+    """Return the Sphinx docstring wrapper that matches *obj*.
+
+    *what* may be 'class', 'function', 'method' or anything else; when it is
+    None it is inferred from *obj*.  *doc* is passed on as the docstring
+    text; in the generic case a None *doc* is replaced by
+    ``pydoc.getdoc(obj)``.
+    """
+    kind = what
+    if kind is None:
+        # Classes are tested first because they are callable as well.
+        if inspect.isclass(obj):
+            kind = 'class'
+        elif inspect.ismodule(obj):
+            kind = 'module'
+        elif callable(obj):
+            kind = 'function'
+        else:
+            kind = 'object'
+    if kind == 'class':
+        return SphinxClassDoc(obj, '', func_doc=SphinxFunctionDoc, doc=doc)
+    if kind in ('function', 'method'):
+        return SphinxFunctionDoc(obj, '', doc=doc)
+    text = pydoc.getdoc(obj) if doc is None else doc
+    return SphinxDocString(text)
diff --git a/doc/sphinx/sphinxext/ipython_console_highlighting.py b/doc/sphinx/sphinxext/ipython_console_highlighting.py
new file mode 100644
index 0000000..217b779
--- /dev/null
+++ b/doc/sphinx/sphinxext/ipython_console_highlighting.py
@@ -0,0 +1,114 @@
+"""reST directive for syntax-highlighting ipython interactive sessions.
+XXX - See what improvements can be made based on the new (as of Sept 2009)
+'pycon' lexer for the python console.  At the very least it will give better
+highlighted tracebacks.
+# Needed modules
+# Standard library
+import re
+# Third party
+from pygments.lexer import Lexer, do_insertions
+from pygments.lexers.agile import (PythonConsoleLexer, PythonLexer, 
+                                   PythonTracebackLexer)
+from pygments.token import Comment, Generic
+from sphinx import highlighting
+# Global constants
+line_re = re.compile('.*?\n')
+# Code begins - classes and functions
+class IPythonConsoleLexer(Lexer):
+    """
+    For IPython console output or doctests, such as:
+    .. sourcecode:: ipython
+      In [1]: a = 'foo'
+      In [2]: a
+      Out[2]: 'foo'
+      In [3]: print a
+      foo
+      In [4]: 1 / 0
+    Notes:
+      - Tracebacks are not currently supported.
+      - It assumes the default IPython prompts, not customized ones.
+    """
+    name = 'IPython console session'
+    aliases = ['ipython']
+    mimetypes = ['text/x-ipython-console']
+    # Raw strings keep the patterns unchanged while avoiding invalid
+    # escape sequences such as "\[" in ordinary string literals.
+    input_prompt = re.compile(r"(In \[[0-9]+\]: )|(   \.\.\.+:)")
+    output_prompt = re.compile(r"(Out\[[0-9]+\]: )|(   \.\.\.+:)")
+    continue_prompt = re.compile(r"   \.\.\.+:")
+    tb_start = re.compile(r"\-+")
+    def get_tokens_unprocessed(self, text):
+        """Yield ``(index, token, value)`` tuples for *text*.
+
+        Lines that start with a prompt have the prompt recorded as an
+        insertion and the remainder buffered as Python code.  Any other line
+        first flushes the buffered code through ``PythonLexer`` and is then
+        yielded as ``Generic.Output``.
+        """
+        pylexer = PythonLexer(**self.options)
+        curcode = ''
+        insertions = []
+        for match in line_re.finditer(text):
+            line = match.group()
+            input_prompt = self.input_prompt.match(line)
+            continue_prompt = self.continue_prompt.match(line.rstrip())
+            output_prompt = self.output_prompt.match(line)
+            if line.startswith("#"):
+                insertions.append((len(curcode),
+                                   [(0, Comment, line)]))
+            elif input_prompt is not None:
+                insertions.append((len(curcode),
+                                   [(0, Generic.Prompt, input_prompt.group())]))
+                curcode += line[input_prompt.end():]
+            elif continue_prompt is not None:
+                insertions.append((len(curcode),
+                                   [(0, Generic.Prompt, continue_prompt.group())]))
+                curcode += line[continue_prompt.end():]
+            elif output_prompt is not None:
+                # Use the 'error' token for output.  We should probably make
+                # our own token, but error is typically in a bright color like
+                # red, so it works fine for our output prompts.
+                insertions.append((len(curcode),
+                                   [(0, Generic.Error, output_prompt.group())]))
+                curcode += line[output_prompt.end():]
+            else:
+                if curcode:
+                    for item in do_insertions(insertions,
+                                              pylexer.get_tokens_unprocessed(curcode)):
+                        yield item
+                    # Reset only after the whole buffer has been flushed, so
+                    # the state is cleared even if nothing was yielded.
+                    curcode = ''
+                    insertions = []
+                yield match.start(), Generic.Output, line
+        # Flush whatever code is still buffered at the end of the text.
+        if curcode:
+            for item in do_insertions(insertions,
+                                      pylexer.get_tokens_unprocessed(curcode)):
+                yield item
+def setup(app):
+    """Setup as a sphinx extension.
+
+    Intentionally does nothing with *app*.
+    """
+    # This is only a lexer, so adding it below to pygments appears sufficient.
+    # But if somebody knows that the right API usage should be to do that via
+    # sphinx, by all means fix it here.  At least having this setup()
+    # function suppresses the sphinx warning we'd get without it.
+    pass
+# Register the extension as a valid pygments lexer.  This runs when the
+# module is imported and stores a lexer instance under the 'ipython' key.
+highlighting.lexers['ipython'] = IPythonConsoleLexer()
diff --git a/doc/sphinx/sphinxext/numpydoc.py b/doc/sphinx/sphinxext/numpydoc.py
new file mode 100644
index 0000000..ff6c44c
--- /dev/null
+++ b/doc/sphinx/sphinxext/numpydoc.py
@@ -0,0 +1,116 @@
+Sphinx extension that handles docstrings in the Numpy standard format. [1]
+It will:
+- Convert Parameters etc. sections to field lists.
+- Convert See Also section to a See also entry.
+- Renumber references.
+- Extract the signature from the docstring, if it can't be determined otherwise.
+.. [1] http://projects.scipy.org/scipy/numpy/wiki/CodingStyleGuidelines#docstring-standard
+import os, re, pydoc
+from docscrape_sphinx import get_doc_object, SphinxDocString
+import inspect
+def mangle_docstrings(app, what, name, obj, options, lines,
+                      reference_offset=[0]):
+    """Rewrite the docstring *lines* in place.
+
+    For ``what == 'module'`` a leading over/underlined title is stripped;
+    any other docstring is re-rendered through ``get_doc_object``.  When
+    ``app.config.numpydoc_edit_link`` is set and *obj* has a non-empty
+    ``__name__``, the formatted link is appended in an ``htmlonly`` block.
+    Finally the numeric references (``.. [N]`` and ``[N]_``) are shifted by
+    ``reference_offset[0]``.
+
+    The single element of *reference_offset* is increased at the end of
+    every call, so the default list acts as a counter shared by successive
+    calls; pass your own one-element list to control the numbering.
+    """
+    if what == 'module':
+        # Strip top title
+        title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*',
+                              re.I|re.S)
+        lines[:] = title_re.sub('', "\n".join(lines)).split("\n")
+    else:
+        doc = get_doc_object(obj, what, "\n".join(lines))
+        lines[:] = str(doc).split("\n")
+    if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \
+           obj.__name__:
+        if hasattr(obj, '__module__'):
+            v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__))
+        else:
+            v = dict(full_name=obj.__name__)
+        lines += ['', '.. htmlonly::', '']
+        lines += ['    %s' % x for x in
+                  (app.config.numpydoc_edit_link % v).split("\n")]
+    # replace reference numbers so that there are no duplicates
+    references = []
+    for l in lines:
+        l = l.strip()
+        if l.startswith('.. ['):
+            try:
+                references.append(int(l[len('.. ['):l.index(']')]))
+            except ValueError:
+                print("WARNING: invalid reference in %s docstring" % name)
+    # Start renaming from the biggest number, otherwise we may
+    # overwrite references: with an ascending order and a non-zero offset,
+    # [1] would become [2] and then be renamed a second time as [2].
+    references.sort(reverse=True)
+    if references:
+        for i in range(len(lines)):
+            for r in references:
+                new_r = reference_offset[0] + r
+                lines[i] = lines[i].replace('[%d]_' % r,
+                                            '[%d]_' % new_r)
+                lines[i] = lines[i].replace('.. [%d]' % r,
+                                            '.. [%d]' % new_r)
+    reference_offset[0] += len(references)
+def mangle_signature(app, what, name, obj, options, sig, retann):
+    """Return ``(signature, '')`` taken from the docstring of *obj*, or None.
+
+    ``('', '')`` is returned for a class whose ``__init__`` docstring
+    contains 'initializes x; see '.  None is returned when *obj* is neither
+    callable nor flagged with ``__argspec_is_invalid_``, has no ``__doc__``,
+    or its docstring carries no 'Signature' entry.
+    """
+    # Do not try to inspect classes that don't define `__init__`
+    if inspect.isclass(obj):
+        if 'initializes x; see ' in pydoc.getdoc(obj.__init__):
+            return '', ''
+    inspectable = callable(obj) or hasattr(obj, '__argspec_is_invalid_')
+    if not inspectable:
+        return None
+    if not hasattr(obj, '__doc__'):
+        return None
+    parsed = SphinxDocString(pydoc.getdoc(obj))
+    declared = parsed['Signature']
+    if declared:
+        # Keep only the part starting at the first opening parenthesis.
+        return re.sub("^[^(]*", "", declared), ''
+    return None
+def initialize(app):
+    """Hook ``mangle_signature`` into Sphinx.
+
+    Connects it to the 'autodoc-process-signature' event; if that call
+    raises, falls back to monkeypatching ``sphinx.ext.autodoc``.
+    """
+    try:
+        app.connect('autodoc-process-signature', mangle_signature)
+    except Exception:
+        # A bare ``except:`` would also intercept KeyboardInterrupt and
+        # SystemExit, which must keep propagating.
+        monkeypatch_sphinx_ext_autodoc()
+def setup(app, get_doc_object_=get_doc_object):
+    """Register the numpydoc handlers and configuration value on *app*.
+
+    *get_doc_object_* replaces the module-level ``get_doc_object``.
+    """
+    # Rebind the module global to the factory supplied by the caller.
+    global get_doc_object
+    get_doc_object = get_doc_object_
+    app.connect('autodoc-process-docstring', mangle_docstrings)
+    app.connect('builder-inited', initialize)
+    # 'numpydoc_edit_link' defaults to None, which disables the edit link
+    # appended by mangle_docstrings.
+    app.add_config_value('numpydoc_edit_link', None, True)
+# Monkeypatch sphinx.ext.autodoc to accept argspecless autodocs (Sphinx < 0.5)
+def monkeypatch_sphinx_ext_autodoc():
+    """Replace ``sphinx.ext.autodoc.format_signature`` by ``our_format_signature``.
+
+    The previous function is kept in the module global
+    ``_original_format_signature``.  Nothing happens when the patch is
+    already in place.
+    """
+    global _original_format_signature
+    import sphinx.ext.autodoc
+    autodoc = sphinx.ext.autodoc
+    if autodoc.format_signature is our_format_signature:
+        return
+    print("[numpydoc] Monkeypatching sphinx.ext.autodoc ...")
+    _original_format_signature = autodoc.format_signature
+    autodoc.format_signature = our_format_signature
+def our_format_signature(what, obj):
+    """Return the docstring-derived signature, else defer to the saved original."""
+    mangled = mangle_signature(None, what, None, obj, None, None, None)
+    if mangled is None:
+        return _original_format_signature(what, obj)
+    # mangle_signature returns a (signature, return annotation) pair.
+    return mangled[0]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8aa4c99
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+--extra-index-url https://pypi.python.org/simple/
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..861a9f5
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,5 @@
+tag_build = 
+tag_date = 0
+tag_svn_revision = 0
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..9e9b5b6
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,70 @@
+#! /usr/bin/env python
+# Install script
+import sys
+import os
+import os.path
+import re
+import glob
+from os import path
+# Add to the python path the directory containing the extensions
+# of distutils (the ``obidistutils`` package is stored under ``distutils.ext``).
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                             'distutils.ext'))
+# Package metadata handed to setup(); PACKAGE and LICENSE are read by the
+# main section of this script and therefore have to be defined here.
+PACKAGE = "OBITools"
+VERSION = "1.2.11"
+AUTHOR  = 'Eric Coissac'
+EMAIL   = 'eric at coissac.eu'
+URL     = 'http://metabarcoding.org/obitools'
+LICENSE = 'CeCILL-V2'
+SRC       = 'src'
+CSRC      = None
+if __name__=="__main__":
+    # serenity_mode() parses its own command-line options, leaves the
+    # remaining ones in sys.argv and returns whether serenity mode is on.
+    from obidistutils.serenity import serenity_mode
+    serenity=serenity_mode(PACKAGE,VERSION)
+    from obidistutils.core import setup
+    # NOTE(review): CTOOLS, CEXES, FILES and DEPRECATED_SCRIPTS are not
+    # referenced anywhere in the visible part of this script - confirm
+    # whether they are still needed.
+    from obidistutils.core import CTOOLS
+    from obidistutils.core import CEXES
+    from obidistutils.core import FILES
+    DEPRECATED_SCRIPTS=["fastaComplement", "fastaUniq","fasta2tab","fastaAnnotate",
+                    "fastaSample","fastaGrep","fastaCount","fastaLength",
+                    "fastaHead","fastaTail","fastaSplit","fastaStrand",
+                    "fastaLocate","solexaPairEnd","ecoTag","obijoinpairedend"
+                       ]
+    # The module-level constants supply the metadata; the serenity flag
+    # computed above is forwarded to setup().
+    setup(name=PACKAGE,
+          description="Scripts and library for sequence analysis",
+          classifiers=[
+            'Development Status :: 5 - Production/Stable',
+            'Environment :: Console',
+            'Intended Audience :: Science/Research',
+            'License :: Other/Proprietary License',
+            'Operating System :: Unix',
+            'Programming Language :: Python',
+            'Programming Language :: Python :: 2',
+            'Topic :: Scientific/Engineering :: Bio-Informatics',
+            'Topic :: Utilities',
+          ],
+          version=VERSION,
+          author=AUTHOR,
+          author_email=EMAIL,
+          license=LICENSE,
+          url=URL,
+          python_src=SRC,
+          sse='sse2',
+          serenity=serenity)
diff --git a/src/OBITools.egg-info/PKG-INFO b/src/OBITools.egg-info/PKG-INFO
new file mode 100644
index 0000000..91758e5
--- /dev/null
+++ b/src/OBITools.egg-info/PKG-INFO
@@ -0,0 +1,19 @@
+Metadata-Version: 1.1
+Name: OBITools
+Version: 1.2.11
+Summary: Scripts and library for sequence analysis
+Home-page: http://metabarcoding.org/obitools
+Author: Eric Coissac
+Author-email: eric at coissac.eu
+License: CeCILL-V2
+Description: UNKNOWN
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: Other/Proprietary License
+Classifier: Operating System :: Unix
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Topic :: Utilities
diff --git a/src/OBITools.egg-info/SOURCES.txt b/src/OBITools.egg-info/SOURCES.txt
new file mode 100644
index 0000000..1d07409
--- /dev/null
+++ b/src/OBITools.egg-info/SOURCES.txt
@@ -0,0 +1,591 @@
\ No newline at end of file
diff --git a/src/OBITools.egg-info/dependency_links.txt b/src/OBITools.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/OBITools.egg-info/dependency_links.txt
@@ -0,0 +1 @@
diff --git a/src/OBITools.egg-info/not-zip-safe b/src/OBITools.egg-info/not-zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/OBITools.egg-info/not-zip-safe
@@ -0,0 +1 @@
diff --git a/src/OBITools.egg-info/requires.txt b/src/OBITools.egg-info/requires.txt
new file mode 100644
index 0000000..791bd73
--- /dev/null
+++ b/src/OBITools.egg-info/requires.txt
@@ -0,0 +1,5 @@
diff --git a/src/OBITools.egg-info/top_level.txt b/src/OBITools.egg-info/top_level.txt
new file mode 100644
index 0000000..7ae40d8
--- /dev/null
+++ b/src/OBITools.egg-info/top_level.txt
@@ -0,0 +1 @@
diff --git a/src/ali2consensus.py b/src/ali2consensus.py
new file mode 100644
index 0000000..e6438e6
--- /dev/null
+++ b/src/ali2consensus.py
@@ -0,0 +1,111 @@
+Created on 30 sept. 2011
+ at author: fboyer
+Used to get the consensus sequence of a nucleotide fasta alignment.
+ali2consensus.py -t 75 myFastaAlignedSequences.fasta
+ at todo: Check that the input/output format options suit the script objective
+from obitools.fasta import fastFastaIterator
+from obitools.options import getOptionManager
+from obitools.alignment import Alignment, columnIterator
+from obitools import NucSequence
+from obitools.format.options import sequenceWriterGenerator, addInOutputOption
+def addAliOptions(optionManager):
+    """Register the ``-t``/``--threshold`` option on *optionManager*.
+
+    The option is stored as an int in ``threshold`` and defaults to 50.
+    """
+    flags = ('-t', '--threshold')
+    settings = dict(action="store",
+                    dest="threshold",
+                    metavar="",
+                    type="int",
+                    default=50,
+                    help="Threshold parameter for consensus building")
+    optionManager.add_option(*flags, **settings)
+if __name__=='__main__':
+    # Build one consensus sequence from the aligned input: for every column
+    # the most frequent symbols are accumulated until they reach the
+    # requested share of the sequences, and the resulting set is written as
+    # an IUPAC code ('?' when no code exists for that set).
+    optionParser = getOptionManager([addInOutputOption, addAliOptions],
+                                    entryIterator=fastFastaIterator
+                                    )
+    (options, entries) = optionParser()
+    # Explicit check: an ``assert`` disappears under ``python -O``.
+    if not 0 <= options.threshold <= 100:
+        raise ValueError('Threshold must belong to [0, 100]')
+    threshold = options.threshold/100.
+    #taken from http://www.dna.affrc.go.jp/misc/MPsrch/InfoIUPAC.html
+    iupacDNA = {
+        '-': ('-',),
+        'A': ('A',),
+        'C': ('C',),
+        'G': ('G',),
+        'T': ('T',),
+        'U': ('T',),
+        'M': ('A', 'C'),
+        'R': ('A', 'G'),
+        'W': ('A', 'T'),
+        'S': ('C', 'G'),
+        'Y': ('C', 'T'),
+        'K': ('G', 'T'),
+        'V': ('A', 'C', 'G'),
+        'H': ('A', 'C', 'T'),
+        'D': ('A', 'G', 'T'),
+        'B': ('C', 'G', 'T'),
+        'N': ('A', 'C', 'G', 'T'),
+    }
+    # Map every base tuple back to its code.  'U' is left out: it shares
+    # ('T',) with 'T' and, depending on dictionary order, could otherwise
+    # be emitted instead of 'T'.
+    reverse_iupacDNA = dict((bases, code) for code, bases in iupacDNA.items()
+                            if code != 'U')
+    alignedSequences = Alignment(entries)
+    consensusNtSeq = ""
+    def addCountInCol(t, columnCount):
+        # An ambiguous symbol spreads one count evenly over its bases.
+        lt = float(len(t))
+        for x in t:
+            columnCount[x]+= 1/lt
+    thresholdCount = threshold*len(alignedSequences)
+    for c in columnIterator(alignedSequences):
+        colC = {'A':0., 'C':0., 'G':0., 'T':0., '-':0.}
+        # Plain loop: map() is lazy on Python 3 and was used only for its
+        # side effects.
+        for nt in c:
+            addCountInCol(iupacDNA[nt.upper()], colC)
+        # Most frequent symbol first; the sort is stable, so ties keep the
+        # dictionary order as before.
+        counts = sorted(colC.items(), key=lambda item: item[1], reverse=True)
+        sumCounts = 0
+        symbols = list()
+        for nt, count in counts:
+            sumCounts += count
+            symbols.append(nt)
+            if sumCounts>=thresholdCount:
+                symbols.sort()
+                consensusNtSeq += reverse_iupacDNA.get(tuple(symbols), '?')
+                break
+    # options.threshold is used directly: int(threshold*100) can be off by
+    # one (29/100.*100 truncates to 28).
+    consensusSeq = NucSequence('Consensus_%d' % options.threshold,
+                               consensusNtSeq,
+                               'Consensus sequence done on %d aligned sequences of length %d with a threshold of %d %%'%(len(alignedSequences),
+                                                                                                                        len(alignedSequences[0]),
+                                                                                                                        options.threshold))
+    writer = sequenceWriterGenerator(options)
+    writer(consensusSeq)
diff --git a/src/ecodbtaxstat.py b/src/ecodbtaxstat.py
new file mode 100644
index 0000000..7600fee
--- /dev/null
+++ b/src/ecodbtaxstat.py
@@ -0,0 +1,76 @@
+:py:mod:`ecodbtaxstat`: gives taxonomic rank frequency of a given ``ecopcr`` database   
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`ecodbtaxstat` command requires an ``ecopcr`` database and a taxonomic rank 
+(specified by the ``--rank`` option, default *species*). The command outputs first 
+the total number of sequence records in the database having taxonomic information at this rank, 
+and then the number of sequence records for each value of this rank.
+from obitools.options import getOptionManager
+from obitools.options.taxonomyfilter import addTaxonomyFilterOptions,  \
+                                            taxonomyFilterIteratorGenerator
+from obitools.ecopcr.taxonomy import EcoTaxonomyDB
+from obitools.ecopcr.sequence import EcoPCRDBSequenceIterator
+def addRankOptions(optionManager):
+    """Add the ``--rank`` option, in its own option group, to *optionManager*.
+
+    The value is stored as a string in ``rank`` and defaults to "species".
+    """
+    rank_help = ("The taxonomic rank at which frequencies have to be computed. "
+                 "Possible values are: "
+                 "class, family, forma, genus, infraclass, infraorder, kingdom, "
+                 "order, parvorder, phylum, species, species group, "
+                 "species subgroup, subclass, subfamily, subgenus, subkingdom, "
+                 "suborder, subphylum, subspecies, subtribe, superclass, "
+                 "superfamily, superkingdom, superorder, superphylum, tribe or varietas. "
+                 "(Default: species)")
+    settings = dict(action="store",
+                    dest="rank",
+                    metavar="<taxonomic rank>",
+                    type="string",
+                    default="species",
+                    help=rank_help)
+    rank_group = optionManager.add_option_group('ecodbtaxstat specific option')
+    rank_group.add_option('--rank', **settings)
+def cmptax(taxonomy):
+    """Return a ``cmp``-style comparator ordering taxa by scientific name.
+
+    The returned function takes two taxa, looks up both names with
+    ``taxonomy.getScientificName`` and returns -1, 0 or 1.
+    """
+    def cmptaxon(t1,t2):
+        name1 = taxonomy.getScientificName(t1)
+        name2 = taxonomy.getScientificName(t2)
+        # Same result as the ``cmp`` builtin, which Python 3 no longer has.
+        return (name1 > name2) - (name1 < name2)
+    return cmptaxon
+if __name__=='__main__':
+    # Count, for the requested rank, how many sequence records of the
+    # database fall under each taxon, then print the totals followed by one
+    # "name<TAB>count" line per taxon sorted by scientific name.
+    optionParser = getOptionManager([addRankOptions,addTaxonomyFilterOptions], progdoc=__doc__)
+    (options, entries) = optionParser()
+    keepSequences = taxonomyFilterIteratorGenerator(options)
+    seqdb = EcoPCRDBSequenceIterator(options.ecodb,options.taxonomy)
+    stats = {}
+    considered = 0
+    total = 0
+    for seq in keepSequences(seqdb):
+        total += 1
+        taxon = options.taxonomy.getTaxonAtRank(seq['taxid'],options.rank)
+        if taxon is None:
+            # No taxon at this rank for the record: counted in the total only.
+            continue
+        considered += 1
+        stats[taxon] = stats.get(taxon,0) + 1
+    print("#sequence count : %d" % total)
+    print("#considered sequences : %d" % considered)
+    print("# %s : %d" % (options.rank,len(stats)))
+    taxons = list(stats)
+    taxons.sort(cmptax(options.taxonomy))
+    for taxon in taxons:
+        print("%s\t%d" % (options.taxonomy.getScientificName(taxon),stats[taxon]))
\ No newline at end of file
diff --git a/src/ecotag.py b/src/ecotag.py
new file mode 100755
index 0000000..5dacfe6
--- /dev/null
+++ b/src/ecotag.py
@@ -0,0 +1,460 @@
+:py:mod:`ecotag`: assigns sequences to taxa
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`ecotag` is the tool that assigns sequences to a taxon based on 
+sequence similarity. The program first searches the reference database for the 
+reference sequence(s) (hereafter referred to as 'primary reference sequence(s)') showing the 
+highest similarity with the query sequence. Then it looks for all other reference 
+sequences (hereafter referred to as 'secondary reference sequences') whose 
+similarity with the primary reference sequence(s) is equal to or higher than the 
+similarity between the primary reference and the query sequences. Finally, it 
+assigns the query sequence to the most recent common ancestor of the primary and 
+secondary reference sequences. 
+As input, `ecotag` requires the sequences to be assigned, a reference database 
+in :doc:`fasta <../fasta>` format, where each sequence is associated with a taxon identified 
+by a unique *taxid*, and a taxonomy database where taxonomic information is stored 
+for each *taxid*.
+  *Example:*
+    .. code-block:: bash
+          > ecotag -d embl_r113  -R ReferenceDB.fasta \\
+            --sort=count -m 0.95 -r seq.fasta > seq_tag.fasta
+    The above command specifies that each sequence stored in ``seq.fasta`` 
+    is compared to those in the reference database called ``ReferenceDB.fasta`` 
+    for taxonomic assignment. In the output file ``seq_tag.fasta``, the sequences 
+    are sorted from highest to lowest counts. When there is no reference sequence 
+    with a similarity equal to or higher than 0.95 for a given sequence, no taxonomic 
+    information is provided for this sequence in ``seq_tag.fasta``.
+from obitools.fasta import fastaNucIterator
+#from obitools.align.ssearch import ssearchIterator
+from obitools.utils.bioseq import uniqSequence,sortSequence
+from obitools.align import lenlcs,ALILEN
+from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDatabase
+from obitools.options import getOptionManager
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from collections import OrderedDict
+import sys
+import math
+import os.path
+def addSearchOptions(optionManager):
+    """
+    Register the ecotag search options on `optionManager`.
+
+    `optionManager` only needs an optparse-like ``add_option`` method.
+    The options set the following destinations: ``database``, ``minimum``,
+    ``circle``, ``explain``, ``uniq``, ``sort``, ``reverse``, ``error``,
+    ``minmatches`` and ``cache``.
+    """
+    optionManager.add_option('-R','--ref-database',
+                             action="store", dest="database",
+                             metavar="<FILENAME>",
+                             type="string",
+                             help="fasta file containing reference "
+                                  "sequences")
+#    optionManager.add_option('-s','--shape',
+#                             action="store", dest="shape",
+#                             metavar="shapeness",
+#                             type="float",
+#                             default=2.0,
+#                             help="selectivity on the ssearch results "
+#                                  "1.0 is the higher selectivity. "
+#                                  "values > 1.0 decrease selectivity.")
+    optionManager.add_option('-m','--minimum-identity',
+                             action="store", dest="minimum",
+                             metavar="identity",
+                             type="float",
+                             default=0.0,
+                             help="minimum identity to consider.")
+    optionManager.add_option('--minimum-circle',
+                             action="store", dest="circle",
+                             metavar="identity",
+                             type="float",
+                             default=1.0,
+                             help="minimum identity considered for the assignment circle.")
+#    optionManager.add_option('-S','--normalized-smallest',
+#                             action="store_false", dest="large",
+#                             default=True,
+#                             help="normalize identity over the shortest sequence")
+#    optionManager.add_option('-L','--normalized-largest',
+#                             action="store_true", dest="large",
+#                             default=True,
+#                             help="normalize identity over the longest sequence")
+    optionManager.add_option('-x','--explain',
+                             action='store',dest='explain',
+                             type="string",
+                             default=None,
+                             help="Add in the output CD (complementary data) record "
+                                  "to explain identification decision")
+    optionManager.add_option('-u','--uniq',
+                             action='store_true',dest='uniq',
+                             default=False,
+                             help='Apply a uniq filter on query sequences before identification')
+#    optionManager.add_option('-T','--table',
+#                             action='store_true',dest='table',
+#                             default=False,
+#                             help='Write results in a tabular format')
+#    optionManager.add_option('--store-in-db',
+#                             action='store_true',dest='storeindb',
+#                             default=False,
+#                             help='Write results in an ecobarcode DB')
+#    optionManager.add_option('--update-db',
+#                             action='store_true',dest='updatedb',
+#                             default=False,
+#                             help='Run identification only on new sequences')
+    optionManager.add_option('--sort',
+                             action='store',dest='sort',
+                             type='string',
+                             default=None,
+                             help='Sort output on input sequence tag')
+    # The sort option registered above is --sort; no -S option is registered,
+    # so the help text must point to --sort.
+    optionManager.add_option('-r','--reverse',
+                             action='store_true',dest='reverse',
+                             default=False,
+                             help='Sort in reverse order (should be used with --sort)')
+#    optionManager.add_option('-o','--output-sequence',
+#                             action='store_true',dest='sequence',
+#                             default=False,
+#                             help='Add an extra column in the output with the query sequence')
+#    optionManager.add_option('--self-matches',
+#                             action='store_true',dest='selfmatches',
+#                             default=False,
+#                             help='Switch to the new match algorithm')    
+    optionManager.add_option('-E','--errors',
+                             action='store',dest='error',
+                             type='float',
+                             default=0.0,
+                             help='Tolerated rate of wrong assignation')    
+    optionManager.add_option('-M','--min-matches',
+                             action='store',dest='minmatches',
+                             type="int",
+                             default=1,
+                             help='Minimum congruent assignation')    
+    optionManager.add_option('--cache-size',
+                             action='store',dest='cache',
+                             type='int',
+                             metavar='<SIZE>',
+                             default=1000000,
+                             help='Cache size for the alignment score')    
+def count(data):
+    """
+    Tally the items of `data` by key.
+
+    An item that is a list or tuple with more than one element contributes
+    its second element to a list stored under its first element.  Any other
+    item (a plain value, or a one-element list/tuple keyed by its first
+    element) increments an integer counter stored under its key.
+    """
+    tally = {}
+    for item in data:
+        isSequence = isinstance(item, (list,tuple))
+        key = item[0] if isSequence else item
+        if isSequence and len(item) > 1:
+            tally[key] = tally.get(key,[]) + [item[1]]
+        else:
+            tally[key] = tally.get(key,0) + 1
+    return tally
+def myLenlcs(s1, s2, minid, normalized, reference):
+    """
+    Return ``(lcs, lali)`` for the pair `s1`, `s2`.
+
+    When `s1` has no 'pairend_limit' key, the call is forwarded unchanged to
+    ``lenlcs``.  Otherwise both sequences are cut in two parts which are
+    scored separately with ``lenlcs`` and the two results are combined into
+    a ratio.  In that second case `normalized` and `reference` are not used.
+    """
+    if s1.hasKey('pairend_limit') :
+        # min(0, ...) is never positive: 0 when s1 is at least as long as s2,
+        # the (negative) length difference otherwise.
+        # NOTE(review): subtracting it below therefore never decreases the
+        # totals - confirm this is the intended correction.
+        overlap = min(0,len(s1) - len(s2))
+        # s1 is split at 'pairend_limit'; s2 is cut at the same position on
+        # its left side and on a right part no longer than the right part of s1.
+        f5P1 = s1[0:s1['pairend_limit']]
+        f3P1 = s1[s1['pairend_limit']:]
+        f5P2 = s2[0:s1['pairend_limit']]
+        from2 = len(s2) - min(len(s2),len(f3P1))
+        f3P2 = s2[from2:]
+        # The error budget is computed once over the full length of s1 and
+        # the same budget is granted to each part.
+        errors = int(math.ceil((1-minid) * len(s1)))
+        minid5P = max(len(f5P1),len(f5P2)) - errors
+        minid3P = max(len(f3P1),len(f3P2)) - errors
+        lcs5P, lali5P = lenlcs(f5P1,f5P2,minid5P,False)
+        lcs3P, lali3P = lenlcs(f3P1,f3P2,minid3P,False)
+        raw_lcs  = lcs5P  + lcs3P  - overlap
+        lali = lali5P + lali3P - overlap
+        # NOTE(review): raises ZeroDivisionError if lali is 0 - verify that
+        # lenlcs cannot return two zero alignment lengths here.
+        lcs = raw_lcs / float(lali)
+    else:     
+        lcs, lali = lenlcs(s1,s2,minid,normalized,reference)
+    return lcs, lali
+def cachedLenLCS(s1,s2,minid,normalized,reference):
+    """
+    Return ``lenlcs(s1,s2,minid,normalized,reference)`` through a cache.
+
+    The cache key is the unordered pair of the two sequence ids; `minid`,
+    `normalized` and `reference` are not part of the key.  A hit moves the
+    entry to the end of the ordered cache, and when the cache grows over
+    ``__CACHE_SIZE__`` the entry at the front is dropped.  ``__INCache__``
+    and ``__OUTCache__`` count hits and misses respectively.
+    """
+    global __LCSCache__
+    global __INCache__
+    global __OUTCache__
+    global __CACHE_SIZE__
+    key = frozenset((s1.id,s2.id))
+    try:
+        # removing the entry lets the insertion below put it back at the end
+        score = __LCSCache__.pop(key)
+    except KeyError:
+        score = lenlcs(s1,s2,minid,normalized,reference)
+        __OUTCache__ += 1.0
+    else:
+        __INCache__ += 1.0
+    __LCSCache__[key] = score
+    if len(__LCSCache__) > __CACHE_SIZE__:
+        __LCSCache__.popitem(last=False)
+    return score
+#def lcsIterator(entries,db,options):
+#    for seq in entries:
+#        results = []
+#        maxid   = (None,0.0)
+#        minid   = options.minimum
+#        for d in db:
+#            lcs,lali = myLenlcs(seq, d, minid,normalized=True,reference=ALILEN)
+#            if lcs > maxid[1]:
+#                maxid = (d,lcs)
+#                minid = maxid[1] ** options.shape
+#            results.append((d,lcs))
+#        minid = maxid[1] ** options.shape
+#        results = [x for x in results if x[1]>=minid]
+#        yield seq,([maxid[0]],maxid[1]),results
+def mostPreciseTaxid(taxidlist, options):
+    """
+    Reduce a collection of taxids to a single taxid.
+
+    Taxids that are not greater than 1 are ignored; if nothing remains the
+    result is 1.  The remaining taxids are merged two by two: when
+    ``options.taxonomy.isAncestor`` is true for a pair, the second argument
+    of that call is kept, otherwise the pair is replaced by
+    ``options.taxonomy.lastCommonTaxon`` of both.
+    (presumably isAncestor(a, b) means "a is an ancestor of b" - to confirm)
+    """
+    pending = set(t for t in taxidlist if t > 1)
+    if len(pending) == 0:
+        pending = set([1])
+    while len(pending) > 1:
+        first = pending.pop()
+        second = pending.pop()
+        if options.taxonomy.isAncestor(first,second):
+            merged = second
+        elif options.taxonomy.isAncestor(second,first):
+            merged = first
+        else:
+            merged = options.taxonomy.lastCommonTaxon(first,second)
+        pending.add(merged)
+    return pending.pop()
+def lcsIteratorSelf(entries,db,options):
+    """
+    For each query sequence of `entries`, yield ``(seq, maxid, results)``.
+
+    ``maxid`` is a pair (list of the best scoring sequences of `db`, score)
+    and ``results`` is a list of ``(db sequence, score)`` pairs.  When no
+    sequence of `db` is retained, ``maxid[0]`` and ``results`` are empty.
+    `options` must provide ``minimum`` and ``circle``.
+    """
+    for seq in entries:
+        results = []
+        maxid   = ([],0.0)
+        minid   = options.minimum
+        # First pass: look for the db sequence(s) with the highest score
+        # against the query.  minid is raised each time a better score is found.
+        for d in db:
+            lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)  # @UnusedVariable
+            # a new best must be strictly above both the current best and
+            # options.minimum
+            if lcs > maxid[1] and lcs > options.minimum:
+                maxid = ([d],lcs)
+                minid = maxid[1]
+            elif lcs==maxid[1]:
+                # NOTE(review): this branch is also taken while no best has
+                # been found yet if lcs is exactly 0.0 (the initial score),
+                # without checking options.minimum - verify that it is intended.
+                maxid[0].append(d)
+        if maxid[0]:
+            # the score used from here on is capped to options.circle
+            if maxid[1] > options.circle:
+                maxid=(maxid[0],options.circle)
+            results.extend([(s,maxid[1]) for s in maxid[0]])
+            # Second pass: add every db sequence whose score against one of
+            # the best sequences reaches that score.  The same db sequence
+            # is appended once per best sequence it matches, so it can
+            # appear several times in results.
+            for d in db:
+                for s in maxid[0]:
+                    if d.id != s.id:
+                        lcs,lali = cachedLenLCS(s,d,maxid[1],normalized=True,reference=ALILEN)      # @UnusedVariable
+                        if lcs >= maxid[1]:
+                            results.append((d,lcs))
+        yield seq,maxid,results
+if __name__=='__main__':
+    # Script entry point of ecotag: read the reference database, search the
+    # best references of every query sequence and annotate the query with
+    # the resulting taxonomic assignment before writing it.
+    # State used by cachedLenLCS(): the score cache and its hit/miss
+    # counters.  Both counters start at 1.0 so that the ratio printed at the
+    # end never divides by zero.
+    __LCSCache__=OrderedDict()
+    __INCache__=1.0
+    __OUTCache__=1.0
+    optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
+    (options, entries) = optionParser()
+    # --cache-size values below 10 are raised to 10
+    __CACHE_SIZE__=options.cache
+    if __CACHE_SIZE__ < 10:
+        __CACHE_SIZE__=10
+    taxonomy = loadTaxonomyDatabase(options)
+    writer = sequenceWriterGenerator(options)
+    print >>sys.stderr,"Reading reference DB ...",
+#    if (hasattr(options, 'ecobarcodedb') and options.ecobarcodedb is not None):
+#        try:
+#            db = list(fastaNucIterator(options.database))
+#        except IOError:
+#            db = list(referenceDBIterator(options))  
+#        if options.primer is not None:
+#            entries = sequenceIterator(options)  
+#    else:
+    db = list(fastaNucIterator(options.database))
+    # the reference file name without directory and extension is the key
+    # used in every per-database annotation written below
+    dbname=os.path.splitext(os.path.basename(options.database))[0]
+    print >>sys.stderr," : %d" % len(db)
+    taxonlink = {}
+    # NOTE(review): rankid is not used further in this script; the call is
+    # kept unchanged in case it has a side effect.
+    rankid = taxonomy.findRankByName(options.explain)
+    # Reference ids are truncated to 46 characters and must stay unique.
+    for seq in db:
+        seqid = seq.id[0:46]
+        seq.id=seqid
+        assert seqid not in taxonlink
+        taxonlink[seqid]=int(seq['taxid'])
+    if options.uniq:
+        entries = uniqSequence(entries)
+    if options.sort is not None:
+        entries = sortSequence(entries, options.sort, options.reverse)
+#    matcher = lcsIterator
+#    if options.selfmatches:
+#        matcher= lcsIteratorSelf
+    search = lcsIteratorSelf(entries,db,options)
+    print >>sys.stderr,'\nCache size : %d\n' % __CACHE_SIZE__
+    for seq,best,match in search:
+        try:
+            seqcount = seq['count']
+        except KeyError:
+            seqcount=1
+        # data layout: 0 status, 1 query id, 2 best match id, 3 best identity,
+        # 4 worst identity, 5 'NA', 6 count, 7 number of matches, 8 taxid,
+        # 9 scientific name, 10 rank
+        if best[0]:
+            taxlist = set(taxonlink[p[0].id] for p in match)
+            if options.error > 0.0 and len(match) >= int(options.minmatches / (1.0 - options.error)):
+                lca = taxonomy.betterCommonTaxon(options.error,
+                                                 *tuple(taxlist))
+            else:
+                lca = taxonomy.betterCommonTaxon(0.0,*tuple(taxlist))
+            scname = taxonomy.getScientificName(lca)
+            rank = taxonomy.getRank(lca)
+            if len(taxlist) < 15:
+                species_list = set(taxonomy.getSpecies(t) for t in taxlist)
+                species_list = [taxonomy.getScientificName(t) for t in species_list if t is not None]
+            else:
+                species_list = []
+            worst = min(x[1] for x in match)
+            data =['ID',seq.id,best[0][0].id,best[1],worst,'NA',seqcount,len(match),lca,scname,rank]
+        else:
+            data =['UK',seq.id,'NA','NA','NA','NA',seqcount,0,1,'root','no rank']
+        # Every per-database annotation is a dict indexed by dbname.  The
+        # dict is stored back on the sequence after the update: when the
+        # key is missing, seq.get() hands back the fresh default dict,
+        # which would otherwise be lost.
+        tag = seq.get('id_status',{})
+        tag[dbname]=data[0]=='ID'
+        seq['id_status']=tag
+        seq['count']=data[6]
+        tag = seq.get('match_count',{})
+        tag[dbname]=data[7]
+        seq['match_count']=tag
+        tag = seq.get('taxid_by_db',{})
+        tag[dbname]=data[8]
+        seq['taxid_by_db']=tag
+        # the final taxid combines the assignments made with every database
+        seq['taxid'] = mostPreciseTaxid(tag.values(), options)
+        tag = seq.get('scientific_name_by_db',{})
+        tag[dbname]=data[9]
+        seq['scientific_name_by_db']=tag
+        seq['scientific_name']=options.taxonomy.getScientificName(seq['taxid'])
+        tag = seq.get('rank_by_db',{})
+        tag[dbname]=data[10]
+        seq['rank_by_db']=tag
+        seq['rank']=options.taxonomy.getRank(seq['taxid'])
+        if data[0]=='ID':
+            tag = seq.get('best_match',{})
+            tag[dbname]=data[2]
+            seq['best_match']=tag
+            tag = seq.get('best_identity',{})
+            tag[dbname]=data[3]
+            seq['best_identity']=tag
+            tag = seq.get('species_list',{})
+            tag[dbname]=species_list
+            seq['species_list']=tag
+            if options.explain is not None:
+                tag = seq.get('explain',{})
+                tag[dbname]=dict((s[0].id,s[1]) for s in match)
+                seq['explain']=tag
+            seq['order']=options.taxonomy.getOrder(seq['taxid'])
+            if seq['order']:
+                seq['order_name']=options.taxonomy.getScientificName(seq['order'])
+            else:
+                seq['order_name']=None
+            seq['family']=options.taxonomy.getFamily(seq['taxid'])
+            if seq['family']:
+                seq['family_name']=options.taxonomy.getScientificName(seq['family'])
+            else:
+                seq['family_name']=None
+            seq['genus']=options.taxonomy.getGenus(seq['taxid'])
+            if seq['genus']:
+                seq['genus_name']=options.taxonomy.getScientificName(seq['genus'])
+            else:
+                seq['genus_name']=None
+            seq['species']=options.taxonomy.getSpecies(seq['taxid'])
+            if seq['species']:
+                seq['species_name']=options.taxonomy.getScientificName(seq['species'])
+            else:
+                seq['species_name']=None
+        writer(seq)        
+    print >>sys.stderr,'\n%5.3f%% of the alignments was cached' % (__INCache__/(__INCache__+__OUTCache__)*100)         
diff --git a/src/ecotaxspecificity.py b/src/ecotaxspecificity.py
new file mode 100755
index 0000000..939cfce
--- /dev/null
+++ b/src/ecotaxspecificity.py
@@ -0,0 +1,239 @@
+:py:mod:`ecotaxspecificity`: Evaluates barcode resolution
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`ecotaxspecificity` command evaluates barcode resolution at different 
+taxonomic ranks. 
+As inputs, it takes a sequence record file annotated with taxids in the sequence 
+header, and a database formatted as an ecopcr database (see :doc:`obitaxonomy 
+<obitaxonomy>`) or a NCBI taxdump (see NCBI ftp site).
+An example of output is reported below::
+                Number of sequences added in graph: 284
+                Number of nodes in all components: 269
+                Number of sequences lost: 15!
+                rank                      taxon_ok      taxon_total     percent
+                order                            8               8        100.00
+                superfamily                      1               1        100.00
+                parvorder                        1               1        100.00
+                subkingdom                       1               1        100.00
+                superkingdom                     1               1        100.00
+                kingdom                          3               3        100.00
+                phylum                           5               5        100.00
+                infraorder                       1               1        100.00
+                subfamily                        3               3        100.00
+                class                            6               6        100.00
+                species                         35             176         19.89
+                superorder                       1               1        100.00
+                suborder                         1               1        100.00
+                subtribe                         1               1        100.00
+                subclass                         3               3        100.00
+                genus                            9              15         60.00
+                superclass                       1               1        100.00
+                family                          10              10        100.00
+                tribe                            2               2        100.00
+                subphylum                        1               1        100.00
+In this example, the input sequence file contains 284 sequence records, but only 
+269 have been examined, because taxonomic information was not recovered for the
+15 remaining ones.
+"Taxon_total" refers to the number of different taxa observed at this rank 
+in the sequence record file (when taxonomic information is available at this 
+rank), and "taxon_ok" corresponds to the number of taxa that the barcode sequence
+identifies unambiguously in the taxonomic database. In this example, the sequence 
+records correspond to 176 different species, but only 35 of these have specific 
+barcodes. "percent" is the percentage of unambiguously identified taxa among 
+the total number of taxa (taxon_ok/taxon_total*100).
+import math
+import sys
+from obitools.graph import Graph
+from obitools.utils import progressBar
+from obitools.align import LCS
+from obitools.align import isLCSReachable
+from obitools.format.options import addInputFormatOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+from obitools.graph.algorithms.component import componentIterator
+from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
+def addSpecificityOptions(optionManager):
+    """
+    Register the options specific to ecotaxspecificity.
+
+    Both options are placed in a dedicated option group created on
+    `optionManager`: ``-e/--errors`` (integer, stored in ``dist``,
+    default 1) and ``-q/--quorum`` (float, stored in ``quorum``,
+    default 0.0).
+    """
+    specificity = optionManager.add_option_group('ecotaxspecificity specific options')
+    errorsSettings = dict(action="store", dest="dist",
+                          metavar="###",
+                          type="int",
+                          default=1,
+                          help="Maximum errors between two sequences")
+    quorumSettings = dict(action="store", dest="quorum",
+                          type="float",
+                          default=0.0,
+                          help="Quorum")
+    specificity.add_option('-e','--errors',**errorsSettings)
+    specificity.add_option('-q','--quorum',**quorumSettings)
+if __name__=='__main__':
+    # Script entry point of ecotaxspecificity.
+    # Steps: (1) load every sequence, (2) link in a graph the sequences
+    # differing by at most options.dist errors, (3) group sequences by
+    # connected component, (4) for each rank, count the taxa whose
+    # components contain no other taxon of that rank.
+    optionParser = getOptionManager([addInputFormatOption,addTaxonomyDBOptions,addSpecificityOptions])
+    (options, entries) = optionParser()
+    loadTaxonomyDatabase(options)
+    tax =options.taxonomy
+    ranks = set(x for x in tax.rankIterator())
+    results = [seq for seq in entries]
+    graph = Graph("error",directed=False)
+    # xx counts the nodes added to the graph (only used by the commented
+    # diagnostics further down)
+    xx = 0
+    for s in results:
+        #if options.sample is None:
+        #    sample = {"XXX":s['count'] if 'count' in s else 1}
+        #else:
+        #    sample = s[options.sample]
+        #graph.addNode(s.id,shape='circle',_sequence=s,_sample=sample)
+        graph.addNode(s.id,shape='circle',_sequence=s)
+        xx = xx + 1
+    ldb = len(results)    
+    # NOTE(review): math.log10(0) raises ValueError, so an empty input
+    # stops the script here.
+    digit = int(math.ceil(math.log10(ldb)))
+    # NOTE(review): the loops below visit ldb*(ldb-1)/2 pairs (j starts at
+    # i+1), fewer than this total - check the progress bar expectations.
+    aligncount = ldb*(ldb+1)/2
+    edgecount = 0
+    print >>sys.stderr
+    header = "Alignment  : %%0%dd x %%0%dd -> %%0%dd " % (digit,digit,digit)
+    progressBar(1,aligncount,True,"Alignment  : %s x %s -> %s " % ('-'*digit,'-'*digit, '0'*digit))
+    pos=1
+    aligner = LCS()
+    for i in xrange(ldb):
+        inode = graph[results[i].id]
+        aligner.seqA = results[i]
+        li = len(results[i])
+        for j in xrange(i+1,ldb):
+            progressBar(pos,aligncount,head=header % (i,j,edgecount))
+            pos+=1
+            lj = len(results[j])
+            lm = max(li,lj)
+            # smallest LCS length compatible with at most options.dist errors
+            lcsmin = lm - options.dist
+            # the alignment is only computed when isLCSReachable accepts the pair
+            if isLCSReachable(results[i],results[j],lcsmin):
+                aligner.seqB=results[j]
+                ali = aligner()
+                llcs=ali.score
+                lali = len(ali[0])
+                # observed distance: alignment length minus alignment score
+                obsdist = lali-llcs
+                if obsdist <= options.dist: # options.dist:
+                    jnode = graph[results[j].id]
+                    # res is not used afterwards
+                    res=graph.addEdge(inode.label, jnode.label) # make links
+                    edgecount+=1               
+    indexbyseq={} # each element in this dict will be one component, with first seq of component as its key
+    # yy counts the nodes seen in components (only used by the commented
+    # diagnostics below)
+    yy = 0
+    for c in componentIterator(graph):
+        sub = graph.subgraph(c)
+        first = True
+        s = ""
+        for node in sub: #all nodes of a component should go with same key (taken as first sequence in comp)
+            #print node
+            seq = node["_sequence"]
+            if first == True: #we will take first seq of a component as key for that component
+                # NOTE(review): the key is str(seq); two components whose
+                # first sequences have the same string would share one
+                # entry - confirm what str() returns for a sequence.
+                s = str(seq)
+                indexbyseq[s]=set([seq])
+                first = False
+            else:
+                indexbyseq[s].add(seq)
+            yy = yy + 1
+    #print "Number of sequences added in graph: " + str(xx)
+    #print "Number of nodes in all components: " + str (yy)
+    #print "Number of sequences lost: " + str (xx-yy) + "!"
+    print >>sys.stderr
+    # since multiple different sequences have one key, we need to know what that key is for each sequence
+    indexbykey={} #it will have elements like: {"seq1":key, "seq2":key, ...} where 'key' is the component key to which 'seqx' belongs
+    for key in indexbyseq.keys (): # loop on all components
+        for x in indexbyseq[key]: # loop on each seq in this component
+            v = str(x)
+            # the first component found for a given string is kept
+            if v not in indexbykey:
+                indexbykey[v] = key
+    print '%-20s\t%10s\t%10s\t%7s' % ('rank','taxon_ok','taxon_total','percent')
+    # strings of the sequences found in no component (only used by the
+    # commented report at the end)
+    lostSeqs = []
+    for rank,rankid in ranks:
+        if rank != 'no rank':
+            # taxon at this rank -> set of the strings of its sequences
+            indexbytaxid={}
+            for seq in results:
+                t = tax.getTaxonAtRank(seq['taxid'],rankid)
+                if t is not None: 
+                    if t in indexbytaxid:
+                        indexbytaxid[t].add(str(seq))
+                    else:
+                        indexbytaxid[t]=set([str(seq)])
+            taxoncount=0
+            taxonok=0            
+            for taxon in indexbytaxid:
+                # taxlist: taxa at this rank found in the components of the
+                # sequences of `taxon`; taxonindividuals: for each of these
+                # taxa, the set of taxids seen (quorum mode only)
+                taxlist = set()
+                taxonindividuals = {}
+                for tag in indexbytaxid[taxon]:
+                    if tag in indexbykey:
+                        key = indexbykey[tag] #get component key for this seq
+                        if options.quorum > 0.0:
+                            for x in indexbyseq[key]:
+                                txn = tax.getTaxonAtRank(x['taxid'],rankid)
+                                if txn not in taxonindividuals:
+                                    taxonindividuals[txn] = set([x['taxid']])
+                                else:
+                                    taxonindividuals[txn].add(x['taxid'])
+                        taxlist |=set(tax.getTaxonAtRank(x['taxid'],rankid) for x in indexbyseq[key])
+                    else:
+                        if tag not in lostSeqs:
+                            lostSeqs.append(tag)
+                taxoncount+=1
+                if options.quorum > 0.0:
+                    # NOTE(review): max and sum shadow the builtins from
+                    # here on; max() is called as a builtin in the alignment
+                    # loop above, which runs before this point.
+                    max = 0
+                    sum = 0
+                    for k in taxonindividuals.keys ():
+                        if len(taxonindividuals[k]) > max:
+                            max = len(taxonindividuals[k])
+                        sum = sum + len(taxonindividuals[k])
+                    # the taxon is accepted when its largest group of taxids
+                    # reaches the fraction (1 - quorum) of all taxids seen
+                    if max >= (sum-sum*options.quorum):
+                        taxonok += 1
+                else:
+                    # without quorum the taxon must be alone in its components
+                    if len(taxlist)==1:
+                        taxonok+=1
+            if taxoncount:
+                print '%-20s\t%10d\t%10d\t%8.2f' % (rank,taxonok,taxoncount,float(taxonok)/taxoncount*100)
+   # if len (lostSeqs) > 0:            
+     #   print "Lost Sequences:"
+       # print lostSeqs       
diff --git a/src/ecotaxstat.py b/src/ecotaxstat.py
new file mode 100755
index 0000000..ea74390
--- /dev/null
+++ b/src/ecotaxstat.py
@@ -0,0 +1,109 @@
+:py:mod:`ecotaxstat` : getting the coverage of an ecoPCR output compared to the original ecoPCR database
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`ecotaxstat` command requires two parameters: an *ecoPCR* formatted database (specified 
+with the `-d` option; see :doc:`obiconvert <obiconvert>` for a description of the database format) 
+and an ecoPCR output (ideally computed using the specified ecoPCR database).
+The command outputs, for every rank, the coverage (Bc) of the ecoPCR output. The coverage (Bc) is the 
+fraction of *taxids* that have a sequence in the database and also have a sequence in the ecoPCR 
+output file.
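+At least one *taxid* must be specified with the `-r` option; only the database sequences placed under the specified *taxids* are counted, which focuses the coverage on that part of the taxonomy.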
+from obitools.ecopcr import taxonomy
+from obitools.ecopcr import sequence
+from obitools.ecopcr import EcoPCRFile
+from obitools.options import getOptionManager
+from obitools.ecopcr.options import loadTaxonomyDatabase
+import sys
+def addTaxonomyOptions(optionManager):
+    """
+    Register the ecotaxstat options on `optionManager`.
+
+    ``-d/--ecopcrdb`` stores the ecoPCR database name in ``db`` and
+    ``-r/--required`` appends an integer taxid to the ``required`` list
+    each time it is given.
+    """
+    register = optionManager.add_option
+    register('-d','--ecopcrdb',
+             dest="db", action="store",
+             type="string",
+             metavar="<FILENAME>",
+             help="ecoPCR Database name")
+    register('-r','--required',
+             dest='required', action="append",
+             type="int",
+             metavar="<TAXID>",
+             default=[],
+             help="required taxid")
+if __name__=='__main__':
+    # Script entry point of ecotaxstat: for every rank, compare the number
+    # of taxa found in the ecoPCR output with the number of taxa found in
+    # the ecoPCR database under the required taxids.
+    optionParser = getOptionManager([addTaxonomyOptions],
+                                    entryIterator=EcoPCRFile)
+    (options, entries) = optionParser()
+    if (options.db is None):
+        print>>sys.stderr, "-d option is required"
+        sys.exit(1)
+    if len(options.required)==0:
+        print>>sys.stderr, "-r option is required"
+        sys.exit(1)
+    tax = taxonomy.EcoTaxonomyDB(options.db)
+    seqd= sequence.EcoPCRDBSequenceIterator(options.db,taxonomy=tax)
+    ranks = set(x for x in tax.rankIterator())
+    # Taxa of the database, by rank name.  Only the database sequences for
+    # which isAncestor() is true with at least one required taxid are kept
+    # (options.required cannot be empty here, see the check above).
+    dbtaxa = {}
+    for seq in seqd:
+        taxid = seq['taxid']
+        if any(tax.isAncestor(r,taxid) for r in options.required):
+            for rank,rankid in ranks:
+                if rank != 'no rank':
+                    t = tax.getTaxonAtRank(taxid,rankid)
+                    if t is not None:
+                        dbtaxa.setdefault(rank,set()).add(t)
+    dbcount = dict((x,len(dbtaxa[x])) for x in dbtaxa)
+    # Taxa of the ecoPCR output, by rank name.
+    # NOTE(review): the ecoPCR entries are not filtered on the required
+    # taxids - confirm that the output is expected to be restricted already.
+    ecopcrtaxa = {}
+    for seq in entries:
+        for rank,rankid in ranks:
+            if rank != 'no rank':
+                t = tax.getTaxonAtRank(seq['taxid'],rankid)
+                if t is not None:
+                    ecopcrtaxa.setdefault(rank,set()).add(t)
+    ecopcrcount = dict((x,len(ecopcrtaxa[x])) for x in ecopcrtaxa)
+    ranknames = sorted(x[0] for x in ranks)
+    print '%-20s\t%10s\t%10s\t%7s' % ('rank','ecopcr','db','percent')
+    for r in ranknames:
+        # only ranks observed on both sides are reported
+        if r in ecopcrcount and r in dbcount and ecopcrcount[r]:
+            print '%-20s\t%10d\t%10d\t%8.2f' % (r,ecopcrcount[r],dbcount[r],float(ecopcrcount[r])/dbcount[r]*100)
diff --git a/src/extractreads.py b/src/extractreads.py
new file mode 100644
index 0000000..04050d5
--- /dev/null
+++ b/src/extractreads.py
@@ -0,0 +1,243 @@
+Created on 9 juin 2012
+ at author: coissac
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator,\
+    autoEntriesIterator
+from obitools.fasta import formatFasta
+from obitools.options import getOptionManager
+from obitools.options._options import allEntryIterator
+from obitools.word._readindex import ReadIndex,minword
+import sys
+import math
+def addWindowsOptions(optionManager):
+    optionManager.add_option('-l','--window-length',
+                             action="store", dest="length",
+                             metavar="<WORD SIZE>",
+                             type="int",
+                             default=90,
+                             help="size of the sliding window")
+    optionManager.add_option('-s','--step',
+                             action="store", dest="step",
+                             metavar="<STEP>",
+                             type="int",
+                             default=1,
+                             help="position difference between two windows")
+    optionManager.add_option('-c','--circular',
+                             action="store_true", dest="circular",
+                             default=False,
+                             help="set for circular sequence")
+    optionManager.add_option('-R','--reference',
+                             action="store", dest="reffile",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="sequence file containing the reference sequences")
+    optionManager.add_option('-r','--reverse-reads',
+                             action="store", dest="reverse",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Filename containing reverse solexa reads "
+                            )
+    optionManager.add_option('-D','--write-dump',
+                             action="store", dest="wdump",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Save the index to a dump file"
+                            )
+    optionManager.add_option('-d','--read-dump',
+                             action="store", dest="rdump",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Read the index from a dump file"
+                            )
+    optionManager.add_option('-S','--singleton',
+                             action="store", dest="singleton",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Write singleton sequence in this file"
+                            )
+def cutQuality(s):
+    """
+    Return the slice of `s` having the best cumulated quality.
+    Each value of `s.quality` is converted to -10*log10(value); the 10%
+    quantile of these scores is subtracted from every position and a running
+    sum, reset to zero whenever it becomes negative, is computed. The slice
+    ends at the last position where this running sum is maximal and starts
+    just after the closest preceding position where the sum is zero (or at
+    the first position when no such position exists).
+    NOTE(review): presumably `s.quality` holds per-base error probabilities
+    -- confirm against the sequence class.
+    """
+    def quantile(x,q=0.1):
+        # Value located at fraction `q` of a sorted copy of `x`.
+        y = list(x)
+        y.sort()
+        return y[int(q*len(y))]
+    def cumsum0(x):
+        # In-place cumulative sum of `x`, clamped to zero when negative.
+        if x[0] < 0: x[0]=0
+        for i in xrange(1,len(x)):
+            x[i]+=x[i-1]
+            if x[i]<0: x[i]=0
+        return x
+    q = [- math.log10(a) * 10 for a in s.quality]
+    mq=quantile(q)
+    # Scores are centred on the 10% quantile before being accumulated.
+    q = cumsum0([a - mq for a in q])
+    mx = max(q)
+    # Scan from the end to find the last position holding the maximum.
+    xmax = len(q)-1
+    while(q[xmax] < mx):
+        xmax-=1
+    xmin=xmax
+    # xmax becomes the exclusive end of the returned slice.
+    xmax+=1
+    # Walk back to the closest position where the running sum is zero.
+    while(xmin>0 and q[xmin]>0):
+        xmin-=1
+    # A zero position is excluded from the returned slice.
+    if q[xmin]==0:
+        xmin+=1
+    return s[xmin:xmax]
+def cutDirectReverse(entries):
+    first = []
+    for i in xrange(10):
+        first.append(entries.next())
+    lens = [len(x) for x in first]
+    clen = {}
+    for i in lens:
+        clen[i]=clen.get(i,0)+1
+    freq = max(clen.values())
+    freq = [k for k in clen if clen[k]==freq]
+    assert len(freq)==1,"To many sequence length"
+    freq = freq[0]
+    assert freq % 2 == 0, ""
+    lread = freq/2
+    seqs = chain(first,entries)
+    for s in seqs:
+        d = s[0:lread]
+        r = s[lread:]
+        yield(d,r)
+def seqPairs(direct,reverse):
+    for d in direct:
+        r = reverse.next()
+        yield(cutQuality(d),cutQuality(r))
+def seq2words(seqs,options):
+    nw=set()
+    for seq in seqs:
+        s=str(seq)
+        if options.circular:
+            s = s + s[0:options.length]
+        ls = len(s) - options.length + 1
+        for wp in xrange(0,ls,options.step):
+            w =minword(s[wp:wp+options.length])
+            if len(w)==options.length:
+                nw.add(w)              
+    return nw
+if __name__ == '__main__':
+    # Script entry point: index the reads, then repeatedly select the reads
+    # sharing a word with the reference sequences or with reads already
+    # selected, and finally print the marked reads.
+    optionParser = getOptionManager([addWindowsOptions,addInOutputOption],progdoc=__doc__)
+    (options, direct) = optionParser()
+    # Without a reverse file each read is wrapped in a 1-tuple; otherwise
+    # direct and reverse reads are paired by seqPairs.
+    if options.reverse is None:
+        sequences=((x,) for x in direct)
+    else:
+        reverse = allEntryIterator([options.reverse],options.readerIterator)
+        sequences=seqPairs(direct,reverse)
+    reader = autoEntriesIterator(options)
+    # NOTE(review): rfile is never closed explicitly, and open() fails when
+    # the -R option is not given (options.reffile is None by default).
+    rfile = open(options.reffile)
+    reference = reader(rfile)
+    # worddone: words already processed; wordlist: words still to process,
+    # seeded with the words of the reference sequences.
+    worddone=set()
+    wordlist = seq2words(reference,options)
+    indexer = ReadIndex(readsize=105)
+    seqpair=0
+    nbseq=0
+    # NOTE(review): writer is not used below; output goes through formatFasta.
+    writer = sequenceWriterGenerator(options)
+    if options.rdump is None:
+        # No dump to read: build the index from the reads, optionally save it.
+        print >>sys.stderr,"Indexing sequences..."
+        for seq in sequences:
+            indexer.add(seq)
+        print >>sys.stderr,"Indexing words..."
+        indexer.indexWords(options.length,True)
+        if options.wdump is not None:
+            print >>sys.stderr,"Saving index to file %s..." % options.wdump
+            indexer.save(options.wdump,True)
+    else:
+        print >>sys.stderr,"Loading index dump..."
+        indexer.load(options.rdump,True)
+    print >>sys.stderr,"Selecting sequences..."
+    # Work-list loop: every read returned for a word contributes its own
+    # words, minus those already processed, to the list of pending words.
+    while len(wordlist)>0:
+        w = wordlist.pop()
+        worddone.add(w)
+        i=0
+        #print >>sys.stderr,"Looking for word : %s..." % w
+        for seq in indexer.iterreads(w):
+            i+=1
+            #print formatFasta(seq) 
+            s  = str(seq)
+            sc = str(seq.complement())
+            # Sanity check: the word must occur in the read or its complement.
+            assert w in s or w in sc,'Bug !!!! sequence %s (%d) %s sans %s' % (seq.id,i,s,w)
+            words = seq2words((seq,),options) - worddone
+            wordlist|=words 
+        seqpair+=i
+        if i:
+            # Progress line rewritten in place on stderr (leading "\r").
+            print >>sys.stderr,"\rWrote extracted = %d/total = %d/word done = %d [wordlist=%d]" % (i,seqpair,len(worddone),len(wordlist)),
+    print >>sys.stderr,"\nWriting sequences..."
+    # Marked pairs go to standard output in fasta format.
+    for seq in indexer.itermarkedpairs():
+        print formatFasta(seq)
+    # Marked singletons are written to the -S file when it is requested.
+    if options.singleton is not None:
+        s = open(options.singleton,'w')
+        for seq in indexer.itermarkedsingleton():
+            print >>s,formatFasta(seq)
+        s.close()
diff --git a/src/extractreads2.py b/src/extractreads2.py
new file mode 100644
index 0000000..41bec8b
--- /dev/null
+++ b/src/extractreads2.py
@@ -0,0 +1,119 @@
+Created on 9 juin 2012
+ at author: coissac
+from esm import Index
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator,\
+    autoEntriesIterator
+from obitools.options import getOptionManager
+from obitools.options._options import allEntryIterator
+def addWindowsOptions(optionManager):
+    optionManager.add_option('-l','--window-length',
+                             action="store", dest="length",
+                             metavar="<WORD SIZE>",
+                             type="int",
+                             default=None,
+                             help="size of the sliding window")
+    optionManager.add_option('-s','--step',
+                             action="store", dest="step",
+                             metavar="<STEP>",
+                             type="int",
+                             default=1,
+                             help="position difference between two windows")
+    optionManager.add_option('-c','--circular',
+                             action="store_true", dest="circular",
+                             default=False,
+                             help="set for circular sequence")
+    optionManager.add_option('-R','--reference',
+                             action="store", dest="reffile",
+                             metavar="<STEP>",
+                             type="str",
+                             default=None,
+                             help="sequence file containing the reference sequences")
+    optionManager.add_option('-r','--reverse-reads',
+                             action="store", dest="reverse",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Filename containing reverse solexa reads "
+                            )
+def cutDirectReverse(entries):
+    first = []
+    for i in xrange(10):
+        first.append(entries.next())
+    lens = [len(x) for x in first]
+    clen = {}
+    for i in lens:
+        clen[i]=clen.get(i,0)+1
+    freq = max(clen.values())
+    freq = [k for k in clen if clen[k]==freq]
+    assert len(freq)==1,"To many sequence length"
+    freq = freq[0]
+    assert freq % 2 == 0, ""
+    lread = freq/2
+    seqs = chain(first,entries)
+    for s in seqs:
+        d = s[0:lread]
+        r = s[lread:]
+        yield(d,r)
+def seqPairs(direct,reverse):
+    for d in direct:
+        r = reverse.next()
+        yield(d,r)
+if __name__ == '__main__':
+    # Script entry point: write the reads containing at least one word of
+    # the reference sequences (or of their complement).
+    optionParser = getOptionManager([addWindowsOptions,addInOutputOption],progdoc=__doc__)
+    (options, direct) = optionParser()
+    # Without a reverse file each read is wrapped in a 1-tuple; otherwise
+    # direct and reverse reads are paired by seqPairs.
+    if options.reverse is None:
+        sequences=((x,) for x in direct)
+    else:
+        reverse = allEntryIterator([options.reverse],options.readerIterator)
+        sequences=seqPairs(direct,reverse)
+    reader = autoEntriesIterator(options)
+    # NOTE(review): rfile is never closed explicitly, and open() fails when
+    # the -R option is not given (options.reffile is None by default).
+    rfile = open(options.reffile)
+    reference = reader(rfile)
+    # Collect every window of options.length characters of the reference
+    # sequences and of their complement.
+    # NOTE(review): --window-length is declared with default=None in this
+    # script; x+options.length fails when -l is not given -- confirm whether
+    # a default value is intended.
+    words = Index()
+    for rs in reference:
+        ft = str(rs)
+        rt = str(rs.complement())
+        if options.circular:
+            # Append the beginning so that windows can span the junction.
+            ft = ft + ft[0:options.length]
+            rt = rt + rt[0:options.length]
+        for x in xrange(0,len(ft),options.step):
+            # Windows truncated by the end of the sequence are ignored.
+            w = ft[x:(x+options.length)]
+            if len(w)==options.length:
+                words.enter(w)
+            w = rt[x:(x+options.length)]
+            if len(w)==options.length:
+                words.enter(w)
+    words.fix()
+    writer = sequenceWriterGenerator(options)
+    for seq in sequences:
+        # The reads of a pair are concatenated before querying the index.
+        t = "".join([str(x) for x in seq])
+        r = words.query(t)
+        if r:
+            writer(seq)
diff --git a/src/illuminapairedend.py b/src/illuminapairedend.py
new file mode 100644
index 0000000..6bc667c
--- /dev/null
+++ b/src/illuminapairedend.py
@@ -0,0 +1,280 @@
+:py:mod:`illuminapairedend`: aligns paired-end Illumina reads
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+   :py:mod:`illuminapairedend` replaces ``solexapairend``.
+:py:mod:`illuminapairedend` aims at aligning the two reads of a paired-end library sequenced 
+using an Illumina platform. 
+    - If the two reads overlap, it returns the consensus sequence together with its quality 
+    - Otherwise, it concatenates sequence merging the forward read and 
+      the reversed-complemented reverse read.
+The program uses as input one or two :doc:`fastq <../fastq>` sequences reads files. 
+    - If two files are used one of them must be specified using the ``-r`` option. 
+      Sequence records corresponding to the same read pair must be in the same order 
+      in the two files.
+    - If just one file is provided, sequence records are supposed to be all of the same length.
+      The first half of the sequence is used as forward read, the second half is used as the reverse
+      read.
+:py:mod:`illuminapairedend` aligns the forward sequence record with the reverse complement of the 
+reverse sequence record. The alignment algorithm takes into account the base qualities.
+    *Example:*
+    .. code-block:: bash
+       > illuminapairedend -r seq3P.fastq seq5P.fastq > seq.fastq
+    The ``seq5P.fastq`` sequence file contains the forward sequence records.
+    The ``seq3P.fastq`` sequence file contains the reverse sequence records.
+    Pairs of reads are aligned together and the consensus sequence is stored in the
+    ``seq.fastq`` file.
+from obitools import NucSequence
+from obitools.options import getOptionManager, allEntryIterator
+from obitools.align import QSolexaReverseAssemble
+from obitools.align import QSolexaRightReverseAssemble
+from obitools.tools._solexapairend import buildConsensus
+from obitools.format.options import addOutputFormatOption,\
+    sequenceWriterGenerator
+from itertools import chain
+import cPickle
+import math
+from obitools.fastq._fastq import fastqIterator  # @UnresolvedImport
+def addSolexaPairEndOptions(optionManager):
+    optionManager.add_option('-r','--reverse-reads',
+                             action="store", dest="reverse",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Filename containing reverse solexa reads "
+                            )
+    optionManager.add_option('--index-file',
+                             action="store", dest="indexfile",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="Filename containing illumina index reads "
+                            )
+    optionManager.add_option('--sanger',
+                             action="store_const", dest="seqinformat",
+                             default=None,
+                             const='sanger',
+                             help="input file is in sanger fastq nucleic format (standard fastq)")
+    optionManager.add_option('--solexa',
+                             action="store_const", dest="seqinformat",
+                             default=None,
+                             const='solexa',
+                             help="input file is in fastq nucleic format produced by solexa sequencer")
+    optionManager.add_option('--illumina',
+                             action="store_const", dest="seqinformat",
+                             default=None,
+                             const='illumina',
+                             help="input file is in fastq nucleic format produced by old solexa sequencer")
+    optionManager.add_option('--score-min',
+                             action="store", dest="smin",
+                             metavar="#.###",
+                             type="float",
+                             default=None,
+                             help="minimum score for keeping aligment")
+def cutDirectReverse(entries):
+    first = []
+    for i in xrange(10):
+        first.append(entries.next())
+    lens = [len(x) for x in first]
+    clen = {}
+    for i in lens:
+        clen[i]=clen.get(i,0)+1
+    freq = max(clen.values())
+    freq = [k for k in clen if clen[k]==freq]
+    assert len(freq)==1,"To many sequence length"
+    freq = freq[0]
+    assert freq % 2 == 0, ""
+    lread = freq/2
+    seqs = chain(first,entries)
+    for s in seqs:
+        d = s[0:lread]
+        r = s[lread:]
+        yield(d,r)
+def seqPairs(direct,reverse):
+    for d in direct:
+        r = reverse.next()
+        yield(d,r)
+def checkAlignOk(ali):
+    #print not (ali[0][0]=='-' or ali[1][len(ali[1])-1]=='-')
+    return not (ali[0][0]=='-' or ali[1][len(ali[1])-1]=='-')
+# Module-level aligner instances reused by buildAlignment for every read pair:
+# `la` produces the alignment tagged 'left', `ra` the one tagged 'right'.
+la = QSolexaReverseAssemble()
+ra = QSolexaRightReverseAssemble()
+def buildAlignment(direct,reverse):
+    if len(direct)==0 or len(reverse)==0:
+        return None
+    la.seqA=direct 
+    la.seqB=reverse 
+    ali=la()
+    ali.direction='left'
+    ra.seqA=direct
+    ra.seqB=reverse
+    rali=ra()
+    rali.direction='right'
+    if ali.score < rali.score:
+        ali=rali
+    return ali
+def alignmentIterator(sequences):
+    for d,r in sequences:
+        ali = buildAlignment(d,r)
+        if ali is None:
+            continue
+        yield ali
+def buildJoinedSequence(ali,options):
+    d = ali[0].getRoot()
+    r = ali[1].getRoot()
+    r=r.complement()
+    s = str(d) + str(r)
+    seq = NucSequence(d.id + '_PairEnd',s,d.definition,**d)
+    withqual = hasattr(d, 'quality') or hasattr(r, 'quality')
+    if withqual:
+        if hasattr(d, 'quality'):
+            quality = d.quality
+        else:
+            quality = [10**-4] * len(d)
+        if hasattr(r, 'quality'):
+            quality.extend(r.quality)
+        else:
+            quality.extend([10**-4] * len(r))
+        seq.quality=quality
+    seq['score']=ali.score
+    seq['ali_dir']=ali.direction
+    seq['mode']='joined'
+    seq['pairend_limit']=len(d)
+    return seq
+if __name__ == '__main__':
+    # Script entry point: align each read pair and write either the consensus
+    # of the alignment or, when its score is too low, the joined reads.
+    optionParser = getOptionManager([addSolexaPairEndOptions,addOutputFormatOption],checkFormat=True
+                                    )
+    (options, direct) = optionParser()
+    # These attributes are not declared as command line options in this
+    # script and are forced to None here.
+    options.proba = None
+    options.skip  = None
+    options.only  = None
+    options.sminL = None
+    options.sminR = None
+    # NOTE(review): options.proba was just set to None, so this branch cannot
+    # run as the script stands; it derives the left/right score thresholds
+    # from two pickled lists and options.pvalue.
+    if options.proba is not None and options.smin is None:
+        p = open(options.proba)
+        options.nullLeft  = cPickle.load(p)
+        options.nullRight = cPickle.load(p)
+        assert options.pvalue is not None, "You have to indicate a pvalue or an score min"
+        i = int(math.floor((1.0 - options.pvalue) * len(options.nullLeft)))
+        if i == len(options.nullLeft):
+            i-=1
+        options.sminL = options.nullLeft[i]
+        i = int(math.floor((1.0 - options.pvalue) * len(options.nullRight)))
+        if i == len(options.nullRight):
+            i-=1
+        options.sminR = options.nullRight[i]
+    # --score-min gives the same threshold to both alignment directions.
+    if options.smin is not None:
+        options.sminL = options.smin
+        options.sminR = options.smin
+    # A single input file holds both reads in each record; with -r the
+    # reverse reads come from a second file read in parallel.
+    if options.reverse is None:
+        sequences=cutDirectReverse(direct)
+    else:
+        reverse = allEntryIterator([options.reverse],options.readerIterator)
+        sequences=seqPairs(direct,reverse)
+    if options.indexfile is not None:
+        indexfile = fastqIterator(options.indexfile)
+    else:
+        indexfile = None
+    writer = sequenceWriterGenerator(options)
+    ba = alignmentIterator(sequences)
+    for ali in ba:
+        if options.sminL is not None:
+            # NOTE(review): the second test is not restricted to 'right'
+            # alignments, so a 'left' alignment scoring above sminR also
+            # passes; harmless while sminL == sminR -- confirm the intent.
+            if (   (ali.direction=='left' and ali.score > options.sminL) 
+                or (ali.score > options.sminR)):
+                consensus = buildConsensus(ali)
+            else:
+                consensus = buildJoinedSequence(ali, options)
+            consensus['sminL']=options.sminL
+            consensus['sminR']=options.sminR
+        else:
+            consensus = buildConsensus(ali)
+        # One index read is consumed for each written sequence.
+        if indexfile is not None:
+            i = str(indexfile.next())
+            consensus['illumina_index']=i
+        writer(consensus)
diff --git a/src/ngsfilter.py b/src/ngsfilter.py
new file mode 100644
index 0000000..979bae3
--- /dev/null
+++ b/src/ngsfilter.py
@@ -0,0 +1,458 @@
+:py:mod:`ngsfilter` : Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+To distinguish between sequences from different PCR products pooled in the same sequencing library, pairs of small DNA 
+sequences (called tags, see the :py:mod:`oligoTag` command and its associated paper for more information on the design 
+of such tags) can be concatenated to the PCR primers. 
+:py:mod:`ngsfilter` takes as input sequence record files and a file describing the DNA tags and primer sequences used 
+for each PCR sample. :py:mod:`ngsfilter` demultiplexes the sequence record files by identifying these DNA tags and 
+the primers.
+:py:mod:`ngsfilter` requires a sample description file containing the description of the primers and tags associated 
+to each sample (specified by option ``-t``). The sample description file is a text file where each line describes one 
+sample. Columns are separated by space or tab characters. Lines beginning with the '#' character will be considered 
+as commentary lines and will simply be ignored by :py:mod:`ngsfilter`. 
+Here is an example of a sample description file::
+    #exp   sample     tags                   forward_primer       reverse_primer              extra_information
+    gh     01_11a     cacgcagtc:cacgcatcg    GGGCAATCCTGAGCCAA    CCATTGAGTCTCTGCACCTATC    F @ community=Festuca; bucket=1; extraction=1;
+    gh     01_12a     cacgcatcg:cacgcagtc    GGGCAATCCTGAGCCAA    CCATTGAGTCTCTGCACCTATC    F @ community=Festuca; bucket=1; extraction=2;
+    gh     01_21a     cacgcgcat:cacgctact    GGGCAATCCTGAGCCAA    CCATTGAGTCTCTGCACCTATC    F @ community=Festuca; bucket=2; extraction=1;
+    gh     01_22a     cacgctact:cacgcgcat    GGGCAATCCTGAGCCAA    CCATTGAGTCTCTGCACCTATC    F @ community=Festuca; bucket=2; extraction=2;
+    gh     02_11a     cacgctgag:cacgtacga    GGGCAATCCTGAGCCAA    CCATTGAGTCTCTGCACCTATC    F @ community=Festuca; bucket=1; extraction=1;
+    gh     02_12a     cacgtacga:cacgctgag    GGGCAATCCTGAGCCAA    CCATTGAGTCTCTGCACCTATC    F @ community=Festuca; bucket=1; extraction=2;
+The results consist of sequence records, printed on the standard output, with their sequence trimmed of the primers and 
+tags and annotated with the corresponding experiment and sample (and possibly some extra information). Sequences for 
+which the tags and primers have not been well identified, and which are thus unassigned to any sample, are stored in a 
+file if option ``-u`` is specified and tagged as erroneous sequences (``error`` attribute) by :py:mod:`ngsfilter`. 
+from obitools import NucSequence, DNAComplementSequence
+from string import lower
+import sys
+import math
+from obitools.options import getOptionManager
+from obitools.utils import ColumnFile
+from obitools.align import FreeEndGapFullMatch
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+def addNGSOptions(optionManager):
+    group = optionManager.add_option_group('ngsfilter specific options')
+    group.add_option('-t','--tag-list',
+                     action="store", dest="taglist",
+                     metavar="<FILENAME>",
+                     type="string",
+                     default=None,
+                     help="File containing the samples definition (with tags, primers, sample names,...)")
+    group.add_option('-u','--unidentified',
+                     action="store", dest="unidentified",
+                     metavar="<FILENAME>",
+                     type="string",
+                     default=None,
+                     help="Filename used to store the sequences unassigned to any sample")
+    group.add_option('-e','--error',
+                     action="store", dest="error",
+                     metavar="###",
+                     type="int",
+                     default=2,
+                     help="Number of errors allowed for matching primers [default = 2]")
+class Primer:
+    """
+    A PCR primer that can be searched in a sequence by alignment.
+    Calling an instance on a sequence aligns the primer against it and
+    returns the match description, or None when the score is too low.
+    Two primers compare equal, and hash identically, when they share the
+    same raw sequence, whatever their orientation.
+    """
+    # Class-wide registry: raw primer sequence -> tag length it is used with.
+    collection={}
+    def __init__(self,sequence,taglength,direct=True,error=2,verbose=False):
+        '''
+        @param sequence: raw primer sequence
+        @type sequence: str
+        @param taglength: length of the tag associated with the primer,
+                          or None when no tag is used
+        @param direct: orientation flag, displayed as 'D' (True) or 'R'
+                       (False) by __str__
+        @type direct: bool
+        @param error: number of errors tolerated when computing the
+                      minimum alignment score
+        @param verbose: when true, debugging traces are printed on stderr
+        '''
+        assert sequence not in Primer.collection        \
+            or Primer.collection[sequence]==taglength,  \
+            "Primer %s must always be used with tags of the same length" % sequence
+        Primer.collection[sequence]=taglength
+        self.raw=sequence
+        self.sequence = NucSequence('primer',sequence)
+        self.lseq = len(self.sequence)
+        self.align=FreeEndGapFullMatch()
+        self.align.match=4
+        self.align.mismatch=-2
+        self.align.opengap=-2
+        self.align.extgap=-2
+        self.error=error
+        # Score of an alignment with exactly `error` mismatches and no gap.
+        self.minscore = (self.lseq-error) * self.align.match + error * self.align.mismatch
+        if verbose:
+            print >>sys.stderr,sequence,":",self.lseq,"*",self.align.match,"+",error,"*",self.align.mismatch,"=",self.minscore
+        self.taglength=taglength
+        self.align.seqB=self.sequence
+        self.direct = direct
+        self.verbose=verbose
+    def complement(self):
+        """
+        Return a new Primer with the same raw sequence and settings, the
+        opposite `direct` flag and a complemented search sequence.
+        """
+        p = Primer(self.raw,
+                  self.taglength,
+                  not self.direct,verbose=self.verbose,
+                  error=self.error)
+        p.sequence=p.sequence.complement()
+        p.align.seqB=p.sequence
+        return p
+    def __hash__(self):
+        # Consistent with __eq__: only the raw sequence is considered.
+        return hash(str(self.raw))
+    def __eq__(self,primer):
+        return self.raw==primer.raw 
+    def __call__(self,sequence):
+        """
+        Align the primer against `sequence`.
+        Returns None when `sequence` is not longer than the primer or when
+        the alignment score is below `minscore`; otherwise returns the tuple
+        (score, start, end, tag) where `tag` is None when no tag length is
+        defined or when the sequence is too short to hold the tag.
+        """
+        if len(sequence) <= self.lseq:
+            return None
+        if self.verbose:
+            print >>sys.stderr,len(sequence) , self.lseq,len(sequence) < self.lseq
+        self.align.seqA=sequence
+        ali=self.align()
+        if self.verbose:
+            print >>sys.stderr,ali
+            print >>sys.stderr,"Score : %d  Minscore : %d \n" %(ali.score,self.minscore)
+        if ali.score >= self.minscore:
+            score = ali.score
+            # NOTE(review): presumably the first and last gap entries of the
+            # aligned primer give the free end gaps, hence the match limits
+            # on `sequence` -- confirm against the alignment class.
+            start = ali[1].gaps[0][1]
+            end = len(ali[1])-ali[1].gaps[-1][1]
+            if self.taglength is not None:
+                if isinstance(self.sequence, DNAComplementSequence):
+                    # Complemented primer: the tag is read after the match
+                    # and complemented.
+                    if (len(sequence)-end) >= self.taglength:
+                        tag=str(sequence[end:end+self.taglength].complement())
+                    else:
+                        tag=None
+                else:
+                    # Plain primer: the tag is read just before the match.
+                    if start >= self.taglength:                
+                        tag=str(sequence[start - self.taglength:start])
+                    else:
+                        tag=None
+            else:
+                tag=None
+            return score,start,end,tag
+        return None 
+    def __str__(self):
+        return "%s: %s" % ({True:'D',False:'R'}[self.direct],self.raw)
+    __repr__=__str__
+def tagpair(x):
+    x=tuple(lower(y.strip()) for y in x.split(':'))
+    if len(x)==1:
+        x = (x[0],x[0])
+    return x
+def readTagfile(filename):
+    """
+    Read the sample description file and build the primer lookup table.
+    The data file describing tags and primers for each sample is a column
+    file following this format:
+    experiment sample tags forward_primer reverse_primer partial
+    where `tags` is parsed by tagpair ('forward:reverse', or a single tag
+    used for both sides). A tag can be specified as - if no tag is used.
+    Lines starting with '#' are skipped and extra annotations follow '@'.
+    The function reads the module-level `options` object (`options.error`
+    and `options.debug`), which is set in the script entry point.
+    @return: dictionary mapping each Primer to a dictionary keyed by the
+             complement of the partner primer (or None for partial
+             fragments), itself mapping tag pairs to the sample annotations
+    """
+    tab=ColumnFile(filename,strip=True,
+                            types=(str,str,tagpair,lower,lower,bool),
+                            head=('experiment','sample',
+                                  'tags',
+                                  'forward_primer','reverse_primer',
+                                  'partial'),
+                            skip="#",
+                            extra="@")
+    primers = {}
+    for p in tab:
+        # Forward primer entry, created on first use.
+        forward=Primer(p['forward_primer'],
+                       len(p['tags'][0]) if p['tags'][0]!='-' else None,
+                       True,
+                       error=options.error,verbose=options.debug)
+        fp = primers.get(forward,{})
+        primers[forward]=fp
+        # Reverse primer entry, created on first use.
+        reverse=Primer(p['reverse_primer'],
+                       len(p['tags'][1]) if p['tags'][1]!='-' else None,
+                       False,
+                       error=options.error,verbose=options.debug)
+        rp = primers.get(reverse,{})
+        primers[reverse]=rp
+        # Each primer is linked to the complement of its partner primer.
+        cf=forward.complement()
+        cr=reverse.complement()
+        dpp=fp.get(cr,{})
+        fp[cr]=dpp
+        rpp=rp.get(cf,{})
+        rp[cf]=rpp
+        tags = (p['tags'][0] if p['tags'][0]!='-' else None,
+                p['tags'][1] if p['tags'][1]!='-' else None)
+        assert tags not in dpp, \
+               "tag pair %s is already used with primer pairs : (%s,%s)" % (str(tags),forward,reverse)
+        extras = p.get('__extra__',{})
+        data   ={'experiment':p['experiment'],
+                   'sample':    p['sample']
+                }
+        data.update(extras)
+        # The same annotation dictionary is shared by both orientations.
+        dpp[tags]=data
+        rpp[tags]=data
+        if p['partial']:
+            # Partial fragments are stored under the None key; a tag may
+            # identify only one sample on each side.
+            dpartial = fp.get(None,{})
+            fp[None]=dpartial
+            rpartial = rp.get(None,{})
+            rp[None]=rpartial
+            dt = [x for x in dpartial if x[0]==tags[0]]
+            rt = [x for x in rpartial if x[1]==tags[1]]
+            assert not(dt) and not(rt), \
+                "partial fragment are not usable with primer pair : (%s,%s)" % (forward,reverse)
+            dpartial[tags]=data
+            rpartial[tags]=data
+    return primers
+def annotate(sequence,options):
+    def sortMatch(m1,m2):
+        if m1[1] is None and m2[1] is None:
+            return 0
+        if m1[1] is None:
+            return 1
+        if m2[1] is None:
+            return -1
+        return cmp(m1[1][1],m2[1][2])
+    if hasattr(sequence, "quality"):
+        q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality),0)/len(sequence.quality)*10
+        sequence['avg_quality']=q
+        q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality[0:10]),0)
+        sequence['head_quality']=q
+        if len(sequence.quality[10:-10]) :
+            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality[10:-10]),0)/len(sequence.quality[10:-10])*10
+            sequence['mid_quality']=q
+        q = -reduce(lambda x,y:x+y,(math.log10(z) for z in sequence.quality[-10:]),0)
+        sequence['tail_quality']=q
+    primers = options.primers
+    if options.debug:
+        print >>sys.stderr,"directmatch"
+    directmatch = [(p,p(sequence)) for p in primers]
+    directmatch.sort(cmp=sortMatch)
+    directmatch=directmatch[0] if directmatch[0][1] is not None else None
+    if options.debug:
+        print  >>sys.stderr,">>>>",directmatch
+    if directmatch is None:
+        sequence['error']='No primer match'
+        return False,sequence
+    match=str(sequence[directmatch[1][1]:directmatch[1][2]])
+    sequence['seq_length_ori']=len(sequence)
+    sequence = sequence[directmatch[1][2]:]
+    if directmatch[0].direct:
+        sequence['direction']='forward'
+        sequence['forward_score']=directmatch[1][0]
+        sequence['forward_primer']=directmatch[0].raw
+        sequence['forward_match']=match
+    else:
+        sequence['direction']='reverse'
+        sequence['reverse_score']=directmatch[1][0]
+        sequence['reverse_primer']=directmatch[0].raw
+        sequence['reverse_match']=match
+    del sequence['cut']
+    primers = options.primers[directmatch[0]]
+    if options.debug:
+        print  >>sys.stderr,"reverse match"
+    reversematch = [(p,p(sequence)) for p in primers if p is not None]
+    reversematch.sort(cmp=sortMatch)
+    reversematch = reversematch[0] if reversematch[0][1] is not None else None
+    if options.debug:
+        print  >>sys.stderr,"<<<<",reversematch
+    if reversematch is None and None not in primers:
+        if directmatch[0].direct:
+            message = 'No reverse primer match'
+        else:
+            message = 'No direct primer match'
+        sequence['error']=message
+        return False,sequence
+    if reversematch is None:
+        sequence['status']='partial'
+        if directmatch[0].direct:
+            tags=(directmatch[1][3],None)
+        else:
+            tags=(None,directmatch[1][3])
+        samples = primers[None]
+    else:
+        sequence['status']='full'
+        match=str(sequence[reversematch[1][1]:reversematch[1][2]].complement())
+        sequence = sequence[0:reversematch[1][1]]
+        if directmatch[0].direct:
+            tags=(directmatch[1][3],reversematch[1][3])
+            sequence['reverse_score']=reversematch[1][0]
+            sequence['reverse_primer']=reversematch[0].raw
+            sequence['reverse_match']=match
+            sequence['forward_tag']=tags[0]
+            sequence['reverse_tag']=tags[1]
+        else:
+            tags=(reversematch[1][3],directmatch[1][3])
+            sequence['forward_score']=reversematch[1][0]
+            sequence['forward_primer']=reversematch[0].raw
+            sequence['forward_match']=match
+        del sequence['cut']
+        sequence['forward_tag']=tags[0]
+        sequence['reverse_tag']=tags[1]
+        samples = primers[reversematch[0]]
+    if not directmatch[0].direct:
+        sequence=sequence.complement()
+        del sequence['complemented']
+    sample=None
+    if tags[0] is not None:                                     # Direct  tag known
+        if tags[1] is not None:                                 # Reverse tag known
+            sample = samples.get(tags,None)             
+        else:                                                   # Reverse tag known
+            s=[samples[x] for x in samples if x[0]==tags[0]]
+            if len(s)==1:
+                sample=s[0]
+            elif len(s)>1:
+                sequence['error']='multiple samples match tags'
+                return False,sequence
+            else:
+                sample=None
+    else:                                                       # Direct tag unknown
+        if tags[1] is not None:                                 # Reverse tag known
+            s=[samples[x] for x in samples if x[1]==tags[1]]
+            if len(s)==1:
+                sample=s[0]
+            elif len(s)>1:
+                sequence['error']='multiple samples match tags'
+                return False,sequence
+            else:                                               # Reverse tag known
+                sample=None
+    if sample is None:
+        sequence['error']="Cannot assign sequence to a sample"
+        return False,sequence
+    sequence._info.update(sample)
+    sequence['seq_length']=len(sequence)
+    return True,sequence
+if __name__ == '__main__':
+    optionParser = getOptionManager([addNGSOptions,addInOutputOption], progdoc=__doc__)
+    (options, entries) = optionParser()
+#    assert options.direct is not None or options.taglist is not None, \
+#         "you must specify option -d ou -t"
+    assert options.taglist is not None,"you must specify option  -t"
+#    if options.taglist is not None:
+    primers=readTagfile(options.taglist)
+#TODO: Patch when no taglists
+#    else:
+#        options.direct=options.direct.lower()
+#        options.reverse=options.reverse.lower()
+#        primers={options.direct:(options.taglength,{})}
+#        if options.reverse is not None:
+#            reverse = options.reverse
+#        else:
+#            reverse = '-'
+#        primers[options.direct][1][reverse]={'-':('-','-',True,None)}
+    options.primers=primers
+    if options.unidentified is not None:
+        unidentified = open(options.unidentified,"w")
+    writer = sequenceWriterGenerator(options)
+    if options.unidentified is not None:
+        unidentified = sequenceWriterGenerator(options,open(options.unidentified,"w"))
+    else :
+        unidentified = None
+    for seq in entries:
+        good,seq = annotate(seq,options)
+        if good:
+            writer(seq)
+        elif unidentified is not None:
+            unidentified(seq)
diff --git a/src/obiaddtaxids.py b/src/obiaddtaxids.py
new file mode 100644
index 0000000..052fe1f
--- /dev/null
+++ b/src/obiaddtaxids.py
@@ -0,0 +1,424 @@
+:py:mod:`obiaddtaxids`: adds *taxids* to sequence records using an ecopcr database
+.. codeauthor:: Celine Mercier <celine.mercier at metabarcoding.org>
+The :py:mod:`obiaddtaxids` command annotates sequence records with a *taxid* based on 
+a taxon scientific name stored in the sequence record header.
+Taxonomic information linking a *taxid* to a taxon scientific name is stored in a 
+database formatted as an ecoPCR database (see :doc:`obitaxonomy <obitaxonomy>`) or 
+a NCBI taxdump (see NCBI ftp site).
+The way the taxon scientific name is extracted from the sequence record header
+depends on the following options:
+    - By default, the sequence identifier is used. Underscore characters (``_``) are substituted
+      by spaces before looking for the taxon scientific name into the taxonomic
+      database.
+    - If the input file is an ``OBITools`` extended :doc:`fasta <../fasta>` format, the ``-k`` option
+      specifies the attribute containing the taxon scientific name.
+    - If the input file is a :doc:`fasta <../fasta>` file imported from the UNITE or from the SILVA web sites,
+      the ``-f`` option allows specifying this source and parsing correctly the associated 
+      taxonomic information.
+For each sequence record, :py:mod:`obiaddtaxids` tries to match the extracted taxon scientific name 
+with those stored in the taxonomic database.
+    - If a match is found, the sequence record is annotated with the corresponding *taxid*.
+    - If the ``-g`` option is set and the taxon name is composed of two words and only the 
+      first one is found in the taxonomic database at the 'genus' rank, :py:mod:`obiaddtaxids` 
+      considers that it found the genus associated with this sequence record and it stores this 
+      sequence record in the file specified by the ``-g`` option.
+    - If the ``-u`` option is set and no taxonomic information is retrieved from the 
+      scientific taxon name, the sequence record is stored in the file specified by the 
+      ``-u`` option.
+    *Example*
+    .. code-block:: bash
+       > obiaddtaxids -k species_name -g genus_identified.fasta \\
+                      -u unidentified.fasta -d my_ecopcr_database \\
+                      my_sequences.fasta > identified.fasta
+    Tries to match the value associated with the ``species_name`` key of each sequence record 
+    from the ``my_sequences.fasta`` file with a taxon name from the ecoPCR database ``my_ecopcr_database``. 
+            - If there is an exact match, the sequence record is stored in the ``identified.fasta`` file. 
+            - If not and the ``species_name`` value is composed of two words, :py:mod:`obiaddtaxids` 
+              considers the first word as a genus name and tries to find it in the taxonomic database. 
+                - If a genus is found, the sequence record is stored in the ``genus_identified.fasta``
+                  file. 
+                - Otherwise the sequence record is stored in the ``unidentified.fasta`` file.
+import re
+from obitools.fasta import fastaIterator,formatFasta
+from obitools.options import getOptionManager
+from obitools.options.taxonomyfilter import addTaxonomyDBOptions
+from obitools.options.taxonomyfilter import loadTaxonomyDatabase
+from obitools.format.genericparser import genericEntryIteratorGenerator
+from obitools import NucSequence
+def addObiaddtaxidsOptions(optionManager):
+    """
+    Register the command-line options specific to obiaddtaxids.
+
+    `optionManager` only needs to provide an optparse-style ``add_option``
+    method. The options declared are:
+
+        - ``-g/--genus_found``          -> ``options.genus_found`` (default None)
+        - ``-u/--unidentified``         -> ``options.unidentified`` (default None)
+        - ``-s/--dirty``                -> ``options.dirty`` (default None)
+        - ``-f/--format``               -> ``options.db_type`` (default 'raw')
+        - ``-k/--key-name``             -> ``options.tagname`` (default '')
+        - ``-a/--restricting_ancestor`` -> ``options.res_anc`` (default '')
+    """
+    optionManager.add_option('-g','--genus_found',
+                             action="store", dest="genus_found",
+                             metavar="<FILENAME>",
+                             type="string",
+                             default=None,
+                             help="(not with UNITE databases) file used to store sequences with the genus found.")
+    optionManager.add_option('-u','--unidentified',
+                             action="store", dest="unidentified",
+                             metavar="<FILENAME>",
+                             type="string",
+                             default=None,
+                             help="file used to store completely unidentified sequences.")
+    optionManager.add_option('-s','--dirty',
+                             action='store', dest="dirty",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="(not with UNITE databases) if chosen, ALL the words in the name used to identify the sequences will be searched"
+                                  " when neither the exact name nor the genus have been found."
+                                  " Only use if the sequences in your database are badly named with useless words or numbers"
+                                  " in the name etc."
+                                  " The sequences identified this way will be written in <FILENAME>.")
+    # The help text below is built from adjacent literals: each continuation
+    # piece must carry its own leading space, otherwise sentences are glued.
+    optionManager.add_option('-f','--format',
+                             action="store", dest="db_type",
+                             metavar="<FORMAT>",
+                             type="string",
+                             default='raw',
+                             help="type of the database with the taxa to be added. Possibilities : 'raw', 'UNITE_FULL', 'UNITE_GENERAL' or 'SILVA'."
+                                  " The UNITE_FULL format is the one used for the 'Full UNITE+INSD dataset', and the UNITE_GENERAL format is the "
+                                  "one used for the 'General FASTA release'."
+                                  " Default : raw.")
+    optionManager.add_option('-k','--key-name',
+                             action="store", dest="tagname",
+                             metavar="<KEYNAME>",
+                             type="string",
+                             default='',
+                             help="name of the key attribute containing the taxon name in databases of 'raw' type. Default : the taxon name is the id "
+                             "of the sequence. The taxon name MUST have '_' between the words of the name when it's the id, and "
+                             "CAN be of this form when it's in a field.")
+    optionManager.add_option('-a','--restricting_ancestor',
+                             action="store", dest="res_anc",
+                             type="str",
+                             metavar="<ANCESTOR>",
+                             default='',
+                             help="can be a word or a taxid (number). Enables to restrict the search of taxids under a "
+                                  "specified ancestor. If it's a word, it's the field containing the ancestor's taxid "
+                                  "in each sequence's header (can be different for each sequence). If it's a number, "
+                                  "it's the taxid of the ancestor (in which case it's the same for all the sequences)")
+def numberInStr(s) :
+    """Return True when at least one character of `s` is a digit, False otherwise."""
+    return any(character.isdigit() for character in s)
+def UNITEIterator_FULL(f):
+    """
+    Iterate over the '>'-delimited entries of `f` and yield one NucSequence
+    per entry.
+
+    The header line is split on '|': the first field (without its leading
+    character) is the identifier and the second field is the taxonomic path.
+    Each yielded record carries three attributes:
+
+        - ``species_name``: text following the ``s__`` marker of the path
+        - ``genus_name``: text between the ``g__`` marker and the character
+          preceding the ``s__`` marker
+        - ``path``: the path with every ``<letter>__`` prefix removed and
+          ';' replaced by ','
+
+    The second line of the entry is used as the sequence.
+    """
+    entry_reader = genericEntryIteratorGenerator(startEntry='>')
+    for entry in entry_reader(f) :
+        lines = entry.split('\n')
+        fields = lines[0].split('|')
+        record = NucSequence(fields[0][1:], lines[1])
+        lineage = fields[1]
+        # Offsets point just after the 3-character rank markers
+        species_start = lineage.index('s__') + 3
+        record['species_name'] = lineage[species_start:]
+        genus_start = lineage.index('g__') + 3
+        # species_start-4 stops one character before the 's__' marker
+        record['genus_name'] = lineage[genus_start:species_start-4]
+        record['path'] = re.sub('[a-z]__', '', lineage).replace(';', ',')
+        yield record
+def UNITEIterator_GENERAL(f):
+    """
+    Iterate over the '>'-delimited entries of `f` and yield one NucSequence
+    per entry.
+
+    The header line is split on '|': the first field (without its leading
+    character) is the identifier and the fifth field is the taxonomic path.
+    Each yielded record carries:
+
+        - ``species_name``: the identifier with '_' replaced by spaces
+        - ``path``: the path with every ``<letter>__`` prefix removed,
+          ';' replaced by ',' and then ',,' collapsed to ','
+
+    The second line of the entry is used as the sequence.
+    """
+    entry_reader = genericEntryIteratorGenerator(startEntry='>')
+    for entry in entry_reader(f) :
+        lines = entry.split('\n')
+        fields = lines[0].split('|')
+        identifier = fields[0][1:]
+        record = NucSequence(identifier, lines[1])
+        record['species_name'] = identifier.replace("_", " ")
+        lineage = re.sub('[a-z]__', '', fields[4])
+        record['path'] = lineage.replace(';', ',').replace(',,', ',')
+        yield record
+def SILVAIterator(f, tax):
+    """
+    Iterate over the '>'-delimited entries of `f` and yield one NucSequence
+    per entry.
+
+    The header line is split on ' | ': the first field (without its leading
+    character) is the identifier, the second one is the organism name. When
+    the name contains a parenthesised part that is longer than 2 characters,
+    holds no ')' and no digit, that part is interpreted:
+
+        - a single capitalised word is looked up with
+          ``tax.findTaxonByName``; if found, it is stored as
+          ``ancestor_name`` / ``ancestor``; if the lookup raises KeyError
+          the whole name is kept as ``species_name``
+        - anything else is stored as ``common_name``
+
+    In the two interpreted cases the untouched name is saved in
+    ``original_silva_name`` and ``species_name`` receives the text located
+    before the parenthesis. In every other case ``species_name`` is the
+    whole second field.
+    """
+    entry_reader = genericEntryIteratorGenerator(startEntry='>')
+    for entry in entry_reader(f) :
+        lines = entry.split('\n')
+        fields = lines[0].split(' | ')
+        record = NucSequence(fields[0][1:], lines[1])
+        raw_name = fields[1]
+
+        # Text following the first '(' minus its last character
+        bracketed = None
+        if '(' in raw_name :
+            bracketed = raw_name.split('(')[1][:-1]
+
+        interpretable = (bracketed is not None
+                         and len(bracketed) > 2
+                         and ')' not in bracketed
+                         and not numberInStr(bracketed))
+        if not interpretable :
+            record['species_name'] = raw_name
+            yield record
+            continue
+
+        species_name = raw_name.split('(')[0][:-1]
+        ancestor = None
+        lookup_failed = False
+        if len(bracketed.split(' ')) == 1 and bracketed[0].isupper() :
+            try:
+                ancestor = tax.findTaxonByName(bracketed)
+            except KeyError :
+                lookup_failed = True
+
+        if lookup_failed :
+            record['species_name'] = raw_name
+        else :
+            if ancestor is None :
+                record['common_name'] = bracketed
+            else :
+                record['ancestor_name'] = bracketed
+                record['ancestor'] = ancestor[0]
+            record['original_silva_name'] = raw_name
+            record['species_name'] = species_name
+        yield record
+def dirtyLookForSimilarNames(name, tax, ancestor):
+    """
+    Look `name` up in the taxonomy and return a ``(similar_name, taxid)`` pair.
+
+    `tax` must provide ``findTaxonByName(name)`` (raising KeyError for an
+    unknown name) and ``isAncestor(ancestor, taxid)``. Items 0 and 3 of the
+    value returned by ``findTaxonByName`` are used as taxid and name.
+
+    Returns ``('', None)`` when the name is unknown. When `ancestor` is not
+    None and is not an ancestor of the taxon found, the name found is still
+    returned but the taxid is None.
+    """
+    similar_name = ''
+    taxid = None
+    try :
+        t = tax.findTaxonByName(name)
+        taxid = t[0]
+        similar_name = t[3]
+    except KeyError :
+        taxid = None
+    # The ancestor restriction is only checked for a taxon that was actually
+    # found: the taxonomy is never queried with a None taxid.
+    if taxid is not None and ancestor is not None and not tax.isAncestor(ancestor, taxid) :
+        taxid = None
+    return similar_name, taxid
+def getGenusTaxid(tax, species_name, ancestor):
+    """
+    Return the taxid matching the first word of `species_name`, provided the
+    taxonomy ranks it as a 'genus'.
+
+    The lookup is delegated to getTaxid() with the same `ancestor`
+    restriction. KeyError is raised when the rank of the taxon found is not
+    'genus' (and propagates from getTaxid() as well).
+    """
+    first_word = species_name.split(' ')[0]
+    candidate = getTaxid(tax, first_word, ancestor)
+    if tax.getRank(candidate) == 'genus' :
+        return candidate
+    raise KeyError()
+def getTaxid(tax, name, ancestor):
+    """
+    Return the taxid registered for `name` in the taxonomy `tax`.
+
+    The taxid is read at position ``[0][0]`` of the value returned by
+    ``tax.findTaxonByName(name)``. When `ancestor` is not None, the taxon
+    must have it as an ancestor (``tax.isAncestor``), otherwise KeyError
+    is raised.
+    """
+    match = tax.findTaxonByName(name)
+    taxid = match[0][0]
+    if ancestor is None or tax.isAncestor(ancestor, taxid) :
+        return taxid
+    raise KeyError()
+def get_species_name(s, options) :
+    """
+    Extract the taxon name to look up for the record `s`.
+
+    When ``options.tagname`` is empty the record identifier is used and the
+    cleaned name is also stored in ``s['species_name']``; otherwise the
+    value of the attribute named by ``options.tagname`` is used, and None
+    is returned when the record has no such attribute.
+
+    Cleaning: underscores become spaces, and a two-word name whose second
+    word is 'sp', 'sp.' or 'unknown' is reduced to its first word.
+    """
+    key = options.tagname
+    from_identifier = (key == '')
+    if not from_identifier and key not in s :
+        return None
+
+    name = s.id if from_identifier else s[key]
+    if "_" in name :
+        name = name.replace('_', ' ')
+    words = name.split(' ')
+    if len(words) == 2 and words[1] in ('sp', 'sp.', 'unknown') :
+        name = words[0]
+    if from_identifier :
+        s['species_name'] = name
+    return name
+def getVaguelySimilarNames(species_name, tax, restricting_ancestor, sequence=None) :
+    """
+    Search the taxonomy for each word of `species_name` taken separately.
+
+    Words listed in `uselessWords` are skipped. Each remaining word is
+    looked up with dirtyLookForSimilarNames(); the first name found, then
+    any longer one, is stored in ``sequence['species_name']``.
+
+    `sequence` is the record to annotate. For backward compatibility it
+    defaults to the module-level variable ``s`` bound by the main loop of
+    the script, which is what this function used implicitly before.
+
+    Returns True when at least one word matched a taxon, False otherwise.
+    """
+    if sequence is None :
+        # Historical behaviour: work on the record currently bound to the
+        # global name ``s``.
+        sequence = s
+    kindOfFound = False
+    uselessWords = ['sp', 'sp.', 'fungus', 'fungal', 'unknown', 'strain', 'associated', 'uncultured']
+    for word in species_name.split(' ') :
+        if word in uselessWords :
+            continue
+        similar_name, taxid = dirtyLookForSimilarNames(word, tax, restricting_ancestor)
+        if taxid is None :
+            continue
+        # kindOfFound is tested first so that a record without any
+        # 'species_name' attribute does not raise KeyError on the first match.
+        if not kindOfFound or len(similar_name) > len(sequence['species_name']) :
+            sequence['species_name'] = similar_name
+            kindOfFound = True
+    return kindOfFound
+def openFiles(options) :
+    """
+    Replace, in place, the file names stored in ``options.unidentified``,
+    ``options.genus_found`` and ``options.dirty`` by files opened for
+    writing. Attributes set to None are left untouched.
+    """
+    for attribute in ('unidentified', 'genus_found', 'dirty') :
+        filename = getattr(options, attribute)
+        if filename is not None :
+            setattr(options, attribute, open(filename, 'w'))
+if __name__=='__main__':
+    # Command-line entry point: reads the records, tries to attach a taxid to
+    # each of them and dispatches them to stdout or to the optional
+    # genus / dirty / unidentified files.
+    optionParser = getOptionManager([addObiaddtaxidsOptions, addTaxonomyDBOptions], progdoc=__doc__)
+    (options,entries) = optionParser()
+    tax=loadTaxonomyDatabase(options)
+    # Select the record iterator matching the -f option.
+    # NOTE(review): a value of -f other than the four below matches no branch
+    # here nor in the processing section, so no record is processed at all
+    # (openFiles() is still called) -- confirm whether an error is wanted.
+    if options.db_type == 'raw' :
+        entryIterator = fastaIterator
+        entries = entryIterator(entries)
+    elif options.db_type == 'UNITE_FULL' :
+        entryIterator = UNITEIterator_FULL
+        entries = entryIterator(entries)
+    elif options.db_type == 'UNITE_GENERAL' :
+        entryIterator = UNITEIterator_GENERAL
+        entries = entryIterator(entries)
+    elif options.db_type == 'SILVA' :
+        entryIterator = SILVAIterator
+        entries = entryIterator(entries, tax)
+        # SILVAIterator stores the name in 'species_name': any -k value is overridden
+        options.tagname = 'species_name'
+    openFiles(options)
+    if (options.db_type == 'raw') or (options.db_type == 'SILVA') :
+        # -a empty: no restriction; -a numeric: one taxid for all records;
+        # otherwise -a names the attribute holding the ancestor taxid of each record.
+        if options.res_anc == '' :
+            restricting_ancestor = None
+        elif options.res_anc.isdigit() :
+            restricting_ancestor = int(options.res_anc)
+        for s in entries:
+            if options.res_anc != '' and not options.res_anc.isdigit():
+                restricting_ancestor = int(s[options.res_anc])
+            species_name = get_species_name(s, options)
+            # Records without a usable name are silently dropped (written nowhere)
+            if species_name != None :    
+                try:
+                    taxid = getTaxid(tax, species_name, restricting_ancestor)
+                    s['taxid'] = taxid
+                    print formatFasta(s)
+                except KeyError:
+                    # Exact name not found: fall back, in order, to the genus
+                    # lookup (-g), the word-by-word lookup (-s) and finally
+                    # the unidentified file (-u).
+                    genusFound = False
+                    if options.genus_found is not None and len(species_name.split(' ')) >= 2 :
+                        try:
+                            genusTaxid = getGenusTaxid(tax, species_name, restricting_ancestor)
+                            s['genus_taxid'] = genusTaxid
+                            print>>options.genus_found, formatFasta(s)
+                            genusFound = True
+                        except KeyError :
+                            pass
+                    kindOfFound = False
+                    if options.dirty is not None and not genusFound :
+                        # getVaguelySimilarNames() works on the global record `s`
+                        kindOfFound = getVaguelySimilarNames(species_name, tax, restricting_ancestor)
+                        if kindOfFound == True :
+                            print>>options.dirty, formatFasta(s)
+                    if options.unidentified is not None and not genusFound and not kindOfFound :
+                        print>>options.unidentified,formatFasta(s)
+    elif ((options.db_type =='UNITE_FULL') or (options.db_type =='UNITE_GENERAL')) :
+        # UNITE records are always searched below the taxon named 'Fungi'
+        restricting_ancestor = tax.findTaxonByName('Fungi')[0][0]
+        for s in entries :
+            try :
+                species_name = s['species_name']
+                taxid = getTaxid(tax, species_name, restricting_ancestor)
+                s['taxid'] = taxid
+                s['rank'] = tax.getRank(taxid)
+                print formatFasta(s)
+            except KeyError:
+                # NOTE(review): a KeyError raised by s['species_name'] itself also
+                # lands here, with species_name left at its previous value.
+                genusFound = False
+                if options.genus_found is not None :
+                    try:
+                        genusTaxid = getGenusTaxid(tax, species_name, restricting_ancestor)
+                        s['genus_taxid'] = genusTaxid
+                        print>>options.genus_found, formatFasta(s)
+                        genusFound = True
+                    except KeyError:
+                        pass
+                if options.unidentified is not None and not genusFound :
+                    print>>options.unidentified,formatFasta(s)
diff --git a/src/obiannotate.py b/src/obiannotate.py
new file mode 100755
index 0000000..b04c129
--- /dev/null
+++ b/src/obiannotate.py
@@ -0,0 +1,85 @@
+:py:mod:`obiannotate`: adds/edits sequence record annotations
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obiannotate` is the command that allows adding/modifying/removing 
+annotation attributes attached to sequence records.
+Once such attributes are added, they can be used by the other OBITools commands for 
+filtering purposes or for statistics computing.
+*Example 1:*
+    .. code-block:: bash
+        > obiannotate -S short:'len(sequence)<100' seq1.fasta > seq2.fasta
+    The above command adds an attribute named *short* which has a boolean value indicating whether the sequence length is less than 100bp.
+*Example 2:*
+    .. code-block:: bash
+        > obiannotate --seq-rank seq1.fasta | \\
+          obiannotate -C --set-identifier '"'FungA'_%05d" % seq_rank' \\
+          > seq2.fasta
+    The above command adds a new attribute whose value is the sequence record 
+    entry number in the file. Then it clears all the sequence record attributes 
+    and sets the identifier to a string beginning with *FungA_* followed by a 
+    suffix with 5 digits containing the sequence entry number.
+*Example 3:*
+    .. code-block:: bash
+        > obiannotate -d my_ecopcr_database \\
+          --with-taxon-at-rank=genus seq1.fasta > seq2.fasta
+    The above command adds taxonomic information at the *genus* rank to the 
+    sequence records. 
+*Example 4:*
+    .. code-block:: bash
+        > obiannotate -S 'new_seq:str(sequence).replace("a","t")' \\
+          seq1.fasta | obiannotate --set-sequence new_seq > seq2.fasta
+    The overall aim of the above command is to edit the *sequence* object itself, 
+    by replacing all nucleotides *a* by nucleotides *t*. First, a new attribute 
+    named *new_seq* is created, which contains the modified sequence, and then 
+    the former sequence is replaced by the modified one.
+from obitools.options import getOptionManager
+from obitools.options.bioseqfilter import addSequenceFilteringOptions
+from obitools.options.bioseqfilter import filterGenerator
+from obitools.options.bioseqedittag import addSequenceEditTagOptions
+from obitools.options.bioseqedittag import sequenceTaggerGenerator
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+if __name__=='__main__':
+    # Command-line entry point: records accepted by the selection filter are
+    # tagged/edited, then every record (selected or not) is written out.
+    option_sets = [addSequenceFilteringOptions,
+                   addSequenceEditTagOptions,
+                   addInOutputOption]
+    parse_command_line = getOptionManager(option_sets, progdoc=__doc__)
+    options, entries = parse_command_line()
+
+    write_record = sequenceWriterGenerator(options)
+    tag_record = sequenceTaggerGenerator(options)
+    is_selected = filterGenerator(options)
+
+    for record in entries:
+        if is_selected(record):
+            tag_record(record)
+        write_record(record)
diff --git a/src/obiclean.py b/src/obiclean.py
new file mode 100644
index 0000000..36af2dd
--- /dev/null
+++ b/src/obiclean.py
@@ -0,0 +1,416 @@
+:py:mod:`obiclean`: tags a set of sequences for PCR/sequencing errors identification 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obiclean` is a command that classifies sequence records either as ``head``, ``internal`` or ``singleton``.
+For that purpose, two pieces of information are used:
+    - sequence record counts
+    - sequence similarities
+*S1* a sequence record is considered as a variant of *S2* another sequence record if and only if:
+    - ``count`` of *S1* divided by ``count`` of *S2* is less than or equal to the ratio *R*.
+      *R* default value is set to 0.5, and can be adjusted between 0 and 1 with the ``-r`` option.
+    - both sequences are *related* to one another (they can align with some differences, 
+      the maximum number of differences can be specified by the ``-d`` option).
+Considering *S* a sequence record, the following properties hold for *S* tagged as:
+    - ``head``: 
+       + there exists **at least one** sequence record in the dataset that is a variant of *S*
+       + there exists **no** sequence record in the dataset such that *S* is a variant of this 
+         sequence record
+    - ``internal``:
+       + there exists **at least one** sequence record in the dataset such that *S* is a variant
+         of this sequence record
+    - ``singleton``: 
+       + there exists **no** sequence record in the dataset that is a variant of *S*
+       + there exists **no** sequence record in the dataset such that *S* is a variant of this 
+         sequence record
+By default, tagging is done once for the whole dataset, but it can also be done sample by sample
+by specifying the ``-s`` option. In such a case, the counts are extracted from the sample
+information stored in the attribute named by the ``-s`` option.
+Finally, each sequence record is annotated with three new attributes ``head``, ``internal`` and 
+``singleton``. The attribute values are the numbers of samples in which the sequence record has 
+been classified in this manner.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+from obitools.graph import UndirectedGraph,Indexer
+from obitools.graph.dag import DAG
+from obitools.utils import progressBar
+from obitools.align import LCS
+from obitools.align import isLCSReachable
+import sys
+import math
+def addCleanOptions(optionManager):
+    """
+    Register the command-line options specific to obiclean.
+
+    `optionManager` only needs to provide an optparse-style ``add_option``
+    method. The options declared are:
+
+        - ``-d/--distance`` -> ``options.dist`` (int, default 1)
+        - ``-s/--sample``   -> ``options.sample`` (default None)
+        - ``-g/--graph``    -> ``options.graph`` (file name, default None)
+        - ``-r/--ratio``    -> ``options.ratio`` (float, default 0.5)
+        - ``-H/--head``     -> ``options.onlyhead`` (flag, default False)
+        - ``-C/--cluster``  -> ``options.clustermode`` (flag, default False)
+    """
+    optionManager.add_option('-d','--distance',
+                             action="store", dest="dist",
+                             metavar="###",
+                             type="int",
+                             default=1,
+                             help="Maximum numbers of errors between two variant sequences [default: 1]")
+    optionManager.add_option('-s','--sample',
+                             action="store", dest="sample",
+                             metavar="<TAGNAME>",
+                             type="str",
+                             default=None,
+                             help="Tag containing sample descriptions")
+    # -g expects a file name, so the usage message advertises <FILENAME>
+    optionManager.add_option('-g','--graph',
+                             action="store", dest="graph",
+                             metavar="<FILENAME>",
+                             type="str",
+                             default=None,
+                             help="File name where the clustering graphs are saved")
+    # The default is a real float so that it does not depend on the option
+    # manager converting string defaults.
+    optionManager.add_option('-r','--ratio',
+                             action="store", dest="ratio",
+                             metavar="<FLOAT>",
+                             type="float",
+                             default=0.5,
+                             help="Minimum ratio between counts of two sequence records so that the less abundant "
+                                  "one can be considered as a variant of the more abundant "
+                                  "[default: 0.5]")
+    optionManager.add_option('-H','--head',
+                             action="store_true", dest="onlyhead",
+                             default=False,
+                             help="Outputs only head tagged sequence records")
+    optionManager.add_option('-C','--cluster',
+                             action="store_true", dest="clustermode",
+                             default=False,
+                             help="Set obiclean in clustering mode")
+def lookforFather(node,sample):
+    """
+    Return the set of nodes reached from `node` by repeatedly moving to any
+    neighbour whose count in `sample` is strictly greater.
+
+    Neighbours that do not occur in `sample` are ignored. A node without
+    any more abundant neighbour is its own result.
+    """
+    ancestors = set()
+    for other in node.neighbourIterator():
+        occurrences = other['_sample']
+        if sample in occurrences and occurrences[sample] > node['_sample'][sample]:
+            ancestors.update(lookforFather(other, sample))
+    return ancestors or set([node])
+def cmpseqcount(s1,s2):
+    """
+    Comparison function ordering records by decreasing ``count``.
+
+    A record without a ``count`` attribute gets ``count = 1`` stored on it
+    as a side effect. Returns the result of ``cmp`` applied to the count of
+    `s2` and the count of `s1`.
+    """
+    for record in (s1, s2):
+        if 'count' not in record:
+            record['count']=1
+    return cmp(s2['count'],s1['count'])
+if __name__ == '__main__':
+    # Command-line entry point. Steps:
+    #   1. load every record and index it by sample
+    #   2. link records whose alignment shows between 1 and options.dist differences
+    #   3. for each sample, build a DAG oriented from abundant to rare records
+    #      and derive the head / internal / singleton status of each record
+    #   4. write the annotated records
+    optionParser = getOptionManager([addCleanOptions,addInOutputOption], progdoc=__doc__)
+    (options, entries) = optionParser()
+    # -H implies the clustering mode
+    if (options.onlyhead):
+        options.clustermode=True
+    globalIndex = Indexer()         # I keep correspondances for all graphs between 
+                                    # node id and sequence
+    db = []                         # sequences are stored in a list. The indexes in the list
+                                    # are corresponding to the node index in graphs
+    sampleOccurrences = []          # Contains the list of count distribution per samples
+                                    # The indexes in the list are corresponding to the node 
+                                    # index in graphs
+    graph = UndirectedGraph("error",indexer=globalIndex)
+    pcr= {}                         # For each sample store a set of node id occuring in this PCR
+    if options.graph is not None:
+        graphfile=open(options.graph,"w")
+    else:
+        graphfile=None
+    for s in entries:
+        nodeid = globalIndex.getIndex(s.id)
+        db.append(s)
+        # Without -s, all records belong to a single pseudo-sample "XXX"
+        # weighted by the record count (1 when the record has no count)
+        if options.sample is None:
+            sample = {"XXX":s['count'] if 'count' in s else 1}
+        else:
+            sample = s[options.sample]
+        sampleOccurrences.append(sample)
+        graph.addNode(s.id,shape='circle')
+        for sp in sample:
+            spcr = pcr.get(sp,set())
+            spcr.add(nodeid)
+            pcr[sp]=spcr
+    writer = sequenceWriterGenerator(options)            
+    ldb = len(db)    
+    # Width of the counters displayed by the progress bar.
+    # NOTE(review): math.log10 raises ValueError when ldb is 0 (empty input).
+    digit = int(math.ceil(math.log10(ldb)))
+    # NOTE(review): used as the progress bar total; the nested loops below
+    # visit ldb*(ldb-1)/2 pairs, not ldb*(ldb+1)/2 -- confirm intent.
+    aligncount = ldb*(ldb+1)/2
+    edgecount = 0
+    print >>sys.stderr
+    header = "Alignment  : %%0%dd x %%0%dd -> %%0%dd " % (digit,digit,digit)
+    progressBar(1,aligncount,True,"Alignment  : %s x %s -> %s " % ('-'*digit,'-'*digit, '0'*digit))
+    pos=1
+    aligner = LCS()
+    #
+    # We build the global levenstein graph
+    # Two sequences are linked if their distances are below
+    # options.dist (usually 1)
+    #
+    for i in xrange(ldb):
+        aligner.seqA = db[i]
+        li = len(db[i])
+        for j in xrange(i+1,ldb):
+            progressBar(pos,aligncount,head=header % (i,j,edgecount))
+            pos+=1
+            lj = len(db[j])
+            lm = max(li,lj)
+            lcsmin = lm - options.dist
+            # Cheap pre-filter: the alignment is only computed when
+            # isLCSReachable() accepts the pair for the score lcsmin
+            if isLCSReachable(db[i],db[j],lcsmin):
+                aligner.seqB=db[j]
+                ali = aligner()
+                llcs=ali.score
+                lali = len(ali[0])
+                # Observed distance = alignment length minus alignment score
+                obsdist = lali-llcs
+                if obsdist >= 1 and obsdist <= options.dist:
+                    graph.addEdge(index1=i, index2=j)
+                    edgecount+=1               
+    print >>sys.stderr
+    header = "Clustering sample  : %20s "
+    samplecount = len(pcr)
+    print >>sys.stderr,"Sample count : %d" % samplecount
+    progressBar(1,samplecount,True,head=header % "--")
+    isample=0
+    #
+    # We iterate through all PCR
+    #
+    for sample in pcr:
+        isample+=1
+        progressBar(isample,samplecount,head=header % sample)
+        seqids   = list(pcr[sample])
+        nnodes    = len(seqids)
+        #
+        # We build a sub DAG for each sample
+        #
+        sub = DAG(sample,indexer=globalIndex)
+        counts = []
+        for i in seqids:
+            c=sampleOccurrences[i][sample]
+            sub.addNode(index=i,count=c,oricount=c)
+            counts.append(c)
+        # Python 2 idiom: map(None, a, b) pairs the items like zip(a, b)
+        order  = map(None,counts,seqids)
+        # Most abundant records first, so that count1 >= count2 below
+        order.sort(key=lambda a : a[0],reverse=True)
+        for j in xrange(nnodes - 1):
+            count1,index1 = order[j]
+            for k in xrange(j+1,nnodes):
+                count2,index2 = order[k]
+                r = float(count2)/float(count1)
+                # An edge abundant -> rare is added when the count ratio does
+                # not exceed options.ratio and both records are linked in
+                # the global graph
+                if r <= options.ratio and graph.hasEdge(index1=index1,index2=index2):
+                    sub.addEdge(index1=index1, 
+                                index2=index2,
+                                ratio=r,
+                                arette = "%d -> %d" % (count1,count2))
+        if (options.clustermode):
+            # We transfer the weight of errors to the parent sequence
+            # when an error has several parents, we distribute its
+            # weight to each of its parent proportionally to the parent 
+            # weight. 
+            leaves = sub.getLeaves()
+            while leaves:
+                for l in leaves:
+                    l['color']='red'
+                    l['done']=True
+                    c = l['count']
+                    p = l.getParents()
+                    pc = [float(x['count']) for x in p]
+                    ps = sum(pc)
+                    pc = [x / ps * c for x in pc]
+                    for i in xrange(len(pc)):
+                        p[i]['count']+=int(round(pc[i]))
+                    # Next layer: nodes not done yet with no neighbour left undone.
+                    # NOTE(review): this statement sits inside the `for l` loop, so
+                    # it is recomputed for every leaf; only the last value is used
+                    # by the `while` test.
+                    leaves = [x for x in sub.nodeIterator(lambda n : 'done' not in n and not [y for y in n.neighbourIterator(lambda  k : 'done' not in k)])]
+            # Just clean the done tag set by the precedent loop
+            for x in sub.nodeIterator():
+                del x["done"]
+        # Annotate each sequences with its more probable parent.
+        # When a sequence has several potential parents, it is
+        # annotated with the heaviest one
+        heads = sub.getRoots()
+        sons  = []
+        for h in heads:
+            h['cluster']=h.label
+            if (options.clustermode):
+                h['head']   =True
+            sons.extend(h.neighbourIterator(lambda  k : 'cluster' not in k))
+            #
+            # Annotate the corresponding sequence
+            #
+            seq = db[h.index]
+            # sequence at least head in one PCR get the obiclean_head
+            # attribute
+            seq['obiclean_head']=True
+            if (options.clustermode):
+                # Store for each sample the cluster center related to
+                # this sequence 
+                if "obiclean_cluster" not in seq:
+                    seq['obiclean_cluster']={}
+                seq['obiclean_cluster'][sample]=h.label
+                # Store for each sample the count of this sequence plus 
+                # the count of all its related
+                if "obiclean_count" not in seq:
+                    seq["obiclean_count"]={}
+                seq["obiclean_count"][sample]=h['count']
+            if "obiclean_status" not in seq:
+                seq["obiclean_status"]={}
+            # 'h' (head) when len(h) > 0, 's' (singleton) otherwise
+            # (presumably len() of a node is its number of neighbours -- TODO confirm)
+            if len(h) > 0:
+                seq["obiclean_status"][sample]='h'
+            else:
+                seq["obiclean_status"][sample]='s'
+        # Breadth-first propagation of the cluster label from the roots to
+        # the nodes not labelled yet; all of them get the status 'i' (internal)
+        heads=sons
+        sons  = []
+        while heads:
+            for h in heads:
+                parents = h.getParents()
+                # Select the parent with the largest count
+                maxp=None
+                for p in parents:
+                    if maxp is None or p['count']>maxp['count']:
+                        maxp=p
+                if 'cluster' in maxp:
+                    cluster = maxp['cluster']
+                    h['cluster']=cluster
+                    sons.extend(h.neighbourIterator(lambda  k : 'cluster' not in k))
+                    #
+                    # Annotate the corresponding sequence
+                    #
+                    seq = db[h.index]
+                    if (options.clustermode):
+                        if "obiclean_cluster" not in seq:
+                            seq['obiclean_cluster']={}
+                        seq['obiclean_cluster'][sample]=cluster
+                        if "obiclean_count" not in seq:
+                            seq["obiclean_count"]={}
+                        seq["obiclean_count"][sample]=h['count']
+                # NOTE(review): `seq` is only rebound inside the `if` above; when
+                # the heaviest parent has no cluster yet, the status below is
+                # written on the record bound by a previous iteration -- verify.
+                if "obiclean_status" not in seq:
+                    seq["obiclean_status"]={}
+                seq["obiclean_status"][sample]='i'
+            heads=sons
+            sons  = []
+        if graphfile is not None:
+            print >>graphfile,sub
+    print >>sys.stderr
+    seqcount = len(db)
+    sc=0
+    progressBar(1,seqcount,True,head="Writing sequences")
+    for node in db:
+        sc+=1
+        progressBar(sc,seqcount,head="Writing sequences")
+        # With -H only the records flagged as head in at least one sample are written
+        if (not options.onlyhead or 'obiclean_head' in node):
+            status = node["obiclean_status"]
+            # Number of samples in which the record is internal / head / singleton
+            i=0
+            h=0
+            s=0
+            for sample in status:
+                st=status[sample]
+                if st=="i":
+                    i+=1
+                elif st=="s":
+                    s+=1
+                else:
+                    h+=1
+            node['obiclean_headcount']=h
+            node['obiclean_internalcount']=i
+            node['obiclean_singletoncount']=s
+            node['obiclean_samplecount']=s+i+h
+            if 'obiclean_head' not in node:
+                node['obiclean_head']=False
+#            if (not options.clustermode):
+#                del node["obiclean_status"]
+            writer(node)
+    print >>sys.stderr
diff --git a/src/obicomplement.py b/src/obicomplement.py
new file mode 100644
index 0000000..f4cb073
--- /dev/null
+++ b/src/obicomplement.py
@@ -0,0 +1,62 @@
+:py:mod:`obicomplement`: reverse-complements sequences
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obicomplement` reverse-complements the sequence records.
+    .. TIP:: The identifiers of the sequence records are modified by appending
+             to them the ``_CMP`` suffix.
+    .. TIP:: an attribute with key ``complemented`` and value set to ``True`` is added
+             on each reverse-complemented sequence record.
+By using the selection option set, it is possible to reverse-complement only a subset of the
+sequence records included in the input file. The selected sequences are reverse-complemented;
+the others are stored without modification.
+    *Example 1:* 
+    .. code-block:: bash
+       > obicomplement seq.fasta > seqRC.fasta
+    Reverse-complements all sequence records from the ``seq.fasta`` file and stores the
+    result to the ``seqRC.fasta`` file.
+    *Example 2:* 
+    .. code-block:: bash
+       > obicomplement -s 'A{10,}$' seq.fasta > seqRC.fasta
+    Reverse-complements sequence records from the ``seq.fasta`` file only if they end
+    with at least 10 ``A``. Other sequences are stored without modification.
+from obitools.options import getOptionManager
+from obitools.options.bioseqfilter import addSequenceFilteringOptions
+from obitools.options.bioseqfilter import filterGenerator
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+if __name__=='__main__':
+    # The command line combines the sequence selection options with the
+    # input/output format options.
+    optionParser = getOptionManager([addSequenceFilteringOptions,addInOutputOption], progdoc=__doc__)
+    (options, entries) = optionParser()
+    isSelected = filterGenerator(options)
+    writer = sequenceWriterGenerator(options)
+    for record in entries:
+        # Records accepted by the selection are written complemented,
+        # every other record is written unchanged.
+        writer(record.complement() if isSelected(record) else record)
\ No newline at end of file
diff --git a/src/obiconvert.py b/src/obiconvert.py
new file mode 100644
index 0000000..bbfb786
--- /dev/null
+++ b/src/obiconvert.py
@@ -0,0 +1,54 @@
+:py:mod:`obiconvert`: converts sequence files to different output formats
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obiconvert` converts sequence files to different output formats.
+:doc:`See the documentation for more details on the different formats. <../formats>`
+Input files can be in :
+    - *fasta* format
+    - *extended OBITools fasta* format
+    - Sanger *fastq* format
+    - Solexa *fastq* format
+    - *ecoPCR* format
+    - *ecoPCR* database format
+    - *GenBank* format
+    - *EMBL* format
+:py:mod:`obiconvert` converts those files to the :
+    - *extended OBITools fasta* format
+    - Sanger *fastq* format
+    - *ecoPCR* database format
+If no file name is specified, data is read from standard input. 
+from obitools.options import getOptionManager
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.ecopcr.options import addTaxonomyDBOptions
+from sys import stderr
+if __name__ == '__main__':
+    optionParser = getOptionManager([addInOutputOption,addTaxonomyDBOptions])
+    (options, entries) = optionParser()
+    writer = sequenceWriterGenerator(options)
+    for entry in entries:
+        if options.skiperror:
+            # Best-effort mode: report the record that cannot be written and
+            # go on with the next one. Only ordinary errors are skipped, so
+            # that KeyboardInterrupt (Ctrl-C) and SystemExit still stop the
+            # program, which a bare `except:` would prevent.
+            try:
+                writer(entry)
+            except Exception:
+                print >>stderr,"Skip writing of sequence : %s" % entry.id
+        else:
+            writer(entry)
\ No newline at end of file
diff --git a/src/obicount.py b/src/obicount.py
new file mode 100644
index 0000000..bc431a5
--- /dev/null
+++ b/src/obicount.py
@@ -0,0 +1,59 @@
+:py:mod:`obicount`: counts the number of sequence records 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obicount` counts the number of sequence records and/or the sum of the ``count`` attributes.
+    .. code-block:: bash
+        > obicount seq.fasta  
+    Prints the number of sequence records contained in the ``seq.fasta`` 
+    file and the sum of their ``count`` attributes.
+from obitools.options import getOptionManager
+from obitools.format.options import addInputFormatOption
+def addCountOptions(optionManager):
+    """
+    Register the obicount specific command line options.
+
+    Adds the ``-s/--sequence`` and ``-a/--all`` boolean flags, both ``False``
+    by default, to a new option group of ``optionManager``.
+    """
+    countGroup = optionManager.add_option_group('Obicount specific options')
+    countGroup.add_option('-s', '--sequence',
+                          dest="sequence", action="store_true", default=False,
+                          help="Prints only the number of sequence records.")
+    countGroup.add_option('-a', '--all',
+                          dest="all", action="store_true", default=False,
+                          help="Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False).")
+if __name__ == '__main__':
+    optionParser = getOptionManager([addCountOptions,addInputFormatOption], progdoc=__doc__)
+    (options, entries) = optionParser()
+    # records: number of sequence records read
+    # total:   sum of the `count` attributes, a record without `count` counts for 1
+    records=0
+    total=0
+    for s in entries:
+        records+=1
+        total+=s['count'] if 'count' in s else 1
+    if options.all and not options.sequence:
+        print total
+    elif options.sequence and not options.all:
+        print records
+    else:
+        # Both flags or none of them: print both values
+        print records,total
\ No newline at end of file
diff --git a/src/obicut.py b/src/obicut.py
new file mode 100755
index 0000000..8d01734
--- /dev/null
+++ b/src/obicut.py
@@ -0,0 +1,53 @@
+:py:mod:`obicut`: trims sequences
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obicut` is a command that trims sequence objects based on two integer 
+values: the ``-b`` option gives the first position of the sequence to be kept, 
+and the ``-e`` option gives the last position to be kept. Both values can be 
+computed using a python expression.
+  *Example:*
+    .. code-block:: bash
+          > obicut -b 50 -e seq_length seq1.fasta > seq2.fasta
+    Keeps only the sequence part from the fiftieth position to the end.
+  *Example:*
+    .. code-block:: bash
+          > obicut -b 50 -e seq_length-50 seq1.fasta > seq2.fasta
+    Trims the first and last 50 nucleotides of the sequence object.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+from obitools.options.bioseqfilter import addSequenceFilteringOptions, sequenceFilterIteratorGenerator
+from obitools.options.bioseqcutter import addSequenceCuttingOptions, cutterIteratorGenerator
+if __name__=='__main__':  # @UndefinedVariable
+    optionParser = getOptionManager([addSequenceCuttingOptions,
+                                     addSequenceFilteringOptions,
+                                     addInOutputOption],
+                                    progdoc=__doc__)  # @UndefinedVariable
+    (options, entries) = optionParser()
+    # Records are first selected, then trimmed, then written.
+    selectRecords = sequenceFilterIteratorGenerator(options)
+    trimRecords = cutterIteratorGenerator(options)
+    writer = sequenceWriterGenerator(options)
+    selected = selectRecords(entries)
+    for trimmed in trimRecords(selected):
+        writer(trimmed)
\ No newline at end of file
diff --git a/src/obidistribute.py b/src/obidistribute.py
new file mode 100644
index 0000000..2c31d56
--- /dev/null
+++ b/src/obidistribute.py
@@ -0,0 +1,140 @@
+:py:mod:`obidistribute`: Distributes sequence records over several sequence records files 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obidistribute` distributes equitably a set of sequence records over several files 
+(No sequence records are printed on standard output).
+The number of files is set using the ``-n`` option (required). File names are built with a prefix if
+provided (``-p`` option) and the file number (1 to ``n``).
+    .. code-block:: bash
+        > obidistribute -n 10 -p 'part' seq.fastq
+    Distributes the sequence records contained in the ``seq.fastq``
+    file over files ``part_1.fastq`` to ``part_10.fastq``.
+from obitools.options import getOptionManager
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+import math
+from obitools.fasta import formatFasta
+from obitools.fastq import formatFastq
+def addDistributeOptions(optionManager):
+    """
+    Register the obidistribute specific command line options.
+
+    Adds ``-n/--number`` (integer, no default) and ``-p/--prefix`` (string,
+    empty by default) to a new option group of ``optionManager``.
+    """
+    distributeGroup = optionManager.add_option_group('obidistribute specific options')
+    distributeGroup.add_option('-n', '--number',
+                               dest="number", action="store",
+                               type="int", default=None,
+                               metavar="###",
+                               help="Number of files to distribute over")
+    distributeGroup.add_option('-p', '--prefix',
+                               dest="prefix", action="store",
+                               type="string", default="",
+                               metavar="<PREFIX FILENAME>",
+                               help="prefix added at each file name")
+class OutFiles:
+    """
+    Pool of output files indexed by key, keeping at most 100 files open.
+
+    Open files are chained in a doubly linked list stored in ``self._files``
+    (``key -> [previous key, file object, next key]``) going from the least
+    recently requested file (``self._first``) to the most recently requested
+    one (``self._last``). When more than 100 files are open, the least
+    recently requested one is closed; a closed file is opened again in append
+    mode the next time its key is requested.
+
+    NOTE(review): this class reads ``options.tagname`` and
+    ``options.undefined``, which are not declared by ``addDistributeOptions``,
+    and it is not used by the main program of this script -- confirm whether
+    it is still needed here.
+    """
+    def __init__(self,options):
+        # Set first so that __del__ works even if the lines below fail.
+        self._files = {}
+        self._undefined = None
+        self._tags = options.tagname
+        if options.undefined is not None:
+            self._undefined=open(options.undefined,'w')
+        self._prefix=options.prefix
+        self._first=None
+        self._last=None
+        self._extension=options.outputFormat
+        self._digit = math.ceil(math.log10(options.number))
+    def __getitem__(self,key):
+        """Return the open file associated to ``key``, opening it if needed."""
+        if key in self._files:
+            data = self._files[key]
+            prev,current,next = data
+            if next is not None:
+                # The file is not the most recent one: unlink it...
+                if prev is not None:
+                    self._files[prev][2]=next
+                else:
+                    self._first=next
+                self._files[next][0]=prev
+                # ...and link it again at the end of the list.
+                self._files[self._last][2]=key
+                data[0]=self._last
+                data[2]=None
+                self._last=key
+        else:
+            name = key
+            if self._prefix is not None:
+                template = "%s_%%0%dd.%s" % (self._prefix,self._digit,self._extension)
+            else:
+                template = "%%0%dd.%s" % (self._digit,self._extension)
+            # NOTE(review): the template formats the key with %d whereas
+            # __call__ builds string keys -- confirm the intended file naming.
+            current = open(template % name,'a')
+            prev=self._last
+            if prev is not None:
+                # Without this link the "next" pointers are never set and the
+                # least recently used file cannot be found at eviction time.
+                self._files[prev][2]=key
+            self._last=key
+            self._files[key]=[prev,current,None]
+            if self._first is None:
+                self._first=key
+            if len(self._files)>100:
+                # Too many open files: close the least recently requested one.
+                oldest=self._first
+                oprev,old,onext=self._files[oldest]
+                del(self._files[oldest])
+                old.close()
+                self._first=onext
+                self._files[onext][0]=None
+        return current
+    def __call__(self,seq):
+        """
+        Write ``seq`` to the file selected by the values of its tags.
+
+        A record lacking one of the tags goes to the "undefined" file when
+        there is one, and is dropped otherwise.
+        """
+        if all(z in seq for z in self._tags):
+            k = "_".join([str(seq[x]) for x in self._tags])
+            out=self[k]
+        else:
+            out=self._undefined
+        if out is None:
+            # `print >>None` writes on standard output: return instead.
+            return
+        if self._extension=="fasta":
+            print >>out,formatFasta(seq)
+        else:
+            print >>out,formatFastq(seq)
+    def __del__(self):
+        # Close explicitly every file that is still open.
+        for x in list(self._files.keys()):
+            self._files[x][1].close()
+            del(self._files[x])
+        if self._undefined is not None:
+            self._undefined.close()
+            self._undefined=None
+if __name__=='__main__':
+    optionParser = getOptionManager([addDistributeOptions,addInOutputOption], progdoc=__doc__)
+    (options, entries) = optionParser()
+    assert options.number is not None, "You must specify the number of parts"
+    digit = math.ceil(math.log10(options.number))
+    out=[]
+    # Index of the writer receiving the next record (round-robin)
+    i = 0
+    for seq in entries:
+        if not out:
+            # The output files are only created once a first record is read.
+            template = "%s_%%0%dd.%s" % (options.prefix,digit,options.outputFormat)
+            # The comprehension variable must not be named `i`: in Python 2 a
+            # list comprehension leaks its variable into the enclosing scope,
+            # which would overwrite the round-robin index and send the first
+            # record to the last file.
+            out=[sequenceWriterGenerator(options,
+                                         open(template % (part+1),"w"))
+                 for part in xrange(options.number)
+                ]
+        out[i](seq)
+        i+=1
+        i%=options.number
+    del out
diff --git a/src/obiextract.py b/src/obiextract.py
new file mode 100644
index 0000000..3908565
--- /dev/null
+++ b/src/obiextract.py
@@ -0,0 +1,81 @@
+:py:mod:`obiextract`: extract samples from a dataset 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obiextract` command extracts a subset of samples from a complete
+dataset. Extracted sample names can be specified either by indicating their names using an option
+on the command line or by indicating a file name containing a sample name per line.
+The count attribute of the sequence and the slot describing distribution of the sample
+occurrences among samples are modified according to the selected samples.
+A sequence not present in at least one of the selected samples is not conserved in the 
+output of :py:mod:`obiextract`.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+def addExtractOptions(optionManager):
+    """
+    Register the obiextract specific command line options.
+
+    Adds ``-s/--sample`` (tag name, ``merged_sample`` by default),
+    ``-e/--extract`` (repeatable sample name) and ``-E/--extract-list``
+    (file name, none by default) to ``optionManager``.
+    """
+    optionManager.add_option('-s', '--sample',
+                             dest="sample", action="store",
+                             type="str", default="merged_sample",
+                             metavar="<TAGNAME>",
+                             help="Tag containing sample descriptions")
+    optionManager.add_option('-e', '--extract',
+                             dest="sample_list", action="append",
+                             type="string", default=[],
+                             metavar="<SAMPLE_NAME>",
+                             help="which <SAMPLE_NAME> have to be extracted")
+    optionManager.add_option('-E', '--extract-list',
+                             dest="sample_file", action="store",
+                             type="str", default=None,
+                             metavar="<FILENAME>",
+                             help="File name where a list of sample is stored")
+def selectSamples(entry,key,samples):
+    """
+    Restrict the sample distribution of ``entry`` to the listed samples.
+
+    :param entry: the sequence record, modified in place
+    :param key: name of the attribute holding the ``sample -> count`` mapping
+    :param samples: names of the samples to keep
+    :return: ``entry`` with its ``count`` and ``key`` attributes updated, or
+             ``None`` when the kept samples do not sum to a positive count
+    """
+    available = entry.get(key,{})
+    kept = dict((name,available[name]) for name in samples if name in available)
+    total = sum(kept.values())
+    if not total > 0:
+        return None
+    entry['count']=total
+    entry[key]=kept
+    # A `merged_xxx` attribute reduced to a single sample also gets the
+    # corresponding `xxx` attribute set to the name of that sample.
+    if key[0:7]=='merged_' and len(kept)==1:
+        entry[key[7:]]=list(kept)[0]
+    return entry
+if __name__ == '__main__':
+    optionParser = getOptionManager([addExtractOptions,addInOutputOption],progdoc=__doc__)
+    (options, entries) = optionParser()
+    if options.sample_file is not None:
+        # One sample name per line. The file is closed as soon as it is read
+        # and blank lines are ignored instead of giving an empty sample name.
+        with open(options.sample_file) as sampleFile:
+            names = [x.strip() for x in sampleFile]
+        options.sample_list.extend([n for n in names if n])
+    writer = sequenceWriterGenerator(options)
+    for seq in entries:
+        # selectSamples returns None when the record occurs in none of the
+        # selected samples: such records are not written.
+        seq = selectSamples(seq,options.sample,options.sample_list)
+        if seq is not None:
+            writer(seq)
diff --git a/src/obigrep.py b/src/obigrep.py
new file mode 100644
index 0000000..fe85380
--- /dev/null
+++ b/src/obigrep.py
@@ -0,0 +1,45 @@
+:py:mod:`obigrep`: filters sequence file 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obigrep` command is in some way analogous to the standard Unix `grep` command.
+It selects a subset of sequence records from a sequence file. 
+A sequence record is a complex object composed of an identifier, 
+a set of attributes (``key=value``), a definition, and the sequence itself. 
+Instead of working text line by text line as the standard Unix tool, selection is 
+done sequence record by sequence record. 
+A large set of options allows refining selection on any of the sequence record elements.
+Moreover :py:mod:`obigrep` allows specifying simultaneously several conditions (that 
+take the value ``TRUE`` or ``FALSE``) and only the sequence records that fulfill all 
+the conditions (all conditions are ``TRUE``) are selected.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+from obitools.options.bioseqfilter import addSequenceFilteringOptions
+from obitools.options.bioseqfilter import sequenceFilterIteratorGenerator
+if __name__=='__main__':
+    optionParser = getOptionManager([addSequenceFilteringOptions,addInOutputOption],progdoc=__doc__)
+    (options, entries) = optionParser()
+    # Only the records kept by the selection iterator are written.
+    selectRecords = sequenceFilterIteratorGenerator(options)
+    writer = sequenceWriterGenerator(options)
+    for record in selectRecords(entries):
+        writer(record)
diff --git a/src/obihead.py b/src/obihead.py
new file mode 100644
index 0000000..f9b2a22
--- /dev/null
+++ b/src/obihead.py
@@ -0,0 +1,57 @@
+:py:mod:`obihead`: extracts the first sequence records
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obihead` command is in some way analogous to the standard Unix `head` command.
+It selects the head of a sequence file. 
+But instead of working text line by text line as the standard Unix tool, 
+selection is done at the sequence record level. You can specify the number of sequence records 
+to select.
+  *Example:*
+    .. code-block:: bash
+         > obihead -n 150 seq1.fasta > seq2.fasta
+    Selects the 150 first sequence records from the ``seq1.fasta`` file and stores
+    them into the ``seq2.fasta`` file.
+import sys
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+def addHeadOptions(optionManager):
+    """
+    Register the obihead specific command line option:
+    ``-n/--sequence-count`` (integer, 10 by default).
+    """
+    optionManager.add_option('-n', '--sequence-count',
+                             dest="count", action="store",
+                             type="int", default=10,
+                             metavar="###",
+                             help="Count of first sequences to print")
+if __name__ == '__main__':
+    optionParser = getOptionManager([addHeadOptions,addInOutputOption])
+    (options, entries) = optionParser()
+    writer = sequenceWriterGenerator(options)
+    # `written` is the number of records already written when `s` is read
+    for written,s in enumerate(entries):
+        if written >= options.count:
+            # Enough records written: stop without reading the rest of the input
+            print >>sys.stderr,""
+            sys.exit(0)
+        writer(s)
diff --git a/src/obijoinpairedend.py b/src/obijoinpairedend.py
new file mode 100644
index 0000000..3d4f8f2
--- /dev/null
+++ b/src/obijoinpairedend.py
@@ -0,0 +1,134 @@
+:py:mod:`obijoinpairedend`: Joins paired-end reads
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obijoinpairedend` aims at joining the two reads of a paired-end library.
+For this purpose, it concatenates sequence merging the forward read and the 
+reversed-complemented reverse read.
+The program uses as input one or two sequences reads files. 
+    - If two files are used one of them must be specified using the ``-r`` option. 
+      Sequence records corresponding to the same read pair must be in the same order 
+      in the two files.
+    - If just one file is provided, sequence records are supposed to be all of the same length.
+      The first half of the sequence is used as forward read, the second half is used as the reverse
+      read.
+    *Example:*
+    .. code-block:: bash
+       > obijoinpairedend -r seq3P.fastq seq5P.fastq > seq.fastq
+    The ``seq5P.fastq`` sequence file contains the forward sequence records.
+    The ``seq3P.fastq`` sequence file contains the reverse sequence records.
+    Pairs of reads are joined together and the resulting sequence is stored in the
+    ``seq.fastq`` file.
+from obitools.options import getOptionManager
+from itertools import chain
+from obitools import NucSequence
+from obitools.format.options import sequenceWriterGenerator, autoEntriesIterator,\
+    addInOutputOption
+from obitools.utils import universalOpen
+def addPairEndOptions(optionManager):
+    """
+    Register the obijoinpairedend specific command line option:
+    ``-r/--reverse-reads`` (file name, none by default).
+    """
+    optionManager.add_option('-r', '--reverse-reads',
+                             dest="reverse", action="store",
+                             type="string", default=None,
+                             metavar="<FILENAME>",
+                             help="Filename containing reverse solexa reads ")
+def cutDirectReverse(entries):
+    """
+    Split each record into its forward and reverse halves.
+
+    The length of the records is taken as the most frequent length among the
+    first ten records (fewer when the input is shorter); it must be unique
+    and even. Each record is then cut at half of that length.
+
+    :param entries: iterable over the sequence records
+    :return: a generator of ``(forward, reverse)`` pairs; nothing is
+             generated for an empty input
+    """
+    entries = iter(entries)
+    # Read at most ten records. Calling `entries.next()` ten times would raise
+    # StopIteration on a shorter input and silently end the generator.
+    first = []
+    for s in entries:
+        first.append(s)
+        if len(first) >= 10:
+            break
+    if not first:
+        return
+    clen = {}
+    for x in first:
+        clen[len(x)]=clen.get(len(x),0)+1
+    freq = max(clen.values())
+    freq = [k for k in clen if clen[k]==freq]
+    assert len(freq)==1,"To many sequence length"
+    freq = freq[0]
+    assert freq % 2 == 0, ""
+    # Explicit integer division: the value is used as a slice boundary
+    lread = freq//2
+    # The records already read are put back in front of the remaining ones
+    for s in chain(first,entries):
+        yield(s[0:lread],s[lread:])
+def seqPairs(direct,reverse):
+    """
+    Generate ``(forward, reverse)`` pairs by reading one record of
+    ``reverse`` for each record of ``direct``.
+    """
+    for forward in direct:
+        yield (forward,reverse.next())
+def buildJoinedSequence(sequences,options):
+    """
+    Join each pair of reads into a single sequence record.
+
+    The reverse read is complemented and appended to the forward read. The
+    joined record takes the identifier of the forward read suffixed with
+    ``_PairEnd``, its definition and its attributes. When at least one of the
+    reads has a ``quality`` attribute, the qualities are concatenated too
+    (``10**-4`` is used for each position of a read without quality) and the
+    length of the forward read is stored in ``pairend_limit``.
+
+    :param sequences: iterable of ``(forward, reverse)`` pairs
+    :param options: not used by this function
+    :return: a generator of joined sequence records
+    """
+    for d,r in sequences:
+        r=r.complement()
+        s = str(d) + str(r)
+        seq = NucSequence(d.id + '_PairEnd',s,d.definition,**d)
+        withqual = hasattr(d, 'quality') or hasattr(r, 'quality')
+        if withqual:
+            if hasattr(d, 'quality'):
+                # Work on a copy: extending `d.quality` itself would also
+                # change the quality of the forward read.
+                quality = list(d.quality)
+            else:
+                quality = [10**-4] * len(d)
+            if hasattr(r, 'quality'):
+                quality.extend(r.quality)
+            else:
+                quality.extend([10**-4] * len(r))
+            seq.quality=quality
+            # NOTE(review): `pairend_limit` is only set for reads with
+            # quality -- confirm this is intended.
+            seq['pairend_limit']=len(d)
+        yield seq
+if __name__ == '__main__':
+    optionParser = getOptionManager([addPairEndOptions,addInOutputOption])
+    (options, direct) = optionParser()
+    if options.reverse is not None:
+        # The reverse reads are read from the file given with the -r option
+        reader = autoEntriesIterator(options)
+        reverse = reader(universalOpen(options.reverse))
+        sequences=seqPairs(direct,reverse)
+    else:
+        # Single input file: each record is cut into its two reads
+        sequences=cutDirectReverse(direct)
+    writer = sequenceWriterGenerator(options)
+    for joined in buildJoinedSequence(sequences,options):
+        writer(joined)
diff --git a/src/obipr2.py b/src/obipr2.py
new file mode 100644
index 0000000..ac8e9d8
--- /dev/null
+++ b/src/obipr2.py
@@ -0,0 +1,302 @@
+:py:mod:`obipr2`: converts the PR2 database into an ecoPCR database
+:py:mod:`obipr2`: converts and optionally download the `PR2 database <http://ssu-rrna.org/pr2/>`_
+into an ecoPCR database. The formated database include the taxonomy as defined by the PR2 authors.
+.. warning::
+    Take care that the numeric taxids associated to the sequences are specific 
+    to this **PR2** database and not compatible with the NCBI taxids. 
+    The taxids present in a version of the **PR2** database are only valid for
+    this version of the database and not compatible with the taxids used in another version
+    downloaded at an other time.
+    .. code-block:: bash
+           > obipr2 
+   This command downloads and formats the latest version of the PR2 database from
+   the official `PR2 web site <http://ssu-rrna.org/pr2/>`_.
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+from obitools.options import getOptionManager
+from obitools.ecopcr.taxonomy import Taxonomy
+from obitools.fasta import fastaIterator
+import sys
+from obitools.utils import universalOpen, ColumnFile
+import re
+import urllib2
+from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
+from obitools.utils import progressBar
+from os.path import isfile, join
+from os import listdir
+def numberInStr(s) :
+    """Return ``True`` if at least one character of ``s`` is a digit."""
+    return any(c.isdigit() for c in s)
+def silvaOptions(optionManager):
+    """
+    Register the obipr2 specific command line options: ``--localdb``
+    (directory, none by default) and ``-m/--min-taxid`` (integer,
+    10000000 by default).
+    """
+    optionManager.add_option('--localdb',
+                             dest="local", action="store",
+                             type='str', default=None,
+                             help="Local copy of the files located in the specified directory "
+                             "will be used instead of those present on the PR2 web site")
+    optionManager.add_option('-m', '--min-taxid',
+                             dest="taxashift", action="store",
+                             type="int", default=10000000,
+                             metavar="####",
+                             help="minimal taxid for the species taxid")
+# Address under which the PR2 database files are looked up by pr2URLS.
+# NOTE(review): `siteurl` is not assigned anywhere in the visible part of
+# this file -- confirm that it is defined before this line.
+baseurl="%s/pr2/download/entire_database" % siteurl
+def getHyperlink(url):
+    """
+    Extract the hyperlinks of the HTML page located at ``url``.
+
+    :param url: address of the page to read
+    :return: a dictionary associating the text of each ``<a>`` element to
+             the value of its ``href`` attribute
+    """
+    furl = urllib2.urlopen(url)
+    try:
+        # The page is joined on a single line so that an anchor split over
+        # several lines is still matched.
+        data = "".join([l.strip() for l in furl])
+    finally:
+        furl.close()
+    href = re.compile('<a .*?</a>',re.IGNORECASE)
+    target=re.compile('href="(.*?)"',re.IGNORECASE)
+    filename=re.compile(">(.*?)</a>",re.IGNORECASE)
+    links = {}
+    for h in href.findall(data):
+        t = target.search(h)
+        f = filename.search(h)
+        if t is None or f is None:
+            # Anchor without a double-quoted href (a named anchor for
+            # instance): it cannot point to a file, ignore it.
+            continue
+        links[f.group(1)]=t.group(1)
+    return links
+def pr2URLS(options):
+    """
+    Locate the latest PR2 sequence file and the taxonomy file of the same version.
+
+    The files are listed from the directory ``options.local`` when it is set,
+    otherwise from the hyperlinks of the page located at ``baseurl``. In the
+    first case the global ``baseurl`` is replaced by that directory.
+
+    :return: the tuple (location of the sequence file, location of the
+             taxonomy file, output name ``pr2_gb<version>``)
+    """
+    global baseurl
+    if options.local is not None:
+        # Local mode: every regular file of the directory, keyed by its own name
+        archive = dict((f,f) for f in listdir(options.local) if isfile(join(options.local,f)))
+        baseurl=options.local
+    else:
+        archive=getHyperlink(baseurl)
+    # Sequence files, compressed or not
+    pr2file = [x.strip() for x in archive.keys() 
+                 if x.strip().endswith('pr2.fasta.gz') or x.strip().endswith('pr2.fasta')
+                ]
+    # The version is the number following the "gb" prefix of the file name.
+    # NOTE(review): search() returns None for a name that does not start
+    # with "gb", which makes the .group(1) calls below fail -- confirm that
+    # every listed file follows this naming.
+    version_pattern = re.compile("^gb([0-9]*)", re.IGNORECASE)
+    versions = [int(version_pattern.search(x.strip()).group(1)) for x in pr2file]
+    latest = max(versions)
+    seqfile=pr2file[versions.index(latest)]
+    # Taxonomy files, compressed or not
+    pr2txfile = [x for x in archive.keys() 
+                 if x.endswith('pr2.tlf.gz') or x.endswith('pr2.tlf')
+                ]
+    versions = [int(version_pattern.search(x).group(1)) for x in pr2txfile]
+    # NOTE(review): this prints on standard output and looks like a leftover
+    # debugging trace -- confirm.
+    print versions
+    # The taxonomy file must have the same version as the sequence file
+    taxfile = pr2txfile[versions.index(latest)]
+    try:
+        sequrl = archive[seqfile]
+    except KeyError:
+        # Second attempt with the ".gz" suffix removed or added
+        if seqfile[-3:]=='.gz':
+            seqfile=seqfile[0:-3]
+        else:
+            seqfile=seqfile+'.gz'
+        sequrl = archive[seqfile]
+    try:
+        taxurl = archive[taxfile]
+    except KeyError:
+        # Second attempt with the ".gz" suffix removed or added
+        if taxfile[-3:]=='.gz':
+            taxfile=taxfile[0:-3]
+        else:
+            taxfile=taxfile+'.gz'
+        taxurl = archive[taxfile]
+    output = "pr2_gb%d" % latest
+    return "%s/%s" %(baseurl,sequrl),"%s/%s" %(baseurl,taxurl),output
+pathElementPattern = re.compile("^ *(.*?) {(.*?)} *$", re.IGNORECASE)
+# French rank names and the English name replacing them
+_rankTranslation = {'classe':'class',
+                    'ordre':'order',
+                    'famille':'family',
+                    'genre':'genus',
+                    'espece':'species'}
+def pr2PathParser(path):
+    """
+    Parse a path element written as ``rank {name}``.
+
+    :return: the tuple ``(rank, name)``; French rank names are replaced by
+             their English equivalent and a blank rank by ``no rank``
+    """
+    element = pathElementPattern.match(path)
+    rank,name = element.group(1),element.group(2)
+    if rank in _rankTranslation:
+        rank = _rankTranslation[rank]
+    elif rank.strip()=="":
+        rank="no rank"
+    return rank,name
+class Pr2Dump(Taxonomy):  
+    """
+    Taxonomy built from a PR2 taxonomy dump.
+
+    Besides the attributes set for the ``Taxonomy`` base class, the instance
+    gets a ``pr2ac`` dictionary associating the first column of each line of
+    the dump to the taxid assigned to the last element of its path.
+    """
+    def __init__(self,taxdump=None):
+        self._path=taxdump
+        # The tables are read before the base class initialisation
+        # (presumably because it relies on them -- TODO confirm).
+        self._readNodeTable(taxdump)
+        Taxonomy.__init__(self)
+    def _taxonCmp(t1,t2):
+        # Compare two taxon records on their first element
+        if t1[0] < t2[0]:
+            return -1
+        elif t1[0] > t2[0]:
+            return +1
+        return 0
+    _taxonCmp=staticmethod(_taxonCmp)
+    def _readNodeTable(self,dumpfile):
+        """
+        Read the tab separated dump and build the taxonomy tables.
+
+        Sets ``_taxonomy``, ``_localtaxon``, ``_ranks``, ``_index``,
+        ``_preferedName``, ``_name`` and ``pr2ac``.
+        """
+        nodes = ColumnFile(dumpfile, 
+                           sep='\t', 
+                           types=(str,pr2PathParser))
+        print >>sys.stderr,"Reading taxonomy dump file..."
+            # (taxid,rank,parent)
+        # Taxid 1 is reserved for the root, new taxa are numbered from 2
+        nexttaxid = 2  
+        # Maps a path (taxon names joined by ";") to its taxid
+        taxidx={'root':1}         
+        # Maps the first column of each line to the taxid of its full path
+        actaxid={} 
+        # One record per taxon: [taxid, rank, parent taxid, name, 'pr2']
+        taxonomy=[[1,'root',1,'root','pr2']]
+        for node in nodes:
+            ac = node[0]
+            # Path elements are (rank, name) pairs.
+            # NOTE(review): node[1] is left out of the path -- confirm
+            # against the layout of the dump file.
+            path = [('root','root')] + node[2:]
+            # Build the path leading to each element: "root", "root;a", "root;a;b"...
+            allpath = [[]]
+            for s in path:
+                allpath.append(allpath[-1]+[s[1]])
+            allpath.pop(0)
+            allpath=[";".join(x) for x in allpath]
+            i=0
+            for p in allpath:
+                try:
+                    taxid = taxidx[p]
+                except KeyError:
+                    # First occurrence of this path: declare a new taxon
+                    # below the taxon of the parent path
+                    taxid=nexttaxid
+                    taxidx[p]=taxid
+                    nexttaxid+=1
+                    parent=p.rsplit(";",1)[0]
+                    ptaxid=taxidx[parent]
+                    rank = path[i][0]
+                    name = path[i][1]
+                    taxonomy.append([taxid,rank,ptaxid,name,'pr2'])
+                i+=1
+            # After the loop, taxid is the one of the complete path
+            actaxid[ac]=taxid
+        print >>sys.stderr,"List all taxonomy rank..."    
+        ranks =list(set(x[1] for x in taxonomy)) 
+        ranks.sort()
+        print >>sys.stderr,ranks
+        # Maps each rank name to its position in the sorted list of ranks
+        # (Python 2 idiom: map(None, a, b) pairs the items of a and b)
+        rankidx = dict(map(None,ranks,xrange(len(ranks))))
+        self._taxonomy=taxonomy
+        self._localtaxon=len(taxonomy)
+        print >>sys.stderr,"Indexing taxonomy..."
+        # Maps each taxid to the position of its record in the taxonomy list
+        index = {}
+        for i in xrange(self._localtaxon):
+            index[self._taxonomy[i][0]]=i
+        print >>sys.stderr,"Indexing parent and rank..."
+        # Replace in place the rank name by its index and the parent taxid
+        # by the position of the parent record
+        for t in self._taxonomy:
+            t[1]=rankidx[t[1]]
+            t[2]=index[t[2]]
+        self._ranks=ranks
+        self._index=index 
+        self._preferedName = []
+        self._name=[(n[3],'scientific name',self._index[n[0]]) for n in taxonomy]    
+        self.pr2ac=actaxid
+def pr22obi(seq,taxonomy):
+    """
+    Annotate a PR2 sequence record with its taxid.
+
+    The identifier of the record is split on its first ``|``: the left part
+    becomes the identifier and the right part the definition. The ``taxid``
+    attribute is then set from ``taxonomy.pr2ac``.
+
+    :param seq: the sequence record, modified in place
+    :param taxonomy: object whose ``pr2ac`` maps identifiers to taxids
+    :return: ``seq``; it has no ``taxid`` attribute when its identifier
+             contains no ``|`` or is unknown to ``taxonomy.pr2ac``
+    """
+    parts = seq.id.split("|",1)
+    if len(parts)==2:
+        seq.id,seq.definition=parts
+        try:
+            seq['taxid']=taxonomy.pr2ac[seq.id]
+        except KeyError:
+            pass
+    # An identifier without "|" used to raise an uncaught ValueError and to
+    # stop the whole conversion; the record is now returned without taxid.
+    return seq            
+if __name__ == '__main__':
+    optionParser = getOptionManager([silvaOptions])
+    (options, entries) = optionParser()
+    sequrl,taxurl,options.ecopcroutput = pr2URLS(options)
+    # The taxonomy is read first: it provides the taxid of each sequence
+    options.taxonomy = Pr2Dump(universalOpen(taxurl))
+#     if options.write != '' :
+#         options.write = open(options.write, 'w')
+    records = fastaIterator(universalOpen(sequrl))
+    writer  = EcoPCRDBSequenceWriter(options)
+    nseq = len(options.taxonomy.pr2ac)
+    progressBar(1,nseq,head=options.ecopcroutput)
+    for done,e in enumerate(records,1):
+        e = pr22obi(e, options.taxonomy)
+        progressBar(done,nseq,head=options.ecopcroutput)
+        if 'taxid' not in e:
+            # Records without taxid are reported and left out of the database
+            print >>sys.stderr,"\nCannot find taxon for entry : %s : %s" % (e.id,e.definition)
+        else:
+            writer.put(e)
+    print >>sys.stderr
\ No newline at end of file
diff --git a/src/obisample.py b/src/obisample.py
new file mode 100644
index 0000000..7fb4654
--- /dev/null
+++ b/src/obisample.py
@@ -0,0 +1,119 @@
+:py:mod:`obisample`: randomly resamples sequence records
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obisample` randomly resamples sequence records with or without replacement.
+from obitools.options import getOptionManager
+from obitools.sample import weigthedSample, weigthedSampleWithoutReplacement
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+import random
+def addSampleOptions(optionManager):
+    """
+    Declare the command line options specific to obisample.
+
+    :param optionManager: object receiving the ``-s``, ``-a`` and ``-w``
+                          options through its ``add_option`` method
+    """
+    declarations = [
+        (('-s','--sample-size'),
+         {'action'  : "store",
+          'dest'    : "size",
+          'metavar' : "###",
+          'type'    : "float",
+          'default' : None,
+          'help'    : "Size of the generated sample. "
+                      "If -a option is set, size is expressed as fraction"}),
+        (('-a','--approx-sampling'),
+         {'action'  : "store_true",
+          'dest'    : "approx",
+          'default' : False,
+          'help'    : "Switch to an approximative algorithm, "
+                      "useful for large files"}),
+        (('-w','--without-replacement'),
+         {'action'  : "store_true",
+          'dest'    : "woreplace",
+          'default' : False,
+          'help'    : "Ask for sampling without replacement"}),
+    ]
+    # Options are registered in the order of the table above.
+    for flags,settings in declarations:
+        optionManager.add_option(*flags,**settings)
+def rbinom(n,p):
+    """
+    Count the successes observed over *n* independent random draws.
+
+    Each draw calls ``random.random()`` once and is a success when the
+    value is strictly lower than *p*.
+
+    :param n: number of draws
+    :param p: success threshold compared to each draw
+    :return: number of successful draws
+    """
+    successes = 0
+    for trial in xrange(n):
+        if random.random() < p:
+            successes += 1
+    return successes
+# Script entry point of obisample.
+if __name__ == '__main__':
+    optionParser = getOptionManager([addSampleOptions,addInOutputOption])
+    (options, entries) = optionParser()
+    if not options.approx:
+        # Exact mode: every record is kept in memory and the sample is drawn
+        # from the 'count' attribute of the records.
+        db = list(entries)
+        if options.size is None:
+            options.size=len(db)
+        else:
+            options.size=int(options.size)
+        distribution = {}
+        total = 0
+        for idx,s in enumerate(db):
+            count = s['count']
+            total+=count
+            distribution[idx]=count
+        if options.woreplace:
+            # Explicit check instead of an assert statement, so that the
+            # validation is still done when python is run with -O.
+            if options.size > total:
+                raise AssertionError("Sample size (%s) is larger than the total count (%s)"
+                                     % (options.size,total))
+            sp = weigthedSampleWithoutReplacement
+        else:
+            sp = weigthedSample
+        sample = sp(distribution, options.size)
+    else:
+        # Approximate mode: -s is a fraction. The options are validated
+        # before the input is read.
+        if options.size is None:
+            raise AssertionError("You cannot specify option -a without option -s")
+        if not (options.size>=0 and options.size<=1):
+            raise AssertionError("When used with -a options -s must be a probability")
+        db = []
+        distribution = {}
+        idx = 0
+        total = 0
+        # Each record is first pre-sampled with a binomial draw using 1.5
+        # times the requested fraction (capped to 1); only the records drawn
+        # at least once are kept in memory.
+        p = options.size * 1.5
+        if p > 1.:
+            p = 1.
+        for seq in entries:
+            count = seq['count']
+            total+=count
+            n = rbinom(count, p)
+            if n > 0:
+                db.append(seq)
+                distribution[idx]=n
+                idx+=1
+        # The final sample is then drawn without replacement among the
+        # pre-sampled counts.
+        size = int(total * options.size)
+        sample=weigthedSampleWithoutReplacement(distribution, size)
+    writer = sequenceWriterGenerator(options)
+    # sample maps the index of a record in db to its sampled count.
+    for idx in sample:
+        seq = db[idx]
+        seq['count']=sample[idx]
+        writer(seq)
diff --git a/src/obiselect.py b/src/obiselect.py
new file mode 100644
index 0000000..0e05e82
--- /dev/null
+++ b/src/obiselect.py
@@ -0,0 +1,281 @@
+:py:mod:`obiselect` : selects representative sequence records
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obiselect` command selects a subset of sequence records from a sequence
+file by describing sequence record groups and defining how many and which sequence records
+from each group must be retrieved.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
+from random import random
+from obitools.utils import progressBar
+import math
+import sys
+from obitools.utils.bioseq import mergeTaxonomyClassification
+def minimum(seqs):
+    """
+    Return the smallest ``select`` value found among *seqs*.
+
+    :param seqs: iterable of records indexable by ``'select'``
+    """
+    values = (record['select'] for record in seqs)
+    return min(values)
+def maximum(seqs):
+    """
+    Return the largest ``select`` value found among *seqs*.
+
+    :param seqs: iterable of records indexable by ``'select'``
+    :raise TypeError: when the values cannot be compared; the records are
+                      dumped on the standard error before the exception is
+                      propagated
+    """
+    try:
+        return max(s['select'] for s in seqs)
+    except TypeError:
+        print >>sys.stderr, seqs
+        # A bare raise keeps the traceback of the original error.
+        raise
+def mean(seqs):
+    """
+    Return the arithmetic mean of the ``select`` values of *seqs*.
+
+    :param seqs: sized collection of records indexable by ``'select'``
+    :return: the mean as a float
+    """
+    total = sum((record['select'] for record in seqs),0)
+    return float(total) / len(seqs)
+def median(seqs):
+    """
+    Return the median of the ``select`` values of *seqs*.
+
+    For an even number of records the upper of the two central values is
+    returned.
+
+    :param seqs: sized collection of records indexable by ``'select'``
+    """
+    ss = sorted(s['select'] for s in seqs)
+    # Explicit floor division: the index stays an integer whatever the
+    # division mode in use.
+    return ss[len(ss)//2]
+def addSelectOptions(optionManager):
+    """
+    Declare the command line options specific to obiselect.
+
+    The options are registered in a dedicated option group created on
+    *optionManager*. The ``-m``, ``-M``, ``-a`` and ``--median`` switches all
+    write to the ``method`` destination, whose default is ``maximum``.
+
+    :param optionManager: object providing ``add_option_group``
+    """
+    group = optionManager.add_option_group('obiselect specific options')
+    register = group.add_option
+    register('-c','--category-attribute',
+             dest="categories", action="append",
+             metavar="<Attribute Name>",
+             default=[],
+             help="Add one attribute to the list of"
+                  " attribute used for categorizing sequence records")
+    register('-n','--number',
+             dest="number", action="store",
+             metavar="",
+             type="int",
+             default=1,
+             help="number of sequence records to keep in each category")
+    register('-f','--function',
+             dest="function", action="store",
+             metavar="",
+             default="random",
+             help="python code evaluated for each sequence record [default: random value]")
+    # (flags, selection function, metavar, help) of the four exclusive
+    # switches sharing the 'method' destination.
+    switches = (
+        (('-m','--min'),minimum,"",
+         "select sequence record in each group minimizing the function"
+         " (exclusive with -M, -a, --median)"),
+        (('-M','--max'),maximum,"",
+         "select sequence record in each group maximizing the function"
+         " (exclusive with -m, -a, --median)"),
+        (('-a','--mean'),mean,"",
+         "select sequence record in each group closest to the mean of the function"
+         " (exclusive with -m, -M, --median)"),
+        (('--median',),median,"<Attribute Name>",
+         "select sequence record in each group closest to the median of the function"
+         " (exclusive with -m, -M, -a)"),
+    )
+    for flags,selector,meta,text in switches:
+        register(*flags,
+                 dest="method", action="store_const",
+                 metavar=meta,
+                 default=maximum,
+                 const=selector,
+                 help=text)
+    register('--merge',
+             dest="merge", action="append",
+             metavar="<TAG NAME>",
+             type="string",
+             default=[],
+             help="attributes to merge within each group")
+    register('-s','--sample',
+             dest="sample", action="store",
+             metavar="<TAGNAME>",
+             type="str",
+             default=None,
+             help="Tag containing sample descriptions, the default value is set to *merged_sample*")
+    register('--merge-ids',
+             dest="mergeids", action="store_true",
+             default=False,
+             help="add the merged id data to output")
+def sortclass(seqs,options):
+    """
+    Sort *seqs* in place by increasing distance to the target value.
+
+    The target is the float value returned by ``options.method(seqs)``. Each
+    record receives a ``distance`` attribute holding the absolute difference
+    between its ``select`` value and the target. Records at the same
+    distance keep their relative order.
+
+    :param seqs: list of records indexable by ``'select'``
+    :param options: object whose ``method`` attribute computes the target
+    """
+    cible = float(options.method(seqs))
+    for s in seqs:
+        # abs() gives the same distance as sqrt(x**2) without overflowing on
+        # large values nor rounding tiny ones down to zero.
+        s['distance']=abs(float(s['select'])-cible)
+    seqs.sort(key=lambda s: s['distance'])
+# Script entry point of obiselect.
+if __name__ == '__main__':
+    optionParser = getOptionManager([addSelectOptions,addInOutputOption,addTaxonomyDBOptions])
+    (options, entries) = optionParser()
+    taxonomy=loadTaxonomyDatabase(options)
+    writer = sequenceWriterGenerator(options)
+    # Maps a tuple of category values to the list of records of the group.
+    classes = {}
+    print >>sys.stderr,"\nLoading sequences...\n"
+    with_taxonomy=hasattr(options, 'taxonomy') and options.taxonomy is not None
+    nbseq=0
+    for s in entries:
+        nbseq+=1
+        # Evaluation context of the -c and -f expressions; the record itself
+        # is used as local namespace.
+        # NOTE(review): these expressions come from the command line and are
+        # evaluated with eval(); they must not come from untrusted input.
+        environ = {'sequence':s,'random':random()}
+        if with_taxonomy:
+            environ['taxonomy'] = options.taxonomy
+        category = []
+        for c in options.categories:
+            try:
+                category.append(eval(c,environ,s))
+            except Exception:
+                # A category that cannot be evaluated is recorded as None.
+                category.append(None)
+        category=tuple(category)
+        classes.setdefault(category,[]).append(s)
+        try:
+            s['select'] = eval(options.function,environ,s)
+        except Exception:
+            s['select'] = None
+    mergedKey = options.merge
+    mergeIds = options.mergeids
+    if mergedKey is not None:
+        mergedKey=set(mergedKey)
+    else:
+        mergedKey=set()
+    if taxonomy is not None:
+        mergedKey.add('taxid')
+    print >>sys.stderr,"\nSelecting sequences...\n"
+    lclasses=len(classes)
+    progressBar(1,lclasses,True,'Selecting')
+    i=0
+    for c in classes:
+        i+=1
+        progressBar(i,lclasses,False,"%15s" % ("/".join(map(str,c)),))
+        members = classes[c]
+        # Value stored in the 'class' attribute: a single category is stored
+        # bare, several categories as a tuple. It is computed once per group
+        # so that every subset of the group receives the same value.
+        if len(c)==1:
+            label=c[0]
+        else:
+            label=c
+        if options.sample is not None:
+            # One subset per sample identifier found in the sample attribute.
+            subsets = {}
+            for s in members:
+                for sid in s[options.sample]:
+                    subsets.setdefault(sid,[]).append(s)
+        else:
+            subsets={"all":members}
+        for seqs in subsets.values():
+            sortclass(seqs, options)
+            if options.number==1 and options.sample is None:
+                # A single representative is kept: the information of the
+                # other records of the group is merged into it.
+                s = seqs[0]
+                for key in mergedKey:
+                    if key=='taxid' and mergeIds:
+                        if 'taxid_dist' not in s:
+                            s["taxid_dist"]={}
+                        if 'taxid' in s:
+                            s["taxid_dist"][s.id]=s['taxid']
+                    mkey = "merged_%s" % key
+                    if mkey not in s:
+                        if key in s:
+                            s[mkey]={s[key]:1}
+                        else:
+                            s[mkey]={}
+                if 'count' not in s:
+                    s['count']=1
+                if mergeIds:
+                    s['merged']=[s.id]
+                for seq in seqs[1:]:
+                    if 'count' in seq:
+                        s['count']+=seq['count']
+                    else:
+                        s['count']+=1
+                    for key in mergedKey:
+                        if key=='taxid' and mergeIds:
+                            if 'taxid_dist' in seq:
+                                s["taxid_dist"].update(seq["taxid_dist"])
+                            if 'taxid' in seq:
+                                s["taxid_dist"][seq.id]=seq['taxid']
+                        mkey = "merged_%s" % key
+                        if mkey in seq:
+                            m = seq[mkey]
+                        elif key in seq:
+                            m = {seq[key]:1}
+                        else:
+                            # Nothing to merge for this record: do not reuse
+                            # the counts of a previous key or record.
+                            m = {}
+                        allmkey = set(m.keys()) | set(s[mkey].keys())
+                        s[mkey] = dict((k,m.get(k,0)+s[mkey].get(k,0)) for k in allmkey)
+                    if mergeIds:
+                        s['merged'].append(seq.id)
+                if taxonomy is not None:
+                    mergeTaxonomyClassification(seqs, taxonomy)
+            # The selected records are flagged and written in a second pass.
+            for s in seqs[0:options.number]:
+                s['class']=label
+                s['__ at TOWRITE@__']=True
+    print >>sys.stderr,"\nWriting sequences...\n"
+    progressBar(1,nbseq,True,'Writing')
+    i=0
+    for c in classes:
+        seqs = classes[c]
+        for s in seqs:
+            i+=1
+            progressBar(i,nbseq,False,"Writing")
+            if '__ at TOWRITE@__' in s:
+                # Working attributes are removed before writing.
+                del s['__ at TOWRITE@__']
+                del s['select']
+                writer(s)
+    print >>sys.stderr
diff --git a/src/obisilva.py b/src/obisilva.py
new file mode 100644
index 0000000..9816815
--- /dev/null
+++ b/src/obisilva.py
@@ -0,0 +1,355 @@
+:py:mod:`obisilva`: converts silva database into an ecoPCR database
+:py:mod:`obisilva`: converts, and optionally downloads, the `Silva database <http://www.arb-silva.de>`_
+into an ecoPCR database. The formatted database includes the taxonomy as defined by the Silva authors.
+.. warning::
+    Take care that the numeric taxids associated to the sequences are specific 
+    to this Silva database and not compatible with the NCBI taxids. 
+    The taxids present in a version of the Silva database (*i.e* ssu, lsu, parc, ref...)
+    are only valid for this version of the database and not compatible 
+    with the taxids used in another version.
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+from obitools.options import getOptionManager
+from obitools.ecopcr.taxonomy import ecoTaxonomyWriter, Taxonomy
+from obitools.fasta import fastaIterator
+import sys
+from obitools.utils import universalOpen, ColumnFile
+import re
+import urllib2
+from obitools import NucSequence
+from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
+from obitools.utils import progressBar
+from os.path import isfile, join
+from os import listdir
+def numberInStr(s) :
+    """
+    Tell whether *s* contains at least one digit.
+
+    :param s: string to scan
+    :return: True when a character of *s* is a digit, False otherwise
+             (including for the empty string)
+    """
+    return any(character.isdigit() for character in s)
+def silvaOptions(optionManager):
+    """
+    Declare the command line options specific to obisilva.
+
+    :param optionManager: object receiving the options through its
+                          ``add_option`` method
+    """
+    # Switches storing a constant: (short flag, constant, destination, help).
+    # The long flag is the constant prefixed by '--'.
+    constants = (
+        ('-s','ssu' ,'rrna',"specify that you are interested in the SSU database"),
+        ('-l','lsu' ,'rrna',"specify that you are interested in the LSU database"),
+        ('-p','parc','type',"specify that you are interested in the parc version of the database"),
+        ('-r','ref' ,'type',"specify that you are interested in the reference version of the database"),
+    )
+    for short,value,destination,text in constants:
+        optionManager.add_option(short,'--'+value,
+                                 dest=destination, action="store_const",
+                                 metavar="<taxon_name>:rank:parent",
+                                 const=value,
+                                 default=None,
+                                 help=text)
+    # Boolean switches: the destination has the name of the long flag.
+    booleans = (
+        ('-n','nr'   ,"specify that you are interested in the non redundant version of the database"),
+        ('-t','trunc',"specify that you are interested in the truncated version of database"),
+    )
+    for short,name,text in booleans:
+        optionManager.add_option(short,'--'+name,
+                                 dest=name, action="store_true",
+                                 default=False,
+                                 help=text)
+    optionManager.add_option('--localdb',
+                             dest="local", action="store",
+                             type='str',
+                             default=None,
+                             help="Local copy of the files located in the specified directory "
+                                  "will be used instead of those present on the ARB-Silva web site")
+    optionManager.add_option('-m','--min-taxid',
+                             dest="taxashift", action="store",
+                             type="int",
+                             metavar="####",
+                             default=10000000,
+                             help="minimal taxid for the species taxid")
+# Location of the exported files of the current Silva release.
+# NOTE(review): relies on a module level ``siteurl`` variable - confirm that
+# it is defined before this line.
+baseurl="%sno_cache/download/archive/current/Exports" % siteurl
+# Name pattern of the sequence file, indexed by
+# (options.rrna,options.type,options.trunc,options.nr).
+# The %s placeholder receives the release version; None marks a combination
+# that silvaURLS rejects.
+seqfilepattern={('lsu','parc',False,False) : "SILVA_%s_LSUParc_tax_silva.fasta.gz",
+                ('lsu','parc',False,True ) : None,
+                ('lsu','parc',True ,False) : "SILVA_%s_LSUParc_tax_silva_trunc.fasta.gz",
+                ('lsu','parc',True ,True ) : None,
+                ('lsu','ref' ,False,False) : "SILVA_%s_LSURef_tax_silva.fasta.gz",
+                ('lsu','ref' ,False,True ) : None,
+                ('lsu','ref' ,True ,False) : "SILVA_%s_LSURef_tax_silva_trunc.fasta.gz",
+                ('lsu','ref' ,True ,True ) : None,
+                ('ssu','parc',False,False) : "SILVA_%s_SSUParc_tax_silva.fasta.gz",
+                ('ssu','parc',False,True ) : None,
+                ('ssu','parc',True ,False) : "SILVA_%s_SSUParc_tax_silva_trunc.fasta.gz",
+                ('ssu','parc',True ,True ) : None,
+                ('ssu','ref' ,False,False) : "SILVA_%s_SSURef_tax_silva.fasta.gz",
+                ('ssu','ref' ,False,True ) : "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz",
+                ('ssu','ref' ,True ,False) : "SILVA_%s_SSURef_tax_silva_trunc.fasta.gz",
+                ('ssu','ref' ,True ,True ) : "SILVA_%s_SSURef_Nr99_tax_silva_trunc.fasta.gz"
+               }
+# Name pattern of the taxonomy file, indexed by options.rrna only.
+# The %s placeholder receives the release version.
+taxfilepattern={'lsu' : "tax_slv_lsu_%s.txt",
+                'ssu' : "tax_slv_ssu_%s.txt"
+               }
+def getHyperlink(url):
+    """
+    List the hyperlinks of the page located at *url*.
+
+    The page is downloaded, its lines are stripped and joined, and every
+    ``<a ...>...</a>`` element is scanned for its ``href`` attribute and its
+    text.
+
+    :param url: address of the page to read
+    :return: a dictionary mapping the text of each link to its target;
+             anchors without a double-quoted ``href`` are ignored
+    """
+    furl = urllib2.urlopen(url)
+    try:
+        data = "".join([l.strip() for l in furl])
+    finally:
+        # The connection is released even if the download fails.
+        furl.close()
+    href = re.compile('<a .*?</a>',re.IGNORECASE)
+    target=re.compile('href="(.*?)"',re.IGNORECASE)
+    filename=re.compile(">(.*?)</a>",re.IGNORECASE)
+    links = {}
+    for h in href.findall(data):
+        t = target.search(h)
+        f = filename.search(h)
+        if t is None or f is None:
+            # Not a usable link (for instance a named anchor): skip it
+            # instead of failing on the missing match.
+            continue
+        links[f.group(1)]=t.group(1)
+    return links
+def silvaURLS(options):
+    """
+    Locate the Silva sequence and taxonomy files matching *options*.
+
+    The files are listed either from the local directory ``options.local``
+    (the module level ``siteurl`` is then replaced by this directory) or
+    from the pages at ``baseurl`` and ``baseurl + "/taxonomy"``. The highest
+    version found among the ``SILVA*fasta[.gz]`` file names is used.
+
+    :param options: parsed options (``local``, ``rrna``, ``type``, ``trunc``
+                    and ``nr`` are read)
+    :return: a tuple (sequence location, taxonomy location, output name)
+    :raise AssertionError: when the requested combination of options has no
+                           file pattern
+    """
+    global siteurl
+    def lookup(table,name):
+        # Returns the entry called name or, failing that, the entry whose
+        # name only differs by the '.gz' suffix.
+        try:
+            return table[name]
+        except KeyError:
+            if name[-3:]=='.gz':
+                return table[name[0:-3]]
+            return table[name+'.gz']
+    if options.local is None:
+        archive=getHyperlink(baseurl)
+        taxonomy=getHyperlink(baseurl+"/taxonomy")
+    else:
+        taxdir=options.local+'/taxonomy'
+        archive = dict((f,f) for f in listdir(options.local) if isfile(join(options.local,f)))
+        taxonomy= dict((f,"taxonomy/"+f) for f in listdir(taxdir) if isfile(join(taxdir,f)))
+        siteurl=options.local
+    # The version is the second '_' separated field of the file name.
+    versions = []
+    for x in archive.keys():
+        if x.startswith('SILVA') and (x.endswith('fasta.gz') or x.endswith('fasta')):
+            versions.append(tuple(map(int, x.split('_')[1].split('.'))))
+    latest = sorted(versions,reverse=True)[0]
+    version='.'.join(map(str,latest))
+    pattern = seqfilepattern[(options.rrna,options.type,options.trunc,options.nr)]
+    if pattern is None:
+        raise AssertionError("Non existing version of Silva")
+    seqfile = pattern % version
+    taxfile = taxfilepattern[options.rrna] % version
+    sequrl = lookup(archive,seqfile)
+    taxurl = lookup(taxonomy,taxfile)
+    nrtag    = {True:"nr_" ,   False:""}[options.nr]
+    trunctag = {True:"trunc" , False:"full"}[options.trunc]
+    output = "silva_%s_%s%s_%s%s" % (version,options.rrna,options.type,nrtag,trunctag)
+    return "%s/%s" %(siteurl,sequrl),"%s/%s" %(siteurl,taxurl),output
+def silvaPathParser(path):
+    """
+    Split a ``;`` separated path into [parent path, last name].
+
+    The path is stripped and split from the right on at most two ``;``.
+    When the second field is empty (single level path such as ``name;``)
+    the parent is ``root``.
+
+    :param path: path to split
+    :return: a two element list [parent, name]
+    """
+    fields = path.strip().rsplit(";",2)
+    parent,name = fields[0],fields[1]
+    if name=="":
+        return ["root",parent]
+    return [parent,name]
+# Taxonomy built from a Silva taxonomy dump file.
+class SilvaDump(Taxonomy):  
+    def __init__(self,taxdump=None):
+        # taxdump is handed to ColumnFile by _readNodeTable.
+        # NOTE(review): presumably an open file or a file name - confirm
+        # against ColumnFile.
+        self._path=taxdump
+        self._readNodeTable(taxdump)
+        print >>sys.stderr,"Adding scientific name..."
+#         self._nameidx = {}
+#         for x in self._name :
+#             if x[0] not in self._nameidx :
+#                 self._nameidx[x[0]] = [x[2]]
+#             else :
+#                 self._nameidx[x[0]].append(x[2])
+        # self._bigestTaxid = max(x[0] for x in self._taxonomy)
+        # The base class is initialised once the tables are filled.
+        Taxonomy.__init__(self)
+    # Comparison function ordering the taxa on their first field (the taxid).
+    def _taxonCmp(t1,t2):
+        if t1[0] < t2[0]:
+            return -1
+        elif t1[0] > t2[0]:
+            return +1
+        return 0
+    _taxonCmp=staticmethod(_taxonCmp)
+    # Reads the dump file and fills the _taxonomy, _ranks, _index, _name,
+    # _preferedName, _localtaxon and silvaname attributes.
+    def _readNodeTable(self,dumpfile):
+        nodes = ColumnFile(dumpfile, 
+                           sep='\t', 
+                           types=(str,int,str,str,int))
+        print >>sys.stderr,"Reading taxonomy dump file..."
+        # Columns are reordered as (taxid,rank,path): the first column of
+        # the file is the ';' terminated path of the taxon.
+        taxonomy=[[n[1],n[2],n[0]] for n in nodes]
+        # A root taxon with taxid 1 is added to the taxa of the file.
+        taxonomy.append([1,'root','root;'])
+        print >>sys.stderr,"Sorting taxons..."
+        taxonomy.sort(SilvaDump._taxonCmp)
+        print >>sys.stderr,"Assigning parent taxids..."
+        # Maps a path without its trailing ';' to the taxid of the taxon.
+        taxidx=dict((n[2][0:-1],n[0]) for n in taxonomy)
+        # Each taxon becomes [taxid,rank,parent path,name].
+        taxonomy=[[n[0],n[1]]+ silvaPathParser(n[2]) for n in taxonomy]
+        print >>sys.stderr,"Extracting scientific name..."
+        # The parent path is replaced by the taxid of the parent.
+        taxonomy=[[n[0],n[1],taxidx[n[2]],n[3],'silva'] for n in taxonomy]
+        print >>sys.stderr,"List all taxonomy rank..."    
+        # 'species' is always part of the ranks, even when absent from the file.
+        ranks =list(set(x[1] for x in taxonomy) | set(['species'])) 
+        ranks.sort()
+        # Maps a rank name to its position in the sorted list of ranks.
+        rankidx = dict(map(None,ranks,xrange(len(ranks))))
+        self._taxonomy=taxonomy
+        self._localtaxon=len(taxonomy)
+        print >>sys.stderr,"Indexing taxonomy..."
+        # Maps a taxid to the position of the taxon in self._taxonomy.
+        index = {}
+        for i in xrange(self._localtaxon):
+            index[self._taxonomy[i][0]]=i
+        print >>sys.stderr,"Indexing parent and rank..."
+        # In place: the rank name becomes a rank index and the parent taxid
+        # becomes the position of the parent in self._taxonomy.
+        for t in self._taxonomy:
+            t[1]=rankidx[t[1]]
+            t[2]=index[t[2]]
+        self._ranks=ranks
+        self._index=index 
+        self._preferedName = []
+        # One (name,'scientific name',taxon position) entry per taxon.
+        self._name=[(n[3],'scientific name',self._index[n[0]]) for n in taxonomy]    
+        # Path to taxid mapping, read by silva2obi.
+        self.silvaname=taxidx
+def silva2obi(seq,taxonomy,state):
+    """
+    Convert a Silva sequence record into an annotated nucleic sequence.
+
+    The sequence is lower-cased and its ``u`` are replaced by ``t``. The
+    definition is split on its last ``;`` into the path of the parent taxon
+    and the species name. When the parent path is known by
+    ``taxonomy.silvaname``, the species is registered as a local taxon and
+    the ``taxid`` and ``specie_name`` attributes are set.
+
+    The minimal taxid given to ``addLocalTaxon`` is read from the module
+    level ``options`` variable.
+
+    :param seq: sequence record to convert
+    :param taxonomy: taxonomy providing ``silvaname``, ``getRank`` and
+                     ``addLocalTaxon``
+    :param state: set receiving the taxid of every parent of rank genus
+    :return: a new NucSequence; it carries no ``taxid`` when a KeyError
+             occurred during the annotation
+    """
+    nucleotides = str(seq).lower().replace('u', 't')
+    converted = NucSequence(seq.id,nucleotides,seq.definition)
+    lineage,species = [field.strip() for field in seq.definition.rsplit(';',1)]
+    try:
+        parent=taxonomy.silvaname[lineage]
+        if taxonomy.getRank(parent)=="genus":
+            state.add(parent)
+        converted['taxid'] = taxonomy.addLocalTaxon(species,'species',parent,options.taxashift)
+        converted['specie_name'] = species
+    except KeyError:
+        # Unknown parent taxon: the record is returned without taxid.
+        pass
+    return converted
+# Script entry point of obisilva. The module level ``options`` variable
+# defined here is also read by silva2obi.
+if __name__ == '__main__':
+    optionParser = getOptionManager([silvaOptions])
+    (options, entries) = optionParser()
+    # Both the rRNA type and the library type are mandatory.
+    if options.rrna is None:
+        raise AssertionError("rRNA type not specified (--ssu or --lsu)")
+    if options.type is None:
+        raise AssertionError("library type not specified (--parc or --ref)")
+    sequrl,taxurl,options.ecopcroutput = silvaURLS(options)
+    taxonomydata = universalOpen(taxurl)
+    options.taxonomy = SilvaDump(taxonomydata)
+#     if options.write != '' :
+#         options.write = open(options.write, 'w')
+    # The entries returned by the option parser are replaced by the
+    # sequences of the Silva fasta file.
+    entries = fastaIterator(universalOpen(sequrl))
+    writer  = EcoPCRDBSequenceWriter(options)
+    # state collects the genus taxids met so far; the progress bar compares
+    # its size to the number of genus of the taxonomy.
+    state = set()
+    gidx = options.taxonomy.findRankByName('genus')
+    ngenus = len([x for x in options.taxonomy._taxonomy if x[1]==gidx])
+    progressBar(max(1,len(state)),ngenus,
+                head=options.ecopcroutput)
+    for e in entries:
+        e = silva2obi(e, options.taxonomy,state)
+        progressBar(max(1,len(state)),ngenus,
+                    head=options.ecopcroutput)
+        # Only the sequences annotated with a taxid are written; the others
+        # are reported on the standard error.
+        if 'taxid' in e:
+            writer.put(e)
+        else:
+            print >>sys.stderr,"\nCannot find taxon for entry : %s : %s" % (e.id,e.definition)
+    print >>sys.stderr
+    # The taxonomy is written once all the sequences have been processed.
+    ecoTaxonomyWriter(options.ecopcroutput,options.taxonomy,onlyLocal=True)
diff --git a/src/obisort.py b/src/obisort.py
new file mode 100644
index 0000000..3ea093e
--- /dev/null
+++ b/src/obisort.py
@@ -0,0 +1,61 @@
+:py:mod:`obisort`: Sorts sequence records according to the value of a given attribute 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obisort` sorts sequence records according to the value of a given attribute, which can be either numeric or alphanumeric.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+def addSortOptions(optionManager):
+    """
+    Declare the command line options specific to obisort.
+
+    The ``-k`` and ``-r`` options are registered in a dedicated option group
+    created on *optionManager*.
+
+    :param optionManager: object providing ``add_option_group``
+    """
+    group=optionManager.add_option_group('Obisort specific options')
+    declarations = (
+        (('-k','--key'),
+         {'action'  : "append",
+          'dest'    : "key",
+          'metavar' : "<TAG NAME>",
+          'type'    : "string",
+          'default' : [],
+          'help'    : "Attribute used to sort the sequence records."}),
+        (('-r','--reverse'),
+         {'action'  : "store_true",
+          'dest'    : "reverse",
+          'default' : False,
+          'help'    : "Sorts in reverse order."}),
+    )
+    for flags,settings in declarations:
+        group.add_option(*flags,**settings)
+def cmpGenerator(options):
+    """
+    Build the comparison function used to sort the sequence records.
+
+    The records are compared on the attributes listed in ``options.key``,
+    taken in order: the first attribute whose values differ decides.
+
+    :param options: object whose ``key`` attribute lists the attribute names
+    :return: a function ``cmpkeys(x,y,i=0)`` returning a negative, null or
+             positive value; *i* is the index of the first key considered.
+             Without any key all the records compare equal.
+    """
+    keys=options.key
+    def cmpkeys(x,y,i=0):
+        # Every key from index i is examined exactly once, in order.
+        for k in keys[i:]:
+            c=cmp(x[k],y[k])
+            if c!=0:
+                return c
+        return 0
+    return cmpkeys
+# Script entry point of obisort.
+if __name__ == '__main__':
+    optionParser = getOptionManager([addSortOptions,addInOutputOption])
+    (options, entries) = optionParser()
+    # The comparison function is built before the records are read; all the
+    # records are then loaded in memory to be sorted.
+    compare=cmpGenerator(options)
+    ordered = sorted(entries, cmp=compare, reverse=options.reverse)
+    writer = sequenceWriterGenerator(options)
+    for record in ordered:
+        writer(record)
diff --git a/src/obisplit.py b/src/obisplit.py
new file mode 100755
index 0000000..d09ae62
--- /dev/null
+++ b/src/obisplit.py
@@ -0,0 +1,135 @@
+:py:mod:`obisplit`: Splits a sequence file in a set of subfiles
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obisplit` splits the input sequence file in a set of subfiles
+according to the values of a given attribute. The generated subfiles are named 
+after the values of the attribute, possibly preceded by a prefix 
+(``-p`` option). The sequence records for which the attribute is missing are discarded by default, or
+put in a file whose name is set using the ``-u`` option.
+  Example:
+    .. code-block:: bash
+        > obisplit -p experiment_ -t mode  
+    The above command splits the sequence input file according to the ``mode`` attribute. 
+    This attribute is created by the :py:mod:`solexapairend` tool and its value can be set to  
+    either ``joined`` or ``alignment``. The prefix ``experiment_`` is put before
+    each subfile name. Two subfiles will thus be created: ``experiment_joined`` and 
+    ``experiment_alignment``.
+from obitools.options import getOptionManager
+from obitools.format.options import addInOutputOption
+from obitools.fasta import formatFasta
+from obitools.fastq import formatFastq
+def addSplitOptions(optionManager):
+    """
+    Declare the command line options specific to obisplit.
+
+    The ``-p``, ``-t`` and ``-u`` options are registered in a dedicated
+    option group created on *optionManager*.
+
+    :param optionManager: object providing ``add_option_group``
+    """
+    group = optionManager.add_option_group('Obisplit specific options')
+    # (flags, action, destination, metavar, default, help); every option
+    # takes a string value.
+    declarations = (
+        (('-p','--prefix'),   "store", "prefix",   "<PREFIX FILENAME>",None,
+         "Prefix added to each subfile name"),
+        (('-t','--tag-name'), "append","tagname",  "<tagname>",        [],
+         "Attribute used to split the sequence file"),
+        (('-u','--undefined'),"store", "undefined","<FILENAME>",       None,
+         "Name of the file where undefined sequenced are stored"),
+    )
+    for flags,action,destination,meta,initial,text in declarations:
+        group.add_option(*flags,
+                         dest=destination, action=action,
+                         metavar=meta,
+                         type="string",
+                         default=initial,
+                         help=text)
+class OutFiles:
+    """
+    Dispatch sequence records to sub-files named after their attribute values.
+
+    At most ``_MAXOPEN`` sub-files are kept open at the same time: when a
+    new one is needed, the least recently used is closed. Sub-files are
+    opened in append mode, so a closed file can be reopened later without
+    losing its content.
+    """
+    # Maximum number of sub-files simultaneously opened.
+    _MAXOPEN = 100
+    def __init__(self,options):
+        """
+        :param options: parsed options; ``tagname``, ``undefined``,
+                        ``prefix`` and ``outputFormat`` are read
+        """
+        from collections import OrderedDict
+        # Opened sub-files, from the least to the most recently used.
+        self._files = OrderedDict()
+        self._tags = options.tagname
+        self._undefined = None
+        if options.undefined is not None:
+            self._undefined=open(options.undefined,'w')
+        self._prefix=options.prefix
+        self._extension=options.outputFormat
+    def __getitem__(self,key):
+        """
+        Return the opened sub-file associated with *key*, opening it if needed.
+        """
+        if key in self._files:
+            # Removed here and stored again below: the file becomes the
+            # most recently used one.
+            current = self._files.pop(key)
+        else:
+            name = key
+            if self._prefix is not None:
+                name = '%s%s' % (self._prefix,name)
+            if len(self._files)>=self._MAXOPEN:
+                # Close the least recently used file before opening a new one.
+                oldkey,old = self._files.popitem(last=False)
+                old.close()
+            current = open('%s.%s' % (name,self._extension),'a')
+        self._files[key]=current
+        return current
+    def __call__(self,seq):
+        """
+        Write *seq* to the sub-file matching the values of its attributes.
+
+        A record lacking one of the attributes is written to the file of
+        undefined records, or discarded when no such file was requested.
+        """
+        if all(z in seq for z in self._tags):
+            k = "_".join([str(seq[x]) for x in self._tags])
+            out=self[k]
+        else:
+            out=self._undefined
+        if out is None:
+            # No destination: the record is discarded instead of being
+            # printed on the standard output.
+            return
+        if self._extension=="fasta":
+            print >>out,formatFasta(seq)
+        else:
+            print >>out,formatFastq(seq)
+    def __del__(self):
+        # Explicitly close every file still opened.
+        for handle in self._files.values():
+            handle.close()
+        self._files.clear()
+        if self._undefined is not None:
+            self._undefined.close()
+# Script entry point of obisplit.
+if __name__=='__main__':
+    optionParser = getOptionManager([addSplitOptions,addInOutputOption])
+    (options, entries) = optionParser()
+    # The dispatcher is only created when the first record is read, so
+    # OutFiles is never instantiated for an empty input.
+    out=None
+    for seq in entries:
+        if out is None:
+            out = OutFiles(options)
+        out(seq)    
diff --git a/src/obistat.py b/src/obistat.py
new file mode 100644
index 0000000..801f3e1
--- /dev/null
+++ b/src/obistat.py
@@ -0,0 +1,221 @@
+:py:mod:`obistat`: computes basic statistics for attribute values 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`obistat` computes basic statistics for attribute values of sequence records.
+The sequence records can be categorized or not using one or several ``-c`` options.
+By default, only the number of sequence records and the total count are computed for each category. 
+Additional statistics can be computed for attribute values in each category, like:
+    - minimum value (``-m`` option) 
+    - maximum value (``-M`` option) 
+    - mean value (``-a`` option) 
+    - variance (``-v`` option) 
+    - standard deviation (``-s`` option)
+The result is a contingency table with the different categories in rows, and the 
+computed statistics in columns. 
+from obitools.options import getOptionManager
+from obitools.format.options import addInputFormatOption
+from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
+import math
+def addStatOptions(optionManager):
+    group = optionManager.add_option_group('obistat specific options')
+    group.add_option('-c','--category-attribute',
+                             action="append", dest="categories",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Attribute used to categorize the sequence records.")
+    group.add_option('-m','--min',
+                             action="append", dest="minimum",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Computes the minimum value of attribute for each category.")
+    group.add_option('-M','--max',
+                             action="append", dest="maximum",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Computes the maximum value of attribute for each category.")
+    group.add_option('-a','--mean',
+                             action="append", dest="mean",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Computes the mean value of attribute for each category.")
+    group.add_option('-v','--variance',
+                             action="append", dest="var",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Computes the variance of attribute for each category.")
+    group.add_option('-s','--std-dev',
+                             action="append", dest="sd",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Computes the standard deviation of attribute for each category.")
+def statistics(values,attribute,func):
+    stat={}
+    lstat={}
+    for var in attribute:
+        if var in values:
+            stat[var]={}
+            lstat[var]=0
+            for c in values[var]:
+                v = values[var][c]
+                m = func(v)
+                stat[var][c]=m
+                lm=len(str(m))
+                if lm > lstat[var]:
+                    lstat[var]=lm
+    return stat,lstat
+def minimum(values,options):
+    return statistics(values, options.minimum, min)
+def maximum(values,options):
+    return statistics(values, options.maximum, max)
+def mean(values,options):
+    def average(v):
+        s = reduce(lambda x,y:x+y,v,0)
+        return float(s)/len(v)
+    return statistics(values, options.mean, average)
+def variance(v):
+    if len(v)==1: 
+        return 0 
+    s = reduce(lambda x,y:(x[0]+y,x[1]+y**2),v,(0.,0.))
+    return s[1]/(len(v)-1) - s[0]**2/len(v)/(len(v)-1)
+def varpop(values,options):
+    return statistics(values, options.var, variance)
+def sd(values,options):
+    def stddev(v):
+        return math.sqrt(variance(v))
+    return statistics(values, options.sd, stddev)
+if __name__ == "__main__":
+    optionParser = getOptionManager([addStatOptions,addInputFormatOption,addTaxonomyDBOptions],
+                                    progdoc=__doc__)
+    (options, entries) = optionParser()
+    loadTaxonomyDatabase(options)
+    options.statistics = set(options.minimum) | set(options.maximum) | set(options.mean)
+    total = 0
+    catcount={}
+    totcount={}
+    values={}
+    lcat=0
+    for s in entries:
+        category = []
+        for c in options.categories:
+            try:
+                if hasattr(options, 'taxonomy') and options.taxonomy is not None:
+                    environ = {'taxonomy' : options.taxonomy,'sequence':s}
+                else:
+                    environ = {'sequence':s}
+                v = eval(c,environ,s)
+                lv=len(str(v))
+                if lv > lcat:
+                    lcat=lv
+                category.append(v)
+            except:
+                category.append(None)
+                if 4 > lcat:
+                    lcat=4
+        category=tuple(category)
+        catcount[category]=catcount.get(category,0)+1
+        try: 
+            totcount[category]=totcount.get(category,0)+s['count']
+        except KeyError:
+            totcount[category]=totcount.get(category,0)+1
+        for var in options.statistics:
+            if var in s:
+                v = s[var]
+                if var not in values:
+                    values[var]={}
+                if category not in values[var]:
+                    values[var][category]=[]
+                values[var][category].append(v)
+    mini,lmini  = minimum(values, options)
+    maxi,lmaxi  = maximum(values, options)
+    avg ,lavg   = mean(values, options)
+    varp ,lvarp = varpop(values, options)
+    sigma,lsigma= sd(values, options)
+    pcat  = "%%-%ds" % lcat
+    if options.minimum:
+        minvar= "min_%%-%ds" % max(len(x) for x in options.minimum)
+    else:
+        minvar= "%s"
+    if options.maximum:
+        maxvar= "max_%%-%ds" % max(len(x) for x in options.maximum)
+    else:
+        maxvar= "%s"
+    if options.mean:
+        meanvar= "mean_%%-%ds" % max(len(x) for x in options.mean)
+    else:
+        meanvar= "%s"
+    if options.var:
+        varvar= "var_%%-%ds" % max(len(x) for x in options.var)
+    else:
+        varvar= "%s"
+    if options.sd:
+        sdvar= "sd_%%-%ds" % max(len(x) for x in options.sd)
+    else:
+        sdvar= "%s"
+    hcat = "\t".join([pcat % x for x in options.categories]) + "\t" +\
+           "\t".join([minvar % x for x in options.minimum])  + "\t" +\
+           "\t".join([maxvar % x for x in options.maximum])  + "\t" +\
+           "\t".join([meanvar % x for x in options.mean])  + "\t" +\
+           "\t".join([varvar % x for x in options.var])  + "\t" +\
+           "\t".join([sdvar % x for x in options.sd]) + \
+           "\t   count" + \
+           "\t   total" 
+    print hcat
+    for c in catcount:
+        for v in c:
+            print pcat % str(v)+"\t",
+        for m in options.minimum:
+            print (("%%%dd" % lmini[m]) % mini[m][c])+"\t",
+        for m in options.maximum:
+            print (("%%%dd" % lmaxi[m]) % maxi[m][c])+"\t",
+        for m in options.mean:
+            print (("%%%df" % lavg[m]) % avg[m][c])+"\t",
+        for m in options.var:
+            print (("%%%df" % lvarp[m]) % varp[m][c])+"\t",
+        for m in options.sd:
+            print (("%%%df" % lsigma[m]) % sigma[m][c])+"\t",
+        print "%7d" %catcount[c],
+        print "%9d" %totcount[c]
diff --git a/src/obisubset.py b/src/obisubset.py
new file mode 100644
index 0000000..19f96e7
--- /dev/null
+++ b/src/obisubset.py
@@ -0,0 +1,116 @@
+:py:mod:`obisubset`: extract a subset of samples 
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obisubset` command extracts a subset of samples from a sequence file
+after its dereplication using :py:mod:`obiuniq` program.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+import re
+def addSubsetOptions(optionManager):
+    group = optionManager.add_option_group('obisubset specific options')
+    group.add_option('-s','--sample',
+                             action="store", dest="sample",
+                             metavar="<TAGNAME>",
+                             type="str",
+                             default='merged_sample',
+                             help="Tag containing sample descriptions, the default value is set to *merged_sample*")
+    group.add_option('-o','--other-tag',
+                     action="append", dest="taglist",
+                     metavar="<TAGNAME>",
+                     type="string",
+                     default=[],
+                     help="Another tag to clean according to the sample subset")
+    group.add_option('-l','--sample-list',
+                     action="store", dest="samplelist",
+                     metavar="<FILENAME>",
+                     type="string",
+                     default=None,
+                     help="File containing the samples names (one sample id per line)")
+    group.add_option('-p','--sample-pattern',
+                     action="store", dest="samplepattern",
+                     metavar="<REGEX>",
+                     type="string",
+                     default=None,
+                     help="A regular expression pattern matching the sample ids to extract")
+    group.add_option('-n','--sample-name',
+                     action="append", dest="samplename",
+                     metavar="<SAMPLEIDS>",
+                     type="string",
+                     default=[],
+                     help="A sample id to extract")
+def sequenceSelectorGenerator(options):
+    samplename = set(options.samplename)
+    othertags  = set(options.taglist)
+    if options.samplelist is not None:
+        with open(options.samplelist) as lname :
+            for name in lname:
+                name = name.strip()
+                samplename.add(name)
+    if options.samplepattern is not None:
+        samplepattern = re.compile(options.samplepattern)
+    else:
+        samplepattern = None
+    def sequenceSelector(entries):
+        for entry in entries:
+            samples=entry[options.sample]
+            slist = set(samples.keys())
+            tokeep=slist & samplename
+            if samplepattern is not None:
+                for name in slist:
+                    if samplepattern.match(name):
+                        tokeep.add(name)
+            if tokeep:
+                newsample={}
+                newcount=0
+                for name in tokeep:
+                    c = samples[name]
+                    newsample[name]= c 
+                    newcount+=c 
+                entry['count']=newcount 
+                entry[options.sample]=newsample
+                for t in othertags:
+                    if t in entry:
+                        d = entry[t]
+                        newd={}
+                        for name in tokeep:
+                            if name in d:
+                                newd[name] = d[name]
+                        entry[t]=newd
+                yield entry
+    return sequenceSelector
+if __name__=='__main__':
+    # Command line entry point: writes the records restricted to the
+    # selected samples.
+    optionParser = getOptionManager([addInOutputOption,addSubsetOptions],progdoc=__doc__)
+    (options, entries) = optionParser()
+    writer = sequenceWriterGenerator(options)
+    # `good` is a generator function: it only yields the records holding at
+    # least one of the selected samples.
+    good = sequenceSelectorGenerator(options)
+    for seq in good(entries):
+        writer(seq)
diff --git a/src/obitab.py b/src/obitab.py
new file mode 100644
index 0000000..e021bbd
--- /dev/null
+++ b/src/obitab.py
@@ -0,0 +1,178 @@
+:py:mod:`obitab`: converts a sequence file to a tabular file
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obitab` command converts a sequence file to a tabular file that
+can be opened by a spreadsheet program or R.
+from obitools.options import getOptionManager
+from obitools.format.options import addInOutputOption
+def addTableOptions(optionManager):
+    optionManager.add_option('-n','--na-string',
+                             action="store", dest="NA",
+                             metavar="<NOT AVAILABLE STRING>",
+                             type="string",
+                             default="NA",
+                             help="String write in the table for not available value"
+                            )
+    optionManager.add_option('','--output-field-separator',
+                             action="store", dest="ofs",
+                             metavar="STRING",
+                             type="string",
+                             default="\t",
+                             help="Field separator for CSV file"
+                            )
+    optionManager.add_option('-o','--output-seq',
+                             action="store_true", dest="sequence",
+                             default=False,
+                             help="Add an extra column for sequence"
+                            )
+    optionManager.add_option('-d','--no-definition',
+                             action="store_false", dest="definition",
+                             default=True,
+                             help="Remove column for sequence definition"
+                            )
+    optionManager.add_option('-a','--omit-attribute',
+                             action="append", dest="omit",
+                             metavar="<KEY>",
+                             default=[],
+                             help="Add attribute name to omit in the output tab"
+                            )
+def headerCmp(h1,h2):
+    if type(h1) is str and type(h2) is str:
+        return cmp(h1, h2)
+    if type(h1) is str and type(h2) is tuple:
+        return cmp(h1, h2[0])
+    if type(h1) is tuple and type(h2) is str:
+        return cmp(h1[0], h2)
+    if type(h1) is tuple and type(h2) is tuple:
+        c = cmp(h1[0],h2[0])
+        if c==0:
+            c = cmp(h1[1],h2[1])
+        return c
+    raise AssertionError
+if __name__=='__main__':
+    # Command line entry point: loads every record in memory, works out the
+    # set of columns from the attributes met, then prints a header line
+    # followed by one line per record.
+    optionParser = getOptionManager([addTableOptions,addInOutputOption])
+    (options, entries) = optionParser()
+    column = {}   # attribute name -> set of the value types met
+    subcol = {}   # dict-valued attribute name -> set of the sub-keys met
+    db = []       # every record, kept for the second pass
+    # First pass: collect the attributes and the type of their values.
+    for seq in entries: 
+        db.append(seq)
+        keys = seq.keys()      
+        for k in keys:
+            t=type(seq[k])
+            if k in column:
+                column[k].add(t)
+            else:
+                column[k]=set([t])
+            if t is dict:
+                if k not in subcol:
+                    subcol[k]=set()
+                subcol[k]|=set(seq[k].keys())
+    # Reduce each attribute to a single type: attributes with several types
+    # or with an unsupported type are handled as strings.
+    headers = set()
+    for c in column:
+        if len(column[c])==1:
+            column[c]=column[c].pop()
+        else:
+            column[c]=str
+        if column[c] not in (str,int,float,dict,bool):
+            column[c]=str
+        if column[c] is not dict:
+            headers.add(c)
+        else:
+            # a dict attribute gives one (name, sub-key) column per sub-key
+            for sc in subcol[c]:
+                headers.add((c,sc))
+    omit = set(options.omit)
+    headers=list(headers)
+    headers.sort(headerCmp)
+    OFS = options.ofs
+    # Header line.
+    s = "id"
+    if options.definition:
+        s = '%s%sdefinition'%(s,OFS)
+    for k in headers:
+        if type(k) is str:
+            if k not in omit:
+                s = '%s%s%s'%(s,OFS,k)
+        else:
+            if k[0] not in omit:
+                if type(k[1]) is tuple:
+                    sk = ":".join([str(x) for x in k[1]])
+                else:
+                    sk = str(k[1])
+                # the 'merged_' prefix is not repeated in the column title
+                if k[0][0:7]=='merged_':
+                    s = '%s%s%s:%s' % (s,OFS,k[0][7:],sk)
+                else:
+                    s = '%s%s%s:%s' % (s,OFS,k[0],sk)
+    if options.sequence:
+        s = "%s%ssequence"%(s,OFS)
+    print s
+    # Second pass: one line per record, columns in the header order.
+    for seq in db:
+        s = seq.id
+        if options.definition:
+            s = '%s%s%s'%(s,OFS,seq.definition)
+        for k in headers:
+            if type(k) is str:
+                if k not in omit:
+                    if k in seq:
+                        v = seq[k]
+                        if v is None:
+                            v=options.NA
+                        s = '%s%s%s'%(s,OFS,v)
+                    else:
+                        s = '%s%s%s'%(s,OFS,options.NA)
+            else:
+                if k[0] not in omit:
+                    if k[0] in seq:
+                        sk = seq[k[0]]
+                    else:
+                        sk={}
+                    if k[1] in sk:
+                        v = sk[k[1]]
+                        if v is None:
+                            v=options.NA
+                        s = '%s%s%s'%(s,OFS,v)
+                    else:
+                        # a missing sub-key is printed as 0 for the 'merged_'
+                        # attributes and as the NA string for the others
+                        if k[0][0:7]=='merged_':
+                            s = '%s%s0'%(s,OFS)
+                        else:
+                            s = '%s%s%s'%(s,OFS,options.NA)
+        if options.sequence:
+            s = '%s%s%s'%(s,OFS,str(seq))
+        print s
diff --git a/src/obitail.py b/src/obitail.py
new file mode 100644
index 0000000..a89fa46
--- /dev/null
+++ b/src/obitail.py
@@ -0,0 +1,54 @@
+:py:mod:`obitail`: extracts the last sequence records
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obitail` command is in some way analogous to the standard Unix `tail` command.
+It selects the tail of :doc:`a sequence file <../formats>`. 
+But instead of working text line by text line as the standard Unix tool, 
+selection is done at the sequence record level. You can specify the number of 
+sequence records to select.
+  *Example:*
+    .. code-block:: bash
+          > obitail -n 150 seq1.fasta > seq2.fasta
+    Selects the 150 last sequence records from the ``seq1.fasta`` file and stores
+    them into the ``seq2.fasta`` file.
+from obitools.format.options import addInOutputOption, sequenceWriterGenerator
+from obitools.options import getOptionManager
+import collections
+def addHeadOptions(optionManager):
+    optionManager.add_option('-n','--sequence-count',
+                             action="store", dest="count",
+                             metavar="###",
+                             type="int",
+                             default=10,
+                             help="Count of first sequences to print")
+if __name__ == '__main__':
+    optionParser = getOptionManager([addHeadOptions,addInOutputOption])
+    (options, entries) = optionParser()
+    i=0
+    queue = collections.deque(entries,options.count)
+    writer = sequenceWriterGenerator(options)
+    while queue:
+        writer(queue.popleft())
diff --git a/src/obitaxonomy.py b/src/obitaxonomy.py
new file mode 100644
index 0000000..3f96362
--- /dev/null
+++ b/src/obitaxonomy.py
@@ -0,0 +1,350 @@
+:py:mod:`obitaxonomy`: manages taxonomic databases
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org> and Celine Mercier <celine.mercier at metabarcoding.org>
+The :py:mod:`obitaxonomy` command can generate an ecoPCR database from a NCBI taxdump 
+(see NCBI ftp site) and allows managing the taxonomic data contained in both types of database.
+Several types of editing are possible:
+**Adding a taxon to the database**
+    The new taxon is described by three values: 
+    its scientific name, its taxonomic rank, and the *taxid* of its first ancestor.
+    Done by using the ``-a`` option. 
+**Deleting a taxon from the database**
+    Erases a local taxon. Done by using the ``-D`` option and specifying a *taxid*. 
+**Adding a species to the database**
+    The genus of the species must already exist in the database. The species will be 
+    added under its genus. Done by using the ``-s`` option and specifying a species 
+    scientific name. 
+**Adding a preferred scientific name for a taxon in the database**
+    Adds a preferred name for a taxon in the taxonomy, by specifying the new favorite 
+    name and the *taxid* of the taxon whose preferred name should be changed. 
+    Done by using the ``-f`` option.
+**Adding all the taxa from a sequence file in the ``OBITools`` extended :doc:`fasta <../fasta>` format to the database**
+    All the taxa from a file in the ``OBITools`` extended :doc:`fasta <../fasta>` format, and possibly their ancestors, are added to the 
+    taxonomy database.
+    The header of each sequence record must contain the attribute defined by the 
+    ``-k`` option (default key: ``species_name``), whose value is the scientific name 
+    of the taxon to be added.
+    A taxonomic path for each sequence record can be specified with the ``-p`` option, 
+    as the attribute key that contains the taxonomic path of the taxon to be added. 
+    A restricting ancestor can be specified with the ``-A`` option, either as a *taxid* 
+    (integer) or a key (string). If it is a *taxid*, this *taxid* is the default *taxid* 
+    under which the new taxon is added if none of its ancestors are specified or can 
+    be found. If it is a key, :py:mod:`obitaxonomy` looks for the ancestor *taxid* in 
+    the corresponding attribute, and the new taxon is systematically added under this 
+    ancestor. By default, the restricting ancestor is the root of the taxonomic tree for
+    all the new taxa.
+    If neither a path nor an ancestor is specified in the header of the sequence record,
+    :py:mod:`obitaxonomy` tries to read the taxon name as a species name and to find the 
+    genus in the taxonomic database. If the genus is found, the new taxon is added under it. 
+    If not, it is added under the restricting ancestor. 
+    It is highly recommended to check exactly what was done by reading the output, 
+    since :py:mod:`obitaxonomy` uses *ad hoc* parsing and decision rules.
+    Done by using the ``-F`` option. 
+- When a taxon is added, a new *taxid* is assigned to it. The minimum for the new *taxids* 
+  can be specified by the ``-m`` option and is equal to 10000000 by default.
+- For each modification, a line is printed with details on what was done.
+from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDatabase
+from obitools.options import getOptionManager
+from obitools.ecopcr.taxonomy import ecoTaxonomyWriter
+from obitools.fasta import fastaIterator
+import sys
+def addTaxonFromFile(name, rank, parent, options) :
+    taxid = options.taxonomy.addLocalTaxon(name, rank, parent, options.taxashift)
+    taxon = options.taxonomy.findTaxonByTaxid(taxid)
+    parent= options.taxonomy._taxonomy[taxon[2]]
+#    if options.write == '' :
+    print>>sys.stderr, "added : %-40s\t%-15s\t%-8d\t->\t%s [%d] (%s)" % (taxon[3],options.taxonomy._ranks[taxon[1]],
+                                                                            taxon[0],
+                                                                            parent[3],parent[0],options.taxonomy._ranks[parent[1]])
+#    else :
+#        print>>options.write, "added : %-40s\t%-15s\t%-8d\t->\t%s [%d] (%s)" % (taxon[3],options.taxonomy._ranks[taxon[1]],
+#                                                                           taxon[0],
+#                                                                           parent[3],parent[0],options.taxonomy._ranks[parent[1]])
+    return taxid
+def numberInStr(s) :
+    containsNumber = False
+    for c in s :
+        if c.isdigit() :
+            containsNumber = True
+    return containsNumber
+def editTaxonomyOptions(optionManager):
+    optionManager.add_option('-a','--add-taxon',
+                             action="append", dest="newtaxon",
+                             metavar="<taxon_name>:rank:parent",
+                             default=[],
+                             help="Adds a new taxon to the taxonomy. The new taxon "
+                                  "is described by three values separated by colons: "
+                                  "the scientific name, the rank of the new taxon, "
+                                  "the taxid of the parent taxon")
+    optionManager.add_option('-D','--delete-local-taxon',
+                             action="append", dest="deltaxon",
+                             metavar="<TAXID>",
+                             default=[],
+                             help="Erase a local taxon")
+    optionManager.add_option('-s','--add-species',
+                             action="append", dest="newspecies",
+                             metavar="<SPECIES_NAME>",
+                             default=[],
+                             help="Adds a new species to the taxonomy. The new species "
+                                  "is described by its scientific name")
+    optionManager.add_option('-F','--add-file',
+                             action="store", dest="species_file",
+                             metavar="<file name>",
+                             default=None,
+                             help="Add all the species from a fasta file to the taxonomy. The header of"
+                                  " the sequences must contain the field defined by the -k option")
+    optionManager.add_option('-k','--key_name',
+                             action="store", dest="key_name",
+                             metavar="<key name>",
+                             default='species_name',
+                             help="Name of the attribute key used to find the species names in the headers "
+                                  "when the -F option is used. "
+                                  "Default = 'species_name'")
+    optionManager.add_option('-f','--add-favorite-name',
+                             action="append", dest="newname",
+                             metavar="<taxon_name>:taxid",
+                             default=[],
+                             help="Add a new favorite name to the taxonomy. The new name "
+                                  "is described by two values separated by a colon. "
+                                  "the new favorite name and the taxid of the taxon")
+    optionManager.add_option('-m','--min-taxid',
+                             action="store", dest="taxashift",
+                             type="int",
+                             metavar="####",
+                             default=10000000,
+                             help="minimal taxid for the newly added taxid")
+    optionManager.add_option('-A','--restricting_ancestor',
+                             action="store", dest="res_anc",
+                             type="str",
+                             metavar="<ANCESTOR>",
+                             default='',
+                             help="works with the -F option. Can be a word or a taxid (number). Enables to restrict the "
+                                  "adding of taxids under a specified ancestor. If it's a word, it's the field containing "
+                                  "the ancestor's taxid in each sequence's header (can be different for each sequence). If "
+                                  "it's a number, it's the taxid of the ancestor (in which case it's the same for all the sequences)."
+                                  " All the sequences in the file for which the genus can't be found will be added under this ancestor.")
+#    optionManager.add_option('-w','--write_in_file',
+#                             action="store", dest="write",
+#                             metavar="<write_in_file>",
+#                             type = "str", default='',
+#                             help="works with the -F option. Writes all the taxa added in the specified file instead of in the console screen."
+#                             " Useful for big and/or problematic files.")
+    optionManager.add_option('-p','--path',
+                             action="store", dest="path",
+                             type="str",
+                             metavar="<path>",
+                             default='',
+                             help="works with the -F option. Field name for the taxonomy path of the taxa if they are in the headers of the sequences. "
+                             "Must be of the form 'Fungi,Agaricomycetes,Thelephorales,Thelephoraceae' with the highest ancestors"
+                             " first and ',' as separators between ancestors")
+#    optionManager.add_option('-P','--force_ancestor',
+#                             action="store_true", dest="force_ancestor",
+#                             metavar="<force_ancestor>",
+#                             default=False,
+#                             help="works with the -A option when the ancestor is in the header. Forces the adding of the species under the ancestor specified."
+#                             " /!\ the ancestor must exist. Use taxonomy paths (-p option) if you want the ancestor(s) to be created too.")
+if __name__ == '__main__':
+    optionParser = getOptionManager([addTaxonomyDBOptions,editTaxonomyOptions])
+    (options, entries) = optionParser()
+    loadTaxonomyDatabase(options)
+    localdata=False
+#     if options.write != '' :
+#         options.write = open(options.write, 'w')
+    for t in options.newtaxon:
+        tx = t.split(':')
+        taxid = options.taxonomy.addLocalTaxon(tx[0].strip(),tx[1],tx[2],options.taxashift)
+        taxon = options.taxonomy.findTaxonByTaxid(taxid)
+        parent= options.taxonomy._taxonomy[taxon[2]]
+        print "added : %-40s\t%-15s\t%-8d\t->\t%s [%d] (%s)" % (taxon[3],options.taxonomy._ranks[taxon[1]],
+                                                     taxon[0],
+                                                     parent[3],parent[0],options.taxonomy._ranks[parent[1]])
+        localdata=True
+#    for t in options.deltaxon:
+#        tx = int(t)
+#        taxon = options.taxonomy.removeLocalTaxon(tx)
+#        print "removed : %-40s\t%-15s\t%-8d" % (taxon[3],options.taxonomy._ranks[taxon[1]],
+#                                                     taxon[0])
+#        localdata=True
+    if options.species_file != None :
+        useless_words = ['fungal','fungi','endophyte','unknown','mycorrhizal','uncultured','Uncultured','ectomycorrhiza', \
+                         'ectomycorrhizal','mycorrhizal','vouchered','unidentified','bacterium','Bacterium']
+        if options.res_anc == '' :
+            restricting_ancestor = 1
+            resAncInHeader = False
+        elif options.res_anc.isdigit() :
+            restricting_ancestor = int(options.res_anc)
+            resAncInHeader = False
+        else :
+            resAncInHeader = True
+        for seq in fastaIterator(options.species_file) :
+            if resAncInHeader :
+                if options.res_anc in seq :
+                    restricting_ancestor = int(seq[options.res_anc])
+                else :
+                    restricting_ancestor = 1
+            t = seq[options.key_name]
+            key_error = False
+            taxid = None
+            # check if the taxon isn't already in the taxonomy with the right ancestor
+            try :
+                possible_taxids = options.taxonomy.findTaxonByName(t)
+                for p in possible_taxids :
+                    if options.taxonomy.isAncestor(restricting_ancestor, p[0]) :
+                        taxid = p[0]
+            except KeyError :
+                key_error = True
+            if key_error or taxid is None :
+                if (resAncInHeader and options.res_anc in seq) :
+                    taxid = addTaxonFromFile(t,'species',restricting_ancestor,options)
+                elif options.path != '' :   
+                    previous = options.taxonomy.findTaxonByTaxid(restricting_ancestor)
+                    if seq[options.path] != '' :
+                        ancestors = [a for a in seq[options.path].split(',')]
+                        if ancestors[-1] != t :
+                            ancestors.append(t)
+                    else :     # useful when data is from UNITE databases but could disappear
+                        if len(t.split(' ')) >= 2 and not numberInStr(t) :
+                            genus, trash = t.split(" ",1)
+                            ancestors = [genus, t]
+                        else :
+                            ancestors = [t]
+                    for a in ancestors :
+                        try:
+                            possible_previous = options.taxonomy.findTaxonByName(a)
+                            keyError = True
+                            for p in possible_previous :
+                                if options.taxonomy.isAncestor(restricting_ancestor, p[0]) :
+                                    previous = p
+                                    keyError = False
+                            if keyError :
+                                raise KeyError()
+                        except KeyError :
+                            if (len(ancestors) > 1 and a == ancestors[-2] and len(ancestors[-1].split(' ')) >= 2 and ((not numberInStr(a)) or 'sp' in a.split(' '))) :      #a voirrrrr, trop restrictif ?
+                                rank = 'genus'
+                            elif a == ancestors[-1] :
+                                rank = 'species'
+                            else :
+                                rank = 'no rank'
+                            taxid = addTaxonFromFile(a,rank,previous[0],options)
+                            previous = (taxid, options.taxonomy.findRankByName(rank))
+                else :
+                    if (len(t.split(' ')) >= 2 and (not numberInStr(t)  or 'sp' in t.split(' ') or t[0].isupper()) \
+                        and t.split(' ')[0] not in useless_words) :
+                        genus,species = t.split(" ",1)
+                        try :
+                            possible_genuses = options.taxonomy.findTaxonByName(genus)
+                            genus_taxid = None
+                            for g in possible_genuses :
+                                if options.taxonomy.isAncestor(restricting_ancestor, g[0]) :
+                                    genus_taxid = g[0]
+                        except KeyError :
+                            genus_taxid = addTaxonFromFile(genus,'genus',restricting_ancestor,options)
+                        if genus_taxid is None :    # Genuses matching the name were found but they weren't under the restricting ancestor
+                            parent = restricting_ancestor
+                        else :
+                            parent = genus_taxid
+                        taxid = addTaxonFromFile(t, 'species', parent, options)
+                    else :
+                        taxid = addTaxonFromFile(t, 'species', restricting_ancestor, options)
+                localdata=True
+#            seq['taxid'] = taxid
+#            print formatFasta(seq)
+    for t in options.newspecies:
+        genus,species = t.split(" ",1)
+        parent = options.taxonomy.findTaxonByName(genus)
+        taxid = options.taxonomy.addLocalTaxon(t,'species',parent[0],options.taxashift)
+        taxon = options.taxonomy.findTaxonByTaxid(taxid)
+        parent= options.taxonomy._taxonomy[taxon[2]]
+        print "added : %-40s\t%-15s\t%-8d\t->\t%s [%d] (%s)" % (taxon[3],options.taxonomy._ranks[taxon[1]],
+                                                     taxon[0],
+                                                     parent[3],parent[0],options.taxonomy._ranks[parent[1]])
+        localdata=True
+    for t in options.newname:
+        tx = t.split(':')
+        taxid = options.taxonomy.addPreferedName(int(tx[1]), tx[0].strip())
+        print "name : %8d\t->\t%s" % (taxid,options.taxonomy.getPreferedName(taxid))
+    ecoTaxonomyWriter(options.ecodb,options.taxonomy,onlyLocal=True)
\ No newline at end of file
diff --git a/src/obitools/SVGdraw.py b/src/obitools/SVGdraw.py
new file mode 100644
index 0000000..521f750
--- /dev/null
+++ b/src/obitools/SVGdraw.py
@@ -0,0 +1,1054 @@
+#!/usr/bin/env python
+##Copyright (c) 2002, Fedor Baart & Hans de Wit (Stichting Farmaceutische Kengetallen)
+##All rights reserved.
+##Redistribution and use in source and binary forms, with or without modification,
+##are permitted provided that the following conditions are met:
+##Redistributions of source code must retain the above copyright notice, this
+##list of conditions and the following disclaimer.
+##Redistributions in binary form must reproduce the above copyright notice,
+##this list of conditions and the following disclaimer in the documentation and/or
+##other materials provided with the distribution.
+##Neither the name of the Stichting Farmaceutische Kengetallen nor the names of
+##its contributors may be used to endorse or promote products derived from this
+##software without specific prior written permission.
+##Thanks to Gerald Rosennfellner for his help and useful comments.
+__doc__="""Use SVGdraw to generate your SVGdrawings.
+SVGdraw uses an object model drawing and a method toXML to create SVG graphics
+by using easy to use classes and methods. Usually you start by creating a drawing, e.g.
+    d=drawing()
+    #then you create a SVG root element
+    s=svg()
+    #then you add some elements eg a circle and add it to the svg root element
+    c=circle()
+    #you can supply attributes by using named arguments.
+    c=circle(fill='red',stroke='blue')
+    #or by updating the attributes attribute:
+    c.attributes['stroke-width']=1
+    s.addElement(c)
+    #then you add the svg root element to the drawing
+    d.setSVG(s)
+    #and finally you xmlify the drawing
+    d.toXml()
+this results in the svg source of the drawing, which consists of a circle
+on a white background. It's as easy as that;)
+This module was created using the SVG specification of www.w3c.org and the
+O'Reilly (www.oreilly.com) python books as information sources. A svg viewer
+is available from www.adobe.com"""
+# there are two possibilities to generate svg:
+# via a dom implementation and directly using <element>text</element> strings
+# the latter is way faster (and shorter in coding)
+# the former is only used in debugging svg programs
+# maybe it will be removed alltogether after a while
+# with the following variable you indicate whether to use the dom implementation
+# Note that PyXML is required for using the dom implementation.
+# It is also possible to use the standard minidom. But I didn't try that one.
+# Anyway the text based approach is about 60 times faster than using the full dom implementation.
+import exceptions
+# Set to a non-zero value to build the document through the PyXML dom
+# implementation; the default (0) selects the direct text output.
+use_dom_implementation=0
+if use_dom_implementation!=0:
+    try:
+        from xml.dom import implementation
+        from xml.dom.ext import PrettyPrint
+    except ImportError:
+        # only a failed import means PyXML is missing; other errors are not masked
+        raise ImportError("PyXML is required for using the dom implementation")
+#The implementation is used for the creating the XML document.
+#The prettyprint module is used for converting the xml document object to a xml file
+import sys
+# Refuse to run on a major version below 2.
+assert sys.version_info[0]>=2
+# Only the minor version number is inspected here. Presumably this targets
+# interpreters older than 2.2 that lack the True/False/file builtins - TODO confirm.
+if sys.version_info[1]<2:
+    True=1
+    False=0
+    file=open
+#The recursion limit is set conservative so mistakes like s=svg() s.addElement(s)
+#won't eat up too much processor time.
+#the following code is pasted from xml.sax.saxutils
+#it makes it possible to run the code without the xml sax package installed
+#To make it possible to have <rubbish> in your text elements, it is necessary to escape the texts
+def _escape(data, entities={}):
+    """Escape &, <, and > in a string of data.
+    You can escape other strings of data by passing a dictionary as
+    the optional entities parameter.  The keys and values must all be
+    strings; each key will be replaced with its corresponding value.
+    Returns the escaped string. The entities dictionary is only read.
+    """
+    # '&' has to be handled first, otherwise the ampersands introduced by
+    # the two following replacements would be escaped a second time.
+    data = data.replace("&", "&amp;")
+    data = data.replace("<", "&lt;")
+    data = data.replace(">", "&gt;")
+    for chars, entity in entities.items():
+        data = data.replace(chars, entity)
+    return data
+def _quoteattr(data, entities={}):
+    """Escape and quote an attribute value.
+    Escape &, <, and > in a string of data, then quote it for use as
+    an attribute value.  The \" character will be escaped as well, if
+    necessary.
+    You can escape other strings of data by passing a dictionary as
+    the optional entities parameter.  The keys and values must all be
+    strings; each key will be replaced with its corresponding value.
+    Returns the escaped value including its surrounding quotes.
+    """
+    data = _escape(data, entities)
+    if '"' not in data:
+        return '"%s"' % data
+    if "'" not in data:
+        # only double quotes inside: single quotes can delimit the value as is
+        return "'%s'" % data
+    # both kinds of quotes inside: delimit with double quotes and escape the inner ones
+    return '"%s"' % data.replace('"', "&quot;")
+def _xypointlist(a):
+    """formats a list of xy pairs
+    every pair is rendered with str(), stripped of its first and last character
+    (the brackets) and followed by two spaces
+    """
+    return ''.join([str(pair)[1:-1]+'  ' for pair in a])
+def _viewboxlist(a):
+    """formats a tuple
+    the str() of every item is followed by a single space (also the last one)
+    """
+    return ''.join([str(item)+' ' for item in a])
+def _pointlist(a):
+    """formats a list of numbers
+    returns str(a) without its first and last character (the brackets)
+    """
+    rendered=str(a)
+    return rendered[1:-1]
+class pathdata:
+    """class used to create a pathdata object which can be used for a path.
+    Every method appends one path command to self.path; repr() joins the commands with spaces.
+    Although most methods are pretty straightforward it might be useful to look at the SVG specification."""
+    #The original author noted that the methods below were not tested.
+    def __init__(self,x=None,y=None):
+        self.path=[]
+        #an initial absolute move is only emitted when both coordinates are given
+        if not (x is None or y is None):
+            self._emit('M ',(x,),(y,))
+    def _emit(self,command,*groups):
+        """appends command followed by its groups; values inside a group are joined by ',', groups by ' '"""
+        rendered=[','.join([str(value) for value in group]) for group in groups]
+        self.path.append(command+' '.join(rendered))
+    def closepath(self):
+        """ends the path"""
+        self.path.append('z')
+    def move(self,x,y):
+        """move to absolute"""
+        self._emit('M ',(x,),(y,))
+    def relmove(self,x,y):
+        """move to relative"""
+        self._emit('m ',(x,),(y,))
+    def line(self,x,y):
+        """line to absolute"""
+        self._emit('L ',(x,),(y,))
+    def relline(self,x,y):
+        """line to relative"""
+        self._emit('l ',(x,),(y,))
+    def hline(self,x):
+        """horizontal line to absolute"""
+        self._emit('H',(x,))
+    def relhline(self,x):
+        """horizontal line to relative"""
+        self._emit('h',(x,))
+    def vline(self,y):
+        """vertical line to absolute"""
+        self._emit('V',(y,))
+    def relvline(self,y):
+        """vertical line to relative"""
+        self._emit('v',(y,))
+    def bezier(self,x1,y1,x2,y2,x,y):
+        """bezier with xy1 and xy2 to xy absolute"""
+        self._emit('C',(x1,y1),(x2,y2),(x,y))
+    def relbezier(self,x1,y1,x2,y2,x,y):
+        """bezier with xy1 and xy2 to xy relative"""
+        self._emit('c',(x1,y1),(x2,y2),(x,y))
+    def smbezier(self,x2,y2,x,y):
+        """smooth bezier with xy2 to xy absolute"""
+        self._emit('S',(x2,y2),(x,y))
+    def relsmbezier(self,x2,y2,x,y):
+        """smooth bezier with xy2 to xy relative"""
+        self._emit('s',(x2,y2),(x,y))
+    def qbezier(self,x1,y1,x,y):
+        """quadratic bezier with xy1 to xy absolute"""
+        self._emit('Q',(x1,y1),(x,y))
+    def relqbezier(self,x1,y1,x,y):
+        """quadratic bezier with xy1 to xy relative"""
+        self._emit('q',(x1,y1),(x,y))
+    def smqbezier(self,x,y):
+        """smooth quadratic bezier to xy absolute"""
+        self._emit('T',(x,y))
+    def relsmqbezier(self,x,y):
+        """smooth quadratic bezier to xy relative"""
+        self._emit('t',(x,y))
+    def ellarc(self,rx,ry,xrot,laf,sf,x,y):
+        """elliptical arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag  to xy absolute"""
+        self._emit('A',(rx,ry),(xrot,),(laf,),(sf,),(x,),(y,))
+    def relellarc(self,rx,ry,xrot,laf,sf,x,y):
+        """elliptical arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag  to xy relative"""
+        self._emit('a',(rx,ry),(xrot,),(laf,),(sf,),(x,),(y,))
+    def __repr__(self):
+        return ' '.join(self.path)
+class SVGelement:
+    """SVGelement(type,attributes,elements,text,namespace,cdata,**args)
+    Creates a arbitrary svg element and is intended to be subclassed not used on its own.
+    This element is the base of every svg element it defines a class which resembles
+    a xml-element. The main advantage of this kind of implementation is that you don't
+    have to create a toXML method for every different graph object. Every element
+    consists of a type, attribute, optional subelements, optional text and an optional
+    namespace. Note the elements=None, if elements is None:self.elements=[] construction.
+    This is done because if you default to elements=[] every object has a reference
+    to the same empty list."""
+    def __init__(self,type='',attributes=None,elements=None,text='',namespace='',cdata=None,**args):
+        self.type=type
+        #a dictionary or list handed in by the caller is stored as is, it is not copied
+        if attributes is None:
+            attributes={}
+        if elements is None:
+            elements=[]
+        self.attributes=attributes
+        self.elements=elements
+        self.text=text
+        self.namespace=namespace
+        self.cdata=cdata
+        #every additional keyword argument becomes an attribute of the element
+        for arg,value in args.items():
+            self.attributes[arg]=value
+    def addElement(self,SVGelement):
+        """adds an element to a SVGelement
+        SVGelement.addElement(SVGelement)
+        """
+        self.elements.append(SVGelement)
+    def toXml(self,level,f, **kwargs):
+        """writes this element and, recursively, its sub elements to f
+        level is the nesting depth used for the indentation, f is any object with a write method.
+        With the keyword argument preserveWhitespace=True no tabs and newlines are written.
+        """
+        preserve = kwargs.get("preserveWhitespace", False)
+        if preserve:
+            NEWLINE = ""
+            TAB = ""
+        else:
+            NEWLINE = "\n"
+            TAB = "\t"
+        f.write(TAB*level)
+        f.write('<'+self.type)
+        for attkey,attvalue in self.attributes.items():
+            f.write(' '+_escape(str(attkey))+'='+_quoteattr(str(attvalue)))
+        if self.namespace:
+            f.write(' xmlns="'+ _escape(str(self.namespace))+'" ')
+        if self.elements or self.text or self.cdata:
+            f.write('>')
+        if self.elements:
+            f.write(NEWLINE)
+        for element in self.elements:
+            element.toXml(level+1,f, preserveWhitespace=preserve)
+        if self.cdata:
+            f.write(NEWLINE+TAB*(level+1)+'<![CDATA[')
+            for line in self.cdata.splitlines():
+                f.write(NEWLINE+TAB*(level+2)+line)
+            f.write(NEWLINE+TAB*(level+1)+']]>'+NEWLINE)
+        if self.text:
+            if isinstance(self.text,str): #If the text is only text
+                f.write(_escape(str(self.text)))
+            else:                         #If the text is a spannedtext class
+                f.write(str(self.text))
+        #the closing tag is only indented when the content ended on its own line
+        if self.elements:
+            f.write(TAB*level+'</'+self.type+'>'+NEWLINE)
+        elif self.text:
+            f.write('</'+self.type+'>'+NEWLINE)
+        elif self.cdata:
+            f.write(TAB*level+'</'+self.type+'>'+NEWLINE)
+        else:
+            f.write('/>'+NEWLINE)
+class tspan(SVGelement):
+    """ts=tspan(text='',**args)
+    a tspan element can be used for applying formatting to a textsection
+    usage:
+    ts=tspan('this text is bold')
+    ts.attributes['font-weight']='bold'
+    st=spannedtext()
+    st.addtspan(ts)
+    t=text(3,5,st)
+    """
+    def __init__(self,text=None,**args):
+        SVGelement.__init__(self,'tspan',**args)
+        #test the argument, not self.text: the latter has already been set to ''
+        #by SVGelement.__init__ and must be kept when no text is given
+        if text is not None:
+            self.text=text
+    def __repr__(self):
+        attributes=''.join([' %s="%s"' % (key,value) for key,value in self.attributes.items()])
+        return "<tspan"+attributes+'>'+self.text+'</tspan>'
+class tref(SVGelement):
+    """tr=tref(link='',**args)
+    a tref element can be used for referencing text by a link to its id;
+    the link is stored as the xlink:href attribute.
+    usage:
+    tr=tref('#linktotext')
+    st=spannedtext()
+    st.addtref(tr)
+    t=text(3,5,st)
+    """
+    def __init__(self,link,**args):
+        href={'xlink:href':link}
+        SVGelement.__init__(self,'tref',href,**args)
+    def __repr__(self):
+        rendered=[' %s="%s"' % (key,value) for key,value in self.attributes.items()]
+        return "<tref"+''.join(rendered)+'/>'
+class spannedtext:
+    """st=spannedtext(textlist=[])
+    a spannedtext can be used for text which consists of text, tspan's and tref's
+    You can use it to add to a text element or path element. Don't add it directly
+    to a svg or a group element.
+    usage:
+    ts=tspan('this text is bold')
+    ts.attributes['font-weight']='bold'
+    tr=tref('#linktotext')
+    tr.attributes['fill']='red'
+    st=spannedtext()
+    st.addtspan(ts)
+    st.addtref(tr)
+    st.addtext('This text is not bold')
+    t=text(3,5,st)
+    """
+    def __init__(self,textlist=None):
+        #a fresh list per instance; a list handed in is stored as is (not copied)
+        if textlist is None:
+            textlist=[]
+        self.textlist=textlist
+    def addtext(self,text=''):
+        """appends a plain text"""
+        self.textlist.append(text)
+    def addtspan(self,tspan):
+        """appends a tspan"""
+        self.textlist.append(tspan)
+    def addtref(self,tref):
+        """appends a tref"""
+        self.textlist.append(tref)
+    def __repr__(self):
+        #the parts are concatenated in the order in which they were added
+        return ''.join([str(element) for element in self.textlist])
+class rect(SVGelement):
+    """r=rect(x,y,width,height,fill,stroke,stroke_width,**args)
+    a rectangle is defined by a width and height and a xy pair
+    width and height are mandatory, a ValueError is raised when one of them is missing;
+    the other arguments only become attributes when they are not None
+    """
+    def __init__(self,x=None,y=None,width=None,height=None,fill=None,stroke=None,stroke_width=None,**args):
+        if width is None and height is None:
+            raise ValueError('both height and width are required')
+        if height is None:
+            raise ValueError('height is required')
+        if width is None:
+            raise ValueError('width is required')
+        SVGelement.__init__(self,'rect',{'width':width,'height':height},**args)
+        for name,value in (('x',x),('y',y),('fill',fill),('stroke',stroke),('stroke-width',stroke_width)):
+            if value is not None:
+                self.attributes[name]=value
+class ellipse(SVGelement):
+    """e=ellipse(cx,cy,rx,ry,fill,stroke,stroke_width,**args)
+    an ellipse is defined as a center and a x and y radius.
+    rx and ry are mandatory, a ValueError naming the missing radius is raised otherwise;
+    the other arguments only become attributes when they are not None
+    """
+    def __init__(self,cx=None,cy=None,rx=None,ry=None,fill=None,stroke=None,stroke_width=None,**args):
+        if rx is None and ry is None:
+            raise ValueError('both rx and ry are required')
+        #the message names the radius which is missing, not the one which was given
+        if ry is None:
+            raise ValueError('ry is required')
+        if rx is None:
+            raise ValueError('rx is required')
+        SVGelement.__init__(self,'ellipse',{'rx':rx,'ry':ry},**args)
+        for name,value in (('cx',cx),('cy',cy),('fill',fill),('stroke',stroke),('stroke-width',stroke_width)):
+            if value is not None:
+                self.attributes[name]=value
+class circle(SVGelement):
+    """c=circle(cx,cy,r,fill,stroke,stroke_width,**args)
+    The circle creates an element using a center (cx, cy) and a radius r.
+    r is mandatory, a ValueError is raised when it is missing;
+    the other arguments only become attributes when they are not None
+    """
+    def __init__(self,cx=None,cy=None,r=None,fill=None,stroke=None,stroke_width=None,**args):
+        if r is None:
+            raise ValueError('r is required')
+        SVGelement.__init__(self,'circle',{'r':r},**args)
+        for name,value in (('cx',cx),('cy',cy),('fill',fill),('stroke',stroke),('stroke-width',stroke_width)):
+            if value is not None:
+                self.attributes[name]=value
+class point(circle):
+    """p=point(x,y,color)
+    A point is a circle centered on x,y with a radius of 1, filled black unless
+    another fill is given. It may be more efficient to use a very small rectangle
+    if you use many points because a circle is difficult to render.
+    """
+    def __init__(self,x,y,fill='black',**args):
+        circle.__init__(self,cx=x,cy=y,r=1,fill=fill,**args)
+class line(SVGelement):
+    """l=line(x1,y1,x2,y2,stroke,stroke_width,**args)
+    A line is defined by a begin x,y pair and an end x,y pair
+    only the arguments which are not None become attributes
+    """
+    def __init__(self,x1=None,y1=None,x2=None,y2=None,stroke=None,stroke_width=None,**args):
+        SVGelement.__init__(self,'line',**args)
+        for name,value in (('x1',x1),('y1',y1),('x2',x2),('y2',y2),('stroke-width',stroke_width),('stroke',stroke)):
+            if value is not None:
+                self.attributes[name]=value
+class polyline(SVGelement):
+    """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args)
+    a polyline is defined by a list of xy pairs
+    fill, stroke and stroke_width only become attributes when they are not None
+    """
+    def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args):
+        SVGelement.__init__(self,'polyline',{'points':_xypointlist(points)},**args)
+        for name,value in (('fill',fill),('stroke-width',stroke_width),('stroke',stroke)):
+            if value is not None:
+                self.attributes[name]=value
+class polygon(SVGelement):
+    """pg=polygon([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args)
+    a polygon is defined by a list of xy pairs
+    fill, stroke and stroke_width only become attributes when they are not None
+    """
+    def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args):
+        SVGelement.__init__(self,'polygon',{'points':_xypointlist(points)},**args)
+        for name,value in (('fill',fill),('stroke-width',stroke_width),('stroke',stroke)):
+            if value is not None:
+                self.attributes[name]=value
+class path(SVGelement):
+    """p=path(pathdata,fill,stroke,stroke_width,id,**args)
+    a path is defined by a path object (its str() becomes the d attribute)
+    and optional width, stroke, fillcolor and id
+    """
+    def __init__(self,pathdata,fill=None,stroke=None,stroke_width=None,id=None,**args):
+        SVGelement.__init__(self,'path',{'d':str(pathdata)},**args)
+        for name,value in (('stroke',stroke),('fill',fill),('stroke-width',stroke_width),('id',id)):
+            if value is not None:
+                self.attributes[name]=value
+class text(SVGelement):
+    """t=text(x,y,text,font_size,font_family,text_anchor,**args)
+    a text element can be used for displaying text on the screen
+    only the arguments which are not None are stored
+    """
+    def __init__(self,x=None,y=None,text=None,font_size=None,font_family=None,text_anchor=None,**args):
+        SVGelement.__init__(self,'text',**args)
+        for name,value in (('x',x),('y',y),('font-size',font_size),('font-family',font_family),('text-anchor',text_anchor)):
+            if value is not None:
+                self.attributes[name]=value
+        if text is not None:
+            self.text=text
+    def toXml(self,level,f, **kwargs):
+        """writes the element to f; whitespace is preserved only when the element
+        has the attribute xml:space="preserve", the keyword arguments are ignored"""
+        preserve = self.attributes.get("xml:space", None) == "preserve"
+        SVGelement.toXml(self, level, f, preserveWhitespace=preserve)
+class textpath(SVGelement):
+    """tp=textpath(link,text,**args)
+    a textpath places a text on a path which is referenced by a link.
+    the link is stored as the xlink:href attribute
+    """
+    def __init__(self,link,text=None,**args):
+        SVGelement.__init__(self,'textPath',{'xlink:href':link},**args)
+        if text is not None:
+            self.text=text
+class pattern(SVGelement):
+    """p=pattern(x,y,width,height,patternUnits,**args)
+    A pattern is used to fill or stroke an object using a pre-defined
+    graphic object which can be replicated ("tiled") at fixed intervals
+    in x and y to cover the areas to be painted.
+    only the arguments which are not None become attributes
+    """
+    def __init__(self,x=None,y=None,width=None,height=None,patternUnits=None,**args):
+        SVGelement.__init__(self,'pattern',**args)
+        for name,value in (('x',x),('y',y),('width',width),('height',height),('patternUnits',patternUnits)):
+            if value is not None:
+                self.attributes[name]=value
+class title(SVGelement):
+    """t=title(text,**args)
+    a title is a text element. The text is displayed in the title bar
+    add at least one to the root svg element
+    """
+    def __init__(self,text=None,**args):
+        SVGelement.__init__(self,'title',**args)
+        if text is not None:
+            self.text=text
+class description(SVGelement):
+    """d=description(text,**args)
+    a description can be added to any element and is used for a tooltip
+    Add this element before adding other elements.
+    The element is written as a desc tag.
+    """
+    def __init__(self,text=None,**args):
+        SVGelement.__init__(self,'desc',**args)
+        if text is not None:
+            self.text=text
+class lineargradient(SVGelement):
+    """lg=lineargradient(x1,y1,x2,y2,id,**args)
+    defines a lineargradient using two xy pairs.
+    stop elements can be added to define the gradient colors.
+    only the arguments which are not None become attributes
+    """
+    def __init__(self,x1=None,y1=None,x2=None,y2=None,id=None,**args):
+        SVGelement.__init__(self,'linearGradient',**args)
+        for name,value in (('x1',x1),('y1',y1),('x2',x2),('y2',y2),('id',id)):
+            if value is not None:
+                self.attributes[name]=value
+class radialgradient(SVGelement):
+    """rg=radialgradient(cx,cy,r,fx,fy,id,**args)
+    defines a radial gradient using an outer circle which is defined by cx,cy and r and by using a focal point (fx,fy).
+    stop elements can be added to define the gradient colors.
+    only the arguments which are not None become attributes
+    """
+    def __init__(self,cx=None,cy=None,r=None,fx=None,fy=None,id=None,**args):
+        SVGelement.__init__(self,'radialGradient',**args)
+        for name,value in (('cx',cx),('cy',cy),('r',r),('fx',fx),('fy',fy),('id',id)):
+            if value is not None:
+                self.attributes[name]=value
+class stop(SVGelement):
+    """st=stop(offset,stop_color,**args)
+    Puts a stop color at the specified offset;
+    stop_color only becomes the stop-color attribute when it is not None
+    """
+    def __init__(self,offset,stop_color=None,**args):
+        SVGelement.__init__(self,'stop',{'offset':offset},**args)
+        if stop_color is not None:
+            self.attributes['stop-color']=stop_color
+class style(SVGelement):
+    """st=style(type,cdata=None,**args)
+    A style element of the given type; cdata is handed to the element as its
+    CDATA section, which can be used for defining inline stylesheets etc.
+    """
+    def __init__(self,type,cdata=None,**args):
+        typeattribute={'type':type}
+        SVGelement.__init__(self,'style',typeattribute,cdata=cdata,**args)
+class image(SVGelement):
+    """im=image(url,x,y,width,height,**args)
+    adds an image to the drawing. Supported formats are .png, .jpg and .svg.
+    width and height are mandatory, a ValueError is raised when one of them is missing;
+    the url is stored as the xlink:href attribute
+    """
+    def __init__(self,url,x=None,y=None,width=None,height=None,**args):
+        if width is None and height is None:
+            raise ValueError('both height and width are required')
+        if height is None:
+            raise ValueError('height is required')
+        if width is None:
+            raise ValueError('width is required')
+        SVGelement.__init__(self,'image',{'xlink:href':url,'width':width,'height':height},**args)
+        for name,value in (('x',x),('y',y)):
+            if value is not None:
+                self.attributes[name]=value
+class cursor(SVGelement):
+    """c=cursor(url,**args)
+    defines a custom cursor for an element or a drawing;
+    the url is stored as the xlink:href attribute
+    """
+    def __init__(self,url,**args):
+        href={'xlink:href':url}
+        SVGelement.__init__(self,'cursor',href,**args)
+class marker(SVGelement):
+    """m=marker(id,viewBox,refx,refy,markerWidth,markerHeight,**args)
+    defines a marker which can be used as an endpoint for a line or other pathtypes
+    add an element to it which should be used as a marker.
+    refx and refy are stored as the refX and refY attributes; only the
+    arguments which are not None become attributes
+    """
+    def __init__(self,id=None,viewBox=None,refx=None,refy=None,markerWidth=None,markerHeight=None,**args):
+        SVGelement.__init__(self,'marker',**args)
+        if id is not None:
+            self.attributes['id']=id
+        if viewBox is not None:
+            #the viewBox sequence is stored as a space separated string
+            self.attributes['viewBox']=_viewboxlist(viewBox)
+        for name,value in (('refX',refx),('refY',refy),('markerWidth',markerWidth),('markerHeight',markerHeight)):
+            if value is not None:
+                self.attributes[name]=value
+class group(SVGelement):
+    """g=group(id,**args)
+    a group (g element) is defined by an optional id and is used to contain elements
+    g.addElement(SVGelement)
+    """
+    def __init__(self,id=None,**args):
+        SVGelement.__init__(self,'g',**args)
+        if id is not None:
+            self.attributes['id']=id
+class symbol(SVGelement):
+    """sy=symbol(id,viewBox,**args)
+    defines a symbol which can be used on different places in your graph using
+    the use element. A symbol is not rendered but you can use 'use' elements to
+    display it by referencing its id.
+    sy.addElement(SVGelement)
+    """
+    def __init__(self,id=None,viewBox=None,**args):
+        SVGelement.__init__(self,'symbol',**args)
+        if id is not None:
+            self.attributes['id']=id
+        if viewBox is not None:
+            #the viewBox sequence is stored as a space separated string
+            self.attributes['viewBox']=_viewboxlist(viewBox)
+class defs(SVGelement):
+    """d=defs(**args)
+    container for defining elements; the keyword arguments are passed on unchanged
+    """
+    def __init__(self,**args):
+        SVGelement.__init__(self,type='defs',**args)
+class switch(SVGelement):
+    """sw=switch(**args)
+    Elements added to a switch element are "switched" by the attributes
+    requiredFeatures, requiredExtensions and systemLanguage.
+    Refer to the SVG specification for details.
+    """
+    def __init__(self,**args):
+        SVGelement.__init__(self,type='switch',**args)
+class use(SVGelement):
+    """u=use(link,x,y,width,height,**args)
+    references a symbol by linking to its id and its position, height and width
+    the link is stored as the xlink:href attribute; the other arguments only
+    become attributes when they are not None
+    """
+    def __init__(self,link,x=None,y=None,width=None,height=None,**args):
+        SVGelement.__init__(self,'use',{'xlink:href':link},**args)
+        for name,value in (('x',x),('y',y),('width',width),('height',height)):
+            if value is not None:
+                self.attributes[name]=value
+class link(SVGelement):
+    """a=link(url,**args)
+    a link (a element) is defined by a hyperlink which is stored as the
+    xlink:href attribute. add elements which have to be linked
+    a.addElement(SVGelement)
+    """
+    def __init__(self,link='',**args):
+        href={'xlink:href':link}
+        SVGelement.__init__(self,'a',href,**args)
+class view(SVGelement):
+    """v=view(id,**args)
+    a view can be used to create a view with different attributes;
+    the id only becomes an attribute when it is not None"""
+    def __init__(self,id=None,**args):
+        SVGelement.__init__(self,'view',**args)
+        if id is not None:
+            self.attributes['id']=id
+class script(SVGelement):
+    """sc=script(type,type,cdata,**args)
+    adds a script element which contains CDATA to the SVG drawing
+    """
+    def __init__(self,type,cdata=None,**args):
+        SVGelement.__init__(self,'script',{'type':type},cdata=cdata,**args)
+class animate(SVGelement):
+    """an=animate(attribute,from,to,during,**args)
+    animates an attribute.    
+    """
+    def __init__(self,attribute,fr=None,to=None,dur=None,**args):
+        SVGelement.__init__(self,'animate',{'attributeName':attribute},**args)
+        if fr<>None:
+            self.attributes['from']=fr
+        if to<>None:
+            self.attributes['to']=to
+        if dur<>None:
+            self.attributes['dur']=dur
+class animateMotion(SVGelement):
+    """an=animateMotion(pathdata,dur,**args)
+    animates a SVGelement over the given path in dur seconds
+    """
+    def __init__(self,pathdata,dur,**args):
+        SVGelement.__init__(self,'animateMotion',**args)
+        if pathdata<>None:
+            self.attributes['path']=str(pathdata)
+        if dur<>None:
+            self.attributes['dur']=dur
+class animateTransform(SVGelement):
+    """antr=animateTransform(type,from,to,dur,**args)
+    transform an element from and to a value.
+    """
+    def __init__(self,type=None,fr=None,to=None,dur=None,**args):
+        SVGelement.__init__(self,'animateTransform',{'attributeName':'transform'},**args)
+        #As far as I know the attributeName is always transform
+        if type<>None:
+            self.attributes['type']=type
+        if fr<>None:
+            self.attributes['from']=fr
+        if to<>None:
+            self.attributes['to']=to
+        if dur<>None:
+            self.attributes['dur']=dur
+class animateColor(SVGelement):
+    """ac=animateColor(attribute,type,from,to,dur,**args)
+    Animates the color of a element
+    """
+    def __init__(self,attribute,type=None,fr=None,to=None,dur=None,**args):
+        SVGelement.__init__(self,'animateColor',{'attributeName':attribute},**args)
+        if type<>None:
+            self.attributes['type']=type
+        if fr<>None:
+            self.attributes['from']=fr
+        if to<>None:
+            self.attributes['to']=to
+        if dur<>None:
+            self.attributes['dur']=dur        
+class set(SVGelement):
+    """st=set(attribute,to,during,**args)
+    sets an attribute to a value for a
+    """
+    def __init__(self,attribute,to=None,dur=None,**args):
+        SVGelement.__init__(self,'set',{'attributeName':attribute},**args)
+        if to<>None:
+            self.attributes['to']=to
+        if dur<>None:
+            self.attributes['dur']=dur
+class svg(SVGelement):
+    """s=svg(viewbox,width,height,**args)
+    a svg or element is the root of a drawing add all elements to a svg element.
+    You can have different svg elements in one svg file
+    s.addElement(SVGelement)
+    eg
+    d=drawing()
+    s=svg((0,0,100,100),'100%','100%')
+    c=circle(50,50,20)
+    s.addElement(c)
+    d.setSVG(s)
+    d.toXml()
+    """
+    def __init__(self,viewBox=None, width=None, height=None,**args):
+        SVGelement.__init__(self,'svg',**args)
+        if viewBox<>None:
+            self.attributes['viewBox']=_viewboxlist(viewBox)
+        if width<>None:
+            self.attributes['width']=width
+        if height<>None:
+            self.attributes['height']=height
+        self.namespace="http://www.w3.org/2000/svg"
+class drawing:
+    """d=drawing()
+    this is the actual SVG document. It needs a svg element as a root.
+    Use the addSVG method to set the svg to the root. Use the toXml method to write the SVG
+    source to the screen or to a file
+    d=drawing()
+    d.addSVG(svg)
+    d.toXml(optionalfilename)
+    """
+    def __init__(self):
+        self.svg=None
+    def setSVG(self,svg):
+        self.svg=svg
+        #Voeg een element toe aan de grafiek toe.
+    if use_dom_implementation==0:      
+        def toXml(self, filename='',compress=False):
+            import cStringIO
+            xml=cStringIO.StringIO()
+            xml.write('<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n')
+            xml.write("""<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
+                         "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" 
+                         [<!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">]>\n""")      
+            self.svg.toXml(0,xml)
+            if not filename:
+                if compress:
+                    import gzip
+                    f=cStringIO.StringIO()
+                    zf=gzip.GzipFile(fileobj=f,mode='wb')
+                    zf.write(xml.getvalue())
+                    zf.close()
+                    f.seek(0)
+                    return f.read()
+                else:
+                    return xml.getvalue()
+            else:
+                if filename[-4:]=='svgz':
+                    import gzip
+                    f=gzip.GzipFile(filename=filename,mode="wb", compresslevel=9)
+                    f.write(xml.getvalue())
+                    f.close()
+                else:
+                    f=file(filename,'w')
+                    f.write(xml.getvalue())
+                    f.close()
+    else:
+        def toXml(self,filename='',compress=False):
+            """drawing.toXml()        ---->to the screen
+            drawing.toXml(filename)---->to the file
+            writes a svg drawing to the screen or to a file
+            compresses if filename ends with svgz or if compress is true
+            """
+            doctype = implementation.createDocumentType('svg',"-//W3C//DTD SVG 1.0//EN""",'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ')
+            global root
+            #root is defined global so it can be used by the appender. Its also possible to use it as an arugument but
+            #that is a bit messy.
+            root=implementation.createDocument(None,None,doctype)
+            #Create the xml document.
+            global appender
+            def appender(element,elementroot):
+                """This recursive function appends elements to an element and sets the attributes
+                and type. It stops when alle elements have been appended"""
+                if element.namespace:
+                    e=root.createElementNS(element.namespace,element.type)
+                else:
+                    e=root.createElement(element.type)
+                if element.text:
+                    textnode=root.createTextNode(element.text)
+                    e.appendChild(textnode)
+                for attribute in element.attributes.keys():   #in element.attributes is supported from python 2.2
+                    e.setAttribute(attribute,str(element.attributes[attribute]))
+                if element.elements:
+                    for el in element.elements:
+                        e=appender(el,e)
+                elementroot.appendChild(e)
+                return elementroot
+            root=appender(self.svg,root)
+            if not filename:
+                import cStringIO
+                xml=cStringIO.StringIO()
+                PrettyPrint(root,xml)
+                if compress:
+                    import gzip
+                    f=cStringIO.StringIO()
+                    zf=gzip.GzipFile(fileobj=f,mode='wb')
+                    zf.write(xml.getvalue())
+                    zf.close()
+                    f.seek(0)
+                    return f.read()
+                else:
+                    return xml.getvalue()
+            else:
+                try:
+                    if filename[-4:]=='svgz':
+                        import gzip
+                        import cStringIO
+                        xml=cStringIO.StringIO()
+                        PrettyPrint(root,xml)
+                        f=gzip.GzipFile(filename=filename,mode='wb',compresslevel=9)
+                        f.write(xml.getvalue())
+                        f.close()
+                    else:
+                        f=open(filename,'w')
+                        PrettyPrint(root,f)
+                        f.close()
+                except:
+                    print "Cannot write SVG file: " + filename
+    def validate(self):
+        try:
+            import xml.parsers.xmlproc.xmlval
+        except:
+            raise exceptions.ImportError,'PyXml is required for validating SVG'
+        svg=self.toXml()
+        xv=xml.parsers.xmlproc.xmlval.XMLValidator()
+        try:
+            xv.feed(svg)
+        except:
+            raise "SVG is not well formed, see messages above"
+        else:
+            print "SVG well formed"
+if __name__=='__main__':
+    d=drawing()
+    s=svg((0,0,100,100))
+    r=rect(-100,-100,300,300,'cyan')
+    s.addElement(r)
+    t=title('SVGdraw Demo')
+    s.addElement(t)
+    g=group('animations')
+    e=ellipse(0,0,5,2)
+    g.addElement(e)
+    c=circle(0,0,1,'red')
+    g.addElement(c)
+    pd=pathdata(0,-10)
+    for i in range(6):
+        pd.relsmbezier(10,5,0,10)
+        pd.relsmbezier(-10,5,0,10)
+    an=animateMotion(pd,10)
+    an.attributes['rotate']='auto-reverse'
+    an.attributes['repeatCount']="indefinite"
+    g.addElement(an)
+    s.addElement(g)
+    for i in range(20,120,20):
+        u=use('#animations',i,0)
+        s.addElement(u)
+    for i in range(0,120,20):
+        for j in range(5,105,10):
+            c=circle(i,j,1,'red','black',.5)
+            s.addElement(c)
+    d.setSVG(s)
+    print d.toXml()
diff --git a/src/obitools/__init__.py b/src/obitools/__init__.py
new file mode 100644
index 0000000..e3f8c87
--- /dev/null
+++ b/src/obitools/__init__.py
@@ -0,0 +1,57 @@
+**obitools** main module
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The obitools module provides the base classes for sequence manipulation.
+All biological sequences must be subclasses of :py:class:`obitools.BioSequence`.
+Some biological sequences are defined as transformations of other
+biological sequences. For example, reverse-complemented sequences
+are a transformation of a :py:class:`obitools.NucSequence`. Sequences of this
+kind are subclasses of :py:class:`obitools.WrappedBioSequence`.
+.. inheritance-diagram:: BioSequence NucSequence AASequence WrappedBioSequence SubSequence DNAComplementSequence
+        :parts: 1
+from _obitools import BioSequence,NucSequence,AASequence, \
+                      WrappedBioSequence,SubSequence, \
+                      DNAComplementSequence,_default_raw_parser, \
+                      _isNucSeq,bioSeqGenerator
+#    from functools import partial
+#    #
+#    # Add for compatibility purpose with Python < 2.5
+#    #
+#    def partial(func, *args, **keywords):
+#        def newfunc(*fargs, **fkeywords):
+#            newkeywords = keywords.copy()
+#            newkeywords.update(fkeywords)
+#            return func(*(args + fargs), **newkeywords)
+#        newfunc.func = func
+#        newfunc.args = args
+#        newfunc.keywords = keywords
+#        return newfunc
diff --git a/src/obitools/_obitools.h b/src/obitools/_obitools.h
new file mode 100644
index 0000000..d9f7958
--- /dev/null
+++ b/src/obitools/_obitools.h
@@ -0,0 +1,25 @@
+#ifndef __PYX_HAVE__obitools___obitools
+#define __PYX_HAVE__obitools___obitools
+#ifndef __PYX_HAVE_API__obitools___obitools
+#ifndef __PYX_EXTERN_C
+  #ifdef __cplusplus
+    #define __PYX_EXTERN_C extern "C"
+  #else
+    #define __PYX_EXTERN_C extern
+  #endif
+__PYX_EXTERN_C DL_IMPORT(PyObject) *__pyx_v_8obitools_9_obitools___default_raw_parser;
+#endif /* !__PYX_HAVE_API__obitools___obitools */
+PyMODINIT_FUNC init_obitools(void);
+PyMODINIT_FUNC PyInit__obitools(void);
+#endif /* !__PYX_HAVE__obitools___obitools */
diff --git a/src/obitools/_obitools.pxd b/src/obitools/_obitools.pxd
new file mode 100644
index 0000000..3fa7895
--- /dev/null
+++ b/src/obitools/_obitools.pxd
@@ -0,0 +1,109 @@
+# memchr is declared by the ISO C header <string.h>; <strings.h> only holds
+# the legacy BSD string functions.
+cdef extern from "string.h":
+     void *memchr(char *s, int c, int n)
+cdef public bytes __default_raw_parser = b" %s *= *([^;]*);"
+cdef class BioSequence(object):
+    cdef object __quality
+    cdef public bytes __seq
+    cdef public int   __len
+    cdef public bytes __rawinfo
+    cdef public dict  _info
+    cdef public bytes _rawparser
+    cdef public bytes _definition
+    cdef public bytes _id
+    cdef public bint _hasTaxid
+    cdef public object _wrappers
+    cdef public object word4table
+    cdef public int word4over
+    cpdef bytes get_seq(self)
+    cpdef set_seq(self, object value)
+    cpdef object clone(self)
+    cpdef bytes getDefinition(self)
+    cpdef setDefinition(self, bytes value)
+    cpdef bytes getId(self)
+    cpdef setId(self, bytes value)
+    cpdef bytes getStr(self)
+    cpdef getSymbolAt(self, int position)
+    cpdef object getSubSeq(self, object location)
+    cpdef object getKey(self, bytes key)
+    cpdef extractTaxon(self)
+    cpdef bint hasKey(self,bytes key)
+    cpdef list items(self)
+    cpdef list keys(self)
+    cpdef dict getTags(self)
+    cpdef object getRoot(self)
+    cpdef int _getTaxid(self)
+    cpdef _setTaxid(self,int taxid)
+    cpdef bytes _getRawInfo(self)
+cdef class NucSequence(BioSequence):
+    cpdef object complement(self)
+    cpdef bint isNucleotide(self)
+cdef class AASequence(BioSequence):
+    cpdef bint isNucleotide(self)
+cdef class WrappedBioSequence(BioSequence):
+    cdef object _wrapped
+    cdef object __weakref__
+    cpdef object clone(self)
+    cpdef object getWrapped(self)
+    cpdef bytes getDefinition(self)
+    cpdef setDefinition(self, bytes value)
+    cpdef bytes getId(self)
+    cpdef setId(self, bytes value)
+    cpdef bint isNucleotide(self)
+    cpdef object getKey(self,bytes key)
+    cpdef bint hasKey(self,bytes key)
+    cpdef getSymbolAt(self, int position)
+    cpdef int posInWrapped(self, int position, object reference=?   ) except *
+    cpdef  int _posInWrapped(self, int position) except *
+    cpdef bytes getStr(self)
+    cpdef object getRoot(self)
+    cpdef object complement(self)    
+    cpdef bytes _getRawInfo(self)
+cdef int _sign(int x)
+cdef class SubSequence(WrappedBioSequence):
+    cdef public object _location
+    cdef public object _indices
+    cdef public object _xrange
+    cpdef bytes getId(self)
+    cpdef setId(self, bytes value)
+    cpdef object clone(self)
+    cpdef bytes getStr(self)
+    cpdef int _posInWrapped(self, int position) except *
+cdef class DNAComplementSequence(WrappedBioSequence):
+    cdef dict _comp
+    cpdef bytes getId(self)
+    cpdef setId(self, bytes value)
+    cpdef bytes getStr(self)
+    cpdef int _posInWrapped(self, int position) except *
+    cpdef  getSymbolAt(self, int position)
+    cpdef object complement(self)
+cpdef bint _isNucSeq(bytes text)
+cdef object _bioSeqGenerator(bytes id,
+                             bytes seq,
+                             bytes definition,
+                             bytes rawinfo,
+                             bytes rawparser,
+                             dict info)
diff --git a/src/obitools/_obitools.pyx b/src/obitools/_obitools.pyx
new file mode 100644
index 0000000..55fc456
--- /dev/null
+++ b/src/obitools/_obitools.pyx
@@ -0,0 +1,800 @@
+# cython: profile=True
+from _obitools cimport *
+#from cython.parallel import parallel, prange
+from weakref import ref
+import re
+from itertools import chain
+import array
+from obitools.utils.iterator import uniqueChain
+from obitools.sequenceencoder import DNAComplementEncoder
+from obitools.location import Location
+__default_raw_parser = b" %s *= *([^;]*);"
+cdef class WrapperSetIterator(object):
+    '''
+    Iterator over a set of weak references returning the referenced
+    objects instead of the references themselves.
+    '''
+    # An extension type has no instance dictionary: the attribute must be
+    # declared, otherwise the assignment in __init__ fails at run time.
+    cdef object _i
+    def __init__(self,s):
+        self._i = set.__iter__(s)
+    def __next__(self):
+        # Extension types implement the iterator protocol through __next__.
+        # Each item is a weak reference; calling it returns its referent.
+        return next(self._i)()
+    def next(self):  # @ReservedAssignment
+        '''
+        Alias of __next__ kept for callers using the Python 2 method name.
+        '''
+        return self.__next__()
+    def __iter__(self):
+        return self
+cdef class WrapperSet(set):
+    '''
+    Set whose iteration is delegated to :py:class:`WrapperSetIterator`.
+    '''
+    def __iter__(self):  # @DuplicatedSignature
+        iterator = WrapperSetIterator(self)
+        return iterator
+cdef class BioSequence(object):
+    '''
+    BioSequence class is the base class for biological
+    sequence representation.
+    It provides storage of :
+        - the sequence itself, 
+        - an identifier,
+        - a definition,
+        - a set of complementary information managed on a key / value principle.
+    .. warning:: 
+            :py:class:`obitools.BioSequence` is an abstract class, this constructor
+            can only be called by a subclass constructor.
+    '''
+    def __init__(self,bytes id, bytes seq,  # @DuplicatedSignature
+                      bytes definition=None,
+                      bytes rawinfo=None,
+                      bytes rawparser=__default_raw_parser,**info):
+        '''        
+        :param id: sequence identifier
+        :type id:  `str`
+        :param seq: the sequence
+        :type seq:  `str`
+        :param definition: sequence definition (optional)
+        :type definition: `str`
+        :param rawinfo: a text containing a set of key=value; patterns
+        :type rawinfo: `str`
+        :param rawparser: a text describing a regular patterns template 
+                          used to parse rawinfo
+        :type rawparser: `str`
+        :param info: extra named parameters can be added to associate complementary
+                     data to the sequence
+        '''
+        assert type(self)!=BioSequence,"obitools.BioSequence is an abstract class"
+        # Goes through the _seq property: the sequence is stored in lower case.
+        self._seq=seq
+        self._info = dict(info)
+        if rawinfo is not None:
+            # The leading space lets the raw parser pattern, which starts
+            # with a space, match the first key of the text.
+            self.__rawinfo=b' ' + rawinfo
+        else:
+            self.__rawinfo=None
+        self._rawparser=rawparser
+        self._definition=definition
+        self._id=id
+        self._hasTaxid=True
+        self.__quality=None
+        self.word4table=None
+        self.word4over=0
+    cpdef bytes get_seq(self):
+        '''
+        :return: the stored sequence
+        '''
+        return self.__seq
+    cpdef set_seq(self, object value):
+        '''
+        Store the sequence, converted to C{bytes} if needed and to lower case,
+        and record its length.
+        '''
+        cdef bytes s
+        if not isinstance(value, bytes):
+            s=bytes(value)
+        else:
+            s=value
+        self.__seq = s.lower()
+        self.__len = len(s)
+    cpdef object clone(self):
+        '''
+        :return: a new instance of the same class with the same identifier,
+                 sequence and definition, a copy of the tags and the same
+                 raw information
+        '''
+        seq = type(self)(self.id,
+                         str(self),
+                         definition=self.definition
+                         )
+        seq._info=dict(self.getTags())
+        seq.__rawinfo=self.__rawinfo
+        seq._rawparser=self._rawparser
+        seq._hasTaxid=self._hasTaxid
+        return seq
+    cpdef bytes getDefinition(self):
+        '''
+        Sequence definition getter.
+        :return: the sequence definition
+        :rtype: str
+        '''
+        return self._definition
+    cpdef setDefinition(self, bytes value):
+        '''
+        Sequence definition setter.
+        :param value: the new sequence definition
+        :type value: C{str}
+        :return: C{None}
+        '''
+        self._definition = value
+    cpdef bytes getId(self):
+        '''
+        Sequence identifier getter
+        :return: the sequence identifier
+        :rtype: C{str}
+        '''
+        return self._id
+    cpdef setId(self, bytes value):
+        '''
+        Sequence identifier setter.
+        :param value: the new sequence identifier
+        :type value:  C{str}
+        :return: C{None}
+        '''
+        self._id = value
+    cpdef bytes getStr(self):
+        '''
+        Return the sequence as a string
+        :return: the string representation of the sequence
+        :rtype: str
+        '''
+        return self._seq
+    cpdef  getSymbolAt(self, int position):
+        '''
+        Return the symbol at C{position} in the sequence
+        :param position: the desired position. Positions start from 0;
+                         negative positions are considered
+                         to reference the end of the sequence.
+        :type position: `int`
+        :return: a one letter string
+        :rtype: `str`
+        '''
+        return str(self)[position]
+    cpdef object getSubSeq(self, object location):
+        '''
+        Return a subsequence as described by C{location}.
+        The C{location} parameter can be a L{obitools.location.Location} instance,
+        an integer or a python C{slice} instance. If C{location}
+        is an integer this method is equivalent to L{getSymbolAt}.
+        :param location: the positions of the subsequence to return
+        :type location: C{Location} or C{int} or C{slice}
+        :return: the subsequence
+        :rtype: a single character as a C{str} if C{location} is an integer,
+                a L{obitools.SubSequence} instance if it is a slice, the
+                result of C{location.extractSequence} if it is a C{Location}.
+        :raise TypeError: for any other type of C{location}
+        '''
+        if isinstance(location,Location):
+            return location.extractSequence(self)
+        elif isinstance(location, int):
+            return self.getSymbolAt(location)
+        elif isinstance(location, slice):
+            return SubSequence(self,location)
+        raise TypeError('key must be a Location, an integer or a slice')
+    cpdef object getKey(self, bytes key):
+        '''
+        Return the value associated to C{key}.
+        The key is looked for among the already parsed values, then in the
+        raw information text. A value found in the raw text is evaluated,
+        stored with the parsed values and removed from the raw text.
+        The C{count} key defaults to 1 when it is not defined.
+        :param key: the name of the value
+        :type key: C{str}
+        :raise KeyError: if the key is not defined
+        '''
+        if key not in self._info:
+            if self.__rawinfo is None:
+                if key==b'count':
+                    return 1
+                elif key==b'taxid' and self._hasTaxid:
+                    self.extractTaxon()
+                    return self._info[b'taxid']
+                else:
+                    raise KeyError(key)
+            p = re.compile(self._rawparser % key)
+            m = p.search(self.__rawinfo)
+            if m is not None:
+                v=m.group(1)
+                self.__rawinfo=b' ' + self.__rawinfo[0:m.start(0)]+self.__rawinfo[m.end(0):]
+                # NOTE(review): eval() executes the text of the raw
+                # information as Python code; unsafe if that text comes
+                # from untrusted input. Values that cannot be evaluated
+                # are kept as text.
+                try:
+                    v = eval(v)
+                except Exception:
+                    pass
+                self._info[key]=v
+            else:
+                if key==b'count':
+                    v=1
+                else:
+                    raise KeyError(key)
+        else:
+            v=self._info[key]
+        return v
+    cpdef extractTaxon(self):
+        '''
+        Extract Taxonomy information from the sequence header.
+        This method only records whether a C{taxid} key is defined and
+        returns None. It should be overridden
+        if necessary as in L{obitools.seqdb.AnnotatedSequence}.
+        :return: None
+        '''
+        self._hasTaxid=self.hasKey(b'taxid')
+        return None
+    def get(self,key,default):
+        '''
+        Return the value associated to C{key}. When the key is not defined,
+        C{default} is stored under this key and returned.
+        '''
+        try:
+            v = self.getKey(key)
+        except KeyError:
+            v=default
+            self[key]=v
+        return v 
+    def __str__(self):
+        return self.getStr()
+    def __getitem__(self,key):
+        # A bytes key selects a tag, anything else a position or a range.
+        if isinstance(key, bytes):
+            return self.getKey(key)
+        else:
+            return self.getSubSeq(key)
+    def __setitem__(self,key,value):
+        # Called for its side effect: a value present in the raw
+        # information is extracted from it before being overwritten.
+        self.__contains__(key)
+        self._info[key]=value
+        if key=='taxid':
+            self._hasTaxid=value is not None
+    def __delitem__(self,key):
+        if isinstance(key, bytes):
+            if key in self:
+                del self._info[key]
+            else:
+                raise KeyError(key)
+            if key=='taxid':
+                self._hasTaxid=False
+        else:
+            raise TypeError(key)
+    def __iter__(self):  # @DuplicatedSignature
+        '''
+        Iterate through the sequence symbols
+        '''
+        return iter(str(self))
+    def __len__(self):
+        return self.__len
+    cpdef bint hasKey(self,bytes key):
+        '''
+        Test if C{key} is defined, among the parsed values or in the raw
+        information text. A value found in the raw text is evaluated, stored
+        with the parsed values and removed from the raw text.
+        :return: C{True} if the key is defined, C{False} otherwise
+        '''
+        cdef bint rep
+        rep = key in self._info
+        if not rep and self.__rawinfo is not None:
+            p = re.compile(self._rawparser % key)
+            m = p.search(self.__rawinfo)
+            if m is not None:
+                v=m.group(1)
+                self.__rawinfo=b' ' + self.__rawinfo[0:m.start(0)]+self.__rawinfo[m.end(0):]
+                # NOTE(review): eval() on raw information text, see getKey.
+                try:
+                    v = eval(v)
+                except Exception:
+                    pass
+                self._info[key]=v
+                rep=True
+        return rep
+    def __contains__(self,key):
+        '''
+        Method allowing to use the C{in} operator on a C{BioSequence}.
+        The C{in} operator tests if the C{key} value is defined for this
+        sequence.
+        :param key: the name of the checked value
+        :type key: str
+        :return: C{True} if the value is defined, C{False} otherwise.
+        :rtype: C{bool}
+        '''
+        # NOTE(review): _hasTaxid is declared as a bint and initialised to
+        # True, so this test looks like it can never succeed - confirm
+        # whether extractTaxon() is still meant to be called here.
+        if key=='taxid' and self._hasTaxid is None:
+            self.extractTaxon()
+        return self.hasKey(key)
+    def rawiteritems(self):
+        return self.iteritems()
+    def iteritems(self):
+        '''
+        Iterate over the items of the dictionary storing the values
+        associated to the sequence. It works similarly to
+        the iteritems function of C{dict}.
+        The raw information text, if any, is entirely parsed first and
+        then discarded.
+        :return: an iterator over the items (key,value)
+                 linked to a sequence
+        :rtype: iterator over tuple
+        :see: L{items}
+        '''
+        if self.__rawinfo is not None:
+            p = re.compile(self._rawparser % r"([a-zA-Z]\w*)")
+            for k,v in p.findall(self.__rawinfo):
+                # NOTE(review): eval() on raw information text, see getKey.
+                try:
+                    self._info[k]=eval(v)
+                except Exception:
+                    self._info[k]=v
+            self.__rawinfo=None
+        return self._info.iteritems()
+    cpdef list items(self):
+        return [x for x in self.iteritems()]
+    def iterkeys(self):
+        return (k for k,v in self.iteritems())
+    cpdef list keys(self):
+        return [x for x in self.iterkeys()]
+    cpdef dict getTags(self):
+        '''
+        :return: the dictionary of the values associated to the sequence
+        '''
+        # Called for its side effect: parses the raw information text.
+        self.iteritems()
+        return self._info
+    cpdef object getRoot(self):
+        return self
+    def getWrappers(self):
+        # The set of wrappers is created on first access.
+        if self._wrappers is None:
+            self._wrappers=WrapperSet()
+        return self._wrappers
+    def register(self,wrapper):
+        # Only a weak reference is kept; _unregister is its callback.
+        self.wrappers.add(ref(wrapper,self._unregister))
+    def _unregister(self,ref):
+        self.wrappers.remove(ref)
+    wrappers = property(getWrappers,None,None,'')
+    definition = property(getDefinition, setDefinition, None, "Sequence Definition")
+    id = property(getId, setId, None, 'Sequence identifier')
+    cpdef int _getTaxid(self):
+        return self['taxid']
+    cpdef _setTaxid(self,int taxid):
+        self['taxid']=taxid
+    cpdef bytes _getRawInfo(self):
+        return self.__rawinfo
+    _rawinfo = property(_getRawInfo)
+    taxid = property(_getTaxid,_setTaxid,None,'NCBI Taxonomy identifier')
+    _seq = property(get_seq, set_seq, None, None)
+    def _getQuality(self):
+        # An unset quality behaves as a missing attribute, so that
+        # hasattr(seq,'quality') is False in that case.
+        if self.__quality is None:
+            raise AttributeError
+        else:
+            return self.__quality
+    def _setQuality(self,qual):
+        self.__quality=qual
+    def _delQuality(self):
+        self.__quality=None
+    quality = property(_getQuality,_setQuality,_delQuality,'Quality associated to the sequence')
+cdef class NucSequence(BioSequence):
+    """
+    :py:class:`NucSequence` specializes the :py:class:`BioSequence` class for storing DNA
+    sequences. 
+    The constructor is inherited from :py:class:`BioSequence`.
+    """
+    cpdef object complement(self):
+        """
+        Wrap this sequence in a :py:class:`DNAComplementSequence`.
+        :return: The reverse complemented sequence as an instance of :py:class:`DNAComplementSequence`
+        :rtype: :py:class:`DNAComplementSequence`
+        """
+        revcomp = DNAComplementSequence(self)
+        return revcomp
+    cpdef bint isNucleotide(self):
+        """
+        :return: always C{True}
+        """
+        return True
+cdef class AASequence(BioSequence):
+    """
+    :py:class:`AASequence` specializes the :py:class:`BioSequence` class for storing protein
+    sequences. 
+    The constructor is inherited from :py:class:`BioSequence`.
+    """
+    cpdef bint isNucleotide(self):
+        """
+        :return: always C{False}
+        """
+        return False
+cdef class WrappedBioSequence(BioSequence):
+    """
+    Base class of the sequences defined over another sequence: identifier,
+    definition and tags not set on the wrapper are read from the wrapped
+    sequence.
+    .. warning:: 
+            :py:class:`obitools.WrappedBioSequence` is an abstract class, this constructor
+            can only be called by a subclass constructor.
+    """
+    def __init__(self, object reference,  # @DuplicatedSignature
+                       bytes id=None,
+                       bytes definition=None,
+                       **info):
+        '''
+        :param reference: the wrapped sequence; the new instance is
+                          registered on it
+        :param id: identifier overriding the one of the wrapped sequence
+        :param definition: definition overriding the one of the wrapped sequence
+        :param info: extra named parameters stored as tags of the wrapper
+        '''
+        assert type(self)!=WrappedBioSequence,"obitools.WrappedBioSequence is an abstract class"
+        self._wrapped = reference
+        reference.register(self)
+        self._id=id
+        self.definition=definition
+        self._info=info
+    cpdef object clone(self):
+        '''
+        :return: a new wrapper of the same class over the same wrapped
+                 sequence, with a copy of the wrapper's own tags
+        '''
+        seq = type(self)(self.wrapped,
+                         id=self._id,
+                         definition=self._definition
+                         )
+        seq._info=dict(self._info)
+        return seq
+    cpdef object getWrapped(self):
+        return self._wrapped
+    cpdef bytes getDefinition(self):
+        # Falls back on the wrapped sequence when no own definition is set.
+        d = self._definition or self.wrapped.definition
+        return d
+    cpdef setDefinition(self, bytes value):
+        '''
+        Sequence definition setter.
+        :param value: the new sequence definition
+        :type value: C{str}
+        :return: C{None}
+        '''
+        self._definition=value
+    cpdef bytes getId(self):
+        # Falls back on the wrapped sequence when no own identifier is set.
+        d = self._id or self.wrapped.id
+        return d
+    cpdef setId(self, bytes value):
+        '''
+        Sequence identifier setter.
+        :param value: the new sequence identifier
+        :type value:  C{str}
+        :return: C{None}
+        '''
+        self._id = value
+    cpdef bint isNucleotide(self):
+        return self.wrapped.isNucleotide()
+    def iterkeys(self):  # @DuplicatedSignature
+        return uniqueChain(self._info.iterkeys(),
+                               self.wrapped.iterkeys())
+    def rawiteritems(self):  # @DuplicatedSignature
+        # Own tags first, then the tags of the wrapped sequence that are
+        # not redefined by the wrapper.
+        return chain(self._info.iteritems(),
+                        (x for x in self.wrapped.rawiteritems()
+                         if x[0] not in self._info))
+    def iteritems(self):  # @DuplicatedSignature
+        for x in self.iterkeys():
+            yield (x,self[x])
+    cpdef object getKey(self,bytes key):
+        if key in self._info:
+            return self._info[key]
+        else:
+            return self.wrapped.getKey(key)
+    cpdef bint hasKey(self,bytes key):
+        return key in self._info or self.wrapped.hasKey(key)
+    cpdef  getSymbolAt(self, int position):
+        return self.wrapped.getSymbolAt(self.posInWrapped(position))
+    cpdef int posInWrapped(self, int position, object reference=None)  except *:
+        '''
+        Translate a position on this sequence into a position on
+        C{reference}, the directly wrapped sequence by default.
+        '''
+        if reference is None or reference is self.wrapped:
+            return self._posInWrapped(position)
+        else:
+            return self.wrapped.posInWrapped(self._posInWrapped(position),reference)
+    cpdef bytes getStr(self):
+        return str(self.wrapped)
+    cpdef object getRoot(self):
+        return self.wrapped.getRoot()
+    cpdef object complement(self):
+        """
+        The :py:meth:`complement` method of the :py:class:`WrappedBioSequence` class 
+        raises an exception :py:exc:`AttributeError` if the method is called and the
+        wrapped sequence does not correspond to a nucleic acid sequence.
+        """
+        if self.wrapped.isNucleotide():
+            return DNAComplementSequence(self)
+        raise AttributeError("complement is only defined for nucleic acid sequences")
+    cpdef int _posInWrapped(self, int position) except *:
+        return position
+    definition = property(getDefinition,setDefinition, None)
+    id = property(getId,setId, None)
+    wrapped = property(getWrapped, None, None, "A pointer to the wrapped sequence")
+    cpdef bytes _getRawInfo(self):
+        # Goes through the _rawinfo property of the wrapped sequence
+        # rather than its own __rawinfo field, so a wrapper around another
+        # wrapper delegates again instead of reading that wrapper's field,
+        # which WrappedBioSequence.__init__ never sets.
+        return self.wrapped._rawinfo
+    _rawinfo = property(_getRawInfo)
+cdef int _sign(int x):
+    # Sign of x: 1 when positive, -1 when negative, 0 otherwise.
+    if x > 0:
+        return 1
+    if x < 0:
+        return -1
+    return 0
+cdef class SubSequence(WrappedBioSequence):
+    """
+    View on a part of another sequence, described by a python C{slice}.
+    """
+    def __init__(self, object reference,  # @DuplicatedSignature
+                       object location=None,
+                       int start=0, object stop=None,
+                       object id=None,
+                       object definition=None,
+                 **info):
+        '''
+        :param reference: the sequence the subsequence is taken from
+        :param location: a C{slice} describing the subsequence; when it is
+                         not a slice, C{start} and C{stop} are used instead
+        :param start: first position of the subsequence
+        :param stop: end position of the subsequence, the length of
+                     C{reference} when it is not an integer
+        :param id: identifier of the subsequence
+        :param definition: definition of the subsequence
+        :param info: extra named parameters stored as tags
+        '''
+        # id and definition are forwarded: the original passed None for
+        # both, silently discarding the caller's values.
+        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
+        if isinstance(location, slice):
+            self._location = location
+        else:
+            # start is used as given: the original reset it to 0 here,
+            # making the parameter useless.
+            step = 1
+            if not isinstance(stop,int):
+                stop = len(reference)
+            self._location=slice(start,stop,step)
+        self._indices=self._location.indices(len(self.wrapped))
+        self._xrange=xrange(*self._indices)
+        self._info['cut']='[%d,%d,%s]' % self._indices
+        if hasattr(reference,'quality'):
+            self.quality = reference.quality[self._location]
+    cpdef bytes getId(self):
+        # Without an own identifier, the wrapped one suffixed by _SUB.
+        d = self._id or ("%s_SUB" % self.wrapped.id)
+        return d
+    cpdef setId(self, bytes value):
+        '''
+        Sequence identifier setter.
+        :param value: the new sequence identifier
+        :type value:  C{str}
+        :return: C{None}
+        '''
+        WrappedBioSequence.setId(self,value)
+    cpdef object clone(self):
+        seq = WrappedBioSequence.clone(self)
+        seq._location=self._location
+        seq._indices=seq._location.indices(len(seq.wrapped))
+        seq._xrange=xrange(*seq._indices)
+        return seq
+    def __len__(self):  # @DuplicatedSignature
+        return len(self._xrange)
+    cpdef bytes getStr(self):
+        return b''.join([x for x in self])
+    def __iter__(self):  # @DuplicatedSignature
+        return (self.wrapped.getSymbolAt(x) for x in self._xrange)
+    cpdef int _posInWrapped(self, int position)  except *:
+        return self._xrange[position]
+    id = property(getId,setId, None)
+# Complement table for nucleotide symbols, including IUPAC ambiguity codes and the gap '-'.
+# Only lowercase symbols are listed; 'u' is complemented to 'a'.
+cdef dict _comp={b'a': b't', b'c': b'g', b'g': b'c', b't': b'a',
+                 b'r': b'y', b'y': b'r', b'k': b'm', b'm': b'k', 
+                 b's': b's', b'w': b'w', b'b': b'v', b'd': b'h', 
+                 b'h': b'd', b'v': b'b', b'n': b'n', b'u': b'a',
+                 b'-': b'-'}
+cdef class DNAComplementSequence(WrappedBioSequence):
+    """
+    Class used to represent a reverse complemented DNA sequence. Usually instances
+    of this class are produced by using the :py:meth:`NucSequence.complement` method.
+    """
+    def __init__(self, object reference,  # @DuplicatedSignature
+                       bytes id=None,
+                       bytes definition=None,
+                       **info):
+        """
+        :param reference: the nucleic sequence to reverse complement
+        :param id: identifier of the new sequence; when empty, :py:meth:`getId` derives
+                   one from the wrapped sequence
+        :param definition: definition of the new sequence
+        :param info: extra named parameters stored as annotations
+        """
+        # Forward the caller's id and definition instead of discarding them.
+        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
+        assert reference.isNucleotide()
+        self._info[b'complemented']=True
+        if hasattr(reference,'quality'):
+            # Quality values follow the sequence: they are read in reverse order.
+            self.quality = reference.quality[::-1]
+    cpdef bytes getId(self):
+        """
+        Return the explicit identifier when one is set, otherwise the identifier of the
+        wrapped sequence suffixed with ``_CMP``.
+        """
+        d = self._id or (b"%s_CMP" % self.wrapped.id)
+        return d
+    cpdef setId(self, bytes value):
+        '''
+        Sequence identifier setter.
+        :param value: the new sequence identifier
+        :type value:  C{str}
+        :return: C{None}
+        '''
+        WrappedBioSequence.setId(self,value)
+    def __len__(self):  # @DuplicatedSignature
+        return len(self._wrapped)
+    cpdef bytes getStr(self):
+        """
+        Return the reverse complemented symbols joined into a single string.
+        """
+        return b''.join([x for x in self])
+    def __iter__(self):  # @DuplicatedSignature
+        return (self.getSymbolAt(x) for x in xrange(len(self)))
+    cpdef int _posInWrapped(self, int position) except *:
+        # Position p of the complement is read at index -(p+1) of the wrapped sequence,
+        # i.e. counted from its end.
+        return -(position+1)
+    cpdef  getSymbolAt(self, int position):
+        """
+        Return the complement (looked up in ``_comp``) of the wrapped symbol that
+        corresponds to `position`.
+        """
+        return _comp[self.wrapped.getSymbolAt(self.posInWrapped(position))]
+    cpdef object complement(self):
+        """
+        The :py:meth:`complement` method of the :py:class:`DNAComplementSequence` class actually
+        returns the wrapped sequence. Effectively the reverse complement of a reverse
+        complemented sequence is the initial sequence.
+        """
+        return self.wrapped
+    id = property(getId,setId, None)
+# IUPAC ambiguity codes, in lower and upper case. _isNucSeq() tolerates them without
+# counting them as plain nucleotides.
+cdef set _iupac=set([b'r', b'y', b'k', b'm', 
+                     b's', b'w', b'b', b'd', 
+                     b'h', b'v', b'n',
+                     b'R', b'Y', b'K', b'M', 
+                     b'S', b'W', b'B', b'D', 
+                     b'H', b'V', b'N'])
+#cdef char *_iupac=b"acgtrykmswbdhvnu-"
+# Plain nucleotide symbols (a, c, g, t, u in both cases) plus the gap '-'.
+cdef set _nuc = set([b'a', b'c', b'g', b't',b'u',b'A', b'C', b'G', b'T',b'U',b'-'])
+#cdef char *_nuc=b"acgt-"
+cpdef bint _isNucSeq(bytes text):
+    """
+    Tell whether `text` looks like a nucleic acid sequence.
+    The answer is true when every symbol belongs to ``_nuc`` or ``_iupac`` and the
+    number of ``_nuc`` symbols is strictly greater than four fifths of the length.
+    """
+    cdef int total = len(text)
+    cdef int threshold = total * 4 / 5
+    cdef int nucCount = 0
+    cdef int foreignCount = 0
+    for symbol in text:
+        if symbol in _nuc:
+            nucCount += 1
+        elif symbol not in _iupac:
+            foreignCount += 1
+    if foreignCount != 0:
+        return False
+    return nucCount > threshold
+cdef object _bioSeqGenerator(bytes id,
+                             bytes seq,
+                             bytes definition,
+                             bytes rawinfo,
+                             bytes rawparser,
+                             dict info):
+    # Pick the sequence class from the content of `seq`, then build the instance with
+    # the entries of `info` passed as keyword arguments.
+    factory = NucSequence if _isNucSeq(seq) else AASequence
+    return factory(id,seq,definition,rawinfo,rawparser,**info)
+def  bioSeqGenerator(bytes id,
+                     bytes seq,
+                     bytes definition=None,
+                     bytes rawinfo=None,
+                     bytes rawparser=__default_raw_parser,
+                     **info):
+    """
+    Build a new sequence instance, automatically choosing between:
+        - :py:class:`NucSequence`
+        - :py:class:`AASequence`
+    The sequence is instantiated as a :py:class:`NucSequence` when `seq` is recognised
+    as nucleic by :py:func:`_isNucSeq`, i.e. when more than 80% of its symbols are plain
+    nucleotides or *-* (in upper or lower case) and the other symbols are IUPAC codes.
+    Otherwise it is instantiated as an :py:class:`AASequence`.
+    :param id: sequence identifier
+    :type id:  `str`
+    :param seq: the sequence
+    :type seq:  `str`
+    :param definition: sequence definition (optional)
+    :type definition: `str`
+    :param rawinfo: a text containing a set of key=value; patterns
+    :type rawinfo: `str`
+    :param rawparser: a text describing a regular pattern template
+                      used to parse rawinfo
+    :type rawparser: `str`
+    :param info: extra named parameters can be added to associate complementary
+                 data to the sequence
+    """
+    sequence = _bioSeqGenerator(id,seq,definition,rawinfo,rawparser,info)
+    return sequence
diff --git a/src/obitools/align/__init__.py b/src/obitools/align/__init__.py
new file mode 100644
index 0000000..1e3e8d6
--- /dev/null
+++ b/src/obitools/align/__init__.py
@@ -0,0 +1,15 @@
+from _nws import NWS
+from _upperbond import indexSequences
+from _lcs import LCS,lenlcs,ALILEN,MAXLEN,MINLEN
+from _assemble import DirectAssemble, ReverseAssemble
+from _qsassemble import QSolexaDirectAssemble,QSolexaReverseAssemble 
+from _rassemble import RightDirectAssemble as RightReverseAssemble
+from _qsrassemble import QSolexaRightDirectAssemble,QSolexaRightReverseAssemble 
+from _freeendgap import FreeEndGap
+from _freeendgapfm import FreeEndGapFullMatch
+from _upperbond import isLCSReachable
+from _codonnws import CodonNWS
diff --git a/src/obitools/align/_assemble.pxd b/src/obitools/align/_assemble.pxd
new file mode 100644
index 0000000..2e4359f
--- /dev/null
+++ b/src/obitools/align/_assemble.pxd
@@ -0,0 +1,10 @@
+from _nws cimport *  
+# C-level declaration of the DirectAssemble aligner, an extension of NWS.
+cdef class DirectAssemble(NWS):
+    # Best score found in the last column of the alignment matrix.
+    cdef double ysmax
+    # Row at which that best last-column score was found.
+    cdef int    ymax
+    # Fill the matrix when needed and return ysmax.
+    cdef double doAlignment(self) except? 0
+# C-level declaration of ReverseAssemble; it adds no C attribute or method to DirectAssemble.
+cdef class ReverseAssemble(DirectAssemble):    
+    pass
\ No newline at end of file
diff --git a/src/obitools/align/_assemble.pyx b/src/obitools/align/_assemble.pyx
new file mode 100644
index 0000000..849cd0a
--- /dev/null
+++ b/src/obitools/align/_assemble.pyx
@@ -0,0 +1,169 @@
+Created on 6 Nov. 2009
+ at author: coissac
+from _assemble cimport *  
+cdef class DirectAssemble(NWS):
+    """
+    Alignment derived from :py:class:`NWS` in which the first row of the matrix is
+    initialised with a null score and the final score is the best one found in the
+    last column of the matrix.
+    """
+    def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
+        """
+        :param match: score passed to NWS for a match
+        :param mismatch: score passed to NWS for a mismatch
+        :param opengap: score passed to NWS for a gap opening
+        :param extgap: score passed to NWS for a gap extension
+        """
+        NWS.__init__(self,match,mismatch,opengap,extgap)
+        self.ysmax=0
+        self.ymax=0
+    cdef double doAlignment(self) except? 0:
+        # Fill the alignment matrix when `needToCompute` is set and return the best score
+        # of the last column (ysmax); the row where it occurs is kept in ymax.
+        # Path codes stored in each cell: 0 = diagonal, positive = horizontal gap,
+        # negative = vertical gap (see how backtrack() consumes them).
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int idx0
+        cdef int idx1
+        cdef int jump
+        cdef int delta
+        cdef double score
+        cdef double scoremax
+        cdef int    path
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            self.ysmax=0
+            self.ymax=0
+            # First row: null score, so a leading horizontal gap costs nothing.
+            for j in range(1,self.hSeq.length+1):
+                idx = self.index(j,0)
+                self.matrix.matrix[idx].score = 0
+                self.matrix.matrix[idx].path  = j
+            # First column: regular gap cost (one opening plus i-1 extensions).
+            for i in range(1,self.vSeq.length+1):
+                idx = self.index(0,i)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
+                self.matrix.matrix[idx].path  = -i
+            # idx0 follows cell (j-1,i-1) and idx1 follows cell (j,i); both are advanced
+            # by hand instead of calling index() for every cell.
+            # NOTE(review): this assumes index() numbers the cells row by row with
+            # hSeq.length+1 cells per row - confirm against NWS.index.
+            idx0=self.index(-1,0)
+            idx1=self.index(0,1)
+            for i in range(1,self.vSeq.length+1):
+                # Extra step at each new row to skip column 0.
+                idx0+=1
+                idx1+=1
+                for j in range(1,self.hSeq.length+1):
+                    # 1 - came from diagonal
+                    #idx = self.index(j-1,i-1)
+                    idx = idx0
+                    # print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
+                    scoremax = self.matrix.matrix[idx].score + \
+                               self.matchScore(j,i)
+                    path = 0
+                    # print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
+                    # 2 - open horizontal gap
+                    # idx = self.index(j-1,i)
+                    idx = idx1 - 1
+                    score = self.matrix.matrix[idx].score+ \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = +1
+                    # 3 - open vertical gap
+                    # idx = self.index(j,i-1)
+                    idx = idx0 + 1
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = -1
+                    # 4 - extend horizontal gap
+                    # bestHJump[i] is the column where a gap was last opened on row i
+                    # (negative presumably means "none yet" - set outside this block).
+                    jump = self.matrix.bestHJump[i]
+                    if jump >= 0:
+                        idx = self.index(jump,i)
+                        delta = j-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = delta+1 
+                    # 5 - extend vertical gap
+                    # bestVJump[j] is the row where a gap was last opened on column j.
+                    jump = self.matrix.bestVJump[j]
+                    if jump >= 0:
+                        idx = self.index(j,jump)
+                        delta = i-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = -delta-1 
+                    # idx = self.index(j,i)
+                    idx = idx1
+                    self.matrix.matrix[idx].score = scoremax
+                    self.matrix.matrix[idx].path  = path 
+                    # Remember freshly opened gaps so that later cells can extend them.
+                    if path == -1:
+                        self.matrix.bestVJump[j]=i
+                    elif path == +1 :
+                        self.matrix.bestHJump[i]=j
+                    # Track the best score of the last column and the row holding it.
+                    if j==self.hSeq.length and scoremax > self.ysmax:
+                        self.ysmax=scoremax
+                        self.ymax=i
+                    idx0+=1
+                    idx1+=1
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        return self.ysmax
+    cdef void backtrack(self):
+        # Rebuild the alignment path, walking from cell (hSeq.length, ymax) back to the
+        # origin. Path elements are appended in that walking order (end to start).
+        #cdef list path=[]
+        cdef int i
+        cdef int j 
+        cdef int p
+        self.doAlignment()
+        i=self.ymax
+        j=self.hSeq.length
+        self.path=allocatePath(i,j+1,self.path)
+        # When the best score is not on the last row, first record the rows below it
+        # as a negative (vertical) step.
+        if self.ymax<self.vSeq.length:
+            self.path.path[self.path.length]=self.ymax-self.vSeq.length
+            self.path.length+=1
+        while (i or j):
+            p=self.matrix.matrix[self.index(j,i)].path
+            self.path.path[self.path.length]=p
+            self.path.length+=1
+            #path.append(p)
+            # 0: diagonal step; negative: move up by -p rows; positive: move left by p columns.
+            if p==0:
+                i-=1
+                j-=1
+            elif p < 0:
+                i+=p
+            else:
+                j-=p
+        #path.reverse()
+        #reversePath(self.path)
+        self.path.hStart=0
+        self.path.vStart=0
+        #return 0,0,path
+cdef class ReverseAssemble(DirectAssemble):
+    """
+    Variant of :py:class:`DirectAssemble` in which the second sequence is aligned
+    through the object returned by its ``complement()`` method.
+    """
+    property seqB:
+        # Reading gives back the sequence wrapped by the stored vertical sequence;
+        # writing stores ``seq.complement()`` and refreshes the C-level copy.
+        def __get__(self):
+            stored = self.verticalSeq
+            return stored.wrapped
+        def __set__(self, seq):
+            self.sequenceChanged = True
+            self.verticalSeq = seq.complement()
+            self.vSeq = allocateSequence(self.verticalSeq, self.vSeq)
diff --git a/src/obitools/align/_codonnws.pxd b/src/obitools/align/_codonnws.pxd
new file mode 100644
index 0000000..ddd338c
--- /dev/null
+++ b/src/obitools/align/_codonnws.pxd
@@ -0,0 +1,15 @@
+from _nws cimport *
+# C-level declaration of the CodonNWS aligner, an extension of NWS.
+cdef class CodonNWS(NWS):
+    #cdef double* _aamatrix
+    # NOTE(review): presumably flags telling whether each sequence is in phase - confirm in the .pyx.
+    cdef int _phasedA
+    cdef int _phasedB
+    # Score of the codons at horizontal position h and vertical position v (see the .pyx).
+    cdef double matchCodon(self,int h, int v)
+    cdef double doAlignment(self) except? 0
+    cdef void backtrack(self)
+    # Helpers converting a linear matrix index into a column / row number (see the .pyx).
+    cdef inline int colindex(self, int idx)        
+    cdef inline int rowindex(self, int idx)
diff --git a/src/obitools/align/_codonnws.pyx b/src/obitools/align/_codonnws.pyx
new file mode 100644
index 0000000..f8dec6b
--- /dev/null
+++ b/src/obitools/align/_codonnws.pyx
@@ -0,0 +1,1589 @@
+Created on 6 Nov. 2009
+ at author: coissac
+from _codonnws cimport * 
+#TODO: change functions for translation and BLOSUM scores
+#Standard genetic code
+#  name "Standard" ,
+#  name "SGC0" ,
+#  id 1 ,
+#  sncbieaa "---M---------------M---------------M----------------------------"
+#TODO: this function is entirely hard-coded and should be replaced soon!
+# Amino acids of the standard genetic code (lowercase one-letter symbols, '*' for stop),
+# indexed by 16*r1 + 4*r2 + r3 where r1, r2, r3 are the ranks of the three codon bases
+# in the order a, c, g, t.
+cdef char* _standardCodeTable = "knknttttrsrsiimiqhqhpppprrrrllllededaaaaggggvvvv*y*yssss*cwclflf"
+cdef int _codonBaseRank(char base):
+    # Rank of a lowercase nucleotide in the order a, c, g, t; -1 for any other symbol.
+    if base=='a':
+        return 0
+    elif base=='c':
+        return 1
+    elif base=='g':
+        return 2
+    elif base=='t':
+        return 3
+    return -1
+cdef char _translate(char c1, char c2, char c3):
+    # Translate the codon (c1, c2, c3) with the standard genetic code.
+    # Returns the lowercase amino acid symbol, or '*' for a stop codon and for any codon
+    # containing a symbol other than lowercase a, c, g or t.
+    # The ct* codons translate to 'l' (leucine): the former if/elif chain tested
+    # c2=='g' twice under c1=='c', so those codons fell through to '*'.
+    cdef int r1 = _codonBaseRank(c1)
+    cdef int r2 = _codonBaseRank(c2)
+    cdef int r3 = _codonBaseRank(c3)
+    if r1 < 0 or r2 < 0 or r3 < 0:
+        return '*'
+    return _standardCodeTable[16*r1 + 4*r2 + r3]
+##  Matrix made by matblas from blosum62.iij
+##  * column uses minimum score
+##  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+##  Blocks Database = /data/blocks_5.0/blocks.dat
+##  Cluster Percentage: >= 62
+##  Entropy =   0.6979, Expected =  -0.5209
+#   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
+#A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4 
+#R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4 
+#N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4 
+#D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1 -4 
+#C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 
+#Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1 -4 
+#E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4 
+#G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1 -4 
+#H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1 -4 
+#I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1 -4 
+#L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1 -4 
+#K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1 -4 
+#M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1 -4 
+#F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1 -4 
+#P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2 -4 
+#S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0 -4 
+#T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0 -4 
+#W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2 -4 
+#Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1 -4 
+#V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1 -4 
+#B -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1 -4 
+#Z -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4 
+#X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1 -4 
+#* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1 
+cdef double _blosum62(char c1_1, char c1_2, char c1_3, char c2_1, char c2_2, char c2_3):
+    cdef char aa1 = _translate(c1_1, c1_2, c1_3)
+    cdef char aa2 = _translate(c2_1, c2_2, c2_3)
+    if aa1=="a" and aa2=="a":
+        return 4
+    if aa1=="a" and aa2=="r":
+        return -1
+    if aa1=="a" and aa2=="n":
+        return -2
+    if aa1=="a" and aa2=="d":
+        return -2
+    if aa1=="a" and aa2=="c":
+        return 0
+    if aa1=="a" and aa2=="q":
+        return -1
+    if aa1=="a" and aa2=="e":
+        return -1
+    if aa1=="a" and aa2=="g":
+        return 0
+    if aa1=="a" and aa2=="h":
+        return -2
+    if aa1=="a" and aa2=="i":
+        return -1
+    if aa1=="a" and aa2=="l":
+        return -1
+    if aa1=="a" and aa2=="k":
+        return -1
+    if aa1=="a" and aa2=="m":
+        return -1
+    if aa1=="a" and aa2=="f":
+        return -2
+    if aa1=="a" and aa2=="p":
+        return -1
+    if aa1=="a" and aa2=="s":
+        return 1
+    if aa1=="a" and aa2=="t":
+        return 0
+    if aa1=="a" and aa2=="w":
+        return -3
+    if aa1=="a" and aa2=="y":
+        return -2
+    if aa1=="a" and aa2=="v":
+        return 0
+    if aa1=="a" and aa2=="b":
+        return -2
+    if aa1=="a" and aa2=="z":
+        return -1
+    if aa1=="a" and aa2=="x":
+        return 0
+    if aa1=="a" and aa2=="*":
+        return -4
+    if aa1=="r" and aa2=="a":
+        return -1
+    if aa1=="r" and aa2=="r":
+        return 5
+    if aa1=="r" and aa2=="n":
+        return 0
+    if aa1=="r" and aa2=="d":
+        return -2
+    if aa1=="r" and aa2=="c":
+        return -3
+    if aa1=="r" and aa2=="q":
+        return 1
+    if aa1=="r" and aa2=="e":
+        return 0
+    if aa1=="r" and aa2=="g":
+        return -2
+    if aa1=="r" and aa2=="h":
+        return 0
+    if aa1=="r" and aa2=="i":
+        return -3
+    if aa1=="r" and aa2=="l":
+        return -2
+    if aa1=="r" and aa2=="k":
+        return 2
+    if aa1=="r" and aa2=="m":
+        return -1
+    if aa1=="r" and aa2=="f":
+        return -3
+    if aa1=="r" and aa2=="p":
+        return -2
+    if aa1=="r" and aa2=="s":
+        return -1
+    if aa1=="r" and aa2=="t":
+        return -1
+    if aa1=="r" and aa2=="w":
+        return -3
+    if aa1=="r" and aa2=="y":
+        return -2
+    if aa1=="r" and aa2=="v":
+        return -3
+    if aa1=="r" and aa2=="b":
+        return -1
+    if aa1=="r" and aa2=="z":
+        return 0
+    if aa1=="r" and aa2=="x":
+        return -1
+    if aa1=="r" and aa2=="*":
+        return -4
+    if aa1=="n" and aa2=="a":
+        return -2
+    if aa1=="n" and aa2=="r":
+        return 0
+    if aa1=="n" and aa2=="n":
+        return 6
+    if aa1=="n" and aa2=="d":
+        return 1
+    if aa1=="n" and aa2=="c":
+        return -3
+    if aa1=="n" and aa2=="q":
+        return 0
+    if aa1=="n" and aa2=="e":
+        return 0
+    if aa1=="n" and aa2=="g":
+        return 0
+    if aa1=="n" and aa2=="h":
+        return 1
+    if aa1=="n" and aa2=="i":
+        return -3
+    if aa1=="n" and aa2=="l":
+        return -3
+    if aa1=="n" and aa2=="k":
+        return 0
+    if aa1=="n" and aa2=="m":
+        return -2
+    if aa1=="n" and aa2=="f":
+        return -3
+    if aa1=="n" and aa2=="p":
+        return -2
+    if aa1=="n" and aa2=="s":
+        return 1
+    if aa1=="n" and aa2=="t":
+        return 0
+    if aa1=="n" and aa2=="w":
+        return -4
+    if aa1=="n" and aa2=="y":
+        return -2
+    if aa1=="n" and aa2=="v":
+        return -3
+    if aa1=="n" and aa2=="b":
+        return 3
+    if aa1=="n" and aa2=="z":
+        return 0
+    if aa1=="n" and aa2=="x":
+        return -1
+    if aa1=="n" and aa2=="*":
+        return -4
+    if aa1=="d" and aa2=="a":
+        return -2
+    if aa1=="d" and aa2=="r":
+        return -2
+    if aa1=="d" and aa2=="n":
+        return 1
+    if aa1=="d" and aa2=="d":
+        return 6
+    if aa1=="d" and aa2=="c":
+        return -3
+    if aa1=="d" and aa2=="q":
+        return 0
+    if aa1=="d" and aa2=="e":
+        return 2
+    if aa1=="d" and aa2=="g":
+        return -1
+    if aa1=="d" and aa2=="h":
+        return -1
+    if aa1=="d" and aa2=="i":
+        return -3
+    if aa1=="d" and aa2=="l":
+        return -4
+    if aa1=="d" and aa2=="k":
+        return -1
+    if aa1=="d" and aa2=="m":
+        return -3
+    if aa1=="d" and aa2=="f":
+        return -3
+    if aa1=="d" and aa2=="p":
+        return -1
+    if aa1=="d" and aa2=="s":
+        return 0
+    if aa1=="d" and aa2=="t":
+        return -1
+    if aa1=="d" and aa2=="w":
+        return -4
+    if aa1=="d" and aa2=="y":
+        return -3
+    if aa1=="d" and aa2=="v":
+        return -3
+    if aa1=="d" and aa2=="b":
+        return 4
+    if aa1=="d" and aa2=="z":
+        return 1
+    if aa1=="d" and aa2=="x":
+        return -1
+    if aa1=="d" and aa2=="*":
+        return -4
+    if aa1=="c" and aa2=="a":
+        return 0
+    if aa1=="c" and aa2=="r":
+        return -3
+    if aa1=="c" and aa2=="n":
+        return -3
+    if aa1=="c" and aa2=="d":
+        return -3
+    if aa1=="c" and aa2=="c":
+        return 9
+    if aa1=="c" and aa2=="q":
+        return -3
+    if aa1=="c" and aa2=="e":
+        return -4
+    if aa1=="c" and aa2=="g":
+        return -3
+    if aa1=="c" and aa2=="h":
+        return -3
+    if aa1=="c" and aa2=="i":
+        return -1
+    if aa1=="c" and aa2=="l":
+        return -1
+    if aa1=="c" and aa2=="k":
+        return -3
+    if aa1=="c" and aa2=="m":
+        return -1
+    if aa1=="c" and aa2=="f":
+        return -2
+    if aa1=="c" and aa2=="p":
+        return -3
+    if aa1=="c" and aa2=="s":
+        return -1
+    if aa1=="c" and aa2=="t":
+        return -1
+    if aa1=="c" and aa2=="w":
+        return -2
+    if aa1=="c" and aa2=="y":
+        return -2
+    if aa1=="c" and aa2=="v":
+        return -1
+    if aa1=="c" and aa2=="b":
+        return -3
+    if aa1=="c" and aa2=="z":
+        return -3
+    if aa1=="c" and aa2=="x":
+        return -2
+    if aa1=="c" and aa2=="*":
+        return -4
+    if aa1=="q" and aa2=="a":
+        return -1
+    if aa1=="q" and aa2=="r":
+        return 1
+    if aa1=="q" and aa2=="n":
+        return 0
+    if aa1=="q" and aa2=="d":
+        return 0
+    if aa1=="q" and aa2=="c":
+        return -3
+    if aa1=="q" and aa2=="q":
+        return 5
+    if aa1=="q" and aa2=="e":
+        return 2
+    if aa1=="q" and aa2=="g":
+        return -2
+    if aa1=="q" and aa2=="h":
+        return 0
+    if aa1=="q" and aa2=="i":
+        return -3
+    if aa1=="q" and aa2=="l":
+        return -2
+    if aa1=="q" and aa2=="k":
+        return 1
+    if aa1=="q" and aa2=="m":
+        return 0
+    if aa1=="q" and aa2=="f":
+        return -3
+    if aa1=="q" and aa2=="p":
+        return -1
+    if aa1=="q" and aa2=="s":
+        return 0
+    if aa1=="q" and aa2=="t":
+        return -1
+    if aa1=="q" and aa2=="w":
+        return -2
+    if aa1=="q" and aa2=="y":
+        return -1
+    if aa1=="q" and aa2=="v":
+        return -2
+    if aa1=="q" and aa2=="b":
+        return 0
+    if aa1=="q" and aa2=="z":
+        return 3
+    if aa1=="q" and aa2=="x":
+        return -1
+    if aa1=="q" and aa2=="*":
+        return -4
+    if aa1=="e" and aa2=="a":
+        return -1
+    if aa1=="e" and aa2=="r":
+        return 0
+    if aa1=="e" and aa2=="n":
+        return 0
+    if aa1=="e" and aa2=="d":
+        return 2
+    if aa1=="e" and aa2=="c":
+        return -4
+    if aa1=="e" and aa2=="q":
+        return 2
+    if aa1=="e" and aa2=="e":
+        return 5
+    if aa1=="e" and aa2=="g":
+        return -2
+    if aa1=="e" and aa2=="h":
+        return 0
+    if aa1=="e" and aa2=="i":
+        return -3
+    if aa1=="e" and aa2=="l":
+        return -3
+    if aa1=="e" and aa2=="k":
+        return 1
+    if aa1=="e" and aa2=="m":
+        return -2
+    if aa1=="e" and aa2=="f":
+        return -3
+    if aa1=="e" and aa2=="p":
+        return -1
+    if aa1=="e" and aa2=="s":
+        return 0
+    if aa1=="e" and aa2=="t":
+        return -1
+    if aa1=="e" and aa2=="w":
+        return -3
+    if aa1=="e" and aa2=="y":
+        return -2
+    if aa1=="e" and aa2=="v":
+        return -2
+    if aa1=="e" and aa2=="b":
+        return 1
+    if aa1=="e" and aa2=="z":
+        return 4
+    if aa1=="e" and aa2=="x":
+        return -1
+    if aa1=="e" and aa2=="*":
+        return -4
+    if aa1=="g" and aa2=="a":
+        return 0
+    if aa1=="g" and aa2=="r":
+        return -2
+    if aa1=="g" and aa2=="n":
+        return 0
+    if aa1=="g" and aa2=="d":
+        return -1
+    if aa1=="g" and aa2=="c":
+        return -3
+    if aa1=="g" and aa2=="q":
+        return -2
+    if aa1=="g" and aa2=="e":
+        return -2
+    if aa1=="g" and aa2=="g":
+        return 6
+    if aa1=="g" and aa2=="h":
+        return -2
+    if aa1=="g" and aa2=="i":
+        return -4
+    if aa1=="g" and aa2=="l":
+        return -4
+    if aa1=="g" and aa2=="k":
+        return -2
+    if aa1=="g" and aa2=="m":
+        return -3
+    if aa1=="g" and aa2=="f":
+        return -3
+    if aa1=="g" and aa2=="p":
+        return -2
+    if aa1=="g" and aa2=="s":
+        return 0
+    if aa1=="g" and aa2=="t":
+        return -2
+    if aa1=="g" and aa2=="w":
+        return -2
+    if aa1=="g" and aa2=="y":
+        return -3
+    if aa1=="g" and aa2=="v":
+        return -3
+    if aa1=="g" and aa2=="b":
+        return -1
+    if aa1=="g" and aa2=="z":
+        return -2
+    if aa1=="g" and aa2=="x":
+        return -1
+    if aa1=="g" and aa2=="*":
+        return -4
+    if aa1=="h" and aa2=="a":
+        return -2
+    if aa1=="h" and aa2=="r":
+        return 0
+    if aa1=="h" and aa2=="n":
+        return 1
+    if aa1=="h" and aa2=="d":
+        return -1
+    if aa1=="h" and aa2=="c":
+        return -3
+    if aa1=="h" and aa2=="q":
+        return 0
+    if aa1=="h" and aa2=="e":
+        return 0
+    if aa1=="h" and aa2=="g":
+        return -2
+    if aa1=="h" and aa2=="h":
+        return 8
+    if aa1=="h" and aa2=="i":
+        return -3
+    if aa1=="h" and aa2=="l":
+        return -3
+    if aa1=="h" and aa2=="k":
+        return -1
+    if aa1=="h" and aa2=="m":
+        return -2
+    if aa1=="h" and aa2=="f":
+        return -1
+    if aa1=="h" and aa2=="p":
+        return -2
+    if aa1=="h" and aa2=="s":
+        return -1
+    if aa1=="h" and aa2=="t":
+        return -2
+    if aa1=="h" and aa2=="w":
+        return -2
+    if aa1=="h" and aa2=="y":
+        return 2
+    if aa1=="h" and aa2=="v":
+        return -3
+    if aa1=="h" and aa2=="b":
+        return 0
+    if aa1=="h" and aa2=="z":
+        return 0
+    if aa1=="h" and aa2=="x":
+        return -1
+    if aa1=="h" and aa2=="*":
+        return -4
+    if aa1=="i" and aa2=="a":
+        return -1
+    if aa1=="i" and aa2=="r":
+        return -3
+    if aa1=="i" and aa2=="n":
+        return -3
+    if aa1=="i" and aa2=="d":
+        return -3
+    if aa1=="i" and aa2=="c":
+        return -1
+    if aa1=="i" and aa2=="q":
+        return -3
+    if aa1=="i" and aa2=="e":
+        return -3
+    if aa1=="i" and aa2=="g":
+        return -4
+    if aa1=="i" and aa2=="h":
+        return -3
+    if aa1=="i" and aa2=="i":
+        return 4
+    if aa1=="i" and aa2=="l":
+        return 2
+    if aa1=="i" and aa2=="k":
+        return -3
+    if aa1=="i" and aa2=="m":
+        return 1
+    if aa1=="i" and aa2=="f":
+        return 0
+    if aa1=="i" and aa2=="p":
+        return -3
+    if aa1=="i" and aa2=="s":
+        return -2
+    if aa1=="i" and aa2=="t":
+        return -1
+    if aa1=="i" and aa2=="w":
+        return -3
+    if aa1=="i" and aa2=="y":
+        return -1
+    if aa1=="i" and aa2=="v":
+        return 3
+    if aa1=="i" and aa2=="b":
+        return -3
+    if aa1=="i" and aa2=="z":
+        return -3
+    if aa1=="i" and aa2=="x":
+        return -1
+    if aa1=="i" and aa2=="*":
+        return -4
+    if aa1=="l" and aa2=="a":
+        return -1
+    if aa1=="l" and aa2=="r":
+        return -2
+    if aa1=="l" and aa2=="n":
+        return -3
+    if aa1=="l" and aa2=="d":
+        return -4
+    if aa1=="l" and aa2=="c":
+        return -1
+    if aa1=="l" and aa2=="q":
+        return -2
+    if aa1=="l" and aa2=="e":
+        return -3
+    if aa1=="l" and aa2=="g":
+        return -4
+    if aa1=="l" and aa2=="h":
+        return -3
+    if aa1=="l" and aa2=="i":
+        return 2
+    if aa1=="l" and aa2=="l":
+        return 4
+    if aa1=="l" and aa2=="k":
+        return -2
+    if aa1=="l" and aa2=="m":
+        return 2
+    if aa1=="l" and aa2=="f":
+        return 0
+    if aa1=="l" and aa2=="p":
+        return -3
+    if aa1=="l" and aa2=="s":
+        return -2
+    if aa1=="l" and aa2=="t":
+        return -1
+    if aa1=="l" and aa2=="w":
+        return -2
+    if aa1=="l" and aa2=="y":
+        return -1
+    if aa1=="l" and aa2=="v":
+        return 1
+    if aa1=="l" and aa2=="b":
+        return -4
+    if aa1=="l" and aa2=="z":
+        return -3
+    if aa1=="l" and aa2=="x":
+        return -1
+    if aa1=="l" and aa2=="*":
+        return -4
+    if aa1=="k" and aa2=="a":
+        return -1
+    if aa1=="k" and aa2=="r":
+        return 2
+    if aa1=="k" and aa2=="n":
+        return 0
+    if aa1=="k" and aa2=="d":
+        return -1
+    if aa1=="k" and aa2=="c":
+        return -3
+    if aa1=="k" and aa2=="q":
+        return 1
+    if aa1=="k" and aa2=="e":
+        return 1
+    if aa1=="k" and aa2=="g":
+        return -2
+    if aa1=="k" and aa2=="h":
+        return -1
+    if aa1=="k" and aa2=="i":
+        return -3
+    if aa1=="k" and aa2=="l":
+        return -2
+    if aa1=="k" and aa2=="k":
+        return 5
+    if aa1=="k" and aa2=="m":
+        return -1
+    if aa1=="k" and aa2=="f":
+        return -3
+    if aa1=="k" and aa2=="p":
+        return -1
+    if aa1=="k" and aa2=="s":
+        return 0
+    if aa1=="k" and aa2=="t":
+        return -1
+    if aa1=="k" and aa2=="w":
+        return -3
+    if aa1=="k" and aa2=="y":
+        return -2
+    if aa1=="k" and aa2=="v":
+        return -2
+    if aa1=="k" and aa2=="b":
+        return 0
+    if aa1=="k" and aa2=="z":
+        return 1
+    if aa1=="k" and aa2=="x":
+        return -1
+    if aa1=="k" and aa2=="*":
+        return -4
+    if aa1=="m" and aa2=="a":
+        return -1
+    if aa1=="m" and aa2=="r":
+        return -1
+    if aa1=="m" and aa2=="n":
+        return -2
+    if aa1=="m" and aa2=="d":
+        return -3
+    if aa1=="m" and aa2=="c":
+        return -1
+    if aa1=="m" and aa2=="q":
+        return 0
+    if aa1=="m" and aa2=="e":
+        return -2
+    if aa1=="m" and aa2=="g":
+        return -3
+    if aa1=="m" and aa2=="h":
+        return -2
+    if aa1=="m" and aa2=="i":
+        return 1
+    if aa1=="m" and aa2=="l":
+        return 2
+    if aa1=="m" and aa2=="k":
+        return -1
+    if aa1=="m" and aa2=="m":
+        return 5
+    if aa1=="m" and aa2=="f":
+        return 0
+    if aa1=="m" and aa2=="p":
+        return -2
+    if aa1=="m" and aa2=="s":
+        return -1
+    if aa1=="m" and aa2=="t":
+        return -1
+    if aa1=="m" and aa2=="w":
+        return -1
+    if aa1=="m" and aa2=="y":
+        return -1
+    if aa1=="m" and aa2=="v":
+        return 1
+    if aa1=="m" and aa2=="b":
+        return -3
+    if aa1=="m" and aa2=="z":
+        return -1
+    if aa1=="m" and aa2=="x":
+        return -1
+    if aa1=="m" and aa2=="*":
+        return -4
+    if aa1=="f" and aa2=="a":
+        return -2
+    if aa1=="f" and aa2=="r":
+        return -3
+    if aa1=="f" and aa2=="n":
+        return -3
+    if aa1=="f" and aa2=="d":
+        return -3
+    if aa1=="f" and aa2=="c":
+        return -2
+    if aa1=="f" and aa2=="q":
+        return -3
+    if aa1=="f" and aa2=="e":
+        return -3
+    if aa1=="f" and aa2=="g":
+        return -3
+    if aa1=="f" and aa2=="h":
+        return -1
+    if aa1=="f" and aa2=="i":
+        return 0
+    if aa1=="f" and aa2=="l":
+        return 0
+    if aa1=="f" and aa2=="k":
+        return -3
+    if aa1=="f" and aa2=="m":
+        return 0
+    if aa1=="f" and aa2=="f":
+        return 6
+    if aa1=="f" and aa2=="p":
+        return -4
+    if aa1=="f" and aa2=="s":
+        return -2
+    if aa1=="f" and aa2=="t":
+        return -2
+    if aa1=="f" and aa2=="w":
+        return 1
+    if aa1=="f" and aa2=="y":
+        return 3
+    if aa1=="f" and aa2=="v":
+        return -1
+    if aa1=="f" and aa2=="b":
+        return -3
+    if aa1=="f" and aa2=="z":
+        return -3
+    if aa1=="f" and aa2=="x":
+        return -1
+    if aa1=="f" and aa2=="*":
+        return -4
+    if aa1=="p" and aa2=="a":
+        return -1
+    if aa1=="p" and aa2=="r":
+        return -2
+    if aa1=="p" and aa2=="n":
+        return -2
+    if aa1=="p" and aa2=="d":
+        return -1
+    if aa1=="p" and aa2=="c":
+        return -3
+    if aa1=="p" and aa2=="q":
+        return -1
+    if aa1=="p" and aa2=="e":
+        return -1
+    if aa1=="p" and aa2=="g":
+        return -2
+    if aa1=="p" and aa2=="h":
+        return -2
+    if aa1=="p" and aa2=="i":
+        return -3
+    if aa1=="p" and aa2=="l":
+        return -3
+    if aa1=="p" and aa2=="k":
+        return -1
+    if aa1=="p" and aa2=="m":
+        return -2
+    if aa1=="p" and aa2=="f":
+        return -4
+    if aa1=="p" and aa2=="p":
+        return 7
+    if aa1=="p" and aa2=="s":
+        return -1
+    if aa1=="p" and aa2=="t":
+        return -1
+    if aa1=="p" and aa2=="w":
+        return -4
+    if aa1=="p" and aa2=="y":
+        return -3
+    if aa1=="p" and aa2=="v":
+        return -2
+    if aa1=="p" and aa2=="b":
+        return -2
+    if aa1=="p" and aa2=="z":
+        return -1
+    if aa1=="p" and aa2=="x":
+        return -2
+    if aa1=="p" and aa2=="*":
+        return -4
+    if aa1=="s" and aa2=="a":
+        return 1
+    if aa1=="s" and aa2=="r":
+        return -1
+    if aa1=="s" and aa2=="n":
+        return 1
+    if aa1=="s" and aa2=="d":
+        return 0
+    if aa1=="s" and aa2=="c":
+        return -1
+    if aa1=="s" and aa2=="q":
+        return 0
+    if aa1=="s" and aa2=="e":
+        return 0
+    if aa1=="s" and aa2=="g":
+        return 0
+    if aa1=="s" and aa2=="h":
+        return -1
+    if aa1=="s" and aa2=="i":
+        return -2
+    if aa1=="s" and aa2=="l":
+        return -2
+    if aa1=="s" and aa2=="k":
+        return 0
+    if aa1=="s" and aa2=="m":
+        return -1
+    if aa1=="s" and aa2=="f":
+        return -2
+    if aa1=="s" and aa2=="p":
+        return -1
+    if aa1=="s" and aa2=="s":
+        return 4
+    if aa1=="s" and aa2=="t":
+        return 1
+    if aa1=="s" and aa2=="w":
+        return -3
+    if aa1=="s" and aa2=="y":
+        return -2
+    if aa1=="s" and aa2=="v":
+        return -2
+    if aa1=="s" and aa2=="b":
+        return 0
+    if aa1=="s" and aa2=="z":
+        return 0
+    if aa1=="s" and aa2=="x":
+        return 0
+    if aa1=="s" and aa2=="*":
+        return -4
+    if aa1=="t" and aa2=="a":
+        return 0
+    if aa1=="t" and aa2=="r":
+        return -1
+    if aa1=="t" and aa2=="n":
+        return 0
+    if aa1=="t" and aa2=="d":
+        return -1
+    if aa1=="t" and aa2=="c":
+        return -1
+    if aa1=="t" and aa2=="q":
+        return -1
+    if aa1=="t" and aa2=="e":
+        return -1
+    if aa1=="t" and aa2=="g":
+        return -2
+    if aa1=="t" and aa2=="h":
+        return -2
+    if aa1=="t" and aa2=="i":
+        return -1
+    if aa1=="t" and aa2=="l":
+        return -1
+    if aa1=="t" and aa2=="k":
+        return -1
+    if aa1=="t" and aa2=="m":
+        return -1
+    if aa1=="t" and aa2=="f":
+        return -2
+    if aa1=="t" and aa2=="p":
+        return -1
+    if aa1=="t" and aa2=="s":
+        return 1
+    if aa1=="t" and aa2=="t":
+        return 5
+    if aa1=="t" and aa2=="w":
+        return -2
+    if aa1=="t" and aa2=="y":
+        return -2
+    if aa1=="t" and aa2=="v":
+        return 0
+    if aa1=="t" and aa2=="b":
+        return -1
+    if aa1=="t" and aa2=="z":
+        return -1
+    if aa1=="t" and aa2=="x":
+        return 0
+    if aa1=="t" and aa2=="*":
+        return -4
+    if aa1=="w" and aa2=="a":
+        return -3
+    if aa1=="w" and aa2=="r":
+        return -3
+    if aa1=="w" and aa2=="n":
+        return -4
+    if aa1=="w" and aa2=="d":
+        return -4
+    if aa1=="w" and aa2=="c":
+        return -2
+    if aa1=="w" and aa2=="q":
+        return -2
+    if aa1=="w" and aa2=="e":
+        return -3
+    if aa1=="w" and aa2=="g":
+        return -2
+    if aa1=="w" and aa2=="h":
+        return -2
+    if aa1=="w" and aa2=="i":
+        return -3
+    if aa1=="w" and aa2=="l":
+        return -2
+    if aa1=="w" and aa2=="k":
+        return -3
+    if aa1=="w" and aa2=="m":
+        return -1
+    if aa1=="w" and aa2=="f":
+        return 1
+    if aa1=="w" and aa2=="p":
+        return -4
+    if aa1=="w" and aa2=="s":
+        return -3
+    if aa1=="w" and aa2=="t":
+        return -2
+    if aa1=="w" and aa2=="w":
+        return 11
+    if aa1=="w" and aa2=="y":
+        return 2
+    if aa1=="w" and aa2=="v":
+        return -3
+    if aa1=="w" and aa2=="b":
+        return -4
+    if aa1=="w" and aa2=="z":
+        return -3
+    if aa1=="w" and aa2=="x":
+        return -2
+    if aa1=="w" and aa2=="*":
+        return -4
+    if aa1=="y" and aa2=="a":
+        return -2
+    if aa1=="y" and aa2=="r":
+        return -2
+    if aa1=="y" and aa2=="n":
+        return -2
+    if aa1=="y" and aa2=="d":
+        return -3
+    if aa1=="y" and aa2=="c":
+        return -2
+    if aa1=="y" and aa2=="q":
+        return -1
+    if aa1=="y" and aa2=="e":
+        return -2
+    if aa1=="y" and aa2=="g":
+        return -3
+    if aa1=="y" and aa2=="h":
+        return 2
+    if aa1=="y" and aa2=="i":
+        return -1
+    if aa1=="y" and aa2=="l":
+        return -1
+    if aa1=="y" and aa2=="k":
+        return -2
+    if aa1=="y" and aa2=="m":
+        return -1
+    if aa1=="y" and aa2=="f":
+        return 3
+    if aa1=="y" and aa2=="p":
+        return -3
+    if aa1=="y" and aa2=="s":
+        return -2
+    if aa1=="y" and aa2=="t":
+        return -2
+    if aa1=="y" and aa2=="w":
+        return 2
+    if aa1=="y" and aa2=="y":
+        return 7
+    if aa1=="y" and aa2=="v":
+        return -1
+    if aa1=="y" and aa2=="b":
+        return -3
+    if aa1=="y" and aa2=="z":
+        return -2
+    if aa1=="y" and aa2=="x":
+        return -1
+    if aa1=="y" and aa2=="*":
+        return -4
+    if aa1=="v" and aa2=="a":
+        return 0
+    if aa1=="v" and aa2=="r":
+        return -3
+    if aa1=="v" and aa2=="n":
+        return -3
+    if aa1=="v" and aa2=="d":
+        return -3
+    if aa1=="v" and aa2=="c":
+        return -1
+    if aa1=="v" and aa2=="q":
+        return -2
+    if aa1=="v" and aa2=="e":
+        return -2
+    if aa1=="v" and aa2=="g":
+        return -3
+    if aa1=="v" and aa2=="h":
+        return -3
+    if aa1=="v" and aa2=="i":
+        return 3
+    if aa1=="v" and aa2=="l":
+        return 1
+    if aa1=="v" and aa2=="k":
+        return -2
+    if aa1=="v" and aa2=="m":
+        return 1
+    if aa1=="v" and aa2=="f":
+        return -1
+    if aa1=="v" and aa2=="p":
+        return -2
+    if aa1=="v" and aa2=="s":
+        return -2
+    if aa1=="v" and aa2=="t":
+        return 0
+    if aa1=="v" and aa2=="w":
+        return -3
+    if aa1=="v" and aa2=="y":
+        return -1
+    if aa1=="v" and aa2=="v":
+        return 4
+    if aa1=="v" and aa2=="b":
+        return -3
+    if aa1=="v" and aa2=="z":
+        return -2
+    if aa1=="v" and aa2=="x":
+        return -1
+    if aa1=="v" and aa2=="*":
+        return -4
+    if aa1=="b" and aa2=="a":
+        return -2
+    if aa1=="b" and aa2=="r":
+        return -1
+    if aa1=="b" and aa2=="n":
+        return 3
+    if aa1=="b" and aa2=="d":
+        return 4
+    if aa1=="b" and aa2=="c":
+        return -3
+    if aa1=="b" and aa2=="q":
+        return 0
+    if aa1=="b" and aa2=="e":
+        return 1
+    if aa1=="b" and aa2=="g":
+        return -1
+    if aa1=="b" and aa2=="h":
+        return 0
+    if aa1=="b" and aa2=="i":
+        return -3
+    if aa1=="b" and aa2=="l":
+        return -4
+    if aa1=="b" and aa2=="k":
+        return 0
+    if aa1=="b" and aa2=="m":
+        return -3
+    if aa1=="b" and aa2=="f":
+        return -3
+    if aa1=="b" and aa2=="p":
+        return -2
+    if aa1=="b" and aa2=="s":
+        return 0
+    if aa1=="b" and aa2=="t":
+        return -1
+    if aa1=="b" and aa2=="w":
+        return -4
+    if aa1=="b" and aa2=="y":
+        return -3
+    if aa1=="b" and aa2=="v":
+        return -3
+    if aa1=="b" and aa2=="b":
+        return 4
+    if aa1=="b" and aa2=="z":
+        return 1
+    if aa1=="b" and aa2=="x":
+        return -1
+    if aa1=="b" and aa2=="*":
+        return -4
+    if aa1=="z" and aa2=="a":
+        return -1
+    if aa1=="z" and aa2=="r":
+        return 0
+    if aa1=="z" and aa2=="n":
+        return 0
+    if aa1=="z" and aa2=="d":
+        return 1
+    if aa1=="z" and aa2=="c":
+        return -3
+    if aa1=="z" and aa2=="q":
+        return 3
+    if aa1=="z" and aa2=="e":
+        return 4
+    if aa1=="z" and aa2=="g":
+        return -2
+    if aa1=="z" and aa2=="h":
+        return 0
+    if aa1=="z" and aa2=="i":
+        return -3
+    if aa1=="z" and aa2=="l":
+        return -3
+    if aa1=="z" and aa2=="k":
+        return 1
+    if aa1=="z" and aa2=="m":
+        return -1
+    if aa1=="z" and aa2=="f":
+        return -3
+    if aa1=="z" and aa2=="p":
+        return -1
+    if aa1=="z" and aa2=="s":
+        return 0
+    if aa1=="z" and aa2=="t":
+        return -1
+    if aa1=="z" and aa2=="w":
+        return -3
+    if aa1=="z" and aa2=="y":
+        return -2
+    if aa1=="z" and aa2=="v":
+        return -2
+    if aa1=="z" and aa2=="b":
+        return 1
+    if aa1=="z" and aa2=="z":
+        return 4
+    if aa1=="z" and aa2=="x":
+        return -1
+    if aa1=="z" and aa2=="*":
+        return -4
+    if aa1=="x" and aa2=="a":
+        return 0
+    if aa1=="x" and aa2=="r":
+        return -1
+    if aa1=="x" and aa2=="n":
+        return -1
+    if aa1=="x" and aa2=="d":
+        return -1
+    if aa1=="x" and aa2=="c":
+        return -2
+    if aa1=="x" and aa2=="q":
+        return -1
+    if aa1=="x" and aa2=="e":
+        return -1
+    if aa1=="x" and aa2=="g":
+        return -1
+    if aa1=="x" and aa2=="h":
+        return -1
+    if aa1=="x" and aa2=="i":
+        return -1
+    if aa1=="x" and aa2=="l":
+        return -1
+    if aa1=="x" and aa2=="k":
+        return -1
+    if aa1=="x" and aa2=="m":
+        return -1
+    if aa1=="x" and aa2=="f":
+        return -1
+    if aa1=="x" and aa2=="p":
+        return -2
+    if aa1=="x" and aa2=="s":
+        return 0
+    if aa1=="x" and aa2=="t":
+        return 0
+    if aa1=="x" and aa2=="w":
+        return -2
+    if aa1=="x" and aa2=="y":
+        return -1
+    if aa1=="x" and aa2=="v":
+        return -1
+    if aa1=="x" and aa2=="b":
+        return -1
+    if aa1=="x" and aa2=="z":
+        return -1
+    if aa1=="x" and aa2=="x":
+        return -1
+    if aa1=="x" and aa2=="*":
+        return -4
+    if aa1=="*" and aa2=="a":
+        return -4
+    if aa1=="*" and aa2=="r":
+        return -4
+    if aa1=="*" and aa2=="n":
+        return -4
+    if aa1=="*" and aa2=="d":
+        return -4
+    if aa1=="*" and aa2=="c":
+        return -4
+    if aa1=="*" and aa2=="q":
+        return -4
+    if aa1=="*" and aa2=="e":
+        return -4
+    if aa1=="*" and aa2=="g":
+        return -4
+    if aa1=="*" and aa2=="h":
+        return -4
+    if aa1=="*" and aa2=="i":
+        return -4
+    if aa1=="*" and aa2=="l":
+        return -4
+    if aa1=="*" and aa2=="k":
+        return -4
+    if aa1=="*" and aa2=="m":
+        return -4
+    if aa1=="*" and aa2=="f":
+        return -4
+    if aa1=="*" and aa2=="p":
+        return -4
+    if aa1=="*" and aa2=="s":
+        return -4
+    if aa1=="*" and aa2=="t":
+        return -4
+    if aa1=="*" and aa2=="w":
+        return -4
+    if aa1=="*" and aa2=="y":
+        return -4
+    if aa1=="*" and aa2=="v":
+        return -4
+    if aa1=="*" and aa2=="b":
+        return -4
+    if aa1=="*" and aa2=="z":
+        return -4
+    if aa1=="*" and aa2=="x":
+        return -4
+    if aa1=="*" and aa2=="*":
+        return 1
+# Aligner derived from NWS that adds, next to the usual one-position diagonal
+# move, a three-position "codon" diagonal move scored by matchCodon().
+# The path field of each matrix cell stores the linear index of the cell the
+# best score came from (see doAlignment / backtrack below).
+cdef class CodonNWS(NWS):
+    # match, mismatch, opengap and extgap are forwarded unchanged to
+    # NWS.__init__.  phasedA / phasedB restrict the codon move to positions
+    # whose index modulo 3 equals the given phase: phasedA is tested against
+    # the horizontal index and phasedB against the vertical index in
+    # doAlignment.  -1 is kept as a sentinel meaning "no phase constraint";
+    # any other value is reduced modulo 3.
+    def __init__(self,match=2,mismatch=-3,opengap=-4,extgap=-1, phasedA = -1, phasedB = -1):#, AAmatrix=_blosum62, translationtable=None):
+        NWS.__init__(self,match, mismatch, opengap, extgap)
+        self._phasedA = -1 if phasedA == -1 else phasedA%3
+        self._phasedB = -1 if phasedB == -1 else phasedB%3
+    # Score the alignment of the three horizontal positions ending at h with
+    # the three vertical positions ending at v (h and v are 1-based matrix
+    # coordinates, hence the -1 offsets into the sequence buffers).
+    # The result is the sum of the three per-position scores, each one
+    # weighting _match and _mismatch by iupacPartialMatch, plus the value
+    # returned by _blosum62 for the six characters.
+    cdef double matchCodon(self, int h, int v):
+        cdef double score
+        cdef double match
+        score = 0
+        for i in range(3):
+            match = iupacPartialMatch(self.hSeq.sequence[h-i-1],self.vSeq.sequence[v-i-1])
+            score += match * self._match + (1-match) * self._mismatch
+        # NOTE(review): the characters are passed from the last position to the
+        # first (h-1, h-2, h-3) while the debug print below shows them as
+        # h-3, h-2, h-1 -- confirm this matches _blosum62's parameter order.
+        bl = _blosum62(self.hSeq.sequence[h-1], self.hSeq.sequence[h-2], self.hSeq.sequence[h-3], self.vSeq.sequence[v-1], self.vSeq.sequence[v-2], self.vSeq.sequence[v-3])
+        #print "MatchCodon","h=",h,"v=",v, "   ",\
+        #                   ''.join(['%c'%(self.hSeq.sequence[h-3],),\
+        #                   '%c'%(self.hSeq.sequence[h-2],),\
+        #                   '%c'%(self.hSeq.sequence[h-1],)]),\
+        #                   "  ",                           \
+        #                   ''.join(['%c'%(self.vSeq.sequence[v-3],),\
+        #                   '%c'%(self.vSeq.sequence[v-2],),\
+        #                   '%c'%(self.vSeq.sequence[v-1])])
+        #print '--> score = %d + %d'%(score,bl)                                           
+        score += bl 
+        return score
+    # Column (horizontal coordinate) of a linear cell index, for rows of
+    # _hlen()+1 cells.  Presumably the inverse of self.index() -- index() is
+    # not visible here, TODO confirm.
+    cdef inline int colindex(self, int idx):
+        return idx%(self._hlen()+1)
+    # Row (vertical coordinate) of a linear cell index, for rows of
+    # _hlen()+1 cells.
+    cdef inline int rowindex(self, int idx):
+        return idx/(self._hlen()+1)
+    # The meaning of the information stored in the path field of the matrix
+    # is changed here: it holds the index of the origin cell.
+    #
+    # Fill the dynamic programming matrix (only when self.needToCompute is
+    # true) and return the score stored in the last cell (_hlen(), _vlen()).
+    cdef double doAlignment(self) except? 0:
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int jump
+        cdef int delta
+        cdef double score 
+        cdef double scoremax
+        cdef int    path
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            # First row: a single gap of length j; origin is cell 0.
+            for j in range(1,self._hlen()+1):
+                idx = self.index(j,0)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (j-1))
+                self.matrix.matrix[idx].path  = 0
+            # First column: a single gap of length i; origin is cell 0.
+            for i in range(1,self._vlen()+1):
+                idx = self.index(0,i)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
+                self.matrix.matrix[idx].path  = 0
+            # Inner cells: keep the best of the candidate moves below.  Ties
+            # keep the earlier candidate because the comparisons are strict.
+            for i in range(1,self._vlen()+1):
+                for j in range(1,self._hlen()+1):
+                    # 1 - came from diagonal
+                    idx = self.index(j-1,i-1)
+                    # print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
+                    scoremax = self.matrix.matrix[idx].score + \
+                               self.matchScore(j,i)
+                    path = idx
+                    # print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
+                    # 1.1 - came from diagonal by aligning a codon with a codon
+                    # (only when three positions are available on both axes
+                    # and the optional phase constraints are satisfied)
+                    #print i, i%3, self._phasedB, i%3==self._phasedB
+                    if (j-3)>=0 and (i-3)>=0 and (self._phasedB==-1 or (i%3)==self._phasedB) and (self._phasedA==-1 or (j%3)==self._phasedA):
+                        idx = self.index(j-3,i-3)
+                        contrib = self.matchCodon(j,i)
+                        score = self.matrix.matrix[idx].score + \
+                                contrib
+                        #print "so=%f sd=%f score=%f sm=%f" % (self.matrix.matrix[idx].score,contrib,score, scoremax)
+                        if score > scoremax : 
+                            #print "the codon move gives a better score"
+                            scoremax = score
+                            path = idx
+                    # 2 - open horizontal gap
+                    idx = self.index(j-1,i)
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = idx
+                    # 3 - open vertical gap
+                    idx = self.index(j,i-1)
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = idx
+                    # 4 - extend horizontal gap
+                    # (a negative jump means no gap origin recorded for row i)
+                    jump = self.matrix.bestHJump[i]
+                    if jump >= 0:
+                        idx = self.index(jump,i)
+                        delta = j-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = idx 
+                    # 5 - extend vertical gap
+                    # (a negative jump means no gap origin recorded for column j)
+                    jump = self.matrix.bestVJump[j]
+                    if jump >= 0:
+                        idx = self.index(j,jump)
+                        delta = i-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = idx 
+                    idx = self.index(j,i)
+                    self.matrix.matrix[idx].score = scoremax
+                    self.matrix.matrix[idx].path  = path 
+                    # If the selected origin is the cell just above or just to
+                    # the left, record the current cell as the jump origin
+                    # used by the gap extension tests of this column / row.
+                    if path == self.index(j,i-1):
+                        self.matrix.bestVJump[j]=i
+                    elif path == self.index(j-1,i):
+                        self.matrix.bestHJump[i]=j
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        idx = self.index(self._hlen(),self._vlen())
+        return self.matrix.matrix[idx].score
+    # Rebuild the alignment path by following the origin indices stored in
+    # the matrix from the last cell (_hlen(), _vlen()) back to cell (0, 0).
+    # Steps are appended in that traversal order, i.e. from the end of the
+    # alignment toward its start.  Step encoding written to self.path.path:
+    #   0   one aligned position (a codon move appends three 0 entries)
+    #   >0  horizontal move of that many positions (vertical index unchanged)
+    #   <0  vertical move of that many positions (horizontal index unchanged)
+    cdef void backtrack(self):
+        #cdef list path=[]
+        cdef int i
+        cdef int j 
+        cdef int p
+        self.doAlignment()
+        i=self._vlen()
+        j=self._hlen()
+        self.path=allocatePath(i,j,self.path)
+        while (i or j):
+            idx=self.matrix.matrix[self.index(j,i)].path
+            ori_j = self.colindex(idx)
+            ori_i = self.rowindex(idx)
+            #print i,j
+            if i-ori_i == 3 and j-ori_j == 3:
+                #print 'going through a codon'
+                # two entries are appended here, the third one by the common
+                # append at the end of the loop body
+                p = 0
+                self.path.path[self.path.length]=p
+                self.path.length+=1
+                self.path.path[self.path.length]=p
+                self.path.length+=1
+            elif i-ori_i == 1 and j-ori_j == 1:
+                #print 'going through a match'
+                p = 0
+            elif i-ori_i == 0:
+                #print 'going through a gap'
+                p = (j-ori_j)
+            elif j-ori_j == 0:
+                #print 'going through a gap'
+                p = -(i-ori_i)
+            else:
+                # NOTE(review): unexpected move.  Only a message is printed;
+                # p keeps its previous value (unset on the first iteration)
+                # and is still appended below.
+                print "badaboum !"
+            i = ori_i
+            j = ori_j
+            #print '->', i, j
+            self.path.path[self.path.length]=p
+            self.path.length+=1
+        self.path.hStart=0
+        self.path.vStart=0
+    # Score applied to matching positions; assigning it sets scoreChanged.
+    property match:
+        def __get__(self):
+            return self._match
+        def __set__(self,match):
+            self._match=match 
+            self.scoreChanged=True
+    # Score applied to mismatching positions; assigning it sets scoreChanged.
+    property mismatch:
+        def __get__(self):
+            return self._mismatch
+        def __set__(self,mismatch):
+            self._mismatch=mismatch 
+            self.scoreChanged=True
diff --git a/src/obitools/align/_dynamic.pxd b/src/obitools/align/_dynamic.pxd
new file mode 100644
index 0000000..c268c64
--- /dev/null
+++ b/src/obitools/align/_dynamic.pxd
@@ -0,0 +1,90 @@
+cdef import from "stdlib.h":
+    void* malloc(int size)  except NULL
+    void* realloc(void* chunk,int size)  except NULL
+    void free(void* chunk)
+cdef import from "string.h":
+    void bzero(void *s, size_t n)
+    void memset(void* chunk,int car,int length)
+    void memcpy(void* s1, void* s2, int n)
+# One cell of the dynamic programming matrix.
+cdef struct AlignCell :
+    # score stored for the cell by the aligner's doAlignment()
+    double score
+    # traceback information for the cell; its encoding is chosen by the
+    # aligner that fills the matrix
+    int   path 
+# Dynamic programming matrix together with its two gap-jump arrays.
+# The three size fields record the capacity currently allocated for each
+# buffer (see allocateMatrix in _dynamic.pyx).
+cdef struct AlignMatrix :
+    # flat array of msize cells
+    AlignCell*  matrix
+    # array of hsize ints (one entry per matrix column)
+    int*        bestVJump
+    # array of vsize ints (one entry per matrix row)
+    int*        bestHJump
+    # number of cells allocated in matrix
+    int         msize
+    # number of entries allocated in bestHJump
+    int         vsize
+    # number of entries allocated in bestVJump
+    int         hsize
+# Allocate a matrix, or grow the buffers of the one given, so that it can
+# hold (hsize+1) x (vsize+1) cells; returns the matrix.
+cdef AlignMatrix* allocateMatrix(int hsize, int vsize,AlignMatrix *matrix=?)
+# Release the three buffers and the structure itself; NULL is accepted.
+cdef void freeMatrix(AlignMatrix* matrix)
+# Zero the cells and fill both jump arrays with 0xFF bytes; NULL is accepted.
+cdef void resetMatrix(AlignMatrix* matrix)
+# C-level copy of a sequence used by the aligners.
+# NOTE(review): field meanings below are inferred from names and from the
+# aligner code reading .sequence by position; confirm against
+# allocateSequence in _dynamic.pyx.
+cdef struct alignSequence:
+    # presumably the number of symbols stored in sequence
+    long    length
+    # presumably the allocated capacity of the buffers
+    long    buffsize
+    # presumably true when the quality array is filled
+    bint    hasQuality
+    # sequence symbols, indexed by 0-based position
+    char*   sequence
+    # presumably one quality value per position
+    double* quality
+# Build an alignSequence from a Python sequence object, optionally reusing
+# seq; "except *" makes Cython check for a raised exception after each call.
+cdef alignSequence* allocateSequence(object bioseq, alignSequence* seq=?) except *
+cdef void freeSequence(alignSequence* seq)
+# Alignment path produced by an aligner's backtrack().
+cdef struct alignPath:
+    # number of steps currently stored in path (backtrack appends at
+    # path[length] and then increments length)
+    long length
+    # presumably the allocated capacity of path -- TODO confirm in allocatePath
+    long buffsize
+    # start offsets on the vertical / horizontal sequence; the global
+    # aligner's backtrack sets both to 0
+    long vStart
+    long hStart
+    # array of steps; the encoding is defined by the aligner that writes it
+    long *path
+# Called by backtrack() with the two sequence lengths and the current path;
+# the returned pointer replaces self.path.
+cdef alignPath* allocatePath(long l1,long l2,alignPath* path=?)
+cdef void reversePath(alignPath* path)
+cdef void freePath(alignPath* path)
+cdef int bitCount(int x)
+cpdef bint iupacMatch(unsigned char a, unsigned char b)
+cpdef double iupacPartialMatch(unsigned char a, unsigned char b)
+cpdef unsigned char encodeBase(unsigned char lettre)
+# Declaration of the base class shared by the dynamic programming aligners.
+# NOTE(review): the implementation is not visible here; comments describe only
+# what the subclasses in this package are seen to do with these members.
+cdef class DynamicProgramming:
+    # score matrix and gap-jump arrays filled by doAlignment()
+    cdef AlignMatrix* matrix
+    # Python-level sequence objects (presumably the originals given by the
+    # caller -- TODO confirm)
+    cdef object horizontalSeq 
+    cdef object verticalSeq
+    # C-level sequences; aligners read hSeq.sequence / vSeq.sequence
+    cdef alignSequence* hSeq
+    cdef alignSequence* vSeq
+    # path written by backtrack()
+    cdef alignPath*     path
+    # score added when a gap is opened / per position when it is extended
+    cdef double _opengap
+    cdef double _extgap
+    cdef object alignment
+    # change flags; doAlignment() clears both, score property setters set
+    # scoreChanged
+    cdef bint sequenceChanged
+    cdef bint scoreChanged
+    # lengths used as the vertical / horizontal matrix dimensions
+    cdef int _vlen(self)
+    cdef int _hlen(self)
+    # a return value of -1 signals a raised exception
+    cdef int allocate(self) except -1
+    # a return value of 0 makes Cython check whether an exception was raised
+    cdef double doAlignment(self) except? 0
+    cdef void reset(self)
+    # linear matrix index of the cell at horizontal position x, vertical y
+    cdef inline int index(self, int x, int y)
+    cdef inline bint _needToCompute(self)
+    cdef void backtrack(self)
+    cdef void clean(self)
diff --git a/src/obitools/align/_dynamic.pyx b/src/obitools/align/_dynamic.pyx
new file mode 100644
index 0000000..ada9f52
--- /dev/null
+++ b/src/obitools/align/_dynamic.pyx
@@ -0,0 +1,365 @@
+Created on 14 sept. 2009
+ at author: coissac
+from obitools import BioSequence
+from obitools.alignment import AlignedSequence
+from obitools.alignment import Alignment
+# Import standard memory management function to improve
+# efficiency of the alignment code
+from _dynamic cimport * 
+# Allocate (when `matrix` is NULL) or grow in place the buffers of an
+# AlignMatrix for the requested sizes. Both sizes are incremented by one
+# before use. Buffers only ever grow: hsize, vsize and msize record the
+# largest capacities reached so far. Returns the matrix that was passed in,
+# or the newly allocated one.
+# NOTE(review): the results of malloc/realloc are not checked here.
+cdef AlignMatrix* allocateMatrix(int hsize, int vsize,AlignMatrix *matrix=NULL):
+    vsize+=1
+    hsize+=1
+    if matrix is NULL:
+        # Fresh structure: start with empty buffers so realloc acts as malloc
+        matrix = <AlignMatrix*>malloc(sizeof(AlignMatrix))
+        matrix.vsize=0
+        matrix.hsize=0
+        matrix.msize=0
+        matrix.matrix=NULL
+        matrix.bestVJump=NULL
+        matrix.bestHJump=NULL
+    # bestVJump has one entry per horizontal index
+    if hsize > matrix.hsize:
+        matrix.bestVJump = <int*>realloc(matrix.bestVJump,hsize * sizeof(int))
+        matrix.hsize=hsize
+    # bestHJump has one entry per vertical index
+    if vsize > matrix.vsize:
+        matrix.bestHJump = <int*>realloc(matrix.bestHJump,vsize * sizeof(int))
+        matrix.vsize=vsize
+    # The cell buffer is sized from the current request, not from the capacities
+    if (hsize * vsize) > matrix.msize:
+        matrix.msize = hsize * vsize
+        matrix.matrix = <AlignCell*>realloc(matrix.matrix, matrix.msize * sizeof(AlignCell))
+    return matrix
+# Release an AlignMatrix and every buffer it owns; a NULL pointer is ignored.
+cdef void freeMatrix(AlignMatrix* matrix):
+    if matrix is NULL:
+        return
+    # Release the three owned buffers first, then the structure itself
+    if matrix.bestHJump is not NULL:
+        free(matrix.bestHJump)
+    if matrix.bestVJump is not NULL:
+        free(matrix.bestVJump)
+    if matrix.matrix is not NULL:
+        free(matrix.matrix)
+    free(matrix)
+# Reinitialise the buffers of an AlignMatrix; a NULL pointer is ignored.
+# Cells are zeroed, and every byte of both jump arrays is set to 0xFF.
+cdef void resetMatrix(AlignMatrix* matrix):
+    if matrix is NULL:
+        return
+    if matrix.matrix is not NULL:
+        bzero(<void*>matrix.matrix, matrix.msize * sizeof(AlignCell))
+    if matrix.bestHJump is not NULL:
+        memset(<void*>matrix.bestHJump,255,matrix.vsize * sizeof(int))
+    if matrix.bestVJump is not NULL:
+        memset(<void*>matrix.bestVJump,255,matrix.hsize * sizeof(int))
+# Copy a Python sequence object into a C alignSequence.
+#
+#   bioseq : object supporting len(), str(), `'quality' in bioseq` and
+#            bioseq['quality']
+#   seq    : structure to reuse, or NULL to allocate a new one
+#
+# Returns the filled structure. The buffers grow when needed and never shrink.
+# Raises MemoryError when an allocation fails; in that case the buffers
+# already owned by `seq` are left untouched.
+cdef alignSequence* allocateSequence(object bioseq, alignSequence* seq=NULL) except *:
+    cdef bytes strseq
+    cdef int i
+    cdef char* newsequence
+    cdef double* newquality
+    if seq is NULL:
+        seq = <alignSequence*>malloc(sizeof(alignSequence))
+        if seq is NULL:
+            raise MemoryError("Cannot allocate an alignSequence structure")
+        seq.length=0
+        seq.buffsize=0
+        seq.sequence=NULL
+        seq.quality=NULL
+        seq.hasQuality=False
+    seq.length=len(bioseq)
+    if seq.length > seq.buffsize:
+        # Store each realloc result only once it is known to have succeeded,
+        # so a failure never loses the previous buffer
+        newsequence = <char*>realloc(seq.sequence,sizeof(char)*seq.length)
+        if newsequence is NULL:
+            raise MemoryError("Cannot allocate the sequence buffer")
+        seq.sequence = newsequence
+        newquality = <double*>realloc(seq.quality,sizeof(double)*seq.length)
+        if newquality is NULL:
+            raise MemoryError("Cannot allocate the quality buffer")
+        seq.quality  = newquality
+        seq.buffsize = seq.length
+    # Symbols are stored lower-cased; only `length` bytes are copied
+    strseq = str(bioseq).lower()
+    memcpy(seq.sequence,<char*>strseq,seq.length)
+    if 'quality' in bioseq:
+        seq.hasQuality=True
+        quality=bioseq['quality']
+        for i in range(0,seq.length):
+            seq.quality[i]=<double>quality[i]
+    else:
+        # A reused structure must not keep the flag of the previous sequence
+        seq.hasQuality=False
+    return seq
+# Release an alignSequence and both of its buffers; a NULL pointer is ignored.
+cdef void freeSequence(alignSequence* seq):
+    if seq is NULL:
+        return
+    if seq.quality is not NULL:
+        free(<void*>seq.quality)
+    if seq.sequence is not NULL:
+        free(<void*>seq.sequence)
+    free(seq)
+# Allocate (when `path` is NULL) or grow an alignPath able to hold l1+l2
+# entries, then reset it to an empty path starting at offsets 0/0.
+# Returns the path that was passed in, or the newly allocated one.
+# NOTE(review): the results of malloc/realloc are not checked here.
+cdef alignPath* allocatePath(long l1,long l2,alignPath* path=NULL):
+    cdef long length=l1+l2
+    if path is NULL:
+        path = <alignPath*>malloc(sizeof(alignPath))
+        path.length=0
+        path.buffsize=0
+        path.path=NULL
+    # The buffer only grows; a smaller request reuses the existing one
+    if length > path.buffsize:
+        path.buffsize=length
+        path.path=<long*>realloc(path.path,sizeof(long)*length)
+    # Always hand back an empty path, whether or not the buffer was reused
+    path.length=0
+    path.vStart=0
+    path.hStart=0
+    return path
+# Reverse in place the `length` entries currently stored in the path.
+cdef void reversePath(alignPath* path):
+    cdef long i
+    cdef long j
+    j=path.length
+    # Explicit floor division: the loop bound must stay an integer whatever
+    # division semantics the module is compiled with
+    for i in range(path.length//2):
+        j-=1
+        path.path[i],path.path[j]=path.path[j],path.path[i]
+# Release an alignPath and its entry buffer; a NULL pointer is ignored.
+cdef void freePath(alignPath* path):
+    if path is NULL:
+        return
+    if path.path is not NULL:
+        free(<void*>path.path)
+    free(<void*>path)
+cdef int aascii = ord(b'a')
+cdef int _basecode[26]
+# Return the number of bits set in x.
+cdef int bitCount(int x):
+    cdef int count=0
+    while x != 0:
+        # x & (x-1) clears the lowest set bit, so the loop runs once per set bit
+        x = x & (x-1)
+        count += 1
+    return count
+# Tell whether two symbols are compatible: the bitwise AND of their
+# _basecode entries is returned as a bint.
+# '*' (ASCII 42) is handled as 'n' (ASCII 110).
+# NOTE(review): _basecode has 26 entries indexed by `symbol - ord('a')` and
+# the index is not range-checked, so callers presumably pass lower-case
+# letters only - confirm.
+cpdef bint iupacMatch(unsigned char a, unsigned char b):
+    cdef bint m 
+    if a==42:    # * ascii code
+        a=110    # n ascii code
+    if b==42:    # * ascii code
+        b=110    # n ascii code
+    m = _basecode[a - aascii] & _basecode[b - aascii]
+    return m
+# Return the _basecode entry of a symbol, indexed by `lettre - ord('a')`.
+# NOTE(review): the index is not range-checked; presumably only lower-case
+# letters are passed - confirm.
+cpdef unsigned char encodeBase(unsigned char lettre):
+    return _basecode[lettre - aascii]
+# Return a partial match score between two symbols:
+#
+#     bitCount(codeA & codeB) / (bitCount(codeA) * bitCount(codeB))
+#
+# where codeA and codeB are the _basecode entries of the symbols.
+# '*' (ASCII 42) is handled as 'n' (ASCII 110).
+# Returns 0.0 when either symbol has an empty code, since nothing can match.
+# NOTE(review): _basecode is indexed by `symbol - ord('a')` without a range
+# check; presumably only lower-case letters are passed - confirm.
+cpdef double iupacPartialMatch(unsigned char a, unsigned char b):
+    cdef int codeA
+    cdef int codeB
+    cdef int good
+    cdef int total
+    if a==42:    # * ascii code
+        a=110    # n ascii code
+    if b==42:    # * ascii code
+        b=110    # n ascii code
+    codeA =  _basecode[a - aascii]
+    codeB =  _basecode[b - aascii]
+    good  =  bitCount(codeA & codeB)
+    total =  bitCount(codeA)  * bitCount(codeB)
+    # Guard the division: a symbol with an empty code would divide by zero
+    if total == 0:
+        return 0.0
+    return <double>good / total
+cdef class DynamicProgramming:
+    # Base class of the pairwise aligners.
+    #
+    # It owns the C buffers (DP matrix, C copies of both sequences,
+    # backtracking path) and turns a computed path into an Alignment.
+    # Subclasses provide doAlignment() and backtrack(); both are no-ops here.
+    def __init__(self,opengap,extgap):
+        # opengap / extgap: gap opening and gap extension scores
+        self.sequenceChanged=True
+        self.scoreChanged=True
+        self.matrix=NULL
+        self.hSeq=NULL
+        self.vSeq=NULL
+        self.path=NULL
+        self.horizontalSeq=None
+        self.verticalSeq=None
+        self._opengap=opengap
+        self._extgap=extgap
+    cdef int _vlen(self):
+        # Length of the vertical sequence (seqB)
+        return self.vSeq.length
+    cdef int _hlen(self):
+        # Length of the horizontal sequence (seqA)
+        return self.hSeq.length
+    cdef int allocate(self) except -1:
+        # (Re)allocate the DP matrix for the current pair of sequences.
+        # Both sequences must have been set; returns 0 on success.
+        assert self.horizontalSeq is not None,'Sequence A must be set'
+        assert self.verticalSeq is not None,'Sequence B must be set'
+        cdef long lenH=self._hlen()
+        cdef long lenV=self._vlen()
+        self.matrix=allocateMatrix(lenH,lenV,self.matrix)
+        return 0
+    cdef double doAlignment(self) except? 0:
+        # Fill the matrix and return the score; implemented by subclasses
+        pass
+    cdef bint _needToCompute(self):
+        # True when a sequence or a score changed since the last computation
+        return self.scoreChanged or self.sequenceChanged
+    cdef void backtrack(self):
+        # Fill self.path from the matrix; implemented by subclasses
+        pass
+    property seqA:
+            # Horizontal sequence; assigning it refreshes the C copy
+            def __get__(self):
+                return self.horizontalSeq
+            def __set__(self, seq):
+                self.sequenceChanged=True
+                self.horizontalSeq=seq
+                self.hSeq=allocateSequence(self.horizontalSeq,self.hSeq)
+    property seqB:
+            # Vertical sequence; assigning it refreshes the C copy
+            def __get__(self):
+                return self.verticalSeq
+            def __set__(self, seq):
+                self.sequenceChanged=True
+                self.verticalSeq=seq
+                self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
+    property opengap:
+        def __get__(self):
+            return self._opengap
+        def __set__(self,opengap):
+            self._opengap=opengap 
+            self.scoreChanged=True
+    property extgap:
+        def __get__(self):
+            return self._extgap
+        def __set__(self,extgap):
+            self._extgap=extgap 
+            self.scoreChanged=True
+    property needToCompute:
+        def __get__(self):
+            return self.scoreChanged or self.sequenceChanged
+    property score:
+        def __get__(self):
+            return self.doAlignment()
+    cdef void reset(self):
+        # Mark the scores as changed and reinitialise the matrix buffers
+        self.scoreChanged=True
+        resetMatrix(self.matrix)
+    cdef inline int index(self, int x, int y):
+        # Flat index of cell (x, y): rows hold _hlen()+1 cells
+        return (self._hlen()+1) * y + x
+    cdef void clean(self):
+        # Release every C buffer. The pointers are reset to NULL so that a
+        # second call (for instance clean() followed by __dealloc__) does not
+        # free the same memory twice.
+        freeMatrix(self.matrix)
+        self.matrix=NULL
+        freeSequence(self.hSeq)
+        self.hSeq=NULL
+        freeSequence(self.vSeq)
+        self.vSeq=NULL
+        freePath(self.path)
+        self.path=NULL
+    def __dealloc__(self):
+        self.clean()
+    def __call__(self):
+        # Return a clone of the alignment of seqA and seqB, recomputing it
+        # only when a sequence or a score changed since the last call.
+        cdef list hgaps=[]
+        cdef list vgaps=[]
+        cdef int  hp=0
+        cdef int  vp=0
+        cdef int  lenh=0
+        cdef int  lenv=0
+        cdef int  p
+        cdef int  i
+        cdef object ali
+        cdef double score
+        if self._needToCompute():
+            score = self.doAlignment()
+            self.backtrack()
+            # The path is read from its last entry down to its first one.
+            # hp / vp count the positions seen since the last gap recorded
+            # for the horizontal / vertical sequence; lenh / lenv count the
+            # positions of each sequence covered by the path.
+            for i in range(self.path.length-1,-1,-1):
+                p=self.path.path[i]
+                if p==0:
+                    # Both sequences advance by one position
+                    hp+=1
+                    vp+=1
+                    lenh+=1
+                    lenv+=1
+                elif p>0:
+                    # p horizontal positions face a gap in the vertical one
+                    hp+=p
+                    lenh+=p
+                    vgaps.append([vp,p])
+                    vp=0
+                else:
+                    # -p vertical positions face a gap in the horizontal one
+                    vp-=p
+                    lenv-=p
+                    hgaps.append([hp,-p])
+                    hp=0
+            # Trailing positions not followed by a gap
+            if hp:
+                hgaps.append([hp,0])
+            if vp:
+                vgaps.append([vp,0])
+            # Keep only the part of each sequence covered by the path
+            if lenh < self._hlen():
+                hseq=self.horizontalSeq[self.path.hStart:self.path.hStart+lenh]
+            else:
+                hseq=self.horizontalSeq
+            hseq=AlignedSequence(hseq) 
+            hseq.gaps=hgaps       
+            if lenv < self._vlen():
+                vseq=self.verticalSeq[self.path.vStart:self.path.vStart+lenv]
+            else:
+                vseq=self.verticalSeq
+            vseq=AlignedSequence(vseq) 
+            vseq.gaps=vgaps       
+            ali=Alignment()
+            ali.append(hseq)
+            ali.append(vseq)
+            ali.score=score
+            self.alignment=ali
+        # Callers get a clone, so the cached alignment is never modified
+        ali=self.alignment.clone()
+        ali.score=self.alignment.score
+        return ali
+# Initialise the C array _basecode (26 entries, one per lower-case letter)
+# from the Python-level sequence __basecode.
+# NOTE(review): the definition of __basecode is not visible in this part of
+# the file - confirm it is defined before this loop runs.
+for i in range(26):
+    _basecode[i]=__basecode[i]
\ No newline at end of file
diff --git a/src/obitools/align/_freeendgap.pxd b/src/obitools/align/_freeendgap.pxd
new file mode 100644
index 0000000..d829b33
--- /dev/null
+++ b/src/obitools/align/_freeendgap.pxd
@@ -0,0 +1,9 @@
+from _nws cimport *  
+# C-level declarations of the FreeEndGap aligner.
+cdef class FreeEndGap(NWS):
+    cdef double xsmax   # best score found in the last row of the matrix
+    cdef int    xmax    # horizontal index at which xsmax was found
+    cdef double doAlignment(self) except? 0
diff --git a/src/obitools/align/_freeendgap.pyx b/src/obitools/align/_freeendgap.pyx
new file mode 100644
index 0000000..783fad7
--- /dev/null
+++ b/src/obitools/align/_freeendgap.pyx
@@ -0,0 +1,161 @@
+Created on 6 Nov. 2009
+ at author: coissac
+from _freeendgap cimport *  
+cdef class FreeEndGap(NWS):
+    # Aligner in which the first row of the matrix is scored 0 and the result
+    # is read at the best cell of the last row, so the horizontal positions
+    # before and after the aligned region carry no gap score.
+    # Sequence A (horizontal) must be strictly longer than sequence B.
+    def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
+        NWS.__init__(self,match,mismatch,opengap,extgap)
+        self.xsmax=0
+        self.xmax=0
+    cdef double doAlignment(self) except? 0:
+        # Fill the DP matrix when needed and return the best score found in
+        # its last row (self.xsmax); self.xmax receives the matching column.
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int idx0
+        cdef int idx1
+        cdef int jump
+        cdef int delta
+        cdef double score
+        cdef double scoremax
+        cdef int    path
+        assert self.hSeq.length > self.vSeq.length, \
+                "Sequence B must be shorter than sequence A"
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            self.xsmax=0
+            self.xmax=0
+            # First row: score 0 whatever the column, path code j
+            for j in range(1,self.hSeq.length+1):
+                idx = self.index(j,0)
+                self.matrix.matrix[idx].score = 0
+                self.matrix.matrix[idx].path  = j
+            # First column: one gap opening plus i-1 extensions, path code -i
+            for i in range(1,self.vSeq.length+1):
+                idx = self.index(0,i)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
+                self.matrix.matrix[idx].path  = -i
+            # idx0 follows cell (j-1,i-1) and idx1 cell (j,i). Both advance by
+            # one per cell, plus one at each row start to skip column 0.
+            idx0=self.index(-1,0)
+            idx1=self.index(0,1)
+            for i in range(1,self.vSeq.length+1):
+                idx0+=1
+                idx1+=1
+                for j in range(1,self.hSeq.length+1):
+                    # 1 - came from diagonal
+                    #idx = self.index(j-1,i-1)
+                    idx = idx0
+                    # print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
+                    scoremax = self.matrix.matrix[idx].score + \
+                               self.matchScore(j,i)
+                    path = 0
+                    # print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
+                    # 2 - open horizontal gap
+                    # idx = self.index(j-1,i)
+                    idx = idx1 - 1
+                    score = self.matrix.matrix[idx].score+ \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = +1
+                    # 3 - open vertical gap
+                    # idx = self.index(j,i-1)
+                    idx = idx0 + 1
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = -1
+                    # 4 - extend horizontal gap
+                    # bestHJump[i] is the column where a gap was last opened
+                    # in row i; it reads as negative after reset()
+                    jump = self.matrix.bestHJump[i]
+                    if jump >= 0:
+                        idx = self.index(jump,i)
+                        delta = j-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = delta+1 
+                    # 5 - extend vertical gap
+                    # bestVJump[j] is the row where a gap was last opened in
+                    # column j; it reads as negative after reset()
+                    jump = self.matrix.bestVJump[j]
+                    if jump >= 0:
+                        idx = self.index(j,jump)
+                        delta = i-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = -delta-1 
+                    # idx = self.index(j,i)
+                    idx = idx1
+                    self.matrix.matrix[idx].score = scoremax
+                    self.matrix.matrix[idx].path  = path 
+                    # Remember where a gap was opened so it can be extended
+                    if path == -1:
+                        self.matrix.bestVJump[j]=i
+                    elif path == +1 :
+                        self.matrix.bestHJump[i]=j
+                    # Track the best cell of the last row; xsmax starts at 0,
+                    # so only strictly positive scores are recorded
+                    if i==self.vSeq.length and scoremax > self.xsmax:
+                        self.xsmax=scoremax
+                        self.xmax=j
+                    idx0+=1
+                    idx1+=1
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        return self.xsmax
+    cdef void backtrack(self):
+        # Build self.path by walking back from the best cell of the last row,
+        # (xmax, vSeq.length), to the origin. Entries are stored in walking
+        # order, end of the alignment first; the path is not reversed.
+        #cdef list path=[]
+        cdef int i
+        cdef int j 
+        cdef int p
+        # No-op when the matrix is already up to date
+        self.doAlignment()
+        j=self.xmax
+        i=self.vSeq.length
+        self.path=allocatePath(i,j+1,self.path)
+        # Horizontal positions after xmax are stored as one positive entry
+        if self.xmax<self.hSeq.length:
+            self.path.path[self.path.length]=self.hSeq.length-self.xmax
+            self.path.length+=1
+        while (i or j):
+            p=self.matrix.matrix[self.index(j,i)].path
+            self.path.path[self.path.length]=p
+            self.path.length+=1
+            #path.append(p)
+            if p==0:
+                # Diagonal move
+                i-=1
+                j-=1
+            elif p < 0:
+                # Vertical move of -p cells
+                i+=p
+            else:
+                # Horizontal move of p cells
+                j-=p
+        #path.reverse()
+        #reversePath(self.path)
+        self.path.hStart=0
+        self.path.vStart=0
+        #return 0,0,path
diff --git a/src/obitools/align/_freeendgapfm.pxd b/src/obitools/align/_freeendgapfm.pxd
new file mode 100644
index 0000000..2cf35b9
--- /dev/null
+++ b/src/obitools/align/_freeendgapfm.pxd
@@ -0,0 +1,5 @@
+from _freeendgap cimport *  
+# C-level declarations of the FreeEndGap variant that overrides matchScore.
+cdef class FreeEndGapFullMatch(FreeEndGap):
+    cdef double matchScore(self,int h, int v)
diff --git a/src/obitools/align/_freeendgapfm.pyx b/src/obitools/align/_freeendgapfm.pyx
new file mode 100644
index 0000000..5a2c858
--- /dev/null
+++ b/src/obitools/align/_freeendgapfm.pyx
@@ -0,0 +1,19 @@
+Created on 6 Nov. 2009
+ at author: coissac
+from _freeendgapfm cimport *  
+cdef class FreeEndGapFullMatch(FreeEndGap):
+    # FreeEndGap aligner scoring a pair of symbols as a full match or a full
+    # mismatch, as decided by iupacMatch.
+    cdef double matchScore(self,int h, int v):
+        # h and v are 1-based positions in the horizontal / vertical sequence
+        if iupacMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1]):
+            return self._match
+        return self._mismatch
diff --git a/src/obitools/align/_gprofilenws.pxd b/src/obitools/align/_gprofilenws.pxd
new file mode 100644
index 0000000..22fd47f
--- /dev/null
+++ b/src/obitools/align/_gprofilenws.pxd
@@ -0,0 +1,8 @@
+from _profilenws cimport *
+# C-level declarations of the profile/profile aligner.
+cdef class GProfileNWS(ProfileNWS):
+    cdef double matchScore(self,int h, int v)
+    cdef object alignment1   # cached aligned profile built from the horizontal input
+    cdef object alignment2   # cached aligned profile built from the vertical input
+    #cdef double doAlignment(self) except? 0
\ No newline at end of file
diff --git a/src/obitools/align/_gprofilenws.pyx b/src/obitools/align/_gprofilenws.pyx
new file mode 100644
index 0000000..e615dd2
--- /dev/null
+++ b/src/obitools/align/_gprofilenws.pyx
@@ -0,0 +1,167 @@
+Created on 16 Feb. 2011
+ at author: celine
+from _gprofilenws cimport *
+cdef class GProfileNWS(ProfileNWS):
+    # Profile/profile aligner: calling it returns both input profiles
+    # rewritten on the columns of their alignment.
+    cdef double matchScore(self,int h, int v):
+        # Score of aligning column h of the horizontal profile with column v
+        # of the vertical one (1-based). Each frequency array is read as six
+        # consecutive rows of `length` values; pmatch is the sum, over the
+        # six rows, of the products of the two columns.
+        # NOTE(review): the six rows presumably are A, C, G, T, Og, Eg - confirm.
+        cdef double pmatch
+        cdef double* hp = self.hProf.frequency
+        cdef double* vp = self.vProf.frequency
+        cdef int     hl = self.hProf.length
+        cdef int     vl = self.vProf.length
+        h-=1
+        v-=1
+        pmatch =  hp[h]*vp[v] + \
+                  hp[h+hl]*vp[v+vl] + \
+                  hp[h+2*hl]*vp[v+2*vl] + \
+                  hp[h+3*hl]*vp[v+3*vl] + \
+                  hp[h+4*hl]*vp[v+4*vl] + \
+                  hp[h+5*hl]*vp[v+5*vl]
+        return self._match * pmatch + (1-pmatch) * self._mismatch
+    def __call__(self,pseudocounts=0):
+        # Align both profiles (only when something changed since the last
+        # call) and return two new DNAProfile objects built from the cached
+        # aligned profiles with the given pseudocounts.
+        cdef int  hp
+        cdef int  vp
+        cdef int  v,p
+        cdef int  i
+        cdef double score
+        cdef DNAProfile newProfile1
+        cdef DNAProfile newProfile2
+        cdef DNAProfile horizontalSeq=self.horizontalSeq
+        cdef DNAProfile verticalSeq=self.verticalSeq
+        if self._needToCompute():
+            score = self.doAlignment()
+            self.backtrack()
+            # Number of alignment columns: one per diagonal step, |v| per gap
+            alilength = 0
+            for p in xrange(self.path.length) :
+                v = self.path.path[p]
+                if v == 0 :
+                    alilength += 1
+                else :
+                    alilength += abs(v)
+            newProfile1 = DNAProfile(size=alilength,pseudo=pseudocounts)
+            newProfile1.profile.weight = horizontalSeq.profile.weight
+            newProfile2 = DNAProfile(size=alilength,pseudo=pseudocounts)
+            newProfile2.profile.weight = verticalSeq.profile.weight
+            # Source and destination columns are filled from the last one
+            # down to the first one while the path is read in stored order
+            hp=horizontalSeq.profile.length-1
+            vp=verticalSeq.profile.length-1
+            rp1=newProfile1.profile.length-1
+            rp2=newProfile2.profile.length-1
+            for i in range(self.path.length):
+                p=self.path.path[i]
+                if p==0:
+                    # Aligned columns: copy one column of each profile
+                    newProfile1.A[rp1] = horizontalSeq.A[hp]
+                    newProfile1.C[rp1] = horizontalSeq.C[hp]
+                    newProfile1.G[rp1] = horizontalSeq.G[hp]
+                    newProfile1.T[rp1] = horizontalSeq.T[hp]
+                    newProfile1.Og[rp1] = horizontalSeq.Og[hp]
+                    newProfile1.Eg[rp1] = horizontalSeq.Eg[hp]
+                    newProfile2.A[rp2] = verticalSeq.A[vp]
+                    newProfile2.C[rp2] = verticalSeq.C[vp]
+                    newProfile2.G[rp2] = verticalSeq.G[vp]
+                    newProfile2.T[rp2] = verticalSeq.T[vp]
+                    newProfile2.Og[rp2] = verticalSeq.Og[vp]
+                    newProfile2.Eg[rp2] = verticalSeq.Eg[vp]
+                    hp-=1
+                    vp-=1
+                    rp1-=1
+                    rp2-=1
+                elif p>0:
+                    # p horizontal columns face a gap in the vertical profile:
+                    # the first p-1 columns written get Eg, the last one Og,
+                    # each set to the weight of the vertical profile
+                    for x in xrange(p-1) :
+                        newProfile1.A[rp1] = horizontalSeq.A[hp]
+                        newProfile1.C[rp1] = horizontalSeq.C[hp]
+                        newProfile1.G[rp1] = horizontalSeq.G[hp]
+                        newProfile1.T[rp1] = horizontalSeq.T[hp]
+                        newProfile1.Og[rp1] = horizontalSeq.Og[hp]
+                        newProfile1.Eg[rp1] = horizontalSeq.Eg[hp]
+                        newProfile2.Eg[rp2] = verticalSeq.profile.weight
+                        hp-=1
+                        rp1-=1
+                        rp2-=1
+                    newProfile1.A[rp1] = horizontalSeq.A[hp]
+                    newProfile1.C[rp1] = horizontalSeq.C[hp]
+                    newProfile1.G[rp1] = horizontalSeq.G[hp]
+                    newProfile1.T[rp1] = horizontalSeq.T[hp]
+                    newProfile1.Og[rp1] = horizontalSeq.Og[hp]
+                    newProfile1.Eg[rp1] = horizontalSeq.Eg[hp]
+                    newProfile2.Og[rp2] = verticalSeq.profile.weight
+                    hp-=1
+                    rp1-=1
+                    rp2-=1
+                else:
+                    # |p| vertical columns face a gap in the horizontal
+                    # profile: same layout with the two profiles swapped
+                    for x in xrange(abs(p)-1) :
+                        newProfile2.A[rp2] = verticalSeq.A[vp]
+                        newProfile2.C[rp2] = verticalSeq.C[vp]
+                        newProfile2.G[rp2] = verticalSeq.G[vp]
+                        newProfile2.T[rp2] = verticalSeq.T[vp]
+                        newProfile2.Og[rp2] = verticalSeq.Og[vp]
+                        newProfile2.Eg[rp2] = verticalSeq.Eg[vp]
+                        newProfile1.Eg[rp1] = horizontalSeq.profile.weight
+                        vp-=1
+                        rp1-=1
+                        rp2-=1
+                    newProfile2.A[rp2] = verticalSeq.A[vp]
+                    newProfile2.C[rp2] = verticalSeq.C[vp]
+                    newProfile2.G[rp2] = verticalSeq.G[vp]
+                    newProfile2.T[rp2] = verticalSeq.T[vp]
+                    newProfile2.Og[rp2] = verticalSeq.Og[vp]
+                    newProfile2.Eg[rp2] = verticalSeq.Eg[vp]
+                    newProfile1.Og[rp1] = horizontalSeq.profile.weight
+                    vp-=1
+                    rp1-=1
+                    rp2-=1
+            self.alignment1 = newProfile1
+            self.alignment2 = newProfile2
+        # Callers always get fresh profiles built from the cached ones
+        ali1=DNAProfile(self.alignment1,pseudo=pseudocounts)
+        ali2=DNAProfile(self.alignment2,pseudo=pseudocounts)
+        return ali1, ali2
diff --git a/src/obitools/align/_lcs.cfiles b/src/obitools/align/_lcs.cfiles
new file mode 100644
index 0000000..0e27863
--- /dev/null
+++ b/src/obitools/align/_lcs.cfiles
@@ -0,0 +1 @@
\ No newline at end of file
diff --git a/src/obitools/align/_lcs.ext.1.c b/src/obitools/align/_lcs.ext.1.c
new file mode 100644
index 0000000..7614125
--- /dev/null
+++ b/src/obitools/align/_lcs.ext.1.c
@@ -0,0 +1,168 @@
+#include "_lcs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+// Allocate, or grow, a column able to hold 'length' + 1 cells.
+//
+//   length    : number of cells required (one extra cell is always added)
+//   column    : column to reuse, or NULL to allocate a new one
+//   mode8bits : true for int8_t cells, false for int16_t cells
+//
+// Returns the column, or NULL when an allocation fails. On failure a column
+// passed by the caller is left untouched and stays owned by the caller.
+// The content of the buffers is NOT preserved when they are replaced.
+//
+// NOTE(review): column_t.size is declared int16_t in _lcs.h, so a byte size
+// above INT16_MAX does not fit in it - confirm the expected sequence lengths.
+column_t* allocateColumn(int length,column_t *column, bool mode8bits)
+{
+	int size;
+	bool newc = false;
+	int16_t *newdata;
+	int16_t *newscore;
+			// Size in bytes of each of the two buffers
+	size = (length+1) * ((mode8bits) ? sizeof(int8_t):sizeof(int16_t));
+			// If the pointer to the old column is NULL we allocate
+			// a new column
+	if (column==NULL)
+	{
+		column = malloc(sizeof(column_t));
+		if (!column)
+			return NULL;
+		column->size = 0;
+		column->data.shrt=NULL;
+		column->score.shrt=NULL;
+		newc = true;
+	}
+			// Otherwise we check if its size is sufficient
+			// or if it should be extended
+	if (size > column->size)
+	{
+			// Allocate both new buffers before touching the column,
+			// so that a failure leaves it in its previous state
+		newdata  = malloc(size);
+		newscore = malloc(size);
+		if (newdata==NULL || newscore==NULL)
+		{
+			fprintf(stderr,"Allocation Error on column for a size of %d\n" , size);
+			// free(NULL) is a no-op: release whichever half succeeded
+			free(newdata);
+			free(newscore);
+			if (newc)
+				free(column);
+			return NULL;
+		}
+			// Release the buffers being replaced (no-op when NULL)
+		free(column->data.shrt);
+		free(column->score.shrt);
+		column->data.shrt = newdata;
+		column->score.shrt= newscore;
+		column->size = size;
+	}
+	return column;
+}
+// Release a column and both of its buffers; a NULL column is ignored.
+void freeColumn(column_p column)
+	if (column)
+	{
+		if (column->data.shrt)
+			free(column->data.shrt);
+		if (column->score.shrt)
+			free(column->score.shrt);
+		free(column);
+	}
+// Entry point of the fast LCS score: forwards every argument unchanged to
+// the 16-bit implementation, fastLCSScore16, and returns its result.
+int fastLCSScore(const char* seq1, const char* seq2,column_pp column,int32_t* lpath)
+	return fastLCSScore16(seq1,seq2,column,lpath);
+// Reference (non vectorised) computation of the length of the longest
+// common subsequence of two NUL terminated strings.
+//
+//   seq1, seq2 : the two sequences (internally swapped so that seq1 is the
+//                longest one)
+//   ppcolumn   : optional reusable work column; when not NULL the column
+//                used is stored back in *ppcolumn, otherwise it is freed
+//   lpath      : when not NULL, receives the path value of the last cell
+//
+// Returns the LCS length, or -1 when the work column cannot be allocated.
+//
+// NOTE(review): path[] is only initialised for the first row and never
+// updated afterwards, so the value stored in *lpath is presumably not a
+// reliable alignment length - confirm the intended semantics.
+int simpleLCS(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath)
+{
+	int lseq1,lseq2;		// length of the both sequences
+	int lcs;
+	int itmp;				// tmp variables for swap
+	const char* stmp;		//
+	int32_t *score;
+	int32_t *path;
+	column_t *column;
+	int32_t i,j;
+	int32_t sl,su,sd;
+	int32_t pl,pu,pd;
+		// Make seq1 the longest sequence
+	lseq1=strlen(seq1);
+	lseq2=strlen(seq2);
+	if (lseq1 < lseq2)
+	{
+		itmp=lseq1;
+		lseq1=lseq2;
+		lseq2=itmp;
+		stmp=seq1;
+		seq1=seq2;
+		seq2=stmp;
+	}
+	lseq1++;
+	lseq2++;
+							// a column sized to the longest sequence is allocated:
+							// 2*lseq1 int16_t cells are reused as lseq1 int32_t cells
+	if (ppcolumn)
+		column = *ppcolumn;
+	else
+		column=NULL;
+	column = allocateColumn(lseq1*2,column,0);
+	if (column==NULL)
+		return -1;
+	score = (int32_t*) column->score.shrt;
+	path = (int32_t*) column->data.shrt;
+	memset(score,0,lseq1 * sizeof(int32_t));
+	for (j=0; j < lseq1; j++)
+		path[j]=j;
+							// Values of the last cell of the first row: they are
+							// the result when the shortest sequence is empty
+	sl=0;
+	pl=lseq1-1;
+	for (i=1; i< lseq2; i++)
+	{
+		sl=0;
+		pl=i;
+		for (j=1; j < lseq1; j++)
+		{
+			sd=score[j-1] + (seq2[i-1]==seq1[j-1] ? 1:0);
+			pd=path[j-1]  + 1;
+			su=score[j];
+			pu=path[j] + 1;
+							// score[j-1] of the previous row is no longer needed:
+							// replace it by the value of the current row
+			score[j-1]=sl;
+			if (su > sl) sl=su, pl=pu;
+			if (sd > sl) sl=sd, pl=pd;
+		}
+							// Store the last cell of the row too, otherwise the
+							// next row reads a stale value for its last column
+		score[lseq1-1]=sl;
+	}
+	lcs = sl;
+	if(lpath) *lpath=pl;
+	if (ppcolumn)
+		*ppcolumn=column;
+	else
+		freeColumn(column);
+	return lcs;
+}
diff --git a/src/obitools/align/_lcs.ext.2.c b/src/obitools/align/_lcs.ext.2.c
new file mode 100644
index 0000000..381dc6a
--- /dev/null
+++ b/src/obitools/align/_lcs.ext.2.c
@@ -0,0 +1,34 @@
+#include "_lcs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#define VSIZE (8)
+#define VTYPE vInt16
+#define STYPE int16_t
+#define CMENB shrt
+#define VMODE false
+#define FASTLCSSCORE fastLCSScore16
+#define ADD_REG    _MM_ADD_EPI16
+#define SUB_REG    _MM_SUB_EPI16
+#define AND_REG    _MM_AND_SI128
+#define ANDNOT_REG    _MM_ANDNOT_SI128
+#define OR_REG    _MM_OR_SI128
+#define SET_CONST  _MM_SET1_EPI16
+#define GET_MAX    _MM_MAX_EPI16
+#define GET_MIN    _MM_MIN_EPI16
+#define MIN_SCORE  INT16_MIN
+#define MAX_SCORE  32000
+#include "_lcs_fast.h"
diff --git a/src/obitools/align/_lcs.ext.3.c b/src/obitools/align/_lcs.ext.3.c
new file mode 100644
index 0000000..5c3a150
--- /dev/null
+++ b/src/obitools/align/_lcs.ext.3.c
@@ -0,0 +1,34 @@
+#include "_lcs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#define VSIZE (16)
+#define VTYPE vInt8
+#define STYPE int8_t
+#define CMENB byte
+#define VMODE true
+#define FASTLCSSCORE fastLCSScore8
+#define ADD_REG    _MM_ADD_EPI8
+#define SUB_REG    _MM_SUB_EPI8
+#define AND_REG    _MM_AND_SI128
+#define ANDNOT_REG    _MM_ANDNOT_SI128
+#define OR_REG    _MM_OR_SI128
+#define SET_CONST  _MM_SET1_EPI8
+#define GET_MAX    _MM_MAX_EPI8
+#define GET_MIN    _MM_MIN_EPI8
+#define MIN_SCORE  INT8_MIN
+#define MAX_SCORE  127
+#include "_lcs_fast.h"
diff --git a/src/obitools/align/_lcs.ext.4.c b/src/obitools/align/_lcs.ext.4.c
new file mode 100644
index 0000000..ed2d060
--- /dev/null
+++ b/src/obitools/align/_lcs.ext.4.c
@@ -0,0 +1,225 @@
+#include "_sse.h"
+#include <stdio.h>
+#include <math.h>
+// Turn 16 sequence bytes into words made of four 2-bit symbol codes.
+// Each byte is first reduced to (byte >> 1) & 0x03; the codes of the three
+// following bytes are then appended by three "shift left by 2 bits, OR with
+// the byte-shifted codes" rounds.
+// NOTE(review): assumes the _MM_* macros of _sse.h map to the SSE2
+// intrinsics of the same name - confirm.
+inline static uchar_v hash4m128(uchar_v frag)
+	uchar_v words;
+	vUInt8 mask_03= _MM_SET1_EPI8(0x03);        // load the register with 16 copies of the same byte
+	vUInt8 mask_FC= _MM_SET1_EPI8(0xFC);
+	frag.m = _MM_SRLI_EPI64(frag.m,1);         // logical right shift on 2 x 64 bits
+	frag.m = _MM_AND_SI128(frag.m,mask_03);    // AND over the 128 bits
+	words.m= _MM_SLLI_EPI64(frag.m,2);
+	words.m= _MM_AND_SI128(words.m,mask_FC);   // drop the bits shifted in from the neighbouring byte
+	frag.m = _MM_SRLI_SI128(frag.m,1);
+	words.m= _MM_OR_SI128(words.m,frag.m);
+	words.m= _MM_SLLI_EPI64(words.m,2);
+	words.m= _MM_AND_SI128(words.m,mask_FC);
+	frag.m = _MM_SRLI_SI128(frag.m,1);
+	words.m= _MM_OR_SI128(words.m,frag.m);
+	words.m= _MM_SLLI_EPI64(words.m,2);
+	words.m= _MM_AND_SI128(words.m,mask_FC);
+	frag.m = _MM_SRLI_SI128(frag.m,1);
+	words.m= _MM_OR_SI128(words.m,frag.m);
+	return words;
+// Return 1 when at least one byte of 'data' is zero, 0 otherwise.
+// The bytes are compared with zero; the result is non null as soon as one
+// of the two elements read through tmp.c is non null.
+// NOTE(review): presumably uint64_v exposes the register as two 64-bit
+// words - confirm in _sse.h.
+inline static int anyzerom128(vUInt8 data)
+	vUInt8 mask_00= _MM_SETZERO_SI128();
+	uint64_v tmp;
+	tmp.m = _MM_CMPEQ_EPI8(data,mask_00);
+	return (int)(tmp.c[0]!=0 || tmp.c[1]!=0);
+// Copy the 16 bytes of 'data' into 'table'.
+// NOTE(review): memcpy is used but <string.h> is not among the headers
+// included directly by this file - presumably provided through _sse.h.
+inline static void dumpm128(unsigned short *table,vUInt8 data)
+	memcpy(table,&data,16);
+// Count the 4-symbol words of a NUL terminated sequence.
+//
+//   sequence : the sequence to index
+//   table    : 256 counters, one per word code; reset here, saturating at 255
+//   count    : when not NULL, receives the number of words counted
+//
+// Returns the number of increments lost because a counter was already at 255.
+// NOTE(review): each step loads 16 bytes from the current position, which
+// may read past the terminating NUL - confirm that callers pad their buffers.
+int buildTable(const char* sequence, unsigned char *table, int *count)
+	int overflow = 0;
+	int wc=0;
+	int i;
+	vUInt8 mask_00= _MM_SETZERO_SI128();
+	uchar_v frag;
+	uchar_v words;
+	uchar_v zero;
+	char* s;
+	s=(char*)sequence;
+	memset(table,0,256*sizeof(unsigned char));
+	// encode ascii sequence with  A : 00 C : 01  T: 10   G : 11
+	// Main loop: while the 16 loaded bytes hold no NUL, count the 12 words
+	// starting at the first 12 bytes and advance by 12 symbols
+	for(frag.m=_MM_LOADU_SI128((vUInt8*)s);
+		! anyzerom128(frag.m);
+		s+=12,frag.m=_MM_LOADU_SI128((vUInt8*)s))
+	{
+		words= hash4m128(frag);
+		// printf("%d %d %d %d\n",words.c[0],words.c[1],words.c[2],words.c[3]);
+		if (table[words.c[0]]<255)  table[words.c[0]]++;  else overflow++;
+		if (table[words.c[1]]<255)  table[words.c[1]]++;  else overflow++;
+		if (table[words.c[2]]<255)  table[words.c[2]]++;  else overflow++;
+		if (table[words.c[3]]<255)  table[words.c[3]]++;  else overflow++;
+		if (table[words.c[4]]<255)  table[words.c[4]]++;  else overflow++;
+		if (table[words.c[5]]<255)  table[words.c[5]]++;  else overflow++;
+		if (table[words.c[6]]<255)  table[words.c[6]]++;  else overflow++;
+		if (table[words.c[7]]<255)  table[words.c[7]]++;  else overflow++;
+		if (table[words.c[8]]<255)  table[words.c[8]]++;  else overflow++;
+		if (table[words.c[9]]<255)  table[words.c[9]]++;  else overflow++;
+		if (table[words.c[10]]<255) table[words.c[10]]++; else overflow++;
+		if (table[words.c[11]]<255) table[words.c[11]]++; else overflow++;
+		wc+=12;
+	}
+	// Tail: the last fragment holds a NUL. Words are counted one by one as
+	// long as the fourth byte of the word is not the NUL.
+	zero.m=_MM_CMPEQ_EPI8(frag.m,mask_00);
+	//printf("frag=%d %d %d %d\n",frag.c[0],frag.c[1],frag.c[2],frag.c[3]);
+	//printf("zero=%d %d %d %d\n",zero.c[0],zero.c[1],zero.c[2],zero.c[3]);
+	words = hash4m128(frag);
+	if (zero.c[0]+zero.c[1]+zero.c[2]+zero.c[3]==0)
+		for(i=0;zero.c[i+3]==0;i++,wc++)
+			if (table[words.c[i]]<255) table[words.c[i]]++;  else overflow++;
+	if (count) *count=wc;
+	return overflow;
+// Byte-wise unsigned minimum of two vectors of 16 counters, widened to
+// 16 bits: the low and high halves are added with saturation, giving
+// 8 partial sums.
+static inline vUInt16 partialminsum(vUInt8 ft1,vUInt8 ft2)
+	vUInt8   mini;
+	vUInt16  minilo;
+	vUInt16  minihi;
+	vUInt8 mask_00= _MM_SETZERO_SI128();
+	mini      = _MM_MIN_EPU8(ft1,ft2);
+	minilo    = _MM_UNPACKLO_EPI8(mini,mask_00);   // interleave with zeros: 8 -> 16 bits
+	minihi    = _MM_UNPACKHI_EPI8(mini,mask_00);
+	return _MM_ADDS_EPU16(minilo,minihi);
+// Number of words shared by two word tables built by buildTable.
+//
+//   t1, t2       : the two tables, read as 16 vectors of 16 counters
+//   over1, over2 : overflow counts returned by buildTable for each table
+//
+// Returns the sum over the table entries of min(t1[k], t2[k]), accumulated
+// with saturating 16-bit additions, plus min(over1, over2).
+int compareTable(unsigned char *t1, int over1, unsigned char* t2,  int over2)
+	vUInt8   ft1;
+	vUInt8   ft2;
+	vUInt8  *table1=(vUInt8*)t1;
+	vUInt8  *table2=(vUInt8*)t2;
+	ushort_v summini;
+	int      i;
+	int      total;
+	// The first vector initialises the accumulator
+	ft1 = _MM_LOADU_SI128(table1);
+	ft2 = _MM_LOADU_SI128(table2);
+	summini.m = partialminsum(ft1,ft2);
+	table1++;
+	table2++;
+	// The 15 remaining vectors are accumulated
+	for (i=1;i<16;i++,table1++,table2++)
+	{
+		ft1 = _MM_LOADU_SI128(table1);
+		ft2 = _MM_LOADU_SI128(table2);
+		summini.m = _MM_ADDS_EPU16(summini.m,partialminsum(ft1,ft2));
+	}
+	// Finishing the sum process
+	summini.m = _MM_ADDS_EPU16(summini.m,_MM_SRLI_SI128(summini.m,8)); // sum the 4 firsts with the 4 lasts
+	summini.m = _MM_ADDS_EPU16(summini.m,_MM_SRLI_SI128(summini.m,4)); // then fold the 4 remaining sums into 2
+	total = summini.c[0]+summini.c[1];
+	total+= (over1 < over2) ? over1:over2;
+	return total;
+// Word-count threshold derived from a word count and an identity level.
+//
+//   wordcount : number of words (3 is added to it before any computation)
+//   identity  : identity level; error = floor(wordcount * (1 - identity))
+//
+// With lmax = (wordcount - error) / (error + 1), returns 0 when lmax < 4,
+// otherwise (lmax - 3) * (error + 1) + (wordcount - error) % (error + 1).
+// NOTE(review): presumably wordcount + 3 is the sequence length and the
+// result the minimum number of shared 4-symbol words - confirm.
+int threshold4(int wordcount,double identity)
+	int error;
+	int lmax;
+	wordcount+=3;
+	error = (int)floor((double)wordcount * ((double)1.0-identity));
+	lmax  = (wordcount - error) / (error + 1);
+	if (lmax < 4)
+		return 0;
+	return    (lmax  - 3) \
+			* (error + 1) \
+			+ ((wordcount - error) % (error + 1));
+// Word-count threshold derived from a reference length and an LCS length.
+//
+// The lcs symbols are spread over nbfrag = 2 * (reflen - lcs) + 1 fragments:
+// R of them hold smin + 1 symbols and the others smin symbols. A fragment
+// of n symbols contributes max(n - 3, 0) words to the returned total.
+// NOTE(review): MAX is used here but its fallback definition appears only
+// after this function in the file - presumably _sse.h already defines it;
+// confirm.
+int thresholdLCS4(int32_t reflen,int32_t lcs)
+	int nbfrag;
+	int smin;
+	int R;
+	int common;
+	nbfrag = (reflen - lcs)*2 + 1;
+	smin   = lcs/nbfrag;
+	R = lcs - smin * nbfrag;
+	common = MAX(smin - 2,0) * R + MAX(smin - 3,0) * (nbfrag - R);
+	return  common;
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#define MIN(x,y) (((x)<(y)) ? (x):(y))
+// Tell whether two sequences can reach a requested LCS level, using only
+// their word tables.
+//
+//   len1, t1, over1 : length, word table and overflow count of sequence 1
+//   len2, t2, over2 : same for sequence 2
+//   minimum         : requested level: a fraction of the reference length
+//                     when 'normalized' is set, an LCS length otherwise
+//   normalized      : selects how 'minimum' is interpreted
+//   large           : reference length is the longest (non zero) or the
+//                     shortest (zero) of the two lengths
+//
+// Returns 0 when the requested LCS is longer than the shortest sequence;
+// otherwise non zero when the tables share at least thresholdLCS4(reflen,lcs)
+// words.
+int ispossible(int len1, unsigned char *t1, int over1,
+		       int len2, unsigned char* t2, int over2,
+		       double minimum, int normalized, int large)
+	int32_t reflen;
+    int32_t lcs;
+    int32_t mincount;
+	// reflen is computed the same way in both branches; only lcs differs
+	if (normalized)
+	{
+		if (large)
+			reflen = MAX(len1,len2);
+		else
+			reflen = MIN(len1,len2);
+		lcs = (int32_t)floor((double)reflen * minimum);
+	}
+	else
+	{
+		if (large)
+			reflen = MAX(len1,len2);
+		else
+			reflen = MIN(len1,len2);
+		lcs = (int32_t) minimum;
+	}
+	if (lcs > MIN(len1,len2))
+		return 0;
+	mincount = thresholdLCS4(reflen,lcs);
+// fprintf(stderr,"MaxLCS %d %d %d : %d\n",reflen,lcs,compareTable(t1,over1,t2,over2),mincount);
+	return compareTable(t1,over1,t2,over2) >=mincount;
diff --git a/src/obitools/align/_lcs.h b/src/obitools/align/_lcs.h
new file mode 100644
index 0000000..b9d5f15
--- /dev/null
+++ b/src/obitools/align/_lcs.h
@@ -0,0 +1,29 @@
+#include "_sse.h"
+// Local boolean type: bool is a plain char, false and true are the results
+// of the comparisons (1==0) and (1==1).
+#define bool char
+#define false (1==0)
+#define true  (1==1)
+// Working column shared by the LCS functions declared in this header.
+// Each union gives a 16-bit and an 8-bit view of the same pointer.
+// In the FASTLCSSCORE implementation, data carries the alignment scores
+// and score carries the path lengths reported through lpath.
+typedef struct {
+	int16_t    size;
+	union { int16_t *shrt;
+	        int8_t  *byte;
+	      } data;
+	union { int16_t *shrt;
+			int8_t  *byte;
+		  } score;
+} column_t, **column_pp, *column_p;
+// Column management. mode8bits presumably selects the 8-bit view of the
+// column unions - TODO confirm in the implementation.
+column_p allocateColumn(int length,column_t *column, bool mode8bits);
+void freeColumn(column_p column);
+// LCS score functions. They all share one signature: the two sequences,
+// an optional in/out pointer to a reusable column (may be NULL) and an
+// optional output receiving the alignment path length (may be NULL).
+int fastLCSScore16(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath);
+int fastLCSScore8(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath);
+int simpleLCS(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath);
+int fastLCSScore(const char* seq1, const char* seq2,column_pp column,int32_t* lpath);
diff --git a/src/obitools/align/_lcs.pxd b/src/obitools/align/_lcs.pxd
new file mode 100644
index 0000000..e76a9c5
--- /dev/null
+++ b/src/obitools/align/_lcs.pxd
@@ -0,0 +1,9 @@
+# Helper C types used to spell the prototypes below.
+cdef extern from *:
+    ctypedef char* const_char_ptr "const char*"
+    ctypedef int* int32_ptr
+# Declarations taken from _lcs.h.
+cdef import from "_lcs.h":
+    # Opaque to Cython: the members are never accessed from this side.
+    struct column_t:
+        pass
+    # The header declares the third parameter as column_pp, i.e. column_t**
+    # (an optional in/out pointer to a reusable column), so it is declared
+    # with the same level of indirection here. Passing NULL stays valid.
+    int fastLCSScore(const_char_ptr seq1, const_char_ptr seq2,column_t** column,int32_ptr length)
diff --git a/src/obitools/align/_lcs.pyx b/src/obitools/align/_lcs.pyx
new file mode 100644
index 0000000..c809d47
--- /dev/null
+++ b/src/obitools/align/_lcs.pyx
@@ -0,0 +1,206 @@
+Created on 6 Nov. 2009
+@author: coissac
+from cpython cimport array
+from obitools import BioSequence
+from _lcs cimport *
+from _upperbond cimport *
+from _dynamic cimport *
+from _upperbond import *
+# Longest common subsequence alignment built on DynamicProgramming.
+# Gaps are free (opengap = extgap = 0); the score of a cell is the best sum
+# of matchScore() values along a path reaching it.
+#
+# Path encoding stored in each cell, as consumed by backtrack():
+#   0   : diagonal move (one step back on both sequences)
+#   p>0 : horizontal move of p positions
+#   p<0 : vertical move of -p positions
+cdef class LCS(DynamicProgramming):
+    def __init__(self):
+        DynamicProgramming.__init__(self,opengap=0,extgap=0)
+    # Read-only view of the gap opening penalty.
+    property opengap:
+        def __get__(self):
+            return self._opengap
+    # Read-only view of the gap extension penalty.
+    property extgap:
+        def __get__(self):  # @DuplicatedSignature
+            return self._extgap
+    # Score of aligning position h of the horizontal sequence with
+    # position v of the vertical one; h and v are 1-based.
+    cdef double matchScore(self,int h, int v):
+        return iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+    # Fill the score matrix when needToCompute is set and return the score
+    # of the bottom-right cell.
+    # The three candidates are tested with a strict '>' in the order
+    # diagonal, horizontal, vertical, so the first one wins on ties.
+    cdef double doAlignment(self) except? 0:
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int jump
+        cdef int delta
+        cdef double score
+        cdef double scoremax
+        cdef int    path
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            # first line: score 0, path = horizontal run of j positions
+            for j in range(1,self.hSeq.length+1):
+                idx = self.index(j,0)
+                self.matrix.matrix[idx].score = 0
+                self.matrix.matrix[idx].path  = j
+            # first column: score 0, path = vertical run of i positions
+            for i in range(1,self.vSeq.length+1):
+                idx = self.index(0,i)
+                self.matrix.matrix[idx].score = 0
+                self.matrix.matrix[idx].path  = -i
+            for i in range(1,self.vSeq.length+1):
+                for j in range(1,self.hSeq.length+1):
+                    # 1 - came from diagonal
+                    idx = self.index(j-1,i-1)
+                    # print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
+                    scoremax = self.matrix.matrix[idx].score + \
+                               self.matchScore(j,i)
+                    path = 0
+                    # print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
+                    # 2 - open horizontal gap
+                    idx = self.index(j-1,i)
+                    score = self.matrix.matrix[idx].score
+                    if score > scoremax :
+                        scoremax = score
+                        path = self.matrix.matrix[idx].path
+                        # extend the horizontal run of the left cell,
+                        # or start a new run of length 1
+                        if path >=0:
+                            path+=1
+                        else:
+                            path=+1
+                    # 3 - open vertical gap
+                    idx = self.index(j,i-1)
+                    score = self.matrix.matrix[idx].score
+                    if score > scoremax :
+                        scoremax = score
+                        path = self.matrix.matrix[idx].path
+                        # extend the vertical run of the upper cell,
+                        # or start a new run of length 1
+                        if path <=0:
+                            path-=1
+                        else:
+                            path=-1
+                    idx = self.index(j,i)
+                    self.matrix.matrix[idx].score = scoremax
+                    self.matrix.matrix[idx].path  = path
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        idx = self.index(self.hSeq.length,self.vSeq.length)
+        return self.matrix.matrix[idx].score
+    # Walk the matrix from the bottom-right cell back to (0,0) and append
+    # each path value met to self.path, i.e. from the end of the alignment
+    # to its start.
+    cdef void backtrack(self):
+        #cdef list path=[]
+        cdef int i
+        cdef int j
+        cdef int p
+        self.doAlignment()
+        i=self.vSeq.length
+        j=self.hSeq.length
+        self.path=allocatePath(i,j,self.path)
+        while (i or j):
+            p=self.matrix.matrix[self.index(j,i)].path
+            self.path.path[self.path.length]=p
+            self.path.length+=1
+#            path.append(p)
+            if p==0:
+                i-=1
+                j-=1
+            elif p < 0:
+                # p is negative: moves up by -p lines
+                i+=p
+            else:
+                j-=p
+        #path.reverse()
+        #reversePath(self.path)
+        self.path.hStart=0
+        self.path.vStart=0
+        #return 0,0,path
+def lenlcs(seq1,seq2,double minimum=0.,bint normalized=False, int reference=ALILEN):
+    """
+    Compute the longest common subsequence (LCS) score of two sequences.
+
+    @param seq1: first sequence (anything accepted by str() and len())
+    @param seq2: second sequence
+    @param minimum: when > 0, a 4-word count filter (ispossible) is run
+                    first and the LCS is only computed if that filter
+                    accepts the pair
+    @param normalized: if True, the score is divided by the length
+                       selected by reference, and minimum is handed to the
+                       filter as a normalized threshold
+    @param reference: ALILEN, MAXLEN or MINLEN, the length used to
+                      normalize the score
+
+    @return: a tuple (lcs, alilength). lcs is -1.0 when the filter rejected
+             the pair. alilength is the alignment length reported by
+             fastLCSScore, or 0 when fastLCSScore was not called.
+    """
+    cdef double lcs
+    cdef bytes se1=bytes(str(seq1))
+    cdef bytes se2=bytes(str(seq2))
+    cdef int l1 = len(seq1)
+    cdef int l2 = len(seq2)
+    cdef int o1
+    cdef int o2
+    cdef int wordcount
+    # Only fastLCSScore() fills alilength. It must start from a defined
+    # value because the short-sequence path and the rejected path never
+    # call it, while alilength is still returned and tested below.
+    cdef int alilength = 0
+    cdef bint possible
+    cdef bint large
+    cdef array.array[unsigned char] w1
+    cdef array.array[unsigned char] w2
+    cdef char *s1
+    cdef char *s2
+    s1=se1
+    s2=se2
+    if min(l1,l2) < 8:
+        # Short sequences are aligned with the plain LCS class.
+        # NOTE(review): this path does not compute an alignment length, so
+        # alilength stays 0 and an ALILEN-normalized score is reported as 0.
+        lcsali = LCS()
+        lcsali.seqA = seq1
+        lcsali.seqB = seq2
+        lcs = lcsali.doAlignment()
+    else:
+        if minimum > 0.:
+            # The word tables are cached on BioSequence instances so that
+            # they are built only once per sequence.
+            if isinstance(seq1, BioSequence) and hasattr(seq1, "word4table") and seq1.word4table is not None:
+                w1 = seq1.word4table
+                o1 = seq1.word4over
+            else:
+                w1 = newtable()
+                o1 = buildTable(s1,w1.data.as_uchars,&wordcount)
+                if isinstance(seq1, BioSequence):
+                    seq1.word4table=w1
+                    seq1.word4over=o1
+            if isinstance(seq2, BioSequence) and hasattr(seq2, "word4table") and seq2.word4table is not None:
+                w2 = seq2.word4table
+                o2 = seq2.word4over
+            else:
+                w2 = newtable()
+                o2 = buildTable(s2,w2.data.as_uchars,&wordcount)
+                if isinstance(seq2, BioSequence) :
+                    seq2.word4table=w2
+                    seq2.word4over=o2
+            # ALILEN and MAXLEN both use the longest sequence as reference
+            # length in the filter
+            large = reference==ALILEN or reference==MAXLEN
+            possible = ispossible(l1, w1.data.as_uchars, o1,
+                                  l2, w2.data.as_uchars, o2,
+                                  minimum,normalized,large)
+            if possible:
+                lcs = fastLCSScore(s1,s2,NULL,&alilength)
+            else:
+                lcs = -1.0
+        else:
+            lcs = fastLCSScore(s1,s2,NULL,&alilength)
+    if lcs >= 0 and normalized:
+        if reference==ALILEN:
+            if alilength > 0:
+                lcs /=alilength
+            else:
+                lcs = 0
+        elif reference==MAXLEN:
+            lcs /=max(l1,l2)
+        elif reference==MINLEN:
+            lcs /=min(l1,l2)
+    return lcs,alilength
diff --git a/src/obitools/align/_lcs_fast.h b/src/obitools/align/_lcs_fast.h
new file mode 100644
index 0000000..115cf26
--- /dev/null
+++ b/src/obitools/align/_lcs_fast.h
@@ -0,0 +1,597 @@
+ * Print an SSE register for debugging purposes
+ */
+#ifdef __SSE2__
+// Debug helper: extracts every element of register r with EXTRACT_REG and
+// prints them on one line. Elements 0 to 7 are always handled, elements
+// 8 to 15 only when VMODE is non-zero.
+static void  printreg(VTYPE r)
+	STYPE a0,a1,a2,a3,a4,a5,a6,a7;
+#if VMODE
+	STYPE a8,a9,a10,a11,a12,a13,a14,a15;
+	a0= EXTRACT_REG(r,0);
+	a1= EXTRACT_REG(r,1);
+	a2= EXTRACT_REG(r,2);
+	a3= EXTRACT_REG(r,3);
+	a4= EXTRACT_REG(r,4);
+	a5= EXTRACT_REG(r,5);
+	a6= EXTRACT_REG(r,6);
+	a7= EXTRACT_REG(r,7);
+#if VMODE
+	a8= EXTRACT_REG(r,8);
+	a9= EXTRACT_REG(r,9);
+	a10= EXTRACT_REG(r,10);
+	a11= EXTRACT_REG(r,11);
+	a12= EXTRACT_REG(r,12);
+	a13= EXTRACT_REG(r,13);
+	a14= EXTRACT_REG(r,14);
+	a15= EXTRACT_REG(r,15);
+printf( "a00 :-> %7d  %7d  %7d  %7d "
+		" %7d  %7d  %7d  %7d "
+#if VMODE
+		"%7d  %7d  %7d  %7d "
+		" %7d  %7d  %7d  %7d "
+		"\n"
+		, a0,a1,a2,a3,a4,a5,a6,a7
+#if VMODE
+		, a8,a9,a10,a11,a12,a13,a14,a15
+ * Set position p of an SSE register to the value v
+ */
+// Returns a copy of r with element p replaced by v.
+// The switch maps the run-time position p onto one INSERT_REG call per
+// literal position (presumably because INSERT_REG requires a compile-time
+// constant index - TODO confirm against its definition).
+// Positions 8 to 15 are only handled when VMODE is non-zero.
+// Any other position returns an all-zero register.
+static inline VTYPE insert_reg(VTYPE r, STYPE v, int p)
+	switch (p) {
+	case 0: return INSERT_REG(r,v,0);
+	case 1: return INSERT_REG(r,v,1);
+	case 2: return INSERT_REG(r,v,2);
+	case 3: return INSERT_REG(r,v,3);
+	case 4: return INSERT_REG(r,v,4);
+	case 5: return INSERT_REG(r,v,5);
+	case 6: return INSERT_REG(r,v,6);
+	case 7: return INSERT_REG(r,v,7);
+#if VMODE
+	case 8: return INSERT_REG(r,v,8);
+	case 9: return INSERT_REG(r,v,9);
+	case 10: return INSERT_REG(r,v,10);
+	case 11: return INSERT_REG(r,v,11);
+	case 12: return INSERT_REG(r,v,12);
+	case 13: return INSERT_REG(r,v,13);
+	case 14: return INSERT_REG(r,v,14);
+	case 15: return INSERT_REG(r,v,15);
+	}
+	return _MM_SETZERO_SI128();
+// Returns element p of register r.
+// The switch maps the run-time position p onto one EXTRACT_REG call per
+// literal position (presumably because EXTRACT_REG requires a compile-time
+// constant index - TODO confirm against its definition).
+// Positions 8 to 15 are only handled when VMODE is non-zero.
+// Any other position returns 0.
+static inline STYPE extract_reg(VTYPE r, int p)
+	switch (p) {
+	case 0: return EXTRACT_REG(r,0);
+	case 1: return EXTRACT_REG(r,1);
+	case 2: return EXTRACT_REG(r,2);
+	case 3: return EXTRACT_REG(r,3);
+	case 4: return EXTRACT_REG(r,4);
+	case 5: return EXTRACT_REG(r,5);
+	case 6: return EXTRACT_REG(r,6);
+	case 7: return EXTRACT_REG(r,7);
+#if VMODE
+	case 8: return EXTRACT_REG(r,8);
+	case 9: return EXTRACT_REG(r,9);
+	case 10: return EXTRACT_REG(r,10);
+	case 11: return EXTRACT_REG(r,11);
+	case 12: return EXTRACT_REG(r,12);
+	case 13: return EXTRACT_REG(r,13);
+	case 14: return EXTRACT_REG(r,14);
+	case 15: return EXTRACT_REG(r,15);
+	}
+	return 0;
+// Helper macros for FASTLCSSCORE. They expand to code using the names
+// lseq1, lseq2, seq1 and seq2, which must exist where they are expanded.
+//
+// GET_H_SYMBOLE / GET_V_SYMBOLE: symbol at 1-based position p of s, or a
+// padding value (255 for the horizontal sequence, 0 for the vertical one)
+// when p is 0 or not below the sequence length. The two padding values
+// differ, so two padding positions never compare equal.
+// NOTE(review): p is not parenthesized in the test part of the expansion.
+#define GET_H_SYMBOLE(s,p) ((p && p < lseq1) ? (s)[(p)-1]:255)
+#define GET_V_SYMBOLE(s,p) ((p && p < lseq2) ? (s)[(p)-1]:0)
+// LSHIFT_SCORE: shifts r by sizeof(STYPE) through _MM_SLLI_SI128
+// (presumably a byte shift, i.e. a move of one element - TODO confirm).
+#define LSHIFT_SCORE(r)      { r = _MM_SLLI_SI128((r),sizeof(STYPE)); }
+// SET_H_SYMBOLE: stores the symbol at position s of seq1 in element p of r.
+#define SET_H_SYMBOLE(r,p,s) { r = insert_reg((r),(STYPE)GET_H_SYMBOLE(seq1,(s)),(p)); }
+// PUSH_V_SYMBOLE: shifts r as LSHIFT_SCORE does, then stores the symbol at
+// position s of seq2 in element 0.
+#define PUSH_V_SYMBOLE(r,s)  { r = insert_reg(_MM_SLLI_SI128((r),sizeof(STYPE)),(STYPE)GET_V_SYMBOLE(seq2,(s)),0); }
+// EQUAL: element-wise comparison of f1 and f2, masked with the constant 1.
+#define EQUAL(f1,f2)         _MM_AND_SI128(EQUAL_REG((f1),(f2)),SET_CONST(1))
+/*
+ * FASTLCSSCORE - LCS score of seq1 and seq2 computed with SSE registers.
+ *
+ * The score matrix is processed in bands of VSIZE columns of the longest
+ * sequence; each band is scanned line by line along the shortest one.
+ * Values leaving a band are saved in a column buffer and read back by the
+ * next band.
+ *
+ * seq1, seq2 : NUL-terminated sequences (their length is taken with strlen).
+ *              They are swapped locally so that seq1 is the longest.
+ * ppcolumn   : optional in/out pointer to a reusable column. When NULL,
+ *              a column is allocated here and freed before returning;
+ *              otherwise *ppcolumn is updated with the column used.
+ * lpath      : optional output. When not NULL, the path-length registers
+ *              are maintained and *lpath receives the value extracted
+ *              from scurrent at position lastband.
+ *
+ * Returns the LCS score, or -1 when allocateColumn() returns NULL.
+ * Sequences shorter than 10 symbols are delegated to simpleLCS().
+ *
+ * NOTE(review): right after the first band loop, line equals VSIZE, so the
+ * store to column index lseq2-VSIZE+line targets index lseq2, whereas the
+ * initialization loop only covers indices 0 to lseq2-1. Confirm that
+ * allocateColumn(lseq2,...) provides at least lseq2+1 entries.
+ */
+int FASTLCSSCORE(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath)
+	int lseq1,lseq2;		// length of the both sequences
+	int itmp;				// tmp variables for swap
+	const char* stmp;		//
+	int nbands;				// Number of bands of width eight in the score matrix
+	int lastband;			// width of the last band
+							// Register for scanning the score matrix
+	VTYPE  minus1;
+	VTYPE  minus2;
+	VTYPE  current;
+	VTYPE  left;
+	VTYPE  top;
+	VTYPE  diag;
+	VTYPE  sminus1;
+	VTYPE  sminus2;
+	VTYPE  scurrent;
+	VTYPE  sleft;
+	VTYPE  stop;
+	VTYPE  sdiag;
+	VTYPE  way;
+	VTYPE  onevect;
+	VTYPE  maxvect;
+	VTYPE  fhseq;          	// The fragment of the horizontal sequence
+							// to consider for alignment
+	VTYPE  fvseq;			// The fragment of the vertical sequence
+							// to consider for alignment
+	VTYPE  match;
+	int band;
+	int line;
+	int limit;
+	int lcs;
+	int h;
+	int i;
+	column_t *column;
+		// Make seq1 the longest sequence
+	lseq1=strlen(seq1);
+	lseq2=strlen(seq2);
+	if (lseq1 < 10 || lseq2 < 10)
+		return simpleLCS(seq1,seq2,ppcolumn,lpath);
+	if (lseq1 < lseq2)
+	{
+		itmp=lseq1;
+		lseq1=lseq2;
+		lseq2=itmp;
+		stmp=seq1;
+		seq1=seq2;
+		seq2=stmp;
+	}
+							// we add one to the both length for taking into
+							// account the extra line and column in the score
+							// matrix
+	lseq1++;
+	lseq2++;
+							// a band sized to the smallest sequence is allocated
+	if (ppcolumn)
+		column = *ppcolumn;
+	else
+		column=NULL;
+	column = allocateColumn(lseq2,column,VMODE);
+							// Check memory allocation
+	if (column == NULL)
+		return -1;
+	for (i=0; i<lseq2;i++)
+	{
+		column->data.CMENB[i]=MIN_SCORE;
+		column->score.CMENB[i]=-1;
+	}
+	nbands = lseq1 / VSIZE;					// You have VSIZE element in one SSE register
+											// Alignment will be realized in nbands
+	lastband = lseq1 - (nbands * VSIZE);	// plus one of width lastband except if
+											// lastband==0
+	if (lastband) nbands++;
+	else lastband=VSIZE;
+	lastband--;
+//	printf("seq1 : %s  seq2 : %s\n",seq1,seq2);
+	minus2 = SET_CONST(MIN_SCORE);
+	minus1 = _MM_SETZERO_SI128();
+	sminus1= _MM_SETZERO_SI128();
+	sminus2= _MM_SETZERO_SI128();
+	onevect= SET_CONST(1);
+	maxvect= SET_CONST(MAX_SCORE);
+	h=0;
+	fhseq = _MM_SETZERO_SI128();
+	fvseq = _MM_SETZERO_SI128();
+					//
+					// Beginning of the first band
+					//
+	for (line = 0; line < VSIZE; line++,h++) // previously VSIZE - 1
+	{
+//		printf("line= %4d   h= %4d\n",line,h);
+		SET_H_SYMBOLE(fhseq,line,h)
+		PUSH_V_SYMBOLE(fvseq,line)
+		minus2 = insert_reg(minus2,0,h);
+		minus1 = insert_reg(minus1,MIN_SCORE,line); // previously 0
+		match = EQUAL(fhseq,fvseq);
+		if (lpath)
+		{
+			sminus2 = insert_reg(sminus2,line-1,line);  // I am not certain about this initialization
+			sminus1 = insert_reg(sminus1,0,line);
+		}
+//		printreg(fvseq);
+//		printreg(fhseq);
+//		printreg(match);
+//		printf("================================\n");
+		current = minus1;      // The best score is the upper one
+							   // It cannot be the best as set to MIN_SCORE
+		left = minus1;
+//		printf("Vert = "); printreg(current);
+		LSHIFT_SCORE(minus1)    // I shift minus1 so now I'll compare with the left position
+		minus1=insert_reg(minus1,(column)->data.CMENB[line],0);
+		top=minus1;
+		if (lpath)
+		{
+			sleft=sminus1;  // I store the path length corresponding to the upper path
+			LSHIFT_SCORE(sminus1)  // I shift to prepare the score coming from the left side
+			sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0);
+			stop=sminus1;
+			sdiag=sminus2;
+		}
+//		printf("Horz = "); printreg(minus1);
+		current = GET_MAX(current,minus1); // Look for the best between upper and left
+//		printf("BstHV= "); printreg(current);
+//		printf("Diag = "); printreg(ADD_REG(minus2,match));
+		diag=minus2;
+		// minus2 = ;	// Minus2 contains the diagonal score, so I add the match reward
+		                // Diag score are setup to 0 so this one will win on the first iteration
+		current = GET_MAX(current,ADD_REG(minus2,match));
+		if (lpath)
+		{
+//			printf("\n");
+//			printf("current: ");
+//			printreg(current);
+//			printf("current: ");
+//			printreg(SUB_REG(current,match));
+//			printf("diag   : ");
+//			printreg(diag);
+//			printf("left   : ");
+//			printreg(left);
+//			printf("top    : ");
+//			printreg(top);
+			// For each of the three origins, keep its path length where
+			// it gave the best score and maxvect elsewhere, then take
+			// the minimum of the three and add one step
+			way     = EQUAL_REG(SUB_REG(current,match),diag);
+			scurrent= OR_REG(AND_REG(way,sdiag),
+					         ANDNOT_REG(way,maxvect));
+//			printf("sdiag  : ");
+//			printreg(scurrent);
+			way     = EQUAL_REG(current,left);
+			scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft),
+							  ANDNOT_REG(way,maxvect)));
+//			printf("sleft  : ");
+//			printreg(scurrent);
+			way     = EQUAL_REG(current,top);
+			scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop),
+							  ANDNOT_REG(way,maxvect)));
+//			printf("stop   : ");
+//			printreg(scurrent);
+			scurrent= ADD_REG(scurrent,onevect);
+			sminus2=sminus1;
+			sminus1=scurrent;
+		}
+//		printf("line %d :Best = ",line); printreg(current);
+//		printf("================================\n");
+		minus2=minus1;
+		minus1=current;
+//		printf("min2 = "); printreg(minus2);
+//		printf("min1 = "); printreg(minus1);
+//		printf("================================\n");
+//		printf("\n");
+//		printf("sdiag  : ");
+//		printreg(sminus2);
+//		printf("scur   : ");
+//		printreg(scurrent);
+//		printf("current: ");
+//		printreg(current);
+//		printf("%8s\n",seq1);
+//		printf("%8s\n",seq2);
+//		printf("================================\n");
+	}  ///// <<<<<<<<------- End of the beginning of the first band
+//		printf("================================\n");
+	// NOTE(review): line == VSIZE here, so the index below is lseq2
+	(column)->data.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(current,VSIZE-1);
+	if (lpath)
+		(column)->score.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(scurrent,VSIZE-1);
+	for (band=0; band < nbands; band++)
+	{
+//		SET_H_SYMBOLE(fhseq,line,h)
+//		minus2 = insert_reg(minus2,0,line);
+//		minus1 = insert_reg(minus1,MIN_SCORE,line); // previously 0
+//		h++;
+		// Middle of the band: line carries on from the value left by
+		// the previous loop up to the end of the shortest sequence
+		for (; line < lseq2; line++)
+		{
+//			printf("Looping with line= %d \n",line);
+			PUSH_V_SYMBOLE(fvseq,line)
+			match = EQUAL(fhseq,fvseq);
+//			printreg(fvseq);
+//			printreg(fhseq);
+//			printreg(match);
+//			printf("================================\n");
+			current = minus1;
+			left = minus1;
+			// Store the last current score in extra column
+			(column)->data.CMENB[line-VSIZE]=EXTRACT_REG(current,VSIZE-1);
+			LSHIFT_SCORE(minus1)
+			minus1=insert_reg(minus1,(column)->data.CMENB[line],0);
+			top = minus1;
+//			printf("Vert = "); printreg(current);
+			if (lpath)
+			{
+				sleft= sminus1;
+				(column)->score.CMENB[line-VSIZE]=EXTRACT_REG(scurrent,VSIZE-1);
+				LSHIFT_SCORE(sminus1)
+				sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0);
+				stop=sminus1;
+				sdiag=sminus2;
+			}
+//			printf("line = %d --> get = %d\n",line,(column)->data.CMENB[line]);
+//			printf("Horz = "); printreg(minus1);
+			current = GET_MAX(current,minus1);
+			diag=minus2;
+			current = GET_MAX(current,ADD_REG(minus2,match));
+			if (lpath)
+			{
+//				printf("\n");
+//				printf("current: ");
+//				printreg(current);
+//				printf("current: ");
+//				printreg(SUB_REG(current,match));
+//				printf("diag   : ");
+//				printreg(diag);
+//				printf("left   : ");
+//				printreg(left);
+//				printf("top    : ");
+//				printreg(top);
+				way     = EQUAL_REG(SUB_REG(current,match),diag);
+				scurrent= OR_REG(AND_REG(way,sdiag),
+						         ANDNOT_REG(way,maxvect));
+//				printf("sdiag  : ");
+//				printreg(scurrent);
+				way     = EQUAL_REG(current,left);
+				scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft),
+								  ANDNOT_REG(way,maxvect)));
+//				printf("sleft  : ");
+//				printreg(scurrent);
+				way     = EQUAL_REG(current,top);
+				scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop),
+								  ANDNOT_REG(way,maxvect)));
+//				printf("stop   : ");
+//				printreg(scurrent);
+				scurrent= ADD_REG(scurrent,onevect);
+				sminus2=sminus1;
+				sminus1=scurrent;
+			}
+			minus2=minus1;
+			minus1=current;
+//			printf("\n");
+//			printf("sdiag  : ");
+//			printreg(sminus2);
+//			printf("scur   : ");
+//			printreg(scurrent);
+//			printf("current: ");
+//			printreg(current);
+//			printf("%8s\n",seq1);
+//			printf("%8s\n",seq2);
+		}
+//		printf("================================\n");
+								// end of the band and beginning of the next one
+		limit=(band==(nbands-1)) ? lastband:VSIZE;
+		for (line = 0; line < limit; line++,h++)
+		{
+//			printf("Finishing with line= %d \n",line);
+			SET_H_SYMBOLE(fhseq,line,h)
+			PUSH_V_SYMBOLE(fvseq,line)
+			minus2 = insert_reg(minus2,MIN_SCORE,line);
+			minus1 = insert_reg(minus1,MIN_SCORE,line);
+			current = minus1;
+			left=minus1;
+			match = EQUAL(fhseq,fvseq);
+			if (lpath)
+			{
+				sminus2 = insert_reg(sminus2,lseq2-VSIZE+line,line);
+				sminus1 = insert_reg(sminus1,h,line);
+				sleft= sminus1;
+			}
+//			printf("\n");
+//			printf("fhseq = "); printreg(fhseq);
+//			printf("fvseq = "); printreg(fvseq);
+//			printf("----------------------------------------------------------------\n");
+//			printf("match = "); printreg(match);
+			(column)->data.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(current,VSIZE-1);
+			LSHIFT_SCORE(minus1)
+			minus1=insert_reg(minus1,(column)->data.CMENB[line],0);
+			top=minus1;
+			current = GET_MAX(current,minus1);
+			if (lpath)
+			{
+				(column)->score.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(scurrent,VSIZE-1);
+				LSHIFT_SCORE(sminus1)
+				sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0);
+				stop=sminus1;
+				sdiag=sminus2;
+				// NOTE(review): the scurrent value computed here is
+				// overwritten in the next if (lpath) block before
+				// being read
+				way     = EQUAL_REG(current,minus1);
+				scurrent= OR_REG(AND_REG(way,sminus1),
+						         ANDNOT_REG(way,scurrent));
+			}
+			diag=minus2;
+			current = GET_MAX(current,ADD_REG(minus2,match));
+			if (lpath)
+			{
+				way     = EQUAL_REG(SUB_REG(current,match),diag);
+				scurrent= OR_REG(AND_REG(way,sdiag),
+						         ANDNOT_REG(way,maxvect));
+				way     = EQUAL_REG(current,left);
+				scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft),
+								  ANDNOT_REG(way,maxvect)));
+				way     = EQUAL_REG(current,top);
+				scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop),
+								  ANDNOT_REG(way,maxvect)));
+				scurrent= ADD_REG(scurrent,onevect);
+				sminus2=sminus1;
+				sminus1=scurrent;
+			}
+//			printf("currt = "); printreg(current);
+			minus2=minus1;
+			minus1=current;
+//			printf("\n");
+//			printf("sdiag  : ");
+//			printreg(sminus2);
+//			printf("scur   : ");
+//			printreg(scurrent);
+//			printf("current: ");
+//			printreg(current);
+//			printf("%8s\n",seq1);
+//			printf("%8s\n",seq2);
+//			printf("Storing at line= %d the value %d\n",lseq2-VSIZE+line,(column)->data.CMENB[lseq2-VSIZE+line]);
+		}
+	}
+//	printf("\n");
+//	printf("line = %d, h= %d, lastband = %d\n",line,h,lastband);
+//	printf("currt = "); printreg(current);
+	// The final score sits at position lastband of the last register
+	lcs  = extract_reg(current,lastband);
+	if(lpath)
+		*lpath= extract_reg(scurrent,lastband);
+//	printf("lastband = %d (%d) lcs = %d\n",lastband,lseq2,lcs);
+	// Hand the column back to the caller, or release it when the caller
+	// did not ask for it
+	if (ppcolumn)
+		*ppcolumn=column;
+	else
+		freeColumn(column);
+	return lcs;
+// Second definition of FASTLCSSCORE, located before the closing
+// "#endif /* __SSE2__ */": it forwards every argument to simpleLCS().
+// Presumably the variant compiled when __SSE2__ is not defined - TODO
+// confirm, the preprocessor line separating the two definitions is not
+// visible here.
+int FASTLCSSCORE(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath)
+	return simpleLCS(seq1,seq2,ppcolumn,lpath);
+#endif /* __SSE2__ */
diff --git a/src/obitools/align/_nws.pxd b/src/obitools/align/_nws.pxd
new file mode 100644
index 0000000..9ed1e7f
--- /dev/null
+++ b/src/obitools/align/_nws.pxd
@@ -0,0 +1,10 @@
+from _dynamic cimport *
+# C-level declaration of the NWS aligner: two scoring attributes and the
+# two cdef methods implemented in the matching .pyx file.
+cdef class NWS(DynamicProgramming):
+    cdef double _match      # score weight of a match
+    cdef double _mismatch   # score weight of a mismatch
+    cdef double matchScore(self,int h, int v)
+    cdef double doAlignment(self) except? 0
diff --git a/src/obitools/align/_nws.pyx b/src/obitools/align/_nws.pyx
new file mode 100644
index 0000000..5c97f31
--- /dev/null
+++ b/src/obitools/align/_nws.pyx
@@ -0,0 +1,162 @@
+Created on 6 Nov. 2009
+@author: coissac
+from _nws cimport * 
+# Global alignment with separate gap opening and gap extension scores,
+# built on DynamicProgramming.
+#
+# Path encoding stored in each cell, as consumed by backtrack():
+#   0   : diagonal move (one step back on both sequences)
+#   p>0 : horizontal move of p positions
+#   p<0 : vertical move of -p positions
+cdef class NWS(DynamicProgramming):
+    def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
+        DynamicProgramming.__init__(self,opengap,extgap)
+        self._match=match
+        self._mismatch=mismatch
+    # Score of aligning position h of the horizontal sequence with
+    # position v of the vertical one; h and v are 1-based.
+    # The value returned by iupacPartialMatch weights _match, and its
+    # complement to 1 weights _mismatch.
+    cdef double matchScore(self,int h, int v):
+        cdef double score
+        score = iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+        return score * self._match + (1-score) * self._mismatch
+    # Fill the score matrix when needToCompute is set and return the score
+    # of the bottom-right cell.
+    # The five candidates are tested with a strict '>' in the order listed
+    # below, so the first one wins on ties.
+    cdef double doAlignment(self) except? 0:
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int jump
+        cdef int delta
+        cdef double score 
+        cdef double scoremax
+        cdef int    path
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            # first line: one gap opening plus (j-1) extensions
+            for j in range(1,self._hlen()+1):
+                idx = self.index(j,0)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (j-1))
+                self.matrix.matrix[idx].path  = j
+            # first column: one gap opening plus (i-1) extensions
+            for i in range(1,self._vlen()+1):
+                idx = self.index(0,i)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
+                self.matrix.matrix[idx].path  = -i
+            for i in range(1,self._vlen()+1):
+                for j in range(1,self._hlen()+1):
+                    # 1 - came from diagonal
+                    idx = self.index(j-1,i-1)
+                    # print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
+                    scoremax = self.matrix.matrix[idx].score + \
+                               self.matchScore(j,i)
+                    path = 0
+                    # print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
+                    # 2 - open horizontal gap
+                    idx = self.index(j-1,i)
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = +1
+                    # 3 - open vertical gap
+                    idx = self.index(j,i-1)
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = -1
+                    # 4 - extend horizontal gap
+                    # bestHJump[i] is the column where a horizontal gap was
+                    # last opened on line i; a negative value presumably
+                    # means none was recorded since reset() - TODO confirm
+                    jump = self.matrix.bestHJump[i]
+                    if jump >= 0:
+                        idx = self.index(jump,i)
+                        delta = j-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = delta+1 
+                    # 5 - extend vertical gap
+                    # bestVJump[j] is the line where a vertical gap was
+                    # last opened on column j
+                    jump = self.matrix.bestVJump[j]
+                    if jump >= 0:
+                        idx = self.index(j,jump)
+                        delta = i-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = -delta-1 
+                    idx = self.index(j,i)
+                    self.matrix.matrix[idx].score = scoremax
+                    self.matrix.matrix[idx].path  = path 
+                    # remember where a gap has just been opened so that
+                    # later cells can extend it
+                    if path == -1:
+                        self.matrix.bestVJump[j]=i
+                    elif path == +1 :
+                        self.matrix.bestHJump[i]=j
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        idx = self.index(self._hlen(),self._vlen())
+        return self.matrix.matrix[idx].score
+    # Walk the matrix from the bottom-right cell back to (0,0) and append
+    # each path value met to self.path, i.e. from the end of the alignment
+    # to its start.
+    cdef void backtrack(self):
+        #cdef list path=[]
+        cdef int i
+        cdef int j 
+        cdef int p
+        self.doAlignment()
+        i=self._vlen()
+        j=self._hlen()
+        self.path=allocatePath(i,j,self.path)
+        while (i or j):
+            p=self.matrix.matrix[self.index(j,i)].path
+            self.path.path[self.path.length]=p
+            self.path.length+=1
+            #path.append(p)
+            if p==0:
+                i-=1
+                j-=1
+            elif p < 0:
+                # p is negative: moves up by -p lines
+                i+=p
+            else:
+                j-=p
+        #path.reverse()
+        #reversePath(self.path)
+        self.path.hStart=0
+        self.path.vStart=0
+        #return 0,0,path
+    # Match weight; changing it flags the scores as changed.
+    property match:
+        def __get__(self):
+            return self._match
+        def __set__(self,match):
+            self._match=match 
+            self.scoreChanged=True
+    # Mismatch weight; changing it flags the scores as changed.
+    property mismatch:
+        def __get__(self):
+            return self._mismatch
+        def __set__(self,mismatch):
+            self._mismatch=mismatch 
+            self.scoreChanged=True
diff --git a/src/obitools/align/_nwsdnabyprot.pxd b/src/obitools/align/_nwsdnabyprot.pxd
new file mode 100644
index 0000000..18987e9
--- /dev/null
+++ b/src/obitools/align/_nwsdnabyprot.pxd
@@ -0,0 +1,36 @@
+from _dynamic cimport *
+# One cell of the codon alignment matrix.
+cdef struct CodonAlignCell :
+    double score    # score stored for this cell
+    int   path      # path value stored for this cell
+    int   frame     # frame value stored for this cell
+# Codon alignment matrix and its two jump tables, as allocated and grown
+# by allocateCodonMatrix().
+cdef struct CodonAlignMatrix :
+    CodonAlignCell*  matrix     # array of msize cells
+    int*        bestVJump       # array of hsize ints
+    int*        bestHJump       # array of vsize ints
+    int         msize           # capacity of matrix, in cells
+    int         vsize           # capacity of bestHJump, in ints
+    int         hsize           # capacity of bestVJump, in ints
+# Life cycle of a CodonAlignMatrix: allocate or grow, free, reset.
+# The matrix argument of allocateCodonMatrix is optional.
+cdef CodonAlignMatrix* allocateCodonMatrix(int hsize, int vsize,CodonAlignMatrix *matrix=?)
+cdef void freeCodonMatrix(CodonAlignMatrix* matrix)
+cdef void resetCodonMatrix(CodonAlignMatrix* matrix)
+# Match score between two codons, each given as three chars.
+cdef double iupacPartialCodonMatch(char[3] c1, char[3] c2)
+# C-level declaration of the NWSDNAByProt aligner; the attributes and cdef
+# methods listed here are implemented in the matching .pyx file.
+cdef class NWSDNAByProt(DynamicProgramming):
+    cdef double _match      # score weight of a match
+    cdef double _mismatch   # score weight of a mismatch
+    cdef int _sframe
+    cdef object _gc
+    cdef void getPossibleCodon(self,char[3] codon,int h,int v,int frame)    
+    cdef double aaScore(self,char aa1,char aa2)
+    cdef double matchScore(self,int h, int v, int qframe)
+    cdef double doAlignment(self) except? 0
+    cdef void reset(self)
+    cdef int allocate(self) except -1
+    cdef void clean(self)
diff --git a/src/obitools/align/_nwsdnabyprot.pyx b/src/obitools/align/_nwsdnabyprot.pyx
new file mode 100644
index 0000000..918a1ce
--- /dev/null
+++ b/src/obitools/align/_nwsdnabyprot.pyx
@@ -0,0 +1,516 @@
+Created on 6 Nov. 2009
+ at author: coissac
+import sys
+from _nwsdnabyprot cimport * 
+from obitools.sequenceencoder.geneticcode import TranslationEncoder
+from obitools.translate import GeneticCode
+from obitools import BioSequence
+from obitools.alignment import AlignedSequence
+from obitools.alignment import Alignment
+cdef CodonAlignMatrix* allocateCodonMatrix(int hsize, int vsize,CodonAlignMatrix *matrix=NULL):
+    # Create the codon alignment matrix, or grow the one passed in, so that
+    # it can hold (hsize + 1) x (vsize + 1) cells. Buffers are only ever
+    # enlarged. Returns the matrix (newly created when `matrix` was NULL).
+    # NOTE(review): malloc/realloc results are used without a NULL check.
+    cdef int cols = hsize + 1
+    cdef int rows = vsize + 1
+    cdef int cells = cols * rows
+    if matrix == NULL:
+        # Start from an empty descriptor so that every realloc below
+        # behaves like a first allocation.
+        matrix = <CodonAlignMatrix*>malloc(sizeof(CodonAlignMatrix))
+        matrix.matrix=NULL
+        matrix.bestHJump=NULL
+        matrix.bestVJump=NULL
+        matrix.msize=0
+        matrix.hsize=0
+        matrix.vsize=0
+    if matrix.hsize < cols:
+        # bestVJump holds one entry per horizontal position
+        matrix.bestVJump = <int*>realloc(matrix.bestVJump,cols * sizeof(int))
+        matrix.hsize=cols
+    if matrix.vsize < rows:
+        # bestHJump holds one entry per vertical position
+        matrix.bestHJump = <int*>realloc(matrix.bestHJump,rows * sizeof(int))
+        matrix.vsize=rows
+    if matrix.msize < cells:
+        matrix.msize = cells
+        matrix.matrix = <CodonAlignCell*>realloc(matrix.matrix, cells * sizeof(CodonAlignCell))
+    return matrix
+cdef void freeCodonMatrix(CodonAlignMatrix* matrix):
+    # Release every buffer owned by the matrix, then the descriptor itself.
+    # A NULL matrix is accepted and ignored.
+    if matrix == NULL:
+        return
+    if matrix.matrix != NULL:
+        free(matrix.matrix)
+    if matrix.bestVJump != NULL:
+        free(matrix.bestVJump)
+    if matrix.bestHJump != NULL:
+        free(matrix.bestHJump)
+    free(matrix)
+cdef void resetCodonMatrix(CodonAlignMatrix* matrix):
+    # Clear the matrix before a new computation: every cell byte becomes 0
+    # and every jump entry becomes -1 (all bytes set to 0xFF).
+    if matrix == NULL:
+        return
+    if matrix.matrix != NULL:
+        memset(<void*>matrix.matrix, 0, matrix.msize * sizeof(CodonAlignCell))
+    if matrix.bestHJump != NULL:
+        memset(<void*>matrix.bestHJump,0xFF,matrix.vsize * sizeof(int))
+    if matrix.bestVJump != NULL:
+        memset(<void*>matrix.bestVJump,0xFF,matrix.hsize * sizeof(int))
+cdef double iupacPartialCodonMatch(char[3] c1, char[3] c2):
+    # Average, over the three codon positions, of the per-nucleotide
+    # iupacPartialMatch value of c1 against c2.
+    cdef double first  = iupacPartialMatch(c1[0],c2[0])
+    cdef double second = iupacPartialMatch(c1[1],c2[1])
+    cdef double third  = iupacPartialMatch(c1[2],c2[2])
+    return (first + second + third) / 3.0
+cdef class NWSDNAByProt(DynamicProgramming):
+    def __init__(self,match=4,
+                      mismatch=-6,
+                      opengap=-8,
+                      extgap=-2,
+                      geneticCode=None,
+                      startingFrame=0):
+        """
+        Store the scoring parameters of the aligner.
+
+        match / mismatch are the scores kept in _match / _mismatch;
+        opengap / extgap are forwarded to DynamicProgramming.__init__.
+        geneticCode is the object later indexed with a codon string to get
+        an amino acid; when it is None a TranslationEncoder() is created.
+        startingFrame is stored in _sframe.
+        """
+        DynamicProgramming.__init__(self,opengap,extgap)
+        self._match=match
+        self._mismatch=mismatch
+        if geneticCode is None:
+            self._gc = TranslationEncoder()
+        else:
+            # Use the object supplied by the caller. The previous code stored
+            # the imported GeneticCode name and silently ignored the argument.
+            # NOTE(review): assumes the argument supports lookup by codon
+            # string, like TranslationEncoder - confirm against callers.
+            self._gc = geneticCode
+        self._sframe = startingFrame
+    cdef double aaScore(self,char aa1,char aa2):
+        # Amino-acid level score: _mismatch only when the two residues differ
+        # and neither of them is 'X'; _match in every other case.
+        if aa1!=aa2 and aa1!='X' and aa2!='X':
+            return self._mismatch
+        return self._match
+    cdef void getPossibleCodon(self,char[3] codon,int h, int v,int frame):
+        # Fill `codon` with three characters taken from the vertical sequence
+        # so that nucleotide v-1 sits at codon position `frame` (0, 1 or
+        # anything else, treated as 2). Positions for which no nucleotide is
+        # available are filled with '*'.
+        # For frames 1 and 2 the preceding nucleotide(s) are located by
+        # following the path codes already stored in the matrix, starting
+        # from cell (h,v). h is a by-value argument and is modified locally.
+        cdef CodonAlignMatrix* matrix
+        cdef CodonAlignCell* smatrix 
+        cdef int path
+        cdef int vv
+        matrix = <CodonAlignMatrix*>self.matrix
+        smatrix= matrix.matrix
+        # NOTE(review): doAlignment calls matchScore(j,i,...) before it writes
+        # cell (j,i), so on that code path this reads the value left by
+        # reset() - confirm this is intended.
+        path   = smatrix[self.index(h,v)].path
+        if frame == 0:
+            # v-1 is the first codon position; the next two follow in sequence
+            codon[0]=self.vSeq.sequence[v-1]
+            if v < (self.vSeq.length):
+                codon[1]=self.vSeq.sequence[v]
+            else:
+                codon[1]='*'
+            if v < (self.vSeq.length-1):
+                codon[2]=self.vSeq.sequence[v+1]
+            else:
+                codon[2]='*'
+        elif frame==1 :
+            # v-1 is the middle position; look backwards for the first one
+            vv=v
+            if v>1:
+                if path==0:
+                    vv-=1
+                # skip over gap moves: negative codes move vertically,
+                # positive codes move horizontally
+                while(path!=0):
+                    if path < 0:
+                        vv+=path
+                    else:
+                        h-=path 
+                    path   = smatrix[self.index(h,vv)].path
+                codon[0]=self.vSeq.sequence[vv-1]
+            else:
+                codon[0]='*'
+            codon[1]=self.vSeq.sequence[v-1]
+            if v < (self.vSeq.length):
+                codon[2]=self.vSeq.sequence[v]
+            else:
+                codon[2]='*'
+        else:
+            # v-1 is the last position; look backwards for the two others
+            vv=v
+            if v>1:
+                if path==0:
+                    vv-=1
+                while(path!=0):
+                    if path < 0:
+                        vv+=path
+                    else:
+                        h-=path 
+                    path   = smatrix[self.index(h,vv)].path
+                codon[1]=self.vSeq.sequence[vv-1]
+                # step one cell further along the diagonal before searching
+                # for the first codon position
+                vv-=1
+                h-=1
+                path   = smatrix[self.index(h,vv)].path
+            else:
+                codon[1]='*'
+            if v>2:
+                if path==0:
+                    vv-=1
+                while(path!=0):
+                    if path < 0:
+                        vv+=path
+                    else:
+                        h-=path 
+                    path   = smatrix[self.index(h,vv)].path
+                # NOTE(review): the next assignment is immediately overwritten
+                # by the one after it, so the traceback result (vv) is unused
+                # for codon[0]; one of the two lines is probably a leftover.
+                codon[0]=self.vSeq.sequence[vv-1]
+                codon[0]=self.vSeq.sequence[v-3]
+            else:
+                codon[0]='*'
+            codon[2]=self.vSeq.sequence[v-1]
+    cdef double matchScore(self,int h, int v, int qframe):
+        # Score of aligning horizontal position h with vertical position v
+        # when the vertical nucleotide is read at codon position qframe.
+        # The result adds a nucleotide term (iupacPartialMatch weighted by
+        # _match / _mismatch) to an amino-acid term (aaScore of the two
+        # translated codons).
+        cdef double score
+        cdef int frame
+        cdef char[3] codon
+        cdef char[3] qcodon
+        cdef char aa
+        cdef char qaa
+        # codon position of h in the horizontal sequence, given _sframe
+        frame=((h - 1 + self._sframe) % 3)
+                        # extract reference codon
+        # ('*' is used for positions falling outside the sequence)
+        if frame==0:
+            codon[0]=self.hSeq.sequence[h-1]
+            if h < (self.hSeq.length):
+                codon[1]=self.hSeq.sequence[h]
+            else:
+                codon[1]='*'
+            if h < (self.hSeq.length-1):
+                codon[2]=self.hSeq.sequence[h+1]
+            else:
+                codon[2]='*'
+        elif frame==1 :
+            if h>1:
+                codon[0]=self.hSeq.sequence[h-2]
+            else:
+                codon[0]='*'
+            codon[1]=self.hSeq.sequence[h-1]
+            if h < (self.hSeq.length):
+                codon[2]=self.hSeq.sequence[h]
+            else:
+                codon[2]='*'
+        else:
+            if h>2:
+                codon[0]=self.hSeq.sequence[h-3]
+            else:
+                codon[0]='*'
+            if h>1:
+                codon[1]=self.hSeq.sequence[h-2]
+            else:
+                codon[1]='*'
+            codon[2]=self.hSeq.sequence[h-1]
+        # NOTE(review): codon / qcodon are 3-byte buffers without a
+        # terminator; confirm how the char[3] -> str coercion below
+        # determines the string length.
+        aa=ord(self._gc[str(codon)])
+        self.getPossibleCodon(qcodon,h,v,qframe)
+        qaa=ord(self._gc[str(qcodon)])
+        score = iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+        score = self._match * score + self._mismatch * (1-score) + self.aaScore(aa,qaa)
+#        print >>sys.stderr, h,frame,chr(aa),chr(codon[0])+chr(codon[1])+chr(codon[2]),  
+#        print >>sys.stderr, chr(qaa),chr(qcodon[0])+chr(qcodon[1])+chr(qcodon[2]), score
+        return score
+    cdef double doAlignment(self) except? 0:
+        # Fill the dynamic programming matrix when needToCompute reports that
+        # it is stale, clear the sequenceChanged / scoreChanged flags and
+        # return the score stored in the bottom-right cell.
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int jump
+        cdef int delta
+        cdef double score 
+        cdef double scoremax
+        cdef int    path
+        cdef int    frame
+        cdef int fframe
+        cdef CodonAlignMatrix* matrix
+        cdef CodonAlignCell* smatrix 
+        # penalty added to a diagonal step whose tested frame differs from
+        # the frame expected after the diagonal predecessor
+        cdef double fscost=-10
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            matrix = <CodonAlignMatrix*>self.matrix
+            smatrix= matrix.matrix
+            smatrix[0].frame=(self._sframe-1) % 3
+            # first row: leading horizontal gap of length j
+            for j in range(1,self.hSeq.length+1):
+                idx = self.index(j,0)
+                smatrix[idx].score = self._opengap + (self._extgap * (j-1))
+                smatrix[idx].path  = j
+                smatrix[idx].frame = smatrix[0].frame
+            # first column: leading vertical gap of length i
+            for i in range(1,self.vSeq.length+1):
+                idx = self.index(0,i)
+                smatrix[idx].score = self._opengap + (self._extgap * (i-1))
+                smatrix[idx].path  = -i
+                smatrix[idx].frame = smatrix[0].frame
+            for i in range(1,self.vSeq.length+1):
+                for j in range(1,self.hSeq.length+1):
+                    # 1 - came from diagonal: try the three frames and keep
+                    #     the best one (ties go to the expected frame)
+                    idx = self.index(j-1,i-1)
+                    fframe=smatrix[idx].frame
+                    fframe=(fframe + 1) % 3
+                    scoremax = smatrix[idx].score + \
+                               self.matchScore(j,i,0) + \
+                               (fframe > -1 and fframe != 0) * fscost
+                    path = 0
+                    frame= 0
+                    score    = smatrix[idx].score + \
+                               self.matchScore(j,i,1) + \
+                               (fframe > -1 and fframe != 1) * fscost
+                    if score > scoremax or (fframe==1 and score==scoremax): 
+                        scoremax = score
+                        frame = 1
+                    score    = smatrix[idx].score + \
+                               self.matchScore(j,i,2) + \
+                               (fframe > -1 and fframe != 2) * fscost
+                    if score > scoremax or (fframe==2 and score==scoremax) : 
+                        scoremax = score
+                        frame = 2
+                    # 2 - open horizontal gap
+                    idx = self.index(j-1,i)
+                    score = smatrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = +1
+                        frame= smatrix[idx].frame
+                    # 3 - open vertical gap
+                    idx = self.index(j,i-1)
+                    score = smatrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = -1
+                        frame= smatrix[idx].frame
+                    # 4 - extend horizontal gap
+                    jump = matrix.bestHJump[i]
+                    if jump >= 0:
+                        idx = self.index(jump,i)
+                        delta = j-jump
+                        score = smatrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = delta+1 
+                            frame= smatrix[idx].frame
+                    # 5 - extend vertical gap
+                    jump = matrix.bestVJump[j]
+                    if jump >= 0:
+                        idx = self.index(j,jump)
+                        delta = i-jump
+                        score = smatrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = -delta-1 
+                            frame= smatrix[idx].frame
+                    idx = self.index(j,i)
+                    smatrix[idx].score = scoremax
+                    smatrix[idx].path  = path 
+                    smatrix[idx].frame = frame 
+                    # remember where a gap was opened so it can be extended
+                    if path == -1:
+                        matrix.bestVJump[j]=i
+                    elif path == +1 :
+                        matrix.bestHJump[i]=j
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        # Fetch the cell buffer here as well: when no recomputation was
+        # needed the block above is skipped (e.g. on the call made from
+        # backtrack) and smatrix would otherwise be read uninitialised.
+        # NOTE(review): assumes self.matrix was allocated by an earlier run
+        # whenever needToCompute is false.
+        matrix = <CodonAlignMatrix*>self.matrix
+        smatrix= matrix.matrix
+        idx = self.index(self.hSeq.length,self.vSeq.length)
+        return smatrix[idx].score
+    cdef void backtrack(self):
+        # Follow the stored path codes from the bottom-right cell back to the
+        # origin, appending each code to self.path; the path is therefore
+        # recorded from the end of the alignment towards its start.
+        cdef int row
+        cdef int col
+        cdef int step
+        cdef CodonAlignMatrix* matrix
+        cdef CodonAlignCell* cells
+        self.doAlignment()
+        matrix = <CodonAlignMatrix*>self.matrix
+        cells = matrix.matrix
+        row=self.vSeq.length
+        col=self.hSeq.length
+        self.path=allocatePath(row,col,self.path)
+        while row != 0 or col != 0:
+            step=cells[self.index(col,row)].path
+            self.path.path[self.path.length]=step
+            self.path.length+=1
+            if step > 0:
+                # horizontal move of `step` positions
+                col-=step
+            elif step < 0:
+                # vertical move of `-step` positions
+                row+=step
+            else:
+                # diagonal step
+                row-=1
+                col-=1
+        self.path.hStart=0
+        self.path.vStart=0
+    property match:
+        # Accessor pair for the _match score; assigning also sets the
+        # scoreChanged flag.
+        def __get__(self):
+            return self._match
+        def __set__(self,value):
+            self._match=value
+            self.scoreChanged=True
+    property mismatch:
+        # Accessor pair for the _mismatch score; assigning also sets the
+        # scoreChanged flag.
+        def __get__(self):
+            return self._mismatch
+        def __set__(self,value):
+            self._mismatch=value
+            self.scoreChanged=True
+    cdef int allocate(self) except -1:
+        # Make sure the codon matrix is large enough for the two current
+        # sequences. Both sequences must have been set beforehand.
+        # Returns 0; a failed assertion propagates as an exception.
+        cdef long hlen
+        cdef long vlen
+        assert self.horizontalSeq is not None,'Sequence A must be set'
+        assert self.verticalSeq is not None,'Sequence B must be set'
+        hlen=self.hSeq.length
+        vlen=self.vSeq.length
+        self.matrix=<AlignMatrix*>allocateCodonMatrix(hlen,vlen,<CodonAlignMatrix*>self.matrix)
+        return 0
+    cdef void reset(self):
+        # Flag the score as changed and clear the codon matrix (cells zeroed,
+        # jump entries set to -1) before a new computation.
+        self.scoreChanged=True
+        resetCodonMatrix(<CodonAlignMatrix*>self.matrix)
+    cdef void clean(self):
+        # Release every C-level buffer owned by the aligner: the path, the
+        # two encoded sequences and the codon matrix.
+        # NOTE(review): the pointers are not reset afterwards, so this must
+        # not be called twice on the same instance.
+        freePath(self.path)
+        freeSequence(self.vSeq)
+        freeSequence(self.hSeq)
+        freeCodonMatrix(<CodonAlignMatrix*>self.matrix)
+    def __call__(self):
+        """
+        Run the alignment (when needed) and return a clone of it.
+
+        The horizontal and vertical sequences are wrapped in AlignedSequence
+        objects whose `gaps` lists are built from the backtracked path. The
+        list of frames collected along the path is stored under the 'frame'
+        key of the second (vertical) sequence of the returned clone.
+
+        NOTE(review): the frame list is only filled when the alignment is
+        recomputed; when a cached alignment is reused the 'frame' entry is
+        set to an empty list.
+        """
+        cdef list hgaps=[]
+        cdef list vgaps=[]
+        cdef list vframe=[]
+        cdef list b
+        cdef int  hp=0
+        cdef int  vp=0
+        cdef int  lenh=0
+        cdef int  lenv=0
+        cdef int  h,v,p
+        cdef int  i
+        cdef object ali
+        cdef double score
+        cdef CodonAlignMatrix* matrix
+        cdef CodonAlignCell* smatrix 
+        if self._needToCompute():
+            score = self.doAlignment()
+            self.backtrack()
+            h=self.path.hStart
+            v=self.path.vStart
+            matrix = <CodonAlignMatrix*>self.matrix
+            smatrix= matrix.matrix
+            # the path was recorded end-to-start, so replay it backwards
+            for i in range(self.path.length-1,-1,-1):
+                p=self.path.path[i]
+                if p==0:
+                    # aligned pair: advance in both sequences
+                    hp+=1
+                    vp+=1
+                    lenh+=1
+                    lenv+=1
+                    v+=1
+                    h+=1
+                    vframe.append(smatrix[self.index(h,v)].frame)
+                elif p>0:
+                    # p horizontal positions facing a gap in the vertical
+                    # sequence
+                    hp+=p
+                    lenh+=p
+                    vgaps.append([vp,p])
+                    vp=0
+                    h+=p
+                    #vframe.extend(['*']*p)
+                else:
+                    # -p vertical positions facing a gap in the horizontal
+                    # sequence
+                    vp-=p
+                    lenv-=p
+                    hgaps.append([hp,-p])
+                    hp=0
+                    v-=p
+                    vframe.extend([smatrix[self.index(h,v)].frame]*-p)
+            # trailing run of positions not followed by a gap
+            if hp:
+                hgaps.append([hp,0])
+            if vp:
+                vgaps.append([vp,0])
+            # keep only the aligned part when it is shorter than the sequence
+            if lenh < self.hSeq.length:
+                hseq=self.horizontalSeq[self.path.hStart:self.path.hStart+lenh]
+            else:
+                hseq=self.horizontalSeq
+            hseq=AlignedSequence(hseq) 
+            hseq.gaps=hgaps       
+            if lenv < self.vSeq.length:
+                vseq=self.verticalSeq[self.path.vStart:self.path.vStart+lenv]
+            else:
+                vseq=self.verticalSeq
+            vseq=AlignedSequence(vseq) 
+            vseq.gaps=vgaps       
+            ali=Alignment()
+            ali.append(hseq)
+            ali.append(vseq)
+            ali.score=score
+            self.alignment=ali
+        # hand out a clone so that callers cannot alter the cached alignment
+        ali=self.alignment.clone()
+        ali[1]['frame']=vframe
+        ali.score=self.alignment.score
+        return ali
diff --git a/src/obitools/align/_profilenws.pxd b/src/obitools/align/_profilenws.pxd
new file mode 100644
index 0000000..c1a779d
--- /dev/null
+++ b/src/obitools/align/_profilenws.pxd
@@ -0,0 +1,23 @@
+from _nws cimport *
+from obitools.profile._profile cimport *
+# C-level copy of a profile: six consecutive blocks of `length` doubles.
+cdef struct alignProfile:
+    # number of positions in the profile
+    long    length
+    # size in bytes of the frequency buffer
+    long    buffsize
+    # values of fA, fC, fG, fT, fOg and fEg, one block of `length` each
+    double* frequency
+# Build (or refill and grow) the C-level table from a profile object.
+cdef alignProfile* allocateProfile(object profile, alignProfile* prof=?)
+# Release the table and its frequency buffer.
+cdef void freeProfile(alignProfile* prof)
+# C-level declaration of the profile/profile aligner implemented in
+# _profilenws.pyx.
+cdef class ProfileNWS(NWS):
+    # frequency tables built from seqA (horizontal) and seqB (vertical)
+    cdef alignProfile* hProf
+    cdef alignProfile* vProf
+    cdef double matchScore(self,int h, int v)
+    cdef void clean(self)
+    # lengths of the vertical / horizontal profiles
+    cdef int _vlen(self)
+    cdef int _hlen(self)
+    cdef double doAlignment(self) except? 0
\ No newline at end of file
diff --git a/src/obitools/align/_profilenws.pyx b/src/obitools/align/_profilenws.pyx
new file mode 100644
index 0000000..2380aa4
--- /dev/null
+++ b/src/obitools/align/_profilenws.pyx
@@ -0,0 +1,211 @@
+Created on 01 Feb. 2011
+ at author: celine
+from _profilenws cimport *
+from obitools.profile._profile import DNAProfile
+cdef alignProfile* allocateProfile(object profile, alignProfile* prof=NULL):
+    # Copy the per-position values of `profile` (fA, fC, fG, fT, fOg, fEg)
+    # into a C table made of six consecutive blocks of len(profile) doubles.
+    # The table passed in is reused and only enlarged when too small; a new
+    # one is created when `prof` is NULL. Returns the table.
+    # NOTE(review): malloc/realloc results are used without a NULL check.
+    cdef int pos
+    cdef int nbytes
+    cdef long n
+    cdef double* table
+    if prof == NULL:
+        prof = <alignProfile*>malloc(sizeof(alignProfile))
+        prof.frequency=NULL
+        prof.buffsize=0
+        prof.length=0
+    n = len(profile)
+    prof.length = n
+    nbytes = 6 * n * sizeof(double)
+    if prof.buffsize < nbytes:
+        prof.frequency = <double*>realloc(prof.frequency,nbytes)
+        prof.buffsize = nbytes
+    table = prof.frequency
+    for pos in range(n):
+        table[pos]     = profile.fA(pos)
+        table[pos+n]   = profile.fC(pos)
+        table[pos+n*2] = profile.fG(pos)
+        table[pos+n*3] = profile.fT(pos)
+        table[pos+n*4] = profile.fOg(pos)
+        table[pos+n*5] = profile.fEg(pos)
+    return prof
+cdef void freeProfile(alignProfile* prof):
+    # Release the frequency buffer, then the table itself. A NULL table is
+    # accepted and ignored.
+    if prof == NULL:
+        return
+    if prof.frequency != NULL:
+        free(<void*>prof.frequency)
+    free(prof)
+cdef class ProfileNWS(NWS):
+    def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
+        """
+        Store the scoring parameters; no profile table is allocated until
+        seqA / seqB are assigned.
+        """
+        DynamicProgramming.__init__(self,opengap,extgap)
+        self.hProf=NULL
+        self.vProf=NULL
+        self._match=match
+        self._mismatch=mismatch
+    cdef double matchScore(self,int h, int v):
+        # Score of aligning column h of the horizontal profile with column v
+        # of the vertical one (both 1-based). pmatch is the sum over the
+        # A, C, G and T blocks of the product of the two frequencies; it
+        # weights _match, and its complement weights _mismatch.
+        cdef double* hfreq = self.hProf.frequency
+        cdef double* vfreq = self.vProf.frequency
+        cdef int     hlen = self.hProf.length
+        cdef int     vlen = self.vProf.length
+        cdef int     hi = h-1
+        cdef int     vi = v-1
+        cdef double  pA = hfreq[hi]*vfreq[vi]
+        cdef double  pC = hfreq[hi+hlen]*vfreq[vi+vlen]
+        cdef double  pG = hfreq[hi+2*hlen]*vfreq[vi+2*vlen]
+        cdef double  pT = hfreq[hi+3*hlen]*vfreq[vi+3*vlen]
+        cdef double  pmatch = pA + pC + pG + pT
+        return self._match * pmatch + (1-pmatch) * self._mismatch
+    cdef int _vlen(self):
+        # Number of positions of the vertical profile table.
+        return self.vProf.length
+    cdef int _hlen(self):
+        # Number of positions of the horizontal profile table.
+        return self.hProf.length
+    property seqA:
+        # Horizontal operand. Anything that is not already a DNAProfile is
+        # wrapped into one before the C-level table is (re)built from it.
+        def __get__(self):
+            return self.horizontalSeq
+        def __set__(self, value):
+            self.sequenceChanged=True
+            profile = value if isinstance(value,DNAProfile) else DNAProfile(value)
+            self.horizontalSeq=profile
+            self.hProf=allocateProfile(profile,self.hProf)
+    property seqB:
+        # Vertical operand. Anything that is not already a DNAProfile is
+        # wrapped into one before the C-level table is (re)built from it.
+        def __get__(self):
+            return self.verticalSeq
+        def __set__(self, value):
+            self.sequenceChanged=True
+            profile = value if isinstance(value,DNAProfile) else DNAProfile(value)
+            self.verticalSeq=profile
+            self.vProf=allocateProfile(profile,self.vProf)
+    cdef void clean(self):
+        # Release every C-level buffer owned by the aligner: the path, the
+        # matrix and the two profile tables.
+        # NOTE(review): the pointers are not reset afterwards, so this must
+        # not be called twice on the same instance.
+        freePath(self.path)
+        freeMatrix(self.matrix)
+        freeProfile(self.vProf)
+        freeProfile(self.hProf)
+    def __call__(self):
+        """
+        Align the two profiles (when needed) and return the merged profile.
+
+        The merged profile has one column per alignment column. Columns
+        where both profiles are aligned receive the sum of the two source
+        columns. Columns taken from a single profile copy that profile's
+        column and add the weight of the other profile to its Eg entry, or
+        to its Og entry for the last column handled in the gap run.
+        The result is cached in self.alignment; a new DNAProfile built from
+        it is returned on every call.
+        """
+        cdef list hgaps=[]
+        cdef list vgaps=[]
+        cdef int  hp
+        cdef int  vp
+        cdef int  rp
+        cdef int  p
+        cdef int  i
+        cdef int  ncolumns
+        cdef object ali
+        cdef DNAProfile newProfile
+        cdef DNAProfile horizontalSeq=self.horizontalSeq
+        cdef DNAProfile verticalSeq=self.verticalSeq
+        if self._needToCompute():
+            self.doAlignment()
+            self.backtrack()
+            # total number of alignment columns: one per diagonal step,
+            # abs(code) per gap run
+            ncolumns = 0
+            for i in range(self.path.length):
+                p = self.path.path[i]
+                if p == 0 :
+                    ncolumns += 1
+                else :
+                    ncolumns += abs(p)
+            newProfile = DNAProfile(size=ncolumns)
+            newProfile.profile.weight = horizontalSeq.profile.weight+verticalSeq.profile.weight
+            # the path is stored end-to-start, so all three cursors start on
+            # the last column and move towards the first one
+            hp=horizontalSeq.profile.length-1
+            vp=verticalSeq.profile.length-1
+            rp=newProfile.profile.length-1
+            for i in range(self.path.length):
+                p=self.path.path[i]
+                if p==0:
+                    newProfile.A[rp] = horizontalSeq.A[hp] + verticalSeq.A[vp]
+                    newProfile.C[rp] = horizontalSeq.C[hp] + verticalSeq.C[vp]
+                    newProfile.G[rp] = horizontalSeq.G[hp] + verticalSeq.G[vp]
+                    newProfile.T[rp] = horizontalSeq.T[hp] + verticalSeq.T[vp]
+                    newProfile.Og[rp] = horizontalSeq.Og[hp] + verticalSeq.Og[vp]
+                    newProfile.Eg[rp] = horizontalSeq.Eg[hp] + verticalSeq.Eg[vp]
+                    hp-=1
+                    vp-=1
+                    rp-=1
+                elif p>0:
+                    # horizontal-only columns: the first abs(p)-1 handled
+                    # columns charge the vertical weight to Eg ...
+                    for x in xrange(abs(p)-1) :
+                        newProfile.A[rp] = horizontalSeq.A[hp]
+                        newProfile.C[rp] = horizontalSeq.C[hp]
+                        newProfile.G[rp] = horizontalSeq.G[hp]
+                        newProfile.T[rp] = horizontalSeq.T[hp]
+                        newProfile.Og[rp] = horizontalSeq.Og[hp]
+                        newProfile.Eg[rp] = horizontalSeq.Eg[hp] + verticalSeq.profile.weight
+                        hp-=1
+                        rp-=1
+                    # ... and the last handled column charges it to Og
+                    newProfile.A[rp] = horizontalSeq.A[hp]
+                    newProfile.C[rp] = horizontalSeq.C[hp]
+                    newProfile.G[rp] = horizontalSeq.G[hp]
+                    newProfile.T[rp] = horizontalSeq.T[hp]
+                    newProfile.Og[rp] = horizontalSeq.Og[hp] + verticalSeq.profile.weight
+                    newProfile.Eg[rp] = horizontalSeq.Eg[hp]
+                    hp-=1
+                    rp-=1
+                else:
+                    # vertical-only columns, handled symmetrically
+                    for x in xrange(abs(p)-1) :
+                        newProfile.A[rp] = verticalSeq.A[vp]
+                        newProfile.C[rp] = verticalSeq.C[vp]
+                        newProfile.G[rp] = verticalSeq.G[vp]
+                        newProfile.T[rp] = verticalSeq.T[vp]
+                        newProfile.Og[rp] = verticalSeq.Og[vp]
+                        newProfile.Eg[rp] = verticalSeq.Eg[vp] + horizontalSeq.profile.weight
+                        vp-=1
+                        rp-=1
+                    newProfile.A[rp] = verticalSeq.A[vp]
+                    newProfile.C[rp] = verticalSeq.C[vp]
+                    newProfile.G[rp] = verticalSeq.G[vp]
+                    newProfile.T[rp] = verticalSeq.T[vp]
+                    newProfile.Og[rp] = verticalSeq.Og[vp] + horizontalSeq.profile.weight
+                    newProfile.Eg[rp] = verticalSeq.Eg[vp]
+                    vp-=1
+                    rp-=1
+            self.alignment = newProfile
+        ali=DNAProfile(self.alignment)
+        return ali
diff --git a/src/obitools/align/_qsassemble.pyx b/src/obitools/align/_qsassemble.pyx
new file mode 100644
index 0000000..aa5d625
--- /dev/null
+++ b/src/obitools/align/_qsassemble.pyx
@@ -0,0 +1,89 @@
+Created on 6 Nov. 2009
+ at author: coissac
+from _dynamic cimport * 
+from _assemble cimport DirectAssemble
+cdef class QSolexaDirectAssemble(DirectAssemble):
+    # Raw pointers into the buffers of seqA.quality / seqB.quality, read as
+    # per-position error values by matchScore.
+    # NOTE(review): the buffers are owned by the quality objects, so the
+    # pointers are only valid while those objects are alive and unresized.
+    cdef double* hError
+    cdef double* vError
+    def __init__(self,match=4,mismatch=-4,opengap=-8,extgap=-2):
+        """
+         Relationship between the match and mismatch scores:
+             if mismatch = - match / 3,
+             then when the score tends towards 0 and no decision is possible
+             there is no penalty (s'=0);
+             if mismatch < - match / 3, undecidability is penalised.
+        """
+        DirectAssemble.__init__(self,match,mismatch,opengap,extgap)
+    cdef double matchScore(self,int h, int v):
+        # Quality-aware score of aligning position h with position v
+        # (both 1-based): the match and mismatch scores are first adjusted
+        # from the two per-position error values, then combined according
+        # to the iupacPartialMatch value of the two nucleotides.
+        cdef double score
+        cdef double smatch
+        cdef double smismatch
+        cdef double hok=1-self.hError[h-1]
+        cdef double vok=1-self.vError[v-1]
+        score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+        smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
+        smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
+        return smatch * score + smismatch * (1. - score)
+    property seqA:
+        # Horizontal sequence; it must expose a `quality` attribute.
+        def __get__(self):
+            return self.horizontalSeq
+        def __set__(self, seq):
+            cdef object oaddress,olength
+            assert hasattr(seq, "quality"),"You must use sequence with quality indices"
+            self.sequenceChanged=True
+            self.horizontalSeq=seq
+            self.hSeq=allocateSequence(self.horizontalSeq,self.hSeq)
+            (oaddress,olength)=seq.quality.buffer_info()
+            # size_t is pointer-sized on every supported platform, unlike
+            # unsigned long, which is 32 bits wide on 64-bit Windows
+            self.hError=<double*><size_t>oaddress
+    property seqB:
+        # Vertical sequence; it must expose a `quality` attribute.
+        def __get__(self):
+            return self.verticalSeq
+        def __set__(self, seq):
+            cdef object oaddress,olength
+            assert hasattr(seq, "quality"),"You must use sequence with quality indices"
+            self.sequenceChanged=True
+            self.verticalSeq=seq
+            self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
+            (oaddress,olength)=seq.quality.buffer_info()
+            self.vError=<double*><size_t>oaddress
+cdef class QSolexaReverseAssemble(QSolexaDirectAssemble):    
+    # Variant aligning seqA against the complement() of seqB.
+    cdef double matchScore(self,int h, int v):
+        # Same computation as in QSolexaDirectAssemble, except that the
+        # vertical error value is read from the other end of the buffer
+        # (index length - v), since vError still points at the quality
+        # values of the sequence as it was given.
+        cdef double score
+        cdef double smatch
+        cdef double smismatch
+        cdef double hok=1-self.hError[h-1]
+        cdef double vok=1-self.vError[self.vSeq.length - v]
+        score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+        smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
+        smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
+        return smatch * score + smismatch * (1. - score)
+    property seqB:
+        # Vertical sequence: stored as seq.complement(), read back through
+        # the `wrapped` attribute of the stored object.
+        def __get__(self):
+            return self.verticalSeq.wrapped
+        def __set__(self, seq):
+            cdef object oaddress,olength
+            assert hasattr(seq, "quality"),"You must use sequence with quality indices"
+            self.sequenceChanged=True
+            self.verticalSeq=seq.complement()
+            self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
+            # NOTE(review): the pointer targets the quality buffer of the
+            # sequence passed in, not of its complement; presumably the
+            # complement keeps that sequence alive - confirm.
+            (oaddress,olength)=seq.quality.buffer_info()
+            # size_t is pointer-sized on every supported platform, unlike
+            # unsigned long, which is 32 bits wide on 64-bit Windows
+            self.vError=<double*><size_t>oaddress
diff --git a/src/obitools/align/_qsrassemble.pyx b/src/obitools/align/_qsrassemble.pyx
new file mode 100644
index 0000000..01f6ddb
--- /dev/null
+++ b/src/obitools/align/_qsrassemble.pyx
@@ -0,0 +1,88 @@
+Created on 6 Nov. 2009
+ at author: coissac
+from _dynamic cimport * 
+from _rassemble cimport RightDirectAssemble
+cdef class QSolexaRightDirectAssemble(RightDirectAssemble):
+    # Raw pointers into the buffers of seqA.quality / seqB.quality, read as
+    # per-position error values by matchScore.
+    # NOTE(review): the buffers are owned by the quality objects, so the
+    # pointers are only valid while those objects are alive and unresized.
+    cdef double* hError
+    cdef double* vError
+    def __init__(self,match=4,mismatch=-4,opengap=-8,extgap=-2):
+        """
+         Relationship between the match and mismatch scores:
+             if mismatch = - match / 3,
+             then when the score tends towards 0 and no decision is possible
+             there is no penalty (s'=0);
+             if mismatch < - match / 3, undecidability is penalised.
+        """
+        RightDirectAssemble.__init__(self,match,mismatch,opengap,extgap)
+    cdef double matchScore(self,int h, int v):
+        # Quality-aware score of aligning position h with position v
+        # (both 1-based): the match and mismatch scores are first adjusted
+        # from the two per-position error values, then combined according
+        # to the iupacPartialMatch value of the two nucleotides.
+        cdef double score
+        cdef double smatch
+        cdef double smismatch
+        cdef double hok=1-self.hError[h-1]
+        cdef double vok=1-self.vError[v-1]
+        score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+        smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
+        smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
+        return smatch * score + smismatch * (1. - score)
+    property seqA:
+        # Horizontal sequence; it must expose a `quality` attribute.
+        def __get__(self):
+            return self.horizontalSeq
+        def __set__(self, seq):
+            cdef object oaddress,olength
+            assert hasattr(seq, "quality"),"You must use sequence with quality indices"
+            self.sequenceChanged=True
+            self.horizontalSeq=seq
+            self.hSeq=allocateSequence(self.horizontalSeq,self.hSeq)
+            (oaddress,olength)=seq.quality.buffer_info()
+            # size_t is pointer-sized on every supported platform, unlike
+            # unsigned long, which is 32 bits wide on 64-bit Windows
+            self.hError=<double*><size_t>oaddress
+    property seqB:
+        # Vertical sequence; it must expose a `quality` attribute.
+        def __get__(self):
+            return self.verticalSeq
+        def __set__(self, seq):
+            cdef object oaddress,olength
+            assert hasattr(seq, "quality"),"You must use sequence with quality indices"
+            self.sequenceChanged=True
+            self.verticalSeq=seq
+            self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
+            (oaddress,olength)=seq.quality.buffer_info()
+            self.vError=<double*><size_t>oaddress
+cdef class QSolexaRightReverseAssemble(QSolexaRightDirectAssemble):
+    # Variant aligning seqA against the complement() of seqB.
+    cdef double matchScore(self,int h, int v):
+        # Same computation as in QSolexaRightDirectAssemble, except that the
+        # vertical error value is read from the other end of the buffer
+        # (index length - v), since vError still points at the quality
+        # values of the sequence as it was given.
+        cdef double score
+        cdef double smatch
+        cdef double smismatch
+        cdef double hok=1-self.hError[h-1]
+        cdef double vok=1-self.vError[self.vSeq.length - v]
+        score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
+        smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
+        smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
+        return smatch * score + smismatch * (1. - score)
+    property seqB:
+        # Vertical sequence: stored as seq.complement(), read back through
+        # the `wrapped` attribute of the stored object.
+        def __get__(self):
+            return self.verticalSeq.wrapped
+        def __set__(self, seq):
+            cdef object oaddress,olength
+            assert hasattr(seq, "quality"),"You must use sequence with quality indices"
+            self.sequenceChanged=True
+            self.verticalSeq=seq.complement()
+            self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
+            # NOTE(review): the pointer targets the quality buffer of the
+            # sequence passed in, not of its complement; presumably the
+            # complement keeps that sequence alive - confirm.
+            (oaddress,olength)=seq.quality.buffer_info()
+            # size_t is pointer-sized on every supported platform, unlike
+            # unsigned long, which is 32 bits wide on 64-bit Windows
+            self.vError=<double*><size_t>oaddress
diff --git a/src/obitools/align/_rassemble.pxd b/src/obitools/align/_rassemble.pxd
new file mode 100644
index 0000000..2b32986
--- /dev/null
+++ b/src/obitools/align/_rassemble.pxd
@@ -0,0 +1,10 @@
+from _nws cimport *  
+# C-level declaration of the RightDirectAssemble aligner, a subclass of NWS.
+cdef class RightDirectAssemble(NWS):
+    cdef double xsmax   # score attribute (double)
+    cdef int    xmax    # index attribute (int)
+    # Runs the alignment and returns a double; "except? 0" makes Cython check
+    # for a pending exception whenever 0 is returned.
+    cdef double doAlignment(self) except? 0
+# Subclass of RightDirectAssemble; declares no additional C attributes or methods.
+cdef class RightReverseAssemble(RightDirectAssemble):    
+    pass
\ No newline at end of file
diff --git a/src/obitools/align/_rassemble.pyx b/src/obitools/align/_rassemble.pyx
new file mode 100644
index 0000000..544ac77
--- /dev/null
+++ b/src/obitools/align/_rassemble.pyx
@@ -0,0 +1,157 @@
+#@PydevCodeAnalysisIgnore
+'''
+Created on 6 Nov. 2009
+@author: coissac
+'''
+from _rassemble cimport *  
+cdef class RightDirectAssemble(NWS):
+    # Dynamic-programming aligner built on NWS in which
+    #   - skipping a prefix of the vertical sequence is free (column 0 is
+    #     initialised to score 0), and
+    #   - the alignment ends at the best cell of the last row, so the
+    #     remaining suffix of the horizontal sequence is not scored.
+    #
+    # Path codes stored in the matrix and consumed by backtrack():
+    #    0   diagonal step (one position of each sequence)
+    #   >0   horizontal gap spanning that many columns
+    #   <0   vertical gap spanning that many rows
+    def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
+        NWS.__init__(self,match,mismatch,opengap,extgap)
+        self.xsmax=0
+        self.xmax=0
+    cdef double doAlignment(self) except? 0:
+        # Fill the score/path matrix if self.needToCompute is true, record
+        # the best last-row score in self.xsmax and its column in self.xmax,
+        # then return self.xsmax.
+        cdef int i  # vertical index
+        cdef int j  # horizontal index
+        cdef int idx
+        cdef int jump
+        cdef int delta
+        cdef double score
+        cdef double scoremax
+        cdef int    path
+        if self.needToCompute:
+            self.allocate()
+            self.reset()
+            self.xsmax=0
+            self.xmax=0
+            # Row 0: a leading horizontal gap costs one opening plus extensions.
+            for j in range(1,self.hSeq.length+1):
+                idx = self.index(j,0)
+                self.matrix.matrix[idx].score = self._opengap + (self._extgap * (j-1))
+                self.matrix.matrix[idx].path  = j
+            # Column 0: skipping the first i vertical positions is free.
+            for i in range(1,self.vSeq.length+1):
+                idx = self.index(0,i)
+                self.matrix.matrix[idx].score = 0
+                self.matrix.matrix[idx].path  = -i
+            for i in range(1,self.vSeq.length+1):
+                for j in range(1,self.hSeq.length+1):
+                    # 1 - came from diagonal
+                    idx = self.index(j-1,i-1)
+                    scoremax = self.matrix.matrix[idx].score + \
+                               self.matchScore(j,i)
+                    path = 0
+                    # 2 - open horizontal gap
+                    idx = self.index(j-1,i)
+                    score = self.matrix.matrix[idx].score+ \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = +1
+                    # 3 - open vertical gap
+                    idx = self.index(j,i-1)
+                    score = self.matrix.matrix[idx].score + \
+                            self._opengap
+                    if score > scoremax : 
+                        scoremax = score
+                        path = -1
+                    # 4 - extend horizontal gap
+                    # bestHJump[i] is the column of the last gap opening
+                    # recorded in row i; that cell already pays for one gap
+                    # column, hence the stored length delta+1.
+                    jump = self.matrix.bestHJump[i]
+                    if jump >= 0:
+                        idx = self.index(jump,i)
+                        delta = j-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = delta+1 
+                    # 5 - extend vertical gap (same scheme, along column j)
+                    jump = self.matrix.bestVJump[j]
+                    if jump >= 0:
+                        idx = self.index(j,jump)
+                        delta = i-jump
+                        score = self.matrix.matrix[idx].score + \
+                                self._extgap * delta
+                        if score > scoremax :
+                            scoremax = score
+                            path = -delta-1 
+                    idx = self.index(j,i)
+                    self.matrix.matrix[idx].score = scoremax
+                    self.matrix.matrix[idx].path  = path 
+                    # Remember where a gap was just opened so that later
+                    # cells of the same row/column can extend it.
+                    if path == -1:
+                        self.matrix.bestVJump[j]=i
+                    elif path == +1 :
+                        self.matrix.bestHJump[i]=j
+                    # Track the best score of the last row only.
+                    if i==self.vSeq.length and scoremax > self.xsmax:
+                        self.xsmax=scoremax
+                        self.xmax=j
+        self.sequenceChanged=False
+        self.scoreChanged=False
+        return self.xsmax
+    cdef void backtrack(self):
+        # Walk the matrix back from (xmax, vSeq.length) to (0, 0) and store
+        # the path codes in self.path in the order they are visited, i.e.
+        # from the end of the alignment towards its start.
+        cdef int i
+        cdef int j 
+        cdef int p
+        self.doAlignment()
+        j=self.xmax
+        i=self.vSeq.length
+        # NOTE(review): assumes allocatePath returns a path whose length
+        # field starts at 0 - confirm in its definition.
+        self.path=allocatePath(i,j+1,self.path)
+        # First entry: the horizontal positions lying after the best
+        # last-row cell, stored as one positive (horizontal) step.
+        if self.xmax<self.hSeq.length:
+            self.path.path[self.path.length]=self.hSeq.length-self.xmax
+            self.path.length+=1
+        while (i or j):
+            p=self.matrix.matrix[self.index(j,i)].path
+            self.path.path[self.path.length]=p
+            self.path.length+=1
+            if p==0:
+                i-=1
+                j-=1
+            elif p < 0:
+                i+=p
+            else:
+                j-=p
+        self.path.hStart=0
+        self.path.vStart=0
+cdef class RightReverseAssemble(RightDirectAssemble):    
+    # Same aligner as RightDirectAssemble, except that the sequence assigned
+    # to seqB is replaced by seq.complement() before being aligned.
+    property seqB:
+            def __get__(self):
+                # Return the object wrapped by the stored (complemented) sequence.
+                return self.verticalSeq.wrapped
+            def __set__(self, seq):
+                # Flag the change, then store and index the complemented sequence.
+                self.sequenceChanged=True
+                self.verticalSeq=seq.complement()
+                self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
diff --git a/src/obitools/align/_sse.h b/src/obitools/align/_sse.h
new file mode 100644
index 0000000..bf1bd64
--- /dev/null
+++ b/src/obitools/align/_sse.h
@@ -0,0 +1,929 @@
+#include <string.h>
+#include <inttypes.h>
+#ifdef __SSE2__
+#include <xmmintrin.h>
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+#endif /* __SSE2__ */
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#define MIN(x,y) (((x)<(y)) ? (x):(y))
+#define ALIGN __attribute__((aligned(16)))
+typedef __m128i vUInt8;
+typedef __m128i vInt8;
+typedef __m128i vUInt16;
+typedef __m128i vInt16;
+typedef __m128i vUInt64;
+typedef union
+     __m128i i;
+     int64_t  s64[ 2];
+     int16_t  s16[ 8];
+     int8_t   s8 [16];
+     uint8_t  u8 [16];
+     uint16_t u16[8 ];
+	 uint32_t u32[4 ];
+     uint64_t u64[2 ];
+} um128;
+typedef union
+    {
+	    vUInt8 m;
+	    uint8_t c[16];
+    } uchar_v;
+typedef union
+	{
+		vUInt16 m;
+		uint16_t c[8];
+	} ushort_v;
+typedef union
+	{
+		vUInt64 m;
+		uint64_t c[2];
+	} uint64_v;
+#ifdef __SSE2__
+// Extract byte p (0..15) of r using the SSE2 16-bit extract: even p takes the
+// low byte of 16-bit lane p/2, odd p takes its high byte. Any other p yields 0.
+// NOTE(review): the result is narrowed to int8_t, whereas the emulated
+// _em_extract_epi8 returns an int in 0..255 - confirm which one callers expect.
+static inline int8_t _s2_extract_epi8(__m128i r, const int p)
+#define ACTIONP(r,x) return _mm_extract_epi16(r,x) & 0xFF
+#define ACTIONI(r,x) return _mm_extract_epi16(r,x) >> 8
+	switch (p) {
+	case  0:  ACTIONP(r,0);
+	case  1:  ACTIONI(r,0);
+	case  2:  ACTIONP(r,1);
+	case  3:  ACTIONI(r,1);
+	case  4:  ACTIONP(r,2);
+	case  5:  ACTIONI(r,2);
+	case  6:  ACTIONP(r,3);
+	case  7:  ACTIONI(r,3);
+	case  8:  ACTIONP(r,4);
+	case  9:  ACTIONI(r,4);
+	case 10:  ACTIONP(r,5);
+	case 11:  ACTIONI(r,5);
+	case 12:  ACTIONP(r,6);
+	case 13:  ACTIONI(r,6);
+	case 14:  ACTIONP(r,7);
+	case 15:  ACTIONI(r,7);
+	}
+#undef ACTIONP
+#undef ACTIONI
+	return 0;
+// Per-byte signed maximum built from SSE2 primitives: select a where a > b
+// (signed compare), b elsewhere.
+static inline __m128i _s2_max_epi8(__m128i a, __m128i b)
+	__m128i mask  = _mm_cmpgt_epi8( a, b );
+	a = _mm_and_si128   (a,mask );
+	b = _mm_andnot_si128(mask,b);
+	return _mm_or_si128(a,b);
+// Per-byte signed minimum built from SSE2 primitives: select a where a < b
+// (signed compare), b elsewhere.
+static inline __m128i _s2_min_epi8(__m128i a, __m128i b)
+	__m128i mask  = _mm_cmplt_epi8( a, b );
+	a = _mm_and_si128   (a,mask );
+	b = _mm_andnot_si128(mask,b);
+	return _mm_or_si128(a,b);
+// Return r with byte p (0..15) replaced by the low 8 bits of b. The 16-bit
+// lane p/2 is read, the selected half is replaced and the lane written back.
+// For p outside 0..15 an all-zero register is returned.
+static inline __m128i _s2_insert_epi8(__m128i r, int b, const int p)
+#define ACTIONP(r,x) return _mm_insert_epi16(r,(_mm_extract_epi16(r,x) & 0xFF00) | (b & 0x00FF),x)
+#define ACTIONI(r,x) return _mm_insert_epi16(r,(_mm_extract_epi16(r,x) & 0x00FF) | ((b << 8)& 0xFF00),x)
+	switch (p) {
+	case  0:  ACTIONP(r,0);
+	case  1:  ACTIONI(r,0);
+	case  2:  ACTIONP(r,1);
+	case  3:  ACTIONI(r,1);
+	case  4:  ACTIONP(r,2);
+	case  5:  ACTIONI(r,2);
+	case  6:  ACTIONP(r,3);
+	case  7:  ACTIONI(r,3);
+	case  8:  ACTIONP(r,4);
+	case  9:  ACTIONI(r,4);
+	case 10:  ACTIONP(r,5);
+	case 11:  ACTIONI(r,5);
+	case 12:  ACTIONP(r,6);
+	case 13:  ACTIONI(r,6);
+	case 14:  ACTIONP(r,7);
+	case 15:  ACTIONI(r,7);
+	}
+#undef ACTIONP
+#undef ACTIONI
+	return _mm_setzero_si128();
+// Fill a SSE Register with 16 time the same 8bits integer value
+#define _MM_SET1_EPI8(x)        _mm_set1_epi8(x)
+#define _MM_INSERT_EPI8(r,x,i)	_s2_insert_epi8((r),(x),(i))
+#define _MM_CMPEQ_EPI8(x,y)     _mm_cmpeq_epi8((x),(y))
+#define _MM_CMPGT_EPI8(x,y)     _mm_cmpgt_epi8((x),(y))
+#define _MM_CMPLT_EPI8(x,y)     _mm_cmplt_epi8((x),(y))
+#define _MM_MAX_EPI8(x,y)       _s2_max_epi8((x),(y))
+#define _MM_MIN_EPI8(x,y)       _s2_min_epi8((x),(y))
+#define _MM_ADD_EPI8(x,y)       _mm_add_epi8((x),(y))
+#define _MM_SUB_EPI8(x,y)       _mm_sub_epi8((x),(y))
+#define _MM_EXTRACT_EPI8(r,p)   _s2_extract_epi8((r),(p))
+#define _MM_MIN_EPU8(x,y)       _mm_min_epu8((x),(y))
+// Fill a SSE Register with 8 time the same 16bits integer value
+#define _MM_SET1_EPI16(x)       _mm_set1_epi16(x)
+#define _MM_INSERT_EPI16(r,x,i)	_mm_insert_epi16((r),(x),(i))
+#define _MM_CMPEQ_EPI16(x,y)    _mm_cmpeq_epi16((x),(y))
+#define _MM_CMPGT_EPI16(x,y)    _mm_cmpgt_epi16((x),(y))
+#define _MM_CMPLT_EPI16(x,y)    _mm_cmplt_epi16((x),(y))
+#define _MM_MAX_EPI16(x,y)      _mm_max_epi16((x),(y))
+#define _MM_MIN_EPI16(x,y)      _mm_min_epi16((x),(y))
+#define _MM_ADD_EPI16(x,y)      _mm_add_epi16((x),(y))
+#define _MM_SUB_EPI16(x,y)      _mm_sub_epi16((x),(y))
+#define _MM_EXTRACT_EPI16(r,p)  _mm_extract_epi16((r),(p))
+#define _MM_UNPACKLO_EPI8(a,b)  _mm_unpacklo_epi8((a),(b))
+#define _MM_UNPACKHI_EPI8(a,b)  _mm_unpackhi_epi8((a),(b))
+#define _MM_ADDS_EPU16(x,y)     _mm_adds_epu16((x),(y))
+#define _MM_SRLI_EPI64(r,x)     _mm_srli_epi64((r),(x))
+#define _MM_SLLI_EPI64(r,x)     _mm_slli_epi64((r),(x))
+// Set a SSE Register to 0
+#define _MM_SETZERO_SI128() _mm_setzero_si128()
+#define _MM_AND_SI128(x,y)     _mm_and_si128((x),(y))
+#define _MM_ANDNOT_SI128(x,y)  _mm_andnot_si128((x),(y))
+#define _MM_OR_SI128(x,y)      _mm_or_si128((x),(y))
+#define _MM_XOR_SI128(x,y)     _mm_xor_si128((x),(y))
+#define _MM_SLLI_SI128(r,s)	   _mm_slli_si128((r),(s))
+#define _MM_SRLI_SI128(r,s)    _mm_srli_si128((r),(s))
+// Load a SSE register from an unaligned address
+#define _MM_LOADU_SI128(x) _mm_loadu_si128(x)
+// #define	_MM_UNPACKLO_EPI8(x,y) _mm_unpacklo_epi8((x),(y))
+#else /* __SSE2__ Not defined */
+// Scalar fallback: return a 128-bit value whose 16 bytes all equal the low
+// 8 bits of x.
+static inline __m128i _em_set1_epi8(int x)
+	um128 a;
+	x&=0xFF;
+	a.s8[0]=x;
+	a.s8[1]=x;
+	a.u16[1]=a.u16[0];	// bytes 0-1 copied to bytes 2-3
+	a.u32[1]=a.u32[0];	// bytes 0-3 copied to bytes 4-7
+	a.u64[1]=a.u64[0];	// bytes 0-7 copied to bytes 8-15
+	return a.i;
+// Scalar fallback: return r with byte i replaced by the low 8 bits of x.
+// i is used as an array index without any range check.
+static inline __m128i _em_insert_epi8(__m128i r, int x, const int i)
+	um128 a;
+	a.i=r;
+	a.s8[i]=x & 0xFF;
+	return a.i;
+// Scalar fallback for a per-byte equality compare: each result byte is 0xFF
+// where the two input bytes are equal and 0 otherwise.
+static inline __m128i _em_cmpeq_epi8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.s8[k]=(lhs.s8[k]==rhs.s8[k]) ? 0xFF:0;
+	return out.i;
+}
+// Scalar fallback for a per-byte signed "greater than" compare: each result
+// byte is 0xFF where a's byte exceeds b's byte and 0 otherwise.
+static inline __m128i _em_cmpgt_epi8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.s8[k]=(lhs.s8[k]>rhs.s8[k]) ? 0xFF:0;
+	return out.i;
+}
+// Scalar fallback for a per-byte signed "less than" compare: each result
+// byte is 0xFF where a's byte is below b's byte and 0 otherwise.
+static inline __m128i _em_cmplt_epi8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.s8[k]=(lhs.s8[k]<rhs.s8[k]) ? 0xFF:0;
+	return out.i;
+}
+// Scalar fallback: per-byte signed maximum of a and b.
+static inline __m128i _em_max_epi8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.s8[k]=MAX(lhs.s8[k],rhs.s8[k]);
+	return out.i;
+}
+// Scalar fallback: per-byte signed minimum of a and b.
+static inline __m128i _em_min_epi8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.s8[k]=MIN(lhs.s8[k],rhs.s8[k]);
+	return out.i;
+}
+// Scalar fallback: per-byte addition of a and b, each sum stored back into
+// an 8-bit signed lane.
+static inline __m128i _em_add_epi8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.s8[k]=lhs.s8[k]+rhs.s8[k];
+	return out.i;
+}
+// Scalar fallback for _mm_sub_epi8: per-byte a - b with wrap-around.
+// The previous body added the operands instead of subtracting them.
+static inline __m128i _em_sub_epi8(__m128i a, __m128i b)
+{
+	um128 x;
+	um128 y;
+	um128 r;
+	int   z;
+	x.i=a;
+	y.i=b;
+	// Work on the unsigned view: modulo-256 wrap is then well defined and
+	// gives the same bit pattern as the signed wrapping subtraction.
+	for (z=0;z<16;z++)
+		r.u8[z]=(uint8_t)(x.u8[z]-y.u8[z]);
+	return r.i;
+}
+// Scalar fallback: return byte i of r as an int in 0..255.
+// i is used as an array index without any range check.
+static inline int _em_extract_epi8(__m128i r, const int i)
+	um128 a;
+	a.i=r;
+	return a.s8[i] & 0xFF;
+// Scalar fallback: per-byte unsigned minimum of a and b.
+static inline __m128i _em_min_epu8(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<16;k++)
+		out.u8[k]=MIN(lhs.u8[k],rhs.u8[k]);
+	return out.i;
+}
+// Scalar fallback: return a 128-bit value whose eight 16-bit lanes all equal
+// the low 16 bits of x.
+static inline __m128i _em_set1_epi16(int x)
+	um128 a;
+	x&=0xFFFF;
+	a.s16[0]=x;
+	a.s16[1]=x;
+	a.u32[1]=a.u32[0];	// lanes 0-1 copied to lanes 2-3
+	a.u64[1]=a.u64[0];	// lanes 0-3 copied to lanes 4-7
+	return a.i;
+// Scalar fallback: return r with 16-bit lane i replaced by the low 16 bits
+// of x. i is used as an array index without any range check.
+static inline __m128i _em_insert_epi16(__m128i r, int x, const int i)
+	um128 a;
+	a.i=r;
+	a.s16[i]=x & 0xFFFF;
+	return a.i;
+// Scalar fallback for a per-lane (16-bit) equality compare: each result lane
+// is 0xFFFF where the two input lanes are equal and 0 otherwise.
+static inline __m128i _em_cmpeq_epi16(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<8;k++)
+		out.s16[k]=(lhs.s16[k]==rhs.s16[k]) ? 0xFFFF:0;
+	return out.i;
+}
+// Scalar fallback for a per-lane (16-bit) signed "greater than" compare:
+// each result lane is 0xFFFF where a's lane exceeds b's lane, else 0.
+static inline __m128i _em_cmpgt_epi16(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<8;k++)
+		out.s16[k]=(lhs.s16[k]>rhs.s16[k]) ? 0xFFFF:0;
+	return out.i;
+}
+// Scalar fallback for a per-lane (16-bit) signed "less than" compare:
+// each result lane is 0xFFFF where a's lane is below b's lane, else 0.
+static inline __m128i _em_cmplt_epi16(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<8;k++)
+		out.s16[k]=(lhs.s16[k]<rhs.s16[k]) ? 0xFFFF:0;
+	return out.i;
+}
+// Scalar fallback: per-lane (16-bit) signed maximum of a and b.
+static inline __m128i _em_max_epi16(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<8;k++)
+		out.s16[k]=MAX(lhs.s16[k],rhs.s16[k]);
+	return out.i;
+}
+// Scalar fallback: per-lane (16-bit) signed minimum of a and b.
+static inline __m128i _em_min_epi16(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<8;k++)
+		out.s16[k]=MIN(lhs.s16[k],rhs.s16[k]);
+	return out.i;
+}
+// Scalar fallback: per-lane (16-bit) addition of a and b, each sum stored
+// back into a 16-bit signed lane.
+static inline __m128i _em_add_epi16(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	int   k;
+	lhs.i=a;
+	rhs.i=b;
+	for (k=0;k<8;k++)
+		out.s16[k]=lhs.s16[k]+rhs.s16[k];
+	return out.i;
+}
+// Scalar fallback for _mm_sub_epi16: per-lane (16-bit) a - b with wrap-around.
+// The previous body added the operands instead of subtracting them.
+static inline __m128i _em_sub_epi16(__m128i a, __m128i b)
+{
+	um128 x;
+	um128 y;
+	um128 r;
+	int   z;
+	x.i=a;
+	y.i=b;
+	// Work on the unsigned view: modulo-65536 wrap is then well defined and
+	// gives the same bit pattern as the signed wrapping subtraction.
+	for (z=0;z<8;z++)
+		r.u16[z]=(uint16_t)(x.u16[z]-y.u16[z]);
+	return r.i;
+}
+// Scalar fallback: return 16-bit lane i of r as an int in 0..65535.
+// i is used as an array index without any range check.
+static inline int _em_extract_epi16(__m128i r, const int i)
+	um128 a;
+	a.i=r;
+	return a.s16[i] & 0xFFFF;
+// Scalar fallback for _mm_unpacklo_epi8: interleave the low 8 bytes of a and
+// b, so that 16-bit lane z holds a's byte z in its low half and b's byte z in
+// its high half.
+// The bytes are read through the unsigned view: with the signed view a byte
+// >= 0x80 of a was sign-extended and overwrote the high half of the lane.
+static inline __m128i _em_unpacklo_epi8(__m128i a, __m128i b)
+{
+	um128 x;
+	um128 y;
+	um128 r;
+	int   z;
+	x.i=a;
+	y.i=b;
+	for (z=0;z<8;z++)
+		r.u16[z]=(uint16_t)((((uint16_t)y.u8[z]) << 8) | (uint16_t)x.u8[z]);
+	return r.i;
+}
+// Scalar fallback for _mm_unpackhi_epi8: interleave the high 8 bytes of a and
+// b, so that 16-bit lane z holds a's byte z+8 in its low half and b's byte
+// z+8 in its high half.
+// The bytes are read through the unsigned view: with the signed view a byte
+// >= 0x80 of a was sign-extended and overwrote the high half of the lane.
+static inline __m128i _em_unpackhi_epi8(__m128i a, __m128i b)
+{
+	um128 x;
+	um128 y;
+	um128 r;
+	int   z;
+	x.i=a;
+	y.i=b;
+	for (z=0;z<8;z++)
+		r.u16[z]=(uint16_t)((((uint16_t)y.u8[z+8]) << 8) | (uint16_t)x.u8[z+8]);
+	return r.i;
+}
+// Scalar fallback for _mm_adds_epu16: per-lane (16-bit) unsigned addition
+// with saturation. The previous body let the sum wrap around, which differs
+// from the SSE2 instruction as soon as a lane exceeds 0xFFFF.
+static inline __m128i _em_adds_epu16(__m128i a, __m128i b)
+{
+	um128    x;
+	um128    y;
+	um128    r;
+	uint32_t sum;
+	int      z;
+	x.i=a;
+	y.i=b;
+	for (z=0;z<8;z++)
+	{
+		// Add in 32 bits so the overflow is visible, then clamp.
+		sum = (uint32_t)x.u16[z] + (uint32_t)y.u16[z];
+		r.u16[z] = (sum > 0xFFFF) ? 0xFFFF : (uint16_t)sum;
+	}
+	return r.i;
+}
+// Scalar fallback for _mm_srli_epi64: logical right shift of each 64-bit
+// half by b bits, shifting in zeros.
+// The previous body shifted the signed view, which on common compilers
+// replicates the sign bit instead of inserting zeros.
+static inline __m128i _em_srli_epi64(__m128i a, int b)
+{
+	um128 x;
+	x.i=a;
+	if (b < 0 || b > 63)
+	{
+		// Shifting a 64-bit value by 64 or more is undefined in C; the SSE2
+		// instruction yields zero for such counts.
+		x.u64[0]=x.u64[1]=0;
+	}
+	else
+	{
+		x.u64[0]>>=b;
+		x.u64[1]>>=b;
+	}
+	return x.i;
+}
+// Scalar fallback for _mm_slli_epi64: left shift of each 64-bit half by b
+// bits, shifting in zeros.
+// The unsigned view is used because left-shifting a negative signed value is
+// undefined behaviour in C.
+static inline __m128i _em_slli_epi64(__m128i a, int b)
+{
+	um128 x;
+	x.i=a;
+	if (b < 0 || b > 63)
+	{
+		// Shifting a 64-bit value by 64 or more is undefined in C; the SSE2
+		// instruction yields zero for such counts.
+		x.u64[0]=x.u64[1]=0;
+	}
+	else
+	{
+		x.u64[0]<<=b;
+		x.u64[1]<<=b;
+	}
+	return x.i;
+}
+// Scalar fallback: return a 128-bit value with both 64-bit halves set to zero.
+static inline __m128i _em_setzero_si128()
+	um128 x;
+	x.s64[0]=x.s64[1]=0;
+	return x.i;
+// Scalar fallback: bitwise AND of the full 128 bits, done as two 64-bit halves.
+static inline __m128i _em_and_si128(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	lhs.i=a;
+	rhs.i=b;
+	out.u64[0]=lhs.u64[0] & rhs.u64[0];
+	out.u64[1]=lhs.u64[1] & rhs.u64[1];
+	return out.i;
+}
+// Scalar fallback: (~a) & b over the full 128 bits, done as two 64-bit halves.
+static inline __m128i _em_andnot_si128(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	lhs.i=a;
+	rhs.i=b;
+	out.u64[0]=(~lhs.u64[0]) & rhs.u64[0];
+	out.u64[1]=(~lhs.u64[1]) & rhs.u64[1];
+	return out.i;
+}
+// Scalar fallback: bitwise OR of the full 128 bits, done as two 64-bit halves.
+static inline __m128i _em_or_si128(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	lhs.i=a;
+	rhs.i=b;
+	out.u64[0]=lhs.u64[0] | rhs.u64[0];
+	out.u64[1]=lhs.u64[1] | rhs.u64[1];
+	return out.i;
+}
+// Scalar fallback: bitwise XOR of the full 128 bits, done as two 64-bit halves.
+static inline __m128i _em_xor_si128(__m128i a, __m128i b)
+{
+	um128 lhs;
+	um128 rhs;
+	um128 out;
+	lhs.i=a;
+	rhs.i=b;
+	out.u64[0]=lhs.u64[0] ^ rhs.u64[0];
+	out.u64[1]=lhs.u64[1] ^ rhs.u64[1];
+	return out.i;
+}
+// Scalar fallback: move every byte b positions towards higher indices and
+// fill the b lowest bytes with zero. The update is done in place from byte 15
+// down to byte 0 so that each source byte is read before it is overwritten.
+static inline __m128i _em_slli_si128(__m128i a, int b)
+{
+	um128 v;
+	int   k;
+	v.i=a;
+	for (k=15;k>=0;k--)
+		v.u8[k]=(k>=b) ? v.u8[k-b]:0;
+	return v.i;
+}
+// Scalar fallback: move every byte b positions towards lower indices and
+// fill the b highest bytes with zero. The update is done in place from byte 0
+// up to byte 15 so that each source byte is read before it is overwritten.
+static inline __m128i _em_srli_si128(__m128i a, int b)
+{
+	um128 v;
+	int   k;
+	v.i=a;
+	for (k=0;k<16;k++)
+		v.u8[k]=((b+k) > 15) ? 0:v.u8[k+b];
+	return v.i;
+}
+// Scalar fallback: copy 16 bytes from P into a 128-bit value. memcpy is used,
+// so P does not need any particular alignment.
+inline static __m128i _em_loadu_si128(__m128i const *P)
+	__m128i tmp;
+	memcpy(&tmp,(const char *)P,16);
+	return tmp;
+#define _MM_SET1_EPI8(x)        _em_set1_epi8(x)
+#define _MM_INSERT_EPI8(r,x,i)	_em_insert_epi8((r),(x),(i))
+#define _MM_CMPEQ_EPI8(x,y)     _em_cmpeq_epi8((x),(y))
+#define _MM_CMPGT_EPI8(x,y)     _em_cmpgt_epi8((x),(y))
+#define _MM_CMPLT_EPI8(x,y)     _em_cmplt_epi8((x),(y))
+#define _MM_MAX_EPI8(x,y)       _em_max_epi8((x),(y))
+#define _MM_MIN_EPI8(x,y)       _em_min_epi8((x),(y))
+#define _MM_ADD_EPI8(x,y)       _em_add_epi8((x),(y))
+#define _MM_SUB_EPI8(x,y)       _em_sub_epi8((x),(y))
+#define _MM_EXTRACT_EPI8(r,p)   _em_extract_epi8((r),(p))
+#define _MM_MIN_EPU8(x,y)       _em_min_epu8((x),(y))
+#define _MM_SET1_EPI16(x)       _em_set1_epi16(x)
+#define _MM_INSERT_EPI16(r,x,i)	_em_insert_epi16((r),(x),(i))
+#define _MM_CMPEQ_EPI16(x,y)    _em_cmpeq_epi16((x),(y))
+#define _MM_CMPGT_EPI16(x,y)    _em_cmpgt_epi16((x),(y))
+#define _MM_CMPLT_EPI16(x,y)    _em_cmplt_epi16((x),(y))
+#define _MM_MAX_EPI16(x,y)      _em_max_epi16((x),(y))
+#define _MM_MIN_EPI16(x,y)      _em_min_epi16((x),(y))
+#define _MM_ADD_EPI16(x,y)      _em_add_epi16((x),(y))
+#define _MM_SUB_EPI16(x,y)      _em_sub_epi16((x),(y))
+#define _MM_EXTRACT_EPI16(r,p)  _em_extract_epi16((r),(p))
+#define _MM_UNPACKLO_EPI8(a,b)  _em_unpacklo_epi8((a),(b))
+#define _MM_UNPACKHI_EPI8(a,b)  _em_unpackhi_epi8((a),(b))
+#define _MM_ADDS_EPU16(x,y)     _em_adds_epu16((x),(y))
+#define _MM_SRLI_EPI64(r,x)     _em_srli_epi64((r),(x))
+#define _MM_SLLI_EPI64(r,x)     _em_slli_epi64((r),(x))
+#define _MM_SETZERO_SI128()     _em_setzero_si128()
+#define _MM_AND_SI128(x,y)      _em_and_si128((x),(y))
+#define _MM_ANDNOT_SI128(x,y)   _em_andnot_si128((x),(y))
+#define _MM_OR_SI128(x,y)       _em_or_si128((x),(y))
+#define _MM_XOR_SI128(x,y)      _em_xor_si128((x),(y))
+#define _MM_WOR_SI128(x,y)      _em_xor_si128((x),(y))  // misspelled former name, kept as an alias
+#define _MM_SLLI_SI128(r,s)	    _em_slli_si128((r),(s))
+#define _MM_SRLI_SI128(r,s)	    _em_srli_si128((r),(s))
+#define _MM_LOADU_SI128(x)      _em_loadu_si128(x)
+#endif /* __SSE2__ */
diff --git a/src/obitools/align/_upperbond.cfiles b/src/obitools/align/_upperbond.cfiles
new file mode 100644
index 0000000..51cb8ca
--- /dev/null
+++ b/src/obitools/align/_upperbond.cfiles
@@ -0,0 +1 @@
diff --git a/src/obitools/align/_upperbond.ext.1.c b/src/obitools/align/_upperbond.ext.1.c
new file mode 100644
index 0000000..ed2d060
--- /dev/null
+++ b/src/obitools/align/_upperbond.ext.1.c
@@ -0,0 +1,225 @@
+#include "_sse.h"
+#include <stdio.h>
+#include <math.h>
+// Turn 16 sequence bytes into 4-letter word codes. Each byte is first reduced
+// to 2 bits ((byte >> 1) & 3); three rounds of "shift the accumulated code
+// left by 2 bits, OR in the code of the next byte" then leave in output byte z
+// the 8-bit value  code[z]<<6 | code[z+1]<<4 | code[z+2]<<2 | code[z+3].
+// Output bytes 13..15 are built partly from zeros shifted in at the top.
+inline static uchar_v hash4m128(uchar_v frag)
+	uchar_v words;
+	vUInt8 mask_03= _MM_SET1_EPI8(0x03);        // fill the register with 16 copies of the same byte
+	vUInt8 mask_FC= _MM_SET1_EPI8(0xFC);
+	frag.m = _MM_SRLI_EPI64(frag.m,1);         // logical right shift of each of the 2 x 64 bits
+	frag.m = _MM_AND_SI128(frag.m,mask_03);    // AND over the 128 bits: keep 2 bits per byte
+	words.m= _MM_SLLI_EPI64(frag.m,2);
+	words.m= _MM_AND_SI128(words.m,mask_FC);   // drop bits that crossed a byte boundary
+	frag.m = _MM_SRLI_SI128(frag.m,1);         // byte z now holds the code of byte z+1
+	words.m= _MM_OR_SI128(words.m,frag.m);
+	words.m= _MM_SLLI_EPI64(words.m,2);
+	words.m= _MM_AND_SI128(words.m,mask_FC);
+	frag.m = _MM_SRLI_SI128(frag.m,1);
+	words.m= _MM_OR_SI128(words.m,frag.m);
+	words.m= _MM_SLLI_EPI64(words.m,2);
+	words.m= _MM_AND_SI128(words.m,mask_FC);
+	frag.m = _MM_SRLI_SI128(frag.m,1);
+	words.m= _MM_OR_SI128(words.m,frag.m);
+	return words;
+// Return 1 if at least one of the 16 bytes of data is zero, 0 otherwise.
+inline static int anyzerom128(vUInt8 data)
+	vUInt8 mask_00= _MM_SETZERO_SI128();
+	uint64_v tmp;
+	tmp.m = _MM_CMPEQ_EPI8(data,mask_00);      // 0xFF in every byte equal to zero
+	return (int)(tmp.c[0]!=0 || tmp.c[1]!=0);
+// Copy the 16 bytes of data to the memory pointed to by table.
+inline static void dumpm128(unsigned short *table,vUInt8 data)
+	memcpy(table,&data,16);
+// Count the 4-letter words of a NUL-terminated sequence.
+//   sequence : input string, read in 16-byte blocks advancing 12 bytes at a time
+//   table    : 256 counters (zeroed here), indexed by the word code from
+//              hash4m128; each counter stops at 255
+//   count    : if not NULL, receives the number of words processed
+// Returns the number of increments that were dropped because a counter was
+// already at 255.
+// NOTE(review): every block load reads 16 bytes starting at s, including the
+// block that contains the terminating NUL, so up to 15 bytes located after the
+// terminator can be read - confirm callers provide buffers where this is safe.
+int buildTable(const char* sequence, unsigned char *table, int *count)
+	int overflow = 0;
+	int wc=0;
+	int i;
+	vUInt8 mask_00= _MM_SETZERO_SI128();
+	uchar_v frag;
+	uchar_v words;
+	uchar_v zero;
+	char* s;
+	s=(char*)sequence;
+	memset(table,0,256*sizeof(unsigned char));
+	// encode ascii sequence with  A : 00 C : 01  T: 10   G : 11
+	// Main loop: runs while the current 16-byte block contains no NUL byte;
+	// 12 words (start positions 0..11) are counted per block.
+	for(frag.m=_MM_LOADU_SI128((vUInt8*)s);
+		! anyzerom128(frag.m);
+		s+=12,frag.m=_MM_LOADU_SI128((vUInt8*)s))
+	{
+		words= hash4m128(frag);
+		// printf("%d %d %d %d\n",words.c[0],words.c[1],words.c[2],words.c[3]);
+		if (table[words.c[0]]<255)  table[words.c[0]]++;  else overflow++;
+		if (table[words.c[1]]<255)  table[words.c[1]]++;  else overflow++;
+		if (table[words.c[2]]<255)  table[words.c[2]]++;  else overflow++;
+		if (table[words.c[3]]<255)  table[words.c[3]]++;  else overflow++;
+		if (table[words.c[4]]<255)  table[words.c[4]]++;  else overflow++;
+		if (table[words.c[5]]<255)  table[words.c[5]]++;  else overflow++;
+		if (table[words.c[6]]<255)  table[words.c[6]]++;  else overflow++;
+		if (table[words.c[7]]<255)  table[words.c[7]]++;  else overflow++;
+		if (table[words.c[8]]<255)  table[words.c[8]]++;  else overflow++;
+		if (table[words.c[9]]<255)  table[words.c[9]]++;  else overflow++;
+		if (table[words.c[10]]<255) table[words.c[10]]++; else overflow++;
+		if (table[words.c[11]]<255) table[words.c[11]]++; else overflow++;
+		wc+=12;
+	}
+	// Last block (it contains the NUL): zero.c[k] is non-zero where byte k is NUL.
+	zero.m=_MM_CMPEQ_EPI8(frag.m,mask_00);
+	//printf("frag=%d %d %d %d\n",frag.c[0],frag.c[1],frag.c[2],frag.c[3]);
+	//printf("zero=%d %d %d %d\n",zero.c[0],zero.c[1],zero.c[2],zero.c[3]);
+	words = hash4m128(frag);
+	// Count the remaining words only if the first 4 bytes are all non-NUL;
+	// word i is counted as long as its last byte (i+3) is not the NUL.
+	if (zero.c[0]+zero.c[1]+zero.c[2]+zero.c[3]==0)
+		for(i=0;zero.c[i+3]==0;i++,wc++)
+			if (table[words.c[i]]<255) table[words.c[i]]++;  else overflow++;
+	if (count) *count=wc;
+	return overflow;
+// Take the per-byte unsigned minimum of ft1 and ft2, widen the 16 results to
+// 16 bits (interleaving with zero bytes) and return the eight 16-bit sums
+// low-half + high-half, added with _MM_ADDS_EPU16.
+static inline vUInt16 partialminsum(vUInt8 ft1,vUInt8 ft2)
+	vUInt8   mini;
+	vUInt16  minilo;
+	vUInt16  minihi;
+	vUInt8 mask_00= _MM_SETZERO_SI128();
+	mini      = _MM_MIN_EPU8(ft1,ft2);
+	minilo    = _MM_UNPACKLO_EPI8(mini,mask_00);
+	minihi    = _MM_UNPACKHI_EPI8(mini,mask_00);
+	return _MM_ADDS_EPU16(minilo,minihi);
+// Sum, over the 256 entries of two counter tables, the minimum of the two
+// counters, then add the smaller of over1 and over2.
+//   t1, t2       : tables of 256 unsigned char counters (read as 16 blocks of 16 bytes)
+//   over1, over2 : overflow counts associated with each table
+int compareTable(unsigned char *t1, int over1, unsigned char* t2,  int over2)
+	vUInt8   ft1;
+	vUInt8   ft2;
+	vUInt8  *table1=(vUInt8*)t1;
+	vUInt8  *table2=(vUInt8*)t2;
+	ushort_v summini;
+	int      i;
+	int      total;
+	ft1 = _MM_LOADU_SI128(table1);
+	ft2 = _MM_LOADU_SI128(table2);
+	summini.m = partialminsum(ft1,ft2);
+	table1++;
+	table2++;
+	// Accumulate the 15 remaining 16-byte blocks into eight 16-bit partial sums.
+	for (i=1;i<16;i++,table1++,table2++)
+	{
+		ft1 = _MM_LOADU_SI128(table1);
+		ft2 = _MM_LOADU_SI128(table2);
+		summini.m = _MM_ADDS_EPU16(summini.m,partialminsum(ft1,ft2));
+	}
+	// Finishing the sum process
+	summini.m = _MM_ADDS_EPU16(summini.m,_MM_SRLI_SI128(summini.m,8)); // sum the 4 firsts with the 4 lasts
+	summini.m = _MM_ADDS_EPU16(summini.m,_MM_SRLI_SI128(summini.m,4)); // then lanes 2-3 into lanes 0-1
+	total = summini.c[0]+summini.c[1];
+	total+= (over1 < over2) ? over1:over2;
+	return total;
+// Compute a word-count threshold from a word count and an identity fraction.
+// wordcount is first increased by 3; error = floor(wordcount * (1 - identity));
+// lmax = (wordcount - error) / (error + 1). Returns 0 when lmax < 4, otherwise
+// (lmax - 3) * (error + 1) + (wordcount - error) % (error + 1).
+// NOTE(review): error + 1 is used as a divisor; it is zero if identity is
+// large enough (> 1) to make error equal to -1 - confirm callers keep
+// identity within [0, 1].
+int threshold4(int wordcount,double identity)
+	int error;
+	int lmax;
+	wordcount+=3;
+	error = (int)floor((double)wordcount * ((double)1.0-identity));
+	lmax  = (wordcount - error) / (error + 1);
+	if (lmax < 4)
+		return 0;
+	return    (lmax  - 3) \
+			* (error + 1) \
+			+ ((wordcount - error) % (error + 1));
+// Compute a word-count threshold from a reference length and an LCS length.
+// The lcs positions are split into nbfrag = 2 * (reflen - lcs) + 1 fragments:
+// R of them have length smin + 1 and the others length smin. A fragment of
+// length L contributes max(L - 3, 0) to the returned total.
+int thresholdLCS4(int32_t reflen,int32_t lcs)
+	int nbfrag;
+	int smin;
+	int R;
+	int common;
+	nbfrag = (reflen - lcs)*2 + 1;
+	smin   = lcs/nbfrag;
+	R = lcs - smin * nbfrag;
+	common = MAX(smin - 2,0) * R + MAX(smin - 3,0) * (nbfrag - R);
+	return  common;
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#define MIN(x,y) (((x)<(y)) ? (x):(y))
+// Decide from two word-count tables whether the requested LCS score can be
+// reached.
+//   len1, len2         : sequence lengths
+//   t1/over1, t2/over2 : counter table and overflow count of each sequence
+//   minimum            : required LCS, absolute or as a fraction of the
+//                        reference length when normalized is non-zero
+//   large              : reference length is the longer (non-zero) or the
+//                        shorter (zero) of the two lengths
+// Returns 0 when the required LCS exceeds the shorter length; otherwise
+// non-zero iff compareTable(...) >= thresholdLCS4(reference length, LCS).
+int ispossible(int len1, unsigned char *t1, int over1,
+		       int len2, unsigned char* t2, int over2,
+		       double minimum, int normalized, int large)
+{
+	int32_t shortest = MIN(len1,len2);
+	int32_t reflen   = large ? MAX(len1,len2) : shortest;
+	int32_t lcs;
+	if (normalized)
+		lcs = (int32_t)floor((double)reflen * minimum);
+	else
+		lcs = (int32_t) minimum;
+	if (lcs > shortest)
+		return 0;
+	return compareTable(t1,over1,t2,over2) >= thresholdLCS4(reflen,lcs);
+}
diff --git a/src/obitools/align/_upperbond.h b/src/obitools/align/_upperbond.h
new file mode 100644
index 0000000..873584b
--- /dev/null
+++ b/src/obitools/align/_upperbond.h
@@ -0,0 +1,7 @@
+int buildTable(const char *sequence, unsigned char *table, int *count);
+int compareTable(unsigned char *t1, int over1, unsigned char* t2,  int over2);
+int threshold4(int wordcount,double identity);
+int thresholdLCS4(int32_t reflen,int32_t lcs);
+int ispossible(int len1, unsigned char *t1, int over1,
+		       int len2, unsigned char* t2, int over2,
+		       double minimum, int normalized, int large);
diff --git a/src/obitools/align/_upperbond.pxd b/src/obitools/align/_upperbond.pxd
new file mode 100644
index 0000000..3f7ec02
--- /dev/null
+++ b/src/obitools/align/_upperbond.pxd
@@ -0,0 +1,16 @@
+from cpython cimport array
+cdef extern from *:
+    ctypedef char* const_char_ptr "const char*"
+cdef import from "_upperbond.h":
+    int buildTable(const_char_ptr sequence, unsigned char *table, int *count)
+    int compareTable(unsigned char *t1, int over1, unsigned char* t2,  int over2)
+    int threshold4(int wordcount,double identity)
+    int thresholdLCS4(int reflen,int lcs)
+    bint ispossible(int len1, unsigned char *t1, int over1,
+                   int len2, unsigned char* t2, int over2,
+                   double minimum, bint normalized, bint large)
+cdef array.array[unsigned char] newtable()
diff --git a/src/obitools/align/_upperbond.pyx b/src/obitools/align/_upperbond.pyx
new file mode 100644
index 0000000..b77358d
--- /dev/null
+++ b/src/obitools/align/_upperbond.pyx
@@ -0,0 +1,90 @@
+'''
+Created on 6 Nov. 2009
+@author: coissac
+'''
+from _dynamic cimport *
+from obitools import BioSequence
+from _upperbond cimport *
+#from libupperbond import buildTable
+cdef array.array[unsigned char] newtable():
+    # Return a new array of unsigned bytes ('B') resized to 256 items.
+    # NOTE(review): array.resize presumably leaves the added items
+    # uninitialised; buildTable() zeroes its table before counting.
+    table = array.array('B',[0])
+    array.resize(table,256)
+    return table
+def indexSequences(seq,double threshold=0.95):
+    # Build the 4-letter word table of str(seq).
+    # Returns (table, overflow, wordmin): the 256-counter table, the overflow
+    # count reported by buildTable and threshold4(wordcount, threshold).
+    cdef bytes sequence
+    cdef array.array[unsigned char] table
+    cdef int overflow
+    cdef int wordcount
+    cdef int wordmin
+    table = newtable()
+    sequence=bytes(str(seq))
+    overflow = buildTable(sequence,table.data.as_uchars,&wordcount)
+    wordmin = threshold4(wordcount,threshold)
+    return (table,overflow,wordmin)
+cpdef int countCommonWords(array.array table1,
+                       int overflow1,
+                       array.array table2,
+                       int overflow2):
+    # Thin wrapper: pass both tables and overflow counts to the C function
+    # compareTable and return its result.
+    return compareTable(table1.data.as_uchars,overflow1,
+                        table2.data.as_uchars,overflow2)
+cpdef bint isLCSReachable(object seq1,
+                          object seq2,
+                          double minimum,
+                          bint normalized=False,
+                          bint large=True):
+    # Return the result of the C function ispossible() for the two sequences.
+    # For each sequence the word table and overflow count are taken from its
+    # word4table / word4over attributes when it is a BioSequence that already
+    # has a table; otherwise they are computed with buildTable() from
+    # str(sequence) and, for a BioSequence, stored back on the object.
+    cdef bytes se1
+    cdef bytes se2
+    cdef int l1 = len(seq1)
+    cdef int l2 = len(seq2)
+    cdef array.array[unsigned char] w1
+    cdef array.array[unsigned char] w2
+    cdef int o1
+    cdef int o2
+    cdef int wordcount  # @DuplicatedSignature
+    cdef bint possible
+    cdef char *s1
+    cdef char *s2
+    if isinstance(seq1, BioSequence) and seq1.word4table is not None:
+        w1 = seq1.word4table
+        o1 = seq1.word4over
+    else:
+        # se1 holds the bytes object so that the char* s1 stays valid.
+        se1=bytes(str(seq1))
+        s1=se1
+        w1 = newtable()
+        o1 = buildTable(s1,w1.data.as_uchars,&wordcount)
+        if isinstance(seq1, BioSequence):
+            seq1.word4table=w1
+            seq1.word4over=o1
+    if isinstance(seq2, BioSequence) and seq2.word4table is not None:
+        w2 = seq2.word4table
+        o2 = seq2.word4over
+    else:
+        # se2 holds the bytes object so that the char* s2 stays valid.
+        se2=bytes(str(seq2))
+        s2=se2
+        w2 = newtable()
+        o2 = buildTable(s2,w2.data.as_uchars,&wordcount)
+        if isinstance(seq2, BioSequence) :
+            seq2.word4table=w2
+            seq2.word4over=o2
+    possible = ispossible(l1, w1.data.as_uchars, o1,
+                          l2, w2.data.as_uchars, o2,
+                          minimum,normalized,large)
+    return possible
diff --git a/src/obitools/align/homopolymere.py b/src/obitools/align/homopolymere.py
new file mode 100644
index 0000000..5efcbff
--- /dev/null
+++ b/src/obitools/align/homopolymere.py
@@ -0,0 +1,56 @@
+'''
+Created on 14 mai 2009
+@author: coissac
+'''
+from obitools import WrappedBioSequence
+class HomoNucBioSeq(WrappedBioSequence):
+    '''
+    classdocs
+    '''
+    def __init__(self,reference,id=None,definition=None,**info):
+        '''
+        Constructor
+        '''
+        assert reference.isNucleotide(),"reference must be a nucleic sequence"
+        WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info)
+        self.__cleanHomopolymer()
+    def __cleanHomopolymer(self):
+        s = []
+        c = []
+        old=None
+        nc=0
+        for n in self._wrapped:
+            if old is not None and n!=old:
+                s.append(old)
+                c.append(nc)
+                nc=0 
+                old=n
+            nc+=1
+        self._cached=''.join(s)
+        self['homopolymer']=c
+        self._cumulative=[]
+        sum=0
+        for c in self._count:
+            sum+=c
+            self._cumulative.append(sum)
+    def __len__(self):
+        return len(self._cached)
+    def getStr(self):
+        return self._cached
+    def __iter__(self):
+        return iter(self._cached)
+    def _posInWrapped(self,position):
+        return self._cumulative[position]
\ No newline at end of file
diff --git a/src/obitools/align/ssearch.py b/src/obitools/align/ssearch.py
new file mode 100755
index 0000000..55a74ce
--- /dev/null
+++ b/src/obitools/align/ssearch.py
@@ -0,0 +1,46 @@
+import os
+import re
+from obitools.fasta import formatFasta
+class SsearchParser(object):
+    '''
+    Parse the text report of a ssearch/fasta run.
+
+    After construction the instance exposes:
+      - C{data}: the whole report text
+      - C{query}: identifier captured after the '>' marks on the line
+        following 'Query:'
+      - C{queryLength}: integer found before ' nt' on that same line
+      - C{props}: list of dictionaries with the keys 'ac', 'identity',
+        'subjectlength' and 'matchlength', one per parsed line of the
+        'The best scores are:' section (empty when the section is absent)
+    '''
+    _matchQuery = re.compile(r"^Query:.+\n.+?>+([^ ]+)", re.MULTILINE)
+    _matchLQuery = re.compile(r"^Query:.+\n.+?(\d+)(?= nt\n)", re.MULTILINE)
+    _matchProp   = re.compile(r"^The best scores are:.*\n(.+?)>>>", re.DOTALL|re.MULTILINE)
+    def __init__(self,file):
+        '''
+        @param file: a file name or an object with a C{read()} method
+        @type file: str or file like object
+        '''
+        opened = isinstance(file,str)
+        if opened:
+            file = open(file,'rU')
+        try:
+            self.data = file.read()
+        finally:
+            # only close what was opened here, a stream supplied by the
+            # caller stays under the caller's control
+            if opened:
+                file.close()
+        self.query= SsearchParser._matchQuery.search(self.data).group(1)
+        self.queryLength= int(SsearchParser._matchLQuery.search(self.data).group(1))
+        # always defined, so that a report without hit table gives an
+        # empty list instead of a missing attribute
+        self.props=[]
+        props = SsearchParser._matchProp.search(self.data)
+        if props:
+            # drop the header line and the two trailing lines of the match
+            props=props.group(0).split('\n')[1:-2]
+            for line in props:
+                subject,tab = line.split('\t')
+                tab=tab.split()
+                ssp = subject.split()
+                ac = ssp[0]
+                # fifth field from the end, stripped of its last character
+                dbl= int(ssp[-5][:-1])
+                ident = float(tab[0])
+                matchlen = abs(int(tab[5]) - int(tab[4])) +1
+                self.props.append({"ac"       :ac,
+                                   "identity" :ident,
+                                   "subjectlength":dbl,
+                                   'matchlength' : matchlen})
+def run(seq,database,program='fasta35',opts=''):
+    '''
+    Run an external search program with one query sequence and parse
+    its report.
+
+    @param seq: the query, written to the program's standard input
+                through C{formatFasta}
+    @param database: last word of the command line
+    @param program: name of the executable
+    @param opts: option string inserted between program and database
+    @return: the couple (seq, SsearchParser instance)
+    '''
+    # the command line is a plain string built from the three arguments;
+    # the error stream is opened but never read in this function
+    ssearchin,ssearchout,ssearcherr = os.popen3("%s %s %s" % (program,opts,database))
+    # Python 2 print-to-file syntax: send the formatted query to the child
+    print >>ssearchin,formatFasta(seq)
+    # closing stdin signals the end of the input to the child process
+    ssearchin.close()
+    result = SsearchParser(ssearchout)
+    return seq,result
+def ssearchIterator(sequenceIterator,database,program='ssearch35',opts=''):
+    '''
+    Lazily run the search program on each sequence of an iterator.
+
+    @param sequenceIterator: iterable of query sequences
+    @param database: forwarded to L{run}
+    @param program: forwarded to L{run}
+    @param opts: forwarded to L{run}
+    @return: a generator of the values returned by L{run}, one per query
+    '''
+    for query in sequenceIterator:
+        outcome = run(query,database,program,opts)
+        yield outcome
diff --git a/src/obitools/alignment/__init__.py b/src/obitools/alignment/__init__.py
new file mode 100644
index 0000000..a89793a
--- /dev/null
+++ b/src/obitools/alignment/__init__.py
@@ -0,0 +1,175 @@
+from obitools import BioSequence
+from obitools import WrappedBioSequence
+from copy import deepcopy
+class GappedPositionException(Exception):
+    '''
+    Raised when a position of an aligned sequence falls in a gap and
+    therefore has no counterpart in the wrapped sequence.
+    '''
+class AlignedSequence(WrappedBioSequence):
+    '''
+    A wrapped sequence into which gaps can be inserted.
+
+    Gaps are described by C{self._gaps}, a list of two element lists
+    C{[letters, gaps]}: a count of consecutive letters of the wrapped
+    sequence followed by the length of the gap run that comes after
+    them. C{self._length} caches the total aligned length.
+    '''
+    def __init__(self,reference,
+                 id=None,definition=None,**info):
+        '''
+        @param reference: the sequence to wrap
+        @param id: identifier forwarded to WrappedBioSequence
+        @param definition: definition forwarded to WrappedBioSequence
+        @param info: extra keyword values forwarded to WrappedBioSequence
+        '''
+        # forward the caller's id and definition instead of a literal None
+        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
+        self._length=len(reference)
+        # a single block: the whole sequence followed by no gap
+        self._gaps=[[self._length,0]]
+    def clone(self):
+        '''
+        @return: a copy of the sequence owning its own gap vector
+        '''
+        seq = WrappedBioSequence.clone(self)
+        seq._gaps=deepcopy(self._gaps)
+        seq._length=sum(z[0]+z[1] for z in self._gaps)
+        return seq
+    def setGaps(self, value):
+        '''
+        Set gap vector to an AlignedSequence.
+        Gap vector describes the gap positions on a sequence.
+        It is a list of couples. The first couple member is the count
+        of sequence letters, the second one is the gap length.
+        @param value: a list of length 2 list describing gap positions
+        @type value: list of couple
+        '''
+        assert isinstance(value, list),'Gap vector must be a list'
+        assert all(isinstance(z, list) and len(z)==2 for z in value),"Value must be a list of length 2 list"
+        lseq = sum(z[0] for z in value)
+        assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence"
+        self._gaps = value
+        self._length=sum(z[0]+z[1] for z in value)
+    def getGaps(self):
+        '''
+        @return: the gap vector as a tuple; its elements are the
+                 internal two element lists, not copies
+        '''
+        return tuple(self._gaps)
+    gaps = property(getGaps, setGaps, None, "Gaps's Docstring")
+    def _getIndice(self,pos):
+        '''
+        Locate an aligned position in the gap vector.
+
+        @param pos: 0 based position on the aligned sequence
+        @return: a couple (index, offset) where index is the rank of
+                 the gap vector entry holding the position. offset is
+                 the 0 based offset in the letter run when positive or
+                 null; for a position in the gap run it is
+                 -(offset in the gap run)-1, hence always negative
+        @raise IndexError: when pos is beyond the aligned length
+        '''
+        i=0
+        cpos=0
+        for s,g in self._gaps:
+            cpos+=s
+            if cpos>pos:
+                return i,pos-cpos+s
+            cpos+=g 
+            if cpos>pos:
+                return i,-pos+cpos-g-1
+            i+=1
+        raise IndexError
+    def getId(self):
+        '''
+        @return: the identifier set on this object or, when it is not
+                 set, the wrapped sequence identifier suffixed by _ALN
+        '''
+        d = self._id or ("%s_ALN" % self.wrapped.id)
+        return d
+    def __len__(self):
+        '''
+        @return: the aligned length, letters and gaps
+        '''
+        return self._length
+    def getStr(self):
+        '''
+        @return: the aligned sequence as a string, gaps shown as '-'
+        '''
+        return ''.join([x for x in self])
+    def __iter__(self):
+        '''
+        @return: an iterator over the aligned symbols, gaps yielded as '-'
+        '''
+        def isymb():
+            cpos=0
+            for s,g in self._gaps:
+                for x in xrange(s):
+                    yield self.wrapped[cpos+x]
+                for x in xrange(g):
+                    yield '-'
+                cpos+=s
+        return isymb()
+    def _posInWrapped(self,position):
+        '''
+        @param position: 0 based position on the aligned sequence
+        @return: the matching position on the wrapped sequence
+        @raise GappedPositionException: when the position is in a gap
+        '''
+        i,s=self._getIndice(position)
+        if s<0:
+            raise GappedPositionException
+        # letters held by the previous entries plus the offset in this one
+        p=sum(z[0] for z in self._gaps[:i])+s
+        return p
+    def getSymbolAt(self,position):
+        '''
+        @return: the symbol at the aligned position, '-' in a gap
+        '''
+        try:
+            return self.wrapped.getSymbolAt(self.posInWrapped(position))
+        except GappedPositionException:
+            return '-'
+    def insertGap(self,position,count=1):
+        '''
+        Insert a gap run in the sequence.
+
+        @param position: aligned position before which gaps are added;
+                         the aligned length means at the very end
+        @param count: number of gap symbols to insert
+        '''
+        if position==self._length:
+            # insertion at the very end: extend the last gap run
+            idx=len(self._gaps)-1
+            p=-1
+        else:
+            idx,p = self._getIndice(position)
+        if p >= 0:
+            # split the letter run: p letters, the new gap, then the rest
+            self._gaps.insert(idx, [p,count])
+            self._gaps[idx+1][0]-=p
+        else:
+            # position already in a gap run: just make it longer
+            self._gaps[idx][1]+=count
+        self._length=sum(z[0]+z[1] for z in self._gaps)
+    id = property(getId,BioSequence.setId, None, "Sequence Identifier")
+class Alignment(list):
+    '''
+    A list of sequences which must all have the same length.
+
+    The length check is done by C{append} and item assignment only.
+    '''
+    def _assertData(self,data):
+        '''
+        Check that data is a BioSequence of the alignment length; the
+        first checked sequence fixes that length.
+        @return: data itself
+        '''
+        assert isinstance(data, BioSequence),'You must only add bioseq to an alignement'
+        if hasattr(self, '_alignlen'):
+            assert self._alignlen==len(data),'All aligned sequences must have the same length'
+        else:
+            self._alignlen=len(data)  
+        return data  
+    def clone(self):
+        '''
+        @return: a new Alignment built on clones of the sequences
+        '''
+        ali = Alignment(x.clone() for x in self)
+        # the list constructor does not go through _assertData, so carry
+        # the recorded length over to keep the check active on the copy
+        if hasattr(self, '_alignlen'):
+            ali._alignlen=self._alignlen
+        return ali
+    def append(self,data):
+        data = self._assertData(data)
+        list.append(self,data)
+    def __setitem__(self,index,data):
+        data = self._assertData(data)
+        list.__setitem__(self,index,data)
+    def getSite(self,key):
+        '''
+        @param key: column index
+        @return: the list of the symbols found at this column, or None
+                 when key is not an integer
+        '''
+        if isinstance(key,int):
+            return [x[key] for x in self]
+    def insertGap(self,position,count=1):
+        '''
+        Insert count gaps at position in every sequence.
+        '''
+        for s in self:
+            s.insertGap(position,count)
+    def isFullGapSite(self,key):
+        '''
+        @return: True when the column holds only '-' symbols
+        '''
+        return all(z=='-' for z in self.getSite(key))
+    def isGappedSite(self,key):
+        '''
+        @return: True when the column holds at least one '-' symbol
+        '''
+        return '-' in self.getSite(key)
+    def __str__(self):
+        '''
+        @return: the alignment printed by blocks of 60 columns, each
+                 line starting with the sequence identifier; an empty
+                 string for an empty alignment
+        '''
+        if len(self)==0:
+            # self[0] and max() below both fail without sequence
+            return ""
+        l = len(self[0])
+        idmax = max(len(x.id) for x in self)+2
+        template= "%%-%ds  %%-60s" % idmax
+        lines=[]
+        for p in range(0,l,60):
+            for s in self:
+                lines.append((template % (s.id,s[p:p+60])).strip() + '\n')
+            lines.append("\n")
+        return ''.join(lines)
+def alignmentReader(file,sequenceIterator):
+    '''
+    Read every sequence of a file into an Alignment.
+
+    @param file: the value handed to sequenceIterator
+    @param sequenceIterator: callable returning an iterable of sequences
+    @return: an Alignment filled through its append method
+    '''
+    records = sequenceIterator(file)
+    result = Alignment()
+    for record in records:
+        result.append(record)
+    return result
+def columnIterator(alignment):
+    '''
+    Iterate over the columns of an alignment.
+
+    @param alignment: an indexable collection of equal length sequences
+    @return: a generator of lists, one list of symbols per column;
+             nothing is produced for an empty alignment
+    '''
+    if len(alignment)==0:
+        # no first sequence to take the length from
+        return
+    lali = len(alignment[0])
+    for p in range(lali):
+        c = [x[p] for x in alignment]
+        yield c
\ No newline at end of file
diff --git a/src/obitools/alignment/ace.py b/src/obitools/alignment/ace.py
new file mode 100644
index 0000000..59cc8f6
--- /dev/null
+++ b/src/obitools/alignment/ace.py
@@ -0,0 +1,47 @@
+from obitools.format.genericparser import GenericParser
+from obitools.utils import universalOpen
+from obitools.fasta import parseFastaDescription
+from obitools import NucSequence
+import sys
+# Parser of ACE assembly files built on GenericParser; '^CO ' is the
+# pattern given to the parser constructor
+# (NOTE(review): presumably the entry start marker - confirm in GenericParser).
+_contigIterator=GenericParser('^CO ')
+# AF lines: a name, a U or C flag and a signed integer
+_contigIterator.addParseAction('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)')
+# RD records: a name, three integers, then the text up to the first empty line
+_contigIterator.addParseAction('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n')
+# DS lines: the rest of the line
+_contigIterator.addParseAction('DS', '\nDS +(.+)')
+# CO line: the first word following the tag
+_contigIterator.addParseAction('CO',  '^CO (\S+)')
+def contigIterator(file):
+    '''
+    Iterate over the contigs of an ACE file.
+
+    @param file: the value handed to universalOpen
+    @return: a generator of couples (contig name, list of NucSequence);
+             every sequence is padded with '-' on both sides and
+             carries the tags 'shift' and 'tail'
+    '''
+    file = universalOpen(file)
+    for entry in _contigIterator(file):
+        contig=[]
+        # Python 2 map(None,...) walks the three lists in parallel
+        for rd,ds,af in map(None,entry['RD'],entry['DS'],entry['AF']):
+            seqid = rd[0]
+            shift = int(af[2])
+            if shift < 0:
+                # NOTE(review): the message says "skipped" but the sequence is
+                # kept, the continue below being disabled - confirm the intent
+                print >> sys.stderr,"Sequence %s in contig %s has a negative padding value %d : skipped" % (seqid,entry['CO'][0],shift)
+                #continue
+            definition,info = parseFastaDescription(ds)
+            info['shift']=shift
+            # '*' marks a pad in the read text, it is turned into a gap
+            seq = rd[4].replace('\n','').replace('*','-').strip()
+            contig.append(NucSequence(seqid,seq,definition,**info))
+        maxlen = max(len(x)+x['shift'] for x in contig)
+        minshift=min(x['shift'] for x in contig)
+        rep = []
+        for s in contig:
+            info = s.getTags()
+            # rebase the shifts so that the smallest one becomes 1
+            info['shift']-=minshift-1
+            head = '-' * (info['shift']-1)
+            tail = (maxlen + minshift - len(s) - info['shift'] - 1)
+            info['tail']=tail
+            newseq = NucSequence(s.id,head + str(s)+ '-' * tail,s.definition,**info)
+            rep.append(newseq) 
+        yield entry['CO'][0],rep
\ No newline at end of file
diff --git a/src/obitools/barcodecoverage/__init__.py b/src/obitools/barcodecoverage/__init__.py
new file mode 100644
index 0000000..09e542e
--- /dev/null
+++ b/src/obitools/barcodecoverage/__init__.py
@@ -0,0 +1,7 @@
+'''
+@author: merciece
+Creates the tree representing the coverage of 2 primers from an ecoPCR output file and an ecoPCR database.
+'''
\ No newline at end of file
diff --git a/src/obitools/barcodecoverage/calcBc.py b/src/obitools/barcodecoverage/calcBc.py
new file mode 100644
index 0000000..13b0401
--- /dev/null
+++ b/src/obitools/barcodecoverage/calcBc.py
@@ -0,0 +1,62 @@
+'''
+Created on 24 nov. 2011
+@author: merciece
+'''
+def _taxaByGroup(seqs, keptRanks, tax) :
+    '''
+    Map every ancestor of a kept rank to the set of taxids found below it.
+    '''
+    groups = {}
+    for seq in seqs :
+        taxid = seq['taxid']
+        p = [a for a in tax.parentalTreeIterator(taxid)]
+        for a in p :
+            # p[0] is the first element given by the iterator, it is
+            # never used as a group
+            if a != p[0] and a[1] in keptRanks :
+                groups.setdefault(a[0], set()).add(taxid)
+    return groups
+def main(amplifiedSeqs, seqsFromDB, keptRanks, errors, tax, maxerrors=3) :
+    '''
+    Compute for each group the percentage of its taxa that are amplified.
+
+    @param amplifiedSeqs: amplified sequences, each with an id
+                          attribute and a 'taxid' key
+    @param seqsFromDB: database sequences, each with a 'taxid' key
+    @param keptRanks: the ranks allowed to define a group
+    @param errors: dictionary indexed by sequence id, the element 2 of
+                   each value is compared to the error threshold
+    @param tax: object providing parentalTreeIterator(taxid), which
+                yields items whose element 0 is used as group key and
+                element 1 as rank
+    @param maxerrors: error threshold, a sequence is counted as
+                      amplified only if its error value is not
+                      greater; default is 3
+    @return: dictionary group -> percentage rounded to two decimals,
+             0.0 for the groups without amplified taxon
+    '''
+    taxabygroup = _taxaByGroup(seqsFromDB, keptRanks, tax)
+    retained = [seq for seq in amplifiedSeqs if errors[seq.id][2] <= maxerrors]
+    amplifiedtaxabygroup = _taxaByGroup(retained, keptRanks, tax)
+    BcValues = {}
+    for g in taxabygroup :
+        if g in amplifiedtaxabygroup :
+            ratio = float(len(amplifiedtaxabygroup[g]))/len(taxabygroup[g])*100
+            BcValues[g] = round(ratio, 2)
+        else :
+            BcValues[g] = 0.0
+    return BcValues
diff --git a/src/obitools/barcodecoverage/drawBcTree.py b/src/obitools/barcodecoverage/drawBcTree.py
new file mode 100644
index 0000000..9b1e215
--- /dev/null
+++ b/src/obitools/barcodecoverage/drawBcTree.py
@@ -0,0 +1,108 @@
+'''
+Created on 25 nov. 2011
+@author: merciece
+'''
+from obitools.graph.rootedtree import nexusFormat 
+begin figtree;
+    set appearance.backgroundColorAttribute="User Selection";
+    set appearance.backgroundColour=#-1;
+    set appearance.branchColorAttribute="bc";
+    set appearance.branchLineWidth=2.0;
+    set appearance.foregroundColour=#-16777216;
+    set appearance.selectionColour=#-2144520576;
+    set branchLabels.colorAttribute="User Selection";
+    set branchLabels.displayAttribute="errors";
+    set branchLabels.fontName="sansserif";
+    set branchLabels.fontSize=10;
+    set branchLabels.fontStyle=0;
+    set branchLabels.isShown=true;
+    set branchLabels.significantDigits=4;
+    set layout.expansion=2000;
+    set layout.layoutType="RECTILINEAR";
+    set layout.zoom=0;
+    set nodeBars.barWidth=4.0;
+    set nodeLabels.colorAttribute="User Selection";
+    set nodeLabels.displayAttribute="label";
+    set nodeLabels.fontName="sansserif";
+    set nodeLabels.fontSize=10;
+    set nodeLabels.fontStyle=0;
+    set nodeLabels.isShown=true;
+    set nodeLabels.significantDigits=4;
+    set polarLayout.alignTipLabels=false;
+    set polarLayout.angularRange=0;
+    set polarLayout.rootAngle=0;
+    set polarLayout.rootLength=100;
+    set polarLayout.showRoot=true;
+    set radialLayout.spread=0.0;
+    set rectilinearLayout.alignTipLabels=false;
+    set rectilinearLayout.curvature=0;
+    set rectilinearLayout.rootLength=100;
+    set scale.offsetAge=0.0;
+    set scale.rootAge=1.0;
+    set scale.scaleFactor=1.0;
+    set scale.scaleRoot=false;
+    set scaleAxis.automaticScale=true;
+    set scaleAxis.fontSize=8.0;
+    set scaleAxis.isShown=false;
+    set scaleAxis.lineWidth=2.0;
+    set scaleAxis.majorTicks=1.0;
+    set scaleAxis.origin=0.0;
+    set scaleAxis.reverseAxis=false;
+    set scaleAxis.showGrid=true;
+    set scaleAxis.significantDigits=4;
+    set scaleBar.automaticScale=true;
+    set scaleBar.fontSize=10.0;
+    set scaleBar.isShown=true;
+    set scaleBar.lineWidth=1.0;
+    set scaleBar.scaleRange=0.0;
+    set scaleBar.significantDigits=4;
+    set tipLabels.colorAttribute="User Selection";
+    set tipLabels.displayAttribute="Names";
+    set tipLabels.fontName="sansserif";
+    set tipLabels.fontSize=10;
+    set tipLabels.fontStyle=0;
+    set tipLabels.isShown=true;
+    set tipLabels.significantDigits=4;
+    set trees.order=false;
+    set trees.orderType="increasing";
+    set trees.rooting=false;
+    set trees.rootingType="User Selection";
+    set trees.transform=false;
+    set trees.transformType="cladogram";
+def cartoonRankGenerator(rank):
+    '''
+    Build a predicate selecting the nodes of a given rank.
+
+    @param rank: the rank value to look for
+    @return: a function of one node, true when the node has a 'rank'
+             entry equal to rank
+    '''
+    def cartoon(node):
+        if 'rank' not in node:
+            return False
+        return node['rank']==rank
+    return cartoon
+def collapseBcGenerator(Bclimit):
+    '''
+    Build a predicate selecting the nodes with a low Bc value.
+
+    @param Bclimit: the highest 'bc' value still selected
+    @return: a function of one node, true when the node has a 'bc'
+             entry lower than or equal to Bclimit
+    '''
+    def collapse(node):
+        if 'bc' not in node:
+            return False
+        return node['bc']<=Bclimit
+    return collapse
+def label(node):
+    '''
+    Format the text displayed for a node.
+
+    @param node: a mapping with a 'name' entry and an optional 'bc' one
+    @return: the name preceded by the signed 'bc' value between
+             parentheses, or by six spaces when there is no 'bc' entry
+    '''
+    if 'bc' not in node:
+        return "      %s" % node['name']
+    return "(%+3.1f) %s" % (node['bc'],node['name'])
+def main(coverageTree) :
+    '''
+    Print the coverage tree on standard output in nexus format.
+
+    @param coverageTree: the tree handed to nexusFormat
+    '''
+    # labels come from label(), the figtree text is given as extra block and
+    # the nodes of rank 'family' are selected by the cartoon predicate
+    print nexusFormat(coverageTree,
+                      label=label,
+                      blocks=figtree,
+                      cartoon=cartoonRankGenerator('family'))
+                      # disabled alternative kept by the author:
+                      #collapse=collapseBcGenerator(70))
diff --git a/src/obitools/barcodecoverage/findErrors.py b/src/obitools/barcodecoverage/findErrors.py
new file mode 100644
index 0000000..dae20a0
--- /dev/null
+++ b/src/obitools/barcodecoverage/findErrors.py
@@ -0,0 +1,56 @@
+'''
+Created on 24 nov. 2011
+@author: merciece
+'''
+def main(seqs, keptRanks, tax):
+    '''
+    Compute the errors per sequence then per taxon.
+
+    @return: the couple (result of getErrorsOnLeaves,
+             result of propagateErrors)
+    '''
+    bySeq = getErrorsOnLeaves(seqs)
+    byTaxon = propagateErrors(bySeq, keptRanks, tax)
+    return bySeq, byTaxon
+def getErrorsOnLeaves(seqs) :    
+    '''
+    Collect the primer errors of each sequence.
+
+    @param seqs: iterable of sequences with an id attribute and the
+                 keys 'taxid', 'forward_error' and 'reverse_error'
+    @return: dictionary id -> [forward errors, reverse errors,
+             their sum, 1, taxid]
+    '''
+    errors = {}
+    for s in seqs :
+        taxid = s['taxid']
+        fwd, rev = s['forward_error'], s['reverse_error']
+        errors[s.id] = [fwd, rev, fwd + rev, 1, taxid]
+    return errors
+def propagateErrors(errorsOnLeaves, keptRanks, tax) :
+    '''
+    Average the errors of the sequences over their ancestors.
+
+    @param errorsOnLeaves: dictionary id -> [forward errors, reverse
+                           errors, total, count, taxid]; it is left
+                           unchanged
+    @param keptRanks: the ranks allowed to define a group
+    @param tax: object providing parentalTreeIterator(taxid), which
+                yields items whose element 0 is used as group key and
+                element 1 as rank
+    @return: dictionary group -> [mean forward errors, mean reverse
+             errors, mean total, number of sequences, taxid of the
+             first sequence met for the group], means rounded to two
+             decimals
+    '''
+    allErrors = {}
+    for seq in errorsOnLeaves :
+        leaf = errorsOnLeaves[seq]
+        taxid = leaf[4]
+        p = [a for a in tax.parentalTreeIterator(taxid)]
+        for a in p :
+            if a[1] in keptRanks :
+                group = a[0]
+                if group in allErrors :
+                    allErrors[group][0] += leaf[0]
+                    allErrors[group][1] += leaf[1]
+                    allErrors[group][2] += leaf[2]
+                    allErrors[group][3] += 1
+                else :
+                    # each group needs its own list: sharing the leaf's
+                    # one would mix the sums of all the groups of a
+                    # lineage and alter the caller's data
+                    allErrors[group] = list(leaf)
+    for group in allErrors :
+        count = float(allErrors[group][3])
+        for i in (0, 1, 2) :
+            allErrors[group][i] = round(allErrors[group][i] / count, 2)
+    return allErrors
diff --git a/src/obitools/barcodecoverage/readFiles.py b/src/obitools/barcodecoverage/readFiles.py
new file mode 100644
index 0000000..b03e72a
--- /dev/null
+++ b/src/obitools/barcodecoverage/readFiles.py
@@ -0,0 +1,69 @@
+'''
+Created on 23 nov. 2011
+@author: merciece
+'''
+from obitools.ecopcr import sequence
+from obitools.ecopcr import taxonomy
+def main(entries,options):
+    '''
+    Read the ecoPCR database then filter the ecoPCR entries with it.
+
+    @return: the couple (result of ecoPCRDatabaseReader,
+             result of ecoPCRFileReader)
+    '''
+    dbData = ecoPCRDatabaseReader(options)
+    kept = ecoPCRFileReader(entries,dbData)
+    return dbData,kept
+def ecoPCRDatabaseReader(options):
+    '''
+    Sort the sequences of an ecoPCR database into ingroup and outgroup
+    and select the ranks used often enough.
+
+    @param options: object with the attributes taxonomy (given to
+                    EcoTaxonomyDB and EcoPCRDBSequenceIterator),
+                    ingroup (given to isAncestor) and rankthresold
+    @return: dictionary with the keys 'ingroup' and 'outgroup'
+             (dictionaries id -> sequence), 'ranks' (set of rank ids)
+             and 'taxonomy'
+    '''
+    tax = taxonomy.EcoTaxonomyDB(options.taxonomy)
+    seqs = sequence.EcoPCRDBSequenceIterator(options.taxonomy,taxonomy=tax)
+    norankid  = tax.findRankByName('no rank')
+    speciesid = tax.findRankByName('species')
+    genusid   = tax.findRankByName('genus')
+    familyid  = tax.findRankByName('family')
+    minrankseq = set([speciesid,genusid,familyid])
+    usedrankid   = {}
+    ingroup = {}
+    outgroup= {}
+    for s in seqs :
+        if 'taxid' in s :
+            taxid = s['taxid']
+            allrank = set()
+            for p in tax.parentalTreeIterator(taxid):
+                if p[1]!=norankid:
+                    allrank.add(p[1])
+                # NOTE(review): this test is inside the loop, so once species,
+                # genus and family are seen the counters are incremented again
+                # for every further item of the iterator - confirm that it is
+                # not meant to run once, after the loop.
+                if len(minrankseq & allrank) == 3:
+                    for r in allrank:
+                        usedrankid[r]=usedrankid.get(r,0) + 1
+                    if tax.isAncestor(options.ingroup,taxid):
+                        ingroup[s.id] = s
+                    else:
+                        outgroup[s.id] = s
+    ningroup = float(len(ingroup))
+    if ningroup:
+        keptranks = set(r for r in usedrankid 
+                       if float(usedrankid[r])/ningroup > options.rankthresold)
+    else:
+        # no ingroup sequence: the ratio is undefined, keep no rank
+        # rather than dividing by zero
+        keptranks = set()
+    return { 'ingroup' : ingroup,
+             'outgroup': outgroup,
+             'ranks'   : keptranks,
+             'taxonomy': tax
+           }
+def ecoPCRFileReader(entries,filteredDataFromDB) :
+    '''
+    Keep the entries belonging to the ingroup.
+
+    @param entries: iterable of sequences with an id attribute
+    @param filteredDataFromDB: dictionary whose 'ingroup' value is
+                               tested for the sequence ids
+    @return: list of the entries having a 'taxid' key and an id
+             present in the ingroup, in their original order
+    '''
+    return [s for s in entries
+            if 'taxid' in s and s.id in filteredDataFromDB['ingroup']]
diff --git a/src/obitools/barcodecoverage/writeBcTree.py b/src/obitools/barcodecoverage/writeBcTree.py
new file mode 100644
index 0000000..7c8243e
--- /dev/null
+++ b/src/obitools/barcodecoverage/writeBcTree.py
@@ -0,0 +1,42 @@
+'''
+Created on 25 nov. 2011
+@author: merciece
+'''
+from obitools.graph.rootedtree import RootedTree
+def main(BcValues,errors,tax) :
+    '''
+    Build the tree of the taxa having a Bc value.
+
+    @param BcValues: dictionary taxon -> Bc value
+    @param errors: dictionary taxon -> sequence whose elements 0, 1
+                   and 2 are the forward, reverse and total errors;
+                   -1.0 is used for the taxa absent from it
+    @param tax: object providing getRank, getScientificName and
+                parentalTreeIterator
+    @return: a RootedTree with one node per taxon of BcValues
+    '''
+    tree = RootedTree()
+    unlinked = set(BcValues)
+    for taxon in BcValues:
+        if taxon in errors :
+            forErr, revErr, totErr = errors[taxon][0], errors[taxon][1], errors[taxon][2]
+        else :
+            forErr = revErr = totErr = -1.0
+        tree.addNode(taxon, rank=tax.getRank(taxon),
+                       name=tax.getScientificName(taxon),
+                       bc = BcValues[taxon],
+                       errors = ' '.join((str(forErr),str(revErr))),
+                       totError = totErr
+                    )
+    for start in BcValues:
+        lineage = tax.parentalTreeIterator(start)
+        child = lineage.next()
+        for parent in lineage:
+            # link a still unlinked taxon to its first ancestor present in
+            # BcValues, then go on from this ancestor
+            if child[0] in unlinked and parent[0] in BcValues:
+                unlinked.remove(child[0])
+                tree.addEdge(parent[0], child[0])
+                child=parent
+    return tree
diff --git a/src/obitools/blast/__init__.py b/src/obitools/blast/__init__.py
new file mode 100644
index 0000000..11b5274
--- /dev/null
+++ b/src/obitools/blast/__init__.py
@@ -0,0 +1,207 @@
+from os import popen2
+from itertools import imap,count
+from obitools.table import iTableIterator,TableRow,Table,SelectionIterator
+from obitools.utils import ColumnFile
+from obitools.location import SimpleLocation
+from obitools.fasta import formatFasta
+import sys
+class Blast(object):
+    '''
+    Callable object running a blast program on one sequence.
+    '''
+    def __init__(self,mode,db,program='blastall',**options):
+        '''
+        @param mode: value of the -p option
+        @param db: value of the -d option
+        @param program: name of the executable
+        @param options: extra options, each one written as
+                        C{-name value} on the command line
+        '''
+        self._program = program
+        self._mode = mode
+        self._db = db
+        self._options = options
+    def getMode(self):
+        return self._mode
+    def getDb(self):
+        return self._db
+    def getProgram(self):
+        return self._program
+    def _blastcmd(self):
+        '''
+        @return: the command line as a string
+        '''
+        extra = ' '.join(['-%s %s' % (name,str(value)) 
+                             for name,value in self._options.iteritems()])
+        fields = {
+                 'program' : self.program,
+                 'db'      : self.db,
+                 'mode'    : self.mode,
+                 'options' : extra
+               }
+        template = """%(program)s     \\
+                    -p %(mode)s  \\
+                    -d %(db)s    \\
+                    -m 8         \\
+                    %(options)s   \\
+              """
+        return template % fields
+    def __call__(self,sequence):
+        '''
+        Run blast with one sequence object
+        @param sequence: the query, sent to the program through
+                         formatFasta
+        @return: a BlastResultIterator reading the program output
+        '''
+        command = self._blastcmd()
+        (blast_in,blast_out) = popen2(command)
+        print >>blast_in,formatFasta(sequence)
+        blast_in.close()
+        return BlastResultIterator(blast_out)
+    mode = property(getMode, None, None, "Mode's Docstring")
+    db = property(getDb, None, None, "Db's Docstring")
+    program = property(getProgram, None, None, "Program's Docstring")
+class NetBlast(Blast):
+    '''
+    Blast variant using the blastcl3 program (blast on ncbi servers)
+    '''
+    def __init__(self,mode,db,**options):
+        '''
+        @param mode: forwarded to Blast
+        @param db: forwarded to Blast
+        @param options: forwarded to Blast
+        '''
+        Blast.__init__(self, mode, db, program='blastcl3', **options)
+class BlastResultIterator(iTableIterator):
+    '''
+    Iterator over the rows of a tab separated blast report.
+    '''
+    def __init__(self,blastoutput,query=None):
+        '''
+        @param blastoutput: the report, handed to ColumnFile
+        @param query: the query sequence of the run, None if unknown
+        '''
+        self._blast = ColumnFile(blastoutput,
+                                 strip=True,
+                                 skip="#",
+                                 sep="\t",
+                                 types=self.types)
+        self._query = query
+        # column name -> column rank
+        self._hindex = dict((name,rank)
+                            for rank,name in enumerate(self._getHeaders()))
+    def _getHeaders(self):
+        return ('Query id','Subject id',
+                '% identity','alignment length',
+                'mismatches', 'gap openings',
+                'q. start', 'q. end',
+                's. start', 's. end',
+                'e-value', 'bit score')
+    def _getTypes(self):
+        return (str,   str,
+                float, int,
+                int,   int,
+                int,   int,
+                int,   int,
+                float, float)
+    def _getRowFactory(self):
+        return BlastMatch
+    def _getSubrowFactory(self):
+        return TableRow
+    def _getQuery(self):
+        return self._query
+    headers = property(_getHeaders,None,None)
+    types   = property(_getTypes,None,None)
+    rowFactory    = property(_getRowFactory,None,None)
+    subrowFactory = property(_getSubrowFactory,None,None)
+    query = property(_getQuery,None,None)
+    def next(self):
+        '''
+        @return: the next row of the report built with rowFactory
+        '''
+        row = self._blast.next()
+        return self.rowFactory(self,row)
+class BlastResult(Table):
+    '''
+    Results of a blast run.
+
+    Adds nothing to Table.
+    '''
+class BlastMatch(TableRow):
+    '''
+    Blast high scoring pair between two sequences
+    '''
+    def getQueryLocation(self):
+        '''
+        @return: a SimpleLocation built on columns 6 and 7
+        '''
+        return SimpleLocation(self[6], self[7])
+    def getSubjectLocation(self):
+        '''
+        @return: a SimpleLocation built on columns 8 and 9
+        '''
+        return SimpleLocation(self[8], self[9])
+    def getSubjectSequence(self,database):
+        '''
+        @param database: object indexed by the value of column 1
+        @return: the database item of the subject
+        '''
+        subject = self[1]
+        return database[subject]
+    def queryCov(self,query=None):
+        '''
+        Compute coverage of match on query sequence.
+        @param query: the query sequence. Default is None.
+                      In this case the query sequence associated
+                      to this blast result is used.
+        @type query: L{obitools.BioSequence}
+        @return: coverage fraction 
+        @rtype: float
+        '''
+        if query is None:
+            query = self.table.query
+        assert query is not None
+        matched = float(self[7]-self[6]+1)
+        return matched/float(len(query))
+    def __getitem__(self,key):
+        '''
+        Behave like TableRow, with the extra key 'query coverage'
+        available when the table knows its query.
+        '''
+        if key!='query coverage' or self.table.query is None:
+            return TableRow.__getitem__(self,key)
+        return self.queryCov()
+class BlastCovMinFilter(SelectionIterator):
+    '''
+    Selection of the blast matches covering enough of the query.
+    '''
+    def __init__(self,blastiterator,covmin,query=None,**conditions):
+        '''
+        @param blastiterator: the iterator to filter
+        @param covmin: minimum value of queryCov for a kept row
+        @param query: the query sequence, taken from
+                      blastiterator.table.query when None
+        @param conditions: forwarded to SelectionIterator
+        '''
+        query = blastiterator.table.query if query is None else query
+        assert query is not None
+        SelectionIterator.__init__(self,blastiterator,**conditions)
+        self._query = query
+        self._covmin=covmin
+    def _covMinPredicat(self,row):
+        coverage = row.queryCov(self._query)
+        return coverage>=self._covmin
+    def _checkCondition(self,row):
+        if not self._covMinPredicat(row):
+            return False
+        return SelectionIterator._checkCondition(self, row)
\ No newline at end of file
diff --git a/src/obitools/carto/__init__.py b/src/obitools/carto/__init__.py
new file mode 100644
index 0000000..b7ac176
--- /dev/null
+++ b/src/obitools/carto/__init__.py
@@ -0,0 +1,376 @@
+# -*- coding: latin1 -*-
+from obitools import SVGdraw
+import math
+class Map(object):
+    """
+        A Map is one instance of a physical genetic map.
+        Such a map is defined by the length of the sequence
+        associated to it.
+        A map owns a number of levels (Level), themselves split
+        into sub-levels (SubLevel).
+        Sub-levels hold the features.
+    """
+    def __init__(self,name,seqlength,scale=1):
+        """
+            Build a new map
+            *Param*:
+                name
+                    name of the map
+                seqlength
+                    length of the sequence associated to the map
+                scale
+                    scale of the map: how many pixels correspond
+                    to one unit of the map
+        """
+        self.name = name
+        self.seqlength = seqlength
+        self.scale = scale
+        self.levels = {}
+        self.basicHSize = 10
+    def __str__(self):
+        return '<%s>' % self.name
+    def __getitem__(self,level):
+        """
+            Return level *level* of the map, creating it
+            when it does not exist yet.
+            Raises TypeError for a non integer level and
+            AssertionError for level 0.
+        """
+        if not isinstance(level,int):
+            raise TypeError('level must be an non Zero integer value')
+        elif level==0:
+            raise AssertionError('Level cannot be set to 0')
+        try:
+            return self.levels[level]
+        except KeyError:
+            # Level() also registers itself in self.levels
+            self.levels[level] = Level(level,self)
+        return self.levels[level]
+    def getBasicHSize(self):
+        """
+            Return the basic height of a drawing element,
+            expressed in pixels
+        """
+        return self.basicHSize
+    def getScale(self):
+        """
+            Return the scale of the map, in pixels per
+            physical unit of the map
+        """
+        return self.scale
+    def getNegativeBase(self):
+        """
+            Total height of the map minus the height of
+            every negative level
+        """
+        below = sum(self.levels[z].getHeight()
+                    for z in self.levels
+                    if z < 0)
+        return self.getHeight() - below
+    def getPositiveBase(self):
+        """
+            Negative base shifted up by three basic heights
+        """
+        return self.getNegativeBase() - 3 * self.getBasicHSize()
+    def getHeight(self):
+        """
+            Sum of the heights of all levels plus four
+            basic heights
+        """
+        return sum(z.getHeight() for z in self.levels.values()) \
+               + 4 * self.getBasicHSize()
+    def toXML(self,file=None,begin=0,end=None):
+        """
+            Draw the map as SVG and return the result of
+            SVGdraw's toXml(file).
+            *Param*:
+                file
+                    passed unchanged to the drawing's toXml method
+                begin
+                    first map position of the displayed window
+                end
+                    last map position of the displayed window,
+                    the sequence length when None
+        """
+        dessin = SVGdraw.drawing()
+        if end is None:
+            end = self.seqlength
+        hauteur= self.getHeight()
+        largeur=(end-begin+1)*self.scale
+        svg    = SVGdraw.svg((begin*self.scale,0,largeur,hauteur),
+                             '%fpx' % (self.seqlength * self.scale),
+                             '%dpx' % hauteur)
+        # 1.25 is written as a float literal: the former (1 + 1/4) was an
+        # integer division under Python 2 and silently evaluated to 1, which
+        # put the axis bar off the middle of the band left between the
+        # positive and the negative levels.
+        centre = self.getPositiveBase() + 1.25 * self.getBasicHSize()
+        svg.addElement(SVGdraw.rect(0,centre,self.seqlength * self.scale,self.getBasicHSize()/2))
+        for e in self.levels.values():
+            svg.addElement(e.getElement())
+        dessin.setSVG(svg)
+        return dessin.toXml(file)
+class Feature(object):
+    # Marker base class for everything that can be drawn on a map:
+    # SubLevel.add() rejects any object that is not a Feature.
+    pass
+class Level(object):
+    '''
+    One level of a Map, holding numbered sub-levels.
+    A level registers itself in the levels dictionary of its map.
+    '''
+    def __init__(self,level,map):
+        '''
+        @param level: number of the level inside the map
+        @param map: the Map owning the level
+        @raise AssertionError: map is not a Map, or the level number
+                               is already used in this map
+        '''
+        if not isinstance(map,Map):
+            raise AssertionError('map is not an instance of class Map')
+        if level in map.levels:
+            raise AssertionError('Level %d already define for map %s' % (level,map))
+        map.levels[level] = self
+        self.map = map
+        self.level = level
+        self.sublevels = {}
+    def __getitem__(self,sublevel):
+        """
+            Return sub-level *sublevel* of this level, creating it
+            when it does not exist yet
+        """
+        if not isinstance(sublevel,int):
+            raise TypeError('sublevel must be a positive integer value')
+        if sublevel<0:
+            raise AssertionError('Level cannot be negative')
+        if sublevel not in self.sublevels:
+            self.sublevels[sublevel] = SubLevel(sublevel,self)
+        return self.sublevels[sublevel]
+    def getBase(self):
+        '''
+        Vertical base of the level.
+        Negative level: negative base of the map plus the heights of the
+        negative levels from this one up to level -1.
+        Other levels: positive base of the map minus the heights of the
+        positive levels numbered below this one.
+        '''
+        levels = self.map.levels
+        if self.level < 0:
+            stacked = sum(levels[z].getHeight()
+                          for z in levels
+                          if z < 0 and z >= self.level)
+            return self.map.getNegativeBase() + stacked
+        stacked = sum(levels[z].getHeight()
+                      for z in levels
+                      if z > 0 and z < self.level)
+        return self.map.getPositiveBase() - stacked
+    def getElement(self):
+        '''Return one SVG group holding the element of every sub-level.'''
+        group = SVGdraw.group('level%d' % self.level)
+        for sub in self.sublevels.values():
+            group.addElement(sub.getElement())
+        return group
+    def getHeight(self):
+        '''Sum of the sub-level heights plus two basic heights.'''
+        inner = sum(z.getHeight() for z in self.sublevels.values())
+        return inner + 2 * self.map.getBasicHSize()
+class SubLevel(object):
+    '''
+    One numbered row of features inside a Level.
+    A sub-level registers itself in the sublevels dictionary of its level
+    and stores its features by name.
+    '''
+    def __init__(self,sublevel,level):
+        '''
+        @param sublevel: number of the sub-level inside the level
+        @param level: the Level owning the sub-level
+        @raise AssertionError: level is not a Level, or the sub-level
+                               number is already used in this level
+        '''
+        if not isinstance(level,Level):
+            raise AssertionError('level is not an instance of class Level')
+        # sublevels is keyed by sub-level number, so the duplicate test has
+        # to look up the number; looking up the Level object never matched.
+        if sublevel in level.sublevels:
+            raise AssertionError('Sublevel %d already define for level %s' % (sublevel,level))
+        else:
+            level.sublevels[sublevel] = self
+        self.level = level
+        self.sublevel = sublevel
+        self.features = {}
+    def getHeight(self):
+        '''Height of the tallest feature (0 when empty) plus four basic heights.'''
+        return max([x.getHeight() for x in self.features.values()]+[0]) + 4 * self.level.map.getBasicHSize()
+    def getBase(self):
+        '''
+        Vertical base handed to the features of this sub-level.
+        It starts from the base of the owning level and is shifted by the
+        heights of the sub-levels numbered up to this one (negative level)
+        or strictly below this one (other levels).
+        '''
+        unit = self.level.map.getBasicHSize()
+        siblings = self.level.sublevels
+        base = self.level.getBase()
+        if self.level.level < 0:
+            base -= self.level.getHeight() - 2 * unit
+            base += sum(siblings[z].getHeight()
+                        for z in siblings
+                        if z <= self.sublevel)
+            base -= 2 * unit
+        else:
+            base -= sum(siblings[z].getHeight()
+                        for z in siblings
+                        if z < self.sublevel)
+            base -= unit
+        return base
+    def getElement(self):
+        '''Return one SVG group holding the element of every feature.'''
+        base = self.getBase()
+        objet = SVGdraw.group('sublevel%d' % self.sublevel)
+        for e in self.features.values():
+            objet.addElement(e.getElement(base))
+        return objet
+    def add(self,feature):
+        '''
+        Store feature under its name and attach it to this sub-level.
+        @raise TypeError: feature is not a Feature
+        @raise AssertionError: a feature with the same name is already stored
+        '''
+        if not isinstance(feature,Feature):
+            raise TypeError('feature must be an instance oof Feature')
+        if feature.name in self.features:
+            raise AssertionError('A feature with the same name (%s) have already be insert in this sublevel'
+                                 % feature.name)
+        self.features[feature.name]=feature
+        feature.sublevel=self
+class SimpleFeature(Feature):
+    '''
+    Feature covering one interval from begin to end, drawn as a single
+    rectangle one basic height tall.
+    '''
+    def __init__(self,name,begin,end,visiblename=False,color=0):
+        self.name        = name
+        self.begin       = begin
+        self.end         = end
+        self.color       = color
+        self.visiblename = visiblename
+        # set by SubLevel.add() once the feature is placed on a map
+        self.sublevel    = None
+    def getHeight(self):
+        '''
+        One basic height, two when the name is visible.
+        @raise AssertionError: the feature is not attached to a sub-level
+        '''
+        if not self.sublevel:
+            raise AssertionError('Not affected Simple feature')
+        unit = self.sublevel.level.map.getBasicHSize()
+        if self.visiblename:
+            return unit * 2
+        return unit
+    def getElement(self,base):
+        '''Return the SVG rectangle of the feature, sitting on base.'''
+        carte = self.sublevel.level.map
+        scale = carte.getScale()
+        unit  = carte.getBasicHSize()
+        box = SVGdraw.rect(self.begin * scale,
+                           base - unit,
+                           (self.end - self.begin + 1) * scale,
+                           unit,
+                           stroke=self.color)
+        box.addElement(SVGdraw.description(self.name))
+        return box
+class BoxFeature(SimpleFeature):
+    '''
+    Interval drawn as an unfilled frame three basic heights tall.
+    '''
+    def getHeight(self):
+        '''
+        Three basic heights, four when the name is visible.
+        @raise AssertionError: the feature is not attached to a sub-level
+        '''
+        if not self.sublevel:
+            raise AssertionError('Not affected Box feature')
+        unit = self.sublevel.level.map.getBasicHSize()
+        if self.visiblename:
+            return unit * 4
+        return unit * 3
+    def getElement(self,base):
+        '''Return the SVG frame of the feature, starting two basic heights above base.'''
+        carte = self.sublevel.level.map
+        scale = carte.getScale()
+        unit  = carte.getBasicHSize()
+        frame = SVGdraw.rect(self.begin * scale,
+                             base - unit * 2,
+                             (self.end - self.begin + 1) * scale,
+                             unit * 3,
+                             stroke=self.color,fill="none")
+        frame.addElement(SVGdraw.description(self.name))
+        return frame
+class MultiPartFeature(Feature):
+    '''
+    Feature made of several (begin,end) parts. Each part is drawn as a
+    filled rectangle and consecutive parts are joined by a link.
+    '''
+    def __init__(self,name,*args,**kargs):
+        '''
+        @param name: name of the feature
+        @param args: the parts, one (begin,end) pair each
+        @param kargs: optional settings color (default "black"),
+                      visiblename (default None), flatlink (default False)
+                      and roundlink (default False)
+        '''
+        self.name        = name
+        self.limits      = args
+        self.color       = kargs.get('color',"black")
+        self.visiblename = kargs.get('visiblename',None)
+        self.flatlink    = kargs.get('flatlink',False)
+        self.roundlink   = kargs.get('roundlink',False)
+        # set by SubLevel.add() once the feature is placed on a map
+        self.sublevel    = None
+    def getHeight(self):
+        '''
+        Two basic heights, three when the name is visible.
+        @raise AssertionError: the feature is not attached to a sub-level
+        '''
+        if not self.sublevel:
+            raise AssertionError('Not affected Simple feature')
+        unit = self.sublevel.level.map.getBasicHSize()
+        if self.visiblename:
+            return unit * 3
+        return unit * 2
+    def getElement(self,base):
+        '''Return one SVG group with a rectangle per part and a path per link.'''
+        carte  = self.sublevel.level.map
+        scale  = carte.getScale()
+        height = carte.getBasicHSize()
+        y      = base - height
+        objet  = SVGdraw.group(self.name)
+        for (debut,fin) in self.limits:
+            objet.addElement(SVGdraw.rect(debut * scale,
+                                          y,
+                                          (fin - debut + 1) * scale,
+                                          height,
+                                          fill=self.color))
+        half = height / 2
+        # a link runs from the end of one part to the beginning of the next
+        start = self.limits[0][1]
+        for (stop,following) in self.limits[1:]:
+            start *= scale
+            stop  *= scale
+            link = SVGdraw.pathdata(start,y + half)
+            if self.roundlink:
+                link.qbezier((start+stop)/2, y - half,stop,y + half)
+            else:
+                delta = half
+                if self.flatlink:
+                    delta = - height / 2
+                link.line((start+stop)/2, y - delta)
+                link.line(stop,y + half)
+            objet.addElement(SVGdraw.path(link,fill="none",stroke=self.color))
+            start = following
+        objet.addElement(SVGdraw.description(self.name))
+        return objet
+class TagFeature(Feature):
+    '''
+    Bar of a given length whose drawn height grows with ratio.
+    '''
+    def __init__(self,name,begin,length,ratio,visiblename=False,color=0):
+        self.name        = name
+        self.begin       = begin
+        self.length      = length
+        self.ratio       = ratio
+        self.color       = color
+        self.visiblename = visiblename
+        # set by SubLevel.add() once the feature is placed on a map
+        self.sublevel    = None
+    def getHeight(self):
+        '''
+        Always eleven basic heights.
+        @raise AssertionError: the feature is not attached to a sub-level
+        '''
+        if not self.sublevel:
+            raise AssertionError('Not affected Tag feature')
+        return self.sublevel.level.map.getBasicHSize()*11
+    def getElement(self,base):
+        '''
+        Return the SVG rectangle of the tag. Its height is ten basic
+        heights times ratio, floored, and never less than 1.
+        '''
+        carte  = self.sublevel.level.map
+        scale  = carte.getScale()
+        unit   = carte.getBasicHSize()
+        height = math.floor(max(1,unit * 10 * self.ratio))
+        bar = SVGdraw.rect(self.begin * scale,
+                           base + unit - height,
+                           self.length * scale,
+                           height,
+                           stroke=self.color)
+        bar.addElement(SVGdraw.description(self.name))
+        return bar
+if __name__ == '__main__':
+    # Demo: build a 20000 unit map drawn at half a pixel per unit,
+    # put features of every kind on negative and positive levels ...
+    carte = Map('essai',20000,scale=0.5)
+    carte[-1][0].add(SimpleFeature('toto',100,300))
+    carte[1][0].add(SimpleFeature('toto',100,300))
+    carte[1][1].add(SimpleFeature('toto',200,1000))
+    carte[1][0].add(MultiPartFeature('bout',(1400,1450),(1470,1550),(1650,1800),color='red',flatlink=True))
+    carte[1][0].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='red',flatlink=True))
+    carte[-1][1].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='green'))
+    carte[-1][2].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='purple',roundlink=True))
+    carte[-1][1].add(BoxFeature('tutu',390,810,color='purple'))
+    carte[1][0].add(BoxFeature('tutu',390,810,color='red'))
+    carte[2][0].add(TagFeature('t1',1400,20,0.8))
+    carte[2][0].add(TagFeature('t2',1600,20,0.2))
+    # ... shrink the basic element height from its default of 10 ...
+    carte.basicHSize=6
+    # ... and render two different windows of the same map.
+    print carte.toXML('truc.svg',begin=0,end=1000)
+    print carte.toXML('truc2.svg',begin=460,end=2000)
diff --git a/src/obitools/collections.py b/src/obitools/collections.py
new file mode 100644
index 0000000..96c4512
--- /dev/null
+++ b/src/obitools/collections.py
@@ -0,0 +1,190 @@
+from operator import itemgetter
+from heapq import nlargest
+from itertools import repeat, ifilter
+class Counter(dict):
+    '''Multiset (bag) built on dict: the counted hashable objects are the
+    keys and their counts are the values.
+    >>> Counter('zyzygy')
+    Counter({'y': 3, 'z': 2, 'g': 1})
+    '''
+    def __init__(self, iterable=None, **kwds):
+        '''Build a counter, empty by default. Counts can be seeded from an
+        iterable of elements, from a mapping of elements to counts, or from
+        keyword arguments.
+        >>> c = Counter()                           # a new, empty counter
+        >>> c = Counter('gallahad')                 # a new counter from an iterable
+        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
+        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args
+        '''
+        self.update(iterable, **kwds)
+    def __missing__(self, key):
+        # an element never seen counts zero; reading it does not store it
+        return 0
+    def most_common(self, n=None):
+        '''Return (element, count) pairs from the most to the least common,
+        limited to the n first ones unless n is None.
+        >>> Counter('abracadabra').most_common(3)
+        [('a', 5), ('r', 2), ('b', 2)]
+        '''
+        pairs = self.iteritems()
+        by_count = itemgetter(1)
+        if n is not None:
+            return nlargest(n, pairs, key=by_count)
+        return sorted(pairs, key=by_count, reverse=True)
+    def elements(self):
+        '''Iterate over the elements, each one repeated count times.
+        >>> c = Counter('ABCABC')
+        >>> sorted(c.elements())
+        ['A', 'A', 'B', 'B', 'C', 'C']
+        Elements whose count is zero or negative are skipped.
+        '''
+        for item, times in self.iteritems():
+            for _unused in repeat(None, times):
+                yield item
+    # dict methods whose meaning differs for a Counter.
+    @classmethod
+    def fromkeys(cls, iterable, v=None):
+        raise NotImplementedError(
+            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')
+    def update(self, iterable=None, **kwds):
+        '''Add counts instead of replacing them, unlike dict.update().
+        The source is an iterable of elements or anything exposing
+        iteritems() (a dictionary, another Counter).
+        >>> c = Counter('which')
+        >>> c.update('witch')           # add elements from another iterable
+        >>> d = Counter('watch')
+        >>> c.update(d)                 # add elements from another counter
+        >>> c['h']                      # four 'h' in which, witch, and watch
+        4
+        '''
+        if iterable is not None:
+            if not hasattr(iterable, 'iteritems'):
+                current = self.get
+                for elem in iterable:
+                    self[elem] = current(elem, 0) + 1
+            elif not self:
+                # nothing to add to yet: a plain dict update is enough
+                dict.update(self, iterable)
+            else:
+                current = self.get
+                for elem, count in iterable.iteritems():
+                    self[elem] = current(elem, 0) + count
+        if kwds:
+            self.update(kwds)
+    def copy(self):
+        'Same as dict.copy() except that the result is a Counter.'
+        return Counter(self)
+    def __delitem__(self, elem):
+        'Same as dict.__delitem__() except that a missing element is ignored.'
+        if elem in self:
+            dict.__delitem__(self, elem)
+    def __repr__(self):
+        name = self.__class__.__name__
+        if not self:
+            return '%s()' % name
+        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+        return '%s({%s})' % (name, items)
+    # Multiset operations, see Knuth TAOCP Volume II section 4.6.3
+    # exercise 19 and http://en.wikipedia.org/wiki/Multiset
+    #
+    # Their results only hold strictly positive counts, so adding an
+    # empty counter strips the zero and negative counts:
+    #       c += Counter()
+    def __add__(self, other):
+        '''Sum the counts of two counters.
+        >>> Counter('abbb') + Counter('bcc')
+        Counter({'b': 4, 'c': 2, 'a': 1})
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        total = Counter()
+        for elem in set(self) | set(other):
+            count = self[elem] + other[elem]
+            if count > 0:
+                total[elem] = count
+        return total
+    def __sub__(self, other):
+        ''' Subtract the counts, keeping only the positive results.
+        >>> Counter('abbbc') - Counter('bccd')
+        Counter({'b': 2, 'a': 1})
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        remaining = Counter()
+        for elem in set(self) | set(other):
+            count = self[elem] - other[elem]
+            if count > 0:
+                remaining[elem] = count
+        return remaining
+    def __or__(self, other):
+        '''Union: the largest count found in either counter.
+        >>> Counter('abbb') | Counter('bcc')
+        Counter({'b': 3, 'c': 2, 'a': 1})
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        largest = max
+        union = Counter()
+        for elem in set(self) | set(other):
+            count = largest(self[elem], other[elem])
+            if count > 0:
+                union[elem] = count
+        return union
+    def __and__(self, other):
+        ''' Intersection: the smallest of the two counts.
+        >>> Counter('abbb') & Counter('bcc')
+        Counter({'b': 1})
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        smallest = min
+        common = Counter()
+        # walk the counter with fewer keys, test membership in the other one
+        if len(self) < len(other):
+            big, small = other, self
+        else:
+            big, small = self, other
+        for elem in ifilter(big.__contains__, small):
+            count = smallest(big[elem], small[elem])
+            if count > 0:
+                common[elem] = count
+        return common
+if __name__ == '__main__':
+    # Run the examples embedded in the docstrings of this module.
+    import doctest
+    print doctest.testmod()
diff --git a/src/obitools/decorator.py b/src/obitools/decorator.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/distances/__init__.py b/src/obitools/distances/__init__.py
new file mode 100644
index 0000000..1542fa9
--- /dev/null
+++ b/src/obitools/distances/__init__.py
@@ -0,0 +1,29 @@
+class DistanceMatrix(object):
+    '''
+    Lower triangular matrix of pairwise distances between the sequences
+    of an alignment. Cells are computed on first access by evaluateDist,
+    which subclasses have to provide, and are kept afterwards.
+    '''
+    def __init__(self,alignment):
+        '''
+        DistanceMatrix constructor.
+            @param alignment: aligment used to compute distance matrix
+            @type alignment: obitools.align.Alignment
+        '''
+        self.aligment = alignment
+        # row number r holds the r+1 cells (r,0) ... (r,r)
+        self.matrix = [[None] * (row+1) for row in xrange(len(alignment))]
+    def evaluateDist(self,x,y):
+        '''Compute the distance between sequences x and y (abstract).'''
+        raise NotImplementedError
+    def __getitem__(self,key):
+        '''
+        Return the distance for key, a pair of sequence numbers given in
+        any order. The distance is computed only the first time.
+        '''
+        assert isinstance(key,(tuple,list)) and len(key)==2, \
+               'key must be a tuple or a list of two integers'
+        low,high = key
+        if high < low:
+            low,high = high,low
+        row = self.matrix[high]
+        if row[low] is None:
+            row[low] = self.evaluateDist(low,high)
+        return row[low]
\ No newline at end of file
diff --git a/src/obitools/distances/observed.py b/src/obitools/distances/observed.py
new file mode 100644
index 0000000..8828d92
--- /dev/null
+++ b/src/obitools/distances/observed.py
@@ -0,0 +1,77 @@
+Module dedicated to computing observed divergences from
+an alignment. No distance correction is applied at all.
+from itertools import imap
+from obitools.distances import DistanceMatrix
+class PairewiseGapRemoval(DistanceMatrix):
+    '''
+    Observed divergence matrix computed from an alignment.
+    Columns holding a gap in either sequence of a pair are
+    ignored for that pair.
+    '''
+    def evaluateDist(self,x,y):
+        '''
+        Compute the observed divergence between two sequences of the
+        alignment: the number of differing columns divided by the number
+        of columns without a gap.
+        @attention: for performance reasons this method should not be
+                    called directly; use the __getitem__ method of
+                    DistanceMatrix instead.
+        @see: L{__getitem__}
+        @param x: number of the first sequence in the alignment
+        @type x: int
+        @param y: number of the second sequence in the alignment
+        @type y: int
+        @raise ZeroDivisionError: no column without gap is shared
+        '''
+        seq1 = self.aligment[x]
+        seq2 = self.aligment[y]
+        diff = 0
+        tot = 0
+        for column in imap(None,seq1,seq2):
+            if '-' in column:
+                continue
+            tot += 1
+            if column[0] != column[1]:
+                diff += 1
+        return float(diff)/tot
+class Pairewise(DistanceMatrix):
+    '''
+    Observed divergence matrix computed from an alignment.
+    Gapped columns are kept and compared like any other column.
+    '''
+    def evaluateDist(self,x,y):
+        '''
+        Compute the observed divergence between two sequences of the
+        alignment: the number of differing columns divided by the number
+        of compared columns.
+        @attention: for performance reasons this method should not be
+                    called directly; use the __getitem__ method of
+                    DistanceMatrix instead.
+        @see: L{__getitem__}
+        @param x: number of the first sequence in the alignment
+        @type x: int
+        @param y: number of the second sequence in the alignment
+        @type y: int
+        @raise ZeroDivisionError: there is no column to compare
+        '''
+        seq1 = self.aligment[x]
+        seq2 = self.aligment[y]
+        diff = 0
+        tot = 0
+        for column in imap(None,seq1,seq2):
+            tot += 1
+            if column[0] != column[1]:
+                diff += 1
+        return float(diff)/tot
\ No newline at end of file
diff --git a/src/obitools/distances/phylip.py b/src/obitools/distances/phylip.py
new file mode 100644
index 0000000..e2043fa
--- /dev/null
+++ b/src/obitools/distances/phylip.py
@@ -0,0 +1,35 @@
+import sys
+from itertools import imap,count
+def writePhylipMatrix(matrix):
+    '''
+    Format a distance matrix as text: a first line with the number of
+    sequences, then one line per sequence made of its name on 10
+    characters followed by the distances to the previous sequences.
+    Names are the sequence ids cut to 10 characters; a name already seen
+    gets a '_<number>' suffix.
+    @param matrix: object exposing the sequences as matrix.aligment and
+                   the distances as matrix[(j,i)]
+    @return: the formatted matrix
+    @rtype: str
+    '''
+    pnames = [seq.id[:10] for seq in matrix.aligment]
+    seen = {}
+    rank = []
+    for n in pnames:
+        seen[n] = seen.get(n,0)+1
+        rank.append(seen[n])
+    for i,r in enumerate(rank):
+        n = pnames[i]
+        alternate = n
+        if r > 1:
+            # cut the name so that name + '_' + number fits on 10 characters
+            while alternate in pnames:
+                lcut = 9 - len(str(r))
+                alternate = n[:lcut]+ '_%d' % r
+                r+=1
+        pnames[i]='%-10s' % alternate
+    rep = ['%5d' % len(matrix.aligment)]
+    for i,n in enumerate(pnames):
+        cells = ['%5.4f' % matrix[(j,i)] for j in range(i)]
+        rep.append('  '.join([n] + cells))
+    return '\n'.join(rep)
\ No newline at end of file
diff --git a/src/obitools/distances/r.py b/src/obitools/distances/r.py
new file mode 100644
index 0000000..f674a4c
--- /dev/null
+++ b/src/obitools/distances/r.py
@@ -0,0 +1,25 @@
+import sys
+from itertools import imap,count
+def writeRMatrix(matrix):
+    '''
+    Format a distance matrix as a full square table: a header line with
+    the sequence ids, then one line of distances per sequence.
+    Every column is as wide as the longest id, and at least 5 characters.
+    @param matrix: object exposing the sequences as matrix.aligment and
+                   the distances as matrix[(j,i)]
+    @return: the formatted table, an empty string for an empty alignment
+    @rtype: str
+    '''
+    names = [x.id for x in matrix.aligment]
+    # the extra [5] sets the minimal width and keeps max() away from an
+    # empty list when the alignment holds no sequence
+    lmax = max([len(x) for x in names] + [5])
+    lali = len(matrix.aligment)
+    nformat = '%%-%ds' % lmax
+    dformat = '%%%d.4f' % lmax
+    rep = ['  '.join([nformat % x for x in names])]
+    for i in range(lali):
+        # cells use dformat so that they line up with the header columns
+        rep.append('  '.join([dformat % matrix[(j,i)] for j in range(lali)]))
+    return '\n'.join(rep)
\ No newline at end of file
diff --git a/src/obitools/dnahash/__init__.py b/src/obitools/dnahash/__init__.py
new file mode 100644
index 0000000..ca02e35
--- /dev/null
+++ b/src/obitools/dnahash/__init__.py
@@ -0,0 +1,100 @@
+_R= _A + _G
+_Y= _C + _T
+_M= _C + _A
+_K= _T + _G
+_W= _T + _A
+_S= _C + _G
+_B= _C + _G + _T
+_D= _A + _G + _T
+_H= _A + _C + _T
+_V= _A + _C + _G
+_N= _A + _C + _G + _T
+          'c':_C,
+          'g':_G,
+          't':_T,
+          'r':_R,
+          'y':_Y,
+          'm':_M,
+          'k':_K,
+          'w':_W,
+          's':_S,
+          'b':_B,
+          'd':_D,
+          'h':_H,
+          'v':_V,
+          'n':_N,
+          }
+def hashCodeIterator(sequence,wsize,degeneratemax=0,offset=0):
+    '''
+    Slide a window of wsize letters along sequence and yield, for each
+    full window holding at most degeneratemax degenerate letters, the
+    tuple (position, hash codes, number of degenerate letters).
+    A letter is degenerate when its _dnahash entry has several codes;
+    each letter adds two bits to the hash codes.
+    NOTE(review): _dnahash is defined elsewhere in the module; presumably
+    it maps a lowercase nucleotide letter to the sequence of 2-bit codes
+    it can stand for - confirm.
+    @param sequence: iterable of letters
+    @param wsize: size of the window
+    @param degeneratemax: maximal number of degenerate letters accepted
+                          in a window
+    @param offset: position given to the first full window
+    '''
+    window = [0] * wsize      # ring of flags: 1 where the letter was degenerate
+    slot = 0
+    degenerate = 0            # number of flags currently set in the ring
+    filled = 0
+    position = offset
+    hashs = set([0])
+    hashmask = 4 ** wsize - 1 # keeps the two bits of the wsize last letters
+    for letter in sequence:
+        codes = _dnahash[letter.lower()]
+        # the letter leaving the window frees its slot
+        if window[slot]:
+            degenerate -= 1
+            window[slot] = 0
+        if len(codes) > 1:
+            degenerate += 1
+            window[slot] = 1
+        slot = (slot + 1) % wsize
+        # too many degenerate letters: follow the first code only, so that
+        # the set of hash codes stops growing
+        if degenerate > degeneratemax:
+            codes = set([codes[0]])
+        hashs = set((((hc<<2) | cl) & hashmask)
+                    for hc in hashs
+                    for cl in codes)
+        if filled < wsize:
+            filled += 1
+        if filled == wsize:
+            if degenerate <= degeneratemax:
+                yield (position,hashs,degenerate)
+            position += 1
+def hashSequence(sequence,wsize,degeneratemax=0,offset=0,hashs=None):
+    '''
+    Record, for every hash code produced by hashCodeIterator, the
+    positions where it occurs in sequence.
+    @param hashs: table to complete, one list of positions per hash code;
+                  a new table of 4**wsize empty lists is built when None
+    @return: the completed table
+    '''
+    if hashs is None:
+        hashs = [[] for _unused in xrange(4**wsize)]
+    for hit in hashCodeIterator(sequence, wsize, degeneratemax, offset):
+        pos, keys = hit[0], hit[1]
+        for k in keys:
+            hashs[k].append(pos)
+    return hashs
+def hashSequences(sequences,wsize,maxpos,degeneratemax=0):
+    '''
+    Hash several sequences into one shared table.
+    Each sequence is hashed with an offset equal to the cumulated length
+    of the sequences before it.
+    @param sequences: iterable of sequences
+    @param wsize: size of the hashed window
+    @param maxpos: not used by this function
+    @param degeneratemax: maximal number of degenerate letters accepted
+                          in a window
+    @return: the pair (table, offsets); table is None when sequences
+             is empty
+    '''
+    hashs=None
+    offsets=[]
+    offset=0
+    for s in sequences:
+        offsets.append(offset)
+        # keep the table returned by hashSequence: it is created on the
+        # first call and must be handed back to the following ones,
+        # otherwise every sequence is hashed into a table thrown away
+        hashs = hashSequence(s,wsize,degeneratemax=degeneratemax,offset=offset,hashs=hashs)
+        offset+=len(s)
+    return hashs,offsets
\ No newline at end of file
diff --git a/src/obitools/ecobarcode/__init__.py b/src/obitools/ecobarcode/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/ecobarcode/databases.py b/src/obitools/ecobarcode/databases.py
new file mode 100644
index 0000000..70d2319
--- /dev/null
+++ b/src/obitools/ecobarcode/databases.py
@@ -0,0 +1,32 @@
+Created on 25 sept. 2010
+ at author: coissac
+from obitools import NucSequence
+def referenceDBIterator(options):
+    '''
+    Iterate over the reference sequences of the database named
+    options.database, as NucSequence objects tagged with 'taxid' and
+    'refdbid'.
+    The id of the database is stored in options.dbid as a side effect.
+    @param options: object providing the open connection as
+                    options.ecobarcodedb and the database name as
+                    options.database
+    '''
+    cursor = options.ecobarcodedb.cursor()
+    # The database name is passed as a bound parameter, like the queries
+    # of obitools.ecobarcode.ecotag, instead of being pasted in the SQL
+    # text where a quote in the name would break or alter the query.
+    cursor.execute("select id from databases.database where name=%s",
+                   (options.database,))
+    options.dbid = cursor.fetchone()[0]
+    cursor.execute('''
+                      select s.accession,r.id,r.taxid,r.sequence
+                      from databases.database    d,
+                           databases.reference   r,
+                           databases.relatedsequences s
+                      where r.database = d.id
+                        and s.reference= r.id
+                        and s.mainac
+                        and d.name = %s
+                   ''', (options.database,))
+    for ac,refid,taxid,sequence in cursor:
+        s = NucSequence(ac,sequence)
+        s['taxid']=taxid
+        s['refdbid']=refid
+        yield s
\ No newline at end of file
diff --git a/src/obitools/ecobarcode/ecotag.py b/src/obitools/ecobarcode/ecotag.py
new file mode 100644
index 0000000..2ebd3fb
--- /dev/null
+++ b/src/obitools/ecobarcode/ecotag.py
@@ -0,0 +1,50 @@
+Created on 25 sept. 2010
+ at author: coissac
+def alreadyIdentified(seqid,options):
+    '''
+    Tell whether sequence seqid already has an identification stored
+    for the reference database options.dbid.
+    @rtype: bool
+    '''
+    cursor = options.ecobarcodedb.cursor()
+    key = (int(seqid),int(options.dbid))
+    cursor.execute('''
+                     select count(*) 
+                     from ecotag.identification 
+                     where sequence=%s
+                     and database=%s
+                   ''',key)
+    found = int(cursor.fetchone()[0])
+    return found > 0
+def storeIdentification(seqid,
+                        idstatus,taxid,
+                        matches,
+                        options
+                        ):
+    '''
+    Store the identification of sequence seqid and its evidences, then
+    commit.
+    When options.updatedb is false, the identification already stored
+    for this sequence and this reference database is deleted first.
+    @param seqid: id of the identified sequence
+    @param idstatus: status stored with the identification
+    @param taxid: taxid assigned to the sequence
+    @param matches: mapping from a reference to its identity; one
+                    evidence row is stored per item
+    @param options: object providing the connection (ecobarcodedb), the
+                    reference database id (dbid) and the updatedb flag
+    '''
+    cursor = options.ecobarcodedb.cursor()
+    key = (int(seqid),int(options.dbid))
+    if not options.updatedb:
+        cursor.execute('''
+                       delete from ecotag.identification where sequence=%s and database=%s
+                       ''',key)
+    cursor.execute('''
+                    insert into ecotag.identification (sequence,database,idstatus,taxid)
+                    values (%s,%s,%s,%s)
+                    returning id
+                   ''' , key + (idstatus,int(taxid)))
+    # id of the row just inserted, referenced by every evidence
+    idid = cursor.fetchone()[0]
+    for reference,identity in matches.iteritems():
+        evidence = (idid,reference,identity)
+        cursor.execute('''
+                        insert into ecotag.evidence (identification,reference,identity)
+                        values (%s,
+                                %s,
+                                %s)
+                       ''',evidence)
+    cursor.close()
+    options.ecobarcodedb.commit()
diff --git a/src/obitools/ecobarcode/options.py b/src/obitools/ecobarcode/options.py
new file mode 100644
index 0000000..6086423
--- /dev/null
+++ b/src/obitools/ecobarcode/options.py
@@ -0,0 +1,64 @@
+Created on 23 sept. 2010
+ at author: coissac
+import psycopg2 
+from obitools.ecobarcode.taxonomy import EcoTaxonomyDB
+def addEcoBarcodeDBOption(optionManager):
+    '''
+    Declare on optionManager the options describing the connection to
+    the ecobarcode database (--dbname, --server, --user, --port,
+    --passwd) and the amplification primer (--primer).
+    '''
+    # one row per option: flag, destination, default value, help text
+    declarations = (
+        ('--dbname', 'ecobarcodedb', None,
+         "Specify the name of the ecobarcode database"),
+        ('--server', 'dbserver', "localhost",
+         "Specify the adress of the ecobarcode database server"),
+        ('--user', 'dbuser', 'postgres',
+         "Specify the user of the ecobarcode database"),
+        ('--port', 'dbport', 5432,
+         "Specify the port of the ecobarcode database"),
+        ('--passwd', 'dbpasswd', '',
+         "Specify the passwd of the ecobarcode database"),
+        ('--primer', 'primer', None,
+         "Specify the primer used for amplification"),
+    )
+    for flag,dest,default,text in declarations:
+        optionManager.add_option(flag,
+                                 action="store", dest=dest,
+                                 type='str',
+                                 default=default,
+                                 help=text)
+def ecobarcodeDatabaseConnection(options):
+    '''
+    Open the connection to the ecobarcode database described by options.
+    On success the database name is saved in options.dbname,
+    options.ecobarcodedb is replaced by the open connection and the
+    taxonomy read through this connection is returned.
+    @return: an EcoTaxonomyDB, or None when no database name was given
+    '''
+    if options.ecobarcodedb is None:
+        return None
+    connection = psycopg2.connect(database=options.ecobarcodedb,
+                                  user=options.dbuser,
+                                  password=options.dbpasswd,
+                                  host=options.dbserver,
+                                  port=options.dbport)
+    options.dbname = options.ecobarcodedb
+    if connection is None:
+        return None
+    options.ecobarcodedb = connection
+    return EcoTaxonomyDB(connection)
diff --git a/src/obitools/ecobarcode/rawdata.py b/src/obitools/ecobarcode/rawdata.py
new file mode 100644
index 0000000..a5f58cf
--- /dev/null
+++ b/src/obitools/ecobarcode/rawdata.py
@@ -0,0 +1,38 @@
+Created on 25 sept. 2010
+ at author: coissac
+from obitools import NucSequence
+from obitools.utils import progressBar
+from obitools.ecobarcode.ecotag import alreadyIdentified
+import sys
+def sequenceIterator(options):
+    cursor = options.ecobarcodedb.cursor()
+    cursor.execute('''
+                      select s.id,sum(o.count),s.sequence
+                      from rawdata.sequence      s,
+                           rawdata.occurrences   o
+                      where o.sequence= s.id
+                        and s.primers = '%s'
+                      group by s.id,s.sequence
+                   ''' % options.primer
+                  )
+    nbseq = cursor.rowcount
+    progressBar(1, nbseq, True, head=options.dbname)
+    for id,count,sequence in cursor:
+        progressBar(cursor.rownumber+1, nbseq, head=options.dbname)
+        if not options.updatedb or not alreadyIdentified(id,options):
+            s = NucSequence(id,sequence)
+            s['count']=count
+            print >>sys.stderr,' +', cursor.rownumber+1,
+            yield s
+        else:
+            print >>sys.stderr,' @', cursor.rownumber+1,
+    print >>sys.stderr
diff --git a/src/obitools/ecobarcode/taxonomy.py b/src/obitools/ecobarcode/taxonomy.py
new file mode 100644
index 0000000..c7d0185
--- /dev/null
+++ b/src/obitools/ecobarcode/taxonomy.py
@@ -0,0 +1,120 @@
+Created on 24 sept. 2010
+ at author: coissac
+from obitools.ecopcr.taxonomy import  TaxonomyDump 
+from obitools.ecopcr.taxonomy import  Taxonomy
+import sys
+class EcoTaxonomyDB(TaxonomyDump) :
+    '''
+    Taxonomy loaded from the C{ncbitaxonomy} tables of an ecobarcode database.
+    The constructor reads the nodes, names, merged and deleted taxid tables
+    through the database connection it receives.
+    '''
+    def __init__(self,dbconnect):
+        '''
+        Load the whole taxonomy from the database.
+        @param dbconnect: open database connection; only its C{cursor()} method
+                          is used by this class
+        '''
+        self._dbconnect=dbconnect
+        print >> sys.stderr,"Reading ecobarcode taxonomy database..."
+        self._readNodeTable()
+        print >> sys.stderr," ok"
+        print >>sys.stderr,"Adding scientific name..."
+        self._name=[]
+        for taxid,name,classname in self._nameIterator():
+            self._name.append((name,classname,self._index[taxid]))
+            # the scientific name is stored as an extra item of the taxon itself
+            if classname == 'scientific name':
+                self._taxonomy[self._index[taxid]].append(name)
+        print >>sys.stderr,"Adding taxid alias..."
+        # a merged taxid points to the same taxon index as its replacement
+        for taxid,current in self._mergedNodeIterator():
+            self._index[taxid]=self._index[current]
+        print >>sys.stderr,"Adding deleted taxid..."
+        # a deleted taxid stays known by the index but maps to no taxon
+        for taxid in self._deletedNodeIterator():
+            self._index[taxid]=None
+        # NOTE(review): Taxonomy.__init__ reads self._preferedName, which is not
+        # set in this class - presumably provided by TaxonomyDump; confirm.
+        Taxonomy.__init__(self)
+    #####
+    #
+    # Iterator functions
+    #
+    #####
+    def _readNodeTable(self):
+        '''
+        Read the C{ncbitaxonomy.nodes} table and build C{_taxonomy}, C{_ranks}
+        and C{_index}.
+        Each row (taxid, rank, parent) becomes a list. Once the taxa are sorted,
+        the rank name is replaced by its position in the sorted rank list and
+        the parent taxid by the index of the parent in C{_taxonomy}.
+        '''
+        cursor = self._dbconnect.cursor()
+        cursor.execute("""
+                            select     taxid,rank,parent
+                            from ncbitaxonomy.nodes
+                       """)
+        print >>sys.stderr,"Reading taxonomy nodes..."
+        taxonomy=[list(n) for n in cursor]
+        print >>sys.stderr,"List all taxonomy rank..."    
+        ranks =list(set(x[1] for x in taxonomy))
+        ranks.sort()
+        # map every rank name to its position in the sorted rank list
+        rankidx = dict(map(None,ranks,xrange(len(ranks))))
+        print >>sys.stderr,"Sorting taxons..."
+        taxonomy.sort(TaxonomyDump._taxonCmp)
+        # _bsearchTaxon is called below, after the sorted list has been stored
+        self._taxonomy=taxonomy
+        print >>sys.stderr,"Indexing taxonomy..."
+        index = {}
+        for t in self._taxonomy:
+            index[t[0]]=self._bsearchTaxon(t[0])
+        print >>sys.stderr,"Indexing parent and rank..."
+        for t in self._taxonomy:
+            t[1]=rankidx[t[1]]
+            t[2]=index[t[2]]
+        self._ranks=ranks
+        self._index=index 
+        cursor.close()
+    def _nameIterator(self):
+        '''
+        Iterate over the (taxid, name, nameclass) rows of C{ncbitaxonomy.names}.
+        The cursor is closed once all the rows have been consumed.
+        '''
+        cursor = self._dbconnect.cursor()
+        cursor.execute("""
+                            select     taxid,name,nameclass
+                            from ncbitaxonomy.names
+                       """)
+        for taxid,name,nameclass in cursor:
+            yield taxid,name,nameclass
+        cursor.close()
+    def _mergedNodeIterator(self):
+        '''
+        Iterate over the (oldtaxid, newtaxid) rows of C{ncbitaxonomy.merged}.
+        The cursor is closed once all the rows have been consumed.
+        '''
+        cursor = self._dbconnect.cursor()
+        cursor.execute("""
+                            select     oldtaxid,newtaxid
+                            from ncbitaxonomy.merged
+                       """)
+        for oldtaxid,newtaxid in cursor:
+                yield oldtaxid,newtaxid
+        cursor.close()
+    def _deletedNodeIterator(self):
+        '''
+        Iterate over the taxids listed in C{ncbitaxonomy.delnodes}.
+        The cursor is closed once all the rows have been consumed.
+        '''
+        cursor = self._dbconnect.cursor()
+        cursor.execute("""
+                            select  taxid
+                            from ncbitaxonomy.delnodes
+                       """)
+        for taxid in cursor:
+                # rows are one-column sequences: yield the bare taxid
+                yield taxid[0]
+        cursor.close()
diff --git a/src/obitools/ecopcr/__init__.py b/src/obitools/ecopcr/__init__.py
new file mode 100644
index 0000000..111c8ac
--- /dev/null
+++ b/src/obitools/ecopcr/__init__.py
@@ -0,0 +1,69 @@
+from obitools import utils
+from obitools import NucSequence
+from obitools.utils import universalOpen, universalTell, fileSize, progressBar
+import struct
+import sys
+class EcoPCRFile(utils.ColumnFile):
+    def __init__(self,stream):
+        utils.ColumnFile.__init__(self,
+                                  stream, ' | ', True, 
+                                  (str,int,int,
+                                   str,int,str,
+                                   int,str,int,
+                                   str,int,str,
+                                   str,str,int,float,
+                                   str,int,float,
+                                   int,
+                                   str,str), "#")
+    def next(self):
+        data = utils.ColumnFile.next(self)
+        seq = NucSequence(data[0],data[20],data[21])
+        seq['seq_length_ori']=data[1]
+        seq['taxid']=data[2]
+        seq['rank']=data[3]
+        seq['species']=data[4]
+        seq['species_name']=data[5]
+        seq['genus']=data[6]
+        seq['genus_name']=data[7]
+        seq['family']=data[8]
+        seq['family_name']=data[9]
+        seq['strand']=data[12]
+        seq['forward_match']=data[13]
+        seq['forward_error']=data[14]
+        seq['forward_tm']=data[15]
+        seq['reverse_match']=data[16]
+        seq['reverse_error']=data[17]
+        seq['reverse_tm']=data[18]
+        return seq
+class EcoPCRDBFile(object):
+    def _ecoRecordIterator(self,file,noError=False):
+        file = universalOpen(file,noError)
+        (recordCount,) = struct.unpack('> I',file.read(4))
+        self._recover=False
+        if recordCount:
+            for i in xrange(recordCount):
+                (recordSize,)=struct.unpack('>I',file.read(4))
+                record = file.read(recordSize)
+                yield record
+        else:
+            print >> sys.stderr,"\n\n  WARNING : EcoPCRDB reading set into recover data mode\n"
+            self._recover=True
+            ok=True
+            while(ok):
+                try:
+                    (recordSize,)=struct.unpack('>I',file.read(4))
+                    record = file.read(recordSize)
+                    yield record
+                except:
+                    ok=False
\ No newline at end of file
diff --git a/src/obitools/ecopcr/annotation.py b/src/obitools/ecopcr/annotation.py
new file mode 100644
index 0000000..7c76fb2
--- /dev/null
+++ b/src/obitools/ecopcr/annotation.py
@@ -0,0 +1,104 @@
+import struct
+class EcoPCRDBAnnotationWriter(object):
+    '''
+    Class used to write Annotation description in EcoPCRDB format.
+    EcoPCRDBAnnotationWriter is oftenly called through the EcoPCRDBSequenceWriter class
+    @see: L{ecopcr.sequence.EcoPCRDBSequenceWriter}
+    '''
+    def __init__(self,dbname,id,fileidx=1,type=('CDS'),definition=None):
+        '''
+        class constructor
+        @param dbname: name of ecoPCR database
+        @type dbname: C{str}
+        @param id: name of the qualifier used as feature id
+        @type id: C{str}
+        @param fileidx:
+        @type fileidx: C{int}
+        @param type:
+        @type type: C{list} or C{tuple}
+        @param definition: 
+        @type definition: C{str}
+        '''
+        self._type = type
+        self._definition = definition
+        self._id = id
+        self._filename="%s_%03d.adx" % (dbname,fileidx)
+        self._file = open(self._filename,'wb')
+        self._sequenceIdx=0
+        ftname  ="%s.fdx" % (dbname)
+        ft = open(ftname,'wb')
+        self._fttypeidx=dict(map(None,type,xrange(len(type))))
+        ft.write(struct.pack('> I',len(type)))
+        for t in type:
+            ft.write(self._ecoFtTypePacker(t))
+        ft.close()
+        self._annotationCount=0
+        self._file.write(struct.pack('> I',self._annotationCount))
+    def _ecoFtTypePacker(self,type):
+        totalSize = len(type)
+        packed = struct.pack('> I %ds' % totalSize,totalSize,type)
+        assert len(packed) == totalSize+4, "error in feature type packing"
+        return packed
+    def _ecoAnnotationPacker(self,feature,seqidx):
+        begin  = feature.begin-1
+        end    = feature.end
+        type   = self._fttypeidx[feature.ftType]
+        strand = feature.isDirect()
+        id     = feature[self._id][0]
+        if self._definition in feature:
+            definition = feature[self._definition][0]
+        else:
+            definition = ''
+        assert strand is not None,"Only strand defined features can be stored"
+        deflength = len(definition)
+        totalSize = 4 + 4 + 4 + 4 + 4 + 20 + 4 + deflength
+        packed = struct.pack('> I I I I I 20s I %ds' % (deflength),
+                             totalSize,
+                             seqidx,
+                             begin,
+                             end,
+                             type,
+                             int(strand),
+                             id,
+                             deflength,
+                             definition)
+        assert len(packed) == totalSize+4, "error in annotation packing"
+        return packed
+    def put(self,sequence,seqidx=None):
+        if seqidx is None:
+            seqidx = self._sequenceIdx
+            self._sequenceIdx+=1
+        for feature in sequence.getFeatureTable():
+            if feature.ftType in self._type:
+                self._annotationCount+=1
+                self._file.write(self._ecoAnnotationPacker(feature,seqidx))
+    def __del__(self):
+        self._file.seek(0,0)
+        self._file.write(struct.pack('> I',self._annotationCount))
+        self._file.close()
diff --git a/src/obitools/ecopcr/options.py b/src/obitools/ecopcr/options.py
new file mode 100644
index 0000000..d9329ac
--- /dev/null
+++ b/src/obitools/ecopcr/options.py
@@ -0,0 +1,140 @@
+Created on 13 fevr. 2011
+ at author: coissac
+from obitools.ecopcr.taxonomy import Taxonomy, EcoTaxonomyDB, TaxonomyDump, ecoTaxonomyWriter
+#    from obitools.ecobarcode.options import addEcoBarcodeDBOption,ecobarcodeDatabaseConnection
+#except ImportError:
+#    def addEcoBarcodeDBOption(optionmanager):
+#        pass
+#    def ecobarcodeDatabaseConnection(options):
+#        return None
+def addTaxonomyDBOptions(optionManager):
+    # addEcoBarcodeDBOption(optionManager)
+    group = optionManager.add_option_group('Taxonomy loading options')
+    group.add_option('-d','--database',
+                     action="store", dest="taxonomy",
+                     metavar="<FILENAME>",
+                     type="string",
+                     help="ecoPCR taxonomy Database "
+                                  "name")
+    group.add_option('-t','--taxonomy-dump',
+                     action="store", dest="taxdump",
+                     metavar="<FILENAME>",
+                     type="string",
+                     help="NCBI Taxonomy dump repository "
+                          "name")
+def addTaxonomyFilterOptions(optionManager):
+    addTaxonomyDBOptions(optionManager)
+    group = optionManager.add_option_group('Taxonomy-related filtering options')
+    group.add_option('--require-rank',
+                     action="append", 
+                     dest='requiredRank',
+                     metavar="<RANK_NAME>",
+                     type="string",
+                     default=[],
+                     help="select sequence with taxid tag containing "
+                          "a parent of rank <RANK_NAME>")
+    group.add_option('-r','--required',
+                     action="append", 
+                     dest='required',
+                     metavar="<TAXID>",
+                     type="int",
+                     default=[],
+                     help="Select the sequences having "
+                          "the ancestor of taxid <TAXID>. If several ancestors are specified "
+                          "(with \n'-r taxid1 -r taxid2'), the sequences "
+                          "having at least one of them are selected")
+    group.add_option('-i','--ignore',
+                     action="append", 
+                     dest='ignored',
+                     metavar="<TAXID>",
+                     type="int",
+                     default=[],
+                     help="ignored taxid")
+def loadTaxonomyDatabase(options):
+    assert hasattr(options, 'taxonomy'), 'No options to load Taxonomy available'
+    if isinstance(options.taxonomy, Taxonomy):
+        return options.taxonomy
+    taxonomy = None
+    if (options.taxonomy is not None or 
+        options.taxdump is not None):
+        if options.taxdump is not None:
+            taxonomy = TaxonomyDump(options.taxdump)
+        if taxonomy is not None and isinstance(options.taxonomy, str):
+            ecoTaxonomyWriter(options.taxonomy,taxonomy)
+            options.ecodb=options.taxonomy
+        if isinstance(options.taxonomy, Taxonomy):
+            taxonomy = options.taxonomy
+        if taxonomy is None and isinstance(options.taxonomy, str):
+            import sys
+            taxonomy = EcoTaxonomyDB(options.taxonomy)
+            options.ecodb=options.taxonomy
+        options.taxonomy=taxonomy
+    return options.taxonomy
+def taxonomyFilterGenerator(options):
+    loadTaxonomyDatabase(options)
+    if options.taxonomy is not None:
+        taxonomy=options.taxonomy
+        def taxonomyFilter(seq):
+            def annotateAtRank(seq,rank):
+                if 'taxid' in seq and seq['taxid'] is not None:
+                    rtaxid= taxonomy.getTaxonAtRank(seq['taxid'],rank)
+                    return rtaxid
+                return None
+            good = True
+            if 'taxid' in seq:
+                taxid = seq['taxid']
+#                print taxid,
+                if options.requiredRank:
+                    taxonatrank = reduce(lambda x,y: x and y,
+                                         (annotateAtRank(seq,rank) is not None
+                                            for rank in options.requiredRank),True)
+                    good = good and taxonatrank 
+#                    print >>sys.stderr, " Has rank : ",good,
+                if options.required:
+                    good = good and reduce(lambda x,y: x or y,
+                                  (taxonomy.isAncestor(r,taxid) for r in options.required),
+                                  False)
+#                    print " Required : ",good,
+                if options.ignored:
+                    good = good and not reduce(lambda x,y: x or y,
+                                  (taxonomy.isAncestor(r,taxid) for r in options.ignored),
+                                  False)
+#                    print " Ignored : ",good,
+#                print " Global : ",good
+            return good
+    else:
+        def taxonomyFilter(seq):
+            return True
+    return taxonomyFilter
+def taxonomyFilterIteratorGenerator(options):
+    taxonomyFilter = taxonomyFilterGenerator(options)
+    def filterIterator(seqiterator):
+        for seq in seqiterator:
+            if taxonomyFilter(seq):
+                yield seq
+    return filterIterator
\ No newline at end of file
diff --git a/src/obitools/ecopcr/sequence.py b/src/obitools/ecopcr/sequence.py
new file mode 100644
index 0000000..e1a5627
--- /dev/null
+++ b/src/obitools/ecopcr/sequence.py
@@ -0,0 +1,183 @@
+from obitools import NucSequence
+from obitools.ecopcr import EcoPCRDBFile
+from obitools.ecopcr.taxonomy import EcoTaxonomyDB, ecoTaxonomyWriter
+from obitools.ecopcr.options  import loadTaxonomyDatabase
+from obitools.ecopcr.annotation import EcoPCRDBAnnotationWriter
+from obitools.utils import universalOpen
+from glob import glob
+import struct
+import gzip
+import sys
+import re
+class EcoPCRDBSequenceIterator(EcoPCRDBFile):
+    '''
+    Build an iterator over the sequences include in a sequence database
+    formated for ecoPCR
+    '''
+    def __init__(self,path,taxonomy=None):
+        '''
+        ecoPCR data iterator constructor
+        @param path: path to the ecoPCR database including the database prefix name
+        @type path: C{str}
+        @param taxonomy: a taxonomy can be given to the reader to decode the taxonomic data
+                         associated to the sequences. If no Taxonomy is furnish, it will be read 
+                         before the sequence database files using the same path.
+        @type taxonomy: L{obitools.ecopcr.taxonomy.Taxonomy}
+        '''
+        self._path = path
+        if taxonomy is not None:
+            self._taxonomy=taxonomy
+        else:
+            self._taxonomy=EcoTaxonomyDB(path)
+        self._seqfilesFiles =  glob('%s_???.sdx' % self._path)
+        self._seqfilesFiles.sort()
+    def __ecoSequenceIterator(self,file):
+        for record in self._ecoRecordIterator(file):
+            lrecord = len(record)
+            lnames  = lrecord - (4*4+20)
+            (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)  # @UnusedVariable
+            seqid=seqid.strip('\x00')
+            de = string[:deflength]
+            seq = gzip.zlib.decompress(string[deflength:])
+            bioseq = NucSequence(seqid,seq,de,taxid=self._taxonomy._taxonomy[taxid][0])
+            yield  bioseq
+    def __iter__(self):
+        for seqfile in self._seqfilesFiles:
+            for seq in self.__ecoSequenceIterator(seqfile):
+                yield seq
+    @property
+    def taxonomy(self):
+        """Return the taxonomy associated to the ecoPCRDB reader"""
+        return self._taxonomy
+class EcoPCRDBSequenceWriter(object):
+    def __init__(self,options,fileidx=None,ftid=None,type=None,definition=None,append=False):
+        from obitools.options import currentInputFileName
+        self.currentInputFileName=currentInputFileName
+        # Take care of the taxonomy associated to the database
+        self._currentfile=None
+        self._taxonomy= loadTaxonomyDatabase(options)
+        dbname = options.ecopcroutput
+        if (self._taxonomy is not None
+            and (not hasattr(options,'ecodb') or options.ecodb!=dbname)):
+            print >> sys.stderr,"Writing the taxonomy file...",
+            ecoTaxonomyWriter(dbname,self._taxonomy)
+            print >> sys.stderr,"Ok"
+        # Identifiy the next sequence file number 
+        if fileidx is None:
+            p = re.compile(r'([0-9]{3})\.sdx')
+            fileidx = max(list(int(p.search(i).group(1)) 
+                               for i in glob('%s_[0-9][0-9][0-9].sdx' % dbname))+[0]
+                          ) +1
+        self._fileidx=fileidx
+        self._dbname=dbname
+        self._filename="%s_%03d.sdx" % (dbname,fileidx)
+        if append:
+            f = universalOpen(self._filename)
+            (recordCount,) = struct.unpack('> I',f.read(4))
+            self._sequenceCount=recordCount
+            self._sequenceFileCount=recordCount
+            del f
+            self.open('r+b')
+            self._file.seek(0,2)
+        else:
+            self._sequenceCount=0
+            self._sequenceFileCount=0
+            self.open("wb")
+        if type is not None:
+            assert ftid is not None,"You must specify an id attribute for features"
+            self._annotation = EcoPCRDBAnnotationWriter(dbname, ftid, fileidx, type, definition)
+        else: 
+            self._annotation = None
+    def _ecoSeqPacker(self,seq):
+        compactseq = gzip.zlib.compress(str(seq).upper(),9)
+        cptseqlength  = len(compactseq)
+        delength   = len(seq.definition)
+        totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
+        if self._taxonomy is None or 'taxid' not in seq :
+            taxon=-1
+        else:
+            taxon=self._taxonomy.findIndex(seq['taxid'])
+        if taxon==-1:
+            raise Exception("Taxonomy error for %s: %s"%(seq.id, "taxonomy is missing" if self._taxonomy is None else "sequence has no taxid" if 'taxid' not in seq else "wrong taxid"))
+        try:
+            packed = struct.pack('> I i 20s I I I %ds %ds' % (delength,cptseqlength),
+                                 totalSize,
+                                 taxon,
+                                 seq.id,
+                                 delength,
+                                 len(seq),
+                                 cptseqlength,
+                                 seq.definition,
+                                 compactseq)
+        except struct.error as e:
+            print >>sys.stderr,"\n\n============\n\nError on sequence : %s\n\n" % seq.id
+            raise e
+        assert len(packed) == totalSize+4, "error in sequence packing"
+        return packed
+    def close(self):
+        self._file.seek(0,0)
+        self._file.write(struct.pack('> I',self._sequenceFileCount))
+        self._file.close()
+    def open(self,mode):
+            self._filename="%s_%03d.sdx" % (self._dbname,self._fileidx)
+            self._file=open(self._filename,mode)
+            self._sequenceFileCount=0
+            self._file.write(struct.pack('> I',self._sequenceFileCount))
+    def put(self,sequence):
+        if self._currentfile is None:
+            self._currentfile=self.currentInputFileName()
+        if self.currentInputFileName() != self._currentfile:
+            self._currentfile=self.currentInputFileName()
+            self.close()
+            self._fileidx+=1
+            self.open('wb')
+        if self._taxonomy is not None:
+            if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
+                sequence.extractTaxon()
+        self._file.write(self._ecoSeqPacker(sequence))
+        if self._annotation is not None:
+            self._annotation.put(sequence, self._sequenceCount)
+        self._sequenceCount+=1
+        self._sequenceFileCount+=1
+    def __del__(self):
+        self.close()
diff --git a/src/obitools/ecopcr/taxonomy.py b/src/obitools/ecopcr/taxonomy.py
new file mode 100644
index 0000000..b9af63e
--- /dev/null
+++ b/src/obitools/ecopcr/taxonomy.py
@@ -0,0 +1,704 @@
+import struct
+import sys
+from itertools import count,imap,combinations
+from obitools.ecopcr import EcoPCRDBFile
+from obitools.utils import universalOpen
+from obitools.utils import ColumnFile
+import math
+    from collections import Counter
+except ImportError:
+    from obitools.collections import Counter
+class Taxonomy(object):
+    def __init__(self):
+        '''
+        The taxonomy database constructor
+        @param path: path to the ecoPCR database including the database prefix name
+        @type path: C{str}
+        '''
+        self._ranks.append('obi')
+        self._speciesidx = self._ranks.index('species')
+        self._genusidx   = self._ranks.index('genus')
+        self._familyidx   = self._ranks.index('family')
+        self._orderidx   = self._ranks.index('order')
+        self._nameidx = {}
+        for x in self._name :
+            if x[0] not in self._nameidx :
+                self._nameidx[x[0]] = [x[2]]
+            else :
+                self._nameidx[x[0]].append(x[2])
+        self._nameidx.update(dict((x[0],x[2]) for x in self._preferedName))
+        self._preferedidx=dict((x[2],x[1]) for x in self._preferedName)
+        self._bigestTaxid = max(x[0] for x in self._taxonomy)
+    def findTaxonByIdx(self,idx):
+        if idx is None:
+            return None
+        return self._taxonomy[idx]
+    def findIndex(self,taxid):
+        if taxid is None:
+            return None
+        return self._index[taxid]
+    def findTaxonByTaxid(self,taxid):
+        return self.findTaxonByIdx(self.findIndex(taxid))
+    def findTaxonByName(self,name):
+        taxa = []
+        for i in self._nameidx[name] :
+            taxa.append(self._taxonomy[i])
+        return taxa
+    def findRankByName(self,rank):
+        try:
+            return self._ranks.index(rank)
+        except ValueError:
+            return None
+    def __contains__(self,taxid):
+        try:
+            return self.findTaxonByTaxid(taxid) is not None
+        finally:
+            return False
+    #####
+    #
+    #
+    #####
+    def subTreeIterator(self, taxid):
+        "return subtree for given taxonomic id "
+        for t in xrange(len(self._taxonomy)) :
+            if self._taxonomy[t][0] == taxid :
+                idx = t
+        yield self._taxonomy[idx]
+        for t in self._taxonomy:
+            if t[2] == idx:
+                for subt in self.subTreeIterator(t[0]):
+                    yield subt
+    def parentalTreeIterator(self, taxid):
+        """
+           return parental tree for given taxonomic id starting from
+           first ancestor to the root.
+        """
+        taxon=self.findTaxonByTaxid(taxid)
+        if taxon is not None:
+            while taxon[2]!= 0: 
+                yield taxon
+                taxon = self._taxonomy[taxon[2]]
+            yield self._taxonomy[0]
+        else:
+            raise StopIteration
+    def isAncestor(self,parent,taxid):
+        return parent in [x[0] for x in self.parentalTreeIterator(taxid)]
+    def lastCommonTaxon(self,*taxids):
+        if not taxids:
+            return None
+        if len(taxids)==1:
+            return taxids[0]
+        if len(taxids)==2:
+            t1 = [x[0] for x in self.parentalTreeIterator(taxids[0])]
+            t2 = [x[0] for x in self.parentalTreeIterator(taxids[1])]
+            t1.reverse()
+            t2.reverse()
+            count = min(len(t1),len(t2))
+            i=0
+            while(i < count and t1[i]==t2[i]):
+                i+=1
+            i-=1
+            return t1[i]
+        ancetre = taxids[0]
+        for taxon in taxids[1:]:
+            ancetre = self.lastCommonTaxon(ancetre,taxon)
+        return ancetre
+    def depth(self,taxid):
+        return len([x for x in self.parentalTreeIterator(taxid)])
+    def betterCommonTaxon(self,error=0.2,*taxids): 
+        def permanentIterator(x):
+            for i in x:
+                yield i
+            while(1):
+                yield None
+        taxids = set(taxids)
+        if len(taxids)==1: return taxids.pop()
+        allLineage = [[x[0] for x in self.parentalTreeIterator(y)]
+                      for y in taxids]
+        for x in allLineage: x.reverse()
+        allLineage=[permanentIterator(x) for x in allLineage]
+        c=True
+        while(c):
+            lcas = Counter([x.next() for x in allLineage])
+            #print lcas
+            if len(lcas) > 1:
+                main = float(max(lcas.values()))/len(taxids)
+                c = main > (1 - error)
+            if c:
+                lca = lcas.most_common(1)[0][0]
+        #print lca
+        return lca
+    def getPreferedName(self,taxid):
+        idx = self.findIndex(taxid)
+        return self._preferedidx.get(idx,self._taxonomy[idx][3])
+    def getScientificName(self,taxid):
+        return self.findTaxonByTaxid(taxid)[3]
+    def getRankId(self,taxid):
+        return self.findTaxonByTaxid(taxid)[1]
+    def getRank(self,taxid):
+        return self._ranks[self.getRankId(taxid)]
+    def getTaxonAtRank(self,taxid,rankid):
+        if isinstance(rankid, str):
+            rankid=self._ranks.index(rankid)
+        try:
+            return [x[0] for x in self.parentalTreeIterator(taxid)
+                    if x[1]==rankid][0]
+        except IndexError:
+            return None
+    def getSpecies(self,taxid):
+        return self.getTaxonAtRank(taxid, self._speciesidx)
+    def getGenus(self,taxid):
+        return self.getTaxonAtRank(taxid, self._genusidx)
+    def getFamily(self,taxid):
+        return self.getTaxonAtRank(taxid, self._familyidx)
+    def getOrder(self,taxid):
+        return self.getTaxonAtRank(taxid, self._orderidx)
+    def rankIterator(self):
+        for x in imap(None,self._ranks,xrange(len(self._ranks))):
+            yield x
+    def groupTaxa(self,taxa,groupname):
+        t=[self.findTaxonByTaxid(x) for x in taxa]
+        a=set(x[2] for x in t)
+        assert len(a)==1,"All taxa must have the same parent"
+        newtaxid=max([2999999]+[x[0] for x in self._taxonomy if x[0]>=3000000 and x[0]<4000000])+1
+        newidx=len(self._taxonomy)
+        if 'GROUP' not in self._ranks:
+            self._ranks.append('GROUP')
+        rankid=self._ranks.index('GROUP')
+        self._taxonomy.append((newtaxid,rankid,a.pop(),groupname))
+        for x in t:
+            x[2]=newidx
+    def addLocalTaxon(self,name,rank,parent,minimaltaxid=10000000):
+        newtaxid = minimaltaxid if (self._bigestTaxid < minimaltaxid) else self._bigestTaxid+1
+        rankid=self.findRankByName(rank)
+        parentidx = self.findIndex(int(parent))
+        tx = (newtaxid,rankid,parentidx,name,'local')
+        self._taxonomy.append(tx)
+        newidx=len(self._taxonomy)-1
+        self._name.append((name,'scientific name',newidx))
+        if name not in self._nameidx :
+            self._nameidx[name]=[newidx]
+        else :
+            self._nameidx[name].append(newidx)
+        self._index[newtaxid]=newidx
+        self._bigestTaxid=newtaxid
+        return newtaxid
+    def removeLocalTaxon(self,taxid):
+        raise NotImplemented
+        txidx = self.findIndex(taxid)
+        taxon = self.findTaxonByIdx(txidx)
+        assert txidx >= self._localtaxon,"Only local taxon can be deleted"
+        for t in self._taxonomy:
+            if t[2] == txidx:
+                self.removeLocalTaxon(t[0])
+        return taxon
+    def addPreferedName(self,taxid,name):
+        idx = self.findIndex(taxid)
+        self._preferedName.append([name,'obi',idx])
+        self._preferedidx[idx]=name
+        return taxid
+class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
+    '''
+    A taxonomy database class reading the binary taxonomy files
+    (.tdx, .ldx, .rdx, .ndx, .pdx, .adx) sharing a common path prefix.
+    '''
+    def __init__(self,path):
+        '''
+        The taxonomy database constructor
+        @param path: path to the ecoPCR database including the database prefix name
+        @type path: C{str}
+        '''
+        self._path = path
+        self._taxonFile =  "%s.tdx" % self._path
+        self._localTaxonFile =  "%s.ldx" % self._path
+        self._ranksFile =  "%s.rdx" % self._path
+        self._namesFile =  "%s.ndx" % self._path
+        self._preferedNamesFile =  "%s.pdx" % self._path
+        self._aliasFile =  "%s.adx" % self._path
+        print >> sys.stderr,"Reading binary taxonomy database..."
+        self.__readNodeTable()
+        print >> sys.stderr," ok"
+        Taxonomy.__init__(self)
+    #####
+    #
+    # Iterator functions
+    #
+    #####
+    def __ecoNameIterator(self,file):
+        '''
+        Iterate over the records of a name file.
+        @param file: name of the file to read
+        @return: an iterator on (name, classname, taxon index) tuples
+        '''
+        for record in self._ecoRecordIterator(file):
+            # Four big-endian unsigned ints followed by the name and the
+            # class name stored back to back.
+            lrecord = len(record)
+            lnames  = lrecord - 16
+            (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
+            name=names[:namelength]
+            classname=names[namelength:]
+            yield (name,classname,indextaxid)
+    def __ecoTaxonomicIterator(self):
+        '''
+        Iterate over the taxa of the main taxon file, then over those of
+        the optional local taxon file.
+        @return: an iterator on (taxid, rankid, parentidx, name, origin)
+                 tuples where origin is 'ncbi' or 'local'
+        '''
+        for record in self._ecoRecordIterator(self._taxonFile):
+            lrecord = len(record)
+            lnames  = lrecord - 16
+            (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
+            yield  (taxid,rankid,parentidx,name,'ncbi')
+        try :
+            lt=0
+            for record in self._ecoRecordIterator(self._localTaxonFile,noError=True):
+                lrecord = len(record)
+                lnames  = lrecord - 16
+                (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)  # @UnusedVariable
+                lt+=1
+                yield  (taxid,rankid,parentidx,name,'local')
+            print >> sys.stderr, " [INFO : Local taxon file found] : %d added taxa" % lt
+        # Exception only: a bare except would also trap GeneratorExit
+        # (generator closed at the yield above) and KeyboardInterrupt.
+        except Exception:
+            print >> sys.stderr, " [INFO : Local taxon file not found] "
+    def __ecoRankIterator(self):
+        '''
+        Iterate over the raw records of the rank file.
+        '''
+        for record in self._ecoRecordIterator(self._ranksFile):
+            yield  record
+    def __ecoAliasIterator(self):
+        '''
+        Iterate over the (taxid, index) couples of the alias file.
+        '''
+        for record in self._ecoRecordIterator(self._aliasFile,noError=True):
+            (taxid,index) = struct.unpack('> I i',record)
+            yield taxid,index
+    #####
+    #
+    # Indexes
+    #
+    #####
+    def __ecoNameIndex(self):
+        '''
+        @return: the list of all (name, classname, taxon index) tuples
+        '''
+        indexName = [x for x in self.__ecoNameIterator(self._namesFile)]
+        return indexName
+    def __ecoRankIndex(self):
+        '''
+        @return: the list of all rank records
+        '''
+        rank = [r for r in self.__ecoRankIterator()]
+        return rank
+    def __ecoTaxonomyIndex(self):
+        '''
+        Load every taxon and build the taxid -> position index.
+
+        The index comes from the alias file when it can be read,
+        otherwise it is rebuilt from the taxon positions. Local taxa
+        are always added to the index.
+
+        @return: a (taxonomy, index, localtaxon) tuple where localtaxon
+                 is the count of non local taxa
+        '''
+        taxonomy = []
+        try :
+            index = dict(self.__ecoAliasIterator())
+            print >> sys.stderr, " [INFO : Taxon alias file found] "
+            buildIndex=False
+        except Exception:
+            print >> sys.stderr, " [INFO : Taxon alias file not found] "
+            index={}
+            buildIndex=True
+        localtaxon=0
+        i=0
+        for x in self.__ecoTaxonomicIterator():
+            taxonomy.append(x)
+            if x[4]!='local':
+                localtaxon+=1
+            if buildIndex or x[4]=='local':
+                index[x[0]] = i 
+            i+=1
+        print  >> sys.stderr,"Taxonomical tree read"
+        return taxonomy, index, localtaxon
+    def __readNodeTable(self):
+        '''
+        Fill the taxonomy, index, rank, name and preferred name tables
+        of the instance from the database files.
+        '''
+        self._taxonomy, self._index, self._localtaxon= self.__ecoTaxonomyIndex()
+        self._ranks = self.__ecoRankIndex()
+        self._name  = self.__ecoNameIndex()
+        # Add local taxon name to the name index
+        i=self._localtaxon
+        for t in self._taxonomy[self._localtaxon:]:
+            self._name.append((t[3],'scientific name',i))
+            i+=1
+        try :
+            self._preferedName = [(x[0],'obi',x[2]) 
+                                  for x in self.__ecoNameIterator(self._preferedNamesFile)]
+            print >> sys.stderr, " [INFO : Preferred taxon name file found] : %d added taxa" % len(self._preferedName)
+        except Exception:
+            print >> sys.stderr, " [INFO : Preferred taxon name file not found]"
+            self._preferedName = []
+class TaxonomyDump(Taxonomy):  
+    '''
+    A taxonomy loaded from the dump files nodes.dmp, names.dmp,
+    merged.dmp and delnodes.dmp stored in a single directory.
+    '''
+    def __init__(self,taxdir):
+        '''
+        @param taxdir: path of the directory containing the dump files
+        @type taxdir: C{str}
+        '''
+        self._path=taxdir
+        self._readNodeTable('%s/nodes.dmp' % taxdir)
+        print >>sys.stderr,"Adding scientific name..."
+        self._name=[]
+        for taxid,name,classname in self._nameIterator('%s/names.dmp' % taxdir):
+            self._name.append((name,classname,self._index[taxid]))
+            if classname == 'scientific name':
+                # Complete the taxon entry with its name and origin
+                self._taxonomy[self._index[taxid]].extend([name,'ncbi'])
+        print >>sys.stderr,"Adding taxid alias..."
+        for taxid,current in self._mergedNodeIterator('%s/merged.dmp' % taxdir):
+            self._index[taxid]=self._index[current]
+        print >>sys.stderr,"Adding deleted taxid..."
+        for taxid in self._deletedNodeIterator('%s/delnodes.dmp' % taxdir):
+            self._index[taxid]=None
+        Taxonomy.__init__(self)
+    @staticmethod
+    def _taxonCmp(t1,t2):
+        '''
+        Compare two taxon entries on their taxid (first item).
+        @return: -1, 0 or +1
+        '''
+        if t1[0] < t2[0]:
+            return -1
+        elif t1[0] > t2[0]:
+            return +1
+        return 0
+    def _bsearchTaxon(self,taxid):
+        '''
+        Binary search of a taxid in the taxon table, which must be
+        sorted by taxid.
+        @param taxid: the taxid to look for
+        @return: the position of the taxon in the table or C{None}
+                 when the taxid is absent (including an empty table)
+        '''
+        begin = 0
+        end   = len(self._taxonomy)
+        while begin < end:
+            # Floor division keeps the index an integer on every
+            # Python version.
+            check = (begin + end) // 2
+            current = self._taxonomy[check][0]
+            if current == taxid:
+                return check
+            if current < taxid:
+                begin = check + 1
+            else:
+                end = check
+        return None
+    def _readNodeTable(self,file):
+        '''
+        Read the node dump file and set the taxon table, the rank
+        list and the taxid index of the instance.
+        @param file: name of the node dump file
+        '''
+        file = universalOpen(file)
+        nodes = ColumnFile(file, 
+                           sep='|', 
+                           types=(int,int,str,
+                                  str,str,bool,
+                                  int,bool,int,
+                                  bool,bool,bool,str))
+        print >>sys.stderr,"Reading taxonomy dump file..."
+        # Each entry is stored as [taxid,rank,parent]
+        taxonomy=[[n[0],n[2],n[1]] for n in nodes]
+        print >>sys.stderr,"List all taxonomy rank..."    
+        ranks = sorted(set(x[1] for x in taxonomy))
+        rankidx = dict((rank,i) for i,rank in enumerate(ranks))
+        # EC: Taxa are sorted by taxid in node.dmp file
+        # No need to sort them
+        self._taxonomy=taxonomy
+        self._localtaxon=len(taxonomy)
+        print >>sys.stderr,"Indexing taxonomy..."
+        index = dict((t[0],i) for i,t in enumerate(taxonomy))
+        print >>sys.stderr,"Indexing parent and rank..."
+        # Replace rank names and parent taxids by their positions
+        for t in self._taxonomy:
+            t[1]=rankidx[t[1]]
+            t[2]=index[t[2]]
+        self._ranks=ranks
+        self._index=index 
+        self._preferedName = []
+    def _nameIterator(self,file):
+        '''
+        Iterate over the name dump file.
+        @return: an iterator on (taxid, name, classname) tuples
+        '''
+        file = universalOpen(file)
+        names = ColumnFile(file, 
+                           sep='|', 
+                           types=(int,str,
+                                  str,str))
+        for taxid,name,unique,classname,white in names:
+            yield taxid,name,classname
+    def _mergedNodeIterator(self,file):
+        '''
+        Iterate over the merged node dump file.
+        @return: an iterator on (old taxid, current taxid) tuples
+        '''
+        file = universalOpen(file)
+        merged = ColumnFile(file, 
+                           sep='|', 
+                           types=(int,int,str))
+        for taxid,current,white in merged:
+                yield taxid,current
+    def _deletedNodeIterator(self,file):
+        '''
+        Iterate over the taxids of the deleted node dump file.
+        '''
+        file = universalOpen(file)
+        deleted = ColumnFile(file, 
+                           sep='|', 
+                           types=(int,str))
+        for taxid,white in deleted:
+                yield taxid
+# Binary writer
+def ecoTaxonomyWriter(prefix, taxonomy,onlyLocal=False):
+    '''
+    Write a taxonomy as a set of binary files sharing a path prefix.
+
+    Unless onlyLocal is set, the rank (.rdx), taxon (.tdx), name (.ndx)
+    and alias (.adx) files are written. The local taxon file (.ldx) is
+    written when local taxa exist and the preferred name file (.pdx)
+    when the taxonomy has preferred names.
+
+    @param prefix: path prefix of the files to write
+    @type prefix: C{str}
+    @param taxonomy: the taxonomy to save; its _ranks, _taxonomy, _name,
+                     _index, _localtaxon and _preferedName attributes
+                     are read
+    @param onlyLocal: if C{True} only the local taxon and preferred
+                      name files are written
+    @type onlyLocal: bool
+    '''
+    def ecoTaxPacker(tx):
+        # Record: size, taxid, rank index, parent index, name length, name
+        namelength = len(tx[3])
+        totalSize = 4 + 4 + 4 + 4 + namelength
+        try:
+            packed = struct.pack('> I I I I I %ds' % namelength, 
+                                 totalSize, 
+                                 tx[0],
+                                 tx[1],
+                                 tx[2], 
+                                 namelength,
+                                 tx[3])
+        except Exception:
+            # Callers get a TypeError whatever the packing failure is
+            raise TypeError("Cannot convert %s" % tx[3])
+        return packed
+    def ecoRankPacker(rank):
+        namelength = len(rank)
+        packed = struct.pack('> I %ds' % namelength,
+                             namelength,
+                             rank)
+        return packed
+    def ecoAliasPacker(taxid,index):
+        totalSize = 4 + 4
+        try:
+            packed = struct.pack('> I I i',
+                                 totalSize,
+                                 taxid,
+                                 index)
+        except struct.error:
+            print >>sys.stderr,(totalSize,taxid,index)
+            print >>sys.stderr,"Total size : %d  taxid : %d  index : %d" %(totalSize,taxid,index)
+            # Bare raise keeps the original traceback
+            raise
+        return packed
+    def ecoNamePacker(name):
+        namelength = len(name[0])
+        classlength= len(name[1])
+        totalSize =  namelength + classlength + 4 + 4 + 4 + 4
+        packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
+                             totalSize,
+                             int(name[1]=='scientific name'),
+                             namelength,
+                             classlength,
+                             name[2],
+                             name[0],
+                             name[1])
+        return packed
+    def ecoTaxWriter(file,taxonomy):
+        # Returns True when some taxa (the local ones) were not written
+        nbtaxon = sum(1 for t in taxonomy if t[4]!='local')
+        with open(file,'wb') as output:
+            output.write(struct.pack('> I',nbtaxon))
+            for tx in taxonomy:
+                if tx[4]!='local':
+                    output.write(ecoTaxPacker(tx))
+        return nbtaxon < len(taxonomy)
+    def ecoLocalTaxWriter(file,taxonomy):
+        nbtaxon = sum(1 for t in taxonomy if t[4]=='local')
+        # No file is created when there is no local taxon
+        if nbtaxon:
+            with open(file,'wb') as output:
+                output.write(struct.pack('> I',nbtaxon))
+                for tx in taxonomy:
+                    if tx[4]=='local':
+                        output.write(ecoTaxPacker(tx))
+    def ecoRankWriter(file,ranks):
+        with open(file,'wb') as output:
+            output.write(struct.pack('> I',len(ranks)))
+            for rank in ranks:
+                output.write(ecoRankPacker(rank))
+    def ecoAliasWriter(file,index):
+        with open(file,'wb') as output:
+            output.write(struct.pack('> I',len(index)))
+            for taxid in index:
+                i=index[taxid]
+                # Deleted taxids are stored with the -1 index
+                if i is None:
+                    i=-1
+                output.write(ecoAliasPacker(taxid, i))
+    def ecoNameWriter(file,names):
+        # Names are written sorted on their upper case form. A sorted
+        # copy is used so the list owned by the caller is not reordered.
+        ordered = sorted(names,key=lambda n:n[0].upper())
+        with open(file,'wb') as output:
+            output.write(struct.pack('> I',len(ordered)))
+            for name in ordered:
+                output.write(ecoNamePacker(name))
+    def ecoPreferedNameWriter(file,names):
+        # NOTE(review): never called; the .pdx file below is written by
+        # ecoNameWriter (sorted) -- confirm which one is intended.
+        with open(file,'wb') as output:
+            output.write(struct.pack('> I',len(names)))
+            for name in names:
+                output.write(ecoNamePacker(name))
+    localtaxon=True
+    if not onlyLocal:
+        ecoRankWriter('%s.rdx' % prefix, taxonomy._ranks)
+        localtaxon = ecoTaxWriter('%s.tdx' % prefix, taxonomy._taxonomy)
+        ecoNameWriter('%s.ndx' % prefix, [x for x in taxonomy._name if x[2] < taxonomy._localtaxon])
+        ecoAliasWriter('%s.adx' % prefix, taxonomy._index)
+    if localtaxon:
+        ecoLocalTaxWriter('%s.ldx' % prefix, taxonomy._taxonomy)
+    if taxonomy._preferedName:
+        ecoNameWriter('%s.pdx' % prefix, taxonomy._preferedName)
diff --git a/src/obitools/ecotag/__init__.py b/src/obitools/ecotag/__init__.py
new file mode 100644
index 0000000..26c94d3
--- /dev/null
+++ b/src/obitools/ecotag/__init__.py
@@ -0,0 +1,2 @@
+class EcoTagResult(dict):
+    '''
+    Dictionary holding the fields of one ecoTag result record.
+    '''
+    pass
\ No newline at end of file
diff --git a/src/obitools/ecotag/parser.py b/src/obitools/ecotag/parser.py
new file mode 100644
index 0000000..ff6865b
--- /dev/null
+++ b/src/obitools/ecotag/parser.py
@@ -0,0 +1,150 @@
+from itertools import imap
+from obitools import utils
+from obitools.ecotag import EcoTagResult
+class EcoTagFileIterator(utils.ColumnFile):
+    '''
+    Iterator over a tab separated ecoTag result stream returning
+    L{EcoTagResult} instances. The 'CD' records following an 'ID'
+    record are attached to it through its C{cd} attribute.
+    '''
+    @staticmethod
+    def taxid(x):
+        '''Convert to int, negative values giving C{None}.'''
+        x = int(x)
+        return None if x < 0 else x
+    @staticmethod
+    def scientificName(x):
+        '''Return the name, the '--' placeholder giving C{None}.'''
+        return None if x=='--' else x
+    @staticmethod
+    def value(x):
+        '''Convert to float, the '--' placeholder giving C{None}.'''
+        return None if x=='--' else float(x)
+    @staticmethod
+    def count(x):
+        '''Convert to int, the '--' placeholder giving C{None}.'''
+        return None if x=='--' else int(x)
+    def __init__(self,stream):
+        '''
+        @param stream: the data source handed to C{utils.ColumnFile}
+        '''
+        value = EcoTagFileIterator.value
+        count = EcoTagFileIterator.count
+        taxid = EcoTagFileIterator.taxid
+        sname = EcoTagFileIterator.scientificName
+        # NOTE(review): 18 converters for the 20 names of _colname;
+        # depends on how ColumnFile handles extra columns -- confirm.
+        converters = (str,str,str,
+                      value,value,value,
+                      count,count,
+                      taxid,sname,
+                      str,
+                      taxid,sname,
+                      taxid,sname,
+                      taxid,sname,
+                      str)
+        utils.ColumnFile.__init__(self,stream, '\t', True, converters)
+        # Record read ahead while collecting the 'CD' lines
+        self._memory=None
+    _colname = ['identification',
+                'seqid',
+                'best_match_ac',
+                'max_identity',
+                'min_identity',
+                'theorical_min_identity',
+                'count',
+                'match_count',
+                'taxid',
+                'scientific_name',
+                'rank',
+                'order_taxid',
+                'order_name',
+                'family_taxid',
+                'family_name',
+                'genus_taxid',
+                'genus_name',
+                'species_taxid',
+                'species_name',
+                'sequence']
+    def __readResult(self):
+        '''Read the next line and wrap it in an EcoTagResult.'''
+        fields = utils.ColumnFile.next(self)
+        names = EcoTagFileIterator._colname[:len(fields)]
+        return EcoTagResult(imap(None,names,fields))
+    def next(self):
+        '''
+        @return: the next result; for an 'ID' record, its C{cd}
+                 attribute lists the 'CD' records following it
+        '''
+        if self._memory is None:
+            data = self.__readResult()
+        else:
+            data,self._memory = self._memory,None
+        if data['identification']!='ID':
+            return data
+        data.cd=[]
+        while True:
+            try:
+                nextone = self.__readResult()
+            except StopIteration:
+                nextone = None
+                break
+            if nextone['identification']!='CD':
+                break
+            data.cd.append(nextone)
+        # Keep the first non 'CD' record (or None) for the next call
+        self._memory=nextone
+        return data
+def ecoTagIdentifiedFilter(ecoTagIterator):
+    '''
+    Keep only the records whose 'identification' field is 'ID'.
+    @param ecoTagIterator: an iterable of ecoTag result records
+    @return: an iterator on the selected records
+    '''
+    for record in ecoTagIterator:
+        if record['identification']!='ID':
+            continue
+        yield record
+class EcoTagAbstractIterator(utils.ColumnFile):
+    '''
+    Iterator over a tab separated ecoTag abstract stream returning
+    one dictionary per line, keyed by the names of C{_colname}.
+    '''
+    _colname = ['scientific_name',
+                'taxid',
+                'rank',
+                'count',
+                'max_identity',
+                'min_identity']
+    @staticmethod
+    def taxid(x):
+        '''Convert to int, negative values giving C{None}.'''
+        x = int(x)
+        return None if x < 0 else x
+    def __init__(self,stream):
+        '''
+        @param stream: the data source handed to C{utils.ColumnFile}
+        '''
+        converters = (str,
+                      EcoTagFileIterator.taxid,
+                      str,
+                      int,
+                      float,float,float)
+        utils.ColumnFile.__init__(self,stream, '\t', True, converters)
+    def next(self):
+        '''
+        @return: the next line as a C{dict}
+        '''
+        fields = utils.ColumnFile.next(self)
+        return dict(imap(None,EcoTagAbstractIterator._colname,fields))
+def ecoTagAbstractFilter(ecoTagAbsIterator):
+    '''
+    Keep only the records having a 'taxid' different from C{None}.
+    @param ecoTagAbsIterator: an iterable of ecoTag abstract records
+    @return: an iterator on the selected records
+    '''
+    for record in ecoTagAbsIterator:
+        if record['taxid'] is None:
+            continue
+        yield record
\ No newline at end of file
diff --git a/src/obitools/eutils/__init__.py b/src/obitools/eutils/__init__.py
new file mode 100644
index 0000000..1e7d3b2
--- /dev/null
+++ b/src/obitools/eutils/__init__.py
@@ -0,0 +1,54 @@
+import time
+from urllib2 import urlopen
+import shelve
+from threading import Lock
+import sys
+class EUtils(object):
+    '''
+    Base class sending HTTP requests while enforcing a minimal delay
+    between two consecutive requests. The delay and the time of the
+    last request are stored on the class, so they are shared by all
+    the instances.
+    '''
+    _last_request=0
+    _interval=3
+    def __init__(self):
+        self._lock = Lock()
+    def wait(self):
+        '''
+        Block until at least C{_interval} seconds have elapsed since
+        the last request.
+        '''
+        # Sleep for the time still missing, not for the time already
+        # elapsed: the latter is close to zero just after a request and
+        # turns the loop into a busy wait.
+        remaining = EUtils._interval - (time.time() - EUtils._last_request)
+        while remaining > 0:
+            time.sleep(remaining)
+            remaining = EUtils._interval - (time.time() - EUtils._last_request)
+    def _sendRequest(self,url):
+        '''
+        Wait for the request slot, then fetch the url.
+        @param url: the url to request
+        @return: the data read from the url
+        '''
+        self.wait()
+        EUtils._last_request=time.time()
+        t = EUtils._last_request
+        print >>sys.stderr,"Sending request to NCBI @ %f" % t
+        data = urlopen(url).read()
+        print >>sys.stderr,"Data red from NCBI @ %f (%f)" % (t,time.time()-t)
+        return data
+    def setInterval(self,seconde):
+        '''
+        Set the minimal delay between two requests for all instances.
+        @param seconde: the delay in seconds
+        '''
+        EUtils._interval=seconde
+class EFetch(EUtils):
+    '''
+    Client of the efetch.fcgi service bound to one database.
+    '''
+    def __init__(self,db,tool='OBITools',
+                 retmode='text',rettype="native",
+                 server='eutils.ncbi.nlm.nih.gov'):
+        '''
+        @param db: value of the db parameter of the request
+        @param tool: value of the tool parameter
+        @param retmode: value of the retmode parameter
+        @param rettype: value of the rettype parameter
+        @param server: host name of the server to query
+        '''
+        EUtils.__init__(self)
+        template = "http://%s/entrez/eutils/efetch.fcgi?db=%s&tool=%s&retmode=%s&rettype=%s"
+        self._url = template % (server,db,tool,retmode,rettype)
+    def get(self,**args):
+        '''
+        Send a request, each keyword argument being appended to the
+        url as a name=value parameter (values are not url-encoded).
+        @return: the data returned by the server
+        '''
+        parameters = ['%s=%s' % (name,value) for name,value in args.items()]
+        query = "&".join(parameters)
+        return self._sendRequest(self._url +"&" + query)
diff --git a/src/obitools/fast.py b/src/obitools/fast.py
new file mode 100644
index 0000000..760f493
--- /dev/null
+++ b/src/obitools/fast.py
@@ -0,0 +1,56 @@
+    Implement the fastn/fastp similarity search algorithm for BioSequence.
+class Fast(object):
+    '''
+    Word based similarity search against one reference sequence
+    hashed once at construction time.
+    '''
+    def __init__(self,seq,kup=2):
+        '''
+        @param seq: sequence to hash
+        @type seq: BioSequence
+        @param kup: word size used for hashing process
+        @type kup: int
+        '''
+        hash={}
+        seq = str(seq)
+        # NOTE(review): this bound skips the last word of the sequence
+        # (position len(seq)-kup); __call__ uses the same bound, so
+        # scores are consistent -- confirm whether it is intended.
+        for pos in xrange(len(seq)-kup):
+            word = seq[pos:pos+kup].upper()
+            hash.setdefault(word,[]).append(pos)
+        self._kup = kup
+        self._hash= hash
+        self._seq = seq
+    def __call__(self,seq):
+        '''
+        Align one sequence with the fast hash table.
+        @param seq: the sequence to align
+        @type seq: BioSequence
+        @return: (smax,pmax) where smax is the score of the largest
+                 diagonal and pmax the list of the shifts reaching
+                 this score; (0,[]) when no word is shared
+        @rtype: a tuple (int,list of int)
+        '''
+        histo={}
+        seq = str(seq).upper()
+        hash= self._hash
+        kup = self._kup
+        for pos in xrange(len(seq)-kup):
+            for p in hash.get(seq[pos:pos+kup],()):
+                delta = pos - p
+                histo[delta]=histo.get(delta,0) + 1
+        # max() fails on an empty sequence: no shared word means a
+        # null score and no shift.
+        if not histo:
+            return 0,[]
+        smax = max(histo.values())
+        pmax = [x for x in histo if histo[x]==smax]
+        return smax,pmax
+    def __len__(self):
+        '''
+        @return: the length of the hashed sequence
+        '''
+        return len(self._seq)
diff --git a/src/obitools/fasta/__init__.py b/src/obitools/fasta/__init__.py
new file mode 100644
index 0000000..45a3042
--- /dev/null
+++ b/src/obitools/fasta/__init__.py
@@ -0,0 +1,13 @@
+fasta module provides functions to read and write sequences in fasta format.
+from _fasta import parseFastaDescription, \
+                   fastaParser, fastaNucParser,fastaAAParser, fastFastaParser, \
+                   fastaIterator,fastFastaIterator, rawFastaIterator, \
+                   fastaNucIterator, fastaAAIterator, \
+                   formatFasta, formatSAPFastaGenerator
diff --git a/src/obitools/fasta/_fasta.pxd b/src/obitools/fasta/_fasta.pxd
new file mode 100644
index 0000000..e6057d1
--- /dev/null
+++ b/src/obitools/fasta/_fasta.pxd
@@ -0,0 +1,13 @@
+cpdef object fastaParser(bytes  seq,
+                         object bioseqfactory,
+                         object tagparser,
+                         bytes  rawparser,
+                         object joinseq=?)
+cpdef object fastFastaParser(bytes  seq,
+                             object tagparser,
+                             bytes  rawparser)
+cpdef tuple fastParseFastaDescription(bytes ds)
+cpdef tuple parseFastaDescription(bytes ds, object tagparser)                             
\ No newline at end of file
diff --git a/src/obitools/fasta/_fasta.pyx b/src/obitools/fasta/_fasta.pyx
new file mode 100644
index 0000000..a0e5714
--- /dev/null
+++ b/src/obitools/fasta/_fasta.pyx
@@ -0,0 +1,515 @@
+# cython: profile=True
+fasta module provides functions to read and write sequences in fasta format.
+from _fasta cimport *
+from obitools._obitools cimport  _bioSeqGenerator,BioSequence,AASequence,NucSequence
+from obitools._obitools cimport  __default_raw_parser
+from obitools.format.genericparser import genericEntryIteratorGenerator
+#from obitools.alignment import alignmentReader
+#from obitools.utils import universalOpen
+import re
+from obitools.ecopcr.options import loadTaxonomyDatabase
+from obitools.format import SequenceFileIterator
+#from _fasta import parseFastaDescription,fastaParser
+#from _fasta import _fastaJoinSeq
+#from _fasta import _parseFastaTag
+# C declarations of the regex.h regular expression API.
+# NOTE(review): in the visible part of this file they are only used by
+# commented-out code -- confirm before relying on or removing them.
+cdef extern from "regex.h":
+    struct regex_t:
+        pass
+    struct regmatch_t:
+        int rm_so         # start of match
+        int rm_eo         # end of match
+    enum REG_EXTENDED:
+        pass
+    int regcomp(regex_t *preg, char *pattern, int cflags)
+    int regexec(regex_t *preg, char *string, int nmatch, regmatch_t *pmatch, int eflags)
+    void regfree(regex_t *preg)
+cdef bytes _fastaJoinSeq(list seqarray):
+    # Default joiner: strip each sequence line and concatenate them
+    cdef list stripped = [line.strip() for line in seqarray]
+    return  b''.join(stripped)
+cpdef tuple parseFastaDescription(bytes ds, object tagparser):
+    '''
+    Split the description part of a fasta title line.
+    @param ds: the text following the identifier on the title line
+    @param tagparser: a compiled regular expression matching the
+                      key=value part of the description
+    @return: a (definition,info) tuple; info is the text matched by
+             tagparser or C{None}, definition is the text following
+             the match or, without match, ds prefixed by a space
+    '''
+    cdef bytes  definition
+    cdef bytes  info
+    cdef object m
+    # A leading space is added before the search
+    ds = b' '+ds
+    m = tagparser.search(ds)
+    if m is None:
+        info=None
+        definition=ds
+    else:
+        info=m.group(0)
+        definition = ds[m.end(0):].rstrip()
+    return definition,info
+# Pattern matching, from the start of the text, one or several
+# 'key = value;' couples separated by spaces, and its compiled form
+# used by fastParseFastaDescription.
+cdef bytes  _fastTagParser=b'^[a-zA-Z][a-zA-Z.0-9_]* *= *[^;]*;( +[a-zA-Z][a-zA-Z.0-9_]* *= *[^;]*;)*'
+cdef object _cfastTagParser=re.compile(_fastTagParser)
+#cdef regex_t cfastTagParser
+#cdef int     regerror=regcomp(&cfastTagParser, fastTagParser, REG_EXTENDED)
+cpdef tuple fastParseFastaDescription(bytes ds):
+    '''
+    Split the description part of a fasta title line using the
+    module level compiled tag pattern.
+    @param ds: the text following the identifier on the title line
+    @return: a (definition,info) tuple; info is the matched
+             key=value text or C{None}, definition is the text
+             following the match or, without match, ds unchanged
+    '''
+    cdef bytes  definition
+    cdef bytes  info
+    cdef object m
+    m = _cfastTagParser.search(ds)
+    if m is None:
+        info=None
+        definition=ds
+    else:
+        info=m.group(0)
+        definition = ds[m.end(0):].rstrip()
+    return definition,info
+cpdef object fastFastaParser(bytes  seq,
+                             object tagparser,
+                             bytes  rawparser):
+    '''
+    Parse a fasta record.
+    @attention: internal purpose function
+    @param seq: the whole text of one fasta entry, title line first
+    @type seq: C{bytes}
+    @param tagparser: not used by this function, the description is
+                      parsed by L{fastParseFastaDescription}
+    @param rawparser: the tag pattern transmitted to the created
+                      sequence
+    @type rawparser: C{bytes}
+    @return: a C{BioSequence} instance   
+    '''
+    cdef list  lseq  = seq.split(b'\n')
+    cdef list  title = lseq.pop(0).split(None,1)
+    # Drop the first character of the title line from the identifier
+    cdef bytes id    = title[0][1:]
+    cdef bytes definition,info
+    if len(title) == 2:
+        definition,info=fastParseFastaDescription(title[1])
+    else:
+        info= None
+        definition=None
+    seq=b''.join([x.rstrip() for x in lseq])
+    return _bioSeqGenerator(id, seq, definition,info,rawparser,{})
+cpdef object fastaParser(bytes  seq,
+                         object bioseqfactory,
+                         object tagparser,
+                         bytes  rawparser,
+                         object joinseq=None):
+    '''
+    Parse a fasta record.
+    @attention: internal purpose function
+    @param seq: the whole text of one fasta entry, title line first
+    @type seq: C{bytes}
+    @param bioseqfactory: a callable object return a BioSequence
+                          instance, or C{None} to use the default
+                          sequence generator
+    @type bioseqfactory: a callable object
+    @param tagparser: a compiled regular expression usable
+                      to identify key, value couples from 
+                      title line.
+    @type tagparser: regex instance
+    @param rawparser: the tag pattern transmitted to the created
+                      sequence
+    @type rawparser: C{bytes}
+    @param joinseq: a callable building the sequence from the list
+                    of sequence lines, or C{None} to strip and
+                    concatenate them
+    @return: a C{BioSequence} instance   
+    '''
+    cdef list  lseq  = seq.split(b'\n')
+    cdef list  title = lseq.pop(0).split(None,1)
+    # Drop the first character of the title line from the identifier
+    cdef bytes id    = title[0][1:]
+    cdef bytes definition,info
+    if len(title) == 2:
+        definition,info=parseFastaDescription(title[1], tagparser)
+    else:
+        info= None
+        definition=None
+    if joinseq is None:
+        seq=_fastaJoinSeq(lseq)
+    else:
+        seq=joinseq(lseq)
+    if bioseqfactory is None:
+        return _bioSeqGenerator(id, seq, definition,info,rawparser,{})
+    else:
+        return bioseqfactory(id, seq, definition,info,rawparser)
+def fastaNucParser(seq,tagparser=__default_raw_parser,joinseq=None):
+    '''
+    Parse a fasta record as a NucSequence.
+    @param seq: the whole text of one fasta entry
+    @param tagparser: the tag pattern containing one %s placeholder
+                      replaced by the key pattern
+    @param joinseq: an optional callable joining the sequence lines
+    @return: the sequence built by L{fastaParser}
+    '''
+    # fastaParser needs both the compiled tag expression and the raw
+    # pattern; they are built the same way as in fastaIterator.
+    allparser = tagparser % b'[a-zA-Z][a-zA-Z0-9_]*'
+    rtagparser = re.compile('( *%s)+' % allparser)
+    return fastaParser(seq,NucSequence,rtagparser,tagparser,joinseq)
+def fastaAAParser(seq,tagparser=__default_raw_parser,joinseq=None):
+    '''
+    Parse a fasta record as an AASequence.
+    @param seq: the whole text of one fasta entry
+    @param tagparser: the tag pattern containing one %s placeholder
+                      replaced by the key pattern
+    @param joinseq: an optional callable joining the sequence lines
+    @return: the sequence built by L{fastaParser}
+    '''
+    # fastaParser needs both the compiled tag expression and the raw
+    # pattern; they are built the same way as in fastaIterator.
+    allparser = tagparser % b'[a-zA-Z][a-zA-Z0-9_]*'
+    rtagparser = re.compile('( *%s)+' % allparser)
+    return fastaParser(seq,AASequence,rtagparser,tagparser,joinseq)
+def fastFastaIterator(object file,bytes tagparser=__default_raw_parser):
+    '''
+    Iterate through a fasta file entry by entry, each entry being
+    parsed by L{fastFastaParser}.
+    @param file: the data source handed to the fasta entry iterator
+    @param tagparser: the tag pattern containing one %s placeholder
+                      replaced by the key pattern
+    @type tagparser: C{bytes}
+    @return: an iterator on the parsed sequences
+    @see: L{fastaNucIterator}
+    @see: L{fastaAAIterator}
+    >>> from obitools.format.sequence.fasta import fastFastaIterator
+    >>> f = fastFastaIterator('monfichier')
+    >>> s = f.next()
+    >>> print s
+    gctagctagcatgctagcatgcta
+    >>>
+    '''
+    cdef bytes keyedpattern = tagparser % b'[a-zA-Z][a-zA-Z0-9_]*'
+    compiledparser = re.compile('( *%s)+' % keyedpattern)
+    for record in fastaEntryIterator(file):
+        yield fastFastaParser(record,compiledparser,tagparser)
+def fastaIterator(object file,
+                  object bioseqfactory=None,
+                  bytes tagparser=__default_raw_parser,
+                  object joinseq=None):
+    '''
+    Iterate through a fasta file entry by entry, each entry being
+    parsed by L{fastaParser}.
+    @param file: the data source handed to the fasta entry iterator
+    @param bioseqfactory: a callable object return a BioSequence
+                          instance.
+    @type bioseqfactory: a callable object
+    @param tagparser: the tag pattern containing one %s placeholder
+                      replaced by the key pattern
+    @type tagparser: C{bytes}
+    @param joinseq: an optional callable joining the sequence lines
+    @return: an iterator on the parsed sequences
+    @see: L{fastaNucIterator}
+    @see: L{fastaAAIterator}
+    >>> from obitools.format.sequence.fasta import fastaIterator
+    >>> f = fastaIterator('monfichier')
+    >>> s = f.next()
+    >>> print s
+    gctagctagcatgctagcatgcta
+    >>>
+    '''
+    cdef bytes keyedpattern = tagparser % b'[a-zA-Z][a-zA-Z0-9_]*'
+    compiledparser = re.compile('( *%s)+' % keyedpattern)
+    for record in fastaEntryIterator(file):
+        yield fastaParser(record,bioseqfactory,compiledparser,tagparser,joinseq)
+def rawFastaIterator(file,bioseqfactory=None,
+                     tagparser=__default_raw_parser,
+                     joinseq=None):
+    '''
+    Iterate through the entries given by the raw fasta entry
+    iterator, each stripped entry being parsed by L{fastaParser}.
+    @param file: the data source handed to the raw entry iterator
+    @param bioseqfactory: a callable building the sequence or C{None}
+    @param tagparser: the tag pattern containing one %s placeholder
+                      replaced by the key pattern
+    @param joinseq: an optional callable joining the sequence lines
+    @return: an iterator on the parsed sequences
+    '''
+    rawparser=tagparser
+    keyedpattern = rawparser % '[a-zA-Z][a-zA-Z.0-9_]*'
+    compiledparser = re.compile('( *%s)+' % keyedpattern)
+    for record in rawFastaEntryIterator(file):
+        yield fastaParser(record.strip(),bioseqfactory,compiledparser,rawparser,joinseq)
+def fastaNucIterator(file,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fasta file sequence by sequence, NucSequence
+    being used as sequence factory.
+    @param file: the data source handed to L{fastaIterator}
+    @param tagparser: the tag pattern handed to L{fastaIterator}
+    @return: the iterator returned by L{fastaIterator}
+    @rtype: a generator object
+    @see: L{fastaIterator}
+    @see: L{fastaAAIterator}
+    '''
+    sequences = fastaIterator(file, NucSequence,tagparser)
+    return sequences
+def fastaAAIterator(file,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fasta file sequence by sequence, AASequence
+    being used as sequence factory.
+    @param file: the data source handed to L{fastaIterator}
+    @param tagparser: the tag pattern handed to L{fastaIterator}
+    @return: the iterator returned by L{fastaIterator}
+    @see: L{fastaIterator}
+    @see: L{fastaNucIterator}
+    '''
+    sequences = fastaIterator(file, AASequence,tagparser)
+    return sequences
+def formatFasta(data,gbmode=False,upper=False,restrict=None):
+    '''
+    Format a sequence or a set of sequences as a fasta string.
+    @param data: sequence or a set of sequences
+    @type data: BioSequence instance or an iterable object 
+                on BioSequence instances
+    @param gbmode: if set to C{True} the identifier is written as
+                   gi|<gi>|<id> when the sequence has a 'gi' key and
+                   as lcl|<id>| otherwise
+    @type gbmode: bool
+    @param upper: if set to C{True} the sequence is written in
+                  upper case
+    @type upper: bool
+    @param restrict: a set of key name that will be print in the formated
+                     output. If restrict is set to C{None} (default) then
+                     all keys are formated.
+    @type restrict: any iterable value or None
+    @return: a fasta formated string
+    @rtype: str
+    '''
+    if isinstance(data, BioSequence):
+        data = [data]
+    if not (restrict is None or isinstance(restrict, set)):
+        restrict = set(restrict)    
+    entries = []
+    for sequence in data:
+        seq = str(sequence)
+        definition = sequence.definition
+        if definition is None:
+            definition=''
+        if upper:
+            seq = seq.upper()
+        # Sequence written on lines of 60 characters
+        frgseq = '\n'.join([seq[start:start+60] for start in xrange(0,len(seq),60)])
+        tags = ['%s=%s' % item
+                for item in sequence.rawiteritems()
+                if restrict is None or item[0] in restrict]
+        info='; '.join(tags)
+        if info:
+            info=info+';'
+        rawinfo = sequence._rawinfo
+        if rawinfo:
+            info+=" " + rawinfo.strip()
+        id = sequence.id
+        if gbmode:
+            if 'gi' in sequence:
+                id = "gi|%s|%s" % (sequence['gi'],id)
+            else:
+                id = "lcl|%s|" % (id)
+        title='>%s %s %s' %(id,info,definition)
+        entries.append("%s\n%s" % (title,frgseq))
+    return '\n'.join(entries)
+def formatSAPFastaGenerator(options):
+    '''
+    Build a fasta formatter producing title lines annotated with the
+    taxonomic lineage of each sequence, as expected by the SAP software.
+    The taxonomy is loaded through C{loadTaxonomyDatabase(options)} and
+    read from C{options.taxonomy}.
+    @param options: option object; it must provide a C{taxonomy}
+                    attribute different from C{None}
+    @return: the formatter function C{formatSAPFasta}
+    @raise AssertionError: if no taxonomy is available
+    '''
+    loadTaxonomyDatabase(options)
+    taxonomy=None
+    if options.taxonomy is not None:
+        taxonomy=options.taxonomy
+    assert taxonomy is not None,"SAP formating require indication of a taxonomy database"
+    # Ranks written on the title line, in this order
+    ranks = ('superkingdom', 'kingdom', 'subkingdom', 'superphylum', 
+             'phylum', 'subphylum', 'superclass', 'class', 'subclass', 
+             'infraclass', 'superorder', 'order', 'suborder', 'infraorder', 
+             'parvorder', 'superfamily', 'family', 'subfamily', 'supertribe', 'tribe', 
+             'subtribe', 'supergenus', 'genus', 'subgenus', 'species group', 
+             'species subgroup', 'species', 'subspecies')
+    # Keep only the ranks known by the taxonomy and replace each name
+    # by its index in taxonomy._ranks
+    trank=set(taxonomy._ranks)
+    ranks = [taxonomy._ranks.index(x) for x in ranks if x in trank]
+    def formatSAPFasta(data,gbmode=False,upper=False,restrict=None):
+        '''
+        Convert a sequence or a set of sequences in a
+        string following the fasta format as recommended for the SAP
+        software 
+        http://ib.berkeley.edu/labs/slatkin/munch/StatisticalAssignmentPackage.html
+        The title line has the form
+        '>id ; rank: name, rank: name ; preferred name of the taxid'.
+        @param data: sequence or a set of sequences
+        @type data: BioSequence instance or an iterable object 
+                    on BioSequence instances
+        @param gbmode: if set to C{True} the identifier is written as
+                       'gi|<gi>|<id>' when the sequence has a 'gi' tag
+                       and as 'lcl|<id>|' otherwise.
+        @type gbmode: bool
+        @param upper: if set to C{True} the sequence is written in
+                      upper case.
+        @type upper: bool
+        @param restrict: converted to a set but not used afterwards by
+                         this formatter: no tag is written on the
+                         title line.
+        @type restrict: any iterable value or None
+        @return: a fasta formatted string
+        @rtype: str
+        @raise AssertionError: if a sequence has no 'taxid' tag
+        '''
+        if isinstance(data, BioSequence):
+            data = [data]
+        if restrict is not None and not isinstance(restrict, set):
+            restrict = set(restrict)    
+        rep = []
+        for sequence in data:
+            seq = str(sequence)
+            # The sequence is wrapped at 60 characters per line
+            if upper:
+                frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)])
+            else:
+                frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)])
+            try:    
+                taxid = sequence["taxid"]
+            except KeyError:
+                    raise AssertionError('All sequence must have a taxid')
+            definition=' ;'
+            for r in ranks:
+                taxon = taxonomy.getTaxonAtRank(taxid,r)
+                if taxon is not None:
+                    definition+=' %s: %s,' % (taxonomy._ranks[r],taxonomy.getPreferedName(taxon))
+            # [0:-1] drops the trailing ',' of the last rank; when no rank
+            # matched it drops the ';' written just above instead.
+            definition='%s ; %s' % (definition[0:-1],taxonomy.getPreferedName(taxid))
+            id = sequence.id
+            if gbmode:
+                if 'gi' in sequence:
+                    id = "gi|%s|%s" % (sequence['gi'],id)
+                else:
+                    id = "lcl|%s|" % (id)
+            title='>%s%s' %(id,definition)
+            rep.append("%s\n%s" % (title,frgseq))
+        return '\n'.join(rep)
+    return formatSAPFasta
+#class FastaIterator(SequenceFileIterator):
+#    entryIterator = genericEntryIteratorGenerator(startEntry='>')
+#    classmethod(entryIterator)
+#    def __init__(self,inputfile,bioseqfactory=bioSeqGenerator,
+#                      tagparser=_default_raw_parser,
+#                      joinseq=_fastaJoinSeq):
+#        SequenceFileIterator.__init__(self, inputfile, bioseqfactory)
+#        self.__file = FastaIterator.entryIterator(self._inputfile)
+#        self._tagparser = tagparser
+#        self._joinseq   = joinseq
+#    def get_tagparser(self):
+#        return self.__tagparser
+#    def set_tagparser(self, value):
+#        self._rawparser = value
+#        allparser = value % '[a-zA-Z][a-zA-Z0-9_]*'
+#        self.__tagparser = re.compile('( *%s)+' % allparser)
+#    def _parseFastaDescription(self,ds):
+#        m = self._tagparser.search(' '+ds)
+#        if m is not None:
+#            info=m.group(0)
+#            definition = ds[m.end(0):].strip()
+#        else:
+#            info=None
+#            definition=ds
+#        return definition,info
+#    def _parser(self):
+#        '''
+#        Parse a fasta record.
+#        @attention: internal purpose function
+#        @return: a C{BioSequence} instance   
+#        '''
+#        seq = self._seq.split('\n')
+#        title = seq[0].strip()[1:].split(None,1)
+#        id=title[0]
+#        if len(title) == 2:
+#            definition,info=self._parseFastaDescription(title[1])
+#        else:
+#            info= None
+#            definition=None
+#        seq=self._joinseq(seq[1:])
+#        return self._bioseqfactory(id, seq, definition,info,self._rawparser)
+#    _tagparser = property(get_tagparser, set_tagparser, None, "_tagparser's docstring")
diff --git a/src/obitools/fastq/__init__.py b/src/obitools/fastq/__init__.py
new file mode 100644
index 0000000..aa492c2
--- /dev/null
+++ b/src/obitools/fastq/__init__.py
@@ -0,0 +1,19 @@
+'''
+Created on 29 aout 2009
+@author: coissac
+'''
+from _fastq import fastqQualitySangerDecoder,fastqQualitySolexaDecoder
+from _fastq import qualityToSangerError,qualityToSolexaError
+from _fastq import errorToSangerFastQStr
+from _fastq import formatFastq
+from _fastq import fastqParserGenetator
+from _fastq import fastqAAIterator,fastqIlluminaIterator,fastqSolexaIterator, \
+                   fastqSangerIterator, fastqIterator, fastqEntryIterator
+from _fastq import fastFastqParserGenetator
+from _fastq import fastFastqIlluminaIterator,fastFastqSolexaIterator, \
+                   fastFastqSangerIterator, fastFastqIterator
diff --git a/src/obitools/fastq/_fastq.pyx b/src/obitools/fastq/_fastq.pyx
new file mode 100644
index 0000000..4d2f318
--- /dev/null
+++ b/src/obitools/fastq/_fastq.pyx
@@ -0,0 +1,530 @@
+# cython: profile=True
+'''
+Created on 16 sept. 2009
+@author: coissac
+'''
+#from obitools.fasta._fasta cimport *
+from cpython cimport array
+from obitools.fasta._fasta cimport fastParseFastaDescription
+from obitools.fasta._fasta cimport parseFastaDescription
+from obitools._obitools cimport BioSequence
+from obitools._obitools cimport __default_raw_parser
+from obitools._obitools cimport AASequence,NucSequence
+from obitools import bioSeqGenerator
+from obitools.format.genericparser import genericEntryIteratorGenerator
+from obitools.utils import universalOpen
+import re
+import sys
+cdef import from "math.h" :
+    double log10(double x)
+    double rint(double x)
+cdef import from "string.h":
+    int strlen(char* s)
+    void bzero(void *s, size_t n)
+cdef import from "stdlib.h":
+    void* malloc(int size)  except NULL
+    void* realloc(void* chunk,int size)  except NULL
+    void free(void* chunk)
+cpdef array.array[double] fastqQualityDecoder(char* qualstring, int base=0):
+    '''
+    Decode a fastq quality string into an array of numeric scores.
+    Each score is the code of the corresponding character minus C{base}.
+    @param qualstring: the NUL terminated quality string
+    @param base: the offset subtracted from each character code. With
+                 the default value 0 the offset is guessed from the
+                 string: 33 if its smallest character code is lower
+                 than 59, 64 otherwise.
+    @return: a new array of doubles, one item per character
+    '''
+    cdef int i=0
+    cdef int mq=255
+    # NOTE(review): oaddresse, olength and length are declared but not used
+    cdef object oaddresse,olength
+    cdef int length
+    cdef array.array quality
+    cdef double* bdouble
+#    quality = array.array(b'd',[0]*strlen(qualstring))
+    # A one item array is created then resized to the length of the string
+    quality = array.array(b'd',[0])
+#    print >>sys.stderr,"+@@> ",sys.getrefcount(quality)
+    array.resize(quality,strlen(qualstring))
+#    (oaddress,olength)=quality.buffer_info()
+    bdouble=quality.data.as_doubles
+    if base==0:
+        # Look for the smallest character code to guess the offset
+        mq = 255
+        while (qualstring[i]!=0):
+            if qualstring[i]<mq:
+                mq=qualstring[i]
+            i+=1
+        if mq < 59:
+            base=33
+        else:
+            base=64
+    i=0
+    while (qualstring[i]!=0):
+        bdouble[i]=qualstring[i]-base
+        i+=1
+    return quality
+cpdef array.array[double] fastqQualitySangerDecoder(char* qualstring):
+        '''
+        Decode a quality string using the fixed offset 33.
+        @return: the array returned by C{fastqQualityDecoder}
+        '''
+        return fastqQualityDecoder(qualstring,33)
+cpdef array.array[double] fastqQualitySolexaDecoder(char* qualstring):
+        '''
+        Decode a quality string using the fixed offset 64.
+        @return: the array returned by C{fastqQualityDecoder}
+        '''
+        return fastqQualityDecoder(qualstring,64)
+cpdef array.array[double] qualityToSolexaError(array.array quality):
+    '''
+    Convert, in place, Solexa quality scores to error probabilities.
+    Each score Q is replaced by 1/(1+10**(Q/10)).
+    @param quality: array of scores; its buffer is accessed as C doubles
+    @return: the same array object, now holding the probabilities
+    '''
+    cdef int i=0
+    cdef int lq=len(quality)
+    # Typed access to the buffer: the address is no more converted
+    # through an unsigned long, which is narrower than a pointer on
+    # some 64 bits platforms.
+    cdef double* bdouble=quality.data.as_doubles
+    for i in range(lq):
+        bdouble[i]=1/(1+10.**(bdouble[i]/10.))
+    return quality
+cpdef array.array[double] qualityToSangerError(array.array quality):
+    '''
+    Convert, in place, Sanger quality scores to error probabilities.
+    Each score Q is replaced by 10**(-Q/10).
+    @param quality: array of scores; its buffer is accessed as C doubles
+    @return: the same array object, now holding the probabilities
+    '''
+    cdef int i=0
+    cdef int lq=len(quality)
+    # Typed access to the buffer: the address is no more converted
+    # through an unsigned long, which is narrower than a pointer on
+    # some 64 bits platforms.
+    cdef double* bdouble=quality.data.as_doubles
+    for i in range(lq):
+        bdouble[i]=10.**(-bdouble[i]/10.)
+    return quality
+cpdef array.array[double] errorToSangerQuality(array.array quality):
+    '''
+    Convert, in place, error probabilities to Sanger quality scores.
+    Each probability p is replaced by -10*log10(p) rounded to the
+    nearest integer value.
+    @param quality: array of probabilities; its buffer is accessed as
+                    C doubles
+    @return: the same array object, now holding the scores
+    '''
+    cdef int i=0
+    cdef int lq=len(quality)
+    # Typed access to the buffer: the address is no more converted
+    # through an unsigned long, which is narrower than a pointer on
+    # some 64 bits platforms.
+    cdef double* bdouble=quality.data.as_doubles
+    for i in range(lq):
+        bdouble[i]=-rint(log10(bdouble[i])*10)
+    return quality
+cpdef array.array[double] solexaToSangerQuality(array.array quality):
+    '''
+    Convert, in place, Solexa quality scores to Sanger quality scores.
+    Each score Q is first converted to the error probability
+    p = 1/(1+10**(Q/10)) then replaced by -10*log10(p) rounded to the
+    nearest integer value.
+    @param quality: array of scores; its buffer is accessed as C doubles
+    @return: the same array object, now holding the Sanger scores
+    '''
+    cdef int i=0
+    cdef int lq=len(quality)
+    # Typed access to the buffer: the address is no more converted
+    # through an unsigned long, which is narrower than a pointer on
+    # some 64 bits platforms.
+    cdef double* bdouble=quality.data.as_doubles
+    for i in range(lq):
+        bdouble[i]=-rint(log10(1/(1+10.**(bdouble[i]/10.)))*10)
+    return quality
+cpdef bytes errorToSangerFastQStr(array.array quality):
+    '''
+    Encode error probabilities as a Sanger fastq quality string.
+    Each probability p gives the score -10*log10(p) rounded to the
+    nearest integer, limited to the range 0..93, and is written as the
+    character of code 33 + score. The array is not modified.
+    @param quality: array of probabilities; its buffer is accessed as
+                    C doubles
+    @return: the quality string, one character per item of the array
+    '''
+    cdef int i=0
+    cdef int lq=len(quality)
+    cdef double proba
+    # Typed access to the buffer: the address is no more converted
+    # through an unsigned long, which is narrower than a pointer on
+    # some 64 bits platforms.
+    cdef double* bdouble=quality.data.as_doubles
+    cdef char* result
+    cdef bytes code
+    result=<char *>malloc(lq+1)
+    try:
+        result[lq]=0
+        for i in range(lq):
+            proba=-rint(log10(bdouble[i])*10)
+            if proba > 93.:
+                proba=93.
+            elif not (proba >= 0.):
+                # Probabilities greater than 1 (or NaN) would produce
+                # characters below '!', possibly a NUL byte.
+                proba=0.
+            result[i]=33 + <int>proba
+        # Explicit length: the conversion does not rely on the NUL byte
+        code=result[:lq]
+    finally:
+        free(<void *>result)
+    return code
+cpdef bytes formatFastq(object data, bint gbmode=False, bint upper=False):
+    '''
+    Convert a sequence or a set of sequences in a string following
+    the fastq format. Each record is written on four lines: the title
+    ('@id info definition'), the sequence, '+' and the quality string.
+    @param data: sequence or a set of sequences
+    @type data: BioSequence instance or an iterable object
+                on BioSequence instances
+    @param gbmode: if set to C{True} the identifier is written as
+                   'gi|<gi>|<id>' when the sequence has a 'gi' tag
+                   and as 'lcl|<id>|' otherwise.
+    @param upper: if set to C{True} the sequence is written in
+                  upper case.
+    @return: a fastq formatted string
+    '''
+    cdef list rep=[]
+    cdef bytes  seq
+    cdef bytes  definition
+    cdef bytes  info
+    cdef bytes  quality
+    cdef bytes  id
+    if isinstance(data, BioSequence):
+        data = [data]
+    for sequence in data:
+        seq = str(sequence)
+        if upper:
+            seq=seq.upper()
+        if sequence.definition is None:
+            definition=b''
+        else:
+            definition=sequence.definition
+        info=b'; '.join([b'%s=%s' % x for x in sequence.rawiteritems()])
+        if info:
+            info=info+b';'
+        # Tags not parsed yet are copied verbatim after the parsed ones
+        if sequence._rawinfo is not None and sequence._rawinfo:
+            info+=b" " + sequence._rawinfo.strip()
+        id = sequence.id
+        if gbmode:
+            if b'gi' in sequence:
+                id = bytes(b"gi|%s|%s" % (sequence[b'gi'],id))
+            else:
+                id = b"lcl|%s|" % (id)
+        if hasattr(sequence, b"quality"):
+            # sequence.quality is encoded by errorToSangerFastQStr
+            quality=errorToSangerFastQStr(sequence.quality)
+        else:
+            # No quality available: every position gets the symbol 'I'
+            quality=b"I"*len(sequence)
+        title=b'@%s %s %s' %(id,info,definition)
+        rep.append(b"%s\n%s\n+\n%s" % (title,seq,quality))
+    return b'\n'.join(rep)
+# Names of the fastq quality encodings.
+# NOTE(review): this enum does not seem to be referenced in the visible
+# part of the module - confirm before removing it.
+cdef enum FastqType:
+    sanger,solexa
+cdef class fastqParserGenetator:
+    '''
+    Callable object converting a fastq entry, given as the list
+    [title, sequence, separator, quality], into a sequence object
+    carrying a C{quality} attribute.
+    '''
+    # factory called to build the sequence object
+    cdef object bioseqfactory
+    # compiled regular expression matching the tags of the title line
+    cdef object tagparser
+    # regular expression template given to the factory
+    cdef object rawparser
+    # True: Sanger quality decoder, False: Solexa quality decoder
+    cdef bint _qualityDecoder
+    # True: Sanger error conversion, False: Solexa error conversion
+    cdef bint _errorDecoder
+    def __init__(self,fastqvariant=b'sanger',bioseqfactory=NucSequence,tagparser=__default_raw_parser):
+        '''
+        @param fastqvariant: 'sanger', 'solexa' or 'illumina'; any
+                             other value raises a C{KeyError}
+        @param bioseqfactory: callable building the sequence object
+        @param tagparser: regular expression template with one %s
+                          placeholder replaced by the tag name pattern
+        '''
+        self.bioseqfactory = bioseqfactory
+        self.rawparser=tagparser
+        allparser = tagparser % b'[a-zA-Z][a-zA-Z.0-9_]*'
+        tagparser = re.compile(b'( *%s)+' % allparser)
+        self.tagparser = tagparser
+        # Sanger = True
+        # Solexa = False
+        self._qualityDecoder, self._errorDecoder = {b'sanger'   : (True,True),
+                                                    b'solexa'   : (False,False),
+                                                    b'illumina' : (False,True)}[fastqvariant]
+    cdef errorDecoder(self,object qualstring):
+        # Despite its name, qualstring is the array of decoded scores;
+        # it is converted to error probabilities.
+        if self._errorDecoder:
+            return qualityToSangerError(qualstring)
+        else:
+            return qualityToSolexaError(qualstring)
+    cdef qualityDecoder(self,char* qualstring):
+        # Decode the quality string with the offset of the variant
+        if self._qualityDecoder:
+            return fastqQualitySangerDecoder(qualstring)
+        else:
+            return fastqQualitySolexaDecoder(qualstring)
+    def __call__(self,seq):
+        '''
+        Build a sequence object from a fastq entry.
+        @param seq: the list [title, sequence, separator, quality]
+        @return: the object built by the factory, with its C{quality}
+                 attribute set to the array of error probabilities
+        '''
+        cdef str  definition
+        cdef str info
+        cdef str  id
+        cdef str  s0
+        cdef str  tseq
+        cdef bytes tqual
+        s0=seq[0]
+        # The first character of the title line is skipped, then the
+        # identifier is separated from the rest of the line
+        title = s0[1:].split(None,1)
+        id=title[0]
+        if len(title) == 2:
+            definition,info=parseFastaDescription(title[1], self.tagparser)
+        else:
+            info= None
+            definition=None
+        tqual = seq[3]
+        quality=self.errorDecoder(self.qualityDecoder(tqual))
+        tseq=seq[1]
+        seq = self.bioseqfactory(id, tseq, definition,info,self.rawparser)
+        seq.quality = quality
+        return seq
+def fastqIterator(file,fastqvariant=b'sanger',bioseqfactory=NucSequence,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fastq file sequence by sequence.
+    Each entry is converted by a C{fastqParserGenetator} instance.
+    @param file: a line iterator containing fastq data or any other
+                 value accepted by C{universalOpen}
+    @type file:  an iterable object or str
+    @param fastqvariant: the quality encoding of the file:
+                         'sanger', 'solexa' or 'illumina'
+    @type fastqvariant: str
+    @param bioseqfactory: a callable object returning a sequence
+                          instance.
+    @type bioseqfactory: a callable object
+    @param tagparser: a regular expression template, with one %s
+                      placeholder for the tag name, usable
+                      to identify key, value couples from
+                      title line.
+    @type tagparser: str
+    @return: an iterator on the instances built by C{bioseqfactory}
+    @see: L{fastqSangerIterator}
+    @see: L{fastqAAIterator}
+    '''
+    fastqParser=fastqParserGenetator(fastqvariant, bioseqfactory, tagparser)
+    file = universalOpen(file)
+    for entry in fastqEntryIterator(file):
+        title=entry[0]
+        # presumably the last item of the entry is the '+' separator
+        # line - confirm against fastqEntryIterator
+        seq=b"".join(entry[1:-1])
+        quality=b''
+        lenseq=len(seq)
+        # Quality lines are read directly from the file until they
+        # provide at least one symbol per sequence position
+        while (len(quality) < lenseq):
+            quality+=file.next().strip()
+        yield fastqParser([title,seq,b'+',quality])
+def fastqSangerIterator(file,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fastq file sequence by sequence, using the
+    'sanger' variant and the C{NucSequence} factory.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @param tagparser: a regular expression template usable
+                      to identify key, value couples from
+                      title line.
+    @type tagparser: str
+    @return: the iterator returned by C{fastqIterator}
+    @see: L{fastqIterator}
+    @see: L{fastqAAIterator}
+    '''
+    return fastqIterator(file,b'sanger',NucSequence,tagparser)
+def fastqSolexaIterator(file,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fastq file sequence by sequence, using the
+    'solexa' variant and the C{NucSequence} factory.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @param tagparser: a regular expression template usable
+                      to identify key, value couples from
+                      title line.
+    @type tagparser: str
+    @return: the iterator returned by C{fastqIterator}
+    @see: L{fastqIterator}
+    @see: L{fastqAAIterator}
+    '''
+    return fastqIterator(file,b'solexa',NucSequence,tagparser)
+def fastqIlluminaIterator(file,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fastq file sequence by sequence, using the
+    'illumina' variant and the C{NucSequence} factory.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @param tagparser: a regular expression template usable
+                      to identify key, value couples from
+                      title line.
+    @type tagparser: str
+    @return: the iterator returned by C{fastqIterator}
+    @see: L{fastqIterator}
+    @see: L{fastqAAIterator}
+    '''
+    return fastqIterator(file,b'illumina',NucSequence,tagparser)
+def fastqAAIterator(file,tagparser=__default_raw_parser):
+    '''
+    Iterate through a fastq file sequence by sequence, using the
+    'sanger' variant and the C{AASequence} factory.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @param tagparser: a regular expression template usable
+                      to identify key, value couples from
+                      title line.
+    @type tagparser: str
+    @return: the iterator returned by C{fastqIterator}
+    @see: L{fastqIterator}
+    @see: L{fastqSangerIterator}
+    '''
+    return fastqIterator(file,b'sanger',AASequence,tagparser)
+cdef class fastFastqParserGenetator(fastqParserGenetator):
+    '''
+    Variant of C{fastqParserGenetator} always building C{NucSequence}
+    instances and parsing the title line with
+    C{fastParseFastaDescription}.
+    '''
+    def __init__(self,fastqvariant=b'sanger'):
+        '''
+        @param fastqvariant: 'sanger', 'solexa' or 'illumina'; any
+                             other value raises a C{KeyError}
+        '''
+        # The bioseqfactory and tagparser attributes of the parent
+        # class are not initialised here.
+        self.rawparser=__default_raw_parser
+        # Sanger = True
+        # Solexa = False
+        self._qualityDecoder, self._errorDecoder = {b'sanger'   : (True,True),
+                                                    b'solexa'   : (False,False),
+                                                    b'illumina' : (False,True)}[fastqvariant]
+    def __call__(self, list seq):
+        '''
+        Build a C{NucSequence} from a fastq entry.
+        @param seq: the list [title, sequence, separator, quality]
+        @return: the sequence, with its C{quality} attribute set to
+                 the array of error probabilities
+        '''
+        cdef bytes s0    = seq[0]
+        cdef list  title = s0.split(None,1)
+        # The first character of the title line is skipped
+        cdef bytes id    = title[0][1:]
+        # NOTE(review): 'defintion' looks like a typo for 'definition';
+        # the variable assigned below is therefore not typed as bytes.
+        cdef bytes defintion,info
+        cdef bytes tqual = seq[3]
+        cdef bytes tseq  = seq[1]
+        cdef object sseq
+        if len(title) == 2:
+            definition,info=fastParseFastaDescription(title[1])
+        else:
+            info= None
+            definition=None
+#FIXME: look here
+        quality=self.errorDecoder(self.qualityDecoder(tqual))
+#        print >>sys.stderr,b"@@@> ",sys.getrefcount(quality)
+        sseq = NucSequence(id, tseq, definition,info,__default_raw_parser)
+        sseq.quality = quality
+        return sseq
+def fastFastqIterator(file,fastqvariant=b'sanger'):
+    '''
+    Iterate through a fastq file sequence by sequence.
+    Each entry is converted by a C{fastFastqParserGenetator} instance,
+    which builds C{NucSequence} instances.
+    @param file: a line iterator containing fastq data or any other
+                 value accepted by C{universalOpen}
+    @type file:  an iterable object or str
+    @param fastqvariant: the quality encoding of the file:
+                         'sanger', 'solexa' or 'illumina'
+    @type fastqvariant: str
+    @return: an iterator on C{NucSequence} instances
+    @see: L{fastqIterator}
+    '''
+    fastqParser=fastFastqParserGenetator(fastqvariant)
+    file = universalOpen(file)
+    for entry in fastqEntryIterator(file):
+        title=entry[0]
+        # presumably the last item of the entry is the '+' separator
+        # line - confirm against fastqEntryIterator
+        seq=b"".join(entry[1:-1])
+        quality=b''
+        lenseq=len(seq)
+        # Quality lines are read directly from the file until they
+        # provide at least one symbol per sequence position
+        while (len(quality) < lenseq):
+            quality+=file.next().strip()
+        yield fastqParser([title,seq,b'+',quality])
+def fastFastqSangerIterator(file):
+    '''
+    Iterate through a fastq file sequence by sequence, using
+    C{fastFastqIterator} with the 'sanger' variant.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @return: the iterator returned by C{fastFastqIterator}
+    @see: L{fastFastqIterator}
+    @see: L{fastqSangerIterator}
+    '''
+    return fastFastqIterator(file,b'sanger')
+def fastFastqSolexaIterator(file):
+    '''
+    Iterate through a fastq file sequence by sequence, using
+    C{fastFastqIterator} with the 'solexa' variant.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @return: the iterator returned by C{fastFastqIterator}
+    @see: L{fastFastqIterator}
+    @see: L{fastqSolexaIterator}
+    '''
+    return fastFastqIterator(file,b'solexa')
+def fastFastqIlluminaIterator(file):
+    '''
+    Iterate through a fastq file sequence by sequence, using
+    C{fastFastqIterator} with the 'illumina' variant.
+    @param file: a line iterator containing fastq data
+    @type file: an iterable object
+    @return: the iterator returned by C{fastFastqIterator}
+    @see: L{fastFastqIterator}
+    @see: L{fastqIlluminaIterator}
+    '''
+    return fastFastqIterator(file,b'illumina')
diff --git a/src/obitools/fnaqual/__init__.py b/src/obitools/fnaqual/__init__.py
new file mode 100644
index 0000000..384eb96
--- /dev/null
+++ b/src/obitools/fnaqual/__init__.py
@@ -0,0 +1,2 @@
+# Regular expression template matching one ' key=value' tag: %s is
+# replaced by the tag name and the value, a run of non blank
+# characters, is captured by the group.
+fnaTag=' %s *= *([^\s]+)'
diff --git a/src/obitools/fnaqual/fasta.py b/src/obitools/fnaqual/fasta.py
new file mode 100644
index 0000000..102a13e
--- /dev/null
+++ b/src/obitools/fnaqual/fasta.py
@@ -0,0 +1,8 @@
+from obitools.fasta import fastaNucIterator
+from obitools.fnaqual import fnaTag
+def fnaFastaIterator(file):
+    '''
+    Iterate through a fna file using C{fastaNucIterator} with the
+    C{fnaTag} tag pattern.
+    @param file: the value given as input to C{fastaNucIterator}
+    @return: the iterator returned by C{fastaNucIterator}
+    '''
+    return fastaNucIterator(file, fnaTag)
\ No newline at end of file
diff --git a/src/obitools/fnaqual/quality.py b/src/obitools/fnaqual/quality.py
new file mode 100644
index 0000000..092f610
--- /dev/null
+++ b/src/obitools/fnaqual/quality.py
@@ -0,0 +1,137 @@
+from obitools import _default_raw_parser
+from obitools.fasta import fastaIterator
+from obitools.fnaqual import fnaTag
+from obitools.location import Location
+import re
+class QualitySequence(list):
+    '''
+    List of quality values carrying an identifier, a definition and
+    a set of tags. Tags are either given at construction time or
+    extracted on demand from the raw information string.
+    '''
+    def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
+        '''
+        @param id: identifier of the record
+        @param seq: iterable of values used to fill the list
+        @param definition: definition of the record
+        @param rawinfo: unparsed tags of the title line, or C{None}
+        @param rawparser: regular expression template, with one %s
+                          placeholder for the tag name, used to
+                          extract a tag from C{rawinfo}
+        @param info: already parsed tags
+        '''
+        list.__init__(self,seq)
+        self._info = info
+        self.definition=definition
+        self.id=id
+        # rawinfo defaults to None: it is handled as an empty string
+        # instead of failing on the concatenation below.
+        if rawinfo is None:
+            rawinfo = ''
+        self._rawinfo=' ' + rawinfo
+        self._rawparser=rawparser
+    def getDefinition(self):
+        '''
+        Sequence definition getter
+            @return: the sequence definition
+            @rtype: str
+        '''
+        return self._definition
+    def setDefinition(self, value):
+        self._definition = value
+    def getId(self):
+        return self._id
+    def setId(self, value):
+        self._id = value
+    def getKey(self,key):
+        '''
+        Return the value of a tag. Parsed tags are looked up first;
+        otherwise the tag is searched in the raw information string,
+        removed from it and stored with the parsed tags.
+        @param key: the name of the tag
+        @raise KeyError: if the tag exists nowhere
+        '''
+        if key not in self._info:
+            p = re.compile(self._rawparser % key)
+            m = p.search(self._rawinfo)
+            if m is not None:
+                v=m.group(1)
+                self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
+                # NOTE(review): eval() is applied to a text read from
+                # the data file; values that cannot be evaluated are
+                # kept as strings. Only use on trusted files.
+                try:
+                    v = eval(v)
+                except:
+                    pass
+                self._info[key]=v
+            else:
+                raise KeyError(key)
+        else:
+            v=self._info[key]
+        return v
+    def __getitem__(self,key):
+        '''
+        @param key: a C{Location} (the corresponding region is
+                    extracted), a str (the tag value is returned),
+                    an int (the quality value is returned) or a slice
+                    (a new C{QualitySequence} is returned with a
+                    'cut' tag describing the slice)
+        @raise TypeError: for any other kind of key
+        '''
+        if isinstance(key,Location):
+            return key.extractSequence(self)
+        elif isinstance(key, str):
+            # The method is named getKey: calling _getKey raised an
+            # AttributeError on every access to a tag.
+            return self.getKey(key)
+        elif isinstance(key, int):
+            return list.__getitem__(self,key)
+        elif isinstance(key, slice):
+            subseq=list.__getitem__(self,key)
+            info = dict(self._info)
+            if key.start is not None:
+                start = key.start +1
+            else:
+                start = 1
+            if key.stop is not None:
+                stop = key.stop+1
+            else:
+                stop = len(self)
+            if key.step is not None:
+                step = key.step
+            else:
+                step = 1
+            info['cut']='[%d,%d,%s]' % (start,stop,step)
+            return QualitySequence(self.id, subseq, self.definition,self._rawinfo,self._rawparser,**info)
+        raise TypeError('key must be an integer, a str or a slice')
+    def __setitem__(self,key,value):
+        # Item assignment sets a tag, not a quality value
+        self._info[key]=value
+    def __delitem__(self,key):
+        if isinstance(key, str):
+            del self._info[key]
+        else:
+            raise TypeError(key)
+    def __iter__(self):
+        return list.__iter__(self)
+    def __contains__(self,key):
+        # Only the already parsed tags are tested
+        return key in self._info
+    def getTags(self):
+        return self._info
+    def complement(self):
+        '''
+        @return: a new C{QualitySequence} with the values in reverse
+                 order and the 'complemented' tag inverted
+        '''
+        cseq = self[::-1]
+        rep = QualitySequence(self.id,cseq,self.definition,self._rawinfo,self._rawparser,**self._info)
+        rep._info['complemented']=not rep._info.get('complemented',False)
+        return rep
+    definition = property(getDefinition, setDefinition, None, "Sequence Definition")
+    id = property(getId, setId, None, 'Sequence identifier')
+def _qualityJoinSeq(seqarray):
+    '''
+    Convert the lines of a quality record into a list of integers.
+    @param seqarray: iterable of lines containing blank separated
+                     integer values
+    @return: the list of all the values, in reading order
+    '''
+    values = []
+    for line in seqarray:
+        # strip() then split() on blanks is the same as joining the
+        # stripped lines with a space before splitting
+        values.extend(int(token) for token in line.strip().split())
+    return values
+def qualityIterator(file):
+    '''
+    Iterate through a quality file record by record.
+    Records are read by C{fastaIterator} and returned as
+    C{QualitySequence} instances.
+    @param file: the value given as input to C{fastaIterator}
+    @return: an iterator on C{QualitySequence} instances
+    '''
+    records = fastaIterator(file, QualitySequence, fnaTag, _qualityJoinSeq)
+    for record in records:
+        yield record
\ No newline at end of file
diff --git a/src/obitools/format/__init__.py b/src/obitools/format/__init__.py
new file mode 100644
index 0000000..a680505
--- /dev/null
+++ b/src/obitools/format/__init__.py
@@ -0,0 +1,28 @@
+from obitools import bioSeqGenerator
+from obitools.utils import universalOpen
+class SequenceFileIterator:
+    '''
+    Base class of the iterators reading sequences from a file.
+    Subclasses have to provide a C{_parse(entry)} method converting
+    an entry of the input file to a sequence object.
+    '''
+    def __init__(self,inputfile,bioseqfactory=bioSeqGenerator):
+        '''
+        @param inputfile: the value given as input to C{universalOpen}
+        @param bioseqfactory: callable building the sequence objects
+        '''
+        # Values are stored in the private attributes read by the
+        # getters of the _inputfile and _bioseqfactory properties.
+        self.__file = universalOpen(inputfile)
+        self.__bioseqfactory = bioseqfactory
+    def get_inputfile(self):
+        return self.__file
+    def get_bioseqfactory(self):
+        return self.__bioseqfactory
+    def next(self):
+        '''
+        @return: the next entry of the input file converted by
+                 C{_parse}
+        '''
+        # The attribute is named _inputfile: self.inputfile raised an
+        # AttributeError.
+        entry = self._inputfile.next()
+        return self._parse(entry)
+    def __iter__(self):
+        return self
+    _inputfile = property(get_inputfile, None, None, "_file's docstring")
+    _bioseqfactory = property(get_bioseqfactory, None, None, "_bioseqfactory's docstring")
\ No newline at end of file
diff --git a/src/obitools/format/_format.pyx b/src/obitools/format/_format.pyx
new file mode 100644
index 0000000..5a7c7d2
--- /dev/null
+++ b/src/obitools/format/_format.pyx
@@ -0,0 +1,19 @@
+# cython: profile=True
+import sys
+from obitools.fasta import formatFasta
+#from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
+cpdef printOutput(options,seq,output=sys.stdout):
+    '''
+    Write a formatted sequence followed by a newline.
+    The formatter is C{options.output} when it is set, otherwise
+    C{options.outputFormater} (called with C{upper=options.uppercase})
+    when it is set, otherwise C{formatFasta}.
+    An C{IOError} raised while writing ends the program with the
+    exit status 0.
+    @param options: the option object
+    @param seq: the sequence to write
+    @param output: the object whose C{write} method is called
+    '''
+    if options.output is not None:
+        text=options.output(seq)
+    elif options.outputFormater is None:
+        text=formatFasta(seq)
+    else:
+        text=options.outputFormater(seq,upper=options.uppercase)
+    try:
+        for chunk in (text,"\n"):
+            output.write(chunk)
+    except IOError:
+        sys.exit(0)
diff --git a/src/obitools/format/genericparser/__init__.py b/src/obitools/format/genericparser/__init__.py
new file mode 100644
index 0000000..bdb2984
--- /dev/null
+++ b/src/obitools/format/genericparser/__init__.py
@@ -0,0 +1,219 @@
+"""
+G{packagetree format}
+"""
+import re
+from obitools.utils import universalOpen
+from _genericparser import genericEntryIteratorGenerator
+#def genericEntryIteratorGenerator(startEntry=None,endEntry=None,
+#                                  head=False,tail=False,
+#                                  strip=False,join=True):
+#    '''
+#    Transform a text line iterator into an entry oriented iterator.
+#    The converted iterator is useful to implement the first stage
+#    of flat file parsing.
+#    @param startEntry: a regular pattern matching the beginning of
+#                       an entry
+#    @type startEntry: C{str} or None
+#    @param endEntry:   a regular pattern matching the end of
+#                       an entry
+#    @type endEntry: C{str} or None
+#    @param head:       indicate if an header is present before
+#                       the first entry (as in many original genbank
+#                       files)
+#    @type head: C{bool}
+#    @param tail:       indicate if some extra informations are present 
+#                       after the last entry.
+#    @type tail: C{bool}
+#    @return: an iterator on entries in text format
+#    @rtype: an iterator on C{str}
+#    '''
+#    def isBeginning(line):
+#        return startEntry is None or startEntry.match(line) is not None
+#    def isEnding(line):
+#        return ((endEntry is not None and endEntry.match(line) is not None) or
+#                (endEntry is None and startEntry is not None and startEntry.match(line) is not None))
+#    def transparentIteratorEntry(file):
+#        file = universalOpen(file)
+#        return file
+#    def genericEntryIterator(file):
+#        file = universalOpen(file)
+#        entry = []
+#        line = file.next()
+#        started = head or isBeginning(line)
+#        try:
+#            while 1:
+#                while not started:
+#                    line = file.next()
+#                    started = isBeginning(line)
+#                if endEntry is None:
+#                    entry.append(line)
+#                    line = file.next()
+#                while started:
+#                    end = isEnding(line)
+#                    if end:
+#                        if endEntry is not None:
+#                            entry.append(line)
+#                        if join:
+#                            e = ''.join(entry)
+#                            if strip:
+#                                e=e.strip()
+#                        else:
+#                            e=entry
+#                            if strip:
+#                                e=[x.strip() for x in e]
+#                        entry=[]
+#                        yield e
+#                        started=False
+#                        if endEntry is not None:
+#                            line = file.next()
+#                    else:
+#                        entry.append(line)
+#                        line = file.next()
+#                started = isBeginning(line) 
+#        except StopIteration:
+#            if entry and (endEntry is None or tail):
+#                if join:
+#                    e = ''.join(entry)
+#                    if strip:
+#                        e=e.strip()
+#                else:
+#                    e=entry
+#                    if strip:
+#                        e=[x.strip() for x in e]
+#                yield e
+#    if startEntry is not None:
+#        startEntry = re.compile(startEntry)
+#    if endEntry is not None:
+#        endEntry = re.compile(endEntry)
+#    if startEntry is None and endEntry is None:
+#        return transparentIteratorEntry
+#    return genericEntryIterator
+class GenericParser(object):
+    """
+    Configurable parser for entry oriented flat files.
+
+    The input is first cut into entries by the iterator built with
+    C{genericEntryIteratorGenerator}; every registered parse action is
+    then applied to the text of each entry.
+    """
+    def __init__(self,
+                 startEntry=None,
+                 endEntry=None,
+                 head=False,
+                 tail=False,
+                 strip=False,
+                 **parseAction):
+        """
+        @param startEntry: a regular pattern matching the beginning of
+                           an entry
+        @type startEntry: C{str} or None
+        @param endEntry:   a regular pattern matching the end of
+                           an entry
+        @type endEntry: C{str} or None
+        @param head:       indicate if a header is present before
+                           the first entry (as in many original genbank
+                           files)
+        @type head: C{bool}
+        @param tail:       indicate if some extra information is present
+                           after the last entry.
+        @type tail: C{bool}
+        @param strip:      forwarded to C{genericEntryIteratorGenerator}
+        @type strip: C{bool}
+        @param parseAction: one keyword per parse action. The keyword is
+                           the name of the action, the value is the
+                           sequence of the other C{addParseAction}
+                           arguments (C{dataMatcher[, dataCleaner[,
+                           cleanSub]]}). A lone matcher (pattern string,
+                           compiled pattern or callable) is accepted too.
+        """
+        self.flatiterator= genericEntryIteratorGenerator(startEntry,
+                                                         endEntry,
+                                                         head,
+                                                         tail,
+                                                         strip)
+        self.action={}
+        for name in parseAction:
+            self.addParseAction(name,*self._actionArguments(parseAction[name]))
+    @staticmethod
+    def _actionArguments(spec):
+        '''
+        Normalize the value of one C{parseAction} keyword.
+
+        A lone matcher is wrapped in a one element tuple. Without this,
+        star-unpacking a pattern string would spread its characters
+        over the C{addParseAction} arguments, and a callable or a
+        compiled pattern could not be unpacked at all. Any other value
+        is returned untouched.
+        '''
+        if callable(spec) or isinstance(spec, str) or hasattr(spec, 'findall'):
+            return (spec,)
+        return spec
+    def addParseAction(self,name,dataMatcher,dataCleaner=None,cleanSub=''):
+        '''
+        Add a parse action to the generic parser. A parse action
+        extracts one piece of information from an entry. It is
+        defined by a name and a method to extract this information
+        from the full text entry.
+
+        A parse action can be defined in two ways:
+            - via regular expression patterns
+            - via a dedicated function.
+
+        In the first case, you have to indicate at least the
+        dataMatcher regular pattern. This pattern should match exactly
+        the data part you want to retrieve. If extra characters must
+        be removed, the second pattern, dataCleaner, can be used to
+        specify these characters.
+
+        In the second case you must provide a callable object (function)
+        that extracts and cleans data from the text entry. This function
+        should return an array containing all the data retrieved, even
+        if no data or only one piece of data is retrieved.
+
+        @summary: Add a parse action to the generic parser.
+        @param name: name of the data extracted
+        @type name:    C{str}
+        @param dataMatcher: a regular pattern matching the data
+                            or a callable object parsing the
+                            entry and returning a list of matched data
+        @type dataMatcher:  C{str} or C{SRE_Pattern} instance or a callable
+                            object
+        @param dataCleaner: a regular pattern matching part of the data
+                            to suppress.
+        @type dataCleaner: C{str} or C{SRE_Pattern} instance or C{None}
+        @param cleanSub: string used to replace dataCleaner matches.
+                         Default is an empty string
+        @type cleanSub: C{str}
+        '''
+        if callable(dataMatcher):
+            # a function does the whole extraction by itself
+            self.action[name]=dataMatcher
+            return
+        if isinstance(dataMatcher, str):
+            dataMatcher=re.compile(dataMatcher)
+        if isinstance(dataCleaner, str):
+            dataCleaner=re.compile(dataCleaner)
+        self.action[name]=self._buildREParser(dataMatcher,
+                                             dataCleaner,
+                                             cleanSub)
+    def _buildREParser(self,dataMatcher,dataCleaner,cleanSub):
+        '''
+        Build the extraction function of a pattern based parse action.
+
+        The returned function takes the text of an entry and returns the
+        list of all the C{dataMatcher} matches, each one rewritten with
+        C{dataCleaner.sub(cleanSub, ...)} when a cleaner is given.
+        '''
+        def parser(data):
+            found = dataMatcher.findall(data)
+            if dataCleaner is None:
+                return found
+            return [dataCleaner.sub(cleanSub,item) for item in found]
+        return parser
+    def __call__(self,file):
+        '''
+        Parse a file entry by entry.
+
+        @param file: the input handed to the flat entry iterator
+        @return: a generator of C{dict}, one per entry. The key
+                 C{'fullentry'} holds the entry itself, the other keys
+                 are the parse action names associated with the value
+                 returned by the corresponding action.
+        '''
+        for e in self.flatiterator(file):
+            pe = {'fullentry':e}
+            for name in self.action:
+                pe[name]=self.action[name](e)
+            yield pe
\ No newline at end of file
diff --git a/src/obitools/format/genericparser/_genericparser.pyx b/src/obitools/format/genericparser/_genericparser.pyx
new file mode 100644
index 0000000..b5062f2
--- /dev/null
+++ b/src/obitools/format/genericparser/_genericparser.pyx
@@ -0,0 +1,232 @@
+# cython: profile=True
+import re
+from obitools.utils import universalOpen
+cdef bint isBeginning(bytes line, object startEntry):
+    # Without a start pattern every line is considered as the
+    # beginning of an entry; otherwise the line has to match it.
+    if startEntry is None:
+        return True
+    return startEntry.match(line) is not None
+cdef bint isEnding(bytes line, object startEntry, object endEntry):
+    # With an end pattern, only a line matching it closes the entry.
+    if endEntry is not None:
+        return endEntry.match(line) is not None
+    # Without an end pattern, the entry is closed by the line opening
+    # the next one (never when there is no start pattern either).
+    return startEntry is not None and startEntry.match(line) is not None
+def genericEntryIteratorGenerator(bytes startEntry=None,
+                                  bytes endEntry=None,
+                                  bint head=False,
+                                  bint tail=False,
+                                  bint strip=False,
+                                  bint join=True):
+    '''
+    Build a function transforming a text line iterator into an entry
+    oriented iterator.
+    The converted iterator is useful to implement the first stage
+    of flat file parsing.
+
+    @param startEntry: a regular pattern matching the beginning of
+                       an entry
+    @type startEntry: C{str} or None
+    @param endEntry:   a regular pattern matching the end of
+                       an entry
+    @type endEntry: C{str} or None
+    @param head:       indicate if a header is present before
+                       the first entry (as in many original genbank
+                       files)
+    @type head: C{bool}
+    @param tail:       indicate if some extra information is present
+                       after the last entry.
+    @type tail: C{bool}
+    @param strip:      if true, C{strip()} is applied to each joined
+                       entry, or to each of its lines when C{join} is
+                       false
+    @type strip: C{bool}
+    @param join:       if true an entry is the concatenation of its
+                       lines, otherwise it is the list of its lines
+    @type join: C{bool}
+
+    @return: a function taking a file and returning an iterator on
+             its entries. When neither C{startEntry} nor C{endEntry}
+             is given, this function only returns
+             C{universalOpen(file)}.
+    @rtype: a callable
+    '''
+    # Compile the patterns once, when the iterator function is built
+    if startEntry is not None:
+        c_startEntry = re.compile(startEntry)
+    else:
+        c_startEntry = None
+    if endEntry is not None:
+        c_endEntry = re.compile(endEntry)
+    else:
+        c_endEntry = None
+    def transparentIteratorEntry(object f):
+        # No pattern at all: whatever universalOpen returns is used
+        # directly as the entry iterator
+        f = universalOpen(f)
+        return f
+    def genericEntryIterator(file):
+        cdef list entry = []
+        cdef bytes line
+        cdef bint started
+        # Only open the input when it does not already expose next()
+        if not hasattr(file, 'next'):
+            file = universalOpen(file)
+        # The first line is read outside of the try block: a
+        # StopIteration raised by an empty input is not caught here
+        line = file.next()
+        # head=True forces the first line to open an entry
+        started = head or isBeginning(line,c_startEntry)
+        try:
+            while 1:
+                # Skip the lines until one opens an entry
+                while not started:
+                    line = file.next()
+                    started = isBeginning(line,c_startEntry)
+                # Without an end pattern the opening line is stored now,
+                # because isEnding() would otherwise take it for the
+                # end of the entry
+                if endEntry is None:
+                    entry.append(line)
+                    line = file.next()
+                while started:
+                    end = isEnding(line,c_startEntry,c_endEntry)
+                    if end:
+                        # With an explicit end pattern the closing line
+                        # belongs to the entry
+                        if endEntry is not None:
+                            entry.append(line)
+                        if join:
+                            e = ''.join(entry)
+                            if strip:
+                                e=e.strip()
+                        else:
+                            e=entry
+                            if strip:
+                                e=[x.strip() for x in e]
+                        entry=[]
+                        yield e
+                        started=False
+                        # Without an end pattern, `line` is the opening
+                        # line of the next entry: it is kept as is
+                        if endEntry is not None:
+                            line = file.next()
+                    else:
+                        entry.append(line)
+                        line = file.next()
+                started = isBeginning(line,c_startEntry) 
+        except StopIteration:
+            # Input exhausted: the pending lines are emitted when the
+            # entries have no explicit end pattern or when tail is set
+            if entry and (endEntry is None or tail):
+                if join:
+                    e = ''.join(entry)
+                    if strip:
+                        e=e.strip()
+                else:
+                    e=entry
+                    if strip:
+                        e=[x.strip() for x in e]
+                yield e
+    if startEntry is None and endEntry is None:
+        return transparentIteratorEntry
+    return genericEntryIterator
+class GenericParser(object):
+    """
+    Configurable parser for entry oriented flat files.
+
+    The input is first cut into entries by the iterator built with
+    C{genericEntryIteratorGenerator}; every registered parse action is
+    then applied to the text of each entry.
+    """
+    def __init__(self,
+                 startEntry=None,
+                 endEntry=None,
+                 head=False,
+                 tail=False,
+                 strip=False,
+                 **parseAction):
+        """
+        @param startEntry: a regular pattern matching the beginning of
+                           an entry
+        @type startEntry: C{str} or None
+        @param endEntry:   a regular pattern matching the end of
+                           an entry
+        @type endEntry: C{str} or None
+        @param head:       indicate if a header is present before
+                           the first entry (as in many original genbank
+                           files)
+        @type head: C{bool}
+        @param tail:       indicate if some extra information is present
+                           after the last entry.
+        @type tail: C{bool}
+        @param strip:      forwarded to C{genericEntryIteratorGenerator}
+        @type strip: C{bool}
+        @param parseAction: one keyword per parse action. The keyword is
+                           the name of the action, the value is the
+                           sequence of the other C{addParseAction}
+                           arguments (C{dataMatcher[, dataCleaner[,
+                           cleanSub]]}). A lone matcher (pattern string,
+                           compiled pattern or callable) is accepted too.
+        """
+        self.flatiterator= genericEntryIteratorGenerator(startEntry,
+                                                         endEntry,
+                                                         head,
+                                                         tail,
+                                                         strip)
+        self.action={}
+        for name in parseAction:
+            self.addParseAction(name,*self._actionArguments(parseAction[name]))
+    @staticmethod
+    def _actionArguments(spec):
+        '''
+        Normalize the value of one C{parseAction} keyword.
+
+        A lone matcher is wrapped in a one element tuple. Without this,
+        star-unpacking a pattern string would spread its characters
+        over the C{addParseAction} arguments, and a callable or a
+        compiled pattern could not be unpacked at all. Any other value
+        is returned untouched.
+        '''
+        if callable(spec) or isinstance(spec, str) or hasattr(spec, 'findall'):
+            return (spec,)
+        return spec
+    def addParseAction(self,name,dataMatcher,dataCleaner=None,cleanSub=''):
+        '''
+        Add a parse action to the generic parser. A parse action
+        extracts one piece of information from an entry. It is
+        defined by a name and a method to extract this information
+        from the full text entry.
+
+        A parse action can be defined in two ways:
+            - via regular expression patterns
+            - via a dedicated function.
+
+        In the first case, you have to indicate at least the
+        dataMatcher regular pattern. This pattern should match exactly
+        the data part you want to retrieve. If extra characters must
+        be removed, the second pattern, dataCleaner, can be used to
+        specify these characters.
+
+        In the second case you must provide a callable object (function)
+        that extracts and cleans data from the text entry. This function
+        should return an array containing all the data retrieved, even
+        if no data or only one piece of data is retrieved.
+
+        @summary: Add a parse action to the generic parser.
+        @param name: name of the data extracted
+        @type name:    C{str}
+        @param dataMatcher: a regular pattern matching the data
+                            or a callable object parsing the
+                            entry and returning a list of matched data
+        @type dataMatcher:  C{str} or C{SRE_Pattern} instance or a callable
+                            object
+        @param dataCleaner: a regular pattern matching part of the data
+                            to suppress.
+        @type dataCleaner: C{str} or C{SRE_Pattern} instance or C{None}
+        @param cleanSub: string used to replace dataCleaner matches.
+                         Default is an empty string
+        @type cleanSub: C{str}
+        '''
+        if callable(dataMatcher):
+            # a function does the whole extraction by itself
+            self.action[name]=dataMatcher
+            return
+        if isinstance(dataMatcher, str):
+            dataMatcher=re.compile(dataMatcher)
+        if isinstance(dataCleaner, str):
+            dataCleaner=re.compile(dataCleaner)
+        self.action[name]=self._buildREParser(dataMatcher,
+                                             dataCleaner,
+                                             cleanSub)
+    def _buildREParser(self,dataMatcher,dataCleaner,cleanSub):
+        '''
+        Build the extraction function of a pattern based parse action.
+
+        The returned function takes the text of an entry and returns the
+        list of all the C{dataMatcher} matches, each one rewritten with
+        C{dataCleaner.sub(cleanSub, ...)} when a cleaner is given.
+        '''
+        def parser(data):
+            found = dataMatcher.findall(data)
+            if dataCleaner is None:
+                return found
+            return [dataCleaner.sub(cleanSub,item) for item in found]
+        return parser
+    def __call__(self,file):
+        '''
+        Parse a file entry by entry.
+
+        @param file: the input handed to the flat entry iterator
+        @return: a generator of C{dict}, one per entry. The key
+                 C{'fullentry'} holds the entry itself, the other keys
+                 are the parse action names associated with the value
+                 returned by the corresponding action.
+        '''
+        for e in self.flatiterator(file):
+            pe = {'fullentry':e}
+            for name in self.action:
+                pe[name]=self.action[name](e)
+            yield pe
diff --git a/src/obitools/format/ontology/__init__.py b/src/obitools/format/ontology/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/format/ontology/go_obo.py b/src/obitools/format/ontology/go_obo.py
new file mode 100644
index 0000000..cd1d87e
--- /dev/null
+++ b/src/obitools/format/ontology/go_obo.py
@@ -0,0 +1,274 @@
+__docformat__ = 'restructuredtext'
+import re
+import string
+import textwrap
+from obitools.obo.go.parser import GOEntryIterator
+from obitools.obo.go.parser import GOTerm
+from obitools.obo.go.parser import GOEntry
+"""
+go_obo.py: parser for the gene_ontology_edit.obo file.
+- OBOFile class: opens a flat file and returns its entries one by one.
+"""
+class OBOFile(object):
+    """
+        Iterator over all the entries of an OBO file.
+
+        Each item read from the underlying GOEntryIterator is wrapped
+        in a Term, an Entry or a Header instance. After each call to
+        next(), the isaterm attribute tells whether the item just
+        returned is a Term.
+    """
+    def __init__(self,_path):
+        self.file = GOEntryIterator(_path)
+    def __iter__(self):
+        return self
+    def next(self):
+        record = self.file.next()
+        # the flag is set before the wrapper is built
+        self.isaterm = isinstance(record, GOTerm)
+        if self.isaterm:
+            return Term(record)
+        if isinstance(record, GOEntry):
+            return Entry(record)
+        # neither a term nor another stanza: wrapped as a header
+        return Header(record)
+############# everything below should move down into obitools/obo/go/parser.py ##########
+# define an XRef into a go_obo.py script in the microbi pylib
+class Xref(object):
+    """
+    Class Xref
+        Xref.db    Xref database
+        Xref.id    Xref identifier
+
+    The description is expected to look like 'database:identifier'.
+    An IndexError is raised when it contains no colon.
+    """
+    def __init__(self,description):
+        # Split on the first colon only, so that an identifier which
+        # itself contains colons (an URL for instance) is kept whole
+        data = description.split(':',1)
+        self.db = data[0].strip()
+        self.id = data[1].strip()
+# define a RelatedTerm into a go_obo.py script in the microbi pylib
+class RelatedTerm(object):
+    """
+    Class RelatedTerm
+        RelatedTerm.relation    RelatedTerm relation
+        RelatedTerm.related_term    RelatedTerm GO identifier
+        RelatedTerm.comment    all terms have 0 or 1 comment
+    """
+    def __init__(self,relation,value,comment):
+        self.relation = relation
+        # Remove the 'GO:' prefix only. str.strip('GO:') removes every
+        # leading and trailing 'G', 'O' or ':' character, which damages
+        # identifiers such as 'OBO_REL:part_of'.
+        if value.startswith('GO:'):
+            value = value[3:]
+        self.related_term = value
+        self.comment = comment
+# define into a go_obo.py script in the microbi pylib
+#class Term(object):
+#    """
+#    class representing an OBO term (entry).
+#    """
+#    def __init__(self):
+#      raise RuntimeError('biodb.go_obo is an abstract class')
+#    def __checkEntry__(self):
+#      minimum=(hasattr(self,'goid') )
+#      if not minimum:
+#        raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])
+class Term(object):
+    """
+    Class Term
+        representing a GO term.
+
+    The attributes are extracted from the parsed entry given to the
+    constructor: goid, name, comment, list_synonyms, definition, is_a,
+    related_term, part_of, regulates, negatively_regulates,
+    positively_regulates, is_obsolete, considers, replaces, alt_ids,
+    xrefs and subsets. GO identifiers are stored without their 'GO:'
+    prefix.
+    """
+    def __init__(self,data=None):
+        """
+        Build the term from a parsed entry.
+        An AssertionError is raised when no goid attribute could be
+        set, in particular when data is empty.
+        """
+        self.data=data
+        self.isaterm = True
+        if data:
+            self.__filtreGoid__()
+            self.__filtreName__()
+            self.__filtreComment__()
+            self.__filtreSynonyms__()
+            self.__filtreDef__()
+            self.__filtreParents__()
+            self.__filtreRelationships__()
+            self.__filtreRelation__()
+            self.__filtreObsolete__()
+            self.__filtreAltIds__()
+            self.__filtreXRefs__()
+            self.__filtreSubsets__()
+        # check if all required attributes were valued
+        self.__checkEntry__()
+    @staticmethod
+    def _stripGoPrefix(identifier):
+        """
+        Return the identifier without its leading 'GO:' prefix.
+        str.strip('GO:') is not used because it removes every leading
+        and trailing 'G', 'O' or ':' character, not the prefix.
+        """
+        if identifier.startswith('GO:'):
+            return identifier[3:]
+        return identifier
+    def __checkEntry__(self):
+        """
+        Check that the minimal attribute set (goid) is present.
+        """
+        minimum=(hasattr(self,'goid') )
+        if not minimum:
+            raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])
+    def __filtreGoid__(self):
+        """
+        Extract GO id.
+        """
+        self.goid = self._stripGoPrefix(self.data.id.value)
+    def __filtreName__(self):
+        """
+        Extract GO name.
+        """
+        self.name = self.data.name.value
+    def __filtreSynonyms__(self):
+        """
+        Extract GO synonym(s) as a dictionary mapping each synonym
+        to its scope.
+        """
+        self.list_synonyms = {}
+        if self.data.synonyms:
+            for y in self.data.synonyms:
+                self.list_synonyms[y.value] = y.scope
+    def __filtreComment__(self):
+        """
+            manage None comments
+        """
+        if self.data.comment is not None:
+            self.comment = self.data.comment.value
+        else:
+            self.comment = ""
+    def __filtreDef__(self):
+        """
+            Extract GO definition.
+        """
+        if self.data.definition is not None:
+            self.definition = self.data.definition.value
+        else:
+            self.definition = ""
+    def __filtreParents__(self):
+        """
+            To make the is_a hierarchy
+        """
+        if self.data.is_a is not None:
+            self.is_a = set(self._stripGoPrefix(isa.value) for isa in self.data.is_a)
+        else:
+            self.is_a = set()
+    def __filtreRelation__(self):
+        """
+            To make the part_of hierarchy and the three regulation
+            hierarchies. Other relationship types are ignored here.
+        """
+        self.part_of = set()
+        self.regulates = set()
+        self.negatively_regulates = set()
+        self.positively_regulates = set()
+        if self.data.relationship is not None:
+            targets = {"part_of": self.part_of,
+                       "regulates": self.regulates,
+                       "negatively_regulates": self.negatively_regulates,
+                       "positively_regulates": self.positively_regulates}
+            for rel in self.data.relationship:
+                target = targets.get(rel.relationship)
+                # an empty set is falsy: test against None explicitly
+                if target is not None:
+                    target.add(self._stripGoPrefix(rel.value))
+    def __filtreRelationships__(self):
+        """
+            Relation list with other GO Terms (is_a, part_of or some regulates relation)
+        """
+        self.related_term =[]
+        if self.data.relationship is not None:
+            for x in self.data.relationship:
+                self.related_term.append(RelatedTerm(x.relationship,x.value,x.__doc__))
+                #self.related_term.append(RelatedTerm(x.relationship,x.value,x.comment))
+        if self.data.is_a is not None:
+            for x in self.data.is_a:
+                self.related_term.append(RelatedTerm('is_a',x.value,x.__doc__))
+                #self.related_term.append(RelatedTerm('is_a',x.value,x.comment))
+    def __filtreObsolete__(self):
+        """
+            An obsolete term can point to a set of GO identifiers to
+            consider instead (considers) and to a set of identifiers
+            replacing it (replaces).
+        """
+        self.considers = set()
+        self.replaces = set()
+        self.is_obsolete = self.data.is_obsolete
+        if self.data.is_obsolete:
+            if self.data.consider:
+                self.considers = set(self._stripGoPrefix(considered.value) for considered in self.data.consider)
+            if self.data.replaced_by:
+                self.replaces = set(self._stripGoPrefix(replaced.value) for replaced in self.data.replaced_by)
+    def __filtreAltIds__(self):
+        """
+            alternate(s) id(s) for this term (= alias in the geneontology schema model!)
+        """
+        if self.data.alt_ids:
+            self.alt_ids = set(self._stripGoPrefix(x.value) for x in self.data.alt_ids)
+        else:
+            self.alt_ids = set()
+    def __filtreXRefs__(self):
+        """
+            cross references to other databases
+        """
+        self.xrefs = set()
+        if self.data.xrefs:
+            self.xrefs = set(Xref(x.value.reference) for x in self.data.xrefs)
+    def __filtreSubsets__(self):
+        """
+            subset label to make smaller sets of GO Terms
+        """
+        self.subsets = set()
+        if self.data.subsets:
+            self.subsets = set(x.value for x in self.data.subsets)
+class Entry(object):
+    """
+        Wrapper around a stanza entry which is not a term,
+        like [Typedef] for example.
+    """
+    def __init__(self,data=None):
+        # flags used to tell this wrapper apart from a Term
+        self.isanentry, self.isaterm = True, False
+        self.data = data
+class Header(object):
+    """
+        Wrapper around the header of a GO file.
+    """
+    def __init__(self,data=None):
+        """
+        Keep the header data and flag the instance as not being a term.
+        """
+        self.isaterm = False
+        self.data = data
diff --git a/src/obitools/format/options.py b/src/obitools/format/options.py
new file mode 100644
index 0000000..f7ca1ec
--- /dev/null
+++ b/src/obitools/format/options.py
@@ -0,0 +1,375 @@
+Created on 13 oct. 2009
+ at author: coissac
+from obitools.format.sequence.embl import emblIterator
+from obitools.format.sequence.genbank import genbankIterator
+from obitools.format.sequence.fnaqual import fnaFastaIterator
+from obitools.format.sequence.fasta import fastaAAIterator, fastaNucIterator, fastFastaIterator
+from obitools.format.sequence.fastq import fastFastqIlluminaIterator,fastFastqSolexaIterator
+from obitools.fastq import fastFastqSangerIterator
+from obitools.fnaqual.quality import qualityIterator
+from obitools.ecopcr.sequence import EcoPCRDBSequenceIterator
+from obitools.fasta import formatFasta, rawFastaIterator,\
+                           formatSAPFastaGenerator
+from obitools.fastq import formatFastq
+from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
+from cPickle import dump,load,UnpicklingError
+#from obitools.format._format import printOutput
+from array import array
+from itertools import chain
+import sys
+import re
+from obitools.ecopcr import EcoPCRFile
+from obitools.format.sequence import skipOnErrorIterator, skipfirst, only
+from obitools import BioSequence
+from obitools.utils import FakeFile
+def binarySequenceIterator(lineiterator):
+    """
+    Iterate over the objects pickled in a binary sequence stream.
+    The lines are wrapped in a FakeFile and objects are unpickled from it one
+    after the other until the end of the data is reached.
+    SECURITY NOTE(review): unpickling can execute arbitrary code; only use
+    this reader on data coming from a trusted source.
+    :param lineiterator: an iterator over the lines of the pickled stream
+    :return: an iterator over the unpickled objects
+    """
+    f = FakeFile(lineiterator)
+    while True:
+        try:
+            s = load(f)
+        except UnpicklingError:
+            # best effort kept from the original code: unreadable data are
+            # skipped and reading goes on with the following ones
+            continue
+        except EOFError:
+            # end of the stream: a plain return ends the generator, which is
+            # equivalent to raising StopIteration in Python 2 and stays valid
+            # under PEP 479
+            return
+        yield s
+def addInputFormatOption(optionManager):
+    """
+    Declare on the option manager the options describing the input data.
+    Two option groups are added: one restricting the analysis to a sub-part
+    of the data (``skip`` and ``only``) and one selecting the input format
+    (all format flags store a constant in ``seqinformat``; ``--nuc`` and
+    ``--prot`` store the molecule type in ``moltype``).
+    :param optionManager: the object on which the option groups are created
+                          through its ``add_option_group`` method
+    """
+    group = optionManager.add_option_group("Restriction to a sub-part options",
+                    "Allow to limit analysis to a sub-part of the data file")
+    group.add_option('--skip',
+                     action="store", dest="skip",
+                     metavar='<N>',
+                     default=None,
+                     type='int',
+                     help="skip the N first sequences")
+    group.add_option('--only',
+                     action="store", dest="only",
+                     metavar='<N>',
+                     default=None,
+                     type='int',
+                     help="treat only N sequences")
+    # every format flag below writes to the same destination (seqinformat);
+    # a None value means that no format was requested explicitly
+    group = optionManager.add_option_group("Input format options",
+                    "If not specified, a test is done to determine the file format")
+    group.add_option('--genbank',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='genbank',
+                     help="Input file is in genbank format")
+    group.add_option('--embl',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='embl',
+                     help="Input file is in embl format")
+    group.add_option('--skip-on-error',
+                     action="store_true", dest="skiperror",
+                     default=False,
+                     help="Skip sequence entries with parse error")
+    group.add_option('--fasta',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='fasta',
+                     help="Input file is in fasta nucleic format (including obitools fasta extentions)")
+    group.add_option('--ecopcr',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='ecopcr',
+                     help="Input file is in ecopcr format")
+    group.add_option('--raw-fasta',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='rawfasta',
+                     help="Input file is in fasta format (but more tolerant to format variant)")
+#    group.add_option('--fna',
+#                     action="store_const", dest="seqinformat",
+#                     default=None,
+#                     const='fna',
+#                     help="input file is in fasta nucleic format produced by 454 sequencer pipeline")
+#    group.add_option('--qual',
+#                     action="store", dest="withqualfile",
+#                     type='str',
+#                     default=None,
+#                     help="Specify the name of a quality file produced by 454 sequencer pipeline")
+    group.add_option('--sanger',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='sanger',
+                     help="Input file is in sanger fastq nucleic format (standard fastq)")
+    group.add_option('--solexa',
+                     action="store_const", dest="seqinformat",
+                     default=None,
+                     const='solexa',
+                     help="Input file is in fastq nucleic format produced by solexa sequencer")
+    #===========================================================================
+    # group.add_option('--illumina',
+    #                         action="store_const", dest="seqinformat",
+    #                         default=None,
+    #                         const='illumina',
+    #                         help="input file is in fastq nucleic format produced by old solexa sequencer")
+    #===========================================================================
+    group.add_option('--ecopcrdb',
+                      action="store_const", dest="seqinformat",
+                      default=None,
+                      const='ecopcrdb',
+                      help="Input file is an ecopcr database")
+    # molecule type; note that --prot stores the value 'pep'
+    group.add_option('--nuc',
+                     action="store_const", dest="moltype",
+                     default=None,
+                     const='nuc',
+                     help="Input file contains nucleic sequences")
+    group.add_option('--prot',
+                     action="store_const", dest="moltype",
+                     default=None,
+                     const='pep',
+                     help="Input file contains protein sequences")
+def addOutputFormatOption(optionManager):
+    """
+    Declare on the option manager the options describing the output format.
+    The format flags store the formatting function itself in ``output``;
+    ``--ecopcrdb-output`` stores its argument in ``ecopcroutput`` and
+    ``--uppercase`` sets the ``uppercase`` flag.
+    :param optionManager: the object on which the option group is created
+                          through its ``add_option_group`` method
+    """
+    group = optionManager.add_option_group("Output format options")
+#    optionManager.add_option('-B','--bin-output',
+#                             action="store_const", dest="output",
+#                             default=None,
+#                             const=dump,
+#                             help="output sequences in binary format")
+    group.add_option('--fasta-output',
+                             action="store_const", dest="output",
+                             default=None,
+                             const=formatFasta,
+                             help="Output sequences in obitools fasta format")
+    group.add_option('--fastq-output',
+                             action="store_const", dest="output",
+                             default=None,
+                             const=formatFastq,
+                             help="Output sequences in sanger fastq format")
+#    group.add_option('--sap-output',
+#                             action="store_const", dest="output",
+#                             default=None,
+#                             const=formatSAPFastaGenerator,
+#                             help="Output sequences in sap fasta format "
+#                                  "(Sequence must have a taxid and a taxonomy has to be loaded)")
+    group.add_option('--ecopcrdb-output',
+                             action="store", dest="ecopcroutput",
+                             default=None,
+                             help="Output sequences in ecopcr database format "
+                                  "(sequence records are not printed on standard output)")
+    group.add_option('--uppercase',
+                             action='store_true',dest='uppercase',
+                             default=False,
+                             help="Print sequences in upper case (default is lower case)")
+def addInOutputOption(optionManager):
+    """
+    Declare both the input and the output format options on the option
+    manager, the input ones first.
+    """
+    addInputFormatOption(optionManager)
+    addOutputFormatOption(optionManager)
+def autoEntriesIterator(options):
+    """
+    Build the reader function matching the input options.
+    The returned function takes an iterator over the lines of the input and
+    returns an iterator over the entries read from it. When
+    ``options.seqinformat`` is None the format is guessed from the first
+    line each time the reader is called.
+    As a side effect ``options.outputFormater`` and ``options.outputFormat``
+    are set to the default output format going with the input format
+    (fastq for fastq inputs, fasta otherwise).
+    :param options: the option values; ``seqinformat``, ``moltype`` and
+                    ``skiperror`` are read, as well as ``skip``, ``only``
+                    and ``withqualfile`` when they exist
+    :return: a function converting a line iterator to an entry iterator
+    :raises: ValueError if ``options.seqinformat`` is not a known format name
+    """
+    options.outputFormater=formatFasta
+    options.outputFormat="fasta"
+    # NOTE(review): the '|' characters are not escaped, so they act as regex
+    # alternations and the last alternative (' +') matches any line holding a
+    # space. Left unchanged because the exact layout of ecoPCR lines cannot be
+    # confirmed from this file.
+    ecopcr_pattern = re.compile('^[^ ]+ +| +[0-9]+ +| + [0-9]+ + | +')
+    def annotatedIterator(formatIterator):
+        # wrap a reader so that extractTaxon() is called on each entry
+        options.outputFormater=formatFasta
+        options.outputFormat="fasta"
+        def iterator(lineiterator):
+            for s in formatIterator(lineiterator):
+                s.extractTaxon()
+                yield s
+        return iterator
+    def withQualIterator(qualityfile):
+        # read fna fasta entries and attach to each of them the next record
+        # of the quality file converted with 10**(-x/10)
+        options.outputFormater=formatFastq
+        options.outputFormat="fastq"
+        def iterator(lineiterator):
+            for s in fnaFastaIterator(lineiterator):
+                q = qualityfile.next()
+                quality = array('d',(10.**(-x/10.) for x in q))
+                s.quality=quality
+                yield s
+        return iterator
+    def autoSequenceIterator(lineiterator):
+        # guess the format from the first line of the input
+        options.outputFormater=formatFasta
+        options.outputFormat="fasta"
+        first = lineiterator.next()
+        # slices are used instead of first[0] so that an empty first line is
+        # reported as an unknown format instead of raising an IndexError
+        start = first[0:1]
+        if start==">":
+#            if options.withqualfile is not None:
+#                qualfile=qualityIterator(options.withqualfile)
+#                reader=withQualIterator(qualfile)
+#                options.outputFormater=formatFastq
+#                options.outputFormat="fastq"
+            if options.moltype=='nuc':
+                reader=fastaNucIterator
+            elif options.moltype=='pep':
+                reader=fastaAAIterator
+            else:
+                reader=fastFastaIterator
+        elif start=='@':
+            reader=fastFastqSangerIterator
+            options.outputFormater=formatFastq
+            options.outputFormat="fastq"
+        elif first[0:3]=='ID ':
+            reader=emblIterator
+        elif first[0:6]=='LOCUS ':
+            reader=genbankIterator
+        elif first[0:8]=="#!Pickle":
+            reader=binarySequenceIterator
+        elif start=="#" or ecopcr_pattern.search(first):
+            reader=EcoPCRFile 
+        else:
+            raise AssertionError('file is not in fasta, fastq, embl, genbank or ecoPCR format')
+        if reader==binarySequenceIterator:
+            # the "#!Pickle" header line is not part of the pickled data
+            input = binarySequenceIterator(lineiterator)  # @ReservedAssignment
+        else:
+            # the consumed first line is put back in front of the other lines
+            input = reader(chain([first],lineiterator))  # @ReservedAssignment
+        return input
+    if options.seqinformat is None:
+        reader = autoSequenceIterator
+    else:
+        if options.seqinformat=='fasta':
+            if options.moltype=='nuc':
+                reader=fastaNucIterator
+            elif options.moltype=='pep':
+                reader=fastaAAIterator
+            else:
+                reader=fastFastaIterator
+        elif options.seqinformat=='rawfasta':
+            reader=annotatedIterator(rawFastaIterator)
+        elif options.seqinformat=='genbank':
+            reader=annotatedIterator(genbankIterator)
+        elif options.seqinformat=='embl':
+            reader=annotatedIterator(emblIterator)
+        elif options.seqinformat=='fna':
+            reader=fnaFastaIterator
+        elif options.seqinformat=='sanger':
+            options.outputFormater=formatFastq
+            options.outputFormat="fastq"
+            reader=fastFastqSangerIterator
+        elif options.seqinformat=='solexa':
+            options.outputFormater=formatFastq
+            options.outputFormat="fastq"
+            reader=fastFastqSolexaIterator
+        elif options.seqinformat=='illumina':
+            options.outputFormater=formatFastq
+            options.outputFormat="fastq"
+            reader=fastFastqIlluminaIterator
+        elif options.seqinformat=='ecopcr':
+            reader=EcoPCRFile
+        elif options.seqinformat=='ecopcrdb':
+            reader=EcoPCRDBSequenceIterator
+        else:
+            # without this branch the function ended in an UnboundLocalError
+            raise ValueError('unknown input format: %s' % options.seqinformat)
+        # the option declaring withqualfile is commented out in
+        # addInputFormatOption, so the attribute may not exist
+        qualfilename = getattr(options,'withqualfile',None)
+        if options.seqinformat=='fna' and qualfilename is not None:
+            qualfile=qualityIterator(qualfilename)
+            reader=withQualIterator(qualfile)
+            options.outputFormater=formatFastq
+            options.outputFormat="fastq"
+    if options.skiperror:
+        reader = skipOnErrorIterator(reader)
+    if hasattr(options, 'skip') and options.skip is not None:
+        print >>sys.stderr,"Skipping %d sequences" % options.skip
+        reader = skipfirst(reader,options.skip)
+    if hasattr(options, 'only') and options.only is not None:
+        print >>sys.stderr,"Analysing only %d sequences" % options.only
+        reader = only(reader,options.only)
+    return reader
+def sequenceWriterGenerator(options,output=sys.stdout):
+    """
+    Build the function used to write sequences according to the options.
+    The kind of writer is selected once, when this function is called:
+    an EcoPCRDBSequenceWriter if ``options.ecopcroutput`` is set, a pickle
+    based writer if ``options.output`` is ``dump`` and a text writer otherwise.
+    :param options: the option values; ``ecopcroutput``, ``output``,
+                    ``outputFormater`` and ``uppercase`` are read
+    :param output: the file like object receiving the text or pickled output
+    :return: a function taking one sequence and writing it
+    """
+    class SequenceWriter:
+        # text writer; the formatting function is selected at the first put()
+        def __init__(self,options,file=sys.stdout):  # @ReservedAssignment
+            self._format=None
+            self._file=file
+            self._upper=options.uppercase
+        def put(self,seq):
+            if self._format is None:
+                # priority: explicit output option, then the format deduced
+                # from the input (outputFormater), then fasta
+                self._format=formatFasta
+                if options.output is not None:
+                    self._format=options.output
+                    if self._format is formatSAPFastaGenerator:
+                        self._format=formatSAPFastaGenerator(options)
+                elif options.outputFormater is not None:
+                    self._format=options.outputFormater
+            if hasattr(seq,'_hasTaxid') and seq._hasTaxid:
+                seq.extractTaxon()
+            s = self._format(seq,upper=self._upper)
+            try:
+                self._file.write(s)
+                self._file.write("\n")
+            except IOError:
+                # NOTE(review): presumably meant for an output closed by the
+                # reader (broken pipe); any write error ends the program
+                # silently with a success status
+                sys.exit(0)
+    class BinaryWriter:
+        # pickle writer; the "#!Pickle" header line is written at creation
+        def __init__(self,options,file=sys.stdout):  # @ReservedAssignment
+            self._file=file
+            self._file.write("#!Pickle\n")
+        def put(self,seq):
+            try:
+                # a single BioSequence is dumped as is, anything else is
+                # iterated and each of its items is dumped
+                if isinstance(seq, BioSequence):
+                        dump(seq,self._file,protocol=2)
+                else:
+                    for s in seq:
+                        dump(s,self._file,protocol=2)
+            except IOError:
+                sys.exit(0)
+    if options.ecopcroutput is not None:
+        writer=EcoPCRDBSequenceWriter(options)
+    elif options.output==dump:
+        # NOTE(review): the option storing dump in options.output is commented
+        # out in addOutputFormatOption, so this branch needs a caller setting it
+        writer=BinaryWriter(options,output)
+    else:
+        writer=SequenceWriter(options,output)
+    def sequenceWriter(sequence):
+        writer.put(sequence)
+    return sequenceWriter
\ No newline at end of file
diff --git a/src/obitools/format/sequence/__init__.py b/src/obitools/format/sequence/__init__.py
new file mode 100644
index 0000000..9c3d8eb
--- /dev/null
+++ b/src/obitools/format/sequence/__init__.py
@@ -0,0 +1,69 @@
+from obitools.fasta import fastFastaIterator
+from obitools.fastq import fastqSangerIterator
+from obitools.seqdb.embl.parser import emblIterator
+from obitools.seqdb.genbank.parser import genbankIterator
+from itertools import chain
+from obitools.utils import universalOpen
+import sys
+def skipOnErrorIterator(seqIterator):
+    """
+    Wrap a reader so that entries raising an error are skipped.
+    :param seqIterator: a function building an iterator over entries from
+                        the input data
+    :return: a function with the same interface whose iterator ignores the
+             entries that cannot be read
+    """
+    def internal(inputdata):
+        si = seqIterator(inputdata)
+        while True:
+            try:
+                seq = next(si)
+            except StopIteration:
+                # normal end of the input: a plain return ends the generator
+                # (re-raising StopIteration is an error under PEP 479)
+                return
+            except Exception:
+                # deliberate best effort: the faulty entry is reported and
+                # reading goes on with the next one
+                sys.stderr.write("Skipping a sequence entry with parse error\n")
+                continue
+            yield seq
+    return internal
+def skipfirst(seqIterator,n):
+    """
+    Wrap a reader so that its n first entries are not returned.
+    The whole input is still read; an empty line is printed on the standard
+    error once the input is exhausted.
+    """
+    def internal(inputdata):
+        for rank,seq in enumerate(seqIterator(inputdata)):
+            # rank is 0 based: entries of rank 0 to n-1 are dropped
+            if rank >= n:
+                yield seq
+        print >>sys.stderr
+    return internal
+def only(seqIterator,n):
+    """
+    Wrap a reader so that at most its n first entries are returned.
+    Reading stops on the first entry following the n returned ones; an empty
+    line is then printed on the standard error.
+    """
+    def internal(inputdata):
+        for rank,seq in enumerate(seqIterator(inputdata)):
+            # rank is 0 based: the entry of rank n is read but not returned
+            if rank >= n:
+                break
+            yield seq
+        print >>sys.stderr
+    return internal
+def autoSequenceIterator(file):
+    """
+    Open a sequence file and read it with the parser matching its format.
+    The format is guessed from the beginning of the first line: '>' for
+    fasta, '@' for sanger fastq, 'ID ' for embl and 'LOCUS ' for genbank.
+    :param file: the value passed to universalOpen to get the lines
+    :return: the entry iterator built by the selected parser
+    :raises: AssertionError if the first line matches none of the formats
+    """
+    lineiterator = universalOpen(file)
+    first = lineiterator.next()
+    # startswith also copes with an empty first line, which is then reported
+    # as an unknown format instead of raising an IndexError
+    if first.startswith(">"):
+        reader=fastFastaIterator
+    elif first.startswith('@'):
+        reader=fastqSangerIterator
+    elif first.startswith('ID '):
+        reader=emblIterator
+    elif first.startswith('LOCUS '):
+        reader=genbankIterator
+    else:
+        raise AssertionError('file is not in fasta, fastq, embl, or genbank format')
+    # the consumed first line is put back in front of the other lines
+    return reader(chain([first],lineiterator))
diff --git a/src/obitools/format/sequence/embl.py b/src/obitools/format/sequence/embl.py
new file mode 100644
index 0000000..f59f14a
--- /dev/null
+++ b/src/obitools/format/sequence/embl.py
@@ -0,0 +1,2 @@
+from obitools.seqdb.embl.parser import emblIterator,emblParser
diff --git a/src/obitools/format/sequence/fasta.py b/src/obitools/format/sequence/fasta.py
new file mode 100644
index 0000000..74a55f3
--- /dev/null
+++ b/src/obitools/format/sequence/fasta.py
@@ -0,0 +1,4 @@
+from obitools.fasta import fastFastaIterator,fastaIterator,fastaParser
+from obitools.fasta import fastaAAIterator,fastaAAParser
+from obitools.fasta import fastaNucIterator,fastaNucParser
+from obitools.fasta import formatFasta
diff --git a/src/obitools/format/sequence/fastq.py b/src/obitools/format/sequence/fastq.py
new file mode 100644
index 0000000..9addf61
--- /dev/null
+++ b/src/obitools/format/sequence/fastq.py
@@ -0,0 +1,16 @@
+Created on 15 janv. 2010
+ at author: coissac
+from obitools.fastq import fastqIterator,fastqParserGenetator
+from obitools.fastq import fastqSangerIterator,fastqSolexaIterator, \
+                           fastqIlluminaIterator
+from obitools.fastq import fastFastqIterator,fastFastqParserGenetator
+from obitools.fastq import fastFastqSangerIterator,fastFastqSolexaIterator, \
+                           fastFastqIlluminaIterator
+from obitools.fastq import fastqAAIterator
+from obitools.fastq import formatFastq
diff --git a/src/obitools/format/sequence/fnaqual.py b/src/obitools/format/sequence/fnaqual.py
new file mode 100644
index 0000000..ab69916
--- /dev/null
+++ b/src/obitools/format/sequence/fnaqual.py
@@ -0,0 +1,8 @@
+Created on 12 oct. 2009
+ at author: coissac
+from obitools.fnaqual.fasta import fnaFastaIterator
+from obitools.fnaqual.quality import qualityIterator
diff --git a/src/obitools/format/sequence/genbank.py b/src/obitools/format/sequence/genbank.py
new file mode 100644
index 0000000..8524b6f
--- /dev/null
+++ b/src/obitools/format/sequence/genbank.py
@@ -0,0 +1,4 @@
+from obitools.seqdb.genbank.parser import genpepIterator,genpepParser
+from obitools.seqdb.genbank.parser import genbankIterator,genbankParser
diff --git a/src/obitools/format/sequence/tagmatcher.py b/src/obitools/format/sequence/tagmatcher.py
new file mode 100644
index 0000000..60ad8d8
--- /dev/null
+++ b/src/obitools/format/sequence/tagmatcher.py
@@ -0,0 +1,5 @@
+from obitools.tagmatcher.parser import tagMatcherParser
+from obitools.tagmatcher.parser import TagMatcherIterator
+from obitools.tagmatcher.parser import formatTagMatcher
diff --git a/src/obitools/goa/__init__.py b/src/obitools/goa/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/goa/parser.py b/src/obitools/goa/parser.py
new file mode 100644
index 0000000..8ffd1e3
--- /dev/null
+++ b/src/obitools/goa/parser.py
@@ -0,0 +1,33 @@
+from itertools import imap
+from obitools import utils
+class GoAFileIterator(utils.ColumnFile):
+    """
+    Iterator over the rows of a GO annotation column file; each row is
+    returned as a dictionary keyed by the names listed in ``_colname``.
+    """
+    def __init__(self,stream):
+        # the arguments are passed positionally to utils.ColumnFile
+        # NOTE(review): presumably a tab separator, a flag and the column
+        # types - confirm against utils.ColumnFile
+        utils.ColumnFile.__init__(self,
+                                  stream, '\t', True, 
+                                  (str,))
+    # names given, in order, to the values of a row
+    _colname = ['database',
+                'ac',
+                'symbol',
+                'qualifier',
+                'goid',
+                'origin',
+                'evidence',
+                'evidnce_origine',
+                'namespace',
+                'db_object_name',
+                'gene',
+                'object_type',
+                'taxid',
+                'date',
+                'assigned_by']
+    def next(self):
+        # imap(None,a,b) pairs each column name with the value found at the
+        # same position in the row
+        data = utils.ColumnFile.next(self)
+        data = dict(imap(None,GoAFileIterator._colname,data))
+        return data
diff --git a/src/obitools/graph/__init__.py b/src/obitools/graph/__init__.py
new file mode 100644
index 0000000..2d34fd9
--- /dev/null
+++ b/src/obitools/graph/__init__.py
@@ -0,0 +1,1016 @@
+**obitool.graph** for representing graph structure in obitools
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+This module offers classes to manipulate graphs, mainly through the
+:py:class:`obitools.graph.Graph` class.
+.. inheritance-diagram:: Graph DiGraph UndirectedGraph
+    :parts: 2
+import sys
+from obitools.utils import progressBar
+class Indexer(dict):
+    '''
+    Manage the conversion between an arbitrary hashable python
+    value and a unique integer key. Keys are allocated sequentially
+    from 0 in the order the values are first requested.
+    '''
+    def __init__(self):
+        self.__max=0
+        self.__reverse=[]
+    def getLabel(self,index):
+        '''
+        Return the python value associated to an integer index.
+        :param index: an index value
+        :type index: int
+        :raises: IndexError if the index is not used in this 
+                           Indexer instance
+        '''
+        return self.__reverse[index]
+    def getIndex(self,key,strict=False):
+        '''
+        Return the index associated to a **key** in the indexer. Two
+        modes are available :
+            - strict mode  :
+                if the key is not known by the :py:class:`Indexer` instance
+                a :py:exc:`KeyError` exception is raised.
+            - non strict mode :
+                in this mode if the requested **key** is absent, it is added to
+                the :py:class:`Indexer` instance and the new index is returned
+        :param key: the requested key
+        :type key: a hashable python value
+        :param strict: select the lookup mode
+        :type strict: bool
+        :return: the index corresponding to the key
+        :rtype: int
+        :raises: - :py:exc:`KeyError` in strict mode if key is absent 
+                   from the :py:class:`Indexer` instance
+                 - :py:exc:`TypeError` if key is not a hashable value.
+        '''
+        if dict.__contains__(self,key):
+            return dict.__getitem__(self,key)
+        if strict:
+            raise KeyError(key)
+        # unknown key in non strict mode: allocate the next free index
+        value = self.__max
+        self[key]= value
+        self.__reverse.append(key)
+        self.__max+=1
+        return value
+    def __getitem__(self,key):
+        '''
+        Implement the [] operator to emulate the standard dictionary
+        behaviour on :py:class:`Indexer` and return the integer key 
+        associated to a python value.
+        Actually this method calls the :py:meth:`getIndex` method in
+        non strict mode, so an unknown key is registered instead of being
+        rejected and only a :py:exc:`TypeError` can be raised, 
+        if key is not a hashable value.
+        :param key: the value to index
+        :type key: a hashable python value
+        :return: a unique integer value associated to the key
+        :rtype: int
+        :raises: :py:exc:`TypeError` if **key** is not a hashable value.
+        '''
+        return self.getIndex(key)
+    def __eq__(self,index):
+        '''
+        Implement the equal operator **==** for comparing two :py:class:`Indexer` instances. 
+        Two :py:class:`Indexer` instances are equal only if they are physically 
+        the same instance
+        :param index: the second Indexer
+        :type index: an :py:class:`Indexer` instance
+        :return: True if the two :py:class:`Indexer` instances are the same
+        :rtype: bool
+        '''
+        return self is index
+    def __ne__(self,index):
+        '''
+        Implement the operator **!=**, which Python 2 does not deduce
+        from :py:meth:`__eq__`.
+        '''
+        return self is not index
+    # __equal__ is not a special method name, so it was never called by the
+    # == operator; the name is kept for code calling it explicitly
+    __equal__ = __eq__
+class Graph(object):
+    '''
+    Class used to represent directed or undirected graph.
+    .. warning::
+        Only one edge can connect two nodes in a given direction.
+    .. warning::
+        Specifying nodes through their index speeds up your code, but as no check
+        is done on index values, it may result in inconsistencies. So prefer the
+        use of node labels to specify a node.
+    '''
+    def __init__(self,label='G',directed=False,indexer=None,nodes=None,edges=None):
+        '''
+        :param label: Graph name, set to 'G' by default
+        :type label: str
+        :param directed: true for directed graph, set to False by default
+        :type directed: boolean
+        :param indexer: node label indexer. This allows to define several graphs
+                        sharing the same indexer (see : :py:meth:`newEmpty`)
+        :type indexer: :py:class:`Indexer` 
+        :param nodes: set of nodes to add to the graph
+        :type nodes: iterable value
+        :param edges: set of edges to add to the graph
+        :type edges: iterable value
+        .. note:: NOTE(review): **nodes** and **edges** are not used by this
+                  constructor, the graph always starts empty.
+        '''
+        self._directed=directed
+        if indexer is None:
+            indexer = Indexer()
+        self._index = indexer
+        # node index -> set of the indices of the nodes it is linked to
+        self._node = {}
+        # node index -> dictionary of node attributes
+        self._node_attrs = {} 
+        # (index1,index2) -> dictionary of edge attributes
+        self._edge_attrs = {}
+        self._label=label
+    def newEmpty(self):
+        """
+        Build a new empty graph sharing the :py:class:`Indexer` instance and
+        the directed status of this graph; its label is the label of this
+        graph followed by "_compact".
+        Sharing the indexer allows two graphs to share their vertices
+        through their indices.
+        """
+        return Graph(label=self._label+"_compact",
+                     directed=self._directed,
+                     indexer=self._index)
+    def addNode(self,node=None,index=None,**data):
+        '''
+        Add a new node to the graph or update an existing one.
+        :param node: the label of the node to create or to update.
+        :type node:  an hashable python value
+        :param index: the index of an existing node to update; when given
+                      the label is not used.
+        :type index: int
+        :param data: attributes to set on the node
+        :return: the index of the node
+        :rtype: int
+        :raises: :py:exc:`IndexError` if index is not **None** and 
+                 is not lower than the size of the indexer.
+        '''
+        if index is not None:
+            if index >= len(self._index):
+                raise IndexError("This index is not used in this graph...")
+        else:
+            # the label is registered in the indexer if it is not yet known
+            index = self._index[node]
+        if index not in self._node:
+            self._node[index]=set()
+        if data:
+            self._node_attrs.setdefault(index,{}).update(data)
+        return index
+    def __contains__(self,node):
+        '''
+        Implement the **in** operator: True if the label is known by the
+        indexer and its index is a node of this graph. The indexer is
+        looked up in strict mode so an unknown label is not registered.
+        '''
+        try:
+            return self._index.getIndex(node,strict=True) in self._node
+        except KeyError:
+            return False
+    def getNode(self,node=None,index=None):
+        """
+        Build the :py:class:`Node` instance representing a node of the graph.
+        :param node: a node label.
+        :type node:  an hashable python value
+        :param index: the index of an existing node.
+        :type index: int
+        .. note:: Index value are prevalent over node label.
+        :return: the looked for node
+        :rtype: :py:class:`Node`
+        :raises: :py:exc:`KeyError` if the specified node label
+                 is not known by the indexer.
+        .. warning:: no check on index value
+        """
+        if index is not None:
+            return Node(index,self)
+        return Node(self._index.getIndex(node, True),self)
+    def getBestNode(self,estimator):
+        '''
+        Return the node with the highest estimator value. In case of
+        ties the first node met while iterating over the graph is kept.
+        :param estimator: the function to maximize
+        :type estimator: a function returning a numerical value and accepting one
+                         argument of type :py:class:`Node`
+        :return: the best node, **None** if the iteration yields no node
+        :rtype: py:class:`Node`
+        '''
+        winner=None
+        topScore=0
+        for candidate in self:
+            value = estimator(candidate)
+            # the first node is always accepted, whatever its score
+            if winner is not None and not value > topScore:
+                continue
+            topScore = value
+            winner=candidate
+        return winner
+    def delNode(self,node=None,index=None):
+        """
+        Delete a node from a graph and all associated edges.
+        :param node: a node label.
+        :type node:  an hashable python value
+        :param index: the index of an existing node.
+        :type index: int
+        .. note:: Index value are prevalent over node label.
+        :raises: :py:exc:`KeyError` if the specified node label or index
+                 does not correspond to a node of this graph. The graph
+                 and its indexer are left untouched in that case.
+        """
+        if index is None:
+            # strict lookup: an unknown label must not be registered in the
+            # (possibly shared) indexer by a failed deletion
+            index = self._index.getIndex(node, True)
+        # raises KeyError before any modification if the node is absent
+        successors = self._node[index]
+        #
+        # Remove edges pointing to the node 
+        #
+        for n in self._node:
+            if n!=index:
+                e = self._node[n]
+                if index in e:
+                    self._edge_attrs.pop((n,index),None)
+                    e.remove(index)
+        #
+        # Remove attributes of the edges starting from the node 
+        #
+        for n in successors:
+            self._edge_attrs.pop((index,n),None)
+        #
+        # Remove the node by itself
+        #
+        del self._node[index]
+        #
+        # Remove attributes associated to the node
+        #
+        self._node_attrs.pop(index,None)
+    def hasEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
+        '''
+        Test if an edge links node 1 to node 2. For an undirected graph
+        the edge is looked for in both directions.
+        .. note:: Nodes can be specified using their label or their index in the graph
+        if both values are indicated the index is used.
+        :param node1: The first vertex label
+        :type node1:  an hashable python value
+        :param node2: The second vertex label
+        :type node2:  an hashable python value
+        :param index1: The first vertex index
+        :type index1:  int
+        :param index2: The second vertex index
+        :type index2:  int
+        :param data: ignored
+        :return: True if the edge exists
+        :rtype: bool
+        :raises: - :py:exc:`IndexError` if a specified index is not lower
+                   than the size of the indexer
+                 - :py:exc:`KeyError` if a specified label is not known by the
+                   indexer or if a looked up vertex is not a node of this graph
+        '''
+        if index1 is None:
+            index1 = self._index.getIndex(node1, True)
+        elif index1 >= len(self._index):
+            raise IndexError("index1 = %d not in the graph" % index1)
+        if index2 is None:
+            index2 = self._index.getIndex(node2, True)
+        elif index2 >= len(self._index):
+            # the message used to report the value of index1
+            raise IndexError("index2 = %d not in the graph" % index2)
+        rep = index2 in self._node[index1]
+        if not self._directed:
+            rep = rep or (index1 in self._node[index2])
+        return rep
+    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
+        '''
+        Link two nodes of the graph by an edge. Both nodes are created
+        through :py:meth:`addNode` when they do not exist yet.
+        .. note:: Nodes can be specified using their label or their index in the graph
+        if both values are indicated the index is used.
+        :param node1: The first vertex label
+        :type node1:  an hashable python value
+        :param node2: The second vertex label
+        :type node2:  an hashable python value
+        :param index1: The first vertex index
+        :type index1:  int
+        :param index2: The second vertex index
+        :type index2:  int
+        :param data: attributes to set on the edge
+        :return: the indices of both the linked nodes
+        :rtype: tuple of two int
+        :raises: :py:exc:`IndexError` if a specified index is not lower
+                 than the size of the indexer.
+        '''
+        index1=self.addNode(node1, index1)
+        index2=self.addNode(node2, index2)
+        undirected = not self._directed
+        self._node[index1].add(index2)
+        if undirected:
+            self._node[index2].add(index1)
+        if data:
+            forward = (index1,index2)
+            if forward in self._edge_attrs:
+                self._edge_attrs[forward].update(data)
+            else:
+                attributes = dict(data)
+                self._edge_attrs[forward]=attributes
+                if undirected:
+                    # both directions share the same attribute dictionary
+                    self._edge_attrs[(index2,index1)]=attributes
+        return (index1,index2)
+    def getEdge(self,node1=None,node2=None,index1=None,index2=None):
+        '''
+        Build the :py:class:`Edge` instance going from node 1 to node 2.
+        Both ends are resolved through :py:meth:`getNode`.
+        .. note:: Nodes can be specified using their label or their index in the graph
+        if both values are indicated the index is used.
+        :param node1: The first vertex label
+        :type node1:  an hashable python value
+        :param node2: The second vertex label
+        :type node2:  an hashable python value
+        :param index1: The first vertex index
+        :type index1:  int
+        :param index2: The second vertex index
+        :type index2:  int
+        :return: the edge linking both the nodes
+        :rtype: :py:class:`Edge`
+        .. warning:: no check on index value
+        '''
+        source=self.getNode(node1, index1)
+        target=self.getNode(node2, index2)
+        return Edge(source,target)
+    def delEdge(self,node1=None,node2=None,index1=None,index2=None):
+        """
+        Delete the edge linking node 1 to node 2 together with its attributes.
+        Nothing happens when the edge does not exist. On an undirected graph
+        both orientations of the edge are removed.
+        .. note:: Nodes can be specified using their label or their index in the graph
+        if both values are indicated the index is used.
+        :param node1: The first vertex label
+        :type node1:  an hashable python value
+        :param node2: The second vertex label
+        :type node2:  an hashable python value
+        :param index1: The first vertex index
+        :type index1:  int
+        :param index2: The second vertex index
+        :type index2:  int
+        :raises: whatever ``self._index[label]`` raises for an unknown label
+                 (presumably :py:exc:`IndexError` or :py:exc:`KeyError`,
+                 to be confirmed against the indexer class).
+        .. warning:: no check on index value
+        """
+        if index1 is None:
+            index1 = self._index[node1]
+        if index2 is None:
+            index2 = self._index[node2]
+        if index1 in self._node and index2 in self._node[index1]:
+            self._node[index1].remove(index2)
+            # Edge attributes live in _edge_attrs (see addEdge), keyed by
+            # index pairs; _node_attrs is keyed by single node indices.
+            if (index1,index2) in self._edge_attrs:
+                del self._edge_attrs[(index1,index2)]
+            if not self._directed:
+                # discard: for a self-loop the entry is already gone.
+                self._node[index2].discard(index1)
+                if (index2,index1) in self._edge_attrs:
+                    del self._edge_attrs[(index2,index1)]
+    def edgeIterator(self,predicate=None):
+        """
+        Iterate through a set of selected edges.
+        On an undirected graph each edge is stored in both directions; only
+        the orientation going from the lower index to the higher one (or a
+        self-loop) is reported, so every edge is visited once.
+        :param predicate: a function allowing edge selection. Default value
+                          is **None** and indicates that all edges are selected.
+        :type predicate:  a function returning a boolean value
+                          and accepting one argument of class :py:class:`Edge`
+        :return: an iterator over selected edges
+        :rtype: iterator over :py:class:`Edge` instances
+        .. seealso::
+            function :py:func:`selectEdgeAttributeFactory` for simple predicate.
+        """
+        for source in self._node:
+            for target in self._node[source]:
+                if not (self._directed or source <= target):
+                    # mirrored copy of an undirected edge, already reported
+                    continue
+                edge = self.getEdge(index1=source, index2=target)
+                if predicate is None or predicate(edge):
+                    yield edge
+    def nodeIterator(self,predicate=None):
+        """
+        Iterate through a set of selected vertices.
+        A node object is built through :py:meth:`getNode` for every index of
+        the graph, then filtered by the predicate.
+        :param predicate: a function allowing node selection. Default value
+                          is **None** and indicates that all nodes are selected.
+        :type predicate:  a function returning a boolean value
+                          and accepting one argument of class :py:class:`Node`
+        :return: an iterator over selected nodes.
+        :rtype: iterator over :py:class:`Node` instances
+        """
+        for index in self._node:
+            candidate = self.getNode(index=index)
+            if predicate is not None and not predicate(candidate):
+                continue
+            yield candidate
+    def nodeIndexIterator(self,predicate=None):
+        """
+        Iterate through the indexes of a set of selected vertices.
+        The predicate receives the node object built by :py:meth:`getNode`,
+        but the iterator reports the node index only.
+        :param predicate: a function allowing node selection. Default value
+                          is **None** and indicates that all nodes are selected.
+        :type predicate:  a function returning a boolean value
+                          and accepting one argument of class :py:class:`Node`
+        :return: an iterator over selected node indices.
+        :rtype: iterator over `int`
+        """
+        for index in self._node:
+            candidate = self.getNode(index=index)
+            if predicate is not None and not predicate(candidate):
+                continue
+            yield index
+    def neighbourIndexSet(self,node=None,index=None):
+        '''
+        Return the set of node indexes directly connected to a node.
+        The set returned is the adjacency set stored by the graph, not a copy.
+        :param node: a node label, used only when index is None
+        :param index: a node index; it has priority over the node label
+        :type index: int
+        :rtype: set of int
+        '''
+        key = self.getNode(node).index if index is None else index
+        return self._node[key]
+    def edgeCount(self):
+        '''
+        Count the edges of the graph.
+        On a directed graph this is the sum of the adjacency set sizes. On an
+        undirected graph every edge is stored in both directions, except a
+        self-loop which appears once; self-loops are therefore added back
+        before halving, so the result agrees with :py:meth:`edgeIterator`.
+        :return: the number of edges
+        :rtype: int
+        '''
+        n = sum(len(z) for z in self._node.values())
+        if not self._directed:
+            loops = sum(1 for i in self._node if i in self._node[i])
+            # floor division keeps the result an int whatever the division mode
+            n = (n + loops) // 2
+        return n
+    def subgraph(self,nodes,name='G'):
+        '''
+        Build the graph induced by a subset of nodes.
+        The new :py:class:`Graph` shares the indexer of this graph. Adjacency
+        sets are restricted to the selected nodes; node and edge attribute
+        dictionaries are copied (one level deep). On an undirected graph both
+        orientations of an edge share one copied dictionary.
+        :param nodes: indices of the nodes to keep
+        :type nodes: iterable of int
+        :param name: label of the new graph, 'G' by default
+        :type name: str
+        :return: the induced subgraph
+        :rtype: :py:class:`Graph`
+        '''
+        sub = Graph(name,self._directed,self._index)
+        selected = nodes if isinstance(nodes, set) else set(nodes)
+        for n in selected:
+            kept = selected & self._node[n]
+            sub._node[n] = kept
+            if n in self._node_attrs:
+                sub._node_attrs[n] = dict(self._node_attrs[n])
+            for n2 in kept:
+                edge = (n,n2)
+                if edge not in self._edge_attrs:
+                    continue
+                if self._directed:
+                    sub._edge_attrs[edge] = dict(self._edge_attrs[edge])
+                elif n <= n2:
+                    # handle each undirected edge once, from its lower end
+                    shared = dict(self._edge_attrs[edge])
+                    sub._edge_attrs[edge] = shared
+                    sub._edge_attrs[(n2,n)] = shared
+        return sub
+    def __len__(self):
+        '''
+        Return the number of nodes stored in the graph.
+        :rtype: int
+        '''
+        node_count = len(self._node)
+        return node_count
+    def __getitem__(self,key):
+        '''
+        Return the node whose label is key, as resolved by :py:meth:`getNode`.
+        :param key: a node label
+        '''
+        found = self.getNode(key)
+        return found
+    def __delitem__(self,key):
+        # ``del graph[label]``: delegates to delNode with key as the node label.
+        self.delNode(node=key)
+    def __iter__(self):
+        '''
+        Iterate through all the nodes of the graph.
+        .. seealso::
+            :py:meth:`nodeIterator`
+        '''
+        return self.nodeIterator(predicate=None)
+    def dot(self,nodePredicat=None,edgePredicat=None):
+        '''
+        Return a description of the graph in the Graphviz DOT language.
+        :param nodePredicat: a function accepting one node as parameter and
+                         returning True if the node must be written. When it
+                         is given, an edge is written only if both its end
+                         points are selected.
+        :type nodePredicat: function
+        :param edgePredicat: a function accepting one edge as parameter and
+                         returning True if the edge must be written.
+        :type edgePredicat: function
+        :return: the DOT source, a ``digraph`` for a directed graph and a
+                 ``graph`` otherwise
+        :rtype: str
+        '''
+        # Keep the caller's predicate under its own name: rebinding
+        # edgePredicat to the closure would make the closure call itself.
+        userEdgePredicat = edgePredicat
+        def combinedPredicat(edge):
+            # edge.node1 / edge.node2 already are node objects.
+            if not (nodePredicat(edge.node1) and nodePredicat(edge.node2)):
+                return False
+            return userEdgePredicat is None or bool(userEdgePredicat(edge))
+        if nodePredicat is not None:
+            edgeSelector = combinedPredicat
+        else:
+            edgeSelector = userEdgePredicat
+        if self._directed:
+            kw ='digraph'
+        else:
+            kw='graph'
+        nodes = "\n    ".join([str(x) for x in self.nodeIterator(nodePredicat)])
+        edges = "\n    ".join([str(x) for x in self.edgeIterator(edgeSelector)])
+        return "%s %s {\n    %s\n\n    %s\n}" % (kw,self._label,nodes,edges)
+    def __str__(self):
+        '''
+        Return the DOT description of the whole graph.
+        .. seealso::
+            :py:meth:`dot`
+        '''
+        rendering = self.dot()
+        return rendering
+class Node(object):
+    """
+    Class used for representing one node or vertex in a graph.
+    A node is a light view made of an index and a reference to the owning
+    graph: adjacency and attributes are always read from the graph.
+    .. warning::
+        ``len(node)`` is the number of neighbours, so a node without any
+        neighbour is false in a boolean context.
+    """
+    def __init__(self,index,graph):
+        '''        
+        .. warning::
+            :py:class:`Node` constructor is usually called through the :py:class:`Graph` methods
+        :param index: Index of the node in the graph
+        :type index:  int
+        :param graph: graph instance owning the node
+        :type graph:  :py:class:`obitools.graph.Graph`
+        '''
+        self.index = index
+        self.__graph = graph
+    def getGraph(self):
+        '''
+        return graph owning this node.
+        :rtype: :py:class:`obitools.graph.Graph`
+        '''
+        return self.__graph
+    def getLabel(self):
+        '''
+        return label associated to this node, as given by the graph indexer.
+        '''
+        return self.__graph._index.getLabel(self.index)
+    def has_key(self,key):
+        '''
+        test if the node instance has a property named 'key'.
+        :param key: the name of a property
+        :type key: str
+        :return: True if the node has a property named <key>
+        :rtype: bool
+        '''
+        if self.index in self.__graph._node_attrs:
+            return key in self.__graph._node_attrs[self.index]
+        else:
+            return False
+    def neighbourIterator(self,nodePredicat=None,edgePredicat=None):
+        '''
+        iterate through the nodes directly connected to
+        this node.
+        :param nodePredicat: a function accepting one node as parameter
+                         and returning **True** if this node must be
+                         returned by the iterator.
+        :type nodePredicat: function
+        :param edgePredicat: a function accepting one edge as parameter
+                         and returning True if the edge linking self and
+                         the current must be considered.
+        :type edgePredicat: function
+        :rtype: iterator on Node instances
+        '''
+        for n in self.neighbourIndexIterator(nodePredicat, edgePredicat):
+            node = self.graph.getNode(index=n)
+            yield node
+    def neighbourIndexSet(self):
+        '''
+        Return a set of node indexes directly connected
+        to this node.
+        .. warning:: 
+                do not change this set unless you know
+                exactly what you do: it is the adjacency
+                set stored by the graph, not a copy.
+        :rtype: set of int
+        '''
+        return self.__graph._node[self.index]
+    def neighbourIndexIterator(self,nodePredicat=None,edgePredicat=None):
+        '''
+        iterate through the node indexes directly connected to
+        this node.
+        :param nodePredicat: a function accepting one node as parameter
+                         and returning True if this node must be
+                         returned by the iterator.
+        :type nodePredicat: function
+        :param edgePredicat: a function accepting one edge as parameter
+                         and returning True if the edge linking self and
+                         the current must be considered.
+        :type edgePredicat: function
+        :rtype: iterator on int
+        '''
+        for n in self.neighbourIndexSet():
+            if nodePredicat is None or nodePredicat(self.__graph.getNode(index=n)):
+                if edgePredicat is None or edgePredicat(self.__graph.getEdge(index1=self.index,index2=n)):
+                    yield n
+    def degree(self,nodeIndexes=None):
+        '''
+        return count of edges linking this node to the
+        set of nodes described by their index in nodeIndexes
+        :param nodeIndexes: set of node indexes. 
+                            if set to None, all nodes of the
+                            graph are taken into account.
+                            Set to None by default.
+        :type nodeIndexes:  set of int
+        :rtype: int
+        '''
+        if nodeIndexes is None:
+            return len(self.__graph._node[self.index])
+        else:
+            return len(self.__graph._node[self.index] & nodeIndexes)
+    def componentIndexSet(self,nodePredicat=None,edgePredicat=None):
+        '''
+        Return the set of node index in the same connected component.
+        The component is grown layer by layer from this node, following
+        only the neighbours and edges accepted by the predicates. This node
+        itself always belongs to the result.
+        :param nodePredicat: a function accepting one node as parameter
+                         and returning True if this node must be
+                         returned by the iterator.
+        :type nodePredicat: function
+        :param edgePredicat: a function accepting one edge as parameter
+                         and returning True if the edge linking self and
+                         the current must be considered.
+        :type edgePredicat: function
+        :rtype: set of int
+        '''
+        cc=set([self.index])
+        added = set(self.neighbourIndexIterator(nodePredicat, edgePredicat))
+        while added:
+            cc |= added
+            frontier = set()
+            for c in added:
+                frontier.update(self.graph.getNode(index=c).neighbourIndexIterator(nodePredicat, edgePredicat))
+            # keep only the nodes not reached by a previous layer
+            added = frontier - cc
+        return cc
+    def componentIterator(self,nodePredicat=None,edgePredicat=None):
+        '''
+        Iterate through the nodes in the same connected
+        component.
+        :rtype: iterator on :py:class:`Node` instance
+        '''
+        for c in self.componentIndexSet(nodePredicat, edgePredicat):
+            # c is an index: it must not be passed as a label
+            yield self.graph.getNode(index=c)
+    def shortestPathIterator(self,nodes=None):
+        '''
+        Iterate through the shortest paths sourcing
+        from this node. if nodes is not None, iterates
+        only the paths linking this node to one node listed in
+        nodes
+        The graph is explored breadth first, so the first path found to a
+        node has the smallest number of edges. Node indices are assumed to
+        be non negative: -1 marks the absence of predecessor.
+        :param nodes: set of node index
+        :type nodes: iterable on int
+        :return: an iterator on list of int describing path
+        :rtype: iterator on list of int
+        '''
+        from collections import deque
+        if nodes is not None:
+            nodes = set(nodes)
+        # FIFO queue of (node index, predecessor index) pairs
+        queue = deque([(self.index,-1)])
+        gray = set([self.index])
+        paths = {}
+        while queue and (nodes is None or nodes):
+            u,p = queue.popleft()
+            paths[u]=p
+            fresh = self.graph._node[u] - gray
+            gray |= fresh
+            queue.extend((x,u) for x in fresh)
+            if nodes is None or u in nodes:
+                if nodes:
+                    nodes.remove(u)
+                # walk back the predecessors up to the source
+                path = [u]
+                while p >= 0:
+                    path.append(p)
+                    p = paths[p]
+                path.reverse()
+                yield path
+    def shortestPathTo(self,node=None,index=None):
+        '''
+        return one of the shortest path linking this
+        node to specified node.
+        :param node: a node label or None
+        :param index: a node index or None. the parameter index  
+                     has a priority on the parameter node.
+        :type index: int
+        :return: list of node index corresponding to the path or None
+                 if no path exists.
+        :rtype: list of int or None 
+        '''
+        if index is None:
+            index=self.graph.getNode(node).index
+        for p in self.shortestPathIterator([index]):
+            return p
+        return None
+    def __getitem__(self,key):
+        '''
+        return the value of the <key> property of this node
+        :param key: the name of a property
+        :type key: str
+        :raises: :py:exc:`KeyError` if the node has no such property
+        '''
+        return self.__graph._node_attrs.get(self.index,{})[key]
+    def __setitem__(self,key,value):
+        '''
+        set the value of a node property. If the property doesn't
+        already exist a new property is added to this node.
+        :param key: the name of a property
+        :type key: str
+        :param value: the value of the property
+        .. seealso:: 
+            :py:meth:`Node.__getitem__`
+        '''
+        if self.index in self.__graph._node_attrs:
+            data = self.__graph._node_attrs[self.index]
+            data[key]=value
+        else:
+            self.__graph._node_attrs[self.index]={key:value}
+    def __delitem__(self,key):
+        '''
+        delete the <key> property of this node
+        :param key: the name of a property
+        :type key: str
+        :raises: :py:exc:`KeyError` if the node has no such property
+        '''
+        data = self.__graph._node_attrs[self.index]
+        del data[key]
+    def __len__(self):
+        '''
+        Count neighbour of this node
+        :rtype: int
+        .. seealso::  
+            :py:meth:`Node.degree`
+        '''
+        return len(self.__graph._node[self.index])
+    def __iter__(self):
+        '''
+        iterate through neighbour of this node
+        :rtype: iterator in :py:class:`Node` instances
+        .. seealso::
+            :py:meth:`Node.neighbourIterator`
+        '''
+        return self.neighbourIterator()
+    def __contains__(self,key):
+        '''
+        ``key in node`` tests the presence of a property, see :py:meth:`Node.has_key`
+        '''
+        return self.has_key(key)
+    def __str__(self):
+        '''
+        return the DOT statement declaring this node: its index, its label
+        and its properties, double quotes and newlines being escaped.
+        '''
+        def escape(value):
+            return str(value).replace('"','\\"').replace('\n','\\n')
+        if self.index in self.__graph._node_attrs:
+            keys = " ".join(['%s="%s"' % (k,escape(v))
+                              for k,v in self.__graph._node_attrs[self.index].items()]
+                           )
+        else:
+            keys=''
+        return '%d  [label="%s" %s]' % (self.index,
+                                        escape(self.label),
+                                        keys)      
+    def keys(self):
+        '''
+        return the names of the properties of this node
+        :rtype: list
+        '''
+        if self.index in self.__graph._node_attrs:
+            # always a real list: callers remove items from it
+            k = list(self.__graph._node_attrs[self.index].keys())
+        else:
+            k=[]
+        return k
+    label = property(getLabel, None, None, "Label of the node")
+    graph = property(getGraph, None, None, "Graph owning this node")
+class Edge(object):
+    """
+    Class used for representing one edge of a graph.
+    An edge is a light view made of its two end nodes: its properties are
+    stored by the owning graph under the ``(index1,index2)`` key.
+    """
+    def __init__(self,node1,node2):
+        '''
+        .. warning::
+            :py:class:`Edge` constructor is usually called through the :py:class:`Graph` methods
+        :param node1: First node linked by the edge
+        :type node1:  :py:class:`Node`
+        :param node2: Second node linked by the edge
+        :type node2:  :py:class:`Node`
+        '''
+        self.node1 = node1
+        self.node2 = node2
+    def getGraph(self):
+        """
+        Return the :py:class:`Graph` instance owning this edge
+        (the graph of its first node).
+        """
+        return self.node1.graph
+    def has_key(self,key):
+        '''
+        test if the :py:class:`Edge` instance has a property named **key**.
+        :param key: the name of a property
+        :type key: str
+        :return: True if the edge has a property named <key>
+        :rtype: bool
+        '''
+        if (self.node1.index,self.node2.index) in self.graph._edge_attrs:
+            return key in self.graph._edge_attrs[(self.node1.index,self.node2.index)]
+        else:
+            return False
+    def getDirected(self):
+        '''
+        Return True if the owning graph is directed.
+        :rtype: bool
+        '''
+        return self.node1.graph._directed
+    def __getitem__(self,key):
+        '''
+        return the value of the <key> property of this edge
+        :param key: the name of a property
+        :type key: str
+        :raises: :py:exc:`KeyError` if the edge has no such property
+        '''
+        return self.graph._edge_attrs.get((self.node1.index,self.node2.index),{})[key]
+    def __setitem__(self,key,value):
+        '''
+        set the value of an edge property, adding it when needed.
+        On an undirected graph both orientations of the edge share the same
+        property dictionary, as done by the graph when the edge is created
+        with properties.
+        :param key: the name of a property
+        :type key: str
+        :param value: the value of the property
+        '''
+        e = (self.node1.index,self.node2.index)
+        attrs = self.graph._edge_attrs
+        if e not in attrs:
+            r = (e[1],e[0])
+            if not self.directed and r in attrs:
+                # reuse the dictionary already owned by the mirrored edge
+                attrs[e] = attrs[r]
+            else:
+                attrs[e] = {}
+                if not self.directed:
+                    attrs[r] = attrs[e]
+        attrs[e][key]=value
+    def __str__(self):
+        '''
+        return the DOT statement describing this edge, ``->`` being used for
+        a directed graph and ``--`` otherwise. Double quotes and newlines of
+        the property values are escaped.
+        '''
+        e = (self.node1.index,self.node2.index)
+        if e in self.graph._edge_attrs:
+            keys = "[%s]" % " ".join(['%s="%s"' % (k,str(v).replace('"','\\"').replace('\n','\\n'))
+                                      for k,v in self.graph._edge_attrs[e].items()]
+                                    )
+        else:
+            keys = ""
+        if self.directed:
+            link='->'
+        else:
+            link='--'
+        return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys) 
+    def __contains__(self,key):
+        '''
+        ``key in edge`` tests the presence of a property, see :py:meth:`Edge.has_key`
+        '''
+        return self.has_key(key)
+    graph = property(getGraph, None, None, "Graph owning this edge")
+    directed = property(getDirected, None, None, "True if the owning graph is directed")
+class DiGraph(Graph):
+    """
+    The :py:class:`DiGraph` class is a specialisation of the :py:class:`Graph` class
+    dedicated to directed graph representation. It only fixes the second
+    argument of the :py:class:`Graph` constructor to True.
+    .. seealso::
+        :py:class:`UndirectedGraph`
+    """
+    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
+        '''
+        :param label: Graph name, set to 'G' by default
+        :type label: str
+        :param indexer: node label indexer
+        :type indexer: Indexer instance
+        :param nodes: set of nodes to add to the graph
+        :type nodes: iterable value
+        :param edges: set of edges to add to the graph
+        :type edges: iterable value
+        '''
+        # True: presumably the ``directed`` flag of Graph.__init__ (the same
+        # position receives self._directed in Graph.subgraph).
+        Graph.__init__(self, label, True, indexer, nodes, edges)
+class UndirectedGraph(Graph):
+    """
+    The :py:class:`UndirectedGraph` class is a specialisation of the :py:class:`Graph` class
+    dedicated to undirected graph representation. It only fixes the second
+    argument of the :py:class:`Graph` constructor to False.
+    .. seealso::
+        :py:class:`DiGraph`
+    """
+    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
+        '''
+        :param label: Graph name, set to 'G' by default
+        :type label: str
+        :param indexer: node label indexer
+        :type indexer: Indexer instance
+        :param nodes: set of nodes to add to the graph
+        :type nodes: iterable value
+        :param edges: set of edges to add to the graph
+        :type edges: iterable value
+        '''
+        # False: presumably the ``directed`` flag of Graph.__init__ (the same
+        # position receives self._directed in Graph.subgraph).
+        Graph.__init__(self, label, False, indexer, nodes, edges)
+def selectEdgeAttributeFactory(attribut,value):
+    """
+    Build a predicate selecting the edges owning a given property value.
+    The returned function is usable for selecting edges
+    in the following :py:class:`Graph` methods :
+        - :py:meth:`Graph.edgeIterator`
+    :param attribut: the name of the edge property to test
+    :param value: the value the property must be equal to
+    :return: a function accepting one edge and returning True when the edge
+             has the property and the property is equal to value
+    :rtype: function
+    """
+    def selectEdge(e):
+        if attribut not in e:
+            return False
+        return e[attribut]==value
+    return selectEdge
diff --git a/src/obitools/graph/algorithms/__init__.py b/src/obitools/graph/algorithms/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/graph/algorithms/clique.py b/src/obitools/graph/algorithms/clique.py
new file mode 100644
index 0000000..2007c1a
--- /dev/null
+++ b/src/obitools/graph/algorithms/clique.py
@@ -0,0 +1,134 @@
+import time
+import sys
+def cliqueIterator(graph,minsize=1,node=None,timeout=None):
+    '''
+    Iterate through the cliques of a graph.
+    This function resets the module level counters used by
+    :py:func:`_cliqueIterator` and relays everything it yields. Because the
+    state is kept in module globals, two enumerations must not be
+    interleaved.
+    :param graph: the graph to explore
+    :type graph:  :py:class:`obitools.graph.Graph`
+    :param minsize: minimum size of the cliques to report
+    :type minsize: int
+    :param node: label of a node; when given, only the cliques containing
+                 this node are searched for. By default every node of the
+                 graph is a candidate.
+    :param timeout: a duration in seconds, or None; it is handed to
+                    :py:func:`_cliqueIterator`
+    :return: an iterator over cliques
+    :rtype: iterator over `set` of `int` (node indices)
+    '''
+    global _maxsize,_maxclique,_solution,_notbound,_sizebound,_lastyield
+    _maxsize=0
+    # Must exist before the search: the timeout branch reads it.
+    _maxclique=None
+    _solution=0
+    _notbound=0
+    _sizebound=0
+    starttime = time.time()  
+    # 'is not None' so that a falsy label such as 0 is still honoured
+    if node is not None:
+        node = graph.getNode(node)
+        index = node.index
+        clique= set([index])
+        candidates= set(graph.neighbourIndexSet(index=index))
+    else:
+        clique=set()
+        candidates = set(x.index for x in graph)
+    # NOTE(review): _lastyield is set here only, so the timer measured
+    # against it starts with the enumeration, not with the last yield.
+    _lastyield=time.time()  
+    for c in _cliqueIterator(graph,clique,candidates,set(),minsize,start=starttime,timeout=timeout):
+        yield c
+def _cliqueIterator(graph,clique,candidates,notlist,minsize=0,start=None,timeout=None):
+    '''
+    Recursive worker of :py:func:`cliqueIterator`.
+    NOTE(review): this looks like a Bron-Kerbosch style enumeration where the
+    branching is restricted with the help of the excluded nodes - confirm.
+    The sets `candidates` and `notlist` are modified in place.
+    :param graph: the graph to explore
+    :param clique: indices of the nodes of the clique being built
+    :type clique: set of int
+    :param candidates: indices of the nodes that can still extend the clique
+    :type candidates: set of int
+    :param notlist: indices of the nodes already tried at this level
+    :type notlist: set of int
+    :param minsize: minimum size of the cliques to report
+    :type minsize: int
+    :param start: reference time used for the speed displayed on stderr
+    :param timeout: a duration in seconds, or None
+    :rtype: iterator over `set` of `int`
+    '''
+    global _maxsize,_maxclique,_solution,_notbound,_sizebound,_lastyield
+                            # Speed indicator
+    lclique     = len(clique)
+    lcandidates = len(candidates)
+    # Among the excluded nodes, look for the one leaving the fewest
+    # candidates that are NOT its neighbours; keep that count (notmin),
+    # the node (notfix) and those candidates (notfixneib).
+    notmin = lcandidates
+    notfix = None
+    for n in notlist:
+        nnc = candidates - graph.neighbourIndexSet(index=n) 
+        nc = len(nnc)
+        if nc < notmin:
+            notmin=nc
+            notfix=n
+            notfixneib = nnc
+    # Progress report on stderr: when the clique is larger than the largest
+    # one seen so far, or when the solution counter is a multiple of 1000.
+    if lclique > _maxsize or not _solution % 1000 :   
+        if start is not None:
+            top   = time.time()
+            delta = top - start
+            if delta==0:
+                delta=1e-6
+            speed = _solution / delta
+            # local rebinding: the new value is the one passed to the recursive calls
+            start = top
+        else:
+            speed = 0
+        print >>sys.stderr,"\rCandidates : %-5d Maximum clique size : %-5d Solutions explored : %10d   speed = %5.2f solutions/sec  sizebound=%10d notbound=%10d          " % (lcandidates,_maxsize,_solution,speed,_sizebound,_notbound),
+        sys.stderr.flush()
+        if lclique > _maxsize:
+            _maxsize=lclique
+#   print >>sys.stderr,'koukou'        
+    # time elapsed since _lastyield was set by cliqueIterator
+    timer = time.time() - _lastyield
+    if not candidates and not notlist:
+        # Nothing left to add and nothing excluded: the clique is reported
+        # (as a copy) when it is large enough.
+        if lclique==_maxsize:
+            _maxclique=set(clique)
+        if lclique >= minsize:
+            yield set(clique)
+        # Once the timeout is over the largest clique recorded is yielded
+        # one more time, then forgotten.
+        if timeout is not None and timer > timeout and _maxclique is not None:
+            yield _maxclique
+            _maxclique=None
+    else:                        
+        # Branch while some candidate remains, notmin is not exhausted and
+        # the clique can still reach minsize (the size bound is ignored once
+        # the timeout is over).
+        while notmin and candidates and ((lclique + len(candidates)) >= minsize or (timeout is not None and timer > timeout)):
+                    # count explored solution
+            _solution+=1
+            if notfix is None:
+                nextcandidate = candidates.pop()
+            else:
+                # pick among the candidates not adjacent to notfix
+                nextcandidate = notfixneib.pop()
+                candidates.remove(nextcandidate)
+            clique.add(nextcandidate)     
+            neighbours = graph.neighbourIndexSet(index=nextcandidate)   
+            nextcandidates = candidates & neighbours
+            nextnot        = notlist    & neighbours
+            # remaining candidates not adjacent to the node just chosen
+            nnc = candidates - neighbours
+            lnnc=len(nnc)
+            for c in _cliqueIterator(graph, 
+                                     set(clique), 
+                                     nextcandidates,
+                                     nextnot,
+                                     minsize,
+                                     start,
+                                     timeout=timeout):
+                yield c
+            # backtrack: the node just explored joins the excluded nodes and
+            # becomes the new reference node if it leaves fewer candidates
+            clique.remove(nextcandidate)
+            notmin-=1
+            if lnnc < notmin:
+                notmin = lnnc
+                notfix = nextcandidate
+                notfixneib = nnc
+            if notmin==0:
+                _notbound+=1
+            notlist.add(nextcandidate)
+        else:
+            # while/else without break: always executed when the loop ends;
+            # counts the branches cut because minsize cannot be reached
+            if (lclique + len(candidates)) < minsize:
+                _sizebound+=1
diff --git a/src/obitools/graph/algorithms/compact.py b/src/obitools/graph/algorithms/compact.py
new file mode 100644
index 0000000..8065a93
--- /dev/null
+++ b/src/obitools/graph/algorithms/compact.py
@@ -0,0 +1,8 @@
+def compactGraph(graph,nodeSetIterator):
+    '''
+    Build a new graph owning one node per set of nodes of `graph`.
+    Each node of the result is labelled with the labels of the grouped
+    nodes, joined by newlines.
+    NOTE(review): only the nodes are created, no edge is added between the
+    groups; the function looks unfinished - confirm the intent.
+    :param graph: the graph to compact; it must provide `newEmpty` and `getNode`
+    :param nodeSetIterator: a function accepting the graph and returning an
+                            iterable over collections of node indices
+    :type nodeSetIterator: function
+    :return: the compacted graph, as created by ``graph.newEmpty()``
+    '''
+    compact = graph.newEmpty()
+    for ns in nodeSetIterator(graph):
+        nlabel = "\n".join([str(graph.getNode(index=x).label) for x in ns])
+        compact.addNode(nlabel)
+        # debug trace kept from the original: prints the graph after each group
+        print 
+        print compact
+    return compact
diff --git a/src/obitools/graph/algorithms/component.py b/src/obitools/graph/algorithms/component.py
new file mode 100644
index 0000000..a17c8dd
--- /dev/null
+++ b/src/obitools/graph/algorithms/component.py
@@ -0,0 +1,82 @@
+Iterate through the connected components of a graph
+The module :py:mod:`obitools.graph.algorithms.component` provides
+two functions to deal with the connected components of a graph
+represented as a :py:class:`obitools.graph.Graph` instance.
+The whole set of connected components of a graph is a partition of this graph,
+so a node cannot belong to two distinct connected components.
+Two nodes are in the same connected component if there exists a path through 
+the graph edges linking them.
+TODO: There is certainly a bug with DirectedGraph
+def componentIterator(graph,nodePredicat=None,edgePredicat=None):
+    '''
+    Build an iterator over the connected components of a graph.
+    Each connected component returned by the iterator is represented 
+    as a `set` of node indices. A component is computed from the first
+    selected node not belonging to a component already returned.
+    :param graph: the graph to partition
+    :type graph:  :py:class:`obitools.graph.Graph`
+    :param nodePredicat: a function allowing node selection. Default value
+                      is **None** and indicates that all nodes are selected.
+    :type nodePredicat:  a function returning a boolean value
+                      and accepting one argument of class :py:class:`Node`
+    :param edgePredicat: a function allowing edge selection. Default value
+                      is **None** and indicates that all edges are selected.
+    :type edgePredicat:  a function returning a boolean value
+                      and accepting one argument of class :py:class:`Edge`
+    :return: an iterator over the connected component set
+    :rtype: an iterator over `set` of `int`
+    .. seealso::
+        the :py:meth:`obitools.graph.Node.componentIndexSet` method
+        on which is based this function.
+    '''
+    visited = set()
+    for node in graph.nodeIterator(nodePredicat):
+        if node.index in visited:
+            continue
+        component = node.componentIndexSet(nodePredicat, edgePredicat)
+        yield component
+        visited.update(component)
+def componentCount(graph,nodePredicat=None,edgePredicat=None):
+    '''
+    Count the connected components in a graph.
+    :param graph: the graph to partition
+    :type graph:  :py:class:`obitools.graph.Graph`
+    :param nodePredicat: a function allowing node selection. Default value
+                      is **None** and indicates that all nodes are selected.
+    :type nodePredicat:  a function returning a boolean value
+                      and accepting one argument of class :py:class:`Node`
+    :param edgePredicat: a function allowing edge selection. Default value
+                      is **None** and indicates that all edges are selected.
+    :type edgePredicat:  a function returning a boolean value
+                      and accepting one argument of class :py:class:`Edge`
+    :return: the number of connected components
+    :rtype: int
+    .. seealso::
+        the :py:func:`componentIterator` function
+        on which is based this function.
+    '''
+    return sum(1 for _ in componentIterator(graph,nodePredicat, edgePredicat))
\ No newline at end of file
diff --git a/src/obitools/graph/dag.py b/src/obitools/graph/dag.py
new file mode 100644
index 0000000..c4e8d13
--- /dev/null
+++ b/src/obitools/graph/dag.py
@@ -0,0 +1,99 @@
+from obitools.graph import DiGraph,Node
+from obitools.graph.algorithms.component import componentIterator
+class DAG(DiGraph):
+    '''
+    Directed acyclic graph.
+    On top of the :py:class:`DiGraph` adjacency, the graph records for each
+    node the set of its parent indices in ``_parents``; nodes are returned as
+    :py:class:`DAGNode` instances.
+    '''
+    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
+        '''
+        Directed acyclic graph constructor.
+        :param label: Graph name, set to 'G' by default
+        :type label: str
+        :param indexer: node label indexer
+        :type indexer: Indexer instance
+        :param nodes: set of nodes to add to the graph
+        :type nodes: iterable value
+        :param edges: set of edges to add to the graph
+        :type edges: iterable value
+        '''
+        # _parents must exist before the base constructor runs.
+        # NOTE(review): presumably because it adds the edges through addEdge - confirm.
+        self._parents={}
+        DiGraph.__init__(self, label, indexer, nodes, edges)
+    def getNode(self,node=None,index=None):
+        '''
+        Return the :py:class:`DAGNode` designated by its label or its index.
+        :param node: a node label, used only when index is None
+        :param index: a node index; it has priority over the node label
+        :type index: int
+        :rtype: :py:class:`DAGNode`
+        '''
+        if index is None:
+            index = self._index.getIndex(node, True)
+        return DAGNode(index,self)
+    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
+        '''
+        Add an edge going from a parent node (node 1) to a child node (node 2).
+        :param node1: the parent node label
+        :param node2: the child node label
+        :param index1: the parent node index
+        :type index1: int
+        :param index2: the child node index
+        :type index2: int
+        :param data: edge attributes given as keyword arguments
+        :return: the pair of node indices describing the edge
+        :rtype: tuple of two int
+        :raises: :py:exc:`AssertionError` if the edge would create a cycle,
+                 i.e. if the child is the parent itself or one of its ancestors
+        '''
+        index1=self.addNode(node1, index1)
+        index2 =self.addNode(node2, index2)
+        # Explicit raises: these checks must survive python -O. A self-loop
+        # is refused too, it would make ancestorIterator recurse for ever.
+        if index1==index2:
+            raise AssertionError('A node cannot be its own parent')
+        pindex = set(n.index 
+                     for n in self.getNode(index=index1).ancestorIterator())
+        if index2 in pindex:
+            raise AssertionError('Child node cannot be a parent node')
+        DiGraph.addEdge(self,index1=index1,index2=index2,**data)   
+        if index2 in self._parents:
+            self._parents[index2].add(index1)
+        else:
+            self._parents[index2]=set([index1])   
+        return (index1,index2)
+    def getRoots(self):
+        '''
+        Return the list of all roots of the DAG (i.e. nodes without parent)
+        :return: a list of DAGNode
+        '''
+        return list(self.nodeIterator(lambda n : n.index not in self._parents))
+    def getLeaves(self):
+        '''
+        Return the list of all leaves of the DAG (i.e. nodes without child)
+        :return: a list of DAGNode
+        '''
+        return list(self.nodeIterator(lambda n : not n.neighbourIndexSet()))
+class DAGNode(Node):
+    '''
+    Node of a :py:class:`DAG`: the neighbours of the node are its children,
+    its parents are read from the ``_parents`` index of the graph.
+    Parent and ancestor nodes are built through the ``getNode`` method of the
+    graph, so that graphs specialising :py:class:`DAG` get their own node class.
+    '''
+    def getParents(self):
+        '''
+        Return the list of the direct parents of this node.
+        :rtype: list of node instances
+        '''
+        return [self.graph.getNode(index=p)
+                for p in self.graph._parents.get(self.index,())]
+    def ancestorIterator(self):
+        '''
+        Iterate through the ancestors of this node, each parent being
+        followed by its own ancestors. An ancestor reachable through several
+        paths is returned several times.
+        :rtype: iterator over node instances
+        '''
+        for p in self.graph._parents.get(self.index,()):
+            parent = self.graph.getNode(index=p)
+            yield parent
+            for pnode in parent.ancestorIterator():
+                yield pnode
+    def getRoot(self):
+        '''
+        Return the last node reported by :py:meth:`ancestorIterator`, which
+        is a node without parent, or this node itself if it has no parent.
+        '''
+        x=self
+        for x in self.ancestorIterator():
+            pass
+        return x 
+    def leavesIterator(self):
+        '''
+        Iterate through the leaves (nodes without child) located under this
+        node; a node without child reports itself.
+        '''
+        # ``not self`` is true when the node has no neighbour (see Node.__len__)
+        if not self:
+            yield self
+        for n in self:
+            for nn in n.leavesIterator():
+                yield nn
+    def subgraphIterator(self):
+        '''
+        Iterate through this node then, recursively, through all the nodes
+        located under it.
+        '''
+        yield self
+        for n in self:
+            for nn in n.subgraphIterator():
+                yield nn
diff --git a/src/obitools/graph/layout/__init__.py b/src/obitools/graph/layout/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/graph/layout/radialtree.py b/src/obitools/graph/layout/radialtree.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/graph/rootedtree.py b/src/obitools/graph/rootedtree.py
new file mode 100644
index 0000000..aaad598
--- /dev/null
+++ b/src/obitools/graph/rootedtree.py
@@ -0,0 +1,115 @@
+from obitools.graph.dag import DAG,DAGNode
+class RootedTree(DAG):
+    '''
+    DAG in which a node is allowed at most one parent.
+    '''
+    def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
+        '''
+        Register both end points with C{addNode}, check that the child
+        does not already hang below another parent, then delegate to
+        C{DAG.addEdge}.
+        @return: whatever C{DAG.addEdge} returns
+        '''
+        indexp=self.addNode(parent, indexp)
+        index =self.addNode(node  , index)
+        singleparent = index not in self._parents or indexp in self._parents[index]
+        assert singleparent, \
+                'Child node cannot have more than one parent node'
+        return DAG.addEdge(self,indexp=indexp,index=index,**data)
+    def getNode(self,node=None,index=None):
+        '''
+        @return: a C{RootedTreeNode} for C{index}; when C{index} is C{None}
+                 it is obtained from C{self._index.getIndex(node, True)}
+        '''
+        nodeindex = index
+        if nodeindex is None:
+            nodeindex = self._index.getIndex(node, True)
+        return RootedTreeNode(nodeindex,self)
+class RootedTreeNode(DAGNode):
+    '''
+    Node of a C{RootedTree}; adds recursive size measures of the subtree
+    rooted at the node.
+    '''
+    def subTreeSize(self):
+        '''
+        @return: the number of nodes of the subtree, this node included
+        '''
+        return 1 + sum(child.subTreeSize() for child in self)
+    def subTreeLeaves(self):
+        '''
+        @return: the number of leaves of the subtree; a node without
+                 child counts for one leaf
+        '''
+        if not self:
+            return 1
+        return sum(child.subTreeLeaves() for child in self)
+def nodeWriter(node,deep=0,label=None,distance="distance", bootstrap="bootstrap",cartoon=None,collapse=None):
+    '''
+    Serialise C{node} and, recursively, its children as a parenthesised
+    Newick-like string: C{(children)"name"[&key=value,...]:distance}.
+    @param node: node to write; must provide C{keys()}, item access,
+                 C{in} tests on its attribute names and iteration over children
+    @param deep: nesting depth, only used to indent the output
+    @param label: C{None} to use C{node.label}, a callable receiving the
+                  node, or the name of the node attribute holding the name
+    @param distance: name of the attribute holding the branch length
+    @param bootstrap: accepted and forwarded to the recursive calls but
+                      not used to build the output
+    @param cartoon: optional predicate; when true for a node a
+                    C{!cartoon} annotation is added
+    @param collapse: optional predicate; when true for a node a
+                     C{!collapse} annotation is added
+    @return: the string describing the subtree
+    '''
+    # Work on a private list: keys() may hand back a view or the node's
+    # own key list, which must not be edited in place.
+    ks = list(node.keys())
+    if label is None:
+        name=node.label
+    elif callable(label):
+        name=label(node)
+    elif isinstance(label, str) and label in node:
+        name=node[label]
+        ks.remove(label)
+    else:
+        name=''
+    if distance in node:
+        dist=':%6.5f' % node[distance]
+        # The key is already gone when label and distance name the same attribute.
+        if distance in ks:
+            ks.remove(distance)
+    else:
+        dist=''
+    # Every remaining attribute becomes a key=value annotation.
+    ks = ["%s=%s" % (k,node[k]) for k in ks]
+    if cartoon is not None and cartoon(node):
+        ks.append("!cartoon={%d,0.0}" % node.subTreeLeaves())
+    if collapse is not None and collapse(node):
+        ks.append('!collapse={"collapsed",0.0}')
+    if ks:
+        ks="[&"+",".join(ks)+"]"
+    else:
+        ks=''
+    # Children are written one per line, indented one column deeper.
+    nodeseparator = ',\n' + ' ' * (deep+1)
+    subnodes = nodeseparator.join([nodeWriter(x, deep+1,label,distance,bootstrap,cartoon=cartoon,collapse=collapse)
+                                   for x in node])
+    if subnodes:
+        subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'
+    return '%s"%s"%s%s' % (subnodes,name,ks,dist)
+def nexusFormat(tree,startnode=None,label=None,blocks="",cartoon=None,collapse=None):
+    '''
+    Build a NEXUS document for C{tree}: a taxa block listing every
+    non-empty node name, a trees block holding the output of
+    C{nodeWriter}, then the caller supplied extra C{blocks}.
+    @param tree: iterable of nodes providing C{getRoots()}
+    @param startnode: node to write the tree from; defaults to the
+                      first root returned by C{tree.getRoots()}
+    @param label: same meaning as in C{nodeWriter}
+    @param blocks: text appended after the trees block
+    @param cartoon: forwarded to C{nodeWriter}
+    @param collapse: forwarded to C{nodeWriter}
+    @return: the NEXUS text
+    '''
+    def taxonName(n):
+        if label is None:
+            return n.label
+        if callable(label):
+            return label(n)
+        if isinstance(label, str) and label in n:
+            return n[label]
+        return ''
+    # only nodes with a non-empty name are declared as taxa
+    quoted = ['"%s"' % name for name in (taxonName(n) for n in tree) if name]
+    taxa = ("begin taxa;\n\tdimensions ntax=%d;\n\ttaxlabels\n\t" % len(quoted)
+            + "\n\t".join(quoted)
+            + "\n;\nend;\n\n")
+    if startnode is None:
+        root = tree.getRoots()[0]
+    else:
+        root = startnode
+    newick = nodeWriter(root,0,label,cartoon=cartoon,collapse=collapse)
+    trees = "begin trees;\n\ttree tree_1 = [&R] "+ newick +";\nend;\n\n"
+    return "#NEXUS\n"+taxa+trees+"\n\n"+blocks+"\n"
diff --git a/src/obitools/graph/tree.py b/src/obitools/graph/tree.py
new file mode 100644
index 0000000..940ee44
--- /dev/null
+++ b/src/obitools/graph/tree.py
@@ -0,0 +1,37 @@
+from obitools.graph import UndirectedGraph,Node
+from obitools.graph.algorithms.component import componentCount
+class Forest(UndirectedGraph):
+    '''
+    Undirected graph in which an edge is refused when its end points are
+    already found connected by C{TreeNode.componentIterator}.
+    '''
+    def getNode(self,node=None,index=None):
+        '''
+        @return: a C{TreeNode} for C{index}; when C{index} is C{None}
+                 it is obtained from C{self._index.getIndex(node, True)}
+        '''
+        nodeindex = index
+        if nodeindex is None:
+            nodeindex = self._index.getIndex(node, True)
+        return TreeNode(nodeindex,self)
+    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
+        '''
+        Register both end points, check that the first one is either
+        already a neighbour of the second or not reachable from it,
+        then add the edge with C{UndirectedGraph.addEdge}.
+        @return: the tuple C{(index1,index2)}
+        '''
+        index1=self.addNode(node1, index1)
+        index2=self.addNode(node2, index2)
+        reachable = set()
+        for n in self.getNode(index=index2).componentIterator():
+            reachable.add(n.index)
+        assert index1 in self._node[index2] or index1 not in reachable, \
+               "No more than one path is alloed between two nodes in a tree"
+        UndirectedGraph.addEdge(self, index1=index1, index2=index2,**data)
+        return (index1,index2)
+    def isASingleTree(self):
+        '''
+        @return: C{True} when C{componentCount} reports exactly one component
+        '''
+        ncomponents = componentCount(self)
+        return ncomponents==1
+class TreeNode(Node):
+    '''
+    Node of a C{Forest}.
+    '''
+    def componentIterator(self):
+        '''
+        Iterate breadth first over every node reachable from this node,
+        each node being yielded once. The previous implementation stopped
+        at the neighbours of the neighbours, so C{Forest.addEdge} could
+        not detect a second path longer than two edges.
+        As before, an isolated node yields nothing, and the node itself
+        is yielded only when it is reached back through a neighbour.
+        Iterating over a node is assumed to yield its neighbour nodes,
+        each exposing an C{index} attribute.
+        '''
+        from collections import deque
+        seen = set()
+        pending = deque(self)
+        while pending:
+            current = pending.popleft()
+            if current.index in seen:
+                continue
+            seen.add(current.index)
+            yield current
+            # queue the neighbours of the node just visited
+            pending.extend(current)
\ No newline at end of file
diff --git a/src/obitools/gzip.py b/src/obitools/gzip.py
new file mode 100644
index 0000000..841641a
--- /dev/null
+++ b/src/obitools/gzip.py
@@ -0,0 +1,504 @@
+"""Functions that read and write gzipped files.
+The user of the file doesn't have to worry about the compression,
+but random access is not allowed.
+This consists of a patched version of the standard gzip Python
+module, based on Andrew Kuchling's minigzip.py distributed with the zlib module.
+"""
+# based on Andrew Kuchling's minigzip.py distributed with the zlib module
+import struct, sys, time
+import zlib
+import __builtin__
+__all__ = ["GzipFile","open"]
+# Bit masks of the FLG byte of a gzip member header (RFC 1952).
+# GzipFile._write_gzip_header and GzipFile._read_gzip_header test these
+# names, which were not defined anywhere in this module.
+FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
+# Values stored in GzipFile.mode.
+READ, WRITE = 1, 2
+def U32(i):
+    """Map a signed 32-bit value onto its unsigned counterpart.
+    Negative values are shifted up by 2**32 (giving a long); other
+    values are returned unchanged.
+    """
+    if i < 0:
+        return i + (1L << 32)
+    return i
+def LOWU32(i):
+    """Keep only the 32 low-order bits of i; the result is never negative."""
+    mask = 0xFFFFFFFFL
+    return i & mask
+def write32(output, value):
+    """Write value to output as a little-endian signed 32-bit integer."""
+    packed = struct.pack("<l", value)
+    output.write(packed)
+def write32u(output, value):
+    """Write value to output as a little-endian unsigned 32-bit integer."""
+    # The L format writes the bit pattern correctly whether signed
+    # or unsigned.
+    packed = struct.pack("<L", value)
+    output.write(packed)
+def read32(input):
+    """Read four bytes from input and decode them as a little-endian
+    signed 32-bit integer."""
+    (value,) = struct.unpack("<l", input.read(4))
+    return value
+def unpack32(buf):
+    """Decode the four bytes of buf as a little-endian signed 32-bit integer."""
+    (value,) = struct.unpack("<l", buf)
+    return value
+def open(filename, mode="rb", compresslevel=9):
+    """Build a GzipFile on filename.
+    Equivalent to GzipFile(filename, mode, compresslevel): filename is
+    mandatory, mode defaults to 'rb' and compresslevel to 9.
+    """
+    return GzipFile(filename=filename, mode=mode, compresslevel=compresslevel)
+class GzipFile:
+    """The GzipFile class simulates most of the methods of a file object with
+    the exception of the readinto() and truncate() methods.
+    """
+    # File object opened by the constructor itself (as opposed to one handed
+    # in through fileobj); close() closes it.  None otherwise.
+    myfileobj = None
+    # Upper bound of the chunk size requested by read() while it doubles
+    # its read size.
+    max_read_chunk = 10 * 1024 * 1024   # 10Mb
+    def __init__(self, filename=None, mode=None,
+                 compresslevel=9, fileobj=None):
+        """Constructor for the GzipFile class.
+        At least one of fileobj and filename must be given a
+        non-trivial value.
+        The new class instance is based on fileobj, which can be a regular
+        file, a StringIO object, or any other object which simulates a file.
+        It defaults to None, in which case filename is opened to provide
+        a file object.
+        When fileobj is not None, the filename argument is only used to be
+        included in the gzip file header, which may includes the original
+        filename of the uncompressed file.  It defaults to the filename of
+        fileobj, if discernible; otherwise, it defaults to the empty string,
+        and in this case the original filename is not included in the header.
+        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
+        depending on whether the file will be read or written.  The default
+        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
+        Be aware that only the 'rb', 'ab', and 'wb' values should be used
+        for cross-platform portability.
+        The compresslevel argument is an integer from 1 to 9 controlling the
+        level of compression; 1 is fastest and produces the least compression,
+        and 9 is slowest and produces the most compression.  The default is 9.
+        An IOError is raised when mode starts with none of 'r', 'w' or 'a'.
+        """
+        # guarantee the file is opened in binary mode on platforms
+        # that care about that sort of thing
+        if mode and 'b' not in mode:
+            mode += 'b'
+        if fileobj is None:
+            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
+        if filename is None:
+            if hasattr(fileobj, 'name'): filename = fileobj.name
+            else: filename = ''
+        if mode is None:
+            if hasattr(fileobj, 'mode'): mode = fileobj.mode
+            else: mode = 'rb'
+        if mode[0:1] == 'r':
+            self.mode = READ
+            # Set flag indicating start of a new member
+            self._new_member = True
+            # Decompressed data not yet handed to the caller, and its length
+            self.extrabuf = ""
+            self.extrasize = 0
+            self.filename = filename
+            # Starts small, scales exponentially
+            self.min_readsize = 100
+        elif mode[0:1] == 'w' or mode[0:1] == 'a':
+            self.mode = WRITE
+            self._init_write(filename)
+            # Negative window bits: zlib produces a raw deflate stream, the
+            # gzip header and trailer being written by this class.
+            self.compress = zlib.compressobj(compresslevel,
+                                             zlib.DEFLATED,
+                                             -zlib.MAX_WBITS,
+                                             zlib.DEF_MEM_LEVEL,
+                                             0)
+        else:
+            raise IOError, "Mode " + mode + " not supported"
+        self.fileobj = fileobj
+        # Position in the uncompressed stream, as reported by tell()
+        self.offset = 0
+        # Compressed bytes already obtained but not yet consumed by
+        # _read_internal
+        self.inputbuf = ''
+        # Last 8 bytes returned by _read_internal; _read_eof decodes the
+        # member trailer (CRC and size) from them
+        self.last8 = ''
+        if self.mode == WRITE:
+            self._write_gzip_header()
+    def __repr__(self):
+        """Return '<gzip ...>' built from the repr of the underlying file
+        object and the id of this object."""
+        s = repr(self.fileobj)
+        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
+    def _init_write(self, filename):
+        """Initialise the write-mode state: stored file name (with a '.gz'
+        suffix appended when missing), running CRC and size."""
+        if filename[-3:] != '.gz':
+            filename = filename + '.gz'
+        self.filename = filename
+        self.crc = zlib.crc32("")
+        self.size = 0
+        # writebuf and bufsize are not read anywhere else in this class
+        self.writebuf = []
+        self.bufsize = 0
+    def _write_gzip_header(self):
+        """Write the gzip member header to the underlying file object."""
+        self.fileobj.write('\037\213')             # magic header
+        self.fileobj.write('\010')                 # compression method
+        # Original file name: the stored name without its '.gz' suffix
+        fname = self.filename[:-3]
+        flags = 0
+        if fname:
+            # FNAME is expected to be a module-level constant (the FNAME bit
+            # of the FLG byte, value 8 in RFC 1952).
+            # NOTE(review): confirm it is defined in this module.
+            flags = FNAME
+        self.fileobj.write(chr(flags))
+        # Modification time: the current time
+        write32u(self.fileobj, long(time.time()))
+        # Extra flags byte, then operating system byte (RFC 1952: XFL and OS)
+        self.fileobj.write('\002')
+        self.fileobj.write('\377')
+        if fname:
+            self.fileobj.write(fname + '\000')
+    def _init_read(self):
+        """Reset the running CRC and size before reading a new member."""
+        self.crc = zlib.crc32("")
+        self.size = 0
+    def _read_internal(self, size):
+        """Return up to size compressed bytes, taken from inputbuf after
+        topping it up from the underlying file object, and keep last8
+        in step with the bytes returned."""
+        if len(self.inputbuf) < size:
+            self.inputbuf += self.fileobj.read(size-len(self.inputbuf))
+        chunk = self.inputbuf[:size]
+        # need to use len(chunk) bellow instead of size in case it's EOF.
+        if len(chunk) < 8:
+            self.last8 = self.last8[len(chunk):] + chunk
+        else:
+            self.last8 = chunk[-8:]
+        self.inputbuf = self.inputbuf[size:]
+        return chunk
+    def _read_gzip_header(self):
+        """Read and check the header of a gzip member, discarding its
+        optional fields.
+        Raises EOFError when no complete magic number can be read and
+        IOError on a wrong magic number or compression method.
+        """
+        magic = self._read_internal(2)
+        if len(magic) != 2:
+            raise EOFError, "Reached EOF"
+        if magic != '\037\213':
+            raise IOError, 'Not a gzipped file'
+        method = ord( self._read_internal(1) )
+        if method != 8:
+            raise IOError, 'Unknown compression method'
+        flag = ord( self._read_internal(1) )
+        # modtime = self.fileobj.read(4)
+        # extraflag = self.fileobj.read(1)
+        # os = self.fileobj.read(1)
+        self._read_internal(6)
+        # FEXTRA, FNAME, FCOMMENT and FHCRC are expected to be module-level
+        # flag constants.  NOTE(review): confirm they are defined.
+        if flag & FEXTRA:
+            # Read & discard the extra field, if present
+            xlen = ord(self._read_internal(1))
+            xlen = xlen + 256*ord(self._read_internal(1))
+            self._read_internal(xlen)
+        if flag & FNAME:
+            # Read and discard a null-terminated string containing the filename
+            while True:
+                s = self._read_internal(1)
+                if not s or s=='\000':
+                    break
+        if flag & FCOMMENT:
+            # Read and discard a null-terminated string containing a comment
+            while True:
+                s = self._read_internal(1)
+                if not s or s=='\000':
+                    break
+        if flag & FHCRC:
+            self._read_internal(2)     # Read & discard the 16-bit header CRC
+    def write(self,data):
+        """Compress data and write it to the underlying file object,
+        updating the running size, CRC and offset.
+        Raises IOError(EBADF) in read mode and ValueError once closed.
+        """
+        if self.mode != WRITE:
+            import errno
+            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
+        if self.fileobj is None:
+            raise ValueError, "write() on closed GzipFile object"
+        if len(data) > 0:
+            self.size = self.size + len(data)
+            self.crc = zlib.crc32(data, self.crc)
+            self.fileobj.write( self.compress.compress(data) )
+            self.offset += len(data)
+    def read(self, size=-1):
+        """Return up to size bytes of uncompressed data, or everything up
+        to the end of the file when size is negative.
+        Raises IOError(EBADF) in write mode.
+        """
+        if self.mode != READ:
+            import errno
+            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
+        if self.extrasize <= 0 and self.fileobj is None:
+            return ''
+        readsize = 1024
+        if size < 0:        # get the whole thing
+            try:
+                while True:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                size = self.extrasize
+        else:               # just get some more of it
+            try:
+                while size > self.extrasize:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                if size > self.extrasize:
+                    size = self.extrasize
+        # Hand out the head of the decompressed buffer and keep the rest
+        chunk = self.extrabuf[:size]
+        self.extrabuf = self.extrabuf[size:]
+        self.extrasize = self.extrasize - size
+        self.offset += size
+        return chunk
+    def _unread(self, buf):
+        """Push buf back in front of the decompressed buffer and move the
+        offset back accordingly."""
+        self.extrabuf = buf + self.extrabuf
+        self.extrasize = len(buf) + self.extrasize
+        self.offset -= len(buf)
+    def _read(self, size=1024):
+        """Read up to size compressed bytes, decompress them and append the
+        result to the decompressed buffer.
+        Raises EOFError once the end of the file has been reached.
+        """
+        if self.fileobj is None:
+            raise EOFError, "Reached EOF"
+        if self._new_member:
+            # If the _new_member flag is set, we have to
+            # jump to the next member, if there is one.
+            #
+            # _read_gzip_header will raise EOFError exception
+            # if there no more members to read.
+            self._init_read()
+            self._read_gzip_header()
+            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+            self._new_member = False
+        # Read a chunk of data from the file
+        buf = self._read_internal(size)
+        # If the EOF has been reached, flush the decompression object
+        # and mark this object as finished.
+        if buf == "":
+            uncompress = self.decompress.flush()
+            self._read_eof()
+            self._add_read_data( uncompress )
+            raise EOFError, 'Reached EOF'
+        uncompress = self.decompress.decompress(buf)
+        self._add_read_data( uncompress )
+        if self.decompress.unused_data != "":
+            # Ending case: we've come to the end of a member in the file,
+            # so put back unused_data and initialize last8 by reading them.
+            self.inputbuf = self.decompress.unused_data + self.inputbuf
+            self._read_internal(8)
+            # Check the CRC and file size, and set the flag so we read
+            # a new member on the next call
+            self._read_eof()
+            self._new_member = True
+    def _add_read_data(self, data):
+        """Account for freshly decompressed data: update the CRC, append it
+        to the decompressed buffer and update the sizes."""
+        self.crc = zlib.crc32(data, self.crc)
+        self.extrabuf = self.extrabuf + data
+        self.extrasize = self.extrasize + len(data)
+        self.size = self.size + len(data)
+    def _read_eof(self):
+        """Check the CRC and size held in last8 against the values computed
+        while reading; raise IOError on a mismatch."""
+        # We've read to the end of the file, so we have to rewind in order
+        # to reread the 8 bytes containing the CRC and the file size.
+        # We check the that the computed CRC and size of the
+        # uncompressed data matches the stored values.  Note that the size
+        # stored is the true file size mod 2**32.
+        crc32 = unpack32(self.last8[:4])
+        isize = U32(unpack32(self.last8[4:]))   # may exceed 2GB
+        if U32(crc32) != U32(self.crc):
+            raise IOError, "CRC check failed"
+        elif isize != LOWU32(self.size):
+            raise IOError, "Incorrect length of data produced"
+    def close(self):
+        """Finish the stream: in write mode flush the compressor and write
+        the CRC and size trailer; then drop the underlying file object and
+        close it when it was opened by the constructor."""
+        if self.mode == WRITE:
+            self.fileobj.write(self.compress.flush())
+            # The native zlib crc is an unsigned 32-bit integer, but
+            # the Python wrapper implicitly casts that to a signed C
+            # long.  So, on a 32-bit box self.crc may "look negative",
+            # while the same crc on a 64-bit box may "look positive".
+            # To avoid irksome warnings from the `struct` module, force
+            # it to look positive on all boxes.
+            write32u(self.fileobj, LOWU32(self.crc))
+            # self.size may exceed 2GB, or even 4GB
+            write32u(self.fileobj, LOWU32(self.size))
+            self.fileobj = None
+        elif self.mode == READ:
+            self.fileobj = None
+        if self.myfileobj:
+            self.myfileobj.close()
+            self.myfileobj = None
+    def __del__(self):
+        """Close the file on deletion unless it is already closed or was
+        never fully initialised."""
+        try:
+            if (self.myfileobj is None and
+                self.fileobj is None):
+                return
+        except AttributeError:
+            # __init__ failed before these attributes were set
+            return
+        self.close()
+    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
+        """Flush the compressor (write mode only) with the given zlib flush
+        mode, then flush the underlying file object."""
+        if self.mode == WRITE:
+            # Ensure the compressor's buffer is flushed
+            self.fileobj.write(self.compress.flush(zlib_mode))
+        self.fileobj.flush()
+    def fileno(self):
+        """Invoke the underlying file object's fileno() method.
+        This will raise AttributeError if the underlying file object
+        doesn't support fileno().
+        """
+        return self.fileobj.fileno()
+    def isatty(self):
+        """Always return False."""
+        return False
+    def tell(self):
+        """Return the current position in the uncompressed stream."""
+        return self.offset
+    def rewind(self):
+        '''Return the uncompressed stream file position indicator to the
+        beginning of the file'''
+        if self.mode != READ:
+            raise IOError("Can't rewind in write mode")
+        self.fileobj.seek(0)
+        self._new_member = True
+        self.extrabuf = ""
+        self.extrasize = 0
+        self.offset = 0
+        # NOTE(review): inputbuf and last8 are not reset here -- confirm that
+        # compressed bytes left over in inputbuf cannot be replayed after
+        # a rewind.
+    def seek(self, offset):
+        """Move to the absolute position offset of the uncompressed stream.
+        In write mode only forward seeks are possible and the gap is
+        filled with NUL bytes; in read mode a backward seek rewinds the
+        file and reads forward again.
+        """
+        if self.mode == WRITE:
+            if offset < self.offset:
+                raise IOError('Negative seek in write mode')
+            count = offset - self.offset
+            for i in range(count // 1024):
+                self.write(1024 * '\0')
+            self.write((count % 1024) * '\0')
+        elif self.mode == READ:
+            if offset < self.offset:
+                # for negative seek, rewind and do positive seek
+                self.rewind()
+            count = offset - self.offset
+            for i in range(count // 1024):
+                self.read(1024)
+            self.read(count % 1024)
+    def readline(self, size=-1):
+        """Read and return one line, newline included; at most size bytes
+        are returned when size is not negative."""
+        if size < 0:
+            size = sys.maxint
+            readsize = self.min_readsize
+        else:
+            readsize = size
+        bufs = []
+        while size != 0:
+            c = self.read(readsize)
+            i = c.find('\n')
+            # We set i=size to break out of the loop under two
+            # conditions: 1) there's no newline, and the chunk is
+            # larger than size, or 2) there is a newline, but the
+            # resulting line would be longer than 'size'.
+            if (size <= i) or (i == -1 and len(c) > size):
+                i = size - 1
+            if i >= 0 or c == '':
+                bufs.append(c[:i + 1])    # Add portion of last chunk
+                self._unread(c[i + 1:])   # Push back rest of chunk
+                break
+            # Append chunk to list, decrease 'size',
+            bufs.append(c)
+            size = size - len(c)
+            readsize = min(size, readsize * 2)
+        # Remember a larger starting read size for the next call (capped at 512)
+        if readsize > self.min_readsize:
+            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
+        return ''.join(bufs) # Return resulting line
+    def readlines(self, sizehint=0):
+        """Return the remaining lines as a list, stopping once about
+        sizehint bytes have been read when sizehint is positive."""
+        # Negative numbers result in reading all the lines
+        if sizehint <= 0:
+            sizehint = sys.maxint
+        L = []
+        while sizehint > 0:
+            line = self.readline()
+            if line == "":
+                break
+            L.append(line)
+            sizehint = sizehint - len(line)
+        return L
+    def writelines(self, L):
+        """Write every item of the iterable L with write()."""
+        for line in L:
+            self.write(line)
+    def __iter__(self):
+        """The file is its own line iterator."""
+        return self
+    def next(self):
+        """Return the next line; raise StopIteration at the end of the file."""
+        line = self.readline()
+        if line:
+            return line
+        else:
+            raise StopIteration
+def _test():
+    """Command-line driver: compress the files named in sys.argv, or
+    decompress them when the first argument is -d ("-" stands for
+    stdin/stdout)."""
+    # Act like gzip; with -d, act like gunzip.
+    # The input file is not deleted, however, nor are any other gzip
+    # options or features supported.
+    args = sys.argv[1:]
+    decompress = args and args[0] == "-d"
+    if decompress:
+        args = args[1:]
+    for arg in (args or ["-"]):
+        if decompress and arg == "-":
+            f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
+            g = sys.stdout
+        elif decompress:
+            if arg[-3:] != ".gz":
+                print "filename doesn't end in .gz:", repr(arg)
+                continue
+            f = open(arg, "rb")
+            g = __builtin__.open(arg[:-3], "wb")
+        elif arg == "-":
+            f = sys.stdin
+            g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
+        else:
+            f = __builtin__.open(arg, "rb")
+            g = open(arg + ".gz", "wb")
+        # copy the input to the output by blocks of 1024 bytes
+        chunk = f.read(1024)
+        while chunk:
+            g.write(chunk)
+            chunk = f.read(1024)
+        if g is not sys.stdout:
+            g.close()
+        if f is not sys.stdin:
+            f.close()
+# Run the command-line gzip/gunzip emulation when executed as a script.
+if __name__ == '__main__':
+    _test()
diff --git a/src/obitools/interactive/__init__.py b/src/obitools/interactive/__init__.py
new file mode 100644
index 0000000..0911cce
--- /dev/null
+++ b/src/obitools/interactive/__init__.py
@@ -0,0 +1,30 @@
+from obitools import bioSeqGenerator as __bioSeqGenerator
+from obitools import BioSequence
+from obitools.fasta import formatFasta
+class InteractiveBioseqProxy:
+    '''
+    Thin wrapper around a C{BioSequence}: its C{repr} is the fasta
+    rendering of the sequence and every attribute that is not found on
+    the wrapper is fetched from the wrapped sequence.
+    '''
+    def __init__(self,bio):
+        '''
+        @param bio: the sequence to wrap
+        @type bio: C{BioSequence}
+        '''
+        assert isinstance(bio, BioSequence)
+        self._reference=bio
+    def __repr__(self):
+        wrapped=self._reference
+        return formatFasta(wrapped)
+    def __getattr__(self,key):
+        # only called for attributes missing on the wrapper itself
+        wrapped=self._reference
+        return getattr(wrapped,key)
+def bioseq(seq,id=None,definition=None):
+    '''
+    Build a sequence wrapped in an C{InteractiveBioseqProxy}.
+    @param seq: the sequence, handed to the sequence generator as is
+    @param id: identifier of the sequence; when C{None} the next
+               identifier of the series C{seq00001}, C{seq00002}... is used
+    @param definition: definition of the sequence; C{None} stands for
+                       the empty string
+    @return: an C{InteractiveBioseqProxy} instance
+    '''
+    global __anonymous_seq__
+    if id is None:
+        # The counter is created on first use: no visible module level
+        # statement initialises it, so a bare += raised NameError.
+        __anonymous_seq__ = globals().get('__anonymous_seq__',0) + 1
+        id='seq%05d' % __anonymous_seq__
+    if definition is None:
+        definition=""
+    return InteractiveBioseqProxy(__bioSeqGenerator(id,seq,definition))
diff --git a/src/obitools/location/__init__.py b/src/obitools/location/__init__.py
new file mode 100644
index 0000000..26a347c
--- /dev/null
+++ b/src/obitools/location/__init__.py
@@ -0,0 +1,547 @@
+import obitools
+import re
+import array
+class Location(object):
+    """
+    Base class of the locations on a sequence.
+    The query methods defined here all return C{None}; the location
+    classes of this module override them and provide the
+    C{_extractSequence}, C{_extractQuality}, C{simplify} and
+    C{_getglocpos} methods used below.
+    """
+    def extractSequence(self,sequence):
+        '''
+        Extract the subsequence described by the location.
+        A C{str} gives a C{str}. A C{BioSequence} gives a new sequence of
+        the same kind (C{AASequence} or C{NucSequence}) carrying the tags
+        of the original, a C{location} tag and, when the original has a
+        C{length} tag, an updated C{length}. When C{sequence} has a
+        C{quality} attribute, the matching quality is attached too.
+        @param sequence: the sequence to cut
+        @type sequence: C{BioSequence} or C{str}
+        @return: the extracted subsequence
+        '''
+        assert isinstance(sequence, (obitools.BioSequence,str)), \
+           "sequence must be an instance of str or BioSequence"
+        if isinstance(sequence, str):
+            seq = self._extractSequence(sequence)
+        else:
+            isprotein = isinstance(sequence, obitools.AASequence)
+            if isprotein:
+                assert not self.needNucleic(), \
+                    "This location can be used only with Nucleic sequences"
+            subseq = self._extractSequence(str(sequence))
+            seqclass = obitools.AASequence if isprotein else obitools.NucSequence
+            seq = seqclass(sequence.id,
+                           subseq,
+                           sequence.definition,
+                           **sequence.getTags())
+            seq['location']=str(self)
+            if 'length' in  sequence.getTags():
+                seq['length']=len(seq)
+        if hasattr(sequence, 'quality'):
+            seq.quality=self._extractQuality(sequence)
+        return seq
+    def isDirect(self):
+        '''
+        @return: C{None} in this base class
+        '''
+        return None
+    def isSimple(self):
+        '''
+        Tell whether the location is a single continuous region, as
+        opposed to several locations assembled by the C{join} operator.
+        @return: C{True} for a single continuous region; C{None} in
+                 this base class
+        @rtype: bool
+        '''
+        return None
+    def isFullLength(self):
+        '''
+        @return: C{None} in this base class
+        '''
+        return None
+    def needNucleic(self):
+        '''
+        A location containing a complement operator can only be used
+        on a nucleic sequence.
+        @return: C{True} if the location contains a complement
+                 operator; C{None} in this base class
+        @rtype: bool
+        '''
+        return None
+    def getGloc(self):
+        '''
+        @return: the string C{(T,positions)} or C{(F,positions)} built
+                 from the orientation and the C{_getglocpos()} positions
+                 of the simplified location
+        '''
+        loc = self.simplify()
+        assert loc.isDirect() is not None,"Gloc cannot be created for multi oriented location : %s" % str(loc)
+        strand = {True:'T',False:'F'}[loc.isDirect()]
+        positions = ','.join([str(x) for x in loc._getglocpos()])
+        return "(%s,%s)" % (strand,positions)
+    def shift(self,s):
+        '''
+        @return: C{None} in this base class
+        '''
+        return None
+    def getBegin(self):
+        '''
+        @return: C{None} in this base class
+        '''
+        return None
+    def getEnd(self):
+        '''
+        @return: C{None} in this base class
+        '''
+        return None
+    def getFivePrime(self):
+        '''
+        @return: the value of C{getBegin()}
+        '''
+        return self.getBegin()
+    def getThreePrime(self):
+        '''
+        @return: the value of C{getEnd()}
+        '''
+        return self.getEnd()
+    begin = property(getBegin,None,None,"beginning position of the location")
+    end = property(getEnd,None,None,"ending position of the location")
+    fivePrime=property(getFivePrime,None,None,"5' position of the location")
+    threePrime=property(getThreePrime,None,None,"3' position of the location")
+    def __abs__(self):
+        '''
+        @return: the location itself when it is direct, otherwise the
+                 simplified complement of the location
+        '''
+        assert self.isDirect() is not None,"Abs operator cannot be applied on non oriented location"
+        if not self.isDirect():
+            return ComplementLocation(self).simplify()
+        return self
+    def __cmp__(self,y):
+        '''
+        Order locations on their C{begin} position; on a tie a direct
+        location sorts before a non direct one.
+        '''
+        mine,other = self.begin,y.begin
+        if mine < other:
+            return -1
+        if mine > other:
+            return 1
+        mydirect,otherdirect = self.isDirect(),y.isDirect()
+        if mydirect == otherdirect:
+            return 0
+        if mydirect and not otherdirect:
+            return -1
+        return 1
+class SimpleLocation(Location):
+    """
+    A simple location describes a continuous region of
+    a sequence defined by a C{begin} and an C{end} position.
+    """
+    def __init__(self,begin,end):
+        '''
+        Build a new C{SimpleLocation} instance. Valid
+        positions are defined on M{[1,N]} with N the length
+        of the sequence.
+        @param begin: start position of the location
+        @type begin:  int
+        @param end:   end position of the location
+        @type end:    int
+        '''
+        assert begin > 0 and end > 0
+        self._begin = begin
+        self._end   = end
+        # flags rendered as '<' and '>' by __str__
+        self._before=False
+        self._after=False
+    def _extractSequence(self,sequence):
+        '''
+        @return: the slice of C{sequence} covering positions C{begin} to
+                 C{end}, both included
+        '''
+        # begin may be the last position of the sequence, hence <= and
+        # not < as previously tested
+        assert (    self._begin <= len(sequence)
+                and self._end <= len(sequence)), \
+                "Sequence length %d is too short" % len(sequence)
+        return sequence[self._begin-1:self._end]
+    def _extractQuality(self,sequence):
+        '''
+        @return: the slice of C{sequence.quality} covering positions
+                 C{begin} to C{end}, both included
+        '''
+        assert (    self._begin <= len(sequence)
+                and self._end <= len(sequence)), \
+                "Sequence length %d is too short" % len(sequence)
+        return sequence.quality[self._begin-1:self._end]
+    def isDirect(self):
+        return True
+    def isSimple(self):
+        return True
+    def isFullLength(self):
+        '''
+        @return: C{True} when neither the C{before} nor the C{after}
+                 flag is set
+        '''
+        return not (self.before or self.after)
+    def simplify(self):
+        '''
+        @return: a C{PointLocation} when begin and end are equal,
+                 otherwise the location itself
+        '''
+        if self._begin == self._end:
+            return PointLocation(self._begin)
+        else:
+            return self
+    def needNucleic(self):
+        return False
+    def __str__(self):
+        before = {True:'<',False:''}[self.before]
+        after  = {True:'>',False:''}[self.after]
+        return "%s%d..%s%d" % (before,self._begin,after,self._end)
+    def shift(self,s):
+        '''
+        @param s: offset added to both positions
+        @return: the location itself when C{s} is 0, otherwise a new
+                 C{SimpleLocation} (its before/after flags are not set)
+        '''
+        assert (self._begin + s) > 0,"shift to large (%d)" % s 
+        if s == 0:
+            return self
+        return SimpleLocation(self._begin + s, self._end + s)
+    def _getglocpos(self):
+        return (self.begin,self.end)
+    def getGloc(self):
+        '''
+        @return: the string C{(T,begin,end)}
+        '''
+        positions = ','.join([str(x) for x in self._getglocpos()])
+        return "(%s,%s)" % ({True:'T',False:'F'}[self.isDirect()],
+                            positions)
+    def getBegin(self):
+        return self._begin
+    def getEnd(self):
+        return self._end
+    begin = property(getBegin,None,None,"beginning position of the location")
+    end = property(getEnd,None,None,"ending position of the location")
+    def getBefore(self):
+        return self._before
+    def getAfter(self):
+        return self._after
+    def setBefore(self,value):
+        assert isinstance(value, bool)
+        self._before=value
+    def setAfter(self,value):
+        assert isinstance(value, bool)
+        self._after=value
+    before=property(getBefore,setBefore,None)
+    after=property(getAfter,setAfter,None)
+class PointLocation(Location):
+    """
+    A point location describes a location on a sequence
+    limited to a single position
+    """
+    def __init__(self,position):
+        '''
+        @param position: position of the point, counted from 1
+        @type position: int
+        '''
+        assert position > 0
+        self._pos=position
+    def _extractSequence(self,sequence):
+        '''
+        @return: the element of C{sequence} at the position of the point
+        '''
+        # the only position attribute of this class is _pos; the _end
+        # previously tested here does not exist
+        assert self._pos <= len(sequence), \
+                "Sequence length %d is too short" % len(sequence)
+        return sequence[self._pos-1]
+    def _extractQuality(self,sequence):
+        '''
+        @return: the one element slice of C{sequence.quality} at the
+                 position of the point
+        '''
+        assert self._pos <= len(sequence), \
+                "Sequence length %d is too short" % len(sequence)
+        # slice the quality, as SimpleLocation does, not the sequence
+        return sequence.quality[self._pos-1:self._pos]
+    def isDirect(self):
+        return True
+    def isSimple(self):
+        return True
+    def isFullLength(self):
+        return True
+    def simplify(self):
+        return self
+    def needNucleic(self):
+        return False
+    def shift(self,s):
+        '''
+        @param s: offset added to the position
+        @return: the location itself when C{s} is 0, otherwise a new
+                 C{PointLocation}
+        '''
+        assert (self._pos + s) > 0,"shift to large (%d)" % s 
+        if s == 0:
+            return self
+        return PointLocation(self._pos + s)
+    def _getglocpos(self):
+        return (self._pos,self._pos)
+    def getBegin(self):
+        return self._pos
+    def getEnd(self):
+        return self._pos
+    begin = property(getBegin,None,None,"beginning position of the location")
+    end = property(getEnd,None,None,"ending position of the location")
+    def __str__(self):
+        return str(self._pos)
+class CompositeLocation(Location):
+    """
+    Location made of several locations, written C{join(...)}.
+    """
+    def __init__(self,locations):
+        '''
+        @param locations: iterable of the locations to assemble
+        '''
+        self._locs = tuple(locations)
+    def _extractSequence(self,sequence):
+        '''
+        @return: the concatenation of the parts extracted by each
+                 sub-location, in their order
+        '''
+        seq = ''.join([x._extractSequence(sequence)
+                       for x in self._locs])
+        return seq
+    def _extractQuality(self,sequence):
+        '''
+        @return: an C{array} of doubles holding the qualities extracted
+                 by each sub-location, in their order
+        '''
+        rep=array.array('d',[])
+        for x in self._locs:
+            rep.extend(x._extractQuality(sequence))
+        return rep
+    def isDirect(self):
+        '''
+        @return: C{True} when every sub-location is direct, C{False}
+                 when none is, C{None} when both kinds are mixed or
+                 when there is no sub-location
+        '''
+        hasDirect,hasReverse = reduce(lambda x,y: (x[0] or y,x[1] or not y),
+                            (z.isDirect() for z in self._locs),(False,False))
+        if hasDirect and not hasReverse:
+            return True
+        if hasReverse and not hasDirect:
+            return False
+        return None
+    def isSimple(self):
+        return False
+    def simplify(self):
+        '''
+        @return: the single sub-location when there is only one;
+                 otherwise a composite of the simplified sub-locations,
+                 rewritten as the complement of a reversed composite
+                 when every sub-location is a C{ComplementLocation}
+        '''
+        if len(self._locs)==1:
+            return self._locs[0]
+        rep = CompositeLocation(x.simplify() for x in self._locs)
+        if reduce(lambda x,y : x and y,
+                      (isinstance(z, ComplementLocation)
+                       for z in self._locs)):
+            rep = ComplementLocation(CompositeLocation(x._loc.simplify()
+                                                       for x in rep._locs[::-1]))
+        return rep
+    def isFullLength(self):
+        return reduce(lambda x,y : x and y, (z.isFullLength() for z in self._locs),1)
+    def needNucleic(self):
+        '''
+        @return: C{True} if at least one sub-location needs a nucleic
+                 sequence
+        @rtype: bool
+        '''
+        # needNucleic has to be called: the bound method object that was
+        # folded here before is always true
+        return any(z.needNucleic() for z in self._locs)
+    def _getglocpos(self):
+        return reduce(lambda x,y : x + y,
+                      (z._getglocpos() for z in self._locs))
+    def getBegin(self):
+        return min(x.getBegin() for x in self._locs)
+    def getEnd(self):
+        return max(x.getEnd() for x in self._locs)
+    def shift(self,s):
+        '''
+        @param s: offset applied to every sub-location
+        @return: the location itself when C{s} is 0, otherwise a new
+                 C{CompositeLocation} of the shifted sub-locations
+        '''
+        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s 
+        if s == 0:
+            return self
+        return CompositeLocation(x.shift(s) for x in self._locs)
+    begin = property(getBegin,None,None,"beginning position of the location")
+    end = property(getEnd,None,None,"ending position of the location")
+    def __str__(self):
+        return "join(%s)" % ','.join([str(x)
+                                      for x in self._locs])
+class CompositeLocationOrder(CompositeLocation):
+    """Composite location whose textual form uses C{order(...)} instead of C{join(...)}."""
+    def __str__(self):
+        parts = [str(loc) for loc in self._locs]
+        return "order(%s)" % ','.join(parts)
+class ComplementLocation(Location):
+    """
+    Location read on the reverse strand of the wrapped location.
+    Its textual form is C{complement(loc)}.
+    """
+    # complement of each nucleotide code, indexed by lower case letter
+    _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
+           'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
+           's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
+           'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
+           '-': '-'}
+    def __init__(self,location):
+        """
+        @param location: the location to complement
+        """
+        self._loc = location
+    def _extractSequence(self,sequence):
+        """
+        Extract the wrapped location and return its reverse complement.
+        The result is lower case; unknown symbols are replaced by C{n}.
+        """
+        seq = self._loc._extractSequence(sequence)
+        seq = ''.join([ComplementLocation._comp.get(x.lower(),'n') for x in seq[::-1]])
+        return seq
+    def _extractQuality(self,sequence):
+        """
+        Return the qualities of the wrapped location in reverse order.
+        """
+        # restrict to the wrapped location, like _extractSequence does,
+        # instead of reversing the quality of the whole sequence
+        return self._loc._extractQuality(sequence)[::-1]
+    def isDirect(self):
+        """A complemented location is never direct."""
+        return False
+    def isSimple(self):
+        return self._loc.isSimple()
+    def isFullLength(self):
+        return self._loc.isFullLength()
+    def simplify(self):
+        """
+        Remove a double complement: the simplified inner location is
+        returned when the wrapped location is itself a complement.
+        """
+        if isinstance(self._loc, ComplementLocation):
+            return self._loc._loc.simplify()
+        else:
+            return self
+    def needNucleic(self):
+        """Complementing always requires a nucleic sequence."""
+        return True
+    def __str__(self):
+        return "complement(%s)" % self._loc
+    def shift(self,s):
+        """
+        Return the location moved by C{s} positions (itself when C{s} is 0).
+        @raise AssertionError: when the shifted beginning is not strictly positive
+        """
+        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
+        if s == 0:
+            return self
+        return ComplementLocation(self._loc.shift(s))
+    def _getglocpos(self):
+        return self._loc._getglocpos()
+    def getBegin(self):
+        return self._loc.getBegin()
+    def getEnd(self):
+        return self._loc.getEnd()
+    def getFivePrime(self):
+        """The 5' end of a complemented location is its ending position."""
+        return self.getEnd()
+    def getThreePrime(self):
+        """The 3' end of a complemented location is its beginning position."""
+        return self.getBegin()
+    begin = property(getBegin,None,None,"beginning position of the location")
+    end = property(getEnd,None,None,"ending position of the location")
+    fivePrime=property(getFivePrime,None,None,"5' position of the location")
+    threePrime=property(getThreePrime,None,None,"3' position of the location")
+                                    #
+                                    # Internal functions used for location parsing
+                                    #
+def __sublocationIterator(text):
+    """
+    Split C{text} on the commas that are not enclosed in parentheses and
+    iterate over the resulting pieces.
+    @param text: comma separated list of location descriptions
+    @raise AssertionError: on empty pieces or unbalanced parentheses
+    """
+    current = []
+    depth = 0
+    for char in text:
+        assert depth>=0,"Misformated location : %s" % text
+        if char == ',' and depth == 0:
+            # top level separator: the current piece is complete
+            assert current,"Misformated location : %s" % text
+            yield ''.join(current)
+            current = []
+            continue
+        if char == '(':
+            depth += 1
+        elif char == ')':
+            depth -= 1
+        current.append(char)
+    assert current and depth==0,"Misformated location : %s" % text
+    yield ''.join(current)
+                                    #
+                                    # Internal functions used for location parsing
+                                    #
+__simplelocparser = re.compile('(?P<before><?)(?P<from>[0-9]+)(\.\.(?P<after>>?)(?P<to>[0-9]+))?')
+def __locationParser(text):
+    """
+    Recursively build the location instance described by C{text}.
+    The C{join(...)}, C{order(...)} and C{complement(...)} operators are
+    recognized; anything else is parsed as C{from} or C{from..to},
+    optionally flagged with C{<} and C{>}.
+    @raise AssertionError: when the description cannot be parsed
+    """
+    text=text.strip()
+    malformed = "Misformated location : %s" % text
+    if text.startswith('join('):
+        assert text.endswith(')'),malformed
+        return CompositeLocation(__locationParser(sl) for sl in __sublocationIterator(text[5:-1]))
+    if text.startswith('order('):
+        assert text.endswith(')'),malformed
+        return CompositeLocationOrder(__locationParser(sl) for sl in __sublocationIterator(text[6:-1]))
+    if text.startswith('complement('):
+        assert text.endswith(')'),malformed
+        parts = tuple(__locationParser(sl) for sl in __sublocationIterator(text[11:-1]))
+        # several complemented locations are joined before being wrapped
+        inner = CompositeLocation(parts) if len(parts)>1 else parts[0]
+        return ComplementLocation(inner)
+    found = __simplelocparser.match(text)
+    assert found is not None,malformed
+    fields = found.groupdict()
+    if fields['to']:
+        loc = SimpleLocation(int(fields['from']),int(fields['to']))
+    else:
+        loc = PointLocation(int(fields['from']))
+    loc.before = fields['before']=='<'
+    loc.after = fields['after']=='>'
+    return loc
+def locationGenerator(locstring):
+    '''
+    Parse a location string as found in a genbank or an embl file.
+    This is the public entry point of the location parser.
+    @param locstring: string description of the location in embl/gb format
+    @type locstring: str
+    @return: a Location instance
+    @rtype: C{Location} subclass instance
+    @raise AssertionError: when the parser cannot read the description
+    '''
+    return __locationParser(locstring)
+_matchExternalRef = re.compile('[A-Za-z0-9_|]+(\.[0-9]+)?(?=:)')
+def extractExternalRefs(locstring):
+    '''
+    Separate the external references of a location (ex: D28156.1:1..>1292)
+    from the location itself.
+    @param locstring: text representation of the location.
+    @type locstring: str
+    @return: a tuple with the set of the referred accession numbers
+             and the C{Location} instance parsed once the references
+             are removed.
+    @rtype: tuple(set,Location)
+    '''
+    refs = set()
+    for hit in _matchExternalRef.finditer(locstring):
+        refs.add(hit.group())
+    # one alternative per reference, each followed by its colon
+    pattern = ':|'.join([re.escape(ref) for ref in refs])+':'
+    stripped = re.compile(pattern).sub('',locstring)
+    return refs,locationGenerator(stripped)
diff --git a/src/obitools/location/feature.py b/src/obitools/location/feature.py
new file mode 100644
index 0000000..89a183f
--- /dev/null
+++ b/src/obitools/location/feature.py
@@ -0,0 +1,177 @@
+from obitools.location import Location,locationGenerator
+import logging
+import re
+_featureMatcher = re.compile('^(FT|  )   [^ ].+\n((FT|  )    .+\n)+',re.M)
+_featureCleaner = re.compile('^FT',re.M)
+def textFeatureIterator(fttable):
+    '''
+    Iterate over the text of each feature of a feature table in genbank
+    or embl format. The C{FT} prefix of the embl lines is replaced by
+    two spaces in the returned text.
+    @param fttable:  a string corresponding to the feature table of a genbank
+                     or an embl entry
+    @type fttable: C{str}
+    @return: an iterator on str
+    @rtype: iterator
+    @see: L{ftParser}
+    '''
+    for found in _featureMatcher.finditer(fttable):
+        yield _featureCleaner.sub('  ',found.group())
+_qualifierMatcher = re.compile('(?<=^ {21}/).+(\n {21}[^/].+)*',re.M)
+_qualifierCleanner= re.compile("^ +",re.M)
+def qualifierIterator(qualifiers):
+    '''
+    Parse the qualifier part of a feature in embl or genbank format
+    as returned by the textFeatureIterator iterator and iterate through
+    the (key, value) pairs it contains.
+    Values written as Python literals (quoted strings, numbers) are
+    converted; any other value is kept as text. A qualifier without
+    value gets C{None}.
+    @param qualifiers: substring containing qualifiers
+    @type qualifiers: str
+    @return: an iterator on tuple (key,value), where keys are C{str}
+    @rtype: iterator
+    '''
+    # local import: keeps the function usable without touching the file header
+    import ast
+    for m in _qualifierMatcher.finditer(qualifiers):
+        parts = _qualifierCleanner.sub('',m.group()).split('=',1)
+        if len(parts)==1:
+            yield (parts[0],None)
+            continue
+        key,raw = parts
+        if key=='translation':
+            # a protein sequence must not gain spaces at the line breaks
+            value = raw.replace('\n','')
+        else:
+            value = raw.replace('\n',' ')
+        try:
+            # literal_eval only builds literals: unlike eval, a value read
+            # from a file can neither run code nor resolve to a builtin
+            value = ast.literal_eval(value)
+        except (ValueError,SyntaxError,TypeError):
+            pass
+        yield (key,value)
+_ftmatcher = re.compile('(?<=^ {5})\S+')
+_locmatcher= re.compile('(?<=^.{21})[^/]+',re.DOTALL)
+_cleanloc  = re.compile('[\s\n]+')
+_qualifiersMatcher = re.compile('^ +/.+',re.M+re.DOTALL)
+def ftParser(feature):
+    """
+    Split the text of a feature into its three parts.
+    @param feature: text of one feature as returned by textFeatureIterator
+    @return: a tuple (feature type, location text without any white
+             space, qualifiers text); the qualifiers text is empty when
+             the feature has no qualifier
+    """
+    fttype = _ftmatcher.search(feature).group()
+    rawlocation = _locmatcher.search(feature).group()
+    location = _cleanloc.sub('',rawlocation)
+    found = _qualifiersMatcher.search(feature)
+    if found is None:
+        qualifiers=""
+        logging.debug("Qualifiers regex not matching on \n=====\n%s\n========" % feature)
+    else:
+        qualifiers=found.group()
+    return fttype,location,qualifiers
+class Feature(dict,Location):
+    """
+    A feature: a type name, a location and qualifiers stored as the
+    items of the dictionary. The location related methods are delegated
+    to the wrapped location.
+    """
+    def __init__(self,type,location):
+        """
+        @param type: feature type name
+        @param location: location of the feature
+        """
+        self._loc=location
+        self._fttype=type
+    def getFttype(self):
+        return self._fttype
+    ftType = property(getFttype, None, None, "Feature type name")
+    def _relocated(self,location):
+        # copy of the feature (type and qualifiers) bound to another location
+        clone = Feature(self._fttype,location)
+        clone.update(self)
+        return clone
+    def extractSequence(self,sequence,withQualifier=False):
+        """
+        Extract the part of C{sequence} covered by the feature.
+        @param withQualifier: when true the qualifiers are added to the
+                              info of the extracted sequence
+        """
+        extracted = self._loc.extractSequence(sequence)
+        if withQualifier:
+            extracted.getInfo().update(self)
+        return extracted
+    def isDirect(self):
+        return self._loc.isDirect()
+    def isSimple(self):
+        return self._loc.isSimple()
+    def isFullLength(self):
+        return self._loc.isFullLength()
+    def needNucleic(self):
+        return self._loc.needNucleic()
+    def simplify(self):
+        """Return a copy of the feature bound to the simplified location."""
+        return self._relocated(self._loc.simplify())
+    def shift(self,s):
+        """
+        Return a copy of the feature moved by C{s} positions, or the
+        feature itself when C{s} is 0.
+        """
+        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
+        if s == 0:
+            return self
+        return self._relocated(self._loc.shift(s))
+    def locStr(self):
+        """Return the textual form of the location."""
+        return str(self._loc)
+    def __repr__(self):
+        description = (self.ftType,str(self._loc),dict.__repr__(self))
+        return str(description)
+    def __str__(self):
+        return repr(self)
+    def __cmp__(self,y):
+        return self._loc.__cmp__(y)
+    def _getglocpos(self):
+        return self._loc._getglocpos()
+    def getBegin(self):
+        return self._loc.getBegin()
+    def getEnd(self):
+        return self._loc.getEnd()
+    begin = property(getBegin,None,None,"beginning position of the location")
+    end = property(getEnd,None,None,"ending position of the location")
+def featureFactory(featureDescription):
+    """
+    Build a C{Feature} from the text of one feature.
+    Each qualifier key maps to the list of its values; the original
+    text is kept in the C{raw} attribute of the feature.
+    """
+    fttype,loctext,qualtext = ftParser(featureDescription)
+    feature = Feature(fttype,locationGenerator(loctext))
+    feature.raw  = featureDescription
+    for key,value in qualifierIterator(qualtext):
+        feature.setdefault(key,[]).append(value)
+    return feature
+def featureIterator(featureTable,skipError=False):
+    """
+    Iterate over the C{Feature} instances described by a feature table.
+    @param featureTable: text of the feature table
+    @param skipError: when true a feature that cannot be parsed is
+                      logged and skipped instead of stopping the iteration
+    @raise AssertionError: on a parsing error when C{skipError} is false
+    """
+    for tft in textFeatureIterator(featureTable):
+        try:
+            feature = featureFactory(tft)
+        except AssertionError:
+            logging.debug("Parsing error on feature :\n===============\n%s\n===============" % tft)
+            if not skipError:
+                # bare raise keeps the traceback of the parsing error
+                raise
+            logging.debug("\t===> Error skipped")
+            continue
+        yield feature
\ No newline at end of file
diff --git a/src/obitools/metabarcoding/__init__.py b/src/obitools/metabarcoding/__init__.py
new file mode 100644
index 0000000..1a88003
--- /dev/null
+++ b/src/obitools/metabarcoding/__init__.py
@@ -0,0 +1,301 @@
+from obitools.ecopcr.options import addTaxonomyFilterOptions,\
+                                    loadTaxonomyDatabase
+from obitools.graph import UndirectedGraph
+from obitools.align import lenlcs,isLCSReachable
+from obitools.graph.algorithms.component import componentIterator
+from obitools.utils.bioseq import uniqSequence
+from obitools.utils import progressBar
+import math
+import sys
+from obitools.graph.rootedtree import RootedTree
+def average(x):
+    """
+    Weighted mean of an iterable of (value, weight) pairs.
+    @return: a tuple (mean, sum of the weights)
+    """
+    weighted = 0
+    total = 0
+    for value,weight in list(x):
+        weighted += value*weight
+        total += weight
+    return (float(weighted)/float(total),total)
+def minimum(x):
+    """
+    Smallest value of an iterable of (value, weight) pairs.
+    @return: a tuple (minimum as a float, sum of the weights)
+    """
+    pairs = list(x)
+    values = [p[0] for p in pairs]
+    weights = [p[1] for p in pairs]
+    return (float(min(values)),sum(weights))
+def ecoPCRReader(entries,options):
+    '''
+    Sort the entries having a usable taxid into the in group and the
+    out group, and select the taxonomic ranks frequent in the in group.
+    :param entries: an iterator over the entries to analyze
+    :type entries: an iterable element
+    :param options: the option structure return by the option manager
+    :type options: object
+    :return: a dict with the keys 'ingroup', 'outgroup' and 'ranks'
+    '''
+    taxonomy = loadTaxonomyDatabase(options)
+    findrank = options.taxonomy.findRankByName
+    norankid = findrank('no rank')
+    speciesid= findrank('species')
+    genusid  = findrank('genus')
+    familyid = findrank('family')
+    # to be used a sequence must have at least
+    # a species a genus and a family
+    minrankseq = set([speciesid,genusid,familyid])
+    usedrankid = {}
+    ingroup = []
+    outgroup= []
+    totalentries     = 0
+    entrieswithtaxid = 0
+    goodtaxid        = 0
+    for s in entries:
+        totalentries+=1
+        if 'taxid' not in s:
+            continue
+        entrieswithtaxid+=1
+        taxid = s['taxid']
+        if taxid not in taxonomy:
+            continue
+        goodtaxid+=1
+        # ranks met on the path from the taxon up to the root
+        allrank = set(p[1] for p in options.taxonomy.parentalTreeIterator(taxid)
+                      if p[1]!=norankid)
+        if len(minrankseq & allrank) != 3:
+            continue
+        if taxonomy.isAncestor(options.ingroup,taxid):
+            for r in allrank:
+                usedrankid[r]=usedrankid.get(r,0) + 1
+            ingroup.append(s)
+        else:
+            outgroup.append(s)
+    ingroupsize = float(len(ingroup))
+    keptrank = set(r for r in usedrankid
+                   if float(usedrankid[r])/ingroupsize > options.rankthresold)
+    return { 'ingroup' : ingroup,   # The group of interest
+             'outgroup': outgroup,  # all other taxa
+             'ranks'   : keptrank   # the ranks more frequent than options.rankthresold
+           }
+def buildSimilarityGraph(dbseq,ranks,taxonomy,dcmax=5):
+    """
+    Build the graph linking the sequences which differ by at most
+    C{dcmax} positions.
+    Each node is indexed by the rank of the sequence in C{dbseq} and
+    carries the sequence, its taxid and its taxon at each rank of
+    C{ranks}. An edge carries the distance (alignment length minus LCS
+    length) in C{ds1s2} and C{label}.
+    @param dbseq: indexable collection of sequences having a 'taxid' key
+    @param ranks: rank ids for which the taxon of each sequence is stored
+    @param taxonomy: object providing C{getTaxonAtRank}
+    @param dcmax: maximum distance for linking two sequences
+    @return: the similarity graph, empty when C{dbseq} is empty
+    """
+    ldbseq = len(dbseq)
+    sim = UndirectedGraph()
+    if ldbseq == 0:
+        # nothing to align, and log10(0) below is undefined
+        return sim
+    pos = 1
+    digit = int(math.ceil(math.log10(ldbseq)))
+    header = "Alignment  : %%0%dd x %%0%dd -> %%0%dd " % (digit,digit,digit)
+    aligncount = ldbseq*(ldbseq+1)//2
+    edgecount = 0
+    sys.stderr.write("\n")
+    progressBar(1,aligncount,True,"Alignment  : %s x %s -> %s " % ('-'*digit,'-'*digit, '0'*digit))
+    for i,s in enumerate(dbseq):
+        taxid = s['taxid']
+        rtaxon = dict((rid,taxonomy.getTaxonAtRank(taxid,rid))
+                      for rid in ranks)
+        sim.addNode(i, seq=s,taxid=taxid,rtaxon=rtaxon)
+    for is1 in xrange(ldbseq):
+        s1 = dbseq[is1]
+        ls1= len(s1)
+        # keeps is2 defined for the progress header when the inner loop
+        # is empty (last sequence, or a single sequence)
+        is2 = is1
+        for is2 in xrange(is1+1,ldbseq):
+            s2=dbseq[is2]
+            ls2=len(s2)
+            lm = max(ls1,ls2)
+            lcsmin = lm - dcmax
+            # cheap filter before running the real alignment
+            if isLCSReachable(s1,s2,lcsmin):
+                llcs,lali=lenlcs(s1,s2)
+                ds1s2 = lali - llcs
+                if ds1s2 <= dcmax:
+                    sim.addEdge(node1=is1, node2=is2,ds1s2=ds1s2,label=ds1s2)
+                    edgecount+=1
+        progressBar(pos,aligncount,head=header % (is1,is2,edgecount))
+        pos+=(ldbseq-is1-1)
+    return sim
+def buildTsr(component):
+    '''
+    Count, for each taxonomic rank, the sequences of the connected
+    component belonging to each taxon. Nodes without taxon at a rank
+    (C{None}) are not counted for this rank.
+    :param component: the analyzed connected component
+    :type component: :py:class:`UndirectedGraph`
+    :return: a dictionary indexed by rankid containing a `dict` indexed by taxid and containing count of sequences for this taxid
+    :rtype: `dict` indexed by `int` containing `dict` indexed by `int` and containing of `int`
+    '''
+    counts = {}
+    for node in component:
+        rtaxon = node['rtaxon']
+        for rank in rtaxon:
+            taxid = rtaxon[rank]
+            if taxid is None:
+                continue
+            bytaxon = counts.setdefault(rank,{})
+            bytaxon[taxid] = bytaxon.get(taxid,0)+1
+    return counts
+def edgeDistSelector(dcmax):
+    """
+    Build a predicate accepting the edges whose C{ds1s2} value is at
+    most C{dcmax}.
+    """
+    def withinDistance(edge):
+        distance = edge['ds1s2']
+        return distance <= dcmax
+    return withinDistance
+def distanceOfConfusion(simgraph,dcmax=5,aggregate=average):
+    """
+    Associate to each taxon found in the C{rtaxon} attribute of the
+    nodes of C{simgraph} an aggregation of (distance, weight) pairs.
+    The graph is cut into connected components keeping only the edges
+    whose C{ds1s2} is at most a threshold; the threshold starts at
+    C{dcmax} and decreases by one at each recursion level down to 0.
+    @param simgraph: graph whose nodes carry a C{rtaxon} mapping and
+                     whose edges carry a C{ds1s2} value
+    @param dcmax: first distance threshold
+    @param aggregate: function reducing a list of (distance, weight) pairs
+    @return: a dict indexed by taxid containing the value returned by
+             C{aggregate} for this taxon
+    """
+    alltaxa = set()
+    for n in simgraph:
+        alltaxa|=set(n['rtaxon'].values())
+    taxacount = len(alltaxa)
+    result = {}
+    # one element list: lets the nested function update the counter
+    pos = [1]
+    header = "Component  : %-5d Identified : %-8d "
+    progressBar(1,taxacount,True,header % (0,0))
+    def _idc(cc,dcmax):
+        # split every graph of cc into its components at threshold dcmax
+        composante=[]
+        for x in cc:
+            composante.extend(simgraph.subgraph(c)
+                              for c in componentIterator(x, 
+                                                         edgePredicat=edgeDistSelector(dcmax)))
+        good = set()
+        bad  = {}
+        complexe = []
+        for c in composante:       
+            tsr = buildTsr(c)
+            newbad=False
+            for r in tsr:
+                if len(tsr[r]) == 1:
+                    # a single taxon at this rank in the component
+                    taxid = tsr[r].keys()[0]
+                    good.add((taxid,tsr[r][taxid]))
+                else:
+                    newbad=True
+                    for taxid in tsr[r]:
+                        bad[taxid]=bad.get(taxid,0)+tsr[r][taxid]
+            if newbad:
+                # at least one rank mixes several taxa: analyzed again
+                # with a smaller threshold
+                complexe.append(c)
+#       good = good - bad
+        for taxid,weight in good:
+            if taxid not in result:
+                result[taxid]=[]
+            result[taxid].append((dcmax+1,weight))
+            progressBar(pos[0],taxacount,False,header % (len(composante),pos[0]))
+            pos[0]=len(result)
+        if dcmax > 0:
+            dcmax-=1
+            _idc(complexe,dcmax)
+        else:
+            # last level: the taxa still mixed get the distance 0
+            for taxid in bad:
+                if taxid not in result:
+                    result[taxid]=[]
+                result[taxid].append((0,bad[taxid]))                
+                progressBar(pos[0],taxacount,False,header % (len(composante),pos[0]))
+                pos[0]=len(result)
+    _idc([simgraph],dcmax)
+    for taxid in result:
+        result[taxid]=aggregate(result[taxid])
+    return result
+def propagateDc(tree,node=None,aggregate=min):
+    """
+    Set the C{dc} attribute of C{node}, then of each node below it, to
+    the aggregation of the C{dc} values of its leaves.
+    @param tree: tree whose first root is used when C{node} is None
+    @param node: node where the propagation starts
+    @param aggregate: function reducing an iterable of C{dc} values
+    """
+    if node is None:
+        node = tree.getRoots()[0]
+    node['dc'] = aggregate(leaf['dc'] for leaf in node.leavesIterator())
+    for child in node:
+        propagateDc(tree, child, aggregate)
+def confusionTree(distances,ranks,taxonomy,aggregate=min,bsrank='species',dcmax=1):
+    '''
+    Build a rooted tree with one node per taxon of C{distances}, each
+    taxon being linked to a parent taxon also present in C{distances},
+    then annotate the nodes with a C{bs} score.
+    :param distances: mapping taxid -> (distance, count)
+    :type distances: dict
+    :param ranks: converted to a set but not used afterwards
+    :type ranks: iterable
+    :param taxonomy: object providing getRank, getScientificName and
+                     parentalTreeIterator
+    :type taxonomy:
+    :param aggregate: not used by this function
+    :type aggregate:
+    :param bsrank: value of the `rank` attribute of the nodes counted in
+                   the `bs` score
+    :type bsrank: str
+    :param dcmax: minimum `dc` value for a node to be counted as good
+    :type dcmax:
+    :return: the annotated tree
+    '''
+    def Bs(node,rank,dcmax):
+        # NOTE(review): the rank argument is not read, the enclosing
+        # bsrank is used instead
+        n = len(node)
+        if n:
+            g = [int(x['dc']>=dcmax) for x in node.subgraphIterator() if x['rank']==bsrank]
+            n = len(g)
+            g = sum(g)
+            # NOTE(review): n is 0, hence a ZeroDivisionError, when no node
+            # of the subgraph has the rank bsrank - confirm it cannot happen
+            bs= float(g)/float(n)
+            node['bs']=bs
+            node['bs_label']="%3.2f (%d)" % (bs,n)
+            for n in node:
+                Bs(n,rank,dcmax)
+    tree = RootedTree()
+    ranks = set(ranks)
+    # taxa not yet attached to a parent
+    tset = set(distances)
+    for taxon in distances:
+        tree.addNode(taxon, rank=taxonomy.getRank(taxon),
+                       name=taxonomy.getScientificName(taxon),
+                       dc=float(distances[taxon][0]),
+                       n=distances[taxon][1],
+                       dc_label="%4.2f (%d)" % (float(distances[taxon][0]),distances[taxon][1])
+                    )
+    for taxon in distances:
+        piter = taxonomy.parentalTreeIterator(taxon)
+        # the first item of the iterator is used as the starting taxon
+        taxon = piter.next()
+        for parent in piter:
+            if taxon[0] in tset and parent[0] in distances:
+                tset.remove(taxon[0])
+                tree.addEdge(parent[0], taxon[0])
+                taxon=parent
+    root = tree.getRoots()[0]
+    Bs(root,bsrank,dcmax)
+    return tree
diff --git a/src/obitools/metabarcoding/options.py b/src/obitools/metabarcoding/options.py
new file mode 100644
index 0000000..08ff423
--- /dev/null
+++ b/src/obitools/metabarcoding/options.py
@@ -0,0 +1,34 @@
+Created on 30 oct. 2011
+ at author: coissac
+from obitools.ecopcr.options import addTaxonomyDBOptions
+def addMetabarcodingOption(optionManager):
+    """
+    Declare on C{optionManager} the taxonomy database options followed
+    by the --dcmax, --ingroup and --rank-thresold options.
+    """
+    addTaxonomyDBOptions(optionManager)
+    declare = optionManager.add_option
+    declare('--dcmax',
+            dest="dc", action="store",
+            type="int", metavar="###",
+            default=0,
+            help="Maximum confusion distance considered")
+    declare('--ingroup',
+            dest="ingroup", action="store",
+            type="int", metavar="###",
+            default=1,
+            help="ncbi taxid delimitation the in group")
+    declare('--rank-thresold',
+            dest="rankthresold", action="store",
+            type="float", metavar="#.##",
+            default=0.5,
+            help="minimum fraction of the ingroup sequences "
+                 "for concidering the rank")
diff --git a/src/obitools/obischemas/__init__.py b/src/obitools/obischemas/__init__.py
new file mode 100644
index 0000000..6bcafde
--- /dev/null
+++ b/src/obitools/obischemas/__init__.py
@@ -0,0 +1,28 @@
+from obitools.obischemas import kb
+__connection__ = None
+def initConnection(options):
+    """
+    Open the database connection shared by the module.
+    The dbname, dbhost, dbuser and dbpassword attributes of C{options},
+    when present and not None, are forwarded to C{kb.getConnection};
+    the C{autocommit} attribute is copied on the connection.
+    """
+    global __connection__
+    param = {}
+    # NOTE(review): psycopg2 names the user parameter 'user', not
+    # 'username' - confirm what kb.getConnection expects
+    for attribute,key in (("dbname","database"),
+                          ("dbhost","host"),
+                          ("dbuser","username"),
+                          ("dbpassword","password")):
+        value = getattr(options,attribute,None)
+        if value is not None:
+            param[key]=value
+    __connection__=kb.getConnection(**param)
+    __connection__.autocommit=options.autocommit
+def getConnection(options=None):
+    """
+    Return the database connection shared by the module.
+    @param options: when given, the connection is first (re)initialized
+                    from these options
+    @raise AssertionError: when no connection has been initialized
+    """
+    global __connection__
+    if options is not None:
+        initConnection(options)
+    assert __connection__ is not None,"database connection is not initialized"
+    return __connection__
\ No newline at end of file
diff --git a/src/obitools/obischemas/kb/__init__.py b/src/obitools/obischemas/kb/__init__.py
new file mode 100644
index 0000000..7d35dcb
--- /dev/null
+++ b/src/obitools/obischemas/kb/__init__.py
@@ -0,0 +1,55 @@
+    kb package is devoted to manage access to postgresql database from python
+    script
+class Connection(object):
+    """
+    Abstract database connection: every method except C{__call__}
+    raises RuntimeError and has to be overridden.
+    """
+    def __init__(self):
+        raise RuntimeError('pyROM.KB.Connection is an abstract class')
+    def cursor(self):
+        raise RuntimeError('pyROM.KB.Connection.cursor is an abstract function')
+    def commit(self):
+        raise RuntimeError('pyROM.KB.Connection.commit is an abstract function')
+    def rollback(self):
+        raise RuntimeError('pyROM.KB.Connection.rollback is an abstract function')
+    def __call__(self,query):
+        """Run C{query} on a new cursor and return its result."""
+        cursor = self.cursor()
+        return cursor.execute(query)
+class Cursor(object):
+    """
+    Abstract database cursor: C{__init__} and C{execute} raise
+    RuntimeError and have to be overridden.
+    """
+    def __init__(self,db):
+        raise RuntimeError('pyROM.KB.Cursor is an abstract class')
+    def execute(self,query):
+        raise RuntimeError('pyROM.KB.Cursor.execute is an abstract function')
+    def __call__(self,query):
+        """Calling the cursor runs the C{execute} method of the instance."""
+        # dispatch at call time: an alias made in the class body would
+        # stay bound to the abstract function in every subclass
+        return self.execute(query)
+_current_connection = None  # Static variable used to store connection to KB
+def getConnection(*args,**kargs):
+    """
+    Return a connection to the database.
+    A connection is created on the first call and each time arguments
+    are given; otherwise the connection stored by a previous call is
+    returned.
+    The C{backend} module is tried first and is called without any
+    argument; when it cannot be imported the C{extern} module is used
+    and receives the arguments.
+    """
+    global _current_connection
+    if _current_connection is None or args or kargs :
+        try:
+            from obischemas.kb import backend
+            _current_connection = backend.Connection()
+        except ImportError:
+            from obischemas.kb import extern
+            _current_connection = extern.Connection(*args,**kargs)
+    return _current_connection
diff --git a/src/obitools/obischemas/kb/extern.py b/src/obitools/obischemas/kb/extern.py
new file mode 100644
index 0000000..ce2ff84
--- /dev/null
+++ b/src/obitools/obischemas/kb/extern.py
@@ -0,0 +1,78 @@
+Module : KB.extern
+Author : Eric Coissac
+Date   : 03/05/2004
+Module wrapping psycopg interface module to allow connection
+to a postgresql databases with the same interface from
+backend and external script.
+This module define a class usable from external script 
+import psycopg2
+import sys
+from obischemas import kb
+class Connection(kb.Connection):
+    """
+    Connection to a postgresql database opened with psycopg2.
+    """
+    def __init__(self,*connectParam,**kconnectParam):
+        """
+        @param connectParam: DSN fragment(s); when given they are joined
+                             with spaces and used as the C{dsn} parameter
+        @param kconnectParam: keyword parameters given to
+                              C{psycopg2.connect} when there is no
+                              positional parameter
+        """
+        if connectParam:
+            # must be an assignment, and dsn must be a string not a tuple
+            self.connectParam={'dsn':' '.join(connectParam)}
+        else:
+            self.connectParam=kconnectParam
+        # the parameters are not printed: they may contain a password
+        self.db = psycopg2.connect(**(self.connectParam))
+    def restart(self):
+        """
+        Try to open the connection again, at most 999 times.
+        When every attempt fails C{self.db} is left unchanged and no
+        error is raised.
+        """
+        for attempt in range(999):
+            try:
+                self.db = psycopg2.connect(**self.connectParam)
+            except psycopg2.Error:
+                continue
+            else:
+                break
+    def cursor(self):
+        """Return a new cursor inheriting the autocommit mode of the connection."""
+        curs = Cursor(self.db)
+        if hasattr(self,'autocommit') and self.autocommit:
+            curs.autocommit = self.autocommit
+        return curs
+    def commit(self):
+        self.db.commit()
+    def rollback(self):
+        # db is missing when the connection failed in __init__
+        if hasattr(self,'db'):
+            self.db.rollback()
+    def __del__(self):
+        if hasattr(self,'db'):
+            self.rollback()
+class Cursor(kb.Cursor):
+    """
+    Cursor running queries through psycopg2 and returning the rows as
+    dictionaries.
+    """
+    def __init__(self,db):
+        """
+        @param db: the psycopg2 connection used to run the queries
+        """
+        self.db   = db
+        self.curs = db.cursor()
+    def execute(self,query):
+        """
+        Run C{query}, committing afterwards when the C{autocommit}
+        attribute is set and true.
+        @return: a list of dict indexed by column name, one per row;
+                 an empty list when the query returns no result set
+        @raise psycopg2.ProgrammingError: after echoing the query on stderr
+        @raise psycopg2.IntegrityError: after echoing the query on stderr
+        """
+        try:
+            self.curs.execute(query)
+            if hasattr(self,'autocommit') and self.autocommit:
+                self.db.commit()
+        except psycopg2.ProgrammingError:
+            sys.stderr.write("===> %s\n" % query)
+            raise
+        except psycopg2.IntegrityError:
+            sys.stderr.write("---> %s\n" % query)
+            raise
+        description = self.curs.description
+        if description is None:
+            # the query does not return rows
+            return []
+        label = [x[0] for x in description]
+        return [dict(zip(label,y))
+                for y in self.curs.fetchall()]
+    # rebind the call syntax on the execute method defined above
+    __call__=execute
diff --git a/src/obitools/obischemas/options.py b/src/obitools/obischemas/options.py
new file mode 100644
index 0000000..66f5138
--- /dev/null
+++ b/src/obitools/obischemas/options.py
@@ -0,0 +1,31 @@
+def addConnectionOptions(optionManager):
+    """
+    Declare on C{optionManager} the options describing the connection
+    to an OBISchema database: -d/--dbname, -H/--host, -U/--user,
+    -W/--password and -A/--autocommit.
+    """
+    optionManager.add_option('-d','--dbname',
+                             action="store", dest="dbname",
+                             metavar="<DB NAME>",
+                             type="string",
+                             # the space was missing between the two parts
+                             help="OBISchema database name containing "
+                                  "taxonomical data")
+    optionManager.add_option('-H','--host',
+                             action="store", dest="dbhost",
+                             metavar="<DB HOST>",
+                             type="string",
+                             help="host hosting OBISchema database")
+    optionManager.add_option('-U','--user',
+                             action="store", dest="dbuser",
+                             metavar="<DB USER>",
+                             type="string",
+                             help="user for OBISchema database connection")
+    optionManager.add_option('-W','--password',
+                             action="store", dest="dbpassword",
+                             metavar="<DB PASSWORD>",
+                             type="string",
+                             help="password for OBISchema database connection")
+    optionManager.add_option('-A','--autocommit',
+                             action="store_true",dest="autocommit",
+                             default=False,
+                             help="add commit action after each query")
\ No newline at end of file
diff --git a/src/obitools/obo/__init__.py b/src/obitools/obo/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/obo/go/__init__.py b/src/obitools/obo/go/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/obo/go/parser.py b/src/obitools/obo/go/parser.py
new file mode 100644
index 0000000..6902974
--- /dev/null
+++ b/src/obitools/obo/go/parser.py
@@ -0,0 +1,53 @@
+from obitools.obo.parser import OBOTerm
+from obitools.obo.parser import OBOEntry
+from obitools.obo.parser import stanzaIterator
+from logging import debug
+class GOEntry(OBOEntry):
+    '''
+       An entry of a GeneOntology .obo file: either a header (without a
+       stanza name) or a stanza (with a stanza name between brackets).
+       Nothing is added to the C{OBOEntry} class it inherits from.
+    '''
+class GOTerm(OBOTerm):
+    '''
+       A stanza named 'Term' of a GeneOntology file. It is an C{OBOTerm}
+       required to have exactly one 'namespace' value.
+    '''
+    def __init__(self,stanza):
+        # the parsing itself is done by the OBOTerm constructor
+        OBOTerm.__init__(self, stanza)
+        hasOneNamespace = 'namespace' in self and len(self['namespace'])==1
+        assert hasOneNamespace, "An OBOTerm must belong to one of the cell_component, molecular_function or biological_process namespace"
+def GOEntryFactory(stanza):
+    '''
+    Build the object matching the type of a stanza.
+    @param stanza: a stanza composed of several lines.
+    @type stanza: text
+    @return: a C{GOTerm} for a stanza named "Term", an C{OBOEntry}
+             for any other stanza
+    '''
+    if OBOEntry.parseStanzaName(stanza)=="Term":
+        return GOTerm(stanza)
+    return OBOEntry(stanza)
+def GOEntryIterator(file):
+    """
+    Iterate over the entries of a GeneOntology .obo file; each stanza
+    is logged at debug level before being converted by GOEntryFactory.
+    @param file: the input given to C{stanzaIterator}
+    """
+    for stanza in stanzaIterator(file):
+        debug(stanza)
+        yield GOEntryFactory(stanza)
diff --git a/src/obitools/obo/parser.py b/src/obitools/obo/parser.py
new file mode 100644
index 0000000..f6f05f3
--- /dev/null
+++ b/src/obitools/obo/parser.py
@@ -0,0 +1,707 @@
+from obitools.utils import skipWhiteLineIterator,multiLineWrapper
+from obitools.utils import universalOpen
+from obitools.format.genericparser import genericEntryIteratorGenerator
+from logging import debug,warning
+import re
+##                           Stanza preparation area                           ##
+class FileFormatError(Exception):
+    '''
+       Exception class declared by the OBO parser module. It adds nothing
+       to the class Exception it derives from.
+    '''
+## Entry splitter used by stanzaIterator. It is built once at import time by
+## genericEntryIteratorGenerator with '^ *$' (a line that is empty or made of spaces only)
+## as end-of-entry pattern -- presumably stanzas are thus separated by blank lines;
+## the exact meaning of endEntry and strip is defined in obitools.format.genericparser.
+_oboEntryIterator = genericEntryIteratorGenerator(endEntry='^ *$',
+                                                  strip=True)
+def stanzaIterator(inputfile):
+    '''
+    Iterator over the stanzas, the basic units of an OBO file.
+    @param inputfile: the OBO input, in any form accepted by C{universalOpen}.
+    @return: what C{_oboEntryIterator} returns for that input
+    @rtype: a stream of aggregated strings
+    @note: the input goes through C{universalOpen}, then C{multiLineWrapper}
+    and finally through the entry splitter C{_oboEntryIterator}.
+    '''
+    ## the three steps are simply chained, innermost call first
+    return _oboEntryIterator(multiLineWrapper(universalOpen(inputfile)))
+##                      Trailing Modifiers treatment area                      ##
+class TrailingModifier(dict):
+    '''
+       A class object which inherits from the class dict. Trailing modifiers can be found
+       at the end of TaggedValue objects when they exist. They are written
+       '{key=value, key=value}' and are stored as key -> value items.
+    '''
+    ## NOTE(review): the look-behind requires a space just before the opening brace, so a
+    ## string which starts with '{' never matches -- confirm this is the intended behaviour.
+    _match_brace = re.compile('(?<=\ {)[^\]]*(\}) *( !|$)')
+    def __init__(self,string):
+        '''
+        Constructor of the class TrailingModifier.
+        @param string: a string which may end with a '{...}' block.
+        @type string: string
+        @raise ValueError: if an item of the block has no '=' sign
+        @note: the dictionary stays empty when no block is found.
+        '''
+        ## search for trailing modifiers signals
+        found = TrailingModifier._match_brace.search(string)
+        ## the trailing modifiers exist
+        if found:
+            ## only the text located between the braces is kept : the whole match also
+            ## spans the closing brace (group 1) and the ' !' which may follow it, and
+            ## they must not end up in the last value.
+            content = string[found.start(0):found.start(1)].strip()
+            ## logged instead of printed : a parser must not write on stdout
+            debug("Trailing modifiers : %s" % content)
+            ## creates and feeds the dictionary of trailing modifiers (empty items, as
+            ## in '{}', are skipped)
+            dict.__init__(self,(x.strip().split('=',1) for x in content.split(',') if x.strip()))
+def trailingModifierFactory(string):
+    '''
+    Dispatcher of trailing modifiers.
+    @param string: a string which may carry a trailing modifiers block.
+    @type string: string
+    @return: a C{TrailingModifier} instance when at least one modifier
+             has been found, None otherwise
+    @note: The dispatcher is currently very simple. Only one case is treated.
+    '''
+    ## an empty TrailingModifier is false, and is then replaced by None
+    return TrailingModifier(string) or None
+##                          TaggedValue treatment area                         ##
+class TaggedValue(object):
+    '''
+       A couple 'tag:value' of an OBOEntry.
+    ''' 
+    _match_value   = re.compile('(("(\\\\"|[^\"])*")|(\\\\"|[^\"]))*?( !| {|$)')
+    _split_comment = re.compile('^!| !')
+    _match_quotedString = re.compile('(?<=")(\\\\"|[^\"])*(?=")')
+    _match_bracket = re.compile('\[[^\]]*\]')
+    def __init__(self,line):
+        '''
+        Constructor of the class TaggedValue.
+        @param line: a line of an OBOEntry composed of a tag and a value.
+        @type line: string
+        @raise ValueError: if the line contains no ':'
+        @raise DeprecationWarning: if the tag is the deprecated 'use_term'
+        @note: The constructor separates the tag from the right term. 'value' is extracted 
+        from the right term using a regular expression (value is at the beginning of the
+        string, between quotes or not). Then, 'comment' is extracted from the rest of the 
+        string using another regular expression ('comment' is at the end of the string 
+        after a '!'. By default, 'comment' is set to None). Finally, 'trailing_modifiers'
+        are extracted from the last string using another regular expression.
+        The tag, the value, the comment (as __doc__) and the trailing_modifiers are saved.
+        '''
+        debug("tagValueParser : %s" % line)
+        ## by default :
+        trailing_modifiers = None
+        comment = None
+        ## the tag is saved. 'right' is composed of the value, the comment and the trailing modifiers
+        tag,right = line.split(':',1)
+        ## the value is saved
+        value = TaggedValue._match_value.search(right).group(0)
+        debug("Extracted value : %s" % value)
+        ## if there is a value AND a sign of a comment or trailing modifiers
+        if value and value[-1] in '!{':
+            lvalue = len(value)
+            ## whatever it is a comment or trailing modifiers, it is saved into 'extra'
+            extra = right[lvalue-1:].strip()
+            ## a comment is extracted
+            extra =TaggedValue._split_comment.split(extra,1)
+            ## and saved if it exists
+            if len(extra)==2:
+                comment=extra[1].strip()
+            ## trailing modifiers are extracted
+            extra=extra[0]
+            trailing_modifiers = trailingModifierFactory(extra)
+            ## the value is cleaned of any comment or trailing modifiers signals
+            value = value[0:-1]
+        ## 'use_term' is the deprecated spelling of 'consider' : the line is rejected.
+        ## The message now quotes the real tag name (it used to say 'user_term') and the
+        ## renaming of the tag, which could not survive the raise, has been removed.
+        if tag=='use_term':
+            raise DeprecationWarning("use_term is a deprecated tag, you should instead use consider")
+        ## recording zone
+        self.value  =value.strip()
+        self.tag    = tag
+        self.__doc__=comment
+        self.trailing_modifiers=trailing_modifiers
+    def __str__(self):
+        ## a tagged value prints as its value
+        return str(self.value)
+    def __repr__(self):
+        ## the value between triple double quotes
+        return '''"""%s"""''' % str(self)
+class NameValue(TaggedValue):
+    '''
+       A couple 'name:value' inherited from the class TaggedValue. The value is
+       the whole text found after the first ':' : no comment and no trailing
+       modifiers are looked for.
+    '''
+    def __init__(self,line):
+        '''
+        @param line: a line of an OBOEntry composed of a name tag and a value.
+        @type line: string
+        @raise ValueError: if the line contains no ':'
+        '''
+        ## the TaggedValue constructor is deliberately not called.
+        unused_tag,text = line.split(':',1)
+        ## recording zone : the tag is forced to 'name' whatever the line says
+        self.tag = 'name'
+        self.value = text.strip()
+        self.trailing_modifiers=None
+        self.__doc__=None
+class DefValue(TaggedValue):
+    '''
+       A couple 'def:value' inherited from the class TaggedValue. Used to manage def tags.
+    '''
+    def __init__(self,line):
+        '''
+        Constructor of the class DefValue.
+        @param line: a line of an OBOEntry composed of a tag named 'def' and a value.
+        @type line: string
+        @raise AttributeError: if the value holds no quoted text or no '[...]' block
+                               (the corresponding search returns None)
+        @note: The line is first parsed by the TaggedValue constructor. The definition
+        is the quoted part of the resulting value and becomes the value of the DefValue.
+        The dbxrefs are the '[...]' block found in what is left once the definition
+        is removed; they are converted by C{xrefFactory} and saved as 'dbxrefs'.
+        '''
+        TaggedValue.__init__(self, line)
+        raw = self.value
+        ## the quoted text, without its quotes
+        quoted = TaggedValue._match_quotedString.search(raw).group(0)
+        ## what is left without the definition, double spaces reduced to one
+        remainder = raw.replace(quoted,'').replace('  ',' ')
+        ## the first '[...]' block of the remainder
+        bracketed = TaggedValue._match_bracket.search(remainder).group(0)
+        ## recording zone
+        self.tag = 'def'
+        self.value=quoted
+        self.dbxrefs=xrefFactory(bracketed)
+class SynonymValue(TaggedValue):
+    '''
+       A couple 'synonym:value' inherited from the class TaggedValue. Used to manage 
+       synonym tags, exact_synonym tags, broad_synonym tags and narrow_synonym tags.
+    ''' 
+    ## matches what follows the emptied quotes ('""') up to the first '[' or the end of the string
+    _match_scope = re.compile('(?<="")[^\[]*(?=\[|$)')
+    def __init__(self,line):
+        '''
+        Constructor of the class SynonymValue.
+        @param line: a line of an OBOEntry composed of a tag named 'synonym' or
+        'exact_synonym' or 'broad_synonym' or 'narrow_synonym' and a value.
+        @type line: string
+        @raise DeprecationWarning: for the tags 'exact_synonym', 'broad_synonym' and
+        'narrow_synonym'
+        @raise AssertionError: if several scopes are given or if the final tag is not 'synonym'
+        @note: SynonymValue is composed of a tag, a value, a scope, a list of types and 
+        dbxrefs.
+        The constructor calls the TaggedValue constructor. A regular expression 
+        is used to extract 'definition' from TaggedValue.value (definition is the 
+        quoted part of TaggedValue.value). Definition is saved as the new value of the class
+        SynonymValue.
+        A regular expression is used to extract 'attributes' from the rest of the
+        string. Attributes may contain an optional synonym scope and an optional list 
+        of synonym types. The scope is extracted from attributes or set by default to
+        'RELATED'. It is saved as the scope of the class. The types are the rest of the 
+        attributes and are saved as the list of types of the class.
+        The deprecated tags 'exact_synonym', 'broad_synonym' and 'narrow_synonym' raise
+        a DeprecationWarning; the tag 'systematic_synonym' is turned into 'synonym' with
+        the scope 'SYSTEMATIC'.
+        A regular expression is used to extract 'dbxrefs' from the TaggedValue.value 
+        without the definition (dbxrefs are between brackets and definition can be so).
+        dbxrefs are saved.
+        '''
+        ## use of the TaggedValue constructor
+        TaggedValue.__init__(self, line)
+        ## definition, which is quoted, is extracted from the standard value of a TaggedValue.
+        definition = TaggedValue._match_quotedString.search(self.value).group(0)
+        ## the standard value is cleaned of the definition.
+        cleanvalue = self.value.replace(definition,'')
+        cleanvalue = cleanvalue.replace('  ',' ')
+        ## 1) attributes are searched into the rest of the standard value.
+        ## 2) then they are stripped.
+        ## 3) then they are split on every ' '.
+        ## 4) finally they are ordered into a set.
+        attributes = set(SynonymValue._match_scope.search(cleanvalue).group(0).strip().split())
+        ## the scopes are the junction between the attributes and a set of specific terms. 
+        scopes     = attributes & set(['RELATED','EXACT','BROAD','NARROW'])
+        ## the types are the rest of the attributes.
+        types      = attributes - scopes
+        ## this is a constraint of the OBO format
+        assert len(scopes)< 2,"Only one synonym scope allowed"
+        ## the scope of the SynonymValue is into scopes or set by default to RELATED
+        if scopes:
+            scope = scopes.pop()
+        else:
+            scope = 'RELATED'
+        ## Specific rules are defined for the following tags :
+        ## NOTE(review): in the three next blocks, the two statements written after the
+        ## raise are never executed. Decide whether these deprecated tags must be
+        ## rejected (current behaviour) or only reported and then remapped.
+        if self.tag == 'exact_synonym':
+            raise DeprecationWarning,'exact_synonym is a deprecated tag use instead synonym tag'
+            self.tag   = 'synonym'
+            scope = 'EXACT'
+        if self.tag == 'broad_synonym':
+            raise DeprecationWarning,'broad_synonym is a deprecated tag use instead synonym tag'
+            self.tag   = 'synonym'
+            scope = 'BROAD'
+        if self.tag == 'narrow_synonym':
+            raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead synonym tag'
+            self.tag   = 'synonym'
+            scope = 'NARROW'
+        ## NOTE(review): taggedValueFactory has no rule sending 'systematic_synonym' lines
+        ## to this class, so this branch is only reached by a direct call of the constructor.
+        if self.tag == 'systematic_synonym':
+            #raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead sysnonym tag'
+            self.tag   = 'synonym'
+            scope = 'SYSTEMATIC'
+        ## this is our own constraint. deprecated tags are not saved by this parser.    
+        assert self.tag =='synonym',"%s synonym type is not managed" % self.tag
+        ## dbxrefs are searched into the rest of the standard value.
+        dbxrefs    = TaggedValue._match_bracket.search(cleanvalue).group(0)
+        ## recording zone
+        ## the value of a SynonymValue is not the standard value but the definition.
+        self.value   = definition
+        self.dbxrefs = xrefFactory(dbxrefs)
+        self.scope   = scope
+        self.types   = list(types)
+    def __eq__(self,b):
+        ## two synonyms are equal when all their recorded fields are equal.
+        ## 'b' must own the same attributes, otherwise an AttributeError is raised.
+        return ((self.value==b.value) and (self.dbxrefs==b.dbxrefs) 
+                and (self.scope==b.scope) and (self.types==b.types)
+                and (self.__doc__==b.__doc__) and (self.tag==b.tag)
+                and (self.trailing_modifiers==b.trailing_modifiers))
+    def __hash__(self):
+        ## sum of the hashes of the fields compared by __eq__, reduced modulo 2**31.
+        ## NOTE(review): a TrailingModifier is a dict, hence not hashable : hashing a
+        ## synonym whose trailing_modifiers is not None would raise a TypeError -- to confirm.
+        return (reduce(lambda x,y:x+y,(hash(z) for z in [self.__doc__,
+                                                         self.value,
+                                                         frozenset(self.dbxrefs),
+                                                         self.scope,
+                                                         frozenset(self.types),
+                                                         self.tag,
+                                                         self.trailing_modifiers]),0)) % (2**31)
+class XrefValue(TaggedValue):
+    '''
+       A couple 'xref:value' inherited from the class TaggedValue. Used to manage 
+       xref tags. The value of the object is what C{xrefFactory} builds from the
+       value of the line.
+    ''' 
+    def __init__(self,line):
+        '''
+        @param line: a line of an OBOEntry composed of an xref tag and a value.
+        @type line: string
+        @raise DeprecationWarning: for the deprecated tags 'xref_analog' and 'xref_unk'
+        @raise AssertionError: for any other tag different from 'xref'
+        '''
+        ## use of the TaggedValue constructor
+        TaggedValue.__init__(self, line)
+        ## use the same function as the dbxrefs
+        self.value=xrefFactory(self.value)
+        ## deprecated tags are rejected. The message now points to the xref tag (it was
+        ## copied from the synonym case) and the renaming of the tag, which could never
+        ## run after the raise, has been removed.
+        if self.tag in ('xref_analog','xref_unk'):
+            raise DeprecationWarning('%s is a deprecated tag use instead xref tag' % self.tag)
+        ## this is our own constraint. deprecated tags are not saved by this parser.    
+        assert self.tag=='xref'
+class RelationshipValue(TaggedValue):
+    '''
+       A tagged value linking the entry to another term. The value of the line
+       is read as 'relationship term'; the relationship is 'is_a' when the
+       value is made of a single word.
+    '''
+    def __init__(self,line):
+        '''
+        @param line: a line of an OBOEntry composed of a tag and a value.
+        @type line: string
+        @raise IndexError: if the value of the line is empty
+        '''
+        ## use of the TaggedValue constructor
+        TaggedValue.__init__(self, line)
+        ## at most one split, on the first run of white spaces
+        parts = self.value.split(None,1)
+        if len(parts)!=2:
+            ## a single word : it is the term, the relationship is implicit
+            relationship,term = 'is_a',parts[0]
+        else:
+            relationship,term = parts
+        ## recording zone
+        self.relationship=relationship
+        self.value=term
+class NamespaceValue(TaggedValue):
+    '''
+       A tagged value built by taggedValueFactory for the lines starting with
+       'namespace' or 'default-namespace'. It behaves exactly as a TaggedValue.
+    '''
+    def __init__(self,line):
+        ## nothing specific : the work is fully delegated to the TaggedValue constructor
+        TaggedValue.__init__(self, line)
+class RemarkValue(TaggedValue):
+    '''
+       A couple 'remark:value' whose value is itself written 'label: text'.
+       The text is saved as the value of the object, the label as 'label'.
+    '''
+    def __init__(self,line):
+        '''
+        @param line: a line of an OBOEntry composed of a remark tag and a value.
+        @type line: string
+        @raise ValueError: if the value of the remark contains no ':'
+        '''
+        TaggedValue.__init__(self, line)
+        ## the value is cut on its first ':' only
+        label,text = self.value.split(':',1)
+        self.value=text.strip()
+        self.label=label.strip()
+def taggedValueFactory(line):
+    '''
+    Dispatch a line of an OBOEntry to the class TaggedValue or to one of
+    its inherited classes, according to the beginning of the line.
+    @param line: a line of an OBOEntry composed of a tag and a value.
+    @type line: string
+    @return: an instance of TaggedValue or of one of its inherited classes
+    @note: the tests are done on prefixes and their order matters : 'namespace'
+    is tried before 'name' and 'default-namespace' before 'def'.
+    '''
+    if line.startswith(('namespace','default-namespace')):
+        return NamespaceValue(line)
+    ## DefValue is an inherited class of TaggedValue
+    if line.startswith('def'):
+        return DefValue(line)
+    ## SynonymValue is an inherited class of TaggedValue.
+    ## 'synonymtypedef' lines are not synonyms.
+    if line.startswith(('exact_synonym','broad_synonym','narrow_synonym')):
+        return SynonymValue(line)
+    if line.startswith('synonym') and not line.startswith('synonymtypedef'):
+        return SynonymValue(line)
+    ## XrefValue is an inherited class of TaggedValue
+    if line.startswith('xref'):
+        return XrefValue(line)
+    ## NameValue is an inherited class of TaggedValue
+    if line.startswith('name'):
+        return NameValue(line)
+    ## RelationshipValue is an inherited class of TaggedValue
+    if line.startswith(('intersection_of','union_of','relationship')):
+        return RelationshipValue(line)
+    if line.startswith('remark'):
+        return RemarkValue(line)
+    ## every other line is a plain couple : tag / value (and some more features)
+    return TaggedValue(line)
+##                               Xref treatment area                           ##
+class Xref(object):
+    '''
+       A xref object of an OBOentry. It may be the 'dbxrefs' of SynonymValue and 
+       DefValue objects or the 'value' of XrefValue objects.
+       A xref is written : reference ["description"] [{trailing modifiers}]
+    '''
+    ## splits on the spaces which precede a double quote or an opening brace
+    __splitdata__ = re.compile(' +(?=["{])')
+    def __init__(self,ref):
+        '''
+        @param ref: the text of one xref. An empty string gives a xref whose
+                    reference is None.
+        @type ref: string
+        '''
+        if ref == '' :
+            ## special case added by JJ : the empty reference used to end with
+            ## a 'list index out of range' error
+            ref  = None
+            data = ''
+        else :
+            ## data[0] is the reference, data[1] (if any) the description and the modifiers
+            data = Xref.__splitdata__.split(ref,1)
+            ref  = data[0]
+        description=None
+        trailing_modifiers = None
+        if len(data)> 1:
+            extra = data[1]
+            description = TaggedValue._match_quotedString.search(extra)
+            if description is not None:
+                description = description.group(0)
+                ## str.replace returns a new string : the result has to be kept, otherwise
+                ## the description is still there when the trailing modifiers are searched
+                extra = extra.replace(description,'')
+            trailing_modifiers=trailingModifierFactory(extra)
+        self.reference=ref
+        self.description=description
+        self.trailing_modifiers=trailing_modifiers
+    def __eq__(self,b):
+        ## two xrefs are equal when their three fields are equal
+        return ((self.reference==b.reference) and (self.description==b.description) 
+                and (self.trailing_modifiers==b.trailing_modifiers))
+    def __hash__(self):
+        ## sum of the hashes of the three fields compared by __eq__, reduced modulo 2**31
+        return (reduce(lambda x,y:x+y,(hash(z) for z in [self.reference,
+                                                         self.description,
+                                                         self.trailing_modifiers]),0)) % (2**31)
+def xrefFactory(string):
+    '''
+    Dispatcher of xrefs.
+    @param string: a string (between brackets) from an inherited TaggedValue object with a dbxrefs 
+                   signal (actually, the signal can only be found into SynonymValue and DefValue 
+                   objects) or a string (without brackets) from a XrefValue object.
+    @type string: string
+    @return: a list of C{Xref} instances for a string between brackets, a single
+             C{Xref} instance otherwise
+    @note: The dispatcher treats differently the strings between brackets (from SynonymValue and 
+    DefValue objects) and without brackets (from XrefValue objects). An empty string gives
+    the empty C{Xref} instead of an IndexError.
+    '''
+    string = string.strip()
+    ## startswith() and not string[0] : the string may be empty, a case Xref handles
+    if string.startswith('['):
+        ## the brackets are removed and the content is cut on every ','
+        return [Xref(x.strip()) for x in string[1:-1].split(',')]  
+    return Xref(string)
+##                              Stanza treatment area                          ##
+class OBOEntry(dict):
+    '''
+       An entry of an OBOFile : either the header (no stanza name) or a stanza
+       (stanza name between brackets on its first line). It is a dictionary
+       associating each tag to the list of its tagged values.
+    '''
+    _match_stanza_name = re.compile('(?<=^\[)[^\]]*(?=\])')
+    def __init__(self,stanza):
+        '''
+        @param stanza: the text of the entry, one 'tag: value' couple per line.
+        @type stanza: string
+        @note: 'isHeader' is always set. 'stanzaName' and the 'stanza' key (the
+        stripped text of the stanza) only exist when the entry is not the header.
+        '''
+        ## the header is the only entry which does not start with a '['
+        self.isHeader = stanza[0]!='['
+        lines = stanza.split('\n')
+        if not self.isHeader:
+            ## the first line is '[stanzaName]' and is not a tagged value
+            self.stanzaName = lines[0].strip()[1:-1]
+            lines=lines[1:]
+            self["stanza"] = [stanza.strip()]
+        ## each remaining line is a couple tag / value, values are grouped by tag
+        for line in lines:
+            taggedvalue = taggedValueFactory(line)
+            self.setdefault(taggedvalue.tag,[]).append(taggedvalue)
+    @staticmethod
+    def parseStanzaName(stanza):
+        '''
+        @param stanza: the text of an entry.
+        @type stanza: string
+        @return: the name found between the brackets which open the text, None
+                 when the text does not start with a '['
+        '''
+        found = OBOEntry._match_stanza_name.search(stanza)
+        if not found:
+            return None
+        return found.group(0)
+class OBOTerm(OBOEntry):
+    '''
+       A stanza named 'Term'. It inherits from the class OBOEntry.
+       The constructor checks the cardinality of the tags it knows; the read-only
+       properties declared at the end of the class give access to the tagged values.
+    '''
+    def __init__(self,stanza):
+        '''
+        @param stanza: the text of a '[Term]' stanza.
+        @type stanza: string
+        @raise AssertionError: if one of the constraints checked below is not fulfilled
+        '''
+        ## use of the OBOEntry constructor.
+        OBOEntry.__init__(self, stanza)
+        assert self.stanzaName=='Term'
+        assert 'stanza' in self
+        assert 'id' in self and len(self['id'])==1,"An OBOTerm must have an id"
+        assert 'name' in self and len(self['name'])==1,"An OBOTerm must have a name"
+        assert 'namespace' not in self or len(self['namespace'])==1, "Only one namespace is allowed for an OBO term"
+        assert 'def' not in self or len(self['def'])==1,"Only one definition is allowed for an OBO term"
+        assert 'comment' not in self or len(self['comment'])==1,"Only one comment is allowed for an OBO term"
+        ## the two next messages now describe the condition which is really tested
+        assert 'union_of' not in self or len(self['union_of'])>=2,"At least two union_of tags are required for an OBO term"
+        assert 'intersection_of' not in self or len(self['intersection_of'])>=2,"At least two intersection_of tags are required for an OBO term"
+        ## an obsolete term must not be linked to other terms
+        if self._isObsolete():
+            #assert 'is_a' not in self
+            assert 'relationship' not in self
+            assert 'inverse_of' not in self
+            assert 'disjoint_from' not in self
+            assert 'union_of' not in self
+            assert 'intersection_of' not in self
+        ## replaced_by and consider are only allowed on obsolete terms
+        assert 'replaced_by' not in self or self._isObsolete()
+        assert 'consider' not in self or self._isObsolete()
+    def _uniqueValues(self,tag):
+        ## list of the distinct tagged values of a tag, None when the tag is absent
+        if tag in self:
+            return list(set(self[tag]))
+        return None
+    def _firstValue(self,tag):
+        ## first tagged value of an optional tag, None when the tag is absent
+        if tag in self:
+            return self[tag][0]
+        return None
+    def _getStanza(self):
+        return self['stanza'][0]
+    ## make-up functions.            
+    def _getDefinition(self):
+        return self._firstValue('def')
+    def _getId(self):
+        return self['id'][0]
+    def _getNamespace(self):
+        ## raises a KeyError when the term has no namespace tag
+        return self['namespace'][0]
+    def _getName(self):
+        return self['name'][0]
+    def _getComment(self):
+        return self._firstValue('comment')
+    def _getAltIds(self):
+        return self._uniqueValues('alt_id')
+    def _getIsA(self):
+        return self._uniqueValues('is_a')
+    def _getSynonym(self):
+        return self._uniqueValues('synonym')
+    def _getSubset(self):
+        return self._uniqueValues('subset')
+    def _getXref(self):
+        return self._uniqueValues('xref')
+    def _getRelationShip(self):
+        return self._uniqueValues('relationship')
+    ## the three next getters used to fail with a TypeError (set(None)) when the tag
+    ## was absent; they now return None as the other getters do.
+    def _getUnion(self):
+        return self._uniqueValues('union_of')
+    def _getIntersection(self):
+        return self._uniqueValues('intersection_of')
+    def _getDisjonction(self):
+        return self._uniqueValues('disjoint_from')
+    def _isObsolete(self):
+        ## True only when the first is_obsolete tag has the value 'true'
+        return 'is_obsolete' in self and str(self['is_obsolete'][0])=='true'
+    def _getReplacedBy(self):
+        return self._uniqueValues('replaced_by')
+    def _getConsider(self):
+        return self._uniqueValues('consider')
+    ## automatically make-up !
+    stanza             = property(_getStanza,None,None)
+    definition         = property(_getDefinition,None,None)
+    id                 = property(_getId,None,None) 
+    namespace          = property(_getNamespace,None,None)
+    name               = property(_getName,None,None) 
+    comment            = property(_getComment,None,None)
+    alt_ids            = property(_getAltIds,None,None)
+    is_a               = property(_getIsA,None,None)
+    synonyms           = property(_getSynonym,None,None)
+    subsets            = property(_getSubset,None,None)
+    xrefs              = property(_getXref,None,None)
+    relationship       = property(_getRelationShip,None,None)
+    union_of           = property(_getUnion,None,None)
+    intersection_of    = property(_getIntersection,None,None)
+    disjoint_from      = property(_getDisjonction,None,None)
+    is_obsolete        = property(_isObsolete,None,None)
+    replaced_by        = property(_getReplacedBy,None,None)
+    consider           = property(_getConsider,None,None)      
+def OBOEntryFactory(stanza):
+    '''
+    Build the object representing a stanza of an OBO file.
+    @param stanza: a stanza composed of several lines.
+    @type stanza: text
+    @return: an C{OBOTerm} instance for a "Term" stanza, an C{OBOEntry}
+             instance for every other stanza (header included)
+    '''
+    ## guard clause : only the "Term" stanza has a dedicated class
+    if OBOEntry.parseStanzaName(stanza)=="Term":
+        return OBOTerm(stanza)
+    return OBOEntry(stanza)
+def OBOEntryIterator(file):
+    '''
+    Generator of the entries of an OBO file.
+    @param file: the input handed over to C{stanzaIterator}
+    @return: one C{OBOEntryFactory} result per stanza, in file order
+    @note: each stanza is sent to the debug log before being converted.
+    '''
+    for stanza in stanzaIterator(file):
+        debug(stanza)
+        yield OBOEntryFactory(stanza)
\ No newline at end of file
diff --git a/src/obitools/options/__init__.py b/src/obitools/options/__init__.py
new file mode 100644
index 0000000..3590db3
--- /dev/null
+++ b/src/obitools/options/__init__.py
@@ -0,0 +1,101 @@
+    Module providing high level functions to manage command line options.
+import logging
+import sys
+from logging import debug
+from optparse import OptionParser
+from optparse import IndentedHelpFormatter                     
+from obitools.utils import universalOpen
+from obitools.utils import fileSize
+from obitools.utils import universalTell
+from obitools.utils import progressBar
+from obitools.format.options import addInputFormatOption, addInOutputOption,\
+    autoEntriesIterator
+import time
+from _options import fileWithProgressBar        # @UnresolvedImport 
+from _options import currentInputFileName       # @UnresolvedImport 
+from _options import currentInputFile           # @UnresolvedImport 
+from _options import currentFileSize            # @UnresolvedImport 
+from _options import currentFileTell            # @UnresolvedImport 
+from _options import allEntryIterator           # @UnresolvedImport
+from obitools.ecopcr.sequence import EcoPCRDBSequenceIterator
+class ObiHelpFormatter (IndentedHelpFormatter):
+    '''
+    Help formatter drawing each section heading inside a box of stars.
+    '''
+    def __init__(self,
+                 indent_increment=2,
+                 max_help_position=24,
+                 width=None,
+                 short_first=1):
+        ## same parameters and defaults, forwarded unchanged to the parent class
+        IndentedHelpFormatter.__init__(self, indent_increment, max_help_position, width, short_first)
+    def format_heading(self, heading):
+        '''
+        @param heading: the title of an option section
+        @return: three lines (stars, '* title *', stars), each one shifted by the
+                 current indentation; the last one ends with a newline
+        '''
+        ## the margin is an empty string padded to the current indentation
+        margin = "%*s" % (self.current_indent, "")
+        border = margin + '*'*(len(heading)+4)
+        title = "%s* %s *" % (margin, heading)
+        return '\n'.join((border, title, border + '\n'))
+def getOptionManager(optionDefinitions,entryIterator=None,progdoc=None,checkFormat=False):
+    '''
+    Build an option manager function, i.e. a function without parameter which
+    parses the command line of the script when it is called.
+    @param optionDefinitions: list of function describing a set of 
+                              options. Each function must allows as
+                              unique parameter an instance of OptionParser.
+    @type optionDefinitions:  list of functions.
+    @param entryIterator:     an iterator generator function returning
+                              entries from the data files. 
+    @type entryIterator:      an iterator generator function with only one
+                              parameter of type file
+    @param progdoc:           the usage text given to the OptionParser.
+    @param checkFormat:       if True the entry iterator is chosen from the
+                              options by autoEntriesIterator instead of being
+                              entryIterator. It is forced to True as soon as
+                              addInputFormatOption or addInOutputOption belongs
+                              to optionDefinitions.
+    @return: the option manager function. It returns the couple (options,
+             iterator over all the entries of the files of the command line).
+    '''
+    parser = OptionParser(usage=progdoc, formatter=ObiHelpFormatter())
+    ## the two options shared by every script
+    parser.add_option('--DEBUG',
+                      action="store_true", dest="debug",
+                      default=False,
+                      help="Set logging in debug mode")
+    parser.add_option('--without-progress-bar',
+                      action="store_false", dest="progressbar",
+                      default=True,
+                      help="desactivate progress bar")
+    ## the options of the script itself
+    for definition in optionDefinitions:
+        if definition in (addInputFormatOption, addInOutputOption):
+            checkFormat=True 
+        definition(parser)
+    def commandLineAnalyzer():
+        options,files = parser.parse_args()
+        if options.debug:
+            logging.root.setLevel(logging.DEBUG)
+        if not checkFormat:
+            reader=entryIterator
+        else:
+            ## autoEntriesIterator is always given a skiperror attribute
+            if not hasattr(options, "skiperror"):
+                options.skiperror=False
+            reader=autoEntriesIterator(options)
+        options.readerIterator=reader
+        ## with the ecoPCR database reader, the first file is recorded as the taxonomy
+        if reader==EcoPCRDBSequenceIterator:
+            options.taxonomy=files[0]
+        entries = allEntryIterator(files,reader,with_progress=options.progressbar,options=options)
+        return options,entries
+    return commandLineAnalyzer
\ No newline at end of file
diff --git a/src/obitools/options/_bioseqfilter.pyx b/src/obitools/options/_bioseqfilter.pyx
new file mode 100644
index 0000000..9e36ae6
--- /dev/null
+++ b/src/obitools/options/_bioseqfilter.pyx
@@ -0,0 +1,82 @@
+# cython: profile=True
+from obitools.options.taxonomyfilter import taxonomyFilterGenerator
+def filterGenerator(options):
+    '''
+    Build a predicate selecting sequences according to the filtering options.
+    @param options: the options object of the script. The attribute idlist and
+                    has_attribute are always read; the other ones are only
+                    used when they exist.
+    @return: a function taking a sequence and returning True if the sequence
+             passes every active filter (the result is inverted when
+             options.invertedFilter is set)
+    '''
+    taxfilter = taxonomyFilterGenerator(options)
+    if options.idlist is not None:
+        # the file of identifiers is read once and closed immediately
+        # (it used to be left open)
+        with open(options.idlist) as idfile:
+            idset=set(x.strip() for x in idfile)
+    else:
+        idset=None
+    def sequenceFilter(seq):
+        # each test is only run if the previous ones succeeded
+        cdef bint good = True
+        if hasattr(options, 'sequencePattern'):
+            good = <bint>(options.sequencePattern.search(str(seq)))
+        if good and hasattr(options, 'identifierPattern'):
+            good = <bint>(options.identifierPattern.search(seq.id))
+        if good and idset is not None:
+            good = seq.id in idset
+        if good and hasattr(options, 'definitionPattern'):
+            good = <bint>(options.definitionPattern.search(seq.definition))
+        # every requested attribute must be present
+        if good :
+            good = reduce(lambda bint x, bint y:x and y,
+                           (k in seq for k in options.has_attribute),
+                           True)
+        # every attribute with a pattern must be present and must match its pattern
+        if good and hasattr(options, 'attributePatterns'):
+            good = (reduce(lambda bint x, bint y : x and y,
+                           (<bint>(options.attributePatterns[p].search(str(seq[p])))
+                            for p in options.attributePatterns
+                             if p in seq),True)
+                    and
+                    reduce(lambda bint x, bint y : x and y,
+                           (bool(p in seq)
+                            for p in options.attributePatterns),True)
+                   )
+        if good and hasattr(options, 'predicats') and options.predicats is not None:
+            if options.taxonomy is not None:
+                e = {'taxonomy' : options.taxonomy,'sequence':seq}
+            else:
+                e = {'sequence':seq}
+            # NOTE(review): eval() runs each predicate as Python code, with the sequence
+            # as local namespace. The predicates must come from a trusted source
+            # (presumably the command line of the user) -- to confirm.
+            good = (reduce(lambda bint x, bint y: x and y,
+                           (bool(eval(p,e,seq))
+                            for p in options.predicats),True)
+                   )
+        if good and hasattr(options, 'lmin') and options.lmin is not None:
+            good = len(seq) >= options.lmin
+        if good and hasattr(options, 'lmax') and options.lmax is not None:
+            good = len(seq) <= options.lmax
+        if good:
+            good = taxfilter(seq)
+        if hasattr(options, 'invertedFilter') and options.invertedFilter:
+            good=not good
+        return good
+    return sequenceFilter
+def sequenceFilterIteratorGenerator(options):
+    '''
+    Build a generator function which only lets through the sequences
+    accepted by the filter built by filterGenerator(options).
+    @param options: the options object given to filterGenerator
+    @return: a generator function taking an iterator of sequences
+    '''
+    accept = filterGenerator(options)
+    def sequenceFilterIterator(seqIterator):
+        for seq in seqIterator:
+            if not accept(seq):
+                continue
+            yield seq
+    return sequenceFilterIterator
diff --git a/src/obitools/options/_options.pyx b/src/obitools/options/_options.pyx
new file mode 100644
index 0000000..c972215
--- /dev/null
+++ b/src/obitools/options/_options.pyx
@@ -0,0 +1,124 @@
+# cython: profile=True
+from obitools.utils._utils cimport progressBar
+from obitools.utils import universalOpen
+from obitools.utils import universalTell
+from obitools.utils import fileSize
+from obitools.ecopcr.sequence import EcoPCRDBSequenceIterator
+from glob import glob 
+from logging import debug
+import sys
+cdef extern from "stdio.h":
+    ctypedef unsigned int off_t "unsigned long long"
+cdef class CurrentFileStatus:
+    """Record describing the input file currently being read.
+
+    The three fields are filled by ``allEntryIterator`` each time it opens a
+    new input file and are read back by the module-level accessor functions.
+    """
+    # name of the input file being processed (None until a file is opened)
+    cdef public bytes currentInputFileName
+    # file object returned by universalOpen (None until a file is opened)
+    cdef public object currentFile
+    # size of the current file as returned by fileSize(); initialised to -1
+    # NOTE(review): off_t is declared to Cython as an unsigned type above, so
+    # confirm how this -1 and the `size < 0` test in fileWithProgressBar behave.
+    cdef public off_t currentFileSize
+    def __init__(self):
+        self.currentInputFileName=None
+        self.currentFile = None
+        self.currentFileSize = -1
+cpdef bytes currentInputFileName():
+    """Return the ``currentInputFileName`` field of ``cfs``.
+
+    NOTE(review): ``cfs`` is presumably the module-level CurrentFileStatus
+    instance; its definition is not visible in this part of the file.
+    """
+    return cfs.currentInputFileName
+cpdef object  currentInputFile():
+    """Return the ``currentFile`` field of ``cfs`` (the file object being read)."""
+    return cfs.currentFile
+cpdef off_t currentFileSize():
+    """Return the ``currentFileSize`` field of ``cfs``."""
+    return cfs.currentFileSize
+cpdef off_t currentFileTell():
+    """Return ``universalTell`` applied to the current input file object."""
+    return universalTell(cfs.currentFile)
+def fileWithProgressBar(file, int step=100):
+    """Wrap ``file`` so that iterating over it updates a progress bar.
+
+    @param file: iterable yielding the lines of the current input file
+    @param step: accepted for the callers' benefit; not used in this body
+    @return: ``file`` itself when the recorded size of the current file is
+             negative, otherwise a generator yielding the same lines while
+             calling ``progressBar`` before each of them
+    """
+    cdef off_t size
+    cdef off_t pos
+    # size of the file currently registered in cfs, used as the bar maximum
+    size = cfs.currentFileSize
+    def fileBar():
+        cdef str l
+        pos=1
+        # first call is made with the third argument set to True
+        # (presumably a reset of the bar - confirm against progressBar)
+        progressBar(pos, size, True,cfs.currentInputFileName)
+        for l in file:
+            # NOTE(review): the function object currentFileTell is passed, not
+            # its result; presumably progressBar accepts a callable position
+            # and calls it itself - confirm in obitools.utils._utils.
+            progressBar(currentFileTell,size, False,
+                        cfs.currentInputFileName)
+            yield l 
+        # terminate the progress bar line on stderr once the file is exhausted
+        print >>sys.stderr,''   
+    if size < 0:
+        return file
+    else:
+        f = fileBar()
+        return f
+def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102,options=None):
+    """Yield every entry of every input file, or of standard input.
+
+    @param files: sequence of input file names; when empty or None the
+                  entries are read from ``sys.stdin``
+    @param entryIterator: callable building an iterator of entries from an
+                  opened file (or from a name for EcoPCRDBSequenceIterator);
+                  when None the raw lines are yielded
+    @param with_progress: when true and the file size is positive, the file
+                  is wrapped by ``fileWithProgressBar``
+    @param histo_step: forwarded as ``step`` to ``fileWithProgressBar``
+    @param options: optional option object; for ecoPCR databases its
+                  ``taxonomy`` and ``ecodb`` attributes are read and updated
+
+    The process exits (``sys.exit``) when a file cannot be opened and is not
+    an ecoPCR database, or when no file is given and file descriptor 0 is
+    neither a FIFO nor a regular file.
+    """
+    if files :
+        for f in files:
+            # ecoPCR databases are handed over by name; anything else is opened here
+            if (entryIterator != EcoPCRDBSequenceIterator) :
+                cfs.currentInputFileName=f
+                try:
+                    f = universalOpen(f,noError=True)
+                except Exception as e:    
+                    # the name cannot be opened: accept it as an ecoPCR database
+                    # prefix if matching .sdx files exist. entryIterator stays
+                    # switched for the following files as well.
+                    if glob('%s_[0-9][0-9][0-9].sdx' % f):
+                        entryIterator=EcoPCRDBSequenceIterator
+                    else:
+                        print >>sys.stderr, e
+                        sys.exit();
+                else:
+                    # file opened: publish it through the module status record
+                    cfs.currentFile=f
+                    cfs.currentFileSize=fileSize(cfs.currentFile)
+                    debug(f)
+                    if with_progress and cfs.currentFileSize >0:
+                        f=fileWithProgressBar(f,step=histo_step)           
+            if entryIterator is None:
+                for line in f:
+                    yield line
+            else:
+                if entryIterator == EcoPCRDBSequenceIterator and options is not None:
+                    if hasattr(options,'ecodb') and options.ecodb==f:
+                        # same database as the one recorded in options: reuse its taxonomy
+                        iterator = entryIterator(f,options.taxonomy)
+                    else:
+                        # other database: remember its taxonomy and name in options
+                        iterator = entryIterator(f)
+                        options.taxonomy=iterator.taxonomy
+                        options.ecodb=f
+                else:
+                    iterator = entryIterator(f)
+                for entry in iterator:
+                    yield entry
+    else:
+        if entryIterator is None:
+            for line in sys.stdin:
+                yield line
+        else:
+            import os, stat
+            # only read stdin when it is a pipe or a redirected regular file
+            mode = os.fstat(0).st_mode
+            if stat.S_ISFIFO(mode):
+                pass
+            elif stat.S_ISREG(mode):
+                pass
+            else:
+                print>>sys.stderr, "No Entry to process"
+                sys.exit()
+            for entry in entryIterator(sys.stdin):
+                yield entry
diff --git a/src/obitools/options/bioseqcutter.py b/src/obitools/options/bioseqcutter.py
new file mode 100644
index 0000000..349a019
--- /dev/null
+++ b/src/obitools/options/bioseqcutter.py
@@ -0,0 +1,87 @@
+from logging import debug
+def _beginOptionCallback(options,opt,value,parser):
+    """Callback of ``--begin``: install ``parser.values.beginCutPosition``.
+
+    The installed function evaluates the expression ``value`` with the
+    sequence as local namespace (plus ``sequence`` and, when available on
+    ``options``, ``taxonomy`` as globals) and returns the result minus one,
+    i.e. the 1-based position given by the user turned into a 0-based index.
+    """
+    def beginCutPosition(seq):
+        debug("begin = %s" % value )
+        environ = {'sequence':seq}
+        taxonomy = getattr(options, 'taxonomy', None)
+        if taxonomy is not None:
+            environ['taxonomy'] = taxonomy
+        position = eval(value,environ,seq)
+        return position - 1
+    parser.values.beginCutPosition=beginCutPosition
+def _endOptionCallback(options,opt,value,parser):
+    """Callback of ``--end``: install ``parser.values.endCutPosition``.
+
+    The installed function evaluates the expression ``value`` with the
+    sequence as local namespace (plus ``sequence`` and, when available on
+    ``options``, ``taxonomy`` as globals) and returns the result unchanged.
+    """
+    def endCutPosition(seq):
+        environ = {'sequence':seq}
+        taxonomy = getattr(options, 'taxonomy', None)
+        if taxonomy is not None:
+            environ['taxonomy'] = taxonomy
+        return eval(value,environ,seq)
+    parser.values.endCutPosition=endCutPosition
+def addSequenceCuttingOptions(optionManager):
+    """Declare the ``-b/--begin`` and ``-e/--end`` cutting options.
+
+    @param optionManager: option parser on which the 'Cutting options' group
+                          is created
+
+    Both options take a python expression; the associated callbacks store a
+    position-computing function on the parsed values.
+    """
+    group = optionManager.add_option_group('Cutting options')
+    group.add_option('-b','--begin',
+                             action="callback", callback=_beginOptionCallback,
+                             metavar="<PYTHON_EXPRESSION>",
+                             type="string",
+                             help="python expression to be evaluated in the "
+                                  "sequence context. The attribute name can be "
+                                  "used in the expression as variable name. "
+                                  "An extra variable named 'sequence' refers "
+                                  "to the sequence object itself. ")
+    # the literals below are concatenated: each one needs its trailing space,
+    # otherwise the help shows "name .An extra" and "refersto the sequence"
+    group.add_option('-e','--end',
+                             action="callback", callback=_endOptionCallback,
+                             metavar="<PYTHON_EXPRESSION>",
+                             type="string",
+                             help="python expression to be evaluated in the "
+                                  "sequence context. The attribute name can be "
+                                  "used in the expression as variable name. "
+                                  "An extra variable named 'sequence' refers "
+                                  "to the sequence object itself. ")
+def cutterGenerator(options):
+    """Build the function cutting one sequence according to ``options``.
+
+    The returned function computes the begin index with
+    ``options.beginCutPosition`` (default 0) and the end index with
+    ``options.endCutPosition`` (default: sequence length). When these bounds
+    do not cover the whole sequence, it returns the slice ``seq[begin:end]``
+    tagged with a ``subsequence`` attribute holding the 1-based range;
+    otherwise the sequence is returned untouched.
+    """
+    def sequenceCutter(seq):
+        lseq = len(seq)
+        begin = int(options.beginCutPosition(seq)) if hasattr(options, 'beginCutPosition') else 0
+        end = int(options.endCutPosition(seq)) if hasattr(options, 'endCutPosition') else lseq
+        if begin <= 0 and end >= lseq:
+            # nothing is removed: hand back the very same object
+            return seq
+        piece = seq[begin:end]
+        piece['subsequence']="%d..%d" % (begin+1,end)
+        return piece
+    return sequenceCutter
+def cutterIteratorGenerator(options):
+    """Build a generator function applying the cutter to every sequence.
+
+    The cutter is created once by ``cutterGenerator(options)``; the returned
+    function lazily yields the cut version of each item of its argument.
+    """
+    cutOne = cutterGenerator(options)
+    def sequenceCutterIterator(seqIterator):
+        for original in seqIterator:
+            trimmed = cutOne(original)
+            yield trimmed
+    return sequenceCutterIterator
diff --git a/src/obitools/options/bioseqedittag.py b/src/obitools/options/bioseqedittag.py
new file mode 100644
index 0000000..50f0e0a
--- /dev/null
+++ b/src/obitools/options/bioseqedittag.py
@@ -0,0 +1,317 @@
+import sys
+from obitools.options.taxonomyfilter import loadTaxonomyDatabase
+import math
+def addSequenceEditTagOptions(optionManager):
+    """Declare the options driving sequence and attribute edition.
+
+    @param optionManager: option parser on which the group
+           'Sequences and attributes editing options' is created
+
+    The destinations declared here (addrank, renameTags, deleteTags, setTags,
+    taglist, setIdentifier, run, setSequence, setDefinition, onlyValid,
+    clear, keep, length, taxonrank, mcl, uniqids) are the attributes read by
+    ``sequenceTaggerGenerator``.
+    """
+    group = optionManager.add_option_group('Sequences and attributes editing options')
+    group.add_option('--seq-rank',
+                             action="store_true", dest='addrank',
+                             default=False,
+                             help="add a rank attribute to the sequence "
+                                  "indicating the sequence position in the input data")
+    group.add_option('-R','--rename-tag',
+                             action="append", 
+                             dest='renameTags',
+                             metavar="<OLD_NAME:NEW_NAME>",
+                             type="string",
+                             default=[],
+                             help="change tag name from OLD_NAME to NEW_NAME")
+    group.add_option('--delete-tag',
+                             action="append", 
+                             dest='deleteTags',
+                             metavar="<TAG_NAME>",
+                             type="string",
+                             default=[],
+                             help="delete tag TAG_NAME")
+    group.add_option('-S','--set-tag',
+                             action="append", 
+                             dest='setTags',
+                             metavar="<TAG_NAME:PYTHON_EXPRESSION>",
+                             type="string",
+                             default=[],
+                             help="Add a new tag named TAG_NAME with "
+                                  "a value computed from PYTHON_EXPRESSION")
+    group.add_option('--tag-list',
+                             action="store", 
+                             dest='taglist',
+                             metavar="<FILENAME>",
+                             type="string",
+                             default=None,
+                             help="Indicate a file containing tag and values "
+                                  "to modify on specified sequences")
+    group.add_option('--set-identifier',
+                             action="store", 
+                             dest='setIdentifier',
+                             metavar="<PYTHON_EXPRESSION>",
+                             type="string",
+                             default=None,
+                             help="Set sequence identifier with "
+                                  "a value computed from PYTHON_EXPRESSION")
+    group.add_option('--run',
+                             action="store", 
+                             dest='run',
+                             metavar="<PYTHON_EXPRESSION>",
+                             type="string",
+                             default=None,
+                             help="Run a python expression on each selected sequence")
+    group.add_option('--set-sequence',
+                             action="store", 
+                             dest='setSequence',
+                             metavar="<PYTHON_EXPRESSION>",
+                             type="string",
+                             default=None,
+                             help="Change the sequence itself with "
+                                  "a value computed from PYTHON_EXPRESSION")
+    group.add_option('-T','--set-definition',
+                             action="store", 
+                             dest='setDefinition',
+                             metavar="<PYTHON_EXPRESSION>",
+                             type="string",
+                             default=None,
+                             help="Set sequence definition with "
+                                  "a value computed from PYTHON_EXPRESSION")
+    group.add_option('-O','--only-valid-python',
+                             action="store_true", 
+                             dest='onlyValid',
+                             default=False,
+                             help="only valid python expressions are allowed")
+    group.add_option('-C','--clear',
+                             action="store_true", 
+                             dest='clear',
+                             default=False,
+                             help="clear all tags associated to the sequences")
+    group.add_option('-k','--keep',
+                             action='append',
+                             dest='keep',
+                             default=[],
+                             type="string",
+                             help="only keep this tag")
+    group.add_option('--length',
+                             action="store_true", 
+                             dest='length',
+                             default=False,
+                             help="add seqLength tag with sequence length")
+    group.add_option('--with-taxon-at-rank',
+                             action='append',
+                             dest='taxonrank',
+                             default=[],
+                             type="string",
+                             help="add taxonomy annotation at a specified rank level")
+    group.add_option('-m','--mcl',
+                             action="store", dest="mcl",
+                             metavar="<mclfile>",
+                             type="string",
+                             default=None,
+                             help="add cluster tag to sequences according to a mcl graph clustering partition")
+    group.add_option('--uniq-id',
+                             action="store_true", dest="uniqids",
+                             default=False,
+                             help="force sequence ids to be uniq")
+def readMCLFile(file):
+    """Read a partition file with one cluster per line.
+
+    @param file: iterable of text lines; each line lists the identifiers of
+                 one cluster separated by white space
+    @return: dict mapping every identifier to the 1-based number of the line
+             it was found on (the last line wins for repeated identifiers)
+    """
+    parts = {}
+    for partition, line in enumerate(file, 1):
+        for identifier in line.split():
+            parts[identifier] = partition
+    return parts
+def readTagFile(f):
+    """Read a tag file made of ``identifier tag value`` lines.
+
+    @param f: iterable of text lines; the three fields are separated by
+              white space and the value extends to the end of the line
+    @return: dict mapping each identifier to the list of its ``(tag, value)``
+             pairs, in file order
+    @raise ValueError: when a non blank line has fewer than three fields
+
+    Blank lines are skipped. Each value is evaluated as a python expression;
+    when the evaluation fails the raw text is kept.
+    """
+    tags = {}
+    for l in f:
+        if not l.strip():
+            # blank (or trailing empty) line: nothing to record
+            continue
+        ident,tag,value = l.split(None,2)
+        value=value.strip()
+        try:
+            # SECURITY NOTE: eval() runs arbitrary python code found in the
+            # tag file; only feed this function with trusted files.
+            value = eval(value)
+        except Exception:
+            # not a python expression: deliberately keep the text as is
+            pass
+        tags.setdefault(ident,[]).append((tag,value))
+    return tags
+def sequenceTaggerGenerator(options):
+    """Build the function applying every edition option to one sequence.
+
+    @param options: parsed options declared by ``addSequenceEditTagOptions``
+                    (a ``taxonomy`` attribute is expected once
+                    ``loadTaxonomyDatabase`` has been called on it)
+    @return: a function taking a sequence, modifying it in place and
+             returning it
+
+    Python expressions given on the command line are evaluated with the
+    sequence as local namespace and with ``sequence``, ``counter``, ``math``
+    and, when loaded, ``taxonomy`` as globals. When an expression fails and
+    ``options.onlyValid`` is false, the expression text itself is used as
+    value; otherwise the original exception is propagated.
+    """
+    toDelete = options.deleteTags[:]
+    # entries without ':' are silently ignored
+    toRename = [x.split(':',1) for x in options.renameTags if len(x.split(':',1))==2]
+    toSet    = [x.split(':',1) for x in options.setTags if len(x.split(':',1))==2]
+    newId    = options.setIdentifier
+    newDef   = options.setDefinition
+    newSeq   = options.setSequence
+    clear    = options.clear
+    keep     = set(options.keep)
+    length   = options.length
+    run      = options.run
+    uniqids  = options.uniqids
+    # one-element list so that the closure below can update the count
+    counter  = [0]
+    loadTaxonomyDatabase(options)
+    if options.taxonomy is not None:
+        annoteRank=options.taxonrank
+    else:
+        annoteRank=[]
+    if options.mcl is not None:
+        # close the file as soon as it is parsed instead of leaking the handle
+        with open(options.mcl) as mclFile:
+            parts = readMCLFile(mclFile)
+    else:
+        parts = False
+    if options.taglist is not None:
+        with open(options.taglist) as tagFile:
+            tags = readTagFile(tagFile)
+    else:
+        tags = False
+    if uniqids:
+        # identifier -> number of times it has been produced so far
+        idlist = {}
+    def evaluate(expression,seq):
+        # Evaluate a user expression in the context of one sequence.
+        environ = {'sequence':seq, 'counter':counter[0], 'math':math}
+        if options.taxonomy is not None:
+            environ['taxonomy'] = options.taxonomy
+        return eval(expression,environ,seq)
+    def evaluateOrKeep(expression,seq):
+        # Evaluate the expression; on failure fall back on its text unless
+        # only valid expressions are allowed.
+        try:
+            return evaluate(expression,seq)
+        except Exception:
+            if options.onlyValid:
+                # bare raise keeps the traceback of the failing expression
+                raise
+            return expression
+    def sequenceTagger(seq):
+        if counter[0]>=0:
+            counter[0]+=1
+        if clear or keep:
+            # snapshot the keys: the mapping is modified while looping
+            for k in list(seq.keys()):
+                if k not in keep:
+                    del seq[k]
+        else:
+            for i in toDelete:
+                if i in seq:
+                    del seq[i]
+            for o,n in toRename:
+                if o in seq:
+                    seq[n]=seq[o]
+                    del seq[o]
+        for rank in annoteRank:
+            if 'taxid' in seq:
+                taxid = seq['taxid']
+                if taxid is not None:
+                    rtaxid = options.taxonomy.getTaxonAtRank(taxid,rank)
+                    if rtaxid is not None:
+                        scn = options.taxonomy.getScientificName(rtaxid)
+                    else:
+                        scn=None
+                    seq[rank]=rtaxid
+                    seq["%s_name"%rank]=scn
+        if parts and seq.id in parts:
+            seq['cluster']=parts[seq.id]
+        if tags and seq.id in tags:
+            for t,v in tags[seq.id]:
+                seq[t]=v
+        if options.addrank:
+            seq['seq_rank']=counter[0]
+        for i,v in toSet:
+            seq[i]=evaluateOrKeep(v,seq)
+        if length:
+            seq['seq_length']=len(seq)
+        if newId is not None:
+            seq.id=evaluateOrKeep(newId,seq)
+        if newDef is not None:
+            seq.definition=evaluateOrKeep(newDef,seq)
+        if newSeq is not None:
+            val = evaluateOrKeep(newSeq,seq)
+            if hasattr(seq, '_seq'):
+                seq._seq=str(val).lower()
+                # keep an existing length tag consistent with the new sequence
+                if 'seq_length' in seq:
+                    seq['seq_length']=len(seq)
+        if run is not None:
+            # run for its side effects only: the result is discarded and a
+            # failure is ignored unless only valid expressions are allowed
+            try:
+                evaluate(run,seq)
+            except Exception:
+                if options.onlyValid:
+                    raise
+        if uniqids:
+            n = idlist.get(seq.id,0)
+            if (n > 0):
+                # identifier already used: append ".<n>" until a free one is found
+                newid = seq.id
+                while (n > 0):
+                    old = newid
+                    newid = "%s.%d" % (old,n)
+                    n = idlist.get(newid,0)
+                idlist[old]+=1
+                seq.id=newid
+            idlist[seq.id]=1
+        return seq
+    return sequenceTagger
\ No newline at end of file
diff --git a/src/obitools/options/bioseqfilter.py b/src/obitools/options/bioseqfilter.py
new file mode 100644
index 0000000..5dd4491
--- /dev/null
+++ b/src/obitools/options/bioseqfilter.py
@@ -0,0 +1,121 @@
+import re
+from obitools.options.taxonomyfilter import addTaxonomyFilterOptions
+from _bioseqfilter import filterGenerator,sequenceFilterIteratorGenerator
+def _sequenceOptionCallback(options,opt,value,parser):
+    """Callback of ``--sequence``: store ``value`` compiled as a case-insensitive
+    regular expression in ``parser.values.sequencePattern``."""
+    parser.values.sequencePattern = re.compile(value,re.I)
+def _defintionOptionCallback(options,opt,value,parser):
+    """Callback of ``--definition``: store ``value`` compiled as a regular
+    expression in ``parser.values.definitionPattern``."""
+    parser.values.definitionPattern = re.compile(value)
+def _identifierOptionCallback(options,opt,value,parser):
+    """Callback of ``--identifier``: store ``value`` compiled as a regular
+    expression in ``parser.values.identifierPattern``."""
+    parser.values.identifierPattern = re.compile(value)
+def _attributeOptionCallback(options,opt,value,parser):
+    """Callback of ``--attribute``: register one ``name:pattern`` filter.
+
+    @param options: first callback argument (unused here)
+    @param opt: option string as seen on the command line (unused)
+    @param value: text of the form ``attribute_name:regular_pattern``
+    @param parser: parser whose ``values.attributePatterns`` dict receives
+                   the compiled pattern under the attribute name
+    @raise ValueError: when ``value`` contains no ':'
+    """
+    # The dict lives on parser.values, so its presence must be tested there:
+    # testing another object recreates it on each call and drops the patterns
+    # of the previous -a options.
+    if getattr(parser.values,'attributePatterns',None) is None:
+        parser.values.attributePatterns={}
+    attribute,pattern=value.split(':',1)
+    parser.values.attributePatterns[attribute]=re.compile(pattern)
+def _predicatOptionCallback(options,opt,value,parser):
+    """Option callback appending ``value`` to ``parser.values.predicats``.
+
+    @param options: first callback argument (unused here)
+    @param opt: option string as seen on the command line (unused)
+    @param value: python expression given by the user
+    @param parser: parser whose ``values.predicats`` list is extended; the
+                   list is created when it is missing or None
+    """
+    # create and fill the list on the same object (parser.values)
+    if getattr(parser.values,'predicats',None) is None:
+        parser.values.predicats=[]
+    parser.values.predicats.append(value)
+def addSequenceFilteringOptions(optionManager):
+    """Declare the sequence filtering options.
+
+    @param optionManager: option parser on which the 'Filtering options'
+                          group is created; the taxonomy filtering options
+                          are added to it afterwards
+    """
+    group = optionManager.add_option_group('Filtering options')
+    group.add_option('-s','--sequence',
+                             action="callback", callback=_sequenceOptionCallback,
+                             metavar="<REGULAR_PATTERN>",
+                             type="string",
+                             help="regular expression pattern used to select "
+                                  "the sequence. The pattern is case insensitive")
+    group.add_option('-D','--definition',
+                             action="callback", callback=_defintionOptionCallback,
+                             type="string",
+                             metavar="<REGULAR_PATTERN>",
+                             help="regular expression pattern matched against "
+                                  "the definition of the sequence. "
+                                  "The pattern is case sensitive")
+    group.add_option('-I','--identifier',
+                             action="callback", callback=_identifierOptionCallback,
+                             type="string",
+                             metavar="<REGULAR_PATTERN>",
+                             help="regular expression pattern matched against "
+                                  "the identifier of the sequence. "
+                                  "The pattern is case sensitive")
+    group.add_option('--id-list',
+                             action="store", dest="idlist",
+                             metavar="<FILENAME>",
+                             type="string",
+                             default=None,
+                             help="file containing identifiers of sequences to select")
+    # adjacent literals are concatenated: every fragment carries its own
+    # trailing space so that sentences do not run into each other
+    group.add_option('-a','--attribute',
+                             action="callback", callback=_attributeOptionCallback,
+                             type="string",
+                             metavar="<ATTRIBUTE_NAME>:<REGULAR_PATTERN>",
+                             help="regular expression pattern matched against "
+                                  "the attributes of the sequence. "
+                                  "the value of this attribute is of the form : "
+                                  "attribute_name:regular_pattern. "
+                                  "The pattern is case sensitive. "
+                                  "Several -a option can be used on the same "
+                                  "command line.")
+    group.add_option('-A','--has-attribute',
+                             action="append",
+                             type="string",
+                             dest="has_attribute",
+                             default=[],
+                             metavar="<ATTRIBUTE_NAME>",
+                             help="select sequence with attribute <ATTRIBUTE_NAME> "
+                                   "defined")
+    group.add_option('-p','--predicat',
+                             action="append", dest="predicats",
+                             metavar="<PYTHON_EXPRESSION>",
+                             help="python boolean expression to be evaluated in the "
+                                  "sequence context. The attribute name can be "
+                                  "used in the expression as variable name. "
+                                  "An extra variable named 'sequence' refers "
+                                  "to the sequence object itself. "
+                                  "Several -p option can be used on the same "
+                                  "command line.")
+    group.add_option('-L','--lmax',
+                             action='store',
+                             metavar="<##>",
+                             type="int",dest="lmax",
+                             help="keep sequences shorter than lmax")
+    group.add_option('-l','--lmin',
+                             action='store',
+                             metavar="<##>",
+                             type="int",dest="lmin",
+                             help="keep sequences longer than lmin")
+    group.add_option('-v','--inverse-match',
+                             action='store_true',
+                             default=False,
+                             dest="invertedFilter",
+                             help="revert the sequence selection "
+                                  "[default : %default]")
+    addTaxonomyFilterOptions(optionManager)
\ No newline at end of file
diff --git a/src/obitools/options/taxonomyfilter.py b/src/obitools/options/taxonomyfilter.py
new file mode 100644
index 0000000..5526c79
--- /dev/null
+++ b/src/obitools/options/taxonomyfilter.py
@@ -0,0 +1,6 @@
+from obitools.ecopcr.options import addTaxonomyDBOptions, \
+                                    addTaxonomyFilterOptions, \
+                                    loadTaxonomyDatabase, \
+                                    taxonomyFilterGenerator, \
+                                    taxonomyFilterIteratorGenerator
diff --git a/src/obitools/parallel/__init__.py b/src/obitools/parallel/__init__.py
new file mode 100644
index 0000000..2aa1b07
--- /dev/null
+++ b/src/obitools/parallel/__init__.py
@@ -0,0 +1,99 @@
+import threading
+class TaskPool(object):
+    """Pool of worker threads applying ``function`` to the items of ``iterable``.
+
+    Iterating over the pool feeds the workers and yields the results they
+    produced. The code does not show any ordering guarantee between results
+    and inputs.
+    """
+    def __init__(self,iterable,function,count=2):
+        """
+        @param iterable: source of argument sets; each item is either a dict
+                         (keyword arguments) or a sequence (positional ones)
+        @param function: callable run by the workers
+        @param count: number of Task workers created
+        """
+        self.pool = []                  # every registered Task
+        self.queue= []                  # Tasks ready to receive data
+        self.plock= threading.Lock()    # protects self.pool
+        self.qlock= threading.Lock()    # protects self.queue
+        self.function=function
+        self.event=threading.Event()    # set when a Task becomes ready
+        self.iterable=iterable
+        for i in xrange(count):
+            # each Task registers itself on this pool from its constructor
+            Task(self)
+    def register(self,task):
+        """Add ``task`` to the pool and mark it as ready."""
+        self.plock.acquire()
+        self.pool.append(task)
+        self.plock.release()
+        self.ready(task)
+    def unregister(self,task):
+        """Wait for the thread of ``task`` to end, then remove it from the pool."""
+        task.thread.join()
+        self.plock.acquire()
+        self.pool.remove(task)
+        self.plock.release()
+    def ready(self,task):
+        """Queue ``task`` as available and wake up the dispatching loop."""
+        self.qlock.acquire()
+        self.queue.append(task)
+        self.qlock.release()
+        self.event.set()
+    def __iter__(self):
+        """Dispatch every item of the iterable and yield the workers' results."""
+        for data in self.iterable:
+            # block until at least one task is available
+            while not self.queue:
+                self.event.wait()
+            self.event.clear()
+            self.qlock.acquire()
+            task=self.queue.pop(0)
+            self.qlock.release()
+            # a task that already ran holds its last result in 'rep'
+            if hasattr(task, 'rep'):
+                yield task.rep
+            #print "send ",data
+            if isinstance(data,dict):
+                task.submit(**data)
+            else:
+                task.submit(*data)
+        # input exhausted: stop the workers one by one and collect what is left
+        while self.pool:
+            self.pool[0].finish()
+            while self.queue:
+               self.event.clear()
+               self.qlock.acquire()
+               task=self.queue.pop(0)
+               self.qlock.release()
+               if hasattr(task, 'rep'):
+                   yield task.rep
+class Task(object):
+    """Worker of a TaskPool: a thread running the pool function on demand."""
+    def __init__(self,pool):
+        """Create the worker, start its thread and register it on ``pool``."""
+        self.pool = pool
+        self.lock = threading.Lock()        # held while a call is running
+        self.dataOk = threading.Event()     # set when arguments are available
+        self.repOk = threading.Event()      # not used elsewhere in this class
+        self.args = None
+        self.kwargs=None
+        self.stop=False
+        # the thread body is this object's __call__
+        self.thread = threading.Thread(target=self)
+        self.thread.start()
+        self.pool.register(self)
+    def __call__(self):
+        """Thread body: run the pool function each time data is submitted."""
+        self.dataOk.wait()
+        while(not self.stop):
+            self.lock.acquire()
+            self.dataOk.clear()
+            # the result is kept in 'rep' until the pool collects it
+            self.rep=self.pool.function(*self.args,**self.kwargs)
+            self.pool.ready(self)
+            self.lock.release()
+            self.dataOk.wait()
+    def submit(self,*args,**kwargs):
+        """Store the arguments of the next call and wake up the thread."""
+        self.args=args
+        self.kwargs=kwargs
+        self.dataOk.set()
+    def finish(self):
+        """Ask the thread to stop and unregister the task from its pool.
+
+        The lock is acquired (so a running call is waited for) and is not
+        released by this method.
+        """
+        self.lock.acquire()
+        self.stop=True
+        # wake the thread so that it can see the stop flag
+        self.dataOk.set()
+        self.pool.unregister(self)
diff --git a/src/obitools/parallel/jobqueue.py b/src/obitools/parallel/jobqueue.py
new file mode 100644
index 0000000..9df4804
--- /dev/null
+++ b/src/obitools/parallel/jobqueue.py
@@ -0,0 +1,183 @@
+import threading
+from logging import warning,info
+from time import sleep,time
+from obitools.parallel import TaskPool
+class JobPool(dict):
+    '''
+    JobPool is dedicated to manage a job queue. These jobs
+    will run in a limited number of thread.
+
+    The pool is a dict mapping each job identifier to its job.
+    '''
+    def __init__(self,count,precision=0.01):
+        '''
+        @param count: number of thread dedicated to this JobPool
+        @type count: int
+        @param precision: delay between two check for new job (in second)
+        @type precision: float
+        '''
+        self._iterator = JobIterator(self)
+        self._taskPool = TaskPool(self._iterator,
+                                  self._runJob,
+                                  count)
+        self._precision=precision
+        # jobs submitted but not yet handed over to a worker
+        self._toRun=set()
+        # background thread consuming the task pool results
+        self._runnerThread = threading.Thread(target=self._runner)
+        self._runnerThread.start()
+        self._finalyzed=False
+    def _runner(self):
+        '''
+        Body of the runner thread: log each finished job until
+        the task pool is exhausted.
+        '''
+        for rep in self._taskPool:
+            info('Job %d finnished' % id(rep))
+        info('All jobs in %d JobPool finished' % id(self))
+    def _jobIterator(self):
+        '''
+        @return: the iterator providing the jobs to run
+        '''
+        return self._iterator
+    def _runJob(self,job):
+        '''
+        Run a job in a worker thread, recording its start time,
+        result and end time on the job itself.
+        @return: the job
+        '''
+        job.started= time()
+        info('Job %d started' % id(job))
+        job.result = job()
+        job.ended  = time()
+        job.finished=True
+        return job
+    def submit(self,job,priority=1.0,userid=None):
+        '''
+        Submit a new job to the JobPool.
+        @param job: the new submited job
+        @type job: Job instance
+        @param priority: priority level of this job (higher is better)
+        @type priority: float
+        @param userid: a user identifier (Default is None)
+        @return: job identifier
+        @rtype: int
+        '''
+        assert not self._finalyzed,\
+          "This jobPool does not accept new job"
+        if job.submitted is not None:
+            warning('Job %d was already submitted' % id(job))
+            return id(job)
+        job.submitted = time()
+        job.priority  = priority
+        job.userid    = userid
+        i=id(job)
+        # store the identifier itself, not the builtin function id
+        job.id=i
+        self[i]=job
+        self._toRun.add(job)
+        info('Job %d submitted' % i)
+        return i
+    def finalyze(self):
+        '''
+        Indicate to the JobPool, that no new jobs will
+        be submitted.
+        '''
+        self._iterator.finalyze()
+        self._finalyzed=True
+    def __del__(self):
+        self.finalyze()
+class JobIterator(object):
+    '''
+    Iterator handing over the jobs of a JobPool, best
+    run score first, until the pool is finalyzed and empty.
+    '''
+    def __init__(self,pool):
+        '''
+        @param pool: the pool whose waiting jobs are served
+        @type pool: JobPool instance
+        '''
+        self._pool = pool
+        self._finalyze=False
+        # serializes concurrent calls to next()
+        self._nextLock=threading.Lock()
+    def __iter__(self):
+        return self
+    def finalyze(self):
+        '''
+        Indicate to the JobIterator, that no new jobs will
+        be submitted.
+        '''
+        self._finalyze=True
+    def next(self):
+        '''
+        Wait for a job to be available and return the one with
+        the highest run score.
+        @return: the next job to run, as a one element tuple
+        @rtype: tuple
+        @raise StopIteration: when the iterator is finalyzed and
+                              no job is waiting
+        '''
+        # the with statement releases the lock on every exit path,
+        # including an exception raised by runScore()
+        with self._nextLock:
+            while self._pool._toRun or not self._finalyze:
+                rep = None
+                # None rather than 0: a job whose score is zero or negative
+                # must still be selectable, otherwise this loop never ends
+                maxScore=None
+                for k in self._pool._toRun:
+                    s = k.runScore()
+                    if maxScore is None or s > maxScore:
+                        maxScore=s
+                        rep=k
+                if rep is not None:
+                    self._pool._toRun.remove(rep)
+                    return (rep,)
+                # nothing to run yet: poll again after the pool delay
+                sleep(self._pool._precision)
+        info('No more jobs in %d JobPool' % id(self._pool))
+        raise StopIteration
+class Job(object):
+    '''
+    A deferred call of a function with its arguments, which
+    can be queued in a JobPool.
+    '''
+    def __init__(self,pool=None,function=None,*args,**kwargs):
+        '''
+        Build a job and, when a pool is given, submit it at once.
+        @param pool: the JobPool receiving the job, or None to
+                    create the job without submitting it.
+        @type pool: JobPool instance
+        @param function: the function called when the job runs
+        @type function: callable object
+        @param args: positional parameters of the function call
+        @param kwargs: named parameters of the function call
+        @precondition: function cannot be None
+        '''
+        assert function is not None
+        self._function = function
+        self._args=args
+        self._kwargs = kwargs
+        # scheduling information, filled in by JobPool.submit
+        self.submitted = self.priority = self.userid = None
+        self.running = self.finished = False
+        if pool is not None:
+            pool.submit(self)
+    def runScore(self):
+        '''
+        @return: the time elapsed since submission multiplied by the
+                 priority; used to order jobs in the queue
+        @rtype: C{float}
+        '''
+        waited = time() - self.submitted
+        return waited * self.priority
+    def __call__(self):
+        '''
+        Run the job.
+        @return: the value returned by the function
+        '''
+        target = self._function
+        return target(*self._args,**self._kwargs)
\ No newline at end of file
diff --git a/src/obitools/phylogeny/__init__.py b/src/obitools/phylogeny/__init__.py
new file mode 100644
index 0000000..8eb1587
--- /dev/null
+++ b/src/obitools/phylogeny/__init__.py
@@ -0,0 +1,119 @@
+from obitools.graph.tree import Forest,TreeNode
+from obitools.graph import Edge
+class PhylogenicTree(Forest):
+    '''
+    Forest whose nodes and edges are handed out as L{PhylogenicNode}
+    and L{PhylogenicEdge} instances.
+    '''
+    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
+        '''
+        All parameters are forwarded unchanged to C{Forest.__init__}.
+        '''
+        Forest.__init__(self, label, indexer, nodes, edges)
+        # no root node and no comment recorded yet
+        self.root=None
+        self.comment=None
+    def addNode(self,node=None,index=None,**data):
+        '''
+        Add a node to the tree. When neither a label nor an index is
+        given, a label of the form '__<number>' is generated.
+        @param node: the node label or None
+        @param index: the node index or None
+        @param data: forwarded as is to C{Forest.addNode}
+        @return: the value returned by C{Forest.addNode}
+        '''
+        if node is None and index is None:
+            # labels starting with '__' are reported as None by
+            # PhylogenicNode.getLabel
+            node = '__%d' % (len(self._node)+1)
+        return Forest.addNode(self, node, index, **data)
+    def getNode(self,node=None,index=None):
+        '''
+        @param node: a node label, used only when index is None
+        @param index: a node index; has priority over node
+        @return: the requested node
+        @rtype: PhylogenicNode
+        '''
+        if index is None:
+            # NOTE(review): the meaning of the second argument (True)
+            # is defined by the indexer - confirm
+            index = self._index.getIndex(node, True)
+        return PhylogenicNode(index,self)
+    def getEdge(self,node1=None,node2=None,index1=None,index2=None):
+        '''
+        Build the edge object linking two nodes. Each end is resolved
+        through L{getNode}, so an index has priority over a label.
+        @param node1: label of the first node or None
+        @param node2: label of the second node or None
+        @param index1: index of the first node or None
+        @param index2: index of the second node or None
+        @return: the edge between the two nodes
+        @rtype: PhylogenicEdge
+        '''
+        node1=self.getNode(node1, index1)
+        node2=self.getNode(node2, index2)
+        return PhylogenicEdge(node1,node2)
+class PhylogenicNode(TreeNode):
+    '''
+    Node of a L{PhylogenicTree}.
+    '''
+    def getLabel(self):
+        '''
+        @return: the node label, or None when the label starts with
+                 '__' (the prefix of labels generated by
+                 PhylogenicTree.addNode)
+        '''
+        label = TreeNode.getLabel(self)
+        if label[0:2]=='__':
+            return None
+        else:
+            return label
+    def __str__(self):
+        '''
+        @return: a one line text description of the node, of the form
+                 C{index  [label="..." shape="..." key="value" ...]}
+                 (looks like Graphviz dot syntax). Double quotes in
+                 values are backslash escaped.
+        '''
+        if self.index in self.graph._node_attrs:
+            keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
+                              for x in self.graph._node_attrs[self.index].iteritems()]
+                           )
+        else:
+            keys=''
+        # unlabelled nodes are drawn as points, labelled ones as boxes
+        if self.label is None:
+            label=''
+            shape='point'
+        else:
+            label=self.label
+            shape='box'
+        return '%d  [label="%s" shape="%s" %s]' % (self.index,str(label).replace('"','\\"'),shape,keys)   
+    def distanceTo(self,node=None,index=None):  
+        '''
+        Compute the branch length between this node and another one,
+        by summing the 'distance' attribute of the edges along the
+        path returned by C{shortestPathTo}.
+        If one edge of the path has no 'distance', None is returned.
+        @param node: a node label or None
+        @param index: a node index or None. the parameter index  
+                     has a priority on the parameter node.
+        @type index: int
+        @return: the evolutive distance between the two nodes
+        @rtype: int, float or None
+        '''
+        path = self.shortestPathTo(node, index)
+        # the first element of the path is the starting point
+        start = path.pop(0)
+        dist=0
+        for dest in path:
+            edge = self.graph.getEdge(index1=start,index2=dest)
+            if 'distance' in edge:
+                dist+=edge['distance']
+            else:
+                return None
+            start=dest
+        return dist
+    label = property(getLabel, None, None, "Label of the node")
+class PhylogenicEdge(Edge):
+    '''
+    Edge of a L{PhylogenicTree}.
+    '''
+    def __str__(self):
+        '''
+        @return: a one line text description of the edge, of the form
+                 C{index1 -> index2 [key="value" ...]} for a directed
+                 edge and C{index1 -- index2 [...]} otherwise. The
+                 'distance' and 'bootstrap' attributes are not listed.
+        '''
+        # edge attributes are stored on the graph, keyed by the index pair
+        e = (self.node1.index,self.node2.index)
+        if e in self.graph._edge_attrs:
+            keys = "[%s]" % " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
+                                      for x in self.graph._edge_attrs[e].iteritems()
+                                      if x[0] not in ('distance','bootstrap')]
+                                    )
+        else:
+            keys = ""
+        if self.directed:
+            link='->'
+        else:
+            link='--'
+        return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys) 
diff --git a/src/obitools/phylogeny/newick.py b/src/obitools/phylogeny/newick.py
new file mode 100644
index 0000000..cf0330c
--- /dev/null
+++ b/src/obitools/phylogeny/newick.py
@@ -0,0 +1,123 @@
+import re
+import sys
+from obitools.utils import universalOpen
+from obitools.phylogeny import PhylogenicTree
+def subNodeIterator(data):
+    '''
+    Iterate over the direct sub-nodes of a parenthesised newick
+    description.
+    @param data: a node description. When it starts with '(' its last
+                 character is expected to be the matching ')'.
+    @type data: str
+    @return: an iterator over the text of each top level item; commas
+             enclosed in inner parentheses do not split. A description
+             not starting with '(' is yielded unchanged.
+    '''
+    if not data.startswith('('):
+        yield data
+        return
+    level = 0
+    start = 1
+    # index of the last character, the closing parenthesis: computed
+    # up front so that it exists even when there is nothing to loop on
+    last = len(data) - 1
+    for i, c in enumerate(data[1:], 1):
+        if c == '(':
+            level += 1
+        elif c == ')':
+            level -= 1
+        elif c == ',' and not level:
+            yield data[start:i]
+            start = i + 1
+    yield data[start:last]
+# One newick node: optional '(...)' sub-nodes, optional name, optional
+# bootstrap value and optional ':distance'.
+_nodeParser=re.compile('\s*(?P<subnodes>\(.*\))?(?P<name>[^ :]+)? *(?P<bootstrap>[0-9.]+)?(:(?P<distance>-?[0-9.]+))?')
+def nodeParser(data):
+    '''
+    Split the text of a newick node into its components.
+    @param data: the text of the node
+    @type data: str
+    @return: a dictionary with the keys 'subnodes' and 'name' (str or
+             None), 'bootstrap' and 'distance' (float or None). A
+             component absent from the text is None.
+    @rtype: dict
+    '''
+    fields = _nodeParser.match(data).groupdict()
+    # textual components: kept as they are
+    for key in ('name','subnodes'):
+        fields[key] = fields[key] or None
+    # numeric components: converted when present
+    for key in ('bootstrap','distance'):
+        raw = fields[key]
+        fields[key] = float(raw) if raw else None
+    return fields
+def treeParser(data,tree=None,parent=None):
+    '''
+    Recursively load a newick description into a phylogenic tree.
+    @param data: the newick text of a node, sub-nodes included
+    @type data: str
+    @param tree: the tree to fill. When None, a new PhylogenicTree is
+                 created and data is first cleaned with the module
+                 level _cleanTreeData pattern.
+    @param parent: the parent node as accepted by the C{index2}
+                   parameter of C{tree.addEdge}, or None when data
+                   describes the root
+    @return: the tree
+    '''
+    if tree is None:
+        tree = PhylogenicTree()
+        data = _cleanTreeData.sub(' ',data).strip()
+    parsedNode = nodeParser(data)
+    if parent is not None:
+        son,parent = tree.addEdge(node1=parsedNode['name'], 
+                                  index2=parent,
+                                  distance=parsedNode['distance'],
+                                  bootstrap=parsedNode['bootstrap'])
+    else:
+        # addNode() takes the label as 'node'. Passed as 'node1' it was
+        # swallowed by **data: the root lost its name and received a
+        # stray 'node1' attribute instead.
+        son = tree.addNode(node=parsedNode['name'])
+        tree.root=son
+    if parsedNode['subnodes']:
+        for subnode in subNodeIterator(parsedNode['subnodes']):
+            treeParser(subnode,tree,son)
+    return tree
+def treeIterator(file):
+    '''
+    Iterate over the trees described in a newick file.
+    The whole file is read at once. The first text matched by the
+    module level _treecomment pattern, if any, is stored in the
+    C{comment} attribute of every tree.
+    @param file: the source of data, as accepted by C{universalOpen}
+    @return: an iterator over the trees, one per non empty ';'
+             terminated description
+    '''
+    file = universalOpen(file)
+    data = file.read()
+    comment = _treecomment.findall(data)
+    data=_treecomment.sub('',data).strip()
+    if comment:
+        comment=comment[0]
+    else:
+        comment=None
+    for tree in data.split(';'):
+        # split() also returns what follows the last ';', which is
+        # empty: parsing it produced a spurious tree reduced to an
+        # anonymous root
+        if not tree.strip():
+            continue
+        t = treeParser(tree)
+        if comment:
+            t.comment=comment
+        yield t
+def nodeWriter(tree,node,deep=0):
+    '''
+    Format a node and, recursively, its children in newick syntax.
+    @param tree: the tree owning the node; only its
+                 C{childNodeIterator} method is used
+    @param node: the node to write; its C{_name}, C{_dist} and
+                 C{_bootstrap} attributes are read
+    @param deep: the nesting level, used to indent the output
+    @type deep: int
+    @return: the newick text of the node, without the final ';'
+    @rtype: str
+    '''
+    name,distance,bootstrap = node._name,node._dist,node._bootstrap
+    # a missing piece of information is simply not written
+    name = '' if name is None else name
+    distance = '' if distance is None else ':%6.5f' % distance
+    bootstrap = '' if bootstrap is None else ' %d' % int(bootstrap)
+    indent = ' ' * (deep+1)
+    children = [nodeWriter(tree, child, deep+1)
+                for child in tree.childNodeIterator(node)]
+    subnodes = (',\n' + indent).join(children)
+    if subnodes:
+        # one child per line, between parentheses aligned on the parent
+        subnodes = ''.join(['(\n', indent, subnodes, '\n', ' ' * deep, ')'])
+    return '%s%s%s%s' % (subnodes,name,bootstrap,distance)
+def treeWriter(tree,startnode=None):
+    '''
+    Format a tree, or a part of it, in newick syntax.
+    @param tree: the tree to write
+    @param startnode: the node to start from; when None the node
+                      returned by C{tree.getRoot()} is used
+    @return: the newick text, ';' included
+    @rtype: str
+    '''
+    root = tree.getRoot() if startnode is None else startnode
+    return nodeWriter(tree,root)+';'
diff --git a/src/obitools/profile/__init__.py b/src/obitools/profile/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/profile/_profile.pxd b/src/obitools/profile/_profile.pxd
new file mode 100644
index 0000000..8d79b35
--- /dev/null
+++ b/src/obitools/profile/_profile.pxd
@@ -0,0 +1,72 @@
+cdef import from "stdlib.h":
+    void* malloc(int size)  except NULL
+    void* realloc(void* chunk,int size)  except NULL
+    void free(void* chunk)
+cdef import from "string.h":
+    void bzero(void *s, size_t n)
+    void memset(void* chunk,int car,int length)
+    void memcpy(void* s1, void* s2, int n)
+    int memcmp(void* s1, void* s2, int n)
+cdef import from "math.h":
+    double exp(double x)
+cdef extern from *:
+    ctypedef int* int_p "int*"
+# C level storage of a DNA profile: integer counts per position.
+# A, C, G, T, Og and Eg point into one single block of 6*length ints
+# allocated by allocateDNAProfile; only A is passed to free().
+cdef struct dnaprofile_t:
+    int     length
+    int     weight
+    # NOTE(review): 'value' is not used by the visible _profile.pyx code
+    int value
+    double  pseudo
+    int_p   A
+    int_p   C
+    int_p   G
+    int_p   T
+    int_p   Og
+    int_p   Eg
+cdef dnaprofile_t* allocateDNAProfile(int size)
+cdef void freeDNAProfile(dnaprofile_t* profile)
+cdef void copyDNAProfile(dnaprofile_t* dest, dnaprofile_t* source)
+# View on a C array of ints that the object does not own.
+cdef class _MemIntArray:
+    cdef    int_p   start
+    cdef    int     size
+    cdef initialize(self, int_p start,int size)
+    # NOTE(review): no 'except' value is declared although the
+    # implementation raises IndexError - confirm the error propagates
+    cdef int normalize(self, int pos)
+    cpdef double frequency(self,int pos, int weight, double pseudo=?)
+# Extension type wrapping a dnaprofile_t and exposing each of its six
+# count arrays through a _MemIntArray view.
+cdef class DNAProfile:
+    cdef dnaprofile_t* profile
+    cdef _MemIntArray _baseA
+    cdef _MemIntArray _baseC
+    cdef _MemIntArray _baseG
+    cdef _MemIntArray _baseT
+    cdef _MemIntArray _Og
+    cdef _MemIntArray _Eg
+    cpdef bint equal(self,DNAProfile profile)
+    cpdef DNAProfile add(DNAProfile self,DNAProfile profile)
+    # error return values of the two probability methods: 1. and -1.
+    cpdef double lproba(DNAProfile self,DNAProfile profile) except 1.
+    cpdef double proba(DNAProfile self,DNAProfile profile) except -1.
+    cdef void _initLetter(self)
+    cdef void _initFromString(self, char *seq)
+    # frequency of each symbol at a given position
+    cpdef double fA(self,int pos)
+    cpdef double fC(self,int pos)
+    cpdef double fG(self,int pos)
+    cpdef double fT(self,int pos)
+    cpdef double fOg(self,int pos)
+    cpdef double fEg(self,int pos)
+    cpdef int WP(self)
\ No newline at end of file
diff --git a/src/obitools/profile/_profile.pyx b/src/obitools/profile/_profile.pyx
new file mode 100644
index 0000000..459ed57
--- /dev/null
+++ b/src/obitools/profile/_profile.pyx
@@ -0,0 +1,289 @@
+from obitools import NucSequence
+from math import log
+from _profile cimport *
+cdef dnaprofile_t* allocateDNAProfile(int size):
+    # Allocate a profile of 'size' positions with all counts at zero,
+    # weight and pseudo set to 0.
+    # The six count arrays share one malloc'ed block of 6*size ints:
+    # A is the start of the block, the others are offsets into it.
+    # NOTE(review): the 'value' field of the structure is left
+    # uninitialised here.
+    cdef dnaprofile_t* profile 
+    cdef int sblock
+    profile = <dnaprofile_t*>malloc(sizeof(dnaprofile_t))
+    profile.length = size
+    profile.weight = 0
+    profile.pseudo = 0
+    sblock = sizeof(int)*6*size
+    profile.A = <int*>malloc(sblock)
+    bzero(<void*>profile.A, sblock)
+    profile.C = profile.A + size
+    profile.G = profile.C + size
+    profile.T = profile.G + size
+    profile.Og= profile.T + size
+    profile.Eg= profile.Og+ size
+    return profile
+cdef void freeDNAProfile(dnaprofile_t *profile):
+    # Release a profile; a NULL pointer is accepted.
+    if profile is not NULL:
+        # A is the start of the block shared by the six count arrays,
+        # so freeing it releases all of them
+        if profile.A is not NULL:
+            free(profile.A)
+        free(profile)
+cdef void copyDNAProfile(dnaprofile_t* dest, dnaprofile_t *source):
+    # Copy the counts, the weight and the pseudo value of 'source'
+    # into 'dest'. Both profiles must exist and have the same length.
+    cdef int size
+    assert source is not NULL and dest is not NULL
+    assert source.length==dest.length
+    # the six count arrays are contiguous from A: one copy is enough
+    size = source.length * 6 * sizeof(int)
+    memcpy(dest.A,source.A,size)
+    dest.weight=source.weight
+    dest.pseudo=source.pseudo
+cdef class _MemIntArray:
+    # Sequence like view on a C array of ints. The memory is not
+    # owned: the view is attached to it with initialize().
+    cdef initialize(self, int* begin,int size):
+        # attach the view to 'size' ints starting at 'begin' (no copy)
+        self.start=begin
+        self.size=size
+    cdef int normalize(self, int pos):
+        # Convert a possibly negative position to a position counted
+        # from the start; IndexError is raised when out of range.
+        # NOTE(review): the method has no 'except' declaration, so
+        # depending on the Cython version the IndexError may be printed
+        # and ignored instead of propagated - confirm.
+        if pos < 0:
+            pos = self.size + pos
+        if pos >= self.size or pos < 0:
+            raise IndexError
+        return pos    
+    def __init__(self):
+        self.start=NULL
+    def __getitem__(self, int pos):
+        pos = self.normalize(pos)   
+        return self.start[pos]
+    def __setitem__(self,int pos, int value):
+        pos = self.normalize(pos)   
+        self.start[pos]=value   
+    def __len__(self):
+        return self.size
+    cpdef double frequency(self,int pos, int weight, double pseudo=0):
+        # Frequency derived from the count stored at 'pos', for a
+        # profile of total weight 'weight', with a pseudo count.
+        # Raises ZeroDivisionError when weight is 0.
+        # NOTE(review): pseudo is scaled by weight below and multiplied
+        # by weight again in the numerator, but only once in the
+        # denominator - confirm that this is the intended formula.
+        pos = self.normalize(pos) 
+        if weight==0:
+            raise ZeroDivisionError
+        pseudo*=weight
+        return  (<double>self.start[pos]+ weight * pseudo/6) / (<double>weight + pseudo)
+cdef class DNAProfile:
+    '''
+    Count profile of one or several DNA sequences of the same length.
+    For each position six counts are kept: A, C, G, T, Og (a '-' not
+    preceded by another '-') and Eg (a '-' following a '-').
+    '''
+    cdef void _initLetter(self):
+        # build the six Python visible views on the C count arrays
+        cdef dnaprofile_t* profile = self.profile
+        self._baseA  = _MemIntArray()
+        self._baseA.initialize(profile.A ,profile.length)
+        self._baseC  = _MemIntArray()
+        self._baseC.initialize(profile.C ,profile.length)
+        self._baseG  = _MemIntArray()
+        self._baseG.initialize(profile.G ,profile.length)
+        self._baseT  = _MemIntArray()
+        self._baseT.initialize(profile.T ,profile.length)
+        self._Og = _MemIntArray()
+        self._Og.initialize(profile.Og,profile.length)
+        self._Eg = _MemIntArray()
+        self._Eg.initialize(profile.Eg,profile.length)
+    def __init__(self,sequence=None,size=None,pseudo=0):
+        '''
+        @param sequence: the data used to fill the profile: a
+                         NucSequence, a str or another DNAProfile.
+                         When None an empty profile is built.
+        @param size: the length of the profile; ignored when sequence
+                     is given (its length is used)
+        @param pseudo: the pseudo count factor used by the frequency
+                       methods
+        '''
+        if sequence is not None:
+            size = len(sequence)
+        self.profile = allocateDNAProfile(size)
+        self.profile.pseudo=pseudo
+        if sequence is not None:
+            if isinstance(sequence,NucSequence):
+                seq = str(sequence).lower()
+                self._initFromString(seq)
+            elif isinstance(sequence,str):
+                seq = sequence.lower()
+                self._initFromString(seq)
+            elif isinstance(sequence,DNAProfile):
+                # copies counts, weight and pseudo of the other profile
+                copyDNAProfile(self.profile,(<DNAProfile>sequence).profile)
+        self._initLetter()
+    def __dealloc__(self):
+        freeDNAProfile(self.profile)
+    def __hash__(self):
+        # NOTE(review): identity based hash whereas == compares the
+        # content - equal profiles have different hashes
+        return id(self)
+    def __str__(self):
+        '''
+        @return: a table with one line per position giving the six
+                 counts, preceded by a header line
+        '''
+        cdef int i
+        cdef int lseq = self.profile.length
+        cdef list output=[]
+        cdef str  line
+        cdef int* A= self.profile.A
+        cdef int* C= self.profile.C
+        cdef int* G= self.profile.G
+        cdef int* T= self.profile.T
+        cdef int* Og=self.profile.Og
+        cdef int* Eg=self.profile.Eg
+        for i in range(lseq):
+            line = "%6d %6d %6d %6d %6d %6d %6d " % (i,A[i],C[i],G[i],T[i],Og[i],Eg[i])
+            output.append(line)
+        line = "\n".join(output)
+        return "  pos       A      C      G      T      Og     Eg\n"+line
+    def __len__(self):
+        return self.profile.length
+    cpdef bint equal(DNAProfile self,DNAProfile profile):
+        '''
+        @return: True when both profiles have the same length, the
+                 same weight and the same counts (pseudo is not
+                 compared)
+        '''
+        cdef int sblock
+        cdef bint r
+        cdef int size
+        r=False
+        if self.profile.length == profile.profile.length :
+            if self.profile.weight == profile.profile.weight :
+                size = self.profile.length
+                sblock = sizeof(int)*6*size
+                # the six arrays are contiguous: compared in one call
+                r = memcmp(<void*>self.profile.A, <void*>profile.profile.A, sblock) == 0
+        return r
+    def __richcmp__(DNAProfile self,DNAProfile profile,int op):
+        # only == (op 2) is implemented
+        if op==2:
+            return self.equal(profile)
+        else:
+            return NotImplemented
+    cpdef DNAProfile add(DNAProfile self,DNAProfile profile):
+        '''
+        @return: a new profile whose counts and weight are the sums of
+                 those of both profiles and whose pseudo is the larger
+                 of the two
+        '''
+        cdef DNAProfile newProfile
+        cdef int p
+        assert self.profile.length==profile.profile.length,'Only profiles with identical length can be added'
+        pc = max(self.profile.pseudo,profile.profile.pseudo)
+        newProfile = DNAProfile(size=self.profile.length,pseudo=pc)
+        for p in xrange(self.profile.length) :
+            # A is read from the C array like the five other counts
+            # (it used to go through the Python level 'A' property)
+            newProfile.profile.A[p] = self.profile.A[p] + profile.profile.A[p]
+            newProfile.profile.C[p] = self.profile.C[p] + profile.profile.C[p]
+            newProfile.profile.T[p] = self.profile.T[p] + profile.profile.T[p]
+            newProfile.profile.G[p] = self.profile.G[p] + profile.profile.G[p]
+            newProfile.profile.Og[p] = self.profile.Og[p] + profile.profile.Og[p]
+            newProfile.profile.Eg[p] = self.profile.Eg[p] + profile.profile.Eg[p]
+        newProfile.profile.weight = self.profile.weight + profile.profile.weight
+        return newProfile
+    def __add__(DNAProfile self,DNAProfile profile):
+        return self.add(profile)
+    cpdef double lproba(DNAProfile self,DNAProfile profile) except 1.:
+        '''
+        @return: the sum over the positions of the logarithm of the
+                 sum, over the six symbols, of the products of the
+                 frequencies observed in both profiles
+        '''
+        cdef float score
+        cdef float prob
+        cdef int pos
+        assert self.profile.length==profile.profile.length,'Only profiles with identical length can be compared'
+        score = 0
+        for pos in xrange(self.profile.length) :
+            prob = self.fA(pos)*profile.fA(pos) + \
+                   self.fC(pos)*profile.fC(pos) + \
+                   self.fT(pos)*profile.fT(pos) + \
+                   self.fG(pos)*profile.fG(pos) + \
+                   self.fOg(pos)*profile.fOg(pos) + \
+                   self.fEg(pos)*profile.fEg(pos)
+            # NOTE(review): math.log raises ValueError when prob is 0
+            score += log(prob)
+        return score
+    cpdef double proba(DNAProfile self,DNAProfile profile) except -1.:
+        '''
+        @return: the exponential of L{lproba}
+        '''
+        return exp(self.lproba(profile))
+    cdef void _initFromString(self, char *seq):
+        # Fill the counts from a lower case sequence and set the
+        # weight to 1. Characters other than a, c, g, t and '-' are
+        # not counted.
+        cdef int i=0
+        cdef int lseq = len(seq)
+        cdef int* A= self.profile.A
+        cdef int* C= self.profile.C
+        cdef int* G= self.profile.G
+        cdef int* T= self.profile.T
+        cdef int* Og=self.profile.Og
+        cdef int* Eg=self.profile.Eg
+        for i in range(lseq):
+            nuc = seq[i]
+            if nuc=='a':
+                A[i]=1
+            elif nuc=='c':
+                C[i]=1
+            elif nuc=='g':
+                G[i]=1
+            elif nuc=='t':
+                T[i]=1
+            elif nuc=='-':
+                # a '-' following a '-' goes to Eg, the first one to Og
+                if i > 0 and seq[i-1]=='-':
+                    Eg[i]=1
+                else:
+                    Og[i]=1
+        self.profile.weight = 1
+    # read only access to the six count arrays
+    property A:
+        def __get__(self):
+            return self._baseA
+    property C:
+        def __get__(self):
+            return self._baseC
+    property G:
+        def __get__(self):
+            return self._baseG
+    property T:
+        def __get__(self):
+            return self._baseT
+    property Og:
+        def __get__(self):
+            return self._Og
+    property Eg:
+        def __get__(self):
+            return self._Eg
+    # frequency of each symbol at position pos, computed by
+    # _MemIntArray.frequency with the weight and pseudo of the profile
+    cpdef double fA(self,int pos):
+        return self.A.frequency(pos,self.profile.weight,self.profile.pseudo)
+    cpdef double fC(self,int pos):
+        return self.C.frequency(pos,self.profile.weight,self.profile.pseudo)
+    cpdef double fG(self,int pos):
+        return self.G.frequency(pos,self.profile.weight,self.profile.pseudo)
+    cpdef double fT(self,int pos):
+        return self.T.frequency(pos,self.profile.weight,self.profile.pseudo)
+    cpdef double fOg(self,int pos):
+        return self.Og.frequency(pos,self.profile.weight,self.profile.pseudo)
+    cpdef double fEg(self,int pos):
+        return self.Eg.frequency(pos,self.profile.weight,self.profile.pseudo)
+    cpdef int WP(self):
+        '''
+        @return: the weight of the profile
+        '''
+        return self.profile.weight
diff --git a/src/obitools/sample.py b/src/obitools/sample.py
new file mode 100644
index 0000000..7c68e96
--- /dev/null
+++ b/src/obitools/sample.py
@@ -0,0 +1,87 @@
+'''
+Created on 31 oct. 2009
+@author: coissac
+'''
+from random import randrange, sample
+    from collections import Counter
+except ImportError:
+    from obitools.collections import Counter
+def lookfor(x,cumsum):
+    '''
+    Locate the interval of a cumulative sum containing a value.
+    @param x: the value to look for
+    @param cumsum: the cumulative sums, in non decreasing order
+    @type cumsum: list
+    @return: the smallest index i such that x < cumsum[i]
+    @rtype: int
+    @raise AssertionError: if x is not smaller than the last
+                           cumulative sum
+    '''
+    from bisect import bisect_right
+    # raised explicitly so that the check is not removed by python -O
+    if not x < cumsum[-1]:
+        raise AssertionError("x must be smaller then cumulative sum")
+    return bisect_right(cumsum,x)
+def weigthedSample(events,size):
+    '''
+    Draw events at random, with replacement, each event having a
+    probability proportional to its weight.
+    @param events: associates each event to its weight
+    @type events: dict like object with integer values
+    @param size: the number of draws
+    @type size: int
+    @return: the number of times each event was drawn
+    @rtype: Counter
+    '''
+    entries = list(events)
+    cumul = []
+    s = 0
+    for e in entries:
+        s += events[e]
+        cumul.append(s)
+    drawn = []
+    i = 0
+    # the draws are sorted, so the cursor i only moves forward
+    for v in sorted(randrange(0,s) for _ in range(size)):
+        # entry i owns the values cumul[i-1] <= v < cumul[i], hence
+        # '>=': with '>' the value cumul[i] was given to entry i, which
+        # favoured the first entries and allowed events of weight 0
+        # to be drawn
+        while v >= cumul[i]:
+            i += 1
+        drawn.append(entries[i])
+    return Counter(drawn)
+def weigthedSampleWithoutReplacement(events,size):
+    '''
+    Draw events at random, without replacement: the weight of an
+    event is the number of times it is available.
+    @param events: associates each event to its weight
+    @type events: dict like object with integer values
+    @param size: the number of draws; cannot exceed the sum of the
+                 weights
+    @type size: int
+    @return: the number of times each event was drawn, never more
+             than its weight
+    @rtype: Counter
+    '''
+    entries = list(events)
+    cumul = []
+    s = 0
+    for e in entries:
+        s += events[e]
+        cumul.append(s)
+    try:
+        # lazy sequence: the s values are never all held in memory
+        population = xrange(s)
+    except NameError:
+        population = range(s)
+    drawn = []
+    i = 0
+    # the draws are sorted, so the cursor i only moves forward
+    for v in sorted(sample(population,size)):
+        # entry i owns the values cumul[i-1] <= v < cumul[i], hence
+        # '>=': with '>' the value cumul[i] was given to entry i, so an
+        # event could be drawn more often than its weight
+        while v >= cumul[i]:
+            i += 1
+        drawn.append(entries[i])
+    return Counter(drawn)
diff --git a/src/obitools/seqdb/__init__.py b/src/obitools/seqdb/__init__.py
new file mode 100644
index 0000000..ef89f43
--- /dev/null
+++ b/src/obitools/seqdb/__init__.py
@@ -0,0 +1,88 @@
+from obitools import NucSequence,AASequence
+from obitools.format.genericparser import genericEntryIteratorGenerator
+from obitools.location.feature import featureIterator
+from itertools import chain
+class AnnotatedSequence(object):
+    '''
+    Annotations of a database entry: header, feature table and
+    secondary accession numbers.
+    extractTaxon() uses C{in} and item assignment on self: the class
+    is combined with a sequence class below, which presumably
+    provides them (to be confirmed).
+    '''
+    def __init__(self,header,featureTable,secondaryAcs):
+        '''
+        @param header: the header of the entry
+        @param featureTable: the text of the feature table; it is
+                             parsed only when first requested
+        @param secondaryAcs: the secondary accession numbers
+        '''
+        self._header = header
+        self._featureTableText = featureTable
+        # parsed feature table, built by getFeatureTable()
+        self._featureTable=None
+        self._secondaryAcs=secondaryAcs
+        # set to False once a taxid was looked for and not found
+        self._hasTaxid=True
+    def getHeader(self):
+        return self._header
+    def getFeatureTable(self,skipError=False):
+        '''
+        @param skipError: forwarded to C{featureIterator}
+        @return: the list of features, parsed on the first call and
+                 kept for the following ones
+        '''
+        if self._featureTable is None:
+            self._featureTable = [x for x in featureIterator(self._featureTableText,skipError)]
+        return self._featureTable
+    def getSecondaryAcs(self):
+        return self._secondaryAcs
+    def extractTaxon(self):
+        '''
+        Set the 'taxid' and 'organism' keys of the sequence from the
+        'db_xref' and 'organism' qualifiers of its 'source' features.
+        A key is set only when a single value is found. Nothing is
+        done when 'taxid' is already set or when a previous call found
+        no taxid.
+        '''
+        if b'taxid' not in self and self._hasTaxid:
+            if self._featureTable is not None:
+                s = [f for f in self._featureTable if f.ftType=='source']
+            else:
+                # look at the first feature only; the whole table is
+                # parsed only when it is not a 'source'
+                s = featureIterator(self._featureTableText).next()
+                if s.ftType=='source':
+                    s = [s]
+                else:
+                    s = [f for f in self.featureTable if f.ftType=='source']
+            # values of the form 'taxon:<number>' give the taxid
+            t =set(int(v[6:]) for v in chain(*tuple(f['db_xref'] for f in s if  'db_xref' in f))
+                      if  v[0:6]=='taxon:') 
+            self._hasTaxid=False
+            if len(t)==1 :
+                taxid=t.pop()
+                if taxid >=0:
+                    self['taxid']=taxid
+                    self._hasTaxid=True
+            t =set(chain(*tuple(f['organism'] for f in s if  'organism' in f))) 
+            if len(t)==1:
+                self['organism']=t.pop()
+    header = property(getHeader, None, None, "Header's Docstring")
+    featureTable = property(getFeatureTable, None, None, "FeatureTable's Docstring")
+    secondaryAcs = property(getSecondaryAcs, None, None, "SecondaryAcs's Docstring")
+class AnnotatedNucSequence(AnnotatedSequence,NucSequence):
+    '''
+    Nucleic sequence carrying the annotations of its database entry.
+    '''
+    def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info):
+        # id, seq, de and info go to NucSequence, the three other
+        # parameters to AnnotatedSequence
+        NucSequence.__init__(self, id, seq, de,**info)
+        AnnotatedSequence.__init__(self, header, featureTable, secondaryAcs)
+class AnnotatedAASequence(AnnotatedSequence,AASequence):
+    '''
+    Protein sequence carrying the annotations of its database entry.
+    '''
+    def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info):
+        # id, seq, de and info go to AASequence, the three other
+        # parameters to AnnotatedSequence
+        AASequence.__init__(self, id, seq, de,**info)
+        AnnotatedSequence.__init__(self, header, featureTable, secondaryAcs)
diff --git a/src/obitools/seqdb/blastdb/__init__.py b/src/obitools/seqdb/blastdb/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/seqdb/dnaparser.py b/src/obitools/seqdb/dnaparser.py
new file mode 100644
index 0000000..85b82a2
--- /dev/null
+++ b/src/obitools/seqdb/dnaparser.py
@@ -0,0 +1,16 @@
+from obitools.format.sequence import embl,fasta,genbank
+class UnknownFormatError(Exception):
+    '''
+    Raised by whichParser when the format of an entry is not
+    recognised.
+    '''
+    pass
+def whichParser(seq):
+    '''
+    Select the parser matching the format of a nucleic entry, from
+    the first characters of its text: '>' for fasta, 'ID' for EMBL
+    and 'LOCUS' for genbank.
+    @param seq: the text of the entry
+    @type seq: str
+    @return: the parser function to apply to the entry
+    @raise UnknownFormatError: if the format is not recognised, which
+                               includes an empty text
+    '''
+    # slices instead of seq[0]: an empty text must end in
+    # UnknownFormatError and not in IndexError
+    if seq[0:1]=='>':
+        return fasta.fastaNucParser
+    if seq[0:2]=='ID':
+        return embl.emblParser
+    if seq[0:5]=='LOCUS':
+        return genbank.genbankParser
+    raise UnknownFormatError("Unknown nucleic format")
+def nucleicParser(seq):
+    '''
+    Parse a nucleic entry with the parser selected by whichParser.
+    @param seq: the text of the entry
+    @return: the value returned by the selected parser
+    '''
+    return whichParser(seq)(seq)
diff --git a/src/obitools/seqdb/embl/__init__.py b/src/obitools/seqdb/embl/__init__.py
new file mode 100644
index 0000000..94f9efc
--- /dev/null
+++ b/src/obitools/seqdb/embl/__init__.py
@@ -0,0 +1,13 @@
+from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
+from obitools.location import locationGenerator,extractExternalRefs
+class EmblSequence(AnnotatedNucSequence):
+    '''
+    Class used to represent a nucleic sequence issued from EMBL.
+    Nothing is added to AnnotatedNucSequence.
+    '''
diff --git a/src/obitools/seqdb/embl/parser.py b/src/obitools/seqdb/embl/parser.py
new file mode 100644
index 0000000..b90278f
--- /dev/null
+++ b/src/obitools/seqdb/embl/parser.py
@@ -0,0 +1,52 @@
+import re
+import sys
+from obitools.seqdb import embl
+from obitools.seqdb import nucEntryIterator
+_featureMatcher = re.compile('(^FT  .*\n)+', re.M)
+_cleanFT       = re.compile('^FT',re.M)
+_headerMatcher = re.compile('^ID.+(?=\nFH  )', re.DOTALL)
+_seqMatcher    = re.compile('(^    ).+(?=//\n)', re.DOTALL + re.M)
+_cleanSeq      = re.compile('[ \n0-9]+')
+_acMatcher     = re.compile('(?<=^AC   ).+',re.M)
+_deMatcher     = re.compile('(^DE   .+\n)+',re.M)
+_cleanDe       = re.compile('(^|\n)DE +')
+def __emblparser(text):
+    '''
+    Split the text of an EMBL entry into its main parts.
+    @param text: the text of one entry
+    @type text: str
+    @return: the tuple (ac,seq,de,header,ft,acs): main accession
+             number, sequence in upper case, description, header text,
+             feature table text and list of the secondary accession
+             numbers
+    @rtype: tuple
+    @raise AttributeError: if one of the parts is not found; the text
+                           of the entry is first printed on stderr
+    '''
+    try:
+        header = _headerMatcher.search(text).group()
+        ft     = _featureMatcher.search(text).group()
+        ft     = _cleanFT.sub('  ',ft)
+        seq    = _seqMatcher.search(text).group()
+        seq    = _cleanSeq.sub('',seq).upper()
+        acs    = _acMatcher.search(text).group()
+        acs    = acs.replace(';', ' ')
+        acs    = acs.split()
+        ac     = acs[0]
+        acs    = acs[1:]
+        de     = _deMatcher.search(header).group()
+        de     = _cleanDe.sub(' ',de).strip().strip('.')
+    except AttributeError:
+        # a search() found nothing and returned None
+        print >>sys.stderr,'======================================================='
+        print >>sys.stderr,text
+        print >>sys.stderr,'======================================================='
+        # bare raise: the traceback still points to the failing line
+        # ('raise e' made it start here)
+        raise
+    return (ac,seq,de,header,ft,acs)
+def emblParser(text):
+    '''
+    @param text: the text of one EMBL entry
+    @return: the corresponding sequence
+    @rtype: embl.EmblSequence
+    '''
+    return embl.EmblSequence(*__emblparser(text))
+def emblIterator(file):
+    '''
+    @param file: the source of data, as accepted by nucEntryIterator
+    @return: an iterator over the sequences, one per entry
+    '''
+    for e in nucEntryIterator(file):
+        yield emblParser(e)
diff --git a/src/obitools/seqdb/genbank/__init__.py b/src/obitools/seqdb/genbank/__init__.py
new file mode 100644
index 0000000..fb5b622
--- /dev/null
+++ b/src/obitools/seqdb/genbank/__init__.py
@@ -0,0 +1,84 @@
+from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
+from obitools.location import locationGenerator,extractExternalRefs
+class GbSequence(AnnotatedNucSequence):
+    '''
+    Class used to represent a nucleic sequence issued from Genbank.
+    Nothing is added to AnnotatedNucSequence.
+    '''
+class GpepSequence(AnnotatedAASequence):
+    '''
+    Class used to represent a peptidic sequence issued from Genpep.   
+    '''
+    def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info):
+        AnnotatedAASequence.__init__(self,id, seq, de, header, featureTable, secondaryAcs,**info)
+        # None: not looked for yet; then True or False
+        self.__hasNucRef=None
+    def __getGeneRef(self):
+        '''
+        Look once for the reference to the coding nucleic sequence in
+        the 'coded_by' qualifier of the first CDS feature having one.
+        On success the tuple (accession, location, transl_table or
+        None) is stored in self.__nucRef.
+        '''
+        if self.__hasNucRef is None:
+            self.__hasNucRef=False
+            cds = [x for x in self.featureTable
+                   if x.ftType=='CDS' 
+                   and 'coded_by' in x]
+            if cds:
+                source = cds[0]['coded_by'][0]
+                if 'transl_table' in cds[0]:
+                    tt = cds[0]['transl_table'][0]
+                else:
+                    tt=None
+                ac,loc = extractExternalRefs(source)
+                # the reference is kept only when it points to a
+                # single accession number
+                if len(ac)==1:
+                    ac = ac.pop()
+                    self.__hasNucRef=True
+                    self.__nucRef = (ac,loc,tt)
+    def geneAvailable(self):
+        '''
+        Predicat indicating if reference to the nucleic sequence encoding
+        this protein is available in feature table.
+        @return: True if gene description is available
+        @rtype: bool
+        '''
+        self.__getGeneRef()
+        return self.__hasNucRef is not None and self.__hasNucRef
+    def getCDS(self,database):
+        '''
+        Return the nucleic sequence coding for this protein if
+        data are available.
+        @param database: a database object where looking for the sequence
+        @type database: a C{dict} like object
+        @return: a NucBioseq instance corresponding to the CDS
+        @rtype: NucBioSeq
+        @raise AssertionError: if no gene references are available
+        @see: L{geneAvailable}
+        '''
+        assert self.geneAvailable(), \
+            "No information available to retreive gene sequence"
+        ac,loc,tt = self.__nucRef
+        seq = database[ac]
+        seq.extractTaxon()
+        # the location is used as an index on the nucleic sequence
+        gene = seq[loc]   
+        if tt is not None:
+            gene['transl_table']=tt
+        return gene
diff --git a/src/obitools/seqdb/genbank/ncbi.py b/src/obitools/seqdb/genbank/ncbi.py
new file mode 100644
index 0000000..40ddf91
--- /dev/null
+++ b/src/obitools/seqdb/genbank/ncbi.py
@@ -0,0 +1,79 @@
+from urllib2 import urlopen
+import sys
+import re
+import cStringIO
+from obitools.eutils import EFetch
+from parser import genbankParser,genpepParser
+from parser import genbankIterator,genpepIterator
+from obitools.utils import CachedDB
+class NCBIGenbank(EFetch):
+    '''
+    Access by accession number to the entries of the NCBI
+    'nucleotide' database, requested with rettype 'gbwithparts'.
+    '''
+    def __init__(self):
+        EFetch.__init__(self,db='nucleotide',
+                        rettype='gbwithparts')
+    def __getitem__(self,ac):
+        '''
+        @param ac: an accession number (str) or an iterable of
+                   accession numbers
+        @return: the parsed sequence for a single accession number,
+                 otherwise an iterator over the parsed sequences
+        '''
+        if not isinstance(ac,str):
+            # several accession numbers: a single request for all
+            reply = self.get(id=','.join(ac))
+            return genbankIterator(cStringIO.StringIO(reply))
+        return genbankParser(self.get(id=ac))
+class NCBIGenpep(EFetch):
+    '''
+    Access by accession number to the entries of the NCBI 'protein'
+    database, requested with rettype 'gbwithparts'.
+    '''
+    def __init__(self):
+        EFetch.__init__(self,db='protein',
+                        rettype='gbwithparts')
+    def __getitem__(self,ac):
+        '''
+        @param ac: an accession number (str) or an iterable of
+                   accession numbers
+        @return: the parsed sequence for a single accession number,
+                 otherwise an iterator over the parsed sequences
+        '''
+        if not isinstance(ac,str):
+            # several accession numbers: a single request for all
+            reply = self.get(id=','.join(ac))
+            return genpepIterator(cStringIO.StringIO(reply))
+        return genpepParser(self.get(id=ac))
+class NCBIAccession(EFetch):
+    '''
+    Access to the accession strings found in the 'seqid' reply of the
+    NCBI 'nucleotide' database.
+    '''
+    # an accession as it appears in the text of the reply
+    _matchACS = re.compile(' +accession +"([^"]+)"')
+    def __init__(self):
+        EFetch.__init__(self,db='nucleotide',
+                        rettype='seqid')
+    def __getitem__(self,ac):
+        '''
+        @param ac: an identifier (str) or an iterable of identifiers
+        @return: the first accession found in the reply for a single
+                 identifier, otherwise an iterator over all the
+                 accessions found
+        '''
+        if not isinstance(ac,str):
+            # several identifiers: a single request for all
+            text = self.get(id=','.join(ac))
+            return (found.group(1)
+                    for found in NCBIAccession._matchACS.finditer(text))
+        text = self.get(id=ac)
+        return NCBIAccession._matchACS.search(text).group(1)
+def Genbank(cache=None):
+    '''
+    @param cache: when not None, given with the database to CachedDB
+    @return: a NCBIGenbank instance, wrapped in a CachedDB when a
+             cache is given
+    '''
+    gb = NCBIGenbank()
+    if cache is not None:
+        gb = CachedDB(cache, gb)
+    return gb
+def Genpep(cache=None):
+    '''
+    @param cache: when not None, given with the database to CachedDB
+    @return: a NCBIGenpep instance, wrapped in a CachedDB when a
+             cache is given
+    '''
+    gp = NCBIGenpep()
+    if cache is not None:
+        gp = CachedDB(cache, gp)
+    return gp
diff --git a/src/obitools/seqdb/genbank/parser.py b/src/obitools/seqdb/genbank/parser.py
new file mode 100644
index 0000000..b52fe59
--- /dev/null
+++ b/src/obitools/seqdb/genbank/parser.py
@@ -0,0 +1,53 @@
+import re
+import sys
+import obitools.seqdb.genbank as gb
+from obitools.seqdb import nucEntryIterator,aaEntryIterator
+_featureMatcher = re.compile('^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M)
+_headerMatcher = re.compile('^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
+_seqMatcher    = re.compile('(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M)
+_cleanSeq      = re.compile('[ \n0-9]+')
+_acMatcher     = re.compile('(?<=^ACCESSION   ).+',re.M)
+_deMatcher     = re.compile('(?<=^DEFINITION  ).+\n( .+\n)*',re.M)
+_cleanDe       = re.compile('\n *')
+def __gbparser(text):
+    '''
+    Split the text of a Genbank or Genpep entry into its main parts.
+    @param text: the text of one entry
+    @type text: str
+    @return: the tuple (ac,seq,de,header,ft,acs): main accession
+             number, sequence in upper case, description, header text,
+             feature table text and list of the secondary accession
+             numbers
+    @rtype: tuple
+    @raise AttributeError: if one of the parts is not found; the text
+                           of the entry is first printed on stderr
+    '''
+    try:
+        header = _headerMatcher.search(text).group()
+        ft     = _featureMatcher.search(text).group()
+        seq    = _seqMatcher.search(text).group()
+        seq    = _cleanSeq.sub('',seq).upper()
+        acs    = _acMatcher.search(text).group()
+        acs    = acs.split()
+        ac     = acs[0]
+        acs    = acs[1:]
+        de     = _deMatcher.search(header).group()
+        de     = _cleanDe.sub(' ',de).strip().strip('.')
+    except AttributeError:
+        # a search() found nothing and returned None
+        print >>sys.stderr,'======================================================='
+        print >>sys.stderr,text
+        print >>sys.stderr,'======================================================='
+        # bare raise: the traceback still points to the failing line
+        # ('raise e' made it start here)
+        raise
+    return (ac,seq,de,header,ft,acs)
+def genbankParser(text):
+    '''
+    @param text: the text of one Genbank entry
+    @return: the corresponding sequence
+    @rtype: gb.GbSequence
+    '''
+    return gb.GbSequence(*__gbparser(text))
+def genbankIterator(file):
+    '''
+    @param file: the source of data, as accepted by nucEntryIterator
+    @return: an iterator over the sequences, one per entry
+    '''
+    for e in nucEntryIterator(file):
+        yield genbankParser(e)
+def genpepParser(text):
+    '''
+    @param text: the text of one Genpep entry
+    @return: the corresponding sequence
+    @rtype: gb.GpepSequence
+    '''
+    return gb.GpepSequence(*__gbparser(text))
+def genpepIterator(file):
+    '''
+    @param file: the source of data, as accepted by aaEntryIterator
+    @return: an iterator over the sequences, one per entry
+    '''
+    for e in aaEntryIterator(file):
+        yield genpepParser(e)
\ No newline at end of file
diff --git a/src/obitools/sequenceencoder/__init__.py b/src/obitools/sequenceencoder/__init__.py
new file mode 100644
index 0000000..89a8a59
--- /dev/null
+++ b/src/obitools/sequenceencoder/__init__.py
@@ -0,0 +1,73 @@
+from obitools import location
+class SequenceEncoder(object):
+    """
+    Common base class of the sequence encoders defined in this module.
+    """
+class DNAComplementEncoder(SequenceEncoder):
+    """
+    Encoder giving the complemented view of a nucleotide sequence.
+    """
+    # Complement of each supported lower case nucleotide symbol
+    _comp={'a': 't', 't': 'a', 'c': 'g', 'g': 'c',
+           'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
+           'b': 'v', 'v': 'b', 'd': 'h', 'h': 'd',
+           's': 's', 'w': 'w', 'n': 'n', 'u': 'a',
+           '-': '-'}
+    _info={'complemented':True}
+    @staticmethod
+    def _encode(seq,position=slice(None, None, -1)):
+        """
+        Complement the symbols of C{seq[position]}.
+        With the default C{position} the whole sequence is read backward;
+        symbols missing from the complement table are replaced by 'n'.
+        """
+        table = DNAComplementEncoder._comp
+        return ''.join([table.get(symbol.lower(),'n') for symbol in seq[position]])
+    @staticmethod
+    def _check(seq):
+        """
+        Assert that C{seq} declares itself as a nucleotide sequence.
+        """
+        assert seq.isNucleotide()
+    @staticmethod
+    def _convertpos(position):
+        """
+        Convert a position expressed on the complemented sequence.
+        @param position: an C{int}, a C{slice} or a C{location.Location}
+        @raise TypeError: for any other kind of position
+        """
+        if isinstance(position, int):
+            return -(position+1)
+        if isinstance(position, slice):
+            begin = -(position.stop+1)
+            end   = -(position.start+1)
+            return slice(begin,end,-position.step)
+        if isinstance(position, location.Location):
+            return location.ComplementLocation(position).simplify()
+        raise TypeError("position must be an int, slice or Location instance")
+    @staticmethod
+    def complement(seq):
+        """
+        Return C{seq} unchanged.
+        """
+        return seq
+class SeqFragmentEncoder(SequenceEncoder):
+    """
+    Encoder exposing the fragment C{[begin, end[} of a sequence.
+    """
+    def __init__(self,begin,end):
+        """
+        @param begin: first position of the fragment (0 based)
+        @param end: position following the last one of the fragment
+        """
+        assert begin < end and begin >=0
+        self._limits = slice(begin,end)
+        self._info = {'cut' : [begin,end,1]}
+        # The slice used by _encode is half-open: it holds end - begin symbols
+        self._len = end - begin
+    def _check(self,seq):
+        """
+        Assert that the fragment does not run past the end of C{seq}.
+        """
+        lseq = len(seq)
+        assert self._limits.stop <= lseq
+    def _encode(self,seq,position=None):
+        """
+        Return the fragment as a string; C{position} is not used.
+        """
+        return str(seq)[self._limits]
+    def _convertpos(self,position):
+        """
+        Convert a position on the fragment into a position on the sequence.
+        @raise IndexError: if an C{int} position is outside of the fragment
+        @raise TypeError: if position is not an int, slice or Location
+        """
+        if isinstance(position, int):
+            if position < -self._len or position >= self._len:
+                raise IndexError(position)
+            if position >=0:
+                return self._limits.start + position
+            # -1 designates the last symbol of the fragment, i.e. stop - 1
+            return self._limits.stop + position
+        elif isinstance(position, slice):
+            # NOTE(review): same conversion as DNAComplementEncoder._convertpos;
+            # confirm that it is the intended one for a fragment
+            return slice(-(position.stop+1),
+                         -(position.start+1),
+                         -position.step)
+        elif isinstance(position, location.Location):
+            # NOTE(review): also shared with the complement encoder - confirm
+            return location.ComplementLocation(position).simplify()
+        raise TypeError("position must be an int, slice or Location instance")
\ No newline at end of file
diff --git a/src/obitools/solexa/__init__.py b/src/obitools/solexa/__init__.py
new file mode 100644
index 0000000..60e35f8
--- /dev/null
+++ b/src/obitools/solexa/__init__.py
@@ -0,0 +1,45 @@
+from obitools import utils
+from obitools import NucSequence
+from obitools.dnahash import hashCodeIterator
+class SolexaSequence(NucSequence):
+    """
+    Nucleotide sequence carrying the quality scores of a Solexa read.
+    """
+    def __init__(self,id,seq,definition=None,quality=None,**info):
+        """
+        @param quality: the quality scores, either already decoded or as
+                        a string of blank separated integers
+        """
+        NucSequence.__init__(self, id, seq, definition,**info)
+        self._quality=quality
+        self._hash=None
+    def getQuality(self):
+        """
+        Return the quality scores, decoding them on first access when
+        they are still stored as a string.
+        """
+        raw = self._quality
+        if isinstance(raw, str):
+            self._quality=[int(score) for score in raw.split()]
+        return self._quality
+    def __hash__(self):
+        """
+        Return a hash code computed once from the sequence text.
+        """
+        if self._hash is not None:
+            return self._hash
+        codes = hashCodeIterator(str(self), len(str(self)), 16, 0)
+        self._hash = codes.next()[1].pop()
+        return self._hash
+class SolexaFile(utils.ColumnFile):
+    """
+    Reader of ':' separated Solexa files yielding C{SolexaSequence}.
+    """
+    def __init__(self,stream):
+        """
+        @param stream: the data source handed over to C{utils.ColumnFile}
+        """
+        # machine, channel, tile, pos_x, pos_y, sequence, quality
+        column_types = (str,
+                        int,int,int,int,
+                        str,
+                        str)
+        utils.ColumnFile.__init__(self, stream, ':', True, column_types, "#")
+    def next(self):
+        """
+        Read the next record and return it as an annotated sequence.
+        """
+        data = utils.ColumnFile.next(self)
+        channel,tile,pos_x,pos_y = data[1],data[2],data[3],data[4]
+        seqid = '%d_%d_%d_%d'%(channel,tile,pos_x,pos_y)
+        seq = SolexaSequence(seqid, data[5], quality=data[6])
+        annotations = (('machine',data[0]),
+                       ('channel',channel),
+                       ('tile',tile),
+                       ('pos_x',pos_x),
+                       ('pos_y',pos_y))
+        for key,value in annotations:
+            seq[key]=value
+        return seq
diff --git a/src/obitools/solexaPairEnd.py b/src/obitools/solexaPairEnd.py
new file mode 100644
index 0000000..5d082ec
--- /dev/null
+++ b/src/obitools/solexaPairEnd.py
@@ -0,0 +1,103 @@
+'''
+Created on 30 dec. 2009
+@author: coissac
+'''
+from obitools.options import getOptionManager
+from obitools.fastq import fastqSolexaIterator, formatFastq
+from obitools.align import QSolexaReverseAssemble
+from obitools.align import QSolexaRightReverseAssemble
+from obitools.tools._solexapairend import buildConsensus
+from itertools import chain
+def addSolexaPairEndOptions(optionManager):
+    """
+    Declare the -r/--reverse-reads option on C{optionManager}.
+    @param optionManager: an object providing an optparse like
+                          C{add_option} method
+    """
+    settings = dict(action="store",
+                    dest="reverse",
+                    metavar="<FILENAME>",
+                    type="string",
+                    default=None,
+                    help="Filename containing reverse solexa reads ")
+    optionManager.add_option('-r','--reverse-reads',**settings)
+def cutDirectReverse(entries):
+    """
+    Cut reads holding the direct and the reverse mates end to end.
+    The full read length is taken as the most frequent length among the
+    first entries (ten at most); every entry is then cut at half of it.
+    @param entries: an iterable over sliceable sequences
+    @return: a generator of C{(direct, reverse)} pairs; nothing is
+             produced for an empty input
+    @raise AssertionError: if no single length dominates the first
+             entries or if that length is odd
+    """
+    entries = iter(entries)
+    first = []
+    # Sample up to ten entries; shorter inputs are accepted as they are
+    for entry in entries:
+        first.append(entry)
+        if len(first) == 10:
+            break
+    if not first:
+        return
+    clen = {}
+    for entry in first:
+        clen[len(entry)]=clen.get(len(entry),0)+1
+    best = max(clen.values())
+    freq = [k for k in clen if clen[k]==best]
+    assert len(freq)==1,"To many sequence length"
+    freq = freq[0]
+    assert freq % 2 == 0, ""
+    # Integer division: lread is used as a slice bound
+    lread = freq//2
+    # The sampled entries are replayed before the rest of the stream
+    for s in chain(first,entries):
+        yield (s[0:lread],s[lread:])
+def seqPairs(direct,reverse):
+    """
+    Pair each direct read with the reverse read of the same rank.
+    @param direct: an iterable over the direct reads
+    @param reverse: an iterable over the reverse reads
+    @return: a generator of C{(direct, reverse)} pairs which stops as
+             soon as one of the two sources is exhausted
+    """
+    reverse = iter(reverse)
+    for d in direct:
+        try:
+            r = next(reverse)
+        except StopIteration:
+            # No more reverse reads: end the pairing explicitly
+            return
+        yield (d,r)
+def checkAlignOk(ali):
+    """
+    Tell whether an alignment neither starts with a gap on its first
+    row nor ends with a gap on its second row.
+    @param ali: an indexable object holding at least two rows
+    @return: C{True} if both conditions hold, C{False} otherwise
+    """
+    if ali[0][0]=='-':
+        return False
+    last = len(ali[1])-1
+    return not (ali[1][last]=='-')
+def buildAlignment(sequences):
+    """
+    Align each pair of reads, falling back on the right aligner.
+    Every pair is first aligned with C{QSolexaReverseAssemble}; when
+    C{checkAlignOk} rejects the result the pair is aligned again with
+    C{QSolexaRightReverseAssemble}.
+    @param sequences: an iterable of C{(direct, reverse)} pairs
+    @return: a generator of alignments whose C{direction} attribute is
+             set to 'left' or 'right' according to the aligner used
+    """
+    left_aligner = QSolexaReverseAssemble()
+    right_aligner = QSolexaRightReverseAssemble()
+    for direct,reverse in sequences:
+        left_aligner.seqA=direct
+        left_aligner.seqB=reverse
+        ali=left_aligner()
+        ali.direction='left'
+        if checkAlignOk(ali):
+            yield ali
+            continue
+        right_aligner.seqA=direct
+        right_aligner.seqB=reverse
+        ali=right_aligner()
+        ali.direction='right'
+        yield ali
+if __name__ == '__main__':
+    # Script entry point: read the direct reads, pair them with the reverse
+    # ones and print the consensus of each alignment in fastq format.
+    optionParser = getOptionManager([addSolexaPairEndOptions],
+                                    entryIterator=fastqSolexaIterator)
+    (options, direct) = optionParser()
+    if options.reverse is not None:
+        # Reverse reads come from their own file
+        reverse = fastqSolexaIterator(options.reverse)
+        sequences=seqPairs(direct,reverse)
+    else:
+        # Both mates are stored end to end in every read
+        sequences=cutDirectReverse(direct)
+    for ali in buildAlignment(sequences):
+        print formatFastq(buildConsensus(ali))
diff --git a/src/obitools/statistics/__init__.py b/src/obitools/statistics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/statistics/hypergeometric.py b/src/obitools/statistics/hypergeometric.py
new file mode 100644
index 0000000..9a9b812
--- /dev/null
+++ b/src/obitools/statistics/hypergeometric.py
@@ -0,0 +1,166 @@
+# -*- coding: utf-8 -*-
+"""
+   Module de calculs statistiques.
+   Le module `statistics` contient des fonctions permettant le calcul
+   des probabilités associées à la loi hypergéométrique et 
+   hypergéométrique cumulée, ainsi qu'une méthode de correction pour les
+   tests multiples. 
+"""
+from decimal import *
+getcontext().prec = 28
+def _hyper0(N,n,r):
+    """
+    Internal function computing the term 0 of the hypergeometric law.
+    The computation follows the method described in
+         Trong Wu, An accurate computation of the hypergeometric distribution function,
+         ACM Trans. Math. Softw. 19 (1993), no. 1, 33-43.
+    @param N: size of the population
+    @param n: number of marked elements
+    @param r: size of the sample
+    @return: a C{Decimal} giving the probability of drawing 0 marked
+             element among C{n} in a population of size C{N} when a
+             sample of size C{r} is taken; 1 when C{n} is 0
+    """
+    #
+    # The numerator is the product of the integers of
+    #    [N - r + 1 - n; N - n + 1[
+    # and the denominator the product of the integers of
+    #    [N - r + 1; N + 1[
+    #
+    # With X = N - r + 1 and Y = N + 1
+    #
+    # Numerator   -> [ X - n; Y - n [
+    # Denominator -> [ X    ; Y [
+    #
+    # which simplifies into
+    #
+    # Numerator    -> [X - n; X [
+    # Denominator  -> [Y - n; Y [
+    numerateur   = N - r + 1 - n
+    denominateur = N + 1 - n
+    # Starting from 1 makes the empty product (n == 0) well defined
+    p = Decimal(1)
+    while denominateur <= N:
+        p *= Decimal(numerateur)/Decimal(denominateur)
+        numerateur   += 1
+        denominateur += 1
+    return p
+def hypergeometric(x,N,n,r):
+    """
+    Compute the term C{x} of a hypergeometric law.
+    The computation follows the method described in
+    Trong Wu, An accurate computation of the hypergeometric distribution function,
+    ACM Trans. Math. Softw. 19 (1993), no. 1, 33-43.
+    @param x: expected number of marked elements
+    @param N: size of the population
+    @param n: number of marked elements
+    @param r: size of the sample
+    @return: a C{Decimal} giving the probability of drawing C{x} marked
+             elements among C{n} in a population of size C{N} when a
+             sample of size C{r} is taken
+    @raise AssertionError: if C{x} is not in C{[0, min(n, r)]}
+    """
+    # The law is symmetric in n and r: keep r as the smallest of both
+    if n < r:
+        n,r = r,n
+    assert x>=0 and x <= r,"x out of limits"
+    # Smallest x having a non null probability
+    xlow = max(0,n + r - N)
+    if x < xlow:
+        return Decimal(0)
+    if xlow == 0:
+        p = _hyper0(N,n,r)
+    else:
+        # When n + r > N, P(xlow) = C(n,N-r)/C(N,N-r), which is the term 0
+        # of the law counting the unmarked elements left out of the sample
+        p = _hyper0(N,N-n,N-r)
+    # Iterative form of P(k) = P(k-1) * (n-k+1)/k * (r-k+1)/(N-n-r+k)
+    for k in xrange(xlow+1,x+1):
+        p = p * (n - k + 1)/k * (r - k + 1)/(N-n-r+k)
+    return p
+def chypergeometric(xmin,xmax,N,n,r):
+    """
+    Compute a cumulated probability of a hypergeometric law.
+    The computation follows the method described in
+    Trong Wu, An accurate computation of the hypergeometric distribution function,
+    ACM Trans. Math. Softw. 19 (1993), no. 1, 33-43.
+    @param xmin: minimum expected number of marked elements
+    @param xmax: maximum expected number of marked elements
+    @param N: size of the population
+    @param n: number of marked elements
+    @param r: size of the sample
+    @return: a C{Decimal} giving the probability of drawing between
+             C{xmin} and C{xmax} marked elements among C{n} in a
+             population of size C{N} when a sample of size C{r} is taken
+    @raise AssertionError: if the bounds are not ordered values of
+             C{[0, min(n, r)]}
+    """
+    if n < r:
+        n,r = r,n
+    assert xmin>=0 and xmin <= r and xmax>=0 and xmax <= r and xmin <=xmax,"x out of limits"
+    # Terms below xlow are null; skipping them also avoids the null
+    # denominator met by the recurrence at x = n + r - N
+    xlow = max(0,n + r - N)
+    if xmax < xlow:
+        return Decimal(0)
+    start = max(xmin,xlow)
+    hg  = hypergeometric(start,N,n,r)
+    rep = hg
+    for x in xrange(start+1,xmax+1):
+        hg = hg * (n - x + 1)/x * (r - x + 1)/(N-n-r+x)
+        rep+=hg
+    return rep
+def multipleTest(globalPvalue,testList):
+    """
+    Correction for multiple tests.
+    Select among a set of tests the largest subset such that the
+    global risk stays below a given p-value.
+    @param globalPvalue: global risk accepted for the whole set of tests
+    @param testList: an iterable over a set of tests. Each test is a
+                     list or a tuple whose last element is the p-value
+                     associated to the test
+    @return: a list holding the subset of the tests selected in
+             C{testList}, sorted by increasing p-value
+    """
+    # Stable sort on the p-value: ties keep their input order
+    testList=sorted(testList,key=lambda t:t[-1])
+    h0=1.0-globalPvalue
+    p=1.0
+    rep = []
+    for t in testList:
+        # p is the probability that none of the tests kept so far is wrong
+        p*=1.0-t[-1]
+        if p > h0:
+            rep.append(t)
+    return rep
\ No newline at end of file
diff --git a/src/obitools/statistics/noncentralhypergeo.py b/src/obitools/statistics/noncentralhypergeo.py
new file mode 100644
index 0000000..e6a96ce
--- /dev/null
+++ b/src/obitools/statistics/noncentralhypergeo.py
@@ -0,0 +1,208 @@
+from decimal import *
+from math import log
+#from obitools.utils import moduleInDevelopment
+# from : http://www.programmish.com/?p=25
+def dec_log(self, base=10):
+    """
+    Compute the logarithm of a C{Decimal} value digit by digit.
+    @param self: the value whose logarithm is computed
+    @type self: C{Decimal}
+    @param base: the base of the logarithm, given as a C{Decimal}, a
+                 C{float} (converted through its '%f' representation)
+                 or any value accepted by the C{Decimal} constructor
+    @return: a C{Decimal} holding as many fractional digits as the
+             precision of the current decimal context
+    """
+    # NOTE(review): a null or negative value never leaves the first
+    # normalisation loop below - callers presumably pass positive values
+    cur_prec = getcontext().prec
+    # Work with two guard digits; the precision is restored before returning
+    getcontext().prec += 2
+    baseDec = Decimal(10)
+    retValue = self
+    if isinstance(base, Decimal):
+        baseDec = base
+    elif isinstance(base, float):
+        baseDec = Decimal("%f" % (base))
+    else:
+        baseDec = Decimal(base)
+    # Bring the value into [1, base[, counting the integer part of the log
+    integer_part = Decimal(0)
+    while retValue < 1:
+        integer_part = integer_part - 1
+        retValue = retValue * baseDec
+    while retValue >= baseDec:
+        integer_part = integer_part + 1
+        retValue = retValue / baseDec
+    # Raising to the power 10 shifts the next digit of the log in front
+    # of the decimal point
+    retValue = retValue ** 10
+    decimal_frac = Decimal(0)
+    partial_part = Decimal(1)
+    while cur_prec > 0:
+        # Weight of the digit extracted during this round
+        partial_part = partial_part / Decimal(10)
+        digit = Decimal(0)
+        # The digit is the number of times the value can be divided by base
+        while retValue >= baseDec:
+            digit += 1
+            retValue = retValue / baseDec
+        decimal_frac = decimal_frac + digit * partial_part
+        retValue = retValue ** 10
+        cur_prec -= 1
+    getcontext().prec -= 2
+    return integer_part + decimal_frac
+class Interval(object):
+    """
+    Integer interval C{[begin, end]} associated to a power C{facteur}.
+    """
+    def __init__(self,begin,end,facteur=1):
+        """
+        @param begin: first value of the interval
+        @param end: last value of the interval
+        @param facteur: power associated to the interval
+        """
+        self._begin,self._end,self._facteur = begin,end,facteur
+    def __str__(self):
+        fields = (self._begin,self._end,self._facteur)
+        return '[%d,%d] ^ %d' % fields
+    def __repr__(self):
+        fields = (self._begin,self._end,self._facteur)
+        return 'Interval(%d,%d,%d)' % fields
+    def begin(self):
+        """
+        Return the opening bound as C{(position, +facteur, True)}.
+        """
+        position = self._begin
+        return (position,self._facteur,True)
+    def end(self):
+        """
+        Return the closing bound as C{(position, -facteur, False)}.
+        """
+        position = self._end
+        return (position,-self._facteur,False)
+def cmpb(i1,i2):
+    """
+    Compare two interval bounds as returned by C{Interval.begin} and
+    C{Interval.end}.
+    Bounds are ordered by position; at the same position an opening
+    bound (third field C{True}) comes before a closing one.
+    @return: -1, 0 or 1 following the usual comparison convention
+    """
+    def _compare(a,b):
+        # Same result as the Python 2 only cmp() builtin
+        return (a > b) - (a < b)
+    x = _compare(i1[0],i2[0])
+    if x==0:
+        x = _compare(i2[2],i1[2])
+    return x
+class Product(object):
+    """
+    Symbolic product of integer intervals raised to integer powers.
+    The C{prod} attribute is a list of C{Interval}; multiplying or
+    dividing two products merges their intervals and sums the powers
+    on the overlapping parts (see C{_simplify}).
+    """
+    def __init__(self,i=None):
+        # Start from a single interval or from the empty product
+        if i is not None:
+            self.prod=[i]
+        else:
+            self.prod=[]
+        self._simplify()    
+    def _simplify(self):
+        # Rebuild self.prod as a list of consecutive intervals, each one
+        # carrying the sum of the powers of the intervals covering it.
+        bornes=[]
+        prod  =[]
+        if self.prod:
+            # Collect the opening and closing bounds of every interval
+            for i in self.prod:
+                bornes.append(i.begin())
+                bornes.append(i.end())
+            bornes.sort(cmpb)
+            # Merge in place the bounds sharing position and kind, summing
+            # their levels; r is the number of bounds kept
+            j=0
+            r=len(bornes)
+            for i in xrange(1,len(bornes)):
+                if bornes[i][0]==bornes[j][0] and bornes[i][2]==bornes[j][2]:
+                    bornes[j]=(bornes[j][0],bornes[j][1]+bornes[i][1],bornes[i][2])
+                    r-=1
+                else:
+                    j+=1
+                    bornes[j]=bornes[i]
+            bornes=bornes[0:r]
+            # Sweep over the bounds; facteur is the power currently active
+            facteur=0
+            close=1
+            for b,level,open in bornes:
+                # close is 0 on a closing bound and 1 on an opening one
+                if not open:
+                    close=0
+                else:
+                    close=1
+                # Emit the segment ended by this bound when its power is
+                # not null: it stops at b on a closing bound, at b-1 otherwise
+                if facteur:
+                    prod.append(Interval(debut,b-close,facteur))
+                # The next segment starts at b (opening) or b+1 (closing)
+                debut=b+1-close  
+                facteur+=level
+        self.prod=prod
+    def __mul__(self,p):
+        # Concatenate the intervals of both products then simplify
+        res = Product()
+        res.prod=list(self.prod)
+        res.prod.extend(p.prod)
+        res._simplify()
+        return res
+    def __div__(self,p):
+        # Dividing is multiplying by the intervals with opposite powers
+        np = Product()
+        np.prod = [Interval(x._begin,x._end,-x._facteur) for x in p.prod]
+        return self * np
+    def __str__(self):
+        return str(self.prod)   
+    def log(self):   
+        # Base 10 logarithm of the product: sum over the intervals of
+        # power * sum of dec_log of each integer of the interval
+        p=Decimal(0)
+        for k in self.prod:
+            p+= Decimal(k._facteur) * reduce(lambda x,y:x+dec_log(Decimal(y),Decimal(10)),xrange(k._begin,k._end+1),Decimal(0))
+        return p  
+    def product(self):
+        # Value of the product as a Decimal
+        p=Decimal(1)
+        for k in self.prod:
+            p*= reduce(lambda x,y:x*Decimal(y),xrange(k._begin,k._end+1),Decimal(1)) ** Decimal(k._facteur)
+        return p  
+    def __call__(self,log=True):
+        # Evaluate the product, as its base 10 logarithm by default
+        if log:
+            return self.log()
+        else:
+            return self.product()
+def fact(n):
+    """
+    Return the factorial of C{n} as the C{Product} of the interval [1, n].
+    """
+    interval = Interval(1,n)
+    return Product(interval)
+def cnp(n,p):
+    """
+    Return C{n! / p! / (n-p)!} as a C{Product}.
+    """
+    partial = fact(n)/fact(p)
+    return partial/fact(n-p)
+def hypergeometic(x,n,M,N):
+    '''
+    Build the term C{x} of a hypergeometric law as a C{Product}.
+    @param x: value of the random variable
+    @type x:  int
+    @param n: size of the draw
+    @type n:  int
+    @param M: number of winning balls
+    @type M:  int
+    @param N: total number of balls in the urn
+    @type N:  int
+    @return: p(x) = cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
+    '''
+    favourable = cnp(M,x)  * cnp(N-M,n-x)
+    return favourable / cnp(N,n)
+def nchypergeometique(x,n,M,N,r):
+    '''
+    Compute the term C{x} of a hypergeometric law weighted by an odds
+    ratio.
+    @param x: value of the random variable
+    @type x:  int
+    @param n: size of the draw
+    @type n:  int
+    @param M: number of winning balls
+    @type M:  int
+    @param N: total number of balls in the urn
+    @type N:  int
+    @param r: odds ratio
+    @type r: presumably a C{Decimal}, as it is handed to C{dec_log}
+             unchanged - TODO confirm against the callers
+    @return: p(x) = w(x) / sum of w(y) for y in [xmin, xmax]
+             with w(y) = cnp(M,y) * cnp(N-M,n-y) * r**y
+    '''
+    # Bounds of the values having a non null probability
+    xmin = max(0,n-N+M)
+    xmax = min(n,M)
+    # Powers of r are handled through their base 10 logarithm
+    lr   =  dec_log(r)
+    xlr  = x * lr
+    num  = cnp(M,x) * cnp(N-M,n-x)
+    # Each term of the sum is divided by w(x): the symbolic part ...
+    den  = [cnp(M,y) * cnp(N-M,n-y) / num for y in xrange(xmin,xmax+1)]
+    # ... and the logarithm of r**(y-x)
+    fden = [lr * y - xlr for y in xrange(xmin,xmax+1)]
+    # i(False) evaluates the Product as a plain Decimal value
+    inverse=reduce(lambda x,y : x+y,
+                   map(lambda i,j: i(False) * 10**j ,den,fden))
+    return 1/inverse
\ No newline at end of file
diff --git a/src/obitools/svg.py b/src/obitools/svg.py
new file mode 100644
index 0000000..c42e3ef
--- /dev/null
+++ b/src/obitools/svg.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+"""
+SVG.py - Construct/display SVG scenes.
+The following code is a lightweight wrapper around SVG files. The metaphor
+is to construct a scene, add objects to it, and then write it to a file
+to display it.
+This program uses ImageMagick to display the SVG files. ImageMagick also 
+does a remarkable job of converting SVG files into other formats.
+"""
+import os
+display_prog = 'display' # Command to execute to display images.
+class Scene:
+    """
+    A SVG drawing: a named canvas holding a list of drawable items.
+    Items are objects providing a C{strarray} method returning the
+    list of the SVG lines describing them.
+    """
+    def __init__(self,name="svg",height=400,width=400):
+        """
+        @param name: base name of the default output file
+        @param height: height of the canvas
+        @param width: width of the canvas
+        """
+        self.name = name
+        self.items = []
+        self.height = height
+        self.width = width
+    def add(self,item):
+        """
+        Append an item to the scene.
+        """
+        self.items.append(item)
+    def strarray(self):
+        """
+        Return the SVG document as a list of strings.
+        """
+        var = ["<?xml version=\"1.0\"?>\n",
+               "<svg height=\"%d\" width=\"%d\" >\n" % (self.height,self.width),
+               " <g style=\"fill-opacity:1.0; stroke:black;\n",
+               "  stroke-width:1;\">\n"]
+        for item in self.items:
+            var += item.strarray()
+        var += [" </g>\n</svg>\n"]
+        return var
+    def write_svg(self,filename=None):
+        """
+        Write the scene to a file and remember its name in C{svgname}.
+        @param filename: the output file name; defaults to the scene
+                         name followed by '.svg'
+        """
+        if filename:
+            self.svgname = filename
+        else:
+            self.svgname = self.name + ".svg"
+        # The with statement closes the file even if writing fails
+        with open(self.svgname,'w') as output:
+            output.writelines(self.strarray())
+    def display(self,prog=display_prog):
+        """
+        Run an external viewer on the file written by C{write_svg}.
+        @param prog: the command to execute
+        """
+        # NOTE(review): the command line is built by string formatting and
+        # run through the shell; do not pass untrusted program or file names
+        os.system("%s %s" % (prog,self.svgname))
+class Line:
+    """
+    A straight segment between two points.
+    """
+    def __init__(self,start,end):
+        self.start = start # (x, y) tuple
+        self.end = end     # (x, y) tuple
+    def strarray(self):
+        """
+        Return the SVG lines describing the segment.
+        """
+        x1,y1 = self.start[0],self.start[1]
+        x2,y2 = self.end[0],self.end[1]
+        return ['  <line x1="%d" y1="%d" x2="%d" y2="%d" />\n' % (x1,y1,x2,y2)]
+class Circle:
+    """
+    A filled circle.
+    """
+    def __init__(self,center,radius,color):
+        self.center = center # (x, y) tuple
+        self.radius = radius # number formatted with %d
+        self.color = color   # rgb tuple in range(0,256)
+    def strarray(self):
+        """
+        Return the SVG lines describing the circle.
+        """
+        geometry = '  <circle cx="%d" cy="%d" r="%d"\n' % (self.center[0],
+                                                           self.center[1],
+                                                           self.radius)
+        style = '    style="fill:%s;"  />\n' % colorstr(self.color)
+        return [geometry,style]
+class Rectangle:
+    """
+    A filled rectangle.
+    """
+    def __init__(self,origin,height,width,color):
+        self.origin = origin # (x, y) tuple
+        self.height = height
+        self.width = width
+        self.color = color   # rgb tuple handed to colorstr
+    def strarray(self):
+        """
+        Return the SVG lines describing the rectangle.
+        """
+        geometry = '  <rect x="%d" y="%d" height="%d"\n' % (self.origin[0],
+                                                            self.origin[1],
+                                                            self.height)
+        style = '    width="%d" style="fill:%s;" />\n' % (self.width,
+                                                          colorstr(self.color))
+        return [geometry,style]
+class Text:
+    """
+    A piece of text anchored at a point.
+    """
+    def __init__(self,origin,text,size=24):
+        self.origin = origin # (x, y) tuple
+        self.text = text
+        self.size = size     # font size
+    def strarray(self):
+        """
+        Return the SVG lines describing the text.
+        """
+        x,y = self.origin[0],self.origin[1]
+        opening = '  <text x="%d" y="%d" font-size="%d">\n' % (x,y,self.size)
+        content = "   %s\n" % self.text
+        return [opening,content,"  </text>\n"]
+def colorstr(rgb):
+    """
+    Format a rgb triplet as a '#rgb' color with one hexadecimal digit
+    per channel.
+    @param rgb: the three channel values, each in range(0,256)
+    @return: the color as a C{str}
+    """
+    # Integer division keeps the operands valid for %x whatever the
+    # division semantics of the interpreter
+    return "#%x%x%x" % (rgb[0]//16,rgb[1]//16,rgb[2]//16)
+def test():
+    """
+    Draw a demonstration scene, write it to test.svg and display it.
+    """
+    scene = Scene('test')
+    scene.add(Rectangle((100,100),200,200,(0,255,255)))
+    # Four segments starting from the center of the rectangle
+    center = (200,200)
+    for tip in ((200,300),(300,200),(100,200),(200,100)):
+        scene.add(Line(center,tip))
+    # One circle at the center and one at the tip of each segment
+    circles = (((200,200),(0,0,255)),
+               ((200,300),(0,255,0)),
+               ((300,200),(255,0,0)),
+               ((100,200),(255,255,0)),
+               ((200,100),(255,0,255)))
+    for position,color in circles:
+        scene.add(Circle(position,30,color))
+    scene.add(Text((50,50),"Testing SVG"))
+    scene.write_svg()
+    scene.display()
+if __name__ == '__main__':
+    test()
diff --git a/src/obitools/table/__init__.py b/src/obitools/table/__init__.py
new file mode 100644
index 0000000..41e00bd
--- /dev/null
+++ b/src/obitools/table/__init__.py
@@ -0,0 +1,633 @@
+from itertools import imap,count,chain
+from itertools import imap,count,chain
+class Table(list):
+    """
+    Tables are list of rows of the same model.
+    Rows are stored as the raw C{row} attribute of the C{TableRow}
+    given to the table and wrapped again by C{rowFactory} on reading.
+    """
+    def __init__(self, headers=None, 
+                       types=None, 
+                       colcount=None,
+                       rowFactory=None,
+                       subrowFactory=None):
+        '''
+        @param headers: the list of column header.
+                        if this parameter is C{None}, C{colcount}
+                        parameter must be set.
+        @type headers: C{list}, C{tuple} or an iterable object
+        @param types: the list of data type associated to each column.
+                      If this parameter is specified its length must be
+                      equal to the C{headers} length or to C{colcount}.
+        @type types: C{list}, C{tuple} or an iterable object
+        @param colcount: number of column in the created table.
+                         If C{headers} parameter is not C{None} this
+                         parameter is ignored
+        @type colcount: int
+        @param rowFactory: callable building a row from C{(table, data)};
+                           defaults to C{TableRow}
+        @param subrowFactory: callable stored for sub rows; defaults to
+                              C{TableRow}
+        '''
+        assert headers is not None or colcount is not None,\
+            'headers or colcount parametter must be not None value'
+        if headers is None:
+            headers = tuple('Col_%d' % x for x in xrange(colcount))
+        self.headers = headers
+        self.types   = types
+        self.colcount= len(self.headers)
+        if rowFactory is None:
+            self.rowFactory=TableRow
+        else:
+            self.rowFactory=rowFactory
+        if subrowFactory is None:
+            self.subrowFactory=TableRow
+        else:
+            # Was assigned from rowFactory, dropping the given factory
+            self.subrowFactory=subrowFactory
+        # Tables created by slicing this one
+        self.likedTo=set()
+    def isCompatible(self,data):
+        '''
+        Tell whether C{data} has the same column count and the same
+        types as this table.
+        @type data: C{Table} or C{TableRow}
+        '''
+        assert isinstance(data,(Table,TableRow))
+        return (self.colcount == data.colcount and
+                (id(self.types)==id(data.types) or
+                 self.types==data.types
+                )
+               )
+    def __setitem__ (self,key,value):
+        '''
+        Replace one row, or several rows when C{key} is a slice.
+        @param key: the index or the slice of the rows to replace
+        @type key: C{int} or C{slice}
+        @param value: a C{TableRow}, any data accepted by the row
+                      factory, or an iterable of them for a slice
+        @raise TypeError: if C{key} is neither an int nor a slice
+        '''
+        if isinstance(key,int):
+            if not isinstance(value, TableRow):
+                value = self.rowFactory(self,value)
+            else:
+                assert self.isCompatible(value)
+            list.__setitem__(self,key,value.row)
+        elif isinstance(key,slice):
+            # slice.indices returns (start, stop, step): unpack it
+            indices = xrange(*key.indices(len(self)))
+            for i,d in imap(None,indices,value):
+                self[i]=d
+        else:        
+            raise TypeError("Key must be an int or slice value")
+    def __getitem__(self,key):
+        '''
+        This function behaves differently according to the data type
+        of C{key}.
+        @param key: description of the table part to return
+        @type key: C{int} or C{slice}
+        @return: return a TableRow (if key is C{int})
+                 or a subpart of the table (if key is C{slice}).
+        @raise TypeError: if C{key} is neither an int nor a slice
+        '''
+        if isinstance(key,int):
+            return self.rowFactory(self,
+                                   list.__getitem__(self,key))
+        if isinstance(key,slice):
+            newtable=Table(self.headers,self.types)
+            indices = xrange(*key.indices(len(self)))
+            for i in indices:
+                # Raw rows are shared with the new table, not copied
+                list.append(newtable,list.__getitem__(self,i))
+            self.likedTo.add(newtable)
+            return newtable
+        raise TypeError
+    def __getslice__(self,x,y):
+        return self.__getitem__(slice(x,y))
+    def __iter__(self):
+        return TableIterator(self)
+    def __hash__(self):
+        # Identity based hash: tables are stored in the likedTo sets
+        return id(self)
+    def __add__(self,itable):
+        return concatTables(self,itable)
+    def _setTypes(self,types):
+        # The test was made on the type builtin instead of the argument
+        if types is not None and not isinstance(types,tuple):
+            types = tuple(x for x in types)
+        assert types is None or len(types)==len(self._headers)
+        self._types = types
+        if types is not None:
+            for row in self:
+                row.castRow()
+    def _getTypes(self):
+        return self._types
+    types = property(_getTypes,_setTypes)
+    def _getHeaders(self):
+        return self._headers
+    def _setHeaders(self,headers):
+        if not isinstance(headers, tuple):
+            headers = tuple(x for x in headers)
+        # Header name -> column index
+        self._hindex = dict((k,i) for i,k in enumerate(headers))
+        self._headers=headers
+        self.colcount=len(headers)
+    headers=property(_getHeaders,_setHeaders)   
+    def append(self,value):
+        '''
+        Add a row at the end of the table.
+        @param value: a compatible C{TableRow} or any data accepted by
+                      the row factory
+        '''
+        if not isinstance(value, TableRow):
+            value = self.rowFactory(self,value)
+        else:
+            assert self.isCompatible(value)
+        list.append(self,value.row)
+class _Row(list):
+    """
+    List of fixed size holding the cells of one table row.
+    """
+    def __init__(self,data,size):
+        """
+        @param data: the values of the cells, or C{None} for a row
+                     filled with C{None}
+        @param size: the expected number of cells
+        """
+        if data is not None:
+            list.__init__(self,data)
+            assert len(self)==size, \
+              "Size of data is not correct (%d instead of %d)" % (len(self),size)
+        else:
+            list.__init__(self,[None]*size)
+    def append(self,value):
+        raise NotImplementedError("Rows cannot change of size")
+    def pop(self,key=None):
+        raise NotImplementedError("Rows cannot change of size")
+    def extend(self,values):
+        raise NotImplementedError("Rows cannot change of size")
+class TableRow(object):    
+    '''
+    View on one row of a C{Table}.
+    The cells are stored in the C{row} attribute (a C{_Row}); headers,
+    types and column count are read from the owning table.
+    '''
+    def __init__(self, table,
+                       data=None,
+                       ):
+        '''
+        @param table: the table owning the row
+        @param data: a C{_Row} used as it is, a sequence of values, a
+                     dictionary indexed by header, or C{None} for a row
+                     filled with C{None}
+        '''
+        self.table = table
+        if isinstance(data,_Row):
+            # Was assigned from an undefined name
+            self.row=data
+        elif data is None:
+            self.row=_Row(None,self._colcount)
+        else:
+            data = self._castRow(data)
+            self.row=_Row(data,self._colcount)
+    def getType(self):
+        return self.table.types
+    def getHeaders(self):
+        return self.table.headers
+    def getHIndex(self):
+        return self.table._hindex
+    def getColCount(self):
+        return self.table.colcount
+    types  = property(getType,None,None,
+                      "List of types associated to this row")
+    headers= property(getHeaders,None,None,
+                      "List of headers associated to this row")
+    _hindex= property(getHIndex,None,None)
+    _colcount = property(getColCount,None,None)
+    @staticmethod
+    def _castValue(t,x):
+        '''
+        Cast a value to a specified type, with exception of
+        C{None} values that are returned without cast.
+        @param t: the destination type
+        @type t: C{type}
+        @param x: the value to cast
+        @return: the casted value or C{None}
+        '''
+        if x is None or t is None:
+            return x
+        else:
+            return t(x)
+    def _castRow(self,data):
+        '''
+        Convert C{data} into the list of the cell values of a row.
+        @param data: a list, a dictionary indexed by header or any
+                     other iterable of values
+        @return: a list of values casted to the column types, if any
+        '''
+        if not isinstance(data, (list,dict)):
+            data=[x for x in data]
+        if isinstance(data,list):
+            assert len(data)==self._colcount, \
+                   'values has not good length'
+            if self.types is not None:
+                data=[TableRow._castValue(t, x)
+                       for t,x in imap(None,self.types,data)]
+        elif isinstance(data,dict):
+            # Columns missing from the dictionary are left to None
+            lvalue = [None] * len(self.headers)
+            for k,v in data.items():
+                try:
+                    hindex = self._hindex[k]
+                    if self.types is not None:
+                        lvalue[hindex]=TableRow._castValue(self.types[hindex], v)
+                    else:
+                        lvalue[hindex]=v
+                except KeyError:
+                    # NOTE(review): info is not defined in the visible
+                    # part of this module - confirm where it comes from
+                    info('%s is not a table column' % k)
+            data=lvalue
+        else:
+            raise TypeError
+        return data
+    def __getitem__(self,key):
+        '''
+        @param key: a column index, a slice of columns or a header
+        @type key: C{int}, C{slice} or C{str}
+        @raise TypeError: for any other kind of key
+        '''
+        if isinstance(key,(int,slice)):
+            return self.row[key]
+        if isinstance(key,str):
+            i = self._hindex[key]
+            return self.row[i]
+        raise TypeError("Key must be an int, slice or str value")
+    def __setitem__(self,key,value):
+        '''
+        @param key: a column index, a slice of columns or a header
+        @type key: C{int}, C{slice} or C{str}
+        @param value: the new value, or an iterable of values when
+                      C{key} is a slice
+        @raise TypeError: for any other kind of key
+        '''
+        if isinstance(key,str):
+            # A header is translated then handled as a column index
+            key = self._hindex[key]
+        if isinstance(key,int):
+            if self.types is not None:
+                value = TableRow._castValue(self.types[key], value)
+            self.row[key]=value
+        elif isinstance(key,slice):
+            # slice.indices returns (start, stop, step): unpack it
+            indices = xrange(*key.indices(len(self.row)))
+            for i,v in imap(None,indices,value):
+                self[i]=v
+        else:        
+            raise TypeError("Key must be an int, slice or str value")
+    def __iter__(self):
+        '''
+        Iterate over the cell values.
+        '''
+        return iter(self.row)
+    def append(self,value):
+        raise NotImplementedError("Rows cannot change of size")
+    def pop(self,key=None):
+        raise NotImplementedError("Rows cannot change of size")
+    def extend(self,values):
+        raise NotImplementedError("Rows cannot change of size")
+    def __len__(self):
+        return self._colcount
+    def __repr__(self):
+        return repr(self.row)
+    def __str__(self):
+        return str(self.row)
+    def castRow(self):
+        '''
+        Rebuild the cells after casting them to the table types.
+        '''
+        self.row = _Row(self._castRow(self.row),len(self.row))
+class iTableIterator(object):
+    """
+    Interface shared by the iterators over table rows.
+    Subclasses have to provide the accessors and C{next}; C{columnIndex}
+    relies on a C{_reference} attribute set by the subclass.
+    """
+    def _getHeaders(self):
+        raise NotImplementedError
+    def _getTypes(self):
+        raise NotImplementedError
+    def _getRowFactory(self):
+        raise NotImplementedError
+    def _getSubrowFactory(self):
+        raise NotImplementedError
+    def _getColcount(self):
+        return len(self._getTypes())
+    def __iter__(self):
+        return self
+    headers = property(_getHeaders,None,None)
+    types   = property(_getTypes,None,None)
+    rowFactory    = property(_getRowFactory,None,None)
+    subrowFactory = property(_getSubrowFactory,None,None)
+    colcount = property(_getColcount,None,None)
+    def columnIndex(self,name):
+        """
+        Return the positive index of a column.
+        @param name: a header or a column index, possibly negative
+        @type name: C{str} or C{int}
+        @raise IndexError: if an index is out of range
+        @raise TypeError: if name is neither a str nor an int
+        """
+        if isinstance(name,str):
+            return self._reference.headers.index(name)
+        elif isinstance(name,int):
+            lh = len(self._reference.headers)
+            if name < lh and name >=0:
+                return name
+            elif name < 0 and name >= -lh:
+                # -1 is the last column, i.e. lh - 1
+                return lh + name
+            raise IndexError
+        raise TypeError
+    def next(self):
+        raise NotImplementedError
+class TableIterator(iTableIterator):
+    def __init__(self,table):
+        if not isinstance(table,Table):
+            raise TypeError
+        self._reftable=table
+        self._i=0
+    def _getHeaders(self):
+        return self._reftable.headers
+    def _getTypes(self):
+        return self._reftable.types
+    def _getRowFactory(self):
+        return self._reftable.rowFactory
+    def _getSubrowFactory(self):
+        return self._reftable.subrowFactory
+    def columnIndex(self,name):
+        if isinstance(name,str):
+            return self._reftable._hindex[name]
+        elif isinstance(name,int):
+            lh = len(self._reftable._headers)
+            if name < lh and name >=0:
+                return name
+            elif name < 0 and name >= -lh:
+                return lh - name
+            raise IndexError
+        raise TypeError
+    def rewind(self):
+        i=0
+    def next(self):
+        if self._i < len(self._reftable):
+            rep=self._reftable[self._i]
+            self._i+=1
+            return rep
+        else:
+            raise StopIteration
+    headers       = property(_getHeaders,None,None)
+    types         = property(_getTypes,None,None)
+    rowFactory    = property(_getRowFactory,None,None)
+    subrowFactory = property(_getSubrowFactory,None,None)
+class ProjectionIterator(iTableIterator):
+    def __init__(self,tableiterator,*cols):
+        self._reference = iter(tableiterator)
+        assert isinstance(self._reference, iTableIterator)
+        self._selected = tuple(self._reference.columnIndex(x)
+                          for x in cols)
+        self._headers = tuple(self._reference.headers[x] 
+                         for x in self._selected)
+        if self._reference.types is not None:
+            self._types= tuple(self._reference.types[x] 
+                         for x in self._selected)
+        else:
+            self._types=None
+    def _getRowFactory(self):
+        return self._reference.subrowFactory
+    def _getSubrowFactory(self):
+        return self._reference.subrowFactory
+    def _getHeaders(self):
+        return self._headers
+    def _getTypes(self):
+        return self._types
+    headers = property(_getHeaders,None,None)
+    types   = property(_getTypes,None,None)
+    rowFactory    = property(_getRowFactory,None,None)
+    subrowFactory = property(_getSubrowFactory,None,None)
+    def next(self):
+        value = self._reference.next()
+        value = (value[x] for x in self._selected)
+        return self.rowFactory(self,value)
+class SelectionIterator(iTableIterator):
+    def __init__(self,tableiterator,**conditions):
+        self._reference = iter(tableiterator)
+        assert isinstance(self._reference, iTableIterator)
+        self._conditions=dict((self._reference.columnIndex(i),c) 
+                              for i,c in conditions.iteritems())
+    def _checkCondition(self,row):
+        return reduce(lambda x,y : x and y,
+                      (bool(self._conditions[i](row[i]))
+                       for i in self._conditions),
+                       True)
+    def _getRowFactory(self):
+        return self._reference.rowFactory
+    def _getSubrowFactory(self):
+        return self._reference.subrowFactory
+    def _getHeaders(self):
+        return self._reference.headers
+    def _getTypes(self):
+        return self._reference.types
+    def next(self):
+        row = self._reference.next()
+        while not self._checkCondition(row):
+            row = self._reference.next()
+        return row
+    headers = property(_getHeaders,None,None)
+    types   = property(_getTypes,None,None)
+    rowFactory    = property(_getRowFactory,None,None)
+    subrowFactory = property(_getSubrowFactory,None,None)
+class UnionIterator(iTableIterator):
+    def __init__(self,*itables):
+        self._itables=[iter(x) for x in itables]
+        self._types = self._itables[0].types
+        self._headers = self._itables[0].headers
+        assert reduce(lambda x,y: x and y,
+                      (    isinstance(z,iTableIterator) 
+                       and len(z.headers)==len(self._headers)
+                       for z in self._itables),
+                    True)
+        self._iterator = chain(*self._itables)
+    def _getRowFactory(self):
+        return self._itables[0].rowFactory
+    def _getSubrowFactory(self):
+        return self._itables[0].subrowFactory
+    def _getHeaders(self):
+        return self._headers
+    def _getTypes(self):
+        return self._types
+    def next(self):
+        value = self._iterator.next()
+        return self.rowFactory(self,value.row)
+    headers = property(_getHeaders,None,None)
+    types   = property(_getTypes,None,None)
+    rowFactory    = property(_getRowFactory,None,None)
+    subrowFactory = property(_getSubrowFactory,None,None)
+def tableFactory(tableiterator):
+    tableiterator = iter(tableiterator)
+    assert isinstance(tableiterator, iTableIterator)
+    newtable = Table(tableiterator.headers,
+                     tableiterator.types,
+                     tableiterator.rowFactory,
+                     tableiterator.subrowFactory)
+    for r in tableiterator:
+        newtable.append(r)
+    return newtable
+def projectTable(tableiterator,*cols):
+    '''
+    Build a new table from the rows of a L{ProjectionIterator}
+    created with the same arguments.
+    @param tableiterator: the source table or table iterator
+    @param cols: the columns passed to the L{ProjectionIterator}
+    @return: the table built by L{tableFactory}
+    '''
+    return tableFactory(ProjectionIterator(tableiterator,*cols))
+def subTable(tableiterator,**conditions):
+    '''
+    Build a new table from the rows of a L{SelectionIterator}
+    created with the same arguments.
+    @param tableiterator: the source table or table iterator
+    @param conditions: the conditions passed to the
+                       L{SelectionIterator}
+    @return: the table built by L{tableFactory}
+    '''
+    return tableFactory(SelectionIterator(tableiterator,**conditions))
+def concatTables(*itables):
+    '''
+    Concatenate several tables.
+    The concatenation is done using the L{UnionIterator<UnionIterator>}.
+    @param itables: the tables to concatenate
+    @type itables: iTableIterator or Table
+    @return: a new Table
+    @rtype: C{Table}
+    @see: L{UnionIterator<UnionIterator>}
+    '''
+    return tableFactory(UnionIterator(*itables))
+class TableIteratorAsDict(object):
+    def __init__(self,tableiterator):
+        self._reference = iter(tableiterator)
+        assert isinstance(self._reference, iTableIterator)
+        self._headers  = self._reference.headers
+        self._types     = self._reference.types
+        if self._types is not None:
+            self._types = dict((n,t) 
+                               for n,t in imap(None,self._headers,self._types))
+    def __iter__(self):
+        return self
+    def next(self):
+        value = self._reference.next()
+        return dict((n,t)
+                    for n,t in imap(None,self._headers,value))
+    def _getHeaders(self):
+        return self._headers
+    def _getTypes(self):
+        return self._types
+    headers = property(_getHeaders,None,None)
+    types   = property(_getTypes,None,None)
\ No newline at end of file
diff --git a/src/obitools/table/csv.py b/src/obitools/table/csv.py
new file mode 100644
index 0000000..1d9a73d
--- /dev/null
+++ b/src/obitools/table/csv.py
@@ -0,0 +1,52 @@
+The obitools.table.csv module provides an iterator adapter
+for parsing csv (comma-separated values) files.
+import re
+def csvIterator(lineIterator,sep=','):
+    '''
+    Allows easy parsing of a csv file. This function
+    convert an iterator on line over a csv text file
+    in an iterator on data list. Each list corresponds
+    to all values present n one line.
+    @param lineIterator: iterator on text lines
+    @type lineIterator: iterator
+    @param sep: string of one letter used as separator
+                blank charactere or " is not allowed as
+                separator
+    @type sep: string
+    @return: an iterator on data list
+    @rtype: iterator
+    '''
+    assert len(sep)==1 and not sep.isspace() and sep!='"'
+    valueMatcher=re.compile('\s*((")(([^"]|"")*)"|([^%s]*?))\s*(%s|$)' % (sep,sep))
+    def iterator():
+        for l in lineIterator:
+            yield _csvParse(l,valueMatcher)
+    return iterator()
+def _csvParse(line,valueMatcher):
+    data=[]
+    i = iter(valueMatcher.findall(line))
+    m = i.next()
+    if m[0]:
+        while m[-1]!='':
+            if m[1]=='"':
+                data.append(m[2].replace('""','"'))
+            else:
+                data.append(m[0])
+            m=i.next()
+        if m[1]=='"':
+            data.append(m[2].replace('""','"'))
+        else:
+            data.append(m[0])
+    return data
\ No newline at end of file
diff --git a/src/obitools/tagmatcher/__init__.py b/src/obitools/tagmatcher/__init__.py
new file mode 100644
index 0000000..880ead0
--- /dev/null
+++ b/src/obitools/tagmatcher/__init__.py
@@ -0,0 +1,35 @@
+from obitools import NucSequence
+from obitools.location import locationGenerator,extractExternalRefs
+class TagMatcherSequence(NucSequence):
+    '''
+    Class used to represent a nucleic sequence issued mapped
+    on a genome by the tagMatcher software.
+    '''
+    def __init__(self,seq,cd,locs,dm,rm):
+        NucSequence.__init__(self, seq, seq)
+        self['locations']=locs
+        self['conditions']=cd
+        self['dm']=dm
+        self['rm']=rm
+        self['tm']=dm+rm
+    def eminEmaxFilter(self,emin=None,emax=None):
+        result = [x for x in self['locations'] 
+                  if (emin is None or x['error'] >=emin)
+                  and (emax is None or x['error'] <=emax)]
+        self['locations']=result
+        dm=0
+        rm=0
+        for x in result:
+            if x.isDirect():
+                dm+=1
+            else:
+                rm+=1
+        self['dm']=dm
+        self['rm']=rm
+        self['tm']=dm+rm
+        return self
diff --git a/src/obitools/tagmatcher/options.py b/src/obitools/tagmatcher/options.py
new file mode 100644
index 0000000..45673ce
--- /dev/null
+++ b/src/obitools/tagmatcher/options.py
@@ -0,0 +1,14 @@
+def addTagMatcherErrorOptions(optionManager):
+    optionManager.add_option('-E','--emax',
+                             action='store',
+                             metavar="<##>",
+                             type="int",dest="emax",
+                             default=None,
+                             help="keep match with no more than emax errors")
+    optionManager.add_option('-e','--emin',
+                             action='store',
+                             metavar="<##>",
+                             type="int",dest="emin",
+                             default=0,
+                             help="keep match with at least emin errors")
diff --git a/src/obitools/tagmatcher/parser.py b/src/obitools/tagmatcher/parser.py
new file mode 100644
index 0000000..a843e66
--- /dev/null
+++ b/src/obitools/tagmatcher/parser.py
@@ -0,0 +1,89 @@
+import re
+import sys
+from obitools import tagmatcher
+from obitools.seqdb import nucEntryIterator
+from obitools.location.feature import Feature
+from obitools.location import locationGenerator
+# Patterns extracting the fields of one tagMatcher entry. Each one is
+# anchored, with a look-behind, on a two-letter line tag followed by
+# three spaces.
+# TG: the run of IUPAC nucleotide letters following the tag
+_seqMatcher    = re.compile('(?<=TG   )[acgtrymkwsbdhvnACGTRYMKWSBDHVN]+')
+# CD: captures (name, integer) from a 'name : integer' line
+_cdMatcher     = re.compile('(?<=CD   ) *([^:]+?) +: +([0-9]+)')
+# LO: captures an upper-case nucleotide word, two space-free tokens
+# and an integer between parentheses
+_loMatcher     = re.compile('(?<=LO   ) *([ACGTRYMKWSBDHVN]+) +([^ ]+) +([^ ]+) +\(([0-9]+)\)')
+# DM and RM: the integer following the tag
+_dmMatcher     = re.compile('(?<=DM   )[0-9]+')
+_rmMatcher     = re.compile('(?<=RM   )[0-9]+')
+def __tagmatcherparser(text):
+    try:
+        seq    = _seqMatcher.search(text).group()
+        cd     = dict((x[0],int(x[1])) for x in  _cdMatcher.findall(text))
+        locs = []
+        for (match,ac,loc,err) in _loMatcher.findall(text):
+            feat = Feature('location', locationGenerator(loc))
+            feat['error']=int(err)
+            feat['match']=match
+            feat['contig']=ac
+            locs.append(feat)
+        dm = int(_dmMatcher.search(text).group())
+        rm = int(_rmMatcher.search(text).group())
+    except AttributeError,e:
+        print >>sys.stderr,'======================================================='
+        print >>sys.stderr,text
+        print >>sys.stderr,'======================================================='
+        raise e
+    return (seq,cd,locs,dm,rm)
+def tagMatcherParser(text):
+    '''
+    Build a C{TagMatcherSequence} from the text of one entry.
+    @param text: the text of the entry
+    @return: a C{tagmatcher.TagMatcherSequence} built from the
+             fields extracted by C{__tagmatcherparser}
+    '''
+    return tagmatcher.TagMatcherSequence(*__tagmatcherparser(text))
+class TagMatcherIterator(object):
+    _cdheadparser  = re.compile('condition [0-9]+ : (.+)') 
+    def __init__(self,file):
+        self._ni = nucEntryIterator(file)
+        self.header=self._ni.next()
+        self.conditions=TagMatcherIterator._cdheadparser.findall(self.header)
+    def next(self):
+        return tagMatcherParser(self._ni.next())
+    def __iter__(self):
+        return self
+def formatTagMatcher(tmseq,reader=None):
+    if isinstance(tmseq, TagMatcherIterator):
+        return tmseq.header
+    assert isinstance(tmseq,tagmatcher.TagMatcherSequence),'Only TagMatcherSequence can be used'
+    lo = '\n'.join(['LO   %s %s %s (%d)' % (l['match'],l['contig'],l.locStr(),l['error']) 
+                    for l in tmseq['locations']])     
+    if reader is not None:
+        cd = '\n'.join(['CD   %s : %d' % (x,tmseq['conditions'][x])
+                        for x in reader.conditions])
+    else:   
+        cd = '\n'.join(['CD   %s : %d' % (x,tmseq['conditions'][x])
+                        for x in tmseq['conditions']])
+    tg = 'TG   %s' % str(tmseq)
+    e=[tg]
+    if cd:
+        e.append(cd)
+    if lo:
+        e.append(lo)
+    tm = 'TM   %d' % tmseq['tm']
+    dm = 'DM   %d' % tmseq['dm']
+    rm = 'RM   %d' % tmseq['rm']
+    e.extend((tm,dm,rm,'//'))
+    return '\n'.join(e)
diff --git a/src/obitools/thermo/__init__.py b/src/obitools/thermo/__init__.py
new file mode 100644
index 0000000..492dbb9
--- /dev/null
+++ b/src/obitools/thermo/__init__.py
@@ -0,0 +1,597 @@
+from math import log
+from array import array
+from copy import deepcopy
+           'a':1,'c':2,'g':3,'t':4,
+           '-':0
+          }
+           'a':4,'c':3,'g':2,'t':1,
+           '-':0
+          }
+# presumably the gas constant in cal/(K.mol) - TODO confirm the unit
+R                      = 1.987
+# default primer concentration and salt values, judging from their
+# names - units to be confirmed
+DEF_CONC_PRIMERS       = 8.e-7
+DEF_SALT               = 0.05
+# entropy / enthalpy stored in the tables for the forbidden
+# configurations; forbidden_entropy is rebound by initParams
+forbidden_entropy      = 0.
+forbidden_enthalpy     = 1.e18
+__dH = [[[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]]
+      ]
+__dS =  [[[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
+      [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
+       [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]]
+      ]
+def initParams(c1, c2, kp, sm,nparm={}):
+    global forbidden_entropy
+    global dH,dS
+    dH=deepcopy(__dH)
+    dS=deepcopy(__dS)
+    nparm['Ct1'] = c1;
+    nparm['Ct2'] = c2;
+    nparm['kplus'] = kp;
+    maxCT = 1;
+    if(nparm['Ct2'] > nparm['Ct1']):
+        maxCT = 2
+    if(nparm['Ct1'] == nparm['Ct2']):
+        ctFactor = nparm['Ct1']/2
+    elif (maxCT == 1):
+        ctFactor = nparm['Ct1']-nparm['Ct2']/2
+    else:
+        ctFactor = nparm['Ct2']-nparm['Ct1']/2
+    nparm['rlogc'] = R * log(ctFactor)
+    forbidden_entropy = nparm['rlogc']
+    nparm['kfac'] = 0.368 * log(nparm['kplus'])
+    nparm['saltMethod'] = sm
+    # Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
+    for x in xrange(1,5):
+        for y in xrange(1,5):
+            dH[0][x][y][0]=forbidden_enthalpy;
+            dS[0][x][y][0]=forbidden_entropy;
+            dH[x][0][0][y]=forbidden_enthalpy;
+            dS[x][0][0][y]=forbidden_entropy;
+            dH[x][0][y][0]=forbidden_enthalpy;
+            dS[x][0][y][0]=forbidden_entropy;
+            # forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
+            dH[x][5][y][0]=forbidden_enthalpy;
+            dS[x][5][y][0]=forbidden_entropy;
+            dH[x][0][y][5]=forbidden_enthalpy;
+            dS[x][0][y][5]=forbidden_entropy;
+            dH[5][x][0][y]=forbidden_enthalpy;
+            dS[5][x][0][y]=forbidden_entropy;
+            dH[0][x][5][y]=forbidden_enthalpy;
+            dS[0][x][5][y]=forbidden_entropy;
+            #forbid X$/-Y etc.
+            dH[x][5][0][y]=forbidden_enthalpy;
+            dS[x][5][0][y]=forbidden_entropy;
+            dH[x][0][5][y]=forbidden_enthalpy;
+            dS[x][0][5][y]=forbidden_entropy;
+            dH[5][x][y][0]=forbidden_enthalpy;
+            dS[5][x][y][0]=forbidden_entropy;
+            dH[0][x][y][5]=forbidden_enthalpy;
+            dS[0][x][y][5]=forbidden_entropy;
+        #also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
+        dH[x][0][0][0]=forbidden_enthalpy;
+        dS[x][0][0][0]=forbidden_entropy;
+        dH[0][0][x][0]=forbidden_enthalpy;
+        dS[0][0][x][0]=forbidden_entropy;
+        # x-/-$
+        dH[x][0][0][5]=forbidden_enthalpy;
+        dS[x][0][0][5]=forbidden_entropy;
+        dH[5][0][0][x]=forbidden_enthalpy;
+        dS[5][0][0][x]=forbidden_entropy;
+        dH[0][5][x][0]=forbidden_enthalpy;
+        dS[x][0][0][5]=forbidden_entropy;
+        dH[0][x][5][0]=forbidden_enthalpy;
+        dS[0][x][5][0]=forbidden_entropy;
+    # forbid --/--
+    dH[0][0][0][0]=forbidden_enthalpy;
+    dS[0][0][0][0]=forbidden_entropy;
+    dH[5][0][0][0]=forbidden_enthalpy;
+    dS[5][0][0][0]=forbidden_entropy;
+    dH[0][0][5][0]=forbidden_enthalpy;
+    dS[0][0][5][0]=forbidden_entropy;
+    dH[0][5][5][0]=forbidden_enthalpy;
+    dS[0][5][5][0]=forbidden_entropy;
+    # Interior loops (double Mismatches)
+    iloop_entropy=-0.97
+    iloop_enthalpy=0.0
+    for x in xrange(1,5):
+        for y in xrange(1,5):
+            for a in xrange(1,5):
+                for b in xrange(1,5):
+                    # AT and CG pair, and as A=1, C=2, G=3, T=4 this means
+                    # we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
+                    if ( not ((x+a==5) or (y+b==5))):
+                        # No watson-crick-pair, i.e. double mismatch!
+                        # set enthalpy/entropy to loop expansion!
+                        dH[x][y][a][b] = iloop_enthalpy;
+                        dS[x][y][a][b] = iloop_entropy;
+    # xy/-- and --/xy (Bulge Loops of size > 1)
+    bloop_entropy=-1.3
+    bloop_enthalpy=0.0
+    for x in xrange(1,5):
+        for y in xrange(1,5):
+            dH[x][y][0][0] = bloop_enthalpy;
+            dS[x][y][0][0] = bloop_entropy;
+            dH[0][0][x][y] = bloop_enthalpy;
+            dS[0][0][x][y] = bloop_entropy;
+    # x-/ya abd xa/y- as well as -x/ay and ax/-y
+    # bulge opening and closing parameters with
+    # adjacent matches / mismatches
+    # obulge_mism and cbulge_mism chosen so high to avoid
+    #     AAAAAAAAA
+    #    T--G----T
+    # being better than
+    #     AAAAAAAAA
+    #     TG------T
+    obulge_match_H  =-2.66e3
+    obulge_match_S  =-14.22
+    cbulge_match_H  =-2.66e3
+    cbulge_match_S  =-14.22
+    obulge_mism_H   = 0.0
+    obulge_mism_S   = -6.45
+    cbulge_mism_H   = 0.0
+    cbulge_mism_S   =-6.45
+    for x in xrange(1,5):
+        for y in xrange(1,5):
+            for a in xrange(1,5):
+                if (x+y==5):  # other base pair matches!
+                    dH[x][0][y][a]=obulge_match_H;  # bulge opening
+                    dS[x][0][y][a]=obulge_match_S;
+                    dH[x][a][y][0]=obulge_match_H;
+                    dS[x][a][y][0]=obulge_match_S;
+                    dH[0][x][a][y]=cbulge_match_H;  # bulge closing
+                    dS[0][x][a][y]=cbulge_match_S;
+                    dH[a][x][0][y]=cbulge_match_H;
+                    dS[a][x][0][y]=cbulge_match_S;
+                else:
+                    # mismatch in other base pair!
+                    dH[x][0][y][a]=obulge_mism_H;   # bulge opening
+                    dS[x][0][y][a]=obulge_mism_S;
+                    dH[x][a][y][0]=obulge_mism_H;
+                    dS[x][a][y][0]=obulge_mism_S;
+                    dH[0][x][a][y]=cbulge_mism_H;   # bulge closing
+                    dS[0][x][a][y]=cbulge_mism_S;
+                    dH[a][x][0][y]=cbulge_mism_H;
+                    dS[a][x][0][y]=cbulge_mism_S;
+    # Watson-Crick pairs (note that only ten are unique, as obviously
+    # 5'-AG-3'/3'-TC-5'  =  5'-CT-3'/3'-GA-5' etc.
+    dH[1][1][4][4]=-7.6e3;  dS[1][1][4][4]=-21.3    # AA/TT 04
+    dH[1][2][4][3]=-8.4e3;  dS[1][2][4][3]=-22.4    # AC/TG adapted GT/CA
+    dH[1][3][4][2]=-7.8e3;  dS[1][3][4][2]=-21.0    # AG/TC adapted CT/GA
+    dH[1][4][4][1]=-7.2e3;  dS[1][4][4][1]=-20.4    # AT/TA 04
+    dH[2][1][3][4]=-8.5e3;  dS[2][1][3][4]=-22.7    # CA/GT 04
+    dH[2][2][3][3]=-8.0e3;  dS[2][2][3][3]=-19.9    # CC/GG adapted GG/CC
+    dH[2][3][3][2]=-10.6e3; dS[2][3][3][2]=-27.2    # CG/GC 04
+    dH[2][4][3][1]=-7.8e3;  dS[2][4][3][1]=-21.0    # CT/GA 04
+    dH[3][1][2][4]=-8.2e3;  dS[3][1][2][4]=-22.2    # GA/CT 04
+    dH[3][2][2][3]=-9.8e3;  dS[3][2][2][3]=-24.4    # GC/CG 04
+    dH[3][3][2][2]=-8.0e3;  dS[3][3][2][2]=-19.9    # GG/CC 04
+    dH[3][4][2][1]=-8.4e3;  dS[3][4][2][1]=-22.4    # GT/CA 04
+    dH[4][1][1][4]=-7.2e3;  dS[4][1][1][4]=-21.3    # TA/AT 04
+    dH[4][2][1][3]=-8.2e3;  dS[4][2][1][3]=-22.2    # TC/AG adapted GA/CT
+    dH[4][3][1][2]=-8.5e3;  dS[4][3][1][2]=-22.7    # TG/AC adapted CA/GT
+    dH[4][4][1][1]=-7.6e3;  dS[4][4][1][1]=-21.3    # TT/AA adapted AA/TT
+    # A-C Mismatches (Values for pH 7.0)
+    dH[1][1][2][4]=7.6e3;   dS[1][1][2][4]=20.2     # AA/CT
+    dH[1][1][4][2]=2.3e3;   dS[1][1][4][2]=4.6      # AA/TC
+    dH[1][2][2][3]=-0.7e3;  dS[1][2][2][3]=-3.8     # AC/CG
+    dH[1][2][4][1]=5.3e3;   dS[1][2][4][1]=14.6     # AC/TA
+    dH[1][3][2][2]=0.6e3;   dS[1][3][2][2]=-0.6     # AG/CC
+    dH[1][4][2][1]=5.3e3;   dS[1][4][2][1]=14.6     # AT/CA
+    dH[2][1][1][4]=3.4e3;   dS[2][1][1][4]=8.0      # CA/AT
+    dH[2][1][3][2]=1.9e3;   dS[2][1][3][2]=3.7      # CA/GC
+    dH[2][2][1][3]=5.2e3;   dS[2][2][1][3]=14.2     # CC/AG
+    dH[2][2][3][1]=0.6e3;   dS[2][2][3][1]=-0.6     # CC/GA
+    dH[2][3][1][2]=1.9e3;   dS[2][3][1][2]=3.7      # CG/AC
+    dH[2][4][1][1]=2.3e3;   dS[2][4][1][1]=4.6      # CT/AA
+    dH[3][1][2][2]=5.2e3;   dS[3][1][2][2]=14.2     # GA/CC
+    dH[3][2][2][1]=-0.7e3;  dS[3][2][2][1]=-3.8     # GC/CA
+    dH[4][1][1][2]=3.4e3;   dS[4][1][1][2]=8.0      # TA/AC
+    dH[4][2][1][1]=7.6e3;   dS[4][2][1][1]=20.2     # TC/AA
+    # C-T Mismatches
+    dH[1][2][4][4]=0.7e3;   dS[1][2][4][4]=0.2      # AC/TT
+    dH[1][4][4][2]=-1.2e3;  dS[1][4][4][2]=-6.2     # AT/TC
+    dH[2][1][4][4]=1.0e3;   dS[2][1][4][4]=0.7      # CA/TT
+    dH[2][2][3][4]=-0.8e3;  dS[2][2][3][4]=-4.5     # CC/GT
+    dH[2][2][4][3]=5.2e3;   dS[2][2][4][3]=13.5     # CC/TG
+    dH[2][3][4][2]=-1.5e3;  dS[2][3][4][2]=-6.1     # CG/TC
+    dH[2][4][3][2]=-1.5e3;  dS[2][4][3][2]=-6.1     # CT/GC
+    dH[2][4][4][1]=-1.2e3;  dS[2][4][4][1]=-6.2     # CT/TA
+    dH[3][2][2][4]=2.3e3;   dS[3][2][2][4]=5.4      # GC/CT
+    dH[3][4][2][2]=5.2e3;   dS[3][4][2][2]=13.5     # GT/CC
+    dH[4][1][2][4]=1.2e3;   dS[4][1][2][4]=0.7      # TA/CT
+    dH[4][2][2][3]=2.3e3;   dS[4][2][2][3]=5.4      # TC/CG
+    dH[4][2][1][4]=1.2e3;   dS[4][2][1][4]=0.7      # TC/AT
+    dH[4][3][2][2]=-0.8e3;  dS[4][3][2][2]=-4.5     # TG/CC
+    dH[4][4][2][1]=0.7e3;   dS[4][4][2][1]=0.2      # TT/CA
+    dH[4][4][1][2]=1.0e3;   dS[4][4][1][2]=0.7      # TT/AC
+    # G-A Mismatches
+    dH[1][1][3][4]=3.0e3;   dS[1][1][3][4]=7.4      # AA/GT
+    dH[1][1][4][3]=-0.6e3;  dS[1][1][4][3]=-2.3     # AA/TG
+    dH[1][2][3][3]=0.5e3;   dS[1][2][3][3]=3.2      # AC/GG
+    dH[1][3][3][2]=-4.0e3;  dS[1][3][3][2]=-13.2    # AG/GC
+    dH[1][3][4][1]=-0.7e3;  dS[1][3][4][1]=-2.3     # AG/TA
+    dH[1][4][3][1]=-0.7e3;  dS[1][4][3][1]=-2.3     # AT/GA
+    dH[2][1][3][3]=-0.7e3;  dS[2][1][3][3]=-2.3     # CA/GG
+    dH[2][3][3][1]=-4.0e3;  dS[2][3][3][1]=-13.2    # CG/GA
+    dH[3][1][1][4]=0.7e3;   dS[3][1][1][4]=0.7      # GA/AT
+    dH[3][1][2][3]=-0.6e3;  dS[3][1][2][3]=-1.0     # GA/CG
+    dH[3][2][1][3]=-0.6e3;  dS[3][2][1][3]=-1.0     # GC/AG
+    dH[3][3][1][2]=-0.7e3;  dS[3][3][1][2]=-2.3     # GG/AC
+    dH[3][3][2][1]=0.5e3;   dS[3][3][2][1]=3.2      # GG/CA
+    dH[3][4][1][1]=-0.6e3;  dS[3][4][1][1]=-2.3     # GT/AA
+    dH[4][1][1][3]=0.7e3;   dS[4][1][1][3]=0.7      # TA/AG
+    dH[4][3][1][1]=3.0e3;   dS[4][3][1][1]=7.4      # TG/AA
+    # G-T Mismatches
+    dH[1][3][4][4]=1.0e3;   dS[1][3][4][4]=0.9      # AG/TT
+    dH[1][4][4][3]=-2.5e3;  dS[1][4][4][3]=-8.3     # AT/TG
+    dH[2][3][3][4]=-4.1e3;  dS[2][3][3][4]=-11.7    # CG/GT
+    dH[2][4][3][3]=-2.8e3;  dS[2][4][3][3]=-8.0     # CT/GG
+    dH[3][1][4][4]=-1.3e3;  dS[3][1][4][4]=-5.3     # GA/TT
+    dH[3][2][4][3]=-4.4e3;  dS[3][2][4][3]=-12.3    # GC/TG
+    dH[3][3][2][4]=3.3e3;   dS[3][3][2][4]=10.4     # GG/CT
+    dH[3][3][4][2]=-2.8e3;  dS[3][3][4][2]=-8.0     # GG/TC
+#    dH[3][3][4][4]=5.8e3;   dS[3][3][4][4]=16.3     # GG/TT
+    dH[3][4][2][3]=-4.4e3;  dS[3][4][2][3]=-12.3    # GT/CG
+    dH[3][4][4][1]=-2.5e3;  dS[3][4][4][1]=-8.3     # GT/TA
+#    dH[3][4][4][3]=4.1e3;   dS[3][4][4][3]=9.5      # GT/TG
+    dH[4][1][3][4]=-0.1e3;  dS[4][1][3][4]=-1.7     # TA/GT
+    dH[4][2][3][3]=3.3e3;   dS[4][2][3][3]=10.4     # TC/GG
+    dH[4][3][1][4]=-0.1e3;  dS[4][3][1][4]=-1.7     # TG/AT
+    dH[4][3][3][2]=-4.1e3;  dS[4][3][3][2]=-11.7    # TG/GC
+#    dH[4][3][3][4]=-1.4e3;  dS[4][3][3][4]=-6.2     # TG/GT
+    dH[4][4][1][3]=-1.3e3;  dS[4][4][1][3]=-5.3     # TT/AG
+    dH[4][4][3][1]=1.0e3;   dS[4][4][3][1]=0.9      # TT/GA
+#    dH[4][4][3][3]=5.8e3;   dS[4][4][3][3]=16.3     # TT/GG
+    # A-A Mismatches
+    dH[1][1][1][4]=4.7e3;   dS[1][1][1][4]=12.9     # AA/AT
+    dH[1][1][4][1]=1.2e3;   dS[1][1][4][1]=1.7      # AA/TA
+    dH[1][2][1][3]=-2.9e3;  dS[1][2][1][3]=-9.8     # AC/AG
+    dH[1][3][1][2]=-0.9e3;  dS[1][3][1][2]=-4.2     # AG/AC
+    dH[1][4][1][1]=1.2e3;   dS[1][4][1][1]=1.7      # AT/AA
+    dH[2][1][3][1]=-0.9e3;  dS[2][1][3][1]=-4.2     # CA/GA
+    dH[3][1][2][1]=-2.9e3;  dS[3][1][2][1]=-9.8     # GA/CA
+    dH[4][1][1][1]=4.7e3;   dS[4][1][1][1]=12.9     # TA/AA
+    # C-C Mismatches
+    dH[1][2][4][2]=0.0e3;   dS[1][2][4][2]=-4.4     # AC/TC
+    dH[2][1][2][4]=6.1e3;   dS[2][1][2][4]=16.4     # CA/CT
+    dH[2][2][2][3]=3.6e3;   dS[2][2][2][3]=8.9      # CC/CG
+    dH[2][2][3][2]=-1.5e3;  dS[2][2][3][2]=-7.2     # CC/GC
+    dH[2][3][2][2]=-1.5e3;  dS[2][3][2][2]=-7.2     # CG/CC
+    dH[2][4][2][1]=0.0e3;   dS[2][4][2][1]=-4.4     # CT/CA
+    dH[3][2][2][2]=3.6e3;   dS[3][2][2][2]=8.9      # GC/CC
+    dH[4][2][1][2]=6.1e3;   dS[4][2][1][2]=16.4     # TC/AC
+    # G-G Mismatches
+    dH[1][3][4][3]=-3.1e3;  dS[1][3][4][3]=-9.5     # AG/TG
+    dH[2][3][3][3]=-4.9e3;  dS[2][3][3][3]=-15.3    # CG/GG
+    dH[3][1][3][4]=1.6e3;   dS[3][1][3][4]=3.6      # GA/GT
+    dH[3][2][3][3]=-6.0e3;  dS[3][2][3][3]=-15.8    # GC/GG
+    dH[3][3][2][3]=-6.0e3;  dS[3][3][2][3]=-15.8    # GG/CG
+    dH[3][3][3][2]=-4.9e3;  dS[3][3][3][2]=-15.3    # GG/GC
+    dH[3][4][3][1]=-3.1e3;  dS[3][4][3][1]=-9.5     # GT/GA
+    dH[4][3][1][3]=1.6e3;   dS[4][3][1][3]=3.6      # TG/AG
+    # T-T Mismatches
+    dH[1][4][4][4]=-2.7e3;  dS[1][4][4][4]=-10.8    # AT/TT
+    dH[2][4][3][4]=-5.0e3;  dS[2][4][3][4]=-15.8    # CT/GT
+    dH[3][4][2][4]=-2.2e3;  dS[3][4][2][4]=-8.4     # GT/CT
+    dH[4][1][4][4]=0.2e3;   dS[4][1][4][4]=-1.5     # TA/TT
+    dH[4][2][4][3]=-2.2e3;  dS[4][2][4][3]=-8.4     # TC/TG
+    dH[4][3][4][2]=-5.0e3;  dS[4][3][4][2]=-15.8    # TG/TC
+    dH[4][4][1][4]=0.2e3;   dS[4][4][1][4]=-1.5     # TT/AT
+    dH[4][4][4][1]=-2.7e3;  dS[4][4][4][1]=-10.8    # TT/TA
+    # Dangling Ends
+    dH[5][1][1][4]=-0.7e3;  dS[5][1][1][4]=-0.8     # $A/AT
+    dH[5][1][2][4]=4.4e3;   dS[5][1][2][4]=14.9     # $A/CT
+    dH[5][1][3][4]=-1.6e3;  dS[5][1][3][4]=-3.6     # $A/GT
+    dH[5][1][4][4]=2.9e3;   dS[5][1][4][4]=10.4     # $A/TT
+    dH[5][2][1][3]=-2.1e3;  dS[5][2][1][3]=-3.9     # $C/AG
+    dH[5][2][2][3]=-0.2e3;  dS[5][2][2][3]=-0.1     # $C/CG
+    dH[5][2][3][3]=-3.9e3;  dS[5][2][3][3]=-11.2    # $C/GG
+    dH[5][2][4][3]=-4.4e3;  dS[5][2][4][3]=-13.1    # $C/TG
+    dH[5][3][1][2]=-5.9e3;  dS[5][3][1][2]=-16.5    # $G/AC
+    dH[5][3][2][2]=-2.6e3;  dS[5][3][2][2]=-7.4     # $G/CC
+    dH[5][3][3][2]=-3.2e3;  dS[5][3][3][2]=-10.4    # $G/GC
+    dH[5][3][4][2]=-5.2e3;  dS[5][3][4][2]=-15.0    # $G/TC
+    dH[5][4][1][1]=-0.5e3;  dS[5][4][1][1]=-1.1     # $T/AA
+    dH[5][4][2][1]=4.7e3;   dS[5][4][2][1]=14.2     # $T/CA
+    dH[5][4][3][1]=-4.1e3;  dS[5][4][3][1]=-13.1    # $T/GA
+    dH[5][4][4][1]=-3.8e3;  dS[5][4][4][1]=-12.6    # $T/TA
+    dH[1][5][4][1]=-2.9e3;  dS[1][5][4][1]=-7.6     # A$/TA
+    dH[1][5][4][2]=-4.1e3;  dS[1][5][4][2]=-13.0    # A$/TC
+    dH[1][5][4][3]=-4.2e3;  dS[1][5][4][3]=-15.0    # A$/TG
+    dH[1][5][4][4]=-0.2e3;  dS[1][5][4][4]=-0.5     # A$/TT
+    dH[1][1][5][4]=0.2e3;   dS[1][1][5][4]=2.3      # AA/$T
+    dH[1][1][4][5]=-0.5e3;  dS[1][1][4][5]=-1.1     # AA/T$
+    dH[1][2][5][3]=-6.3e3;  dS[1][2][5][3]=-17.1    # AC/$G
+    dH[1][2][4][5]=4.7e3;   dS[1][2][4][5]=14.2     # AC/T$
+    dH[1][3][5][2]=-3.7e3;  dS[1][3][5][2]=-10.0    # AG/$C
+    dH[1][3][4][5]=-4.1e3;  dS[1][3][4][5]=-13.1    # AG/T$
+    dH[1][4][5][1]=-2.9e3;  dS[1][4][5][1]=-7.6     # AT/$A
+    dH[1][4][4][5]=-3.8e3;  dS[1][4][4][5]=-12.6    # AT/T$
+    dH[2][5][3][1]=-3.7e3;  dS[2][5][3][1]=-10.0    # C$/GA
+    dH[2][5][3][2]=-4.0e3;  dS[2][5][3][2]=-11.9    # C$/GC
+    dH[2][5][3][3]=-3.9e3;  dS[2][5][3][3]=-10.9    # C$/GG
+    dH[2][5][3][4]=-4.9e3;  dS[2][5][3][4]=-13.8    # C$/GT
+    dH[2][1][5][4]=0.6e3;   dS[2][1][5][4]=3.3      # CA/$T
+    dH[2][1][3][5]=-5.9e3;  dS[2][1][3][5]=-16.5    # CA/G$
+    dH[2][2][5][3]=-4.4e3;  dS[2][2][5][3]=-12.6    # CC/$G
+    dH[2][2][3][5]=-2.6e3;  dS[2][2][3][5]=-7.4     # CC/G$
+    dH[2][3][5][2]=-4.0e3;  dS[2][3][5][2]=-11.9    # CG/$C
+    dH[2][3][3][5]=-3.2e3;  dS[2][3][3][5]=-10.4    # CG/G$
+    dH[2][4][5][1]=-4.1e3;  dS[2][4][5][1]=-13.0    # CT/$A
+    dH[2][4][3][5]=-5.2e3;  dS[2][4][3][5]=-15.0    # CT/G$
+    dH[3][5][2][1]=-6.3e3;  dS[3][5][2][1]=-17.1    # G$/CA
+    dH[3][5][2][2]=-4.4e3;  dS[3][5][2][2]=-12.6    # G$/CC
+    dH[3][5][2][3]=-5.1e3;  dS[3][5][2][3]=-14.0    # G$/CG
+    dH[3][5][2][4]=-4.0e3;  dS[3][5][2][4]=-10.9    # G$/CT
+    dH[3][1][5][4]=-1.1e3;  dS[3][1][5][4]=-1.6     # GA/$T
+    dH[3][1][2][5]=-2.1e3;  dS[3][1][2][5]=-3.9     # GA/C$
+    dH[3][2][5][3]=-5.1e3;  dS[3][2][5][3]=-14.0    # GC/$G
+    dH[3][2][2][5]=-0.2e3;  dS[3][2][2][5]=-0.1     # GC/C$
+    dH[3][3][5][2]=-3.9e3;  dS[3][3][5][2]=-10.9    # GG/$C
+    dH[3][3][2][5]=-3.9e3;  dS[3][3][2][5]=-11.2    # GG/C$
+    dH[3][4][5][1]=-4.2e3;  dS[3][4][5][1]=-15.0    # GT/$A
+    dH[3][4][2][5]=-4.4e3;  dS[3][4][2][5]=-13.1    # GT/C$
+    dH[4][5][1][1]=0.2e3;   dS[4][5][1][1]=2.3      # T$/AA
+    dH[4][5][1][2]=0.6e3;   dS[4][5][1][2]=3.3      # T$/AC
+    dH[4][5][1][3]=-1.1e3;  dS[4][5][1][3]=-1.6     # T$/AG
+    dH[4][5][1][4]=-6.9e3;  dS[4][5][1][4]=-20.0    # T$/AT
+    dH[4][1][5][4]=-6.9e3;  dS[4][1][5][4]=-20.0    # TA/$T
+    dH[4][1][1][5]=-0.7e3;  dS[4][1][1][5]=-0.7     # TA/A$
+    dH[4][2][5][3]=-4.0e3;  dS[4][2][5][3]=-10.9    # TC/$G
+    dH[4][2][1][5]=4.4e3;   dS[4][2][1][5]=14.9     # TC/A$
+    dH[4][3][5][2]=-4.9e3;  dS[4][3][5][2]=-13.8    # TG/$C
+    dH[4][3][1][5]=-1.6e3;  dS[4][3][1][5]=-3.6     # TG/A$
+    dH[4][4][5][1]=-0.2e3;  dS[4][4][5][1]=-0.5     # TT/$A
+    dH[4][4][1][5]=2.9e3;   dS[4][4][1][5]=10.4     # TT/A$
+    nparm['dH']=dH
+    nparm['dS']=dS
+    return nparm
+def seqencoder(seq):
+    '''
+    Translate every symbol of seq through the bpencoder table and
+    return the resulting codes as a list, in sequence order.
+    '''
+    codes = []
+    for symbol in seq:
+        codes.append(bpencoder[symbol])
+    return codes
+def getInitialEntropy(nparm=defaultParm):
+    '''
+    Return the starting entropy term: -5.9 plus nparm['rlogc'].
+    '''
+    rlogc = nparm['rlogc']
+    return -5.9 + rlogc
+def getEnthalpy(x0, x1, y0, y1,nparm=defaultParm):
+    '''
+    Look up the enthalpy entry nparm['dH'][x0][x1][y0][y1].
+    '''
+    table = nparm['dH']
+    return table[x0][x1][y0][y1]
+def GetEntropy(x0, x1, y0, y1,nparm=defaultParm):
+    '''
+    Return the entropy entry nparm['dS'][x0][x1][y0][y1] adjusted by the
+    correction selected through nparm['saltMethod'].
+
+    - SALT_METHOD_SANTALUCIA: 0.5*nparm['kfac'] is added once when x0 is
+      not 5 and x1 lies in 1..4, and once more when y1 is not 5 and y0
+      lies in 1..4 (index 5 is written '$' in the parameter-table
+      comments).
+    - SALT_METHOD_OWCZARZY: the matching dH entry, scaled by a
+      polynomial in log(nparm['kplus']) weighted by nparm['gcContent'],
+      is added.
+
+    NOTE(review): `log` is whatever this module has bound to that name;
+    its import is not visible from here.
+    '''
+    enthalpies = nparm['dH']
+    entropies = nparm['dS']
+    method = nparm['saltMethod']
+    entropy = entropies[x0][x1][y0][y1]
+    if method == SALT_METHOD_SANTALUCIA:
+        if x0 != 5 and 1 <= x1 <= 4:
+            entropy += 0.5 * nparm['kfac']
+        if y1 != 5 and 1 <= y0 <= 4:
+            entropy += 0.5 * nparm['kfac']
+    if method == SALT_METHOD_OWCZARZY:
+        log_k = log(nparm['kplus'])
+        pair_dh = enthalpies[x0][x1][y0][y1]
+        gc_term = (4.29 * nparm['gcContent'] - 3.95) * 1e-5 * log_k
+        entropy += pair_dh * (gc_term + 0.0000094 * log_k ** 2)
+    return entropy
+def CalcTM(entropy,enthalpy):
+    '''
+    Return enthalpy/entropy, or 0 when that ratio is not usable.
+
+    0 is returned when enthalpy is at or above forbidden_enthalpy, when
+    entropy is not negative, or when the ratio itself is negative.
+    '''
+    if enthalpy >= forbidden_enthalpy:
+        return 0
+    if not (entropy < 0):
+        return 0
+    ratio = enthalpy / entropy
+    return 0 if ratio < 0 else ratio
+def countGCContent(seq):
+    '''
+    Count the items of seq found in 'cgGC' (G or C, either case).
+    '''
+    return sum(1 for k in seq if k in 'cgGC')
+#def cleanSeq (inseq,outseq,length):
+#    seqlen = len(inseq)
+#    if (len != 0)
+#        seqlen = length;
+#    j=0
+#    for i in xrange(seqlen):
+#    {
+#        switch (inseq[i])
+#        {
+#        case 'a':
+#        case '\0':
+#        case 'A':
+#            outseq[j++] = 'A'; break;
+#        case 'c':
+#        case '\1':
+#        case 'C':
+#            outseq[j++] = 'C'; break;
+#        case 'g':
+#        case '\2':
+#        case 'G':
+#            outseq[j++] = 'G'; break;
+#        case 't':
+#        case '\3':
+#        case 'T':
+#            outseq[j++] = 'T'; break;
+#        }
+#    }
+#    outseq[j] = '\0';
+def calcSelfTM(seq,nparm=defaultParm):
+    '''
+    Return the value computed by calcTMTwoSeq when seq is used for both
+    strands (the second strand is read through rvencoder there).
+
+    The original body repeated the calcTMTwoSeq loop line for line with
+    seq in both roles; delegating keeps a single implementation.
+
+    @param seq: indexable sequence of symbols known to bpencoder and
+                rvencoder
+    @param nparm: parameter dictionary ('dH', 'dS', 'rlogc', ...)
+    @return: CalcTM(entropy, enthalpy) - 273.15
+    '''
+    return calcTMTwoSeq(seq, seq, nparm)
+def calcTMTwoSeq(seq1,seq2,nparm=defaultParm):
+    '''
+    Accumulate enthalpy and entropy over the consecutive symbol pairs of
+    seq1 (encoded with bpencoder) facing the same positions of seq2
+    (encoded with rvencoder), then return CalcTM(...) - 273.15.
+
+    The entropy sum starts at -5.9 + nparm['rlogc'] and each step adds
+    GetEntropy(...) for the pair; the enthalpy sum adds the raw
+    nparm['dH'] entry.
+
+    @param seq1: indexable sequence; its length drives the loop
+    @param seq2: indexable sequence read at the same indices as seq1
+                 (an IndexError is raised if it is shorter)
+    @param nparm: parameter dictionary
+    '''
+    thedH = 0
+    thedS = -5.9+nparm['rlogc']
+    dH=nparm['dH']
+    # The 'dS' table is read inside GetEntropy; no local copy is needed.
+    length=len(seq1)
+    for i in xrange(1,length):
+        c1 = rvencoder[seq2[i-1]]
+        c2 = rvencoder[seq2[i]]
+        c3 = bpencoder[seq1[i-1]]
+        c4 = bpencoder[seq1[i]]
+        thedH += dH[c3][c4][c1][c2]
+        thedS += GetEntropy(c3, c4, c1, c2, nparm)
+    mtemp = CalcTM(thedS,thedH)
+    return mtemp-273.15
diff --git a/src/obitools/tools/__init__.py b/src/obitools/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/obitools/tools/_solexapairend.pyx b/src/obitools/tools/_solexapairend.pyx
new file mode 100644
index 0000000..c145df9
--- /dev/null
+++ b/src/obitools/tools/_solexapairend.pyx
@@ -0,0 +1,187 @@
+# cython: profile=False
+from cpython cimport array
+from obitools.tools.solexapairend import iterOnAligment
+from obitools import NucSequence
+cdef class IterOnConsensus:
+    '''
+    Iterator over the consensus of a two-sequence alignment.
+
+    Each call to __next__ pulls columns from iterOnAligment(ali) until one
+    yields a non-gap symbol, returns (symbol, score) for it, and updates
+    the per-category counters exposed through the read-only properties
+    declared at the bottom of the class.
+    '''
+    # alignment given to the constructor
+    cdef object _ali
+    # column counters, all reset to 0 in __cinit__
+    cdef int __seqASingle
+    cdef int __seqBSingle
+    cdef int __seqABMatch
+    cdef int __seqAMismatch
+    cdef int __seqBMismatch
+    cdef int __seqAInsertion
+    cdef int __seqBInsertion
+    cdef int __seqADeletion
+    cdef int __seqBDeletion
+    # generator returned by iterOnAligment(ali)
+    cdef object __ioa
+    # becomes True once a symbol of the second sequence has been seen
+    cdef bint __firstSeqB
+    def __cinit__(self,ali):
+        self._ali=ali
+        self.__seqASingle=0
+        self.__seqBSingle=0
+        self.__seqABMatch=0
+        self.__seqAMismatch=0
+        self.__seqBMismatch=0
+        self.__seqAInsertion=0
+        self.__seqBInsertion=0
+        self.__seqADeletion=0
+        self.__seqBDeletion=0
+        self.__ioa = iterOnAligment(self._ali)
+        self.__firstSeqB=False
+    # Plain accessors backing the properties defined at the end of the class.
+    def get_seqASingle(self):
+        return self.__seqASingle
+    def get_seqBSingle(self):
+        return self.__seqBSingle
+    def get_seqABMatch(self):
+        return self.__seqABMatch
+    def get_seqAMismatch(self):
+        return self.__seqAMismatch
+    def get_seqBMismatch(self):
+        return self.__seqBMismatch
+    def get_seqAInsertion(self):
+        return self.__seqAInsertion
+    def get_seqBInsertion(self):
+        return self.__seqBInsertion
+    def get_seqADeletion(self):
+        return self.__seqADeletion
+    def get_seqBDeletion(self):
+        return self.__seqBDeletion
+    def __next__(self):
+        # Return the next (symbol, score) pair of the consensus.
+        # Iteration ends when self.__ioa.next() raises (presumably
+        # StopIteration once the underlying generator is exhausted).
+        cdef bytes snuc0
+        cdef bytes snuc1
+        cdef char* nuc0
+        cdef char* nuc1
+        cdef char* dash='-'
+        cdef double score0
+        cdef double score1
+        cdef double h0
+        cdef double h1
+        # Loop until a column produces a symbol; columns resolved as a gap
+        # only update counters and are skipped.
+        while(1):
+            snuc0,score0,snuc1,score1 = self.__ioa.next()
+            nuc0=snuc0
+            nuc1=snuc1
+            if nuc0[0]==nuc1[0]:
+                # Both sequences agree; a gap/gap column is silently skipped.
+                if nuc1[0]!=dash[0]:
+                    self.__firstSeqB=True
+                    self.__seqABMatch+=1
+                    self.__seqBSingle=0
+                    return (nuc0,score0*score1)
+            else:
+                # Disagreement: weight each side by its own score and the
+                # other side's score.
+                h0 = score0 * (1-score1/3)
+                h1 = score1 * (1-score0/3)
+                # NOTE(review): the side with the SMALLER h value is the one
+                # kept (ties go to the second sequence) -- confirm intended.
+                if h0 < h1:
+                    if nuc0[0]!=dash[0]:
+                        self.__seqBSingle=0
+                        if nuc1[0]==dash[0]:
+                            # Symbol only in the first sequence: insertion if the
+                            # second sequence has already started, otherwise a
+                            # leading first-sequence-only position.
+                            if self.__firstSeqB:
+                                self.__seqAInsertion+=1
+                            else:
+                                self.__seqASingle+=1
+                        else:
+                            self.__firstSeqB=True
+                            self.__seqAMismatch+=1
+                        return (nuc0,h0)
+                    else:
+                        # The kept side is a gap: count it, emit nothing.
+                        self.__seqADeletion+=1
+                else:
+                    if nuc1[0]!=dash[0]:
+                        self.__firstSeqB=True
+                        if nuc0[0]==dash[0]:
+                            # __seqBSingle counts the current run of consecutive
+                            # second-sequence-only symbols; it is reset by every
+                            # other column type.
+                            self.__seqBInsertion+=1
+                            self.__seqBSingle+=1
+                        else:
+                            self.__seqBMismatch+=1
+                            self.__seqBSingle=0
+                        return (nuc1,h1)
+                    else:
+                        # The kept side is a gap: count it, emit nothing.
+                        self.__seqBSingle=0
+                        self.__seqBDeletion+=1
+    def __iter__(self):
+        return self
+    # Read-only views on the counters.
+    seqASingle = property(get_seqASingle, None, None, "direct's docstring")
+    seqBSingle = property(get_seqBSingle, None, None, "reverse's docstring")
+    seqABMatch = property(get_seqABMatch, None, None, "idem's docstring")
+    seqAMismatch = property(get_seqAMismatch, None, None, "mismatchdirect's docstring")
+    seqBMismatch = property(get_seqBMismatch, None, None, "mismatchreverse's docstring")
+    seqAInsertion = property(get_seqAInsertion, None, None, "insertdirect's docstring")
+    seqBInsertion = property(get_seqBInsertion, None, None, "insertreverse's docstring")
+    seqADeletion = property(get_seqADeletion, None, None, "deletedirect's docstring")
+    seqBDeletion = property(get_seqBDeletion, None, None, "deletereverse's docstring")
+def buildConsensus(ali):
+    # Build the consensus sequence of the two-sequence alignment `ali`.
+    #
+    # The symbols and scores produced by IterOnConsensus(ali) are collected
+    # into fixed-size C buffers, wrapped into a NucSequence named
+    # '<id of ali[0].wrapped>_CONS' carrying the tags of ali[0].wrapped, and
+    # annotated with the iterator's counters.
+    #
+    # Raises AssertionError when len(ali[0]) exceeds 999, the capacity of
+    # the buffers below (one slot is kept for the terminating 0).
+    cdef double quality[1000]
+    cdef char   aseq[1000]
+    cdef int i=0
+    cdef int j=0
+    cdef char* cnuc
+    cdef bytes nuc
+    cdef double score
+    cdef bytes sseq
+    if len(ali[0])>999:
+        raise AssertionError,"To long alignemnt"
+    ic=IterOnConsensus(ali)
+    for nuc,score in ic:
+        cnuc=nuc
+        quality[i]=score
+        aseq[i]=cnuc[0]
+        i+=1
+    # terminate the C string before converting it to a Python bytes object
+    aseq[i]=0
+    sseq=aseq
+    seq=NucSequence(ali[0].wrapped.id+'_CONS',sseq,**ali[0].wrapped.getTags())
+    seq.quality=array.array('d',[quality[j] for j in range(i)])
+    # optional attributes of the alignment are copied only when present
+    if hasattr(ali, "direction"):
+        seq['direction']=ali.direction
+    if hasattr(ali, "counter"):
+        seq['alignement_id']=ali.counter
+    seq['seq_a_single']=ic.seqASingle
+    seq['seq_b_single']=ic.seqBSingle
+    seq['seq_ab_match']=ic.seqABMatch
+    seq['seq_a_mismatch']=ic.seqAMismatch
+    seq['seq_b_mismatch']=ic.seqBMismatch
+    seq['seq_a_insertion']=ic.seqAInsertion
+    # the final run counted in seqBSingle is taken out of the insertion count
+    seq['seq_b_insertion']=ic.seqBInsertion-ic.seqBSingle
+    seq['seq_a_deletion']=ic.seqADeletion
+    seq['seq_b_deletion']=ic.seqBDeletion
+    seq['score']=ali.score
+    # consensus length minus the positions counted as seqASingle/seqBSingle
+    seq['ali_length']=len(seq)-ic.seqASingle-ic.seqBSingle
+    # score_norm is left unset when the overlap length is not positive
+    if seq['ali_length']>0:
+        seq['score_norm']=float(ali.score)/seq['ali_length']
+    seq['mode']='alignment'
+    return seq
diff --git a/src/obitools/tools/solexapairend.py b/src/obitools/tools/solexapairend.py
new file mode 100644
index 0000000..609f533
--- /dev/null
+++ b/src/obitools/tools/solexapairend.py
@@ -0,0 +1,51 @@
+Created on 17 mai 2010
+ at author: coissac
+from obitools.alignment import columnIterator
+def iterOnAligment(ali):
+    '''
+    Iterate over the columns of a two-sequence alignment and yield, for
+    each column, the tuple (nuc0, score0, nuc1, score1).
+
+    Scores of real symbols are read from the quality arrays:
+    ali[0].wrapped.quality is walked forward from index 0, while
+    ali[1].wrapped.wrapped.quality is walked backward from its last index.
+
+    Scores attached to a gap ('-') depend on where the gap sits:
+      - first sequence: 0. before its first symbol, the mean of the two
+        flanking qualities inside the sequence, 1. after its last symbol;
+      - second sequence: 1. before its first symbol, the mean of the two
+        flanking qualities inside the sequence, 0. after its last symbol.
+    '''
+    # pos0: index of the next quality to read for the first sequence
+    pos0=0
+    # pos1: index of the next quality to read for the second sequence
+    pos1=len(ali[1].wrapped)-1
+    # beginX: a symbol of sequence X has been seen; endX: all consumed
+    begin0=False
+    end0=False
+    begin1=False
+    end1=False
+    for nuc0,nuc1 in columnIterator(ali):
+        if nuc0=='-':
+            if begin0:
+                if not end0:
+                    # internal gap: average of the previous and next qualities
+                    score0 = ( ali[0].wrapped.quality[pos0-1]
+                              +ali[0].wrapped.quality[pos0]
+                             )/2
+                else:
+                    score0 = 1.
+            else:
+                score0 = 0.
+        else:
+            begin0=True
+            score0 = ali[0].wrapped.quality[pos0]
+            pos0+=1
+            end0= pos0==len(ali[0].wrapped)
+        if nuc1=='-':
+            if begin1:
+                if not end1:
+                    # internal gap: pos1 was already decremented, so the
+                    # flanking qualities sit at pos1 and pos1+1
+                    score1 = ( ali[1].wrapped.wrapped.quality[pos1]
+                              +ali[1].wrapped.wrapped.quality[pos1+1]
+                             )/2
+                else:
+                    score1 = 0.
+            else:
+                score1 = 1.
+        else:
+            begin1=True
+            score1 = ali[1].wrapped.wrapped.quality[pos1]
+            pos1-=1
+            end1=pos1<0
+        result = (nuc0,score0,nuc1,score1)
+        yield result    
diff --git a/src/obitools/tree/__init__.py b/src/obitools/tree/__init__.py
new file mode 100644
index 0000000..facb5ff
--- /dev/null
+++ b/src/obitools/tree/__init__.py
@@ -0,0 +1,116 @@
+import re
+class Tree(set):
+    '''
+    A tree stored as a plain set of TreeNode objects. The topology is not
+    held by the set itself but by the _parent attribute of every node.
+    '''
+    def registerNode(self,node):
+        '''Add node (a TreeNode) to the tree.'''
+        assert isinstance(node, TreeNode)
+        self.add(node)
+    def childNodeIterator(self,node):
+        '''Return a generator over the nodes whose parent is node.'''
+        assert isinstance(node, TreeNode)
+        return (x for x in self if x._parent==node)
+    def subTreeSize(self,node):
+        '''Return the number of nodes of the subtree rooted at node, node included.'''
+        n=1
+        for subnode in self.childNodeIterator(node):
+            n+=self.subTreeSize(subnode)
+        return n
+    def getRoot(self):
+        '''Return the only node without parent; assert that it is unique.'''
+        roots = [x for x in self if x._parent is None]
+        assert len(roots)==1,'Tree cannot have several root node'
+        return roots[0]
+    def ancestorNodeIterator(self,node):
+        '''Yield node, then each of its ancestors up to and including the root.'''
+        assert isinstance(node, TreeNode)
+        while node._parent is not None:
+            yield node
+            node=node._parent
+        yield node
+    def terminalNodeIterator(self):
+        '''Return a generator over the nodes flagged as terminal.'''
+        return (x for x in self if x._isterminal)
+    def commonAncestor(self,node1,node2):
+        '''
+        Return the first node met on the path from node2 to the root that
+        also belongs to the path from node1 to the root.
+        '''
+        anc1 = set(x for x in self.ancestorNodeIterator(node1))
+        rep  = [x for x in self.ancestorNodeIterator(node2)
+                  if x in anc1]
+        assert len(rep)>=1
+        return rep[0]
+    def getDist(self,node1,node2):
+        '''
+        Return the sum of the _dist values met when climbing from node1 and
+        from node2 up to their common ancestor.
+        '''
+        ca = self.commonAncestor(node1, node2)
+        dist = 0
+        while node1 != ca:
+            dist+=node1._dist
+            node1=node1._parent
+        while node2 != ca:
+            dist+=node2._dist
+            node2=node2._parent
+        return dist
+    def farestNodes(self):
+        '''
+        Look for the pair of terminal nodes separated by the largest distance.
+        @return: (node1, node2, distance); the two nodes are None and the
+                 distance is 0 when no pair has a strictly positive distance
+        '''
+        dmax=0
+        n1=None
+        n2=None
+        for node1 in self.terminalNodeIterator():
+            for node2 in self.terminalNodeIterator():
+                d = self.getDist(node1, node2)
+                if d > dmax:
+                    dmax = d
+                    n1=node1
+                    n2=node2
+        # Return the recorded best pair, not the last values of the loop
+        # variables (which the original code returned by mistake).
+        return n1,n2,dmax
+    def setRoot(self,node,dist):
+        '''
+        Re-root the tree on the branch linking node to its parent.
+
+        A new root node is created on that branch at distance dist from
+        node. The parent links of the former ancestors of node are then
+        reversed one by one. Finally the last node reached by that walk
+        (the one that had no parent) is spliced out: its children are
+        attached to its own new parent, their distance being increased by
+        its distance, and it is removed from the tree.
+
+        @param node: a node of the tree having a parent
+        @param dist: distance between node and the new root; must be
+                     smaller than node._dist
+        '''
+        assert node in self
+        assert node._parent and node._dist > dist
+        newroot = TreeNode(self)
+        parent  = node._parent
+        node._parent = newroot
+        # remaining part of the split branch, on the former-parent side
+        compdist = node._dist - dist
+        node._dist=dist
+        node = parent
+        while node:
+            parent  = node._parent
+            if parent:
+                # remember the length of the upward branch before overwriting it
+                dist    = node._dist
+            node._parent = newroot
+            node._dist = compdist
+            newroot = node
+            node    = parent
+            if node:
+                compdist=dist
+        for child in self.childNodeIterator(newroot):
+            child._parent = newroot._parent
+            child._dist  += newroot._dist
+        self.remove(newroot)
+class TreeNode(object):
+    '''
+    Node of a Tree. A node knows its parent, an optional name, the length
+    of the branch leading to it, an optional bootstrap value and free
+    keyword annotations; it registers itself into its tree on creation.
+    '''
+    def __init__(self,tree,name=None,dist=None,bootstrap=None,**info):
+        self._info=info
+        self._bootstrap=bootstrap
+        self._dist=dist
+        self._name=name
+        # a new node has no parent until linkToParent is called
+        self._parent=None
+        tree.registerNode(self)
+        # and stays terminal until another node is linked under it
+        self._isterminal=True
+    def linkToParent(self,parent):
+        '''
+        Set the parent of the node (None is allowed) and flag a real
+        parent as non terminal.
+        '''
+        assert parent is None or isinstance(parent, TreeNode)
+        self._parent=parent
+        if parent is None:
+            return
+        parent._isterminal=False
diff --git a/src/obitools/tree/dot.py b/src/obitools/tree/dot.py
new file mode 100644
index 0000000..a21c4a1
--- /dev/null
+++ b/src/obitools/tree/dot.py
@@ -0,0 +1,18 @@
+from obitools.utils import universalOpen
+from obitools.tree import Tree,TreeNode
+def nodeWriter(tree,node,nodes):
+    '''
+    Return the dot edge 'child -> parent ' of node, using the integer
+    identifiers stored in nodes, or an empty string when node has no
+    parent.
+    '''
+    if not node._parent:
+        return ""
+    return '%d -> %d ' % (nodes[node],nodes[node._parent])
+def treeWriter(tree):
+    '''
+    Return the dot (graphviz) description of tree: every node receives an
+    integer identifier and contributes the line built by nodeWriter.
+    '''
+    nodes = dict((node, rank) for rank, node in enumerate(tree))
+    lines = [nodeWriter(tree, node, nodes) for node in tree]
+    return 'digraph tree { node [shape=point]\n%s\n};' % "\n".join(lines)
\ No newline at end of file
diff --git a/src/obitools/tree/layout.py b/src/obitools/tree/layout.py
new file mode 100644
index 0000000..a39ba77
--- /dev/null
+++ b/src/obitools/tree/layout.py
@@ -0,0 +1,103 @@
+class NodeLayout(dict):
+    '''
+    Dictionary of the layout attributes attached to one tree node.
+    '''
+class TreeLayout(dict):
+    '''
+    Description of a phylogenetic tree layout: a dictionary mapping each
+    node to the NodeLayout holding its display attributes.
+    '''
+    def addNode(self,node):
+        '''Register node with a fresh, empty NodeLayout.'''
+        self[node]=NodeLayout()
+    def setAttribute(self,node,key,value):
+        '''Store value under key in the layout of node.'''
+        self[node][key]=value
+    def hasAttribute(self,node,key):
+        '''Tell whether key is defined in the layout of node.'''
+        return key in self[node]
+    def getAttribute(self,node,key,default=None):
+        '''Return the value stored under key for node, or default.'''
+        return self[node].get(key,default)
+    def setNodesColor(self,color,predicate=True):
+        '''
+        Set the 'color' attribute of the selected nodes.
+        @param color: a color value, or a callable taking a node and
+                      returning the color to store for it
+        @param predicate: a constant, or a callable taking a node; a node
+                          is recolored when the resulting value is true
+        '''
+        for node in self:
+            if callable(predicate):
+                change = predicate(node)
+            else:
+                change = predicate
+            if change:
+                if callable(color):
+                    c = color(node)
+                else:
+                    c = color
+                # store the resolved color, not the color generator itself
+                self.setAttribute(node, 'color', c)
+    def setCircular(self,iscircularpredicat):
+        '''
+        Set the 'shape' attribute of every node to 'circle' or 'square'.
+        @param iscircularpredicat: a constant, or a callable taking a node;
+                                   a true value selects 'circle'
+        '''
+        for node in self:
+            if callable(iscircularpredicat):
+                change = iscircularpredicat(node)
+            else:
+                change = iscircularpredicat
+            if change:
+                self.setAttribute(node, 'shape', 'circle')
+            else:
+                self.setAttribute(node, 'shape', 'square')
+    def setRadius(self,radius,predicate=True):
+        '''
+        Set the 'radius' attribute of the selected nodes.
+        @param radius: a radius value, or a callable taking a node and
+                       returning the radius to store for it
+        @param predicate: a constant, or a callable taking a node; a node
+                          is updated when the resulting value is true
+        '''
+        for node in self:
+            if callable(predicate):
+                change = predicate(node)
+            else:
+                change = predicate
+            if change:
+                if callable(radius):
+                    r = radius(node)
+                else:
+                    r = radius
+                self.setAttribute(node, 'radius', r)
+def predicatGeneratorIsInfoEqual(info,value):
+    '''
+    Build a node predicate that is true when the node annotations
+    (node._info) contain the key info associated to value.
+    '''
+    def isInfoEqual(node):
+        data = node._info
+        if data is None:
+            return False
+        if info not in data:
+            return False
+        return data[info]==value
+    return isInfoEqual
+def isTerminalNode(node):
+    '''
+    Node predicate returning the _isterminal flag of node.
+    '''
+    return node._isterminal
+def constantColorGenerator(color):
+    '''
+    Build a color function returning the same color for every node.
+    (The function used to be defined twice with the same body; a single
+    definition is kept.)
+    @param color: the color to return
+    @return: a callable taking a node and returning color
+    '''
+    def colorMaker(node):
+        return color
+    return colorMaker
+def notPredicatGenerator(predicate):
+    '''
+    Build the logical negation of a predicate.
+    @param predicate: a callable taking one argument
+    @return: a callable returning not predicate(x)
+    '''
+    def notpred(x):
+        # the closure must use the parameter name; the misspelled
+        # 'predicat' raised a NameError on the first call
+        return not predicate(x)
+    return notpred
\ No newline at end of file
diff --git a/src/obitools/tree/newick.py b/src/obitools/tree/newick.py
new file mode 100644
index 0000000..c69d0d3
--- /dev/null
+++ b/src/obitools/tree/newick.py
@@ -0,0 +1,117 @@
+import re
+import sys
+from obitools.utils import universalOpen
+from obitools.tree import Tree,TreeNode
+def subNodeIterator(data):
+    '''
+    Split a parenthesised list of subnodes into its top-level items.
+
+    When data starts with '(' the text is scanned from its second
+    character; commas met at nesting level 0 separate the yielded items
+    and the last item stops just before the last character of data.
+    Otherwise data is yielded unchanged.
+    '''
+    level=0
+    start = 1
+    if data[0]=='(':
+        for i in xrange(1,len(data)):
+            c=data[i]
+            if c=='(':
+                level+=1
+            elif c==')':
+                level-=1
+            # only commas outside any nested parenthesis split the list
+            if c==',' and not level:
+                yield data[start:i]
+                start = i+1
+        # i is the index of the last character here, so that character
+        # (presumably the closing parenthesis) is left out
+        yield data[start:i]
+    else:
+        yield data
+# Pattern of one node: optional parenthesised subnodes, optional name,
+# optional bootstrap value and optional ':distance'. Written as a raw
+# string so that \s, \( and \) reach the re module untouched.
+_nodeParser=re.compile(r'\s*(?P<subnodes>\(.*\))?(?P<name>[^ :]+)? *(?P<bootstrap>[0-9.]+)?(:(?P<distance>-?[0-9.]+))?')
+def nodeParser(data):
+    '''
+    Parse the description of one node with _nodeParser and return a
+    dictionary with the keys 'subnodes', 'name', 'bootstrap' and
+    'distance'. Missing or empty fields are set to None; bootstrap and
+    distance are converted to float.
+    '''
+    fields = _nodeParser.match(data).groupdict(0)
+    for key in ('name', 'subnodes'):
+        if not fields[key]:
+            fields[key] = None
+    for key in ('bootstrap', 'distance'):
+        raw = fields[key]
+        fields[key] = float(raw) if raw else None
+    return fields
+def treeParser(data,tree=None,parent=None):
+    '''
+    Recursively build a Tree from its textual description.
+
+    On the top-level call (tree is None) a new Tree is created and data is
+    first cleaned with _cleanTreeData. A TreeNode is then created for data,
+    linked to parent, and the function recurses on every subnode.
+    @return: the tree receiving the nodes
+    '''
+    toplevel = tree is None
+    if toplevel:
+        tree = Tree()
+        data = _cleanTreeData.sub(' ',data).strip()
+    fields = nodeParser(data)
+    node = TreeNode(tree,
+                    fields['name'],
+                    fields['distance'],
+                    fields['bootstrap'])
+    node.linkToParent(parent)
+    children = fields['subnodes']
+    if children:
+        for chunk in subNodeIterator(children):
+            treeParser(chunk,tree,node)
+    return tree
+def treeIterator(file):
+    '''
+    Iterate over the trees described in file.
+
+    The whole content is read through universalOpen, the text matched by
+    _treecomment is removed, and each ';'-separated chunk is parsed with
+    treeParser. When a comment was found, the first one is attached to
+    every yielded tree as its comment attribute.
+    '''
+    stream = universalOpen(file)
+    text = stream.read()
+    found = _treecomment.findall(text)
+    text = _treecomment.sub('',text).strip()
+    comment = found[0] if found else None
+    for chunk in text.split(';'):
+        parsed = treeParser(chunk)
+        if comment:
+            parsed.comment=comment
+        yield parsed
+def nodeWriter(tree,node,deep=0):
+    '''
+    Return the textual description of node and of its whole subtree.
+
+    The subnodes, when any, are written between parentheses, one per line
+    and indented by deep+1 spaces; they are followed by the node name, its
+    bootstrap value (as an integer) and ':' plus its distance, each part
+    being omitted when the corresponding attribute is None.
+    '''
+    label = '' if node._name is None else node._name
+    length = '' if node._dist is None else ':%6.5f' % node._dist
+    support = '' if node._bootstrap is None else ' %d' % int(node._bootstrap)
+    pad = ' ' * (deep+1)
+    rendered = [nodeWriter(tree, child, deep+1)
+                for child in tree.childNodeIterator(node)]
+    block = (',\n' + pad).join(rendered)
+    if block:
+        block = '(\n' + pad + block + '\n' + ' ' * deep + ')'
+    return '%s%s%s%s' % (block,label,support,length)
+def treeWriter(tree,startnode=None):
+    '''
+    Return the description built by nodeWriter for the subtree rooted at
+    startnode (default: the root of tree), followed by ';'.
+    '''
+    root = tree.getRoot() if startnode is None else startnode
+    return nodeWriter(tree,root)+';'
diff --git a/src/obitools/tree/svg.py b/src/obitools/tree/svg.py
new file mode 100644
index 0000000..ff51a8c
--- /dev/null
+++ b/src/obitools/tree/svg.py
@@ -0,0 +1,70 @@
+import math
+from obitools.svg import Scene,Circle,Line,Rectangle,Text
+from obitools.tree import Tree
+def displayTreeLayout(layout,width=400,height=400,radius=3,scale=1.0):
+    '''
+    Render a tree layout as svg code.
+
+    The 'x' and 'y' attributes of the nodes are rescaled so that their
+    bounding box covers 95% of the document, leaving a 2.5% margin. One
+    line is drawn per node having a parent, then one circle (shape
+    'circle') or one square (any other shape) per node.
+
+    @param layout: the tree layout object
+    @type layout: obitools.tree.layout.TreeLayout
+    @param width: svg document width
+    @type width:  int
+    @param height: svg document height
+    @type height: int
+    @param radius: radius used for the nodes without 'radius' attribute,
+                   in svg unit (default 3)
+    @type radius: int
+    @param scale: scale factor applied to the svg coordinates (default 1.0)
+    @type scale: float
+    @return: str containing svg code
+    '''
+    xs = [layout.getAttribute(n,'x') for n in layout]
+    ys = [layout.getAttribute(n,'y') for n in layout]
+    xmin, xmax = min(xs), max(xs)
+    ymin, ymax = min(ys), max(ys)
+    xscale = width * 0.95 / (xmax - xmin) * scale
+    yscale = height * 0.95 / (ymax - ymin) * scale
+    def X(x):
+        return (x - xmin ) * xscale + width * 0.025
+    def Y(y):
+        return (y - ymin ) * yscale + height * 0.025
+    scene = Scene('unrooted', height, width)
+    # branches first, so that the node symbols are drawn over them
+    for n in layout:
+        parent = n._parent
+        if parent is None:
+            continue
+        xf = layout.getAttribute(n,'x')
+        yf = layout.getAttribute(n,'y')
+        xp = layout.getAttribute(parent,'x')
+        yp = layout.getAttribute(parent,'y')
+        begin = (X(xf),Y(yf))
+        stop = (X(xp),Y(yp))
+        scene.add(Line(begin,stop))
+    for n in layout:
+        xf = layout.getAttribute(n,'x')
+        yf = layout.getAttribute(n,'y')
+        cf = layout.getAttribute(n,'color')
+        sf = layout.getAttribute(n,'shape')
+        rf = radius
+        if layout.hasAttribute(n,'radius'):
+            rf = layout.getAttribute(n,'radius')
+        if sf=='circle':
+            symbol = Circle((X(xf),Y(yf)),rf,cf)
+        else:
+            symbol = Rectangle((X(xf)-rf,Y(yf)-rf),2*rf,2*rf,cf)
+        scene.add(symbol)
+    return ''.join(scene.strarray())
\ No newline at end of file
diff --git a/src/obitools/tree/unrooted.py b/src/obitools/tree/unrooted.py
new file mode 100644
index 0000000..9a9f3e6
--- /dev/null
+++ b/src/obitools/tree/unrooted.py
@@ -0,0 +1,33 @@
+from obitools.tree.layout import TreeLayout
+import math
+def subtreeLayout(tree,node,layout,start,end,x,y,default):
+    nbotu = tree.subTreeSize(node)
+    delta = (end-start)/(nbotu+1)
+    layout.addNode(node)
+    layout.setAttribute(node,'x',x)
+    layout.setAttribute(node,'y',y)
+    layout.setAttribute(node,'color',(255,0,0))
+    layout.setAttribute(node,'shape','circle')
+    for subnode in tree.childNodeIterator(node):
+        snbotu = tree.subTreeSize(subnode)
+        end = start + snbotu * delta
+        med = start + snbotu * delta /2
+        r = subnode._dist
+        if r is None or r <=0:
+            r=default
+        subx=math.cos(med) * r + x
+        suby=math.sin(med) * r + y
+        subtreeLayout(tree, subnode, layout, start, end, subx, suby, default)
+        start=end
+    return layout
+def treeLayout(tree):
+    layout = TreeLayout()
+    root   = tree.getRoot()
+    dmin = min(n._dist for n in tree if n._dist is not None and n._dist > 0)
+    return subtreeLayout(tree,root,layout,0,2*math.pi,0,0,dmin / 100)
\ No newline at end of file
diff --git a/src/obitools/unit/__init__.py b/src/obitools/unit/__init__.py
new file mode 100644
index 0000000..d02c812
--- /dev/null
+++ b/src/obitools/unit/__init__.py
@@ -0,0 +1,8 @@
+import unittest
+from obitools import tests_group as obitools_tests_group
diff --git a/src/obitools/unit/obitools/__init__.py b/src/obitools/unit/obitools/__init__.py
new file mode 100644
index 0000000..3c9fc13
--- /dev/null
+++ b/src/obitools/unit/obitools/__init__.py
@@ -0,0 +1,91 @@
+import unittest
+import obitools
+from utils import tests_group as utils_tests_group
+class BioseqTest(unittest.TestCase):
+    sequenceId = 'id1'
+    sequenceDefinition = 'sequence definition'
+    sequenceQualifier = {'extra':3}
+    def setUp(self):
+        self.bioseq = self.bioseqClass(self.sequenceId,
+                                       self.sequenceString,
+                                       self.sequenceDefinition,
+                                       **self.sequenceQualifier)
+        title = self.__doc__.strip()
+        underline = "=" * len(title)
+        #print "%s\n%s" % (title,underline)
+    def tearDown(self):
+        pass
+        #print "\n"
+    def testIdAttribute(self):
+        '''
+        test if id attribute exists
+        '''
+        self.failUnless(hasattr(self.bioseq, 'id'), 'id missing attribute')
+    def testIdValue(self):
+        '''
+        test if id attribute value is 'id1'
+        '''
+        self.failUnlessEqual(self.bioseq.id, 'id1', 
+                             'identifier is created with good value')
+    def testDefinitionAttribute(self):
+        '''
+        test if definition attribute exists
+        '''
+        self.failUnless(hasattr(self.bioseq, 'definition'), 'definition missing attribute')
+    def testSequenceIsLowerCase(self):
+        '''
+        test if sequence is stored as lower case letter
+        '''
+        self.failUnlessEqual(str(self.bioseq), 
+                             str(self.bioseq).lower(), 
+                             "Sequence is not stored as lower case string")
+    def testSequenceQualifier(self):
+        '''
+        test if the extra qualifier is present and its value is three.
+        '''
+        self.failUnlessEqual(self.bioseq['extra'], 
+                             3, 
+                             "Sequence qualifier cannot be successfully retrieve")
+    def testCreateSequenceQualifier(self):
+        self.bioseq['testqualifier']='ok'
+        self.failUnlessEqual(self.bioseq['testqualifier'], 
+                             'ok', 
+                             "Sequence qualifier cannot be successfully created")
+class NucBioseqTest(BioseqTest):
+    '''
+    Test obitools.NucSequence class
+    '''
+    # class instantiated by BioseqTest.setUp
+    bioseqClass = obitools.NucSequence
+    # upper case on purpose: testSequenceIsLowerCase expects a lower case storage
+    sequenceString = 'AACGT' * 5
+class AABioseqTest(BioseqTest):
+    '''
+    Test obitools.AASequence class
+    '''
+    # class instantiated by BioseqTest.setUp
+    bioseqClass = obitools.AASequence
+    # sequence string given to the constructor
+    sequenceString = 'MLKCVT' * 5
+tests_group = utils_tests_group + [NucBioseqTest,AABioseqTest] 
\ No newline at end of file
diff --git a/src/obitools/utils/__init__.py b/src/obitools/utils/__init__.py
new file mode 100644
index 0000000..425ceb9
--- /dev/null
+++ b/src/obitools/utils/__init__.py
@@ -0,0 +1,319 @@
+import sys
+import time
+import re
+import shelve
+from threading import Lock
+from logging  import warning
+import urllib2
+from obitools.gzip import GzipFile
+from obitools.zipfile import ZipFile
+import os.path
+from _utils import FakeFile     # @UnresolvedImport
+from _utils import progressBar  # @UnresolvedImport
+import zlib
+    from collections import Counter
+except ImportError:
+    from obitools.collections import Counter
+class FileFormatError(Exception):
+    '''
+    Raised when an input stream does not follow the expected format
+    (multiLineWrapper raises it when a continued line is not followed
+    by another line).
+    '''
+    pass
+def uncompressFile(fileobj):
+    d = zlib.decompressobj(16+zlib.MAX_WBITS) 
+    READ_BLOCK_SIZE = 1024*8
+    buf = ""    
+    while True:
+        data = fileobj.read(READ_BLOCK_SIZE)
+        if not data: break
+        buf = buf + d.decompress(data)
+        lines = buf.split('\n')
+        buf=lines[-1]
+        for line in lines[0:-1]:
+            yield line+"\n"
+def universalOpen(file,noError=False):
+    '''
+    Open a file gziped or not.
+    If file is a C{str} instance, file is
+    concidered as a file name. In this case 
+    the C{.gz} suffixe is tested to eventually
+    open it a a gziped file.
+    If file is an other kind of object, it is assumed
+    that this object follow the C{file} interface 
+    and it is return as is.
+    @param file: the file to open
+    @type file: C{str} or a file like object
+    @return: an iterator on text lines.
+    '''
+    if isinstance(file,str):
+        try:
+            if urllib2.urlparse.urlparse(file)[0]=='':
+                rep = open(file)
+            else:
+                rep  = urllib2.urlopen(file,timeout=15)
+            if file[-3:] == '.gz':
+                rep = uncompressFile(fileobj=rep)
+            if file[-4:] == '.zip':
+                zip = ZipFile(file=rep)
+                data = zip.infolist()
+                assert len(data)==1,'Only zipped file containning a single file can be open'
+                name = data[0].filename
+                rep = zip.open(name)
+        except Exception as e:
+            if not noError:
+                print >>sys.stderr, e
+                sys.exit();
+            else:
+                raise e
+    else:
+        rep = file
+    return rep
+def universalTell(file):
+    '''
+    Return the position in the file even if
+    it is a gziped one.
+    @param file: the file to check
+    @type file: a C{file} like instance
+    @return: position in the file
+    @rtype:  C{int}
+    '''
+    if hasattr(file, "tell"):
+        return file.tell()
+    else:
+        return None
+def fileSize(file):
+    '''
+    Return the file size even if it is a 
+    gziped one.
+    @param file: the file to check
+    @type file: a C{file} like instance
+    @return: the size of the file
+    @rtype: C{int}
+    '''
+    if hasattr(file, "tell"):
+        pos = file.tell()
+        file.seek(0,2)
+        length = file.tell()
+        file.seek(pos,0)
+    else:
+        length=0
+    return length
+def endLessIterator(endedlist):
+    for x in endedlist:
+        yield x
+    while(1):
+        yield endedlist[-1]
+def multiLineWrapper(lineiterator):
+    '''
+    Aggregator of strings.
+    @param lineiterator: a stream of strings from an opened OBO file.
+    @type lineiterator: a stream of strings.
+    @return: an aggregated stanza.
+    @rtype: an iterotor on str
+    @note: The aggregator aggregates strings from an opened OBO file.
+    When the length of a string is < 2, the current stanza is over.
+    '''
+    for line in lineiterator:
+        rep = [line]
+        while len(line)>=2 and line[-2]=='\\':
+            rep[-1]=rep[-1][0:-2]
+            try:
+                line = lineiterator.next()
+            except StopIteration:
+                raise FileFormatError
+            rep.append(line)
+        yield ''.join(rep)
+def skipWhiteLineIterator(lineiterator):
+    '''
+    Curator of stanza.
+    @param lineiterator: a stream of strings from an opened OBO file.
+    @type lineiterator: a stream of strings.
+    @return: a stream of strings without blank strings.
+    @rtype: a stream strings
+    @note: The curator skip white lines of the current stanza.
+    '''
+    for line in lineiterator:
+        cleanline = line.strip()
+        if cleanline:
+            yield line
+        else:
+            print 'skipped'
+class ColumnFile(object):
+    '''
+    Iterator splitting each line of a text stream into columns.
+    @param stream: what to read, given to C{universalOpen}
+    @param sep: the column delimiter given to C{str.split}
+    @param strip: if True each column value is stripped
+    @param types: a sequence of callables converting the column values;
+                  the last one is reused for the supernumerary columns and
+                  C{bool} is replaced by C{ColumnFile.str2bool}. A value that
+                  cannot be converted is replaced by None.
+    @param skip: lines starting with this prefix are ignored
+    @param head: the column names; when set each row is returned as a
+                 C{dict} instead of a C{list}
+    @param extra: separator starting the extra part of a line (its last
+                  occurrence is used)
+    @param extraformat: regular expression extracting the (key, value)
+                        pairs from the extra part
+    '''
+    def __init__(self,stream,sep=None,strip=True,
+                 types=None,skip=None,head=None,
+                 extra=None,
+                 extraformat='([a-zA-Z]\w*) *= *([^;]+);'):
+        self._stream = universalOpen(stream)
+        self._delimiter=sep
+        self._strip=strip
+        self._extra=extra
+        self._extraformat = re.compile(extraformat)
+        if types:
+            # work on a copy: the caller's sequence is not modified
+            self._types=[x for x in types]
+            for i in xrange(len(self._types)):
+                if self._types[i] is bool:
+                    self._types[i]=ColumnFile.str2bool
+        else:
+            self._types=None
+        self._skip = skip
+        if skip is not None:
+            self._lskip= len(skip)
+        else:
+            self._lskip= 0
+        self._head=head
+    def str2bool(x):
+        # Evaluates the upper-cased first character of the stripped text
+        # with T and V bound to True and F bound to False.
+        # NOTE(review): other characters go through eval as well (a digit
+        # evaluates to an int); an unknown letter raises NameError - confirm
+        # this is the intended contract.
+        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
+    str2bool = staticmethod(str2bool)
+    def __iter__(self):
+        return self
+    def next(self):
+        # Return the next row: a list of values, or a dict when head is set.
+        # StopIteration raised by the underlying stream ends the iteration.
+        def cast(txt,type):
+            # any conversion failure gives None
+            try:
+                v = type(txt)
+            except:
+                v=None
+            return v
+        ligne = self._stream.next()
+        if self._skip is not None:
+            while ligne[0:self._lskip]==self._skip:
+                ligne = self._stream.next()
+        if self._extra is not None:
+            try:
+                # ValueError is raised by the unpacking when the separator
+                # is missing from the line
+                (ligne,extra) = ligne.rsplit(self._extra,1)
+                extra = dict(self._extraformat.findall(extra))
+            except ValueError:
+                extra=None
+        else:
+            extra = None
+        data = ligne.split(self._delimiter)
+        if self._strip or self._types:
+            data = [x.strip() for x in data]
+        if self._types:
+            it = endLessIterator(self._types)
+            data = [cast(*x) for x in ((y,it.next()) for y in data)]
+        if self._head is not None:
+            # Python 2 map(None,...) pairs names and values, padding the
+            # shortest sequence with None
+            data=dict(map(None, self._head,data))
+            if extra is not None:
+                data['__extra__']=extra
+        else:
+            if extra is not None:
+                data.append(extra)
+        return data
+    def tell(self):
+        # position in the underlying stream, None if it cannot be known
+        return universalTell(self._stream)
+class CachedDB(object):
+    def __init__(self,cachefile,masterdb):
+        self._cache = shelve.open(cachefile,'c')
+        self._db = masterdb
+        self._lock=Lock()
+    def _cacheSeq(self,seq):
+        self._lock.acquire()
+        self._cache[seq.id]=seq
+        self._lock.release()
+        return seq
+    def __getitem__(self,ac):
+        if isinstance(ac,str):
+            self._lock.acquire()
+            if ac in self._cache:
+#                print >>sys.stderr,"Use cache for %s" % ac
+                data = self._cache[ac]
+                self._lock.release()
+            else:
+                self._lock.release()
+                data = self._db[ac]
+                self._cacheSeq(data)
+            return data
+        else:
+            self._lock.acquire()
+            acs = [[x,self._cache.get(x,None)] for x in ac]
+            self._lock.release()
+            newacs = [ac for ac,cached in acs if cached is None]
+            if newacs:
+                newseqs = self._db[newacs]
+            else:
+                newseqs = iter([])
+            for r in acs:
+                if r[1] is None:
+                    r[1]=self._cacheSeq(newseqs.next())
+#                else:
+#                    print >>sys.stderr,"Use cache for %s" % r[0]
+            return (x[1] for x in acs)
+def moduleInDevelopment(name):
+    Warning('This module %s is under development : use it with caution' % name)
+def deprecatedScript(newscript):
+    current = sys.argv[0]
+    print >>sys.stderr,"        "   
+    print >>sys.stderr,"        "   
+    print >>sys.stderr,"        "   
+    print >>sys.stderr,"#########################################################"
+    print >>sys.stderr,"#                                                       #"
+    print >>sys.stderr,"    W A R N I N G :"
+    print >>sys.stderr,"        %s is a deprecated script                     " % os.path.split(current)[1]
+    print >>sys.stderr,"        it will disappear in the next obitools version" 
+    print >>sys.stderr,"        "   
+    print >>sys.stderr,"    The new corresponding command is %s    " % newscript   
+    print >>sys.stderr,"#                                                       #"
+    print >>sys.stderr,"#########################################################"
+    print >>sys.stderr,"        "   
+    print >>sys.stderr,"        "   
+    print >>sys.stderr,"        "   
diff --git a/src/obitools/utils/_utils.pxd b/src/obitools/utils/_utils.pxd
new file mode 100644
index 0000000..f86fec0
--- /dev/null
+++ b/src/obitools/utils/_utils.pxd
@@ -0,0 +1,42 @@
+# C declarations borrowed from stdio.h: used to write on stderr with fprintf
+cdef extern from "stdio.h":
+    struct FILE
+    int fprintf(FILE *stream, char *format, ...)
+    FILE* stderr
+    # declared as unsigned int for Cython, named "unsigned long long" in the generated C code
+    ctypedef unsigned int off_t "unsigned long long"
+# C declarations borrowed from time.h: clock measures and broken-down time
+cdef extern from "time.h":
+    # only the fields of struct tm used by this module are declared
+    struct tm :
+        int tm_yday 
+        int tm_hour
+        int tm_min
+        int tm_sec
+    enum: CLOCKS_PER_SEC
+    ctypedef int time_t
+    ctypedef int clock_t
+    tm *gmtime_r(time_t *clock, tm *result)
+    time_t time(time_t *tloc)
+    clock_t clock()
+# Declaration of the FakeFile extension type implemented in _utils.pyx
+cdef class FakeFile:
+    # the wrapped line iterator
+    cdef object _li
+    # lines (or line remainder) read but not yet returned, and their total size
+    cdef list   __buffer
+    cdef int    __bufsize
+    # size has a default value, given in the implementation
+    cpdef str read(self,int size=?)
+    cpdef str readline(self)
+# Declaration of the progressBar function implemented in _utils.pyx;
+# the default values (=?) are given in the implementation
+cpdef object progressBar(object pos,
+                         off_t maxi,
+                         bint reset=?,
+                         bytes head=?,
+                         list delta=?,
+                         list step=?)
\ No newline at end of file
diff --git a/src/obitools/utils/_utils.pyx b/src/obitools/utils/_utils.pyx
new file mode 100644
index 0000000..3b662bf
--- /dev/null
+++ b/src/obitools/utils/_utils.pyx
@@ -0,0 +1,160 @@
+# cython: profile=True
+from _utils cimport *
+import sys
+cdef class FakeFile:
+    '''
+    Give a part of the file interface (read and readline) to an
+    iterator on lines.
+    @param li: an iterator providing a C{next} method returning str
+    '''
+    def __init__(self,li):
+        self._li = li
+        self.__buffer = []
+        self.__bufsize=0
+    cpdef str read(self,int size=-1):
+        # Return up to size characters, or everything left when size
+        # is negative. Raise EOFError when the iterator is exhausted and
+        # nothing is buffered.
+        cdef int csize=self.__bufsize
+        cdef str line
+        cdef str buffer
+        try:
+            # accumulate whole lines until enough characters are available
+            while(csize < size or size < 0):
+                    line = self._li.next()
+                    csize+=len(line)
+                    self.__buffer.append(line)
+        except StopIteration:
+            if csize==0:
+                raise EOFError
+        buffer = ''.join(self.__buffer)
+        if size >= 0:
+            # what exceeds the request is kept for the next call
+            self.__buffer=[buffer[size:]]
+            self.__bufsize=len(self.__buffer[0])
+            buffer=buffer[0:size]
+        else:
+            self.__buffer=[]
+            self.__bufsize=0
+        return buffer
+    cpdef str readline(self):
+            # Return the first buffered element if any (the buffer is then
+            # emptied), otherwise the next line of the iterator.
+            # Raise EOFError when the iterator is exhausted.
+            # NOTE(review): after a read(size) the buffer holds a remainder
+            # which may be a partial line or several lines - confirm callers
+            # do not mix read and readline.
+            cdef str line  # @DuplicatedSignature
+            try:
+                if self.__buffer:
+                    line = self.__buffer[0]
+                    self.__buffer=[]
+                    self.__bufsize=0
+                else:
+                    line=self._li.next()
+            except StopIteration:
+                raise EOFError
+            return line
+cpdef object progressBar(object pos,
+                  off_t maxi,
+                  bint reset=False,
+                  bytes head=b'',
+                  list delta=[],
+                  list step=[1,0,0]):
+    '''
+    Draw on stderr a progress bar with an estimation of the remaining time.
+    @param pos: the current position, or a callable returning it
+    @param maxi: the position corresponding to 100% (values <= 0 are
+                 replaced by 1)
+    @param reset: if True the state kept between the calls is cleared
+    @param head: text printed in front of the bar
+    @param delta: state kept between the calls: [clock at the first call,
+                  clock at the last refresh]. The mutable default value is
+                  used on purpose as the storage of this state.
+    @param step: state kept between the calls: [refresh frequency,
+                 call counter, wheel position]. Same remark as for delta.
+    '''
+    cdef off_t    ipos
+    cdef double percent 
+    cdef int days,hour,minu,sec
+    cdef bytes bar
+    cdef off_t fraction
+    cdef int freq,cycle,arrow
+    cdef tm remain
+    cdef clock_t d
+    cdef clock_t elapsed
+    cdef clock_t newtime 
+    cdef clock_t more
+    #                   0123456789
+    cdef char* wheel=  '|/-\\'
+    cdef char*  spaces='          ' \
+                       '          ' \
+                       '          ' \
+                       '          ' \
+                       '          '
+    cdef char*  diese ='##########' \
+                       '##########' \
+                       '##########' \
+                       '##########' \
+                       '##########' 
+    if reset:
+        del delta[:]
+        step[:]=[1,0,0]
+    if not delta:
+        delta.append(clock())
+        delta.append(clock())
+    if ( maxi<=0):
+        maxi=1
+    freq,cycle,arrow = step
+    cycle+=1
+    # the bar is redrawn only when the call counter is a multiple of freq
+    if cycle % freq == 0:
+        cycle=1
+        newtime = clock()
+        d = newtime-delta[1]
+        # adapt the refresh frequency to the time spent since the last refresh
+        if d < 0.2 * CLOCKS_PER_SEC :
+            freq*=2
+        elif d > 0.4 * CLOCKS_PER_SEC and freq>1:
+            freq/=2
+        delta[1]=newtime
+        elapsed = newtime-delta[0]
+        if callable(pos):
+            ipos=pos()
+        else:
+            ipos=pos
+        percent = <double>ipos/<double>maxi
+        # NOTE(review): percent is 0 when ipos is 0, the division below
+        # then divides by zero - confirm callers never report position 0.
+        more = <time_t>((<double>elapsed / percent * (1. - percent))/CLOCKS_PER_SEC)
+        # gmtime_r is only used to split the remaining seconds into
+        # days, hours, minutes and seconds
+        <void>gmtime_r(&more, &remain)
+        days = remain.tm_yday 
+        hour = remain.tm_hour
+        minu  = remain.tm_min
+        sec  = remain.tm_sec
+        # the bar is 50 characters wide
+        fraction=<int>(percent * 50.)
+        if fraction < 0:
+            fraction=0
+        if fraction > 50:
+            fraction=50
+        arrow=(arrow+1) % 4
+        if days:
+            <void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %d days %02d:%02d:%02d',
+                            <char*>head,
+                            percent*100,
+                            fraction,diese,
+                            wheel[arrow],
+                            50-fraction,spaces,
+                            days,hour,minu,sec)
+        else:
+            <void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %02d:%02d:%02d',
+                            <char*>head,
+                            percent*100.,
+                            fraction,diese,
+                            wheel[arrow],
+                            50-fraction,spaces,
+                            hour,minu,sec)
+    else:
+        cycle+=1
+    # save the state for the next call
+    step[0:3] = freq,cycle,arrow
\ No newline at end of file
diff --git a/src/obitools/utils/bioseq.py b/src/obitools/utils/bioseq.py
new file mode 100644
index 0000000..2031ab1
--- /dev/null
+++ b/src/obitools/utils/bioseq.py
@@ -0,0 +1,234 @@
+def mergeTaxonomyClassification(uniqSeq,taxonomy):
+    for seq in uniqSeq:
+        if seq['merged_taxid']:
+            seq['taxid']=taxonomy.lastCommonTaxon(*seq['merged_taxid'].keys())
+            tsp = taxonomy.getSpecies(seq['taxid'])
+            tgn = taxonomy.getGenus(seq['taxid'])
+            tfa = taxonomy.getFamily(seq['taxid'])
+            if tsp is not None:
+                sp_sn = taxonomy.getScientificName(tsp)
+            else:
+                sp_sn="###"
+                tsp=-1
+            if tgn is not None:
+                gn_sn = taxonomy.getScientificName(tgn)
+            else:
+                gn_sn="###"
+                tgn=-1
+            if tfa is not None:
+                fa_sn = taxonomy.getScientificName(tfa)
+            else:
+                fa_sn="###"
+                tfa=-1
+            seq['species']=tsp
+            seq['genus']=tgn
+            seq['family']=tfa
+            seq['species_name']=sp_sn
+            seq['genus_name']=gn_sn
+            seq['family_name']=fa_sn
+            seq['rank']=taxonomy.getRank(seq['taxid'])
+            seq['scientific_name']=fa_sn = taxonomy.getScientificName(seq['taxid'])
+def uniqSequence(seqIterator,taxonomy=None,mergedKey=None,mergeIds=False,categories=None):
+    uniques={}
+    uniqSeq=[]
+    if categories is None:
+        categories=[]
+    if mergedKey is not None:
+        mergedKey=set(mergedKey)
+    else:
+        mergedKey=set() 
+    if taxonomy is not None:
+        mergedKey.add('taxid')
+    for seq in seqIterator:    
+        s = tuple(seq[x] for x in categories) + (str(seq),)
+        if s in uniques:
+            s = uniques[s]
+            if 'count' in seq:
+                s['count']+=seq['count']
+            else:
+                s['count']+=1
+                seq['count']=1
+#            if taxonomy is not None and 'taxid' in seq:
+#                s['merged_taxid'][seq['taxid']]=
+            for key in mergedKey:
+                if key=='taxid' and mergeIds:
+                    if 'taxid_dist' in seq:
+                        s["taxid_dist"].update(seq["taxid_dist"])
+                    if 'taxid' in seq:
+                        s["taxid_dist"][seq.id]=seq['taxid']
+                mkey = "merged_%s" % key 
+                #cas ou on met a jour les merged_keys mais il n'y a pas de merged_keys dans la sequence qui arrive
+                if key in seq:
+                    s[mkey][seq[key]]=s[mkey].get(seq[key],0)+seq['count']
+                #cas ou merged_keys existe deja
+                else:
+                    if mkey in seq:
+                        for skey in seq[mkey]:
+                            s[mkey][skey]=s[mkey].get(skey,0)+seq[mkey][skey]                            
+            for key in seq.iterkeys():
+                # Merger proprement l'attribut merged s'il exist
+                if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged':
+                    del(s[key])
+            if mergeIds:        
+                s['merged'].append(seq.id)
+        else:
+            uniques[s]=seq
+            for key in mergedKey:
+                if key=='taxid' and mergeIds:
+                    if 'taxid_dist' not in seq:
+                        seq["taxid_dist"]={}
+                    if 'taxid' in seq:
+                        seq["taxid_dist"][seq.id]=seq['taxid']
+                mkey = "merged_%s" % key 
+                if mkey not in seq:
+                    seq[mkey]={}
+                if key in seq:
+                    seq[mkey][seq[key]]=seq[mkey].get(seq[key],0)+seq['count']
+                    del(seq[key])
+            if 'count' not in seq:
+                seq['count']=1
+            if mergeIds:        
+                seq['merged']=[seq.id]
+            uniqSeq.append(seq)
+    if taxonomy is not None:
+        mergeTaxonomyClassification(uniqSeq, taxonomy)
+    return uniqSeq
+def uniqPrefixSequence(seqIterator,taxonomy=None,mergedKey=None,mergeIds=False,categories=None):
+    if categories is None:
+        categories=[]
+    def cmpseq(s1,s2):
+        return cmp(str(s1),str(s2))
+    if mergedKey is not None:
+        mergedKey=set(mergedKey)
+    else:
+        mergedKey=set() 
+    if taxonomy is not None:
+        mergedKey.add('taxid')
+    sequences=list(seqIterator)
+    if not sequences:
+        return []
+    sequences.sort(cmpseq)
+    old=sequences.pop()
+    uniqSeq=[old]
+    if 'count' not in old:
+        old['count']=1
+    for key in mergedKey:
+        mkey = "merged_%s" % key 
+        if mkey not in old:
+            old[mkey]={}
+        if key in old:
+            old[mkey][old[key]]=old[mkey].get(old[key],0)+1
+    if mergeIds:        
+        old['merged']=[old.id]
+    while(sequences):
+        seq=sequences.pop()
+        lseq=len(seq)
+        pold = str(old)[0:lseq]
+        if pold==str(seq):
+            if 'count' in seq:
+                old['count']+=seq['count']
+            else:
+                old['count']+=1
+            for key in mergedKey:
+                mkey = "merged_%s" % key 
+                if key in seq:
+                    old[mkey][seq[key]]=old[mkey].get(seq[key],0)+1
+                if mkey in seq:
+                    for skey in seq[mkey]:
+                        if skey in old:
+                            old[mkey][skey]=old[mkey].get(seq[skey],0)+seq[mkey][skey]
+                        else:
+                            old[mkey][skey]=seq[mkey][skey]
+            for key in seq.iterkeys():
+                if key in old and old[key]!=seq[key]:
+                    del(old[key])
+            if mergeIds:        
+                old['merged'].append(seq.id)
+        else:
+            old=seq
+            for key in mergedKey:
+                mkey = "merged_%s" % key 
+                if mkey not in seq:
+                    seq[mkey]={}
+                if key in seq:
+                    seq[mkey][seq[key]]=seq[mkey].get(seq[key],0)+1
+                    del(seq[key])
+            if 'count' not in seq:
+                seq['count']=1
+            if mergeIds:        
+                seq['merged']=[seq.id]
+            uniqSeq.append(seq)
+    if taxonomy is not None:
+        mergeTaxonomyClassification(uniqSeq, taxonomy)
+    return uniqSeq
+def _cmpOnKeyGenerator(key,reverse=False):
+    def compare(x,y):
+        try:
+            c1 = x[key]
+        except KeyError:
+            c1=None
+        try:
+            c2 = y[key]
+        except KeyError:
+            c2=None
+        if reverse:
+            s=c1
+            c1=c2
+            c2=s
+        return cmp(c1,c2)
+    return compare
+def sortSequence(seqIterator,key,reverse=False):
+    seqs = list(seqIterator)
+    seqs.sort(_cmpOnKeyGenerator(key, reverse))
+    return seqs
\ No newline at end of file
diff --git a/src/obitools/utils/crc64.py b/src/obitools/utils/crc64.py
new file mode 100644
index 0000000..537391e
--- /dev/null
+++ b/src/obitools/utils/crc64.py
@@ -0,0 +1,53 @@
+# Code obtained from : 
+#   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259177/index_txt
+# Initialisation
+# 32 first bits of generator polynomial for CRC64
+# the 32 lower bits are assumed to be zero
+POLY64REVh = 0xd8000000L
+CRCTableh = [0] * 256
+CRCTablel = [0] * 256
+isInitialized = False
+def CRC64(aString):
+    global isInitialized
+    crcl = 0
+    crch = 0
+    if (isInitialized is not True):
+        isInitialized = True
+        for i in xrange(256): 
+            partl = i
+            parth = 0L
+            for j in xrange(8):
+                rflag = partl & 1L                
+                partl >>= 1L               
+                if (parth & 1):
+                    partl |= (1L << 31L)
+                parth >>= 1L
+                if rflag:
+                    parth ^= POLY64REVh
+            CRCTableh[i] = parth;
+            CRCTablel[i] = partl;
+    for item in aString:
+        shr = 0L
+        shr = (crch & 0xFF) << 24
+        temp1h = crch >> 8L
+        temp1l = (crcl >> 8L) | shr                        
+        tableindex = (crcl ^ ord(item)) & 0xFF
+        crch = temp1h ^ CRCTableh[tableindex]
+        crcl = temp1l ^ CRCTablel[tableindex]
+    return (crch, crcl)
+def CRC64digest(aString):
+    return "%08X%08X" % (CRC64(aString))
+# self test run when the module is executed as a script:
+# checks CRC64 and CRC64digest against a known value
+if __name__ == '__main__':
+    assert CRC64("IHATEMATH") == (3822890454, 2600578513)
+    assert CRC64digest("IHATEMATH") == "E3DCADD69B01ADD1"
+    print 'CRC64: dumb test successful'
diff --git a/src/obitools/utils/iterator.py b/src/obitools/utils/iterator.py
new file mode 100644
index 0000000..f53537f
--- /dev/null
+++ b/src/obitools/utils/iterator.py
@@ -0,0 +1,8 @@
+from itertools import chain
+def uniqueChain(*args):
+    see = set()
+    for x in chain(*args):
+        if x not in see:
+            see.add(x)
+            yield x
\ No newline at end of file
diff --git a/src/obitools/version.py b/src/obitools/version.py
new file mode 100644
index 0000000..863c966
--- /dev/null
+++ b/src/obitools/version.py
@@ -0,0 +1,5 @@
+# components of the obitools version number
+major = 1
+minor = 2
+serial= '11'
+# NOTE(review): %2d pads the major number with a leading space, so the
+# string starts with a blank - confirm this is wanted.
+version = "%2d.%02d %s" % (major,minor,serial)
diff --git a/src/obitools/word/__init__.py b/src/obitools/word/__init__.py
new file mode 100644
index 0000000..2719b3b
--- /dev/null
+++ b/src/obitools/word/__init__.py
@@ -0,0 +1,135 @@
+from itertools import imap
+from _binary import *
+def wordCount(liste):
+    count = {}
+    for e in liste:
+        count[e]=count.get(e,0) + 1
+    return count
+def wordIterator(sequence,lword,step=1,endIncluded=False,circular=False):
+    assert not (endIncluded and circular), \
+      "endIncluded and circular cannot not be set to True at the same time"
+    L = len(sequence)
+    sequence = str(sequence)
+    if circular:
+        sequence += sequence[0:lword]
+        pmax=L
+    elif endIncluded:
+        pmax=L
+    else:
+        pmax = L - lword + 1
+    pos = xrange(0,pmax,step)
+    for x in pos:
+        yield encodeWord(sequence[x:x+lword])
+def filterIterator(sequence,step=32,maxword=50,wordsize=4,circular=False):
+    '''
+    Cut a sequence into windows each holding at most C{maxword}
+    distinct words, and describe each window by a bit filter.
+    @param sequence: the sequence to analyse (its C{id} is reported)
+    @param step: the maximum count of trailing words of a window reused to
+                 start the following one (must be lower than 64)
+    @param maxword: the count of distinct words closing a window
+    @param wordsize: the size of the words
+    @param circular: given to C{wordIterator}
+    @return: an iterator on tuples (sequence id, position, count of words
+             in the window, filter) where the filter is an integer having
+             the bit C{w} set for each word code C{w} of the window
+    '''
+    assert step < 64
+    wi = wordIterator(sequence, wordsize, circular=circular)
+    # count of words and of distinct words in the current window
+    lfilter=0
+    winfilter=0
+    filter=0
+    # the bit codes of the last (at most step) words
+    buffer=[]
+    pos=0
+    for w in wi:
+        code = 1 << w
+        buffer.append(code)
+        if len(buffer)>step:
+            buffer.pop(0)
+        lfilter+=1
+        if not filter & code:
+            filter |=code
+            winfilter+=1
+            if winfilter>=maxword:
+                yield (sequence.id,pos,lfilter,filter)
+                # the next window starts on the first buffered word
+                pos = pos + lfilter - len(buffer)
+                filter=0
+                lfilter=0
+                winfilter=0
+                # seed the new window with the buffered words
+                for code in buffer:
+                    lfilter+=1
+                    if not filter & code:
+                        filter |=code
+                        winfilter+=1
+                buffer=[]
+    # the last window, even when it is not full
+    yield (sequence.id,pos,lfilter,filter)
+def filterDict(sequence,step=32,maxword=50,wordsize=4,circular=False,filters=None):  
+    fi = filterIterator(sequence, step, maxword,wordsize, circular)  
+    if filters is None:
+        r = {}
+    else:
+        r = filters
+    for f in fi:
+        lseq = r.get(f[2],{})
+        r[f[3]]=lseq
+        lpos = lseq.get(f[0],[])
+        lseq[f[0]]=lpos
+        lpos.append((f[1],f[2]))
+    return r
+def primerWordMin(lprimer,error=2,lword=4):
+    '''
+    Compute a count of words from the length of a primer, the count of
+    allowed errors and the word size: the matching positions
+    (lprimer - error) are spread over error + 1 parts and the words
+    fitting in these parts are counted.
+    @param lprimer: the length of the primer
+    @param error: the count of allowed errors
+    @param lword: the size of the words
+    @return: the computed count, 0 if a part is shorter than a word
+    '''
+    nparts = error+1
+    nmatch = lprimer - error
+    partsize = (nmatch / nparts) + 1
+    if partsize >= lword:
+        excess = partsize * nparts - nmatch
+        return (partsize - lword + 1) * nparts - excess
+    return 0
+def wordSelector(words,accept=None,reject=None):
+    '''
+    Filter over a DNA word iterator.
+    @param words: an iterable object other a list of DNA words
+    @type words: an iterator
+    @param accept: a list of predicate. Each predicate is a function
+                   accepting one str parametter and returning a boolean
+                   value.
+    @type accept: list
+    @param reject: a list of predicat. Each predicat is a function
+                   accepting one str parametter and returning a boolean
+                   value.
+    @type reject: list
+    @return: an iterator on DNA word (str)
+    @rtype: iterator
+    '''
+    if accept is None:
+        accept=[]
+    if reject is None:
+        reject=[]
+    for w in words:
+#        print [bool(p(w)) for p in accept]
+        accepted = reduce(lambda x,y: bool(x) and bool(y),
+                          (p(w) for p in accept),
+                          True)
+#        print [(p.__name__,bool(p(w))) for p in reject]
+        rejected = reduce(lambda x,y:bool(x) or bool(y),
+                          (p(w) for p in reject),
+                          False)
+#        print decodeWord(w,5),accepted,rejected,
+        if accepted and not rejected:
+#            print " conserved"
+            yield w
+#        else:
+#            print
diff --git a/src/obitools/word/_binary.pyx b/src/obitools/word/_binary.pyx
new file mode 100644
index 0000000..17c2b50
--- /dev/null
+++ b/src/obitools/word/_binary.pyx
@@ -0,0 +1,269 @@
+Created on 2 juil. 2009
+ at author: coissac
+maxword = sizeof(unsigned long int) * 8 /2
+cdef import from "math.h":
+    double ceil(double x)
+    double log(double x)
+cdef int binarywordsize(unsigned long int x):
+    # Return the number of binary digits needed to write x
+    # (0 for x == 0, 1 for x == 1, 3 for x == 4, ...).
+    # The count is obtained by shifting instead of ceil(log(x)/log(2)):
+    # the logarithm is one digit short for exact powers of two and is
+    # not defined for x == 0.
+    cdef int size = 0
+    while x:
+        size += 1
+        x >>= 1
+    return size
+cpdef str bin2str(unsigned long int x):
+    # Return x written in binary, most significant digit first.
+    # The number of digits is the value returned by binarywordsize(x).
+    cdef list digits = []
+    # typed constant so that the shift is done on an unsigned long and
+    # not on a C int, which is too narrow for the high bit positions
+    cdef unsigned long int one = 1
+    cdef int i
+    cdef int ws = binarywordsize(x)
+    for i in range(ws - 1, -1, -1):
+        if x & (one << i):
+            digits.append('1')
+        else:
+            digits.append('0')
+    return ''.join(digits)
+cdef class WordPattern :
+    # Holds four unsigned long bit masks, one per nucleotide (a, c, g, t).
+    # In this file instances are built by encodePattern and read by
+    # matchPattern.
+    cdef public unsigned long int a 
+    cdef public unsigned long int c 
+    cdef public unsigned long int g 
+    cdef public unsigned long int t 
+    def __init__(self, unsigned long int a,
+                       unsigned long int c,
+                       unsigned long int g,
+                       unsigned long int t):
+        # The four masks are stored unchanged.
+        self.a=a
+        self.c=c 
+        self.g=g 
+        self.t=t 
+    def __str__(self):
+        # Each mask is rendered in binary through bin2str.
+        return b"(a:%s,c:%s,g:%s,t:%s)" % (bin2str(self.a),
+                                           bin2str(self.c),
+                                           bin2str(self.g),
+                                           bin2str(self.t))
+cdef unsigned int bitCount(unsigned long int x):
+    # Return the number of bits set to 1 in x.
+    cdef unsigned int i=0
+    while(x):
+        i+=1
+        # x & (x-1) clears the lowest set bit: one iteration per set bit
+        x&=x-1
+    return i
+def allDNAWordIterator(size):
+    '''
+    Iterate through the list of all DNA words of
+    size `size`.
+    Words are returned as integers, from 0 to 4**size - 1.
+    @param size: size of the DNA word
+    @type size: int
+    @return: an iterator on DNA word (int)
+    @rtype: iterator
+    '''
+    maxi=4**size
+    return xrange(maxi)
+cpdef int wordDist(unsigned long int w1,unsigned long int w2):
+    '''
+    Compute the Hamming distance between two words of the same size.
+    Both words are integers holding two bits per nucleotide.
+    @param w1: the first word
+    @type w1:  int (binary encoded word)
+    @param w2: the second word
+    @type w2:  int (binary encoded word)
+    @return: the count of difference between the two words
+    @rtype: int
+    '''
+    cdef unsigned long int diff
+    cdef unsigned long int dist
+    # bits that differ between the two words (exclusive or)
+    diff = (~(w1 & w2) & (w1 | w2))
+    # fold each pair of bits on its low bit: one bit per differing nucleotide
+    # NOTE(review): 0x55555555 only covers the 16 lowest nucleotides;
+    # differences above them are ignored -- confirm this limit is intended.
+    diff = (diff | (diff >> 1)) & 0x55555555 
+    dist = bitCount(diff)
+    return dist
+cpdef int homoMax(unsigned long int word,unsigned int size):
+    '''
+    Return the length of the longest run of identical nucleotides
+    (homopolymer) in a binary encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word
+    @type size: int
+    @return: the length of the longest homopolymer
+    @rtype: int
+    '''
+    cdef unsigned long int mask
+    cdef unsigned long int good
+    cdef unsigned long int maxi
+    cdef unsigned long int shift
+    # mask covers the 2*size bits of the word
+    # NOTE(review): the literal 1 is presumably shifted as a C int, which
+    # would limit size to less than 16 -- confirm.
+    mask = (1 << (size << 1))-1
+    # one candidate bit per nucleotide position (low bit of each pair)
+    good = 0x55555555
+    maxi=0
+    shift = word
+    # At each round the word is compared with itself shifted by one more
+    # nucleotide; a position stays in good only while all the comparisons
+    # done so far succeeded. The loop stops when no position is left.
+    while good:
+        maxi+=1
+        shift>>=2
+        mask>>=2
+        # bits equal in word and shift (id is an untyped Python object
+        # here and hides the id builtin inside this function)
+        id = (word & shift) | (~word & ~shift)
+        # keep positions where both bits of the nucleotide are equal
+        good&= id & (id>>1) & mask
+    return maxi
+cpdef int countA(unsigned long int word,unsigned int size):
+    '''
+    Count the nucleotides encoded 00 (a) in a binary encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word
+    @type size: int
+    @return: the number of a
+    @rtype: int
+    '''
+    cdef unsigned long int mask
+    cdef unsigned long int id
+    cdef unsigned long int good
+    # restrict the count to the 2*size bits of the word
+    mask = (1 << (size << 1))-1
+    id = ~word
+    # low bit of a pair is kept when both bits of the pair are 0 in word
+    good= id & (id>>1) & 0x55555555 & mask
+    return bitCount(good)
+cpdef int countT(unsigned long int word,unsigned int size):
+    '''
+    Count the nucleotides encoded 11 (t) in a binary encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word (not used here)
+    @type size: int
+    @return: the number of t
+    @rtype: int
+    '''
+    cdef unsigned long int good
+    # low bit of a pair is kept when both bits of the pair are 1
+    good= word & (word>>1) & 0x55555555
+    return bitCount(good)
+cpdef int countAT(unsigned long int word,unsigned int size):
+    '''
+    Count the nucleotides encoded 00 or 11 (a or t) in a binary
+    encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word
+    @type size: int
+    @return: the number of a plus the number of t
+    @rtype: int
+    '''
+    cdef unsigned long int mask
+    cdef unsigned long int shift
+    cdef unsigned long int good
+    # restrict the count to the 2*size bits of the word
+    mask = (1 << (size << 1))-1
+    shift = word >> 1
+    # low bit of a pair is kept when both bits of the pair are equal
+    good  = ((word & shift) | (~word & ~shift)) & 0x55555555 & mask
+    return bitCount(good)
+cpdef int countC(unsigned long int word,unsigned int size):
+    '''
+    Count the nucleotides encoded 01 (c) in a binary encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word
+    @type size: int
+    @return: the number of c
+    @rtype: int
+    '''
+    cdef unsigned long int mask
+    cdef unsigned long int good
+    # restrict the count to the 2*size bits of the word
+    mask = (1 << (size << 1))-1
+    # a bit is set when the low bit of a pair is 1 or its high bit is 0
+    good = ((word & 0x55555555) | (~word & 0xAAAAAAAA)) 
+    # keep pairs where both conditions hold: low bit 1 and high bit 0
+    good &= (good >> 1) & 0x55555555 & mask
+    return bitCount(good)
+cpdef int countG(unsigned long int word,unsigned int size):
+    '''
+    Count the nucleotides encoded 10 (g) in a binary encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word
+    @type size: int
+    @return: the number of g
+    @rtype: int
+    '''
+    cdef unsigned long int mask
+    cdef unsigned long int good
+    # restrict the count to the 2*size bits of the word
+    mask = (1 << (size << 1))-1
+    # a bit is set when the high bit of a pair is 1 or its low bit is 0
+    good  = ((word & 0xAAAAAAAA) | (~word & 0x55555555))  
+    # keep pairs where both conditions hold: high bit 1 and low bit 0
+    good &= (good >> 1) & 0x55555555 & mask
+    return bitCount(good)
+cpdef int countCG(unsigned long int word,unsigned int size):
+    '''
+    Count the nucleotides encoded 01 or 10 (c or g) in a binary
+    encoded word.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param size: number of nucleotides in the word
+    @type size: int
+    @return: the number of c plus the number of g
+    @rtype: int
+    '''
+    cdef unsigned long int mask
+    cdef unsigned long int shift
+    cdef unsigned long int good
+    # restrict the count to the 2*size bits of the word
+    mask = (1 << (size << 1))-1
+    shift = word >> 1
+    # low bit of a pair is kept when the two bits of the pair differ
+    good  = ((word & ~shift) | (~word & shift)) & 0x55555555 & mask
+    return bitCount(good)
+cpdef str decodeWord(unsigned long int word,unsigned int size):
+    '''
+    Convert a binary encoded word to its string representation.
+    Two bits are read per nucleotide, starting from the most
+    significant pair: 0 -> a, 1 -> c, 2 -> g, 3 -> t.
+    @param word: the binary encoded word
+    @type word: int
+    @param size: number of nucleotides to decode
+    @type size: int
+    @return: the word as a string of `size` letters
+    @rtype: str
+    '''
+    return ''.join(['acgt'[(word >> i) & 3] for i in xrange(size*2-2,-1,-2)])
+cpdef int encodeWord(word) except -1:
+    '''
+    Encode a DNA word as an integer, two bits per nucleotide
+    (a -> 0, c -> 1, g -> 2, t -> 3), the first letter being stored in
+    the most significant position. Upper case letters are accepted.
+    NOTE(review): the declared return type is a C int while words of
+    up to 32 letters pass the assertion; the longest ones presumably
+    cannot be converted to the return type -- confirm.
+    @param word: the DNA word
+    @type word: str
+    @return: the binary encoded word
+    @rtype: int
+    @raise AssertionError: if the word is longer than 32 letters
+    @raise RuntimeError: if the word contains a letter other than
+                         a, c, g or t
+    '''
+    assert len(word)<=32,"Word length should be less or equal to 32"
+    w=0
+    word=word.lower()
+    for l in word:
+        # position of the letter in 'acgt' is its two bits code
+        code='acgt'.find(l)
+        if code < 0:
+            raise RuntimeError("word should only contain a, c, g or t (%s)" % word)
+        w=(w << 2) | code
+    return w
+def encodePattern(pattern):
+    '''
+    Build a WordPattern from a pattern string.
+    For each letter of the pattern (case is ignored) the four masks are
+    shifted by two bits, then the low bit is set in the mask of every
+    nucleotide listed for that letter in the tables below. A letter
+    present in no table sets no bit.
+    @param pattern: the pattern to encode
+    @type pattern: str
+    @return: the four masks a, c, g and t
+    @rtype: WordPattern
+    '''
+    # letters setting a bit in the a, c, g and t masks, in this order
+    symbols = ('armwdhvn', 'cymsbhvn', 'grksbdvn', 'tykwbdhn')
+    masks = [0, 0, 0, 0]
+    for letter in pattern.lower():
+        for n in range(4):
+            masks[n] <<= 2
+            if letter in symbols[n]:
+                masks[n] |= 1
+    return WordPattern(masks[0], masks[1], masks[2], masks[3])
+cpdef bint matchPattern(unsigned long int word,pattern):
+    '''
+    Test whether a binary encoded word matches a pattern.
+    @param word: the binary encoded word (two bits per nucleotide)
+    @type word: int
+    @param pattern: object exposing the four masks a, c, g and t
+                    (see WordPattern)
+    @return: True if every position set in at least one mask of the
+             pattern holds a nucleotide allowed by the pattern
+    @rtype: bool
+    '''
+    # positions constrained by the pattern
+    # (all hides the builtin of the same name inside this function)
+    all   = pattern.a|pattern.c|pattern.g|pattern.t
+    # positions where word holds 00 (a)
+    eq    = ~word
+    match = eq & (eq >> 1) & pattern.a
+    # positions where word holds 01 (c)
+    eq    = (word & 0x55555555 | ~word & 0xAAAAAAAA)
+    match|= eq & (eq >> 1) & pattern.c
+    # positions where word holds 10 (g)
+    eq    = (word & 0xAAAAAAAA | ~word & 0x55555555) 
+    match|= eq & (eq >> 1) &  pattern.g
+    # positions where word holds 11 (t)
+    eq    = word
+    match|= eq & (eq >> 1) &  pattern.t        
+    return match == all
+cdef class ErrorPositionIterator:
+    # Iterator returning integers with one bit set for each of the
+    # `errorcount` positions currently stored in _errorpos.
+    # NOTE(review): this class looks unfinished: next() prints debugging
+    # output, _mask is set but never read in this class, and the advance
+    # loop reads _errorpos[i+1] for i == _errors-1, a slot that __init__
+    # does not initialise -- confirm before relying on it.
+    cdef int _wsize
+    cdef int _errors
+    cdef unsigned long int _mask
+    cdef int _errorpos[32]
+    cdef bint _end
+    def __init__(self,wordsize,errorcount):
+        # @param wordsize: size of the word
+        # @param errorcount: number of error positions; _errorpos has
+        #                    room for 32 of them and no check is done here
+        self._wsize=wordsize
+        self._errors=errorcount
+        self._mask=0
+        # initial positions are 0, 1, ..., errorcount-1
+        for i in range(errorcount):
+            self._errorpos[i]=i
+        self._end=False
+    def __iter__(self):
+        return self
+    def next(self):
+        # Return the mask of the current positions, then move to the
+        # next set of positions.
+        cdef unsigned long int rep
+        cdef bint move=False
+        cdef int i
+        if self._end:
+            raise StopIteration
+        # build the returned mask from the current positions
+        rep = 0
+        for i in range(self._errors):
+            rep |= 1 << self._errorpos[i]
+            print bin2str(rep)
+        move=False
+        i=0
+        while (not move):
+            # a position moves when the slot after it is not occupied by
+            # the following position
+            if self._errorpos[i]<self._errorpos[i+1]-1:
+                 self._errorpos[i]+=1
+                 move=True
+                 i=0
+                 print "pos %d/%d moved" % (i,self._wsize)
+            else:
+                # otherwise the position is reset to its initial value and
+                # the following position is examined
+                self._errorpos[i]=i
+                i+=1
+            # end of the iteration: the last position reached the word size
+            if i==self._errors-1 and self._errorpos[i]==self._wsize:
+                self._end=True
+                move=True
+        return rep
\ No newline at end of file
diff --git a/src/obitools/word/_readindex.cfiles b/src/obitools/word/_readindex.cfiles
new file mode 100644
index 0000000..3f1b3df
--- /dev/null
+++ b/src/obitools/word/_readindex.cfiles
@@ -0,0 +1 @@
\ No newline at end of file
diff --git a/src/obitools/word/_readindex.ext.1.c b/src/obitools/word/_readindex.ext.1.c
new file mode 100644
index 0000000..0b8b1be
--- /dev/null
+++ b/src/obitools/word/_readindex.ext.1.c
@@ -0,0 +1,202 @@
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ */
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ */
+#include <sys/types.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stddef.h>
+ * Swap two areas of size number of bytes.  Although qsort(3) permits random
+ * blocks of memory to be sorted, sorting pointers is almost certainly the
+ * common case (and, were it not, could easily be made so).  Regardless, it
+ * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
+ * arithmetic gets lost in the time required for comparison function calls.
+ */
+#define	SWAP(a, b, count, size, tmp) { \
+	count = size; \
+	do { \
+		tmp = *a; \
+		*a++ = *b; \
+		*b++ = tmp; \
+	} while (--count); \
+/* Copy one block of size size to another. */
+#define COPY(a, b, count, size, tmp1, tmp2) { \
+	count = size; \
+	tmp1 = a; \
+	tmp2 = b; \
+	do { \
+		*tmp1++ = *tmp2++; \
+	} while (--count); \
+ * Build the list into a heap, where a heap is defined such that for
+ * the records K1 ... KN, Kj/2 >= Kj for 1 <= j/2 <= j <= N.
+ *
+ * There are two cases.  If j == nmemb, select largest of Ki and Kj.  If
+ * j < nmemb, select largest of Ki, Kj and Kj+1.
+ */
+#define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
+	for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
+	    par_i = child_i) { \
+		child = base + child_i * size; \
+		if (child_i < nmemb && compar(child, child + size) < 0) { \
+			child += size; \
+			++child_i; \
+		} \
+		par = base + par_i * size; \
+		if (compar(child, par) <= 0) \
+			break; \
+		SWAP(par, child, count, size, tmp); \
+	} \
+ * Select the top of the heap and 'heapify'.  Since by far the most expensive
+ * action is the call to the compar function, a considerable optimization
+ * in the average case can be achieved due to the fact that k, the displaced
+ * element, is usually quite small, so it would be preferable to first
+ * heapify, always maintaining the invariant that the larger child is copied
+ * over its parent's record.
+ *
+ * Then, starting from the *bottom* of the heap, finding k's correct place,
+ * again maintaining the invariant.  As a result of the invariant no element
+ * is 'lost' when k is assigned its correct place in the heap.
+ *
+ * The time savings from this optimization are on the order of 15-20% for the
+ * average case. See Knuth, Vol. 3, page 158, problem 18.
+ *
+ * XXX Don't break the #define SELECT line, below.  Reiser cpp gets upset.
+ */
+#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
+	for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
+		child = base + child_i * size; \
+		if (child_i < nmemb && compar(child, child + size) < 0) { \
+			child += size; \
+			++child_i; \
+		} \
+		par = base + par_i * size; \
+		COPY(par, child, count, size, tmp1, tmp2); \
+	} \
+	for (;;) { \
+		child_i = par_i; \
+		par_i = child_i / 2; \
+		child = base + child_i * size; \
+		par = base + par_i * size; \
+		if (child_i == 1 || compar(k, par) < 0) { \
+			COPY(child, k, count, size, tmp1, tmp2); \
+			break; \
+		} \
+		COPY(child, par, count, size, tmp1, tmp2); \
+	} \
+ * Heapsort -- Knuth, Vol. 3, page 145.  Runs in O (N lg N), both average
+ * and worst.  While heapsort is faster than the worst case of quicksort,
+ * the BSD quicksort does median selection so that the chance of finding
+ * a data set that will trigger the worst case is nonexistent.  Heapsort's
+ * only advantage over quicksort is that it requires little additional memory.
+ */
+/*
+ * Sort in place the nmemb elements of size bytes stored at vbase, using
+ * compar to order them.  Returns 0 on success (nothing is done when
+ * nmemb <= 1), and -1 when size is zero (errno is set to EINVAL) or when
+ * the scratch element cannot be allocated.
+ */
+heapsort(vbase, nmemb, size, compar)
+	void *vbase;
+	size_t nmemb, size;
+	int (*compar) __P((const void *, const void *));
+	register int cnt, i, j, l;
+	register char tmp, *tmp1, *tmp2;
+	char *base, *k, *p, *t;
+	if (nmemb <= 1)
+		return (0);
+	if (!size) {
+		errno = EINVAL;
+		return (-1);
+	}
+	/* k is a scratch buffer holding one displaced element */
+	if ((k = malloc(size)) == NULL)
+		return (-1);
+	/*
+	 * Items are numbered from 1 to nmemb, so offset from size bytes
+	 * below the starting address.
+	 */
+	base = (char *)vbase - size;
+	/* build the heap, from the last parent down to the root */
+	for (l = nmemb / 2 + 1; --l;)
+		CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
+	/*
+	 * For each element of the heap, save the largest element into its
+	 * final slot, save the displaced element (k), then recreate the
+	 * heap.
+	 */
+	while (nmemb > 1) {
+		COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
+		COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
+		--nmemb;
+		SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
+	}
+	free(k);
+	return (0);
diff --git a/src/obitools/word/_readindex.h b/src/obitools/word/_readindex.h
new file mode 100644
index 0000000..438a7a0
--- /dev/null
+++ b/src/obitools/word/_readindex.h
@@ -0,0 +1,94 @@
+Binary constant generator macro
+By Tom Torfs - donated to the public domain
+/* All macro's evaluate to compile-time constants */
+/* *** helper macros *** */
+/* turn a numeric literal into a hex constant
+(avoids problems with leading zeroes)
+8-bit constants max value 0x11111111, always fits in unsigned long
+#define HEX__(n) 0x##n##LU
+/* 8-bit conversion function */
+#define B8__(x) ((x&0x0000000FLU)?1:0) \
++((x&0x000000F0LU)?2:0) \
++((x&0x00000F00LU)?4:0) \
++((x&0x0000F000LU)?8:0) \
++((x&0x000F0000LU)?16:0) \
++((x&0x00F00000LU)?32:0) \
++((x&0x0F000000LU)?64:0) \
+/* *** user macros *** */
+/* for upto 8-bit binary constants */
+#define B8(d) ((unsigned char)B8__(HEX__(d)))
+/* for upto 16-bit binary constants, MSB first */
+#define B16(dmsb,dlsb) (((unsigned short)B8(dmsb)<< \
++ B8(dlsb))
+/* for upto 32-bit binary constants, MSB first */
+#define B32(dmsb,db2,db3,dlsb) (((unsigned long)B8(dmsb)<<24) \
++ ((unsigned long)B8(db2)<<16) \
++ ((unsigned long)B8(db3)<< \
++ B8(dlsb))
+typedef struct obinuc {
+	unsigned int seqused : 1;     // this sequence is already used
+	unsigned int endofread : 1;    // this word is already used
+	unsigned int zero : 1;        // this is a non standard nucleotide
+    unsigned int direction : 1;   // 0 -> use direct word 1 -> use reverse word
+    unsigned int reverse    : 2;  // reverse nucleotide 0 : A 1 : C 2 : G 3 : T
+    unsigned int forward    : 2;  // forward nucleotide 0 : A 1 : C 2 : G 3 : T
+} *pobinuc, obinuc;
+typedef char obinuc,*pobinuc;
+#define SET_SEQUSED(x)   ((char)((x) | B8(10000000)))
+#define SET_ENDOFREAD(x) ((char)((x) | B8(01000000)))
+#define SET_ZERO(x)      ((char)((x) | B8(00100000)))
+#define SET_DIRECTION(x) ((char)((x) | B8(00010000)))
+#define UNSET_SEQUSED(x)   ((char)((x) & B8(01111111)))
+#define UNSET_ENDOFREAD(x) ((char)((x) & B8(10111111)))
+#define UNSET_ZERO(x)      ((char)((x) & B8(11011111)))
+#define UNSET_DIRECTION(x) ((char)((x) & B8(11101111)))
+#define SET_REVERSE(x,val)   ((char)(((x) & B8(11110011)) | (((val) & B8(00000011)) << 2)))
+#define SET_FORWARD(x,val)   ((char)(((x) & B8(11111100)) | (((val) & B8(00000011)))))
+#define GET_SEQUSED(x)   ((char)(((x) >> 7) & 1))
+#define GET_ENDOFREAD(x) ((char)(((x) >> 6) & 1))
+#define GET_ZERO(x)      ((char)(((x) >> 5) & 1))
+#define GET_DIRECTION(x) ((char)(((x) >> 4) & 1))
+#define GET_REVERSE(x)   ((char)(((x) >> 2) & B8(00000011)))
+#define GET_FORWARD(x)   ((char)(x) & B8(00000011))
+#define DECODE_NUC(x)    (*("acgt" + GET_FORWARD(x)))
+#define DECODE_NUC_FR(x,d)    (*("acgtn" + (((d==0) ? GET_FORWARD(x):GET_REVERSE(x)) | (GET_ZERO(x) << 2))))
+#define A B8(00001100)
+#define C B8(00001001)
+#define G B8(00000110)
+#define T B8(00000011)
+#define N B8(00100000)
diff --git a/src/obitools/word/_readindex.pyx b/src/obitools/word/_readindex.pyx
new file mode 100644
index 0000000..48afcf1
--- /dev/null
+++ b/src/obitools/word/_readindex.pyx
@@ -0,0 +1,805 @@
+from libc.stdlib cimport free
+from libc.stdlib cimport malloc,realloc
+from libc.stdio cimport fopen,fclose,fread,fwrite,FILE
+from libc.string cimport strlen 
+from cpython.bytes cimport PyBytes_FromString
+from cpython.bytes cimport PyBytes_FromStringAndSize
+import sys
+from threading import Lock
+from cPickle import dumps,loads
+from obitools._obitools import NucSequence
+from turtle import Tbuffer
+cdef extern from "_readindex.h":
+    ctypedef char obinuc        
+    ctypedef obinuc* pobinuc
+    obinuc SET_SEQUSED(char x)
+    obinuc SET_ENDOFREAD(char x)
+    obinuc SET_ZERO(char x)
+    obinuc SET_DIRECTION(char x)
+    obinuc UNSET_SEQUSED(char x)
+    obinuc UNSET_ENDOFREAD(char x)
+    obinuc UNSET_ZERO(char x)
+    obinuc UNSET_DIRECTION(char x)
+    obinuc SET_REVERSE(char x, unsigned int val)
+    obinuc SET_FORWARD(char x, unsigned int val)
+    unsigned int GET_SEQUSED(obinuc x)
+    unsigned int GET_ENDOFREAD(obinuc x)
+    unsigned int GET_ZERO(obinuc x)
+    unsigned int GET_DIRECTION(obinuc x)
+    unsigned int GET_REVERSE(obinuc x)
+    unsigned int GET_FORWARD(obinuc x)
+    char DECODE_NUC(obinuc x)
+    char DECODE_NUC_FR(obinuc x, unsigned int d)
+    enum:
+        A 
+    enum:
+        C 
+    enum:
+        G 
+    enum:
+        T 
+    enum:
+        N 
+cdef extern from *:
+    ctypedef void* pconstvoid "const void*"
+cdef extern from "stdlib.h":
+    void heapsort(void *base, size_t nel, size_t  width, int (*compar)(pconstvoid, pconstvoid))
+    void qsort(void *base, size_t nel, size_t  width, int (*compar)(pconstvoid, pconstvoid))
+    void* bsearch(pconstvoid key, pconstvoid base, size_t nel, size_t width, int (*compar) (pconstvoid, pconstvoid))
+cdef obinuc encodeobinuc(char nuc):    
+    # Convert a nucleotide character to its obinuc code.
+    # Returns one of the A, C, G, T constants of _readindex.h, or N for
+    # any other character.
+    # clear bit 0x20 so that lower case ASCII letters compare as upper case
+    nuc&=0b11011111
+    if nuc=='A':
+        return A
+    elif nuc=='C':
+        return C
+    elif nuc=='G':
+        return G
+    elif nuc=='T':
+        return T
+    else:
+        return N
+cdef int hashword(pobinuc word, int lkey, int lword):
+    # Compute an integer key from the first nucleotides of a word, two
+    # bits per nucleotide.
+    # word  : pointer to the first obinuc of the word
+    # lkey  : number of nucleotides used to build the key (reduced to
+    #         lword when the word is shorter)
+    # lword : length of the word
+    # The word is read according to the direction flag of its first
+    # obinuc: forward codes from the first position when the flag is 0,
+    # reverse codes from the last position backwards otherwise.
+    cdef int key=0
+    cdef int k 
+    cdef int dir=1 if GET_DIRECTION(word[0])==0 else -1
+    if lword < lkey:
+        lkey=lword
+    if dir < 0:
+        # reverse reading starts from the last nucleotide of the word
+        word+=lword-1
+    for k in range(lkey):
+        key<<=2
+        key|=GET_FORWARD(word[0]) if dir > 0 else GET_REVERSE(word[0])
+        word+=dir 
+    return key 
+cdef void encode_direction(pobinuc word, int lword):
+    # Set or clear the direction flag stored in the first obinuc of a word.
+    # The forward codes read from the beginning of the word are compared
+    # with the reverse codes read from its end; the flag is set when the
+    # forward code is the greater one at the first difference, and
+    # cleared otherwise (including when no difference is found).
+    # word  : pointer to the first obinuc of the word
+    # lword : length of the word
+    # (the local variable direction is declared but not used)
+    cdef int direction=0
+    cdef pobinuc bnuc = word
+    cdef pobinuc enuc = word + lword - 1
+    # skip positions where both readings agree, until the pointers meet
+    while (bnuc < enuc and GET_FORWARD(bnuc[0])==GET_REVERSE(enuc[0])):
+        bnuc+=1
+        enuc-=1
+    if GET_FORWARD(bnuc[0]) > GET_REVERSE(enuc[0]):
+        word[0]=SET_DIRECTION(word[0])
+    else:   
+        word[0]=UNSET_DIRECTION(word[0])
+cdef bytes decodeword(pobinuc w, int lword):      
+    # Convert a word of obinuc codes to a bytes object of lword letters.
+    # The word is read forward when the direction flag of its first
+    # obinuc is 0, and from its last position backwards, using the reverse
+    # codes, when the flag is 1.
+    # w     : pointer to the first obinuc of the word
+    # lword : length of the word; it is not checked here and must be
+    #         lower than 1024, the size of the local buffer
+    cdef char[1024] cword 
+    cdef bytes bword
+    cdef int d
+    cdef int dir=1
+    cdef int j
+    d = GET_DIRECTION(w[0])
+    if d==1:
+        dir=-1
+        w+= lword - 1
+    for j in range(lword):
+        cword[j]=DECODE_NUC_FR(w[0],d)
+        w+=dir
+    cword[lword]=0
+    bword = PyBytes_FromStringAndSize(cword,lword)
+    return bword
+cpdef minword(bytes word):
+    '''
+    Return the word as decoded after its direction has been computed by
+    encode_direction: the word itself, or its reading in the reverse
+    direction when encode_direction selects that one.
+    @param word: the DNA word
+    @type word: bytes
+    @return: the selected reading of the word
+    @rtype: bytes
+    @raise ValueError: if the word has more than 1023 letters
+    '''
+    cdef obinuc[1024] nword 
+    cdef char*        cword=word
+    cdef int          lword=len(word)
+    cdef int          i
+    # the local buffers used here and in decodeword hold 1024 elements,
+    # one of them being needed for the final zero
+    if lword >= 1024:
+        raise ValueError("word too long (%d), at most 1023 letters are supported" % lword)
+    # nothing to encode: avoid reading outside of the empty word
+    if lword == 0:
+        return b''
+    for i in range(lword):
+        nword[i]=encodeobinuc(cword[i])
+    encode_direction(nword,lword)
+    return decodeword(nword,lword)
+cdef object cmpwordlengthLock=Lock()
+cdef int cmpwordlength=0
+cdef int cmpwords(pconstvoid pw1, pconstvoid pw2):
+    # Comparison function for heapsort/qsort/bsearch over a list of word
+    # pointers.
+    # pw1, pw2 : addresses of two pobinuc, each pointing to the first
+    #            obinuc of a word of cmpwordlength nucleotides
+    # Each word is read in the direction given by the flag of its first
+    # obinuc (forward codes from the start, or reverse codes from the end).
+    # Returns -1, 0 or 1 when the first word is lower than, equal to or
+    # greater than the second one.
+    # Only the cmpwordlength nucleotides of each word are read: no access
+    # is done after the end of a word (or before its start for a reversed
+    # word).
+    cdef pobinuc w1=(<pobinuc*>pw1)[0]   
+    cdef pobinuc w2=(<pobinuc*>pw2)[0]
+    cdef int d1=GET_DIRECTION(w1[0])
+    cdef int d2=GET_DIRECTION(w2[0])
+    cdef int dir1=1
+    cdef int dir2=1
+    cdef int i
+    cdef int n1
+    cdef int n2
+    global cmpwordlength
+    if d1==1:
+        dir1=-1
+        w1+=cmpwordlength-1
+    if d2==1:
+        dir2=-1
+        w2+=cmpwordlength-1
+    for i in range(cmpwordlength):
+        n1 = GET_FORWARD(w1[i*dir1]) if d1==0 else GET_REVERSE(w1[i*dir1])
+        n2 = GET_FORWARD(w2[i*dir2]) if d2==0 else GET_REVERSE(w2[i*dir2])
+        if n1 < n2:
+            return -1
+        if n1 > n2:
+            return 1
+    return 0
+cdef class ReadIndex:
+    cdef int _size
+    cdef int _readsize
+    cdef int _chuncksize
+    cdef int _seqsize
+    cdef long long _buffer_size
+    cdef pobinuc _buffer
+    cdef long long _endofreads
+    cdef list _ids
+    cdef pobinuc* _wordlist
+    cdef long long _wordlist_size
+    cdef int _wordlength
+    cdef int[4096] _index
+    cdef int _lindex
+    cdef int* _globalwordlength
+    def __init__(self, int readsize=-1, int chuncksize=1000000):
+        '''
+        Create an empty index.
+        @param readsize: size of the reads, must be lower than 1024
+        @type readsize: int
+        @param chuncksize: value stored in _chuncksize
+                           (presumably the growth step of the sequence
+                           buffer -- confirm against the methods using it)
+        @type chuncksize: int
+        @raise AssertionError: if readsize is greater than 1023
+        '''
+        cdef int i 
+        global cmpwordlength
+        assert readsize < 1024,"You cannot use reads longer than 1023 base pair"
+        self._readsize=readsize
+        # size of one record: two slots of readsize+1 obinuc
+        self._seqsize=(self._readsize+1)*2
+        self._chuncksize=chuncksize
+        self._buffer=NULL
+        self._buffer_size=0
+        self._endofreads=0
+        self._size=0
+        self._ids=[]
+        self._wordlist=NULL
+        self._wordlist_size=0
+        self._wordlength=0
+        self._lindex=0
+        # address of the module level word length read by cmpwords
+        self._globalwordlength=&cmpwordlength
+        # -1 marks a word prefix absent from the index
+        for i in range(4096):
+            self._index[i]=-1
+    def __del__(self):
+        # Release the sequence buffer and the word list when allocated.
+        # NOTE(review): for a cdef class the deallocation hook documented
+        # by Cython is __dealloc__; depending on the Cython version this
+        # method may never be called -- confirm the buffers are freed.
+        if self._buffer != NULL:
+            free(self._buffer)
+        if self._wordlist != NULL:
+            free(self._wordlist)
+    def __len__(self):
+        # Number of records stored in the index (value of _size).
+        return self._size
+    def save(self,bytes filename, bint verbose=False):
+        '''
+        Write the index to a binary file.
+        The file holds, in this order: the header fields (size, read
+        size, sequence size, buffer size, memory address of the sequence
+        buffer, end of reads, word list size, word length, prefix
+        length), the sequence buffer, the list of word pointers, then the
+        length of the pickled identifier list followed by the pickled
+        identifier list itself. Values are written with their in-memory
+        C representation; the buffer address is the one `load` uses to
+        relocate the saved word pointers.
+        @param filename: name of the file to write
+        @type filename: bytes
+        @param verbose: if True progress messages are printed on stderr
+        @type verbose: bool
+        @raise AssertionError: if the file cannot be opened or if a
+                               write is incomplete
+        '''
+        cdef char* cfile=filename
+        # binary mode: the file content is raw memory, not text
+        cdef FILE *f = fopen(cfile,'wb')
+        cdef size_t transfered
+        cdef size_t ltbuffer
+        cdef bytes tbuffer
+        cdef char* tcbuffer
+        assert f!=NULL,"cannot open file %s" % filename
+        # the file is closed even when a write fails
+        try:
+            if verbose:
+                print >>sys.stderr,"Writing header..."
+            transfered = fwrite(&(self._size),sizeof(int),1,f)
+            assert transfered==1,"Error during size writing"
+            transfered = fwrite(&(self._readsize),sizeof(int),1,f)
+            assert transfered==1,"Error during readsize writing"
+            transfered = fwrite(&(self._seqsize),sizeof(int),1,f)
+            assert transfered==1,"Error during seqsize writing"
+            transfered = fwrite(&(self._buffer_size),sizeof(long long),1,f)
+            assert transfered==1,"Error during buffer size writing"
+            transfered = fwrite(&(self._buffer),sizeof(pobinuc),1,f)
+            assert transfered==1,"Error during buffer address writing"
+            print >> sys.stderr,self._endofreads
+            transfered = fwrite(&(self._endofreads),sizeof(long long),1,f)
+            assert transfered==1,"Error during endofread writing"
+            transfered = fwrite(&(self._wordlist_size),sizeof(long long),1,f)
+            assert transfered==1,"Error during wordlist size writing"
+            transfered = fwrite(&(self._wordlength),sizeof(int),1,f)
+            assert transfered==1,"Error during word length writing"
+            transfered = fwrite(&(self._lindex),sizeof(int),1,f)
+            assert transfered==1,"Error during lindex writing"
+            if verbose:
+                print >>sys.stderr,"Writing sequences..."
+            transfered = fwrite(self._buffer,1,self._buffer_size,f)
+            assert transfered==<size_t>self._buffer_size,"Error during sequences writing"
+            if verbose:
+                print >>sys.stderr,"Writing %d words index..." % self._wordlist_size
+            # word pointers are written as they are in memory
+            transfered = fwrite(self._wordlist,sizeof(pobinuc),self._wordlist_size,f)
+            assert transfered==<size_t>self._wordlist_size,"Error during words index writing"
+            if verbose:
+                print >>sys.stderr,"Writing sequence identifiers..."
+            tbuffer=dumps(self._ids)
+            tcbuffer=tbuffer
+            # length of the bytes object: does not depend on the absence
+            # of zero bytes in the pickled data as strlen does
+            ltbuffer=len(tbuffer)
+            if verbose:
+                print >>sys.stderr,"  identifier size = %d" % ltbuffer
+            transfered = fwrite(&ltbuffer,sizeof(size_t),1,f)
+            assert transfered==1,"Error during identifier size writing"
+            transfered = fwrite(tcbuffer,1,ltbuffer,f)
+            assert transfered==ltbuffer,"Error during identifiers writing"
+            print >>sys.stderr
+            if verbose:
+                print >>sys.stderr,"Save done"
+        finally:
+            fclose(f)
+        if verbose:
+            print >>sys.stderr,"File closed"
+    def load(self,bytes filename, bint verbose=False):
+        '''
+        Read an index from a file written by `save`.
+        The header, the sequence buffer, the word pointer list and the
+        identifier list are read in this order. Word pointers are then
+        shifted by the difference between the address of the new buffer
+        and the buffer address stored in the file, and the word prefix
+        index is rebuilt.
+        Values are read with their in-memory C representation.
+        NOTE(review): identifiers are restored with pickle `loads`;
+        only files coming from a trusted source must be loaded.
+        @param filename: name of the file to read
+        @type filename: bytes
+        @param verbose: if True progress messages are printed on stderr
+        @type verbose: bool
+        @raise AssertionError: if the file cannot be opened or if a read
+                               is incomplete
+        @raise MemoryError: if a buffer cannot be allocated
+        '''
+        cdef char* cfile=filename
+        # binary mode: the file content is raw memory, not text
+        cdef FILE *f = fopen(cfile,'rb')
+        cdef pobinuc oldbuf
+        cdef size_t transfered
+        cdef size_t ltbuffer
+        cdef bytes tbuffer
+        cdef char* tcbuffer
+        cdef long long i
+        cdef int k
+        assert f!=NULL,"cannot open file %s" % filename
+        # the file is closed once, whatever happens during the reading
+        try:
+            if verbose:
+                print >>sys.stderr,"Reading header..."
+            transfered = fread(&(self._size),sizeof(int),1,f)
+            assert transfered==1,"Error during size reading"
+            if verbose:
+                print >>sys.stderr,"  index contains %d sequence pairs" % self._size
+            transfered = fread(&(self._readsize),sizeof(int),1,f)
+            assert transfered==1,"Error during read size reading"
+            if verbose:
+                print >>sys.stderr,"  read size is %d pb" % self._readsize
+            transfered = fread(&(self._seqsize),sizeof(int),1,f)
+            assert transfered==1,"Error during seqsize reading"
+            if verbose:
+                print >>sys.stderr,"  sequence size is %d bytes" % self._seqsize
+            transfered = fread(&(self._buffer_size),sizeof(long long),1,f)
+            assert transfered==1,"Error during buffer size reading"
+            if verbose:
+                print >>sys.stderr,"  buffer size is %d bytes" % self._buffer_size
+            # address of the buffer at saving time, used to patch pointers
+            transfered = fread(&(oldbuf),sizeof(pobinuc),1,f)
+            assert transfered==1,"Error during buffer address reading"
+            transfered = fread(&(self._endofreads),sizeof(long long),1,f)
+            assert transfered==1,"Error during endofread reading"
+            if verbose:
+                print >>sys.stderr,"  end of reads is %d" % self._endofreads
+            transfered = fread(&(self._wordlist_size),sizeof(long long),1,f)
+            assert transfered==1,"Error during word list size reading"
+            if verbose:
+                print >>sys.stderr,"  index contains %d words" % self._wordlist_size
+            transfered = fread(&(self._wordlength),sizeof(int),1,f)
+            assert transfered==1,"Error during word length reading"
+            transfered = fread(&(self._lindex),sizeof(int),1,f)
+            assert transfered==1,"Error during lindex reading"
+            if verbose:
+                print >>sys.stderr,"Reading sequences..."
+            if (self._buffer!=NULL):
+                free(self._buffer)
+            self._buffer=<pobinuc>malloc(self._buffer_size)
+            if self._buffer==NULL and self._buffer_size > 0:
+                raise MemoryError("cannot allocate the sequence buffer")
+            transfered = fread(self._buffer,1,self._buffer_size,f)
+            assert transfered==<size_t>self._buffer_size,"Error during sequences reading"
+            if verbose:
+                print >>sys.stderr,"Reading %d words index..." % self._wordlist_size
+            if (self._wordlist!=NULL):
+                free(self._wordlist)
+            self._wordlist = <pobinuc *>malloc(self._wordlist_size * sizeof(pobinuc))
+            if self._wordlist==NULL and self._wordlist_size > 0:
+                raise MemoryError("cannot allocate the words index")
+            transfered = fread(self._wordlist,sizeof(pobinuc),self._wordlist_size,f)
+            assert transfered==<size_t>self._wordlist_size,"Error during words index reading"
+            if verbose:
+                print >>sys.stderr,"Patching word index..."
+            # saved pointers refer to the old buffer: move them to the new one
+            for i in range(self._wordlist_size):    
+                self._wordlist[i]+= (self._buffer - oldbuf)
+            self._ids=[]
+            if verbose:
+                print >>sys.stderr,"Reading sequence ids..."
+            transfered = fread(&ltbuffer,sizeof(size_t),1,f)
+            assert transfered==1,"Error during identifier size reading"
+            if verbose:
+                print >>sys.stderr,"  identifier size = %d" % ltbuffer
+            tcbuffer = <char*>malloc(ltbuffer)
+            if tcbuffer==NULL:
+                raise MemoryError("cannot allocate the identifier buffer")
+            # the temporary buffer is released before any unpickling error
+            # can be raised
+            try:
+                transfered = fread(tcbuffer,1,ltbuffer,f)
+                assert transfered==ltbuffer,"Error during identifiers reading"
+                tbuffer = PyBytes_FromStringAndSize(tcbuffer,ltbuffer)
+            finally:
+                free(tcbuffer)
+            self._ids=loads(tbuffer)
+        finally:
+            fclose(f)
+        # the prefix length is computed again from the word length
+        self._lindex=6 if self._wordlength >=6 else self._wordlength
+        if verbose:
+            print >>sys.stderr,"Hashing word prefix..."
+        for i in range(4096):
+            self._index[i]=-1
+        # _index[k] is the rank of the first word having the prefix k
+        for i in range(self._wordlist_size):
+            k = hashword(self._wordlist[i],self._lindex,self._wordlength)
+            if self._index[k]==-1:
+                self._index[k]=i
+    def indexWords(self,int lword,bint verbose=False):
+        '''
+        Build the sorted list of the words of size `lword` present in
+        the sequence buffer, and the index of their prefixes.
+        Only words without any nucleotide flagged as zero (non standard
+        nucleotide) are indexed. The direction flag of the first
+        nucleotide of each indexed word is updated by encode_direction.
+        @param lword: size of the words
+        @type lword: int
+        @param verbose: if True progress messages are printed on stderr
+        @type verbose: bool
+        @raise AssertionError: if the index is empty or if lword is
+                               greater than the read size
+        '''
+        cdef int error=0
+        cdef pobinuc sword=self._buffer
+        cdef pobinuc eword=sword
+        cdef pobinuc endbuff = self._buffer + self._endofreads
+        cdef int i=0
+        cdef int k=0
+        # upper bound of the number of words used to size the word list
+        cdef int maxwords = (self._readsize - lword + 1) * self._size * 2
+        assert sword != NULL,"Cannot index empty ReadIndex"
+        assert lword <= self._readsize,"words cannot be longer than reads"
+        if verbose:
+            print >>sys.stderr,"Indexing words from %d sequences..." % len(self)
+        if self._wordlist!=NULL:
+            free(self._wordlist)
+        self._wordlist = <pobinuc*>malloc(maxwords * sizeof(pobinuc*))
+        # count the flagged nucleotides of the first window [sword, eword)
+        for i in range(lword):
+            error+=GET_ZERO(eword[0])
+            eword+=1
+        i=0
+        # slide the window one nucleotide at a time; error is the number
+        # of flagged nucleotides in the current window
+        # NOTE(review): the loop stops when eword reaches endbuff, so the
+        # window ending exactly at endbuff is not tested -- confirm this
+        # is intended.
+        while (eword < endbuff):
+            if error==0:
+                self._wordlist[i]=sword
+                encode_direction(sword,lword)
+                i+=1
+            error-=GET_ZERO(sword[0])
+            error+=GET_ZERO(eword[0])
+            sword+=1
+            eword+=1
+        # shrink the list to the number of words really stored
+        self._wordlist = <pobinuc*>realloc(self._wordlist, i * sizeof(pobinuc*))
+        self._wordlist_size=i
+        self._wordlength=lword
+        if verbose:
+            print >>sys.stderr,"Sorting %d words..." % i
+        # cmpwords reads the word length from a module level variable,
+        # which is set while the lock is held
+        cmpwordlengthLock.acquire()
+        self._globalwordlength[0]=lword
+        heapsort(self._wordlist,i,sizeof(pobinuc),cmpwords)
+        cmpwordlengthLock.release()
+        # prefixes of at most 6 nucleotides: 4**6 = 4096 entries in _index
+        self._lindex=6 if lword >=6 else lword
+        if verbose:
+            print >>sys.stderr,"Hashing word prefix..."
+        for i in range(4096):
+            self._index[i]=-1
+        # _index[k] is the rank of the first word having the prefix k
+        for i in range(self._wordlist_size):
+            k = hashword(self._wordlist[i],self._lindex,lword)
+            if self._index[k]==-1:
+                self._index[k]=i
+                #print k,i
+    def itermarkedpairs(self):
+        '''
+        Iterate over the records whose two halves both have the
+        `seqused` flag set on their first nucleotide.
+        @return: an iterator on the values returned by
+                 getSeqPairAt for the selected records
+        @rtype: iterator
+        '''
+        cdef size_t i
+        cdef pobinuc start1
+        cdef pobinuc start2
+        for i in range(self._size):
+            # a record is _seqsize long, its second half starts in the middle
+            start1=self._buffer+ i * self._seqsize
+            start2=start1 + self._seqsize / 2
+            if GET_SEQUSED(start1[0])==1 and GET_SEQUSED(start2[0])==1:
+                yield self.getSeqPairAt(start1,False)
+    def itermarkedsingleton(self):
+        """Yield the flagged read of every pair in which exactly one read is flagged.
+
+        Records whose two reads are both flagged, or both unflagged, are
+        skipped.  The read is built by ``getSeqAt`` with ``lock=False``, so
+        the flags themselves are left untouched.
+        """
+        cdef size_t recno
+        cdef pobinuc first
+        cdef pobinuc second
+        cdef bint firstused
+        cdef bint secondused
+        for recno in range(self._size):
+            first=self._buffer + recno * self._seqsize
+            second=first + self._seqsize / 2
+            firstused=GET_SEQUSED(first[0])==1
+            secondused=GET_SEQUSED(second[0])==1
+            # Same state on both reads: either a full pair or nothing to report.
+            if firstused == secondused:
+                continue
+            if firstused:
+                yield self.getSeqAt(first,False)
+            else:
+                yield self.getSeqAt(second,False)
+    def iterreads(self,bytes word):
+        """Iterate over the not yet used reads containing *word*.
+
+        *word* must have exactly the indexed word length (``self._wordlength``).
+        It is encoded, its prefix is hashed to find the slice of the sorted
+        word list that may contain it, and one occurrence is located with
+        ``bsearch``.  All equal words around that occurrence are then visited:
+        first backward (occurrence included), then forward.
+
+        Each occurrence is resolved with ``getSeqAt(..., True)``, which flags
+        the read as used and returns ``None`` for a read that was already
+        flagged; such reads are not yielded.
+
+        ``cmpwordlengthLock`` is held whenever ``cmpwords`` may run and is
+        released around each ``yield``; ``self._globalwordlength[0]`` is reset
+        after every re-acquisition (presumably it is what ``cmpwords`` reads
+        to know the word length -- confirm against its definition).
+        """
+        cdef obinuc nword[1024]
+        cdef pobinuc pnword=nword
+        cdef pobinuc* ppnword=&pnword
+        cdef pobinuc* found
+        cdef char* cword=word
+        cdef int i
+        cdef int lword=self._wordlength
+        cdef int k
+        cdef int nk=1 << (2*self._lindex)
+        cdef long long wstart
+        cdef long long wend
+        cdef long long wpoint
+        assert len(word) == lword
+        for i in range(lword):
+            nword[i]=encodeobinuc(cword[i])
+        encode_direction(nword,lword)
+        k=hashword(nword,self._lindex,lword)
+        wstart=self._index[k]
+        if wstart==-1:
+            # No indexed word shares this prefix.  A bare return ends the
+            # generator without relying on StopIteration propagation.
+            return
+        # The slice ends where the next non empty hash bucket starts.
+        k+=1
+        while (k < nk and self._index[k]==-1):
+            k+=1
+        if k==nk:
+            wend=self._wordlist_size
+        else:
+            wend=self._index[k]
+        cmpwordlengthLock.acquire()
+        self._globalwordlength[0]=lword
+        found = <pobinuc*>bsearch(ppnword,self._wordlist+wstart,wend-wstart,sizeof(pobinuc),cmpwords)
+        if found==NULL:
+            cmpwordlengthLock.release()
+            return
+        wpoint = found - self._wordlist
+        # Remember where bsearch landed: the forward scan restarts just after it.
+        wstart = wpoint
+        # Backward scan, index 0 included.
+        while (wpoint >=0 and cmpwords(ppnword,self._wordlist+wpoint)==0):
+            s=self.getSeqAt(self._wordlist[wpoint],True)
+            if s is not None:
+                cmpwordlengthLock.release()
+                yield s
+                cmpwordlengthLock.acquire()
+                self._globalwordlength[0]=lword
+            wpoint-=1
+        # Forward scan over the occurrences located after the bsearch hit.
+        wpoint = wstart+1
+        while (wpoint < self._wordlist_size and cmpwords(ppnword,self._wordlist+wpoint)==0):
+            s=self.getSeqAt(self._wordlist[wpoint],True)
+            if s is not None:
+                cmpwordlengthLock.release()
+                yield s
+                cmpwordlengthLock.acquire()
+                self._globalwordlength[0]=lword
+            wpoint+=1
+        cmpwordlengthLock.release()
+    def iterwords(self):
+        """Yield every word of the word list, decoded with ``decodeword``.
+
+        Words come out in the order of ``self._wordlist``.  An
+        ``AssertionError`` is raised if no word list has been built yet.
+        """
+        cdef int i 
+        assert self._wordlist != NULL,'You must index words'
+        for i in range(self._wordlist_size):
+            yield decodeword(self._wordlist[i],self._wordlength)
+    def add(self,sequence):
+        """Append a pair of reads to the buffer.
+
+        *sequence* is indexed as ``sequence[0]`` and ``sequence[1]`` (the two
+        reads of the pair).  The first call fixes the read size to
+        ``len(sequence[0])``; a record then holds ``(readsize + 1) * 2`` cells.
+        Later calls assert that neither read is longer than that size.
+
+        Each read is encoded cell by cell with ``encodeobinuc`` and padded up
+        to ``readsize + 1`` cells with ``SET_ENDOFREAD(N)``, so every read ends
+        with at least one end-of-read cell.
+        """
+        cdef bytes bseq
+        cdef char* seq
+        if  self._readsize<0:
+            # First pair: it defines the geometry of every record.
+            # NOTE(review): only sequence[0] is measured here; a longer
+            # sequence[1] on this first call is not checked -- confirm callers
+            # always provide reads of equal length.
+            self._readsize=len(sequence[0])
+            self._seqsize=(self._readsize+1)*2
+            assert self._readsize < 1024,"You cannot use reads longer than 1023 base pair"
+        else:
+            assert len(sequence[0]) <= self._readsize and len(sequence[1]) <= self._readsize
+        if self._buffer==NULL:
+            # Lazy allocation of the first chunk of records.
+            self._buffer = <pobinuc>malloc(self._seqsize*self._chuncksize)
+            self._buffer_size=self._seqsize*self._chuncksize
+            self._endofreads=0
+        if self._endofreads + self._seqsize >= self._buffer_size:
+            # Not enough room for one more record: grow by one chunk.
+            self._buffer_size+=self._seqsize*self._chuncksize
+            self._buffer = <pobinuc> realloc(<void*>self._buffer,self._buffer_size)
+        # The id is stored without its last two characters
+        # (presumably a "/1" style suffix -- TODO confirm).
+        self._ids.append(sequence[0].id[0:-2])
+        bseq = bytes(sequence[0])
+        seq = bseq
+        l=0
+        # Encode the first read up to the terminating NUL of the C string.
+        while seq[0]!=0:
+            self._buffer[self._endofreads]=encodeobinuc(seq[0])
+            self._endofreads+=1
+            seq+=1
+            l+=1
+        # Pad to readsize+1 cells with end-of-read markers.
+        while l<=self._readsize:
+            self._buffer[self._endofreads]=SET_ENDOFREAD(N)
+            self._endofreads+=1
+            l+=1
+        # Same treatment for the second read of the pair.
+        bseq = bytes(sequence[1])
+        seq = bseq
+        l=0
+        while seq[0]!=0:
+            self._buffer[self._endofreads]=encodeobinuc(seq[0])
+            self._endofreads+=1
+            seq+=1
+            l+=1
+        while l<=self._readsize:
+            self._buffer[self._endofreads]=SET_ENDOFREAD(N)
+            self._endofreads+=1
+            l+=1
+        self._size+=1
+    cdef object getSeqAt(self,pobinuc word,bint lock=False):
+        # Return, as a NucSequence, the read containing the buffer cell `word`.
+        #
+        # `word` may point anywhere inside a record.  The read is the first one
+        # of the pair (id suffix "/1") unless `word` lies at or after the record
+        # midpoint, in which case it is the second one (id suffix "/2").
+        #
+        # When `lock` is true, None is returned if the read is already flagged
+        # as used; otherwise the read is flagged before being decoded.
+        cdef long long delta
+        cdef pobinuc start1
+        cdef pobinuc start2
+        cdef char[1024] cseqf
+        cdef char[1024] cseqr
+        cdef char* pseq
+        cdef bytes bseqf
+        cdef bytes bseqr
+        cdef bytes n=b"/1"
+        # Record number = offset of `word` from the buffer start / record size.
+        # NOTE(review): the offset is taken between void* casts while _seqsize
+        # counts cells; this presumably relies on a one-byte cell -- confirm.
+        delta = <void*>word - <void*>self._buffer
+        delta/= self._seqsize
+        start1=self._buffer+ delta * self._seqsize
+        start2=start1 + self._seqsize / 2
+        if word >= start2:
+            start1=start2
+            n=b"/2"
+        if lock:
+            if GET_SEQUSED(start1[0])==1:
+                return None
+            else:
+                start1[0]=SET_SEQUSED(start1[0])
+        # Decode cells until the end-of-read marker into a NUL terminated C string.
+        pseq = cseqf
+        while (GET_ENDOFREAD(start1[0])==0):
+            pseq[0]=DECODE_NUC(start1[0])
+            start1+=1
+            pseq+=1
+        pseq[0]=0
+        bseqf = PyBytes_FromString(cseqf)
+        return NucSequence(self._ids[delta]+n,bseqf)
+    cdef object getSeqPairAt(self,pobinuc word,bint lock=False):
+        # Return both reads of the record containing the buffer cell `word`
+        # as a (NucSequence "/1", NucSequence "/2") tuple.
+        #
+        # When `lock` is true, (None, None) is returned if the first read is
+        # already flagged as used; otherwise both reads are flagged first.
+        cdef long long delta
+        cdef pobinuc start1
+        cdef pobinuc start2
+        cdef char[1024] cseqf
+        cdef char[1024] cseqr
+        cdef char* pseq
+        cdef bytes bseqf
+        cdef bytes bseqr
+        # Record number, then first cell of each read of that record.
+        delta = <void*>word - <void*>self._buffer
+        delta/= self._seqsize
+        start1=self._buffer+ delta * self._seqsize
+        start2=start1 + self._seqsize / 2
+        if lock:
+            if GET_SEQUSED(start1[0])==1:
+                return None,None
+            else:
+                start1[0]=SET_SEQUSED(start1[0])
+                start2[0]=SET_SEQUSED(start2[0])
+        # Decode the first read up to its end-of-read marker.
+        pseq = cseqf
+        while (GET_ENDOFREAD(start1[0])==0):
+            pseq[0]=DECODE_NUC(start1[0])
+            start1+=1
+            pseq+=1
+        pseq[0]=0
+        bseqf = PyBytes_FromString(cseqf)
+        # Decode the second read the same way.
+        pseq = cseqr
+        while (GET_ENDOFREAD(start2[0])==0):
+            pseq[0]=DECODE_NUC(start2[0])
+            start2+=1
+            pseq+=1
+        pseq[0]=0
+        bseqr = PyBytes_FromString(cseqr)
+        return NucSequence(self._ids[delta]+'/1',bseqf),NucSequence(self._ids[delta]+'/2',bseqr)
+    def __getitem__(self,int index):
+        """Return the first read of the pair stored at position *index*.
+
+        Negative indexes count from the end.  ``IndexError`` is raised when
+        *index* falls outside ``[-size, size)``.
+        """
+        if index >= self._size:
+            raise IndexError(index)
+        if index < 0:
+            index+=self._size
+        if index < 0:
+            raise IndexError(index)
+        # Pointing at the record start makes getSeqAt pick the "/1" read.
+        return self.getSeqAt(self._buffer + index * self._seqsize)
diff --git a/src/obitools/word/options.py b/src/obitools/word/options.py
new file mode 100644
index 0000000..f67a757
--- /dev/null
+++ b/src/obitools/word/options.py
@@ -0,0 +1,117 @@
+from obitools.word import wordSelector
+from obitools.word import allDNAWordIterator,encodeWord
+from obitools.word import predicate
+def _acceptedOptionCallback(options,opt,value,parser):
+    """Option callback storing a match predicate for an accepted pattern.
+
+    The predicate built from *value* is appended to
+    ``parser.values.acceptedOligo``, which is created on first use.
+    """
+    if not hasattr(parser.values, 'acceptedOligo'):
+        parser.values.acceptedOligo=[]
+    # predicateMatchPattern takes (pattern, size); a pattern covers the whole
+    # word, so its own length is the size.
+    parser.values.acceptedOligo.append(predicate.predicateMatchPattern(value,len(value)))
+def _rejectedOptionCallback(options,opt,value,parser):
+    """Option callback storing a match predicate for a rejected pattern.
+
+    The predicate built from *value* is appended to
+    ``parser.values.rejectedOligo``, which is created on first use.
+    """
+    if not hasattr(parser.values, 'rejectedOligo'):
+        parser.values.rejectedOligo=[]
+    # predicateMatchPattern takes (pattern, size); a pattern covers the whole
+    # word, so its own length is the size.
+    parser.values.rejectedOligo.append(predicate.predicateMatchPattern(value,len(value)))
+def addOligoOptions(optionManager):
+    """Register the oligonucleotide selection options on *optionManager*.
+
+    *optionManager* only needs an ``add_option`` method.  Destinations set:
+    ``oligoList``, ``oligoSize``, ``familySize``, ``oligoDist`` (default 1),
+    ``gcMax`` (default 0), ``acceptedPattern`` and ``rejectedPattern``
+    (appendable, default empty list), ``homopolymere`` and
+    ``homopolymere_min`` (default 0).
+    """
+    optionManager.add_option('-L','--oligo-list',
+                             action="store", dest="oligoList",
+                             metavar="<filename>",
+                             type="str",
+                             help="filename containing a list of oligonucleotide")
+    optionManager.add_option('-s','--oligo-size',
+                             action="store", dest="oligoSize",
+                             metavar="<###>",
+                             type="int",
+                             help="Size of oligonucleotide to generate")
+    optionManager.add_option('-f','--family-size',
+                             action="store", dest="familySize",
+                             metavar="<###>",
+                             type="int",
+                             help="Size of oligonucleotide family to generate")
+    optionManager.add_option('-d','--distance',
+                             action="store", dest="oligoDist",
+                             metavar="<###>",
+                             type="int",
+                             default=1,
+                             help="minimal distance between two oligonucleotides")
+    # A value of 0 (the default) disables the GC filter in dnaWordIterator.
+    optionManager.add_option('-g','--gc-max',
+                             action="store", dest="gcMax",
+                             metavar="<###>",
+                             type="int",
+                             default=0,
+                             help="maximum count of G or C nucleotide acceptable in a word")
+    optionManager.add_option('-a','--accepted',
+                             action="append",dest="acceptedPattern",
+                             metavar="<regular pattern>",
+                             default=[],
+                             type="str",
+                             help="pattern of accepted oligonucleotide")
+    optionManager.add_option('-r','--rejected',
+                             action="append",dest="rejectedPattern",
+                             metavar="<regular pattern>",
+                             default=[],
+                             type="str",
+                             help="pattern of rejected oligonucleotide")
+    # A value of 0 (the default) disables both homopolymer filters.
+    optionManager.add_option('-p','--homopolymer',
+                             action="store", dest="homopolymere",
+                             metavar="<###>",
+                             type="int",
+                             default=0,
+                             help="reject oligo with homopolymer longer than.")
+    optionManager.add_option('-P','--homopolymer-min',
+                             action="store", dest="homopolymere_min",
+                             metavar="<###>",
+                             type="int",
+                             default=0,
+                             help="accept only oligo with homopolymer longer or equal to.")
+def _encodedWordsFromFile(filename):
+    """Yield the encoded form of each stripped, lower-cased line of *filename*."""
+    with open(filename) as oligofile:
+        for line in oligofile:
+            yield encodeWord(line.strip().lower())
+def dnaWordIterator(options):
+    """Build the word selector described by the parsed *options*.
+
+    Words come either from the file ``options.oligoList`` (one word per line;
+    the first line fixes ``options.oligoSize``) or from
+    ``allDNAWordIterator(options.oligoSize)``.
+
+    ``options.acceptedOligo`` and ``options.rejectedOligo`` are rebuilt from
+    the accepted/rejected patterns, the homopolymer limits and the GC limit,
+    then handed to ``wordSelector`` together with the words.
+
+    Raises ``AssertionError`` when a mandatory option is missing or when a
+    pattern does not have the word size.
+    """
+    assert options.oligoSize is not None or options.oligoList is not None,"option -s or --oligo-size must be specified"
+    assert options.familySize is not None,"option -f or --family-size must be specified"
+    assert options.oligoDist is not None,"option -d or --distance must be specified"
+    if options.oligoList is not None:
+        # Peek at the first word for the size, closing the handle right away.
+        with open(options.oligoList) as oligofile:
+            options.oligoSize=len(next(oligofile).strip())
+        words = _encodedWordsFromFile(options.oligoList)
+    else:
+        words = allDNAWordIterator(options.oligoSize)
+    options.acceptedOligo=[]
+    for p in options.acceptedPattern:
+        assert len(p)==options.oligoSize,"Accept pattern with bad lenth : %s" % p
+        options.acceptedOligo.append(predicate.predicateMatchPattern(p, options.oligoSize))
+    options.rejectedOligo=[]
+    for p in options.rejectedPattern:
+        assert len(p)==options.oligoSize,"Reject pattern with bad lenth : %s" % p
+        options.rejectedOligo.append(predicate.predicateMatchPattern(p, options.oligoSize))
+    if options.homopolymere:
+        options.rejectedOligo.append(predicate.predicateHomoPolymerLarger(options.homopolymere, options.oligoSize))
+    if options.homopolymere_min:
+        # "longer or equal to n" is expressed as "strictly larger than n-1".
+        options.acceptedOligo.append(predicate.predicateHomoPolymerLarger(options.homopolymere_min-1, options.oligoSize))
+    if options.gcMax:
+        options.rejectedOligo.append(predicate.predicateGCUpperBond(options.gcMax, options.oligoSize))
+    return wordSelector(words, options.acceptedOligo, options.rejectedOligo)
diff --git a/src/obitools/word/predicate.py b/src/obitools/word/predicate.py
new file mode 100644
index 0000000..082b80f
--- /dev/null
+++ b/src/obitools/word/predicate.py
@@ -0,0 +1,41 @@
+'''
+Created on 14 oct. 2009
+@author: coissac
+'''
+from _binary import wordDist, \
+                    homoMax, \
+                    countCG, \
+                    matchPattern, \
+                    encodePattern
+def predicateWordDistMin(word,dmin,size):
+    """Return a predicate true for words at a distance of at least *dmin* from *word*.
+
+    *size* is accepted for signature symmetry with the other factories and
+    is not used.
+    """
+    def isFarEnough(candidate):
+        distance = wordDist(word, candidate)
+        return distance >= dmin
+    return isFarEnough
+def predicateHomoPolymerLarger(count,size):
+    """Return a predicate true when ``homoMax(w, size)`` is strictly greater than *count*."""
+    def hasLongerHomopolymer(candidate):
+        longest = homoMax(candidate, size)
+        return longest > count
+    return hasLongerHomopolymer
+def predicateHomoPolymerSmaller(count,size):
+    """Return a predicate true when ``homoMax(w, size)`` is strictly lower than *count*."""
+    def hasShorterHomopolymer(candidate):
+        longest = homoMax(candidate, size)
+        return longest < count
+    return hasShorterHomopolymer
+def predicateGCUpperBond(count,size):
+    """Return a predicate true when ``countCG(w, size)`` is strictly greater than *count*."""
+    def exceedsGC(candidate):
+        gc = countCG(candidate, size)
+        return gc > count
+    return exceedsGC
+def predicateMatchPattern(pattern,size=None):
+    """Return a predicate true for the words matching *pattern*.
+
+    The pattern is encoded once with ``encodePattern``; the returned predicate
+    calls ``matchPattern(w, encoded)``.
+
+    *size* is not used by the matching; it is kept, now optional, so that
+    both one-argument and two-argument callers work.
+    """
+    pattern=encodePattern(pattern)
+    def predicate(w):
+        return matchPattern(w, pattern)
+    return predicate
diff --git a/src/obitools/zipfile.py b/src/obitools/zipfile.py
new file mode 100644
index 0000000..41e4bcb
--- /dev/null
+++ b/src/obitools/zipfile.py
@@ -0,0 +1,1282 @@
+"""
+Read and write ZIP files.
+"""
+import struct, os, time, sys, shutil
+import binascii, cStringIO
+# zlib is optional: without it only stored (uncompressed) members can be
+# handled and the CRC comes from binascii instead.
+try:
+    import zlib # We may need its compression method
+    crc32 = zlib.crc32
+except ImportError:
+    zlib = None
+    crc32 = binascii.crc32
+__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
+           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
+class BadZipfile(Exception):
+    """Raised by this module for archives it cannot handle (also exported as ``error``)."""
+    pass
+class LargeZipFile(Exception):
+    """
+    Raised when writing a zipfile that requires ZIP64 extensions
+    while those extensions are disabled.
+    """
+error = BadZipfile      # The exception raised by this module
+# Largest size stored directly in a header; above it ZipInfo.FileHeader
+# switches to the ZIP64 extra record.
+ZIP64_LIMIT= (1 << 31) - 1
+# constants for Zip file compression methods
+# Other ZIP compression methods not supported
+# Here are some struct module formats for reading headers
+structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
+stringEndArchive = "PK\005\006"   # magic number for end of archive record
+structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes
+stringCentralDir = "PK\001\002"   # magic number for central directory
+structFileHeader = "<4s2B4HLLL2H"  # 12 items, file header record, 30 bytes
+stringFileHeader = "PK\003\004"   # magic number for file header
+structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes
+stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
+structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes
+stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
+# indexes of entries in the central directory structure
+_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
+_CD_TIME = 7
+_CD_DATE = 8
+_CD_CRC = 9
+# indexes of entries in the local file header structure
+_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
+_FH_CRC = 7
+def is_zipfile(filename):
+    """Quickly see if file is a ZIP file by checking the magic number.
+
+    Returns True when an end-of-archive record is found in *filename*, and
+    False otherwise, including when the file cannot be opened or read
+    (``IOError``).
+    """
+    try:
+        fpin = open(filename, "rb")
+        try:
+            endrec = _EndRecData(fpin)
+        finally:
+            # Close the handle even when reading the record fails.
+            fpin.close()
+        if endrec:
+            return True                 # file has correct magic number
+    except IOError:
+        pass
+    return False
+def _EndRecData64(fpin, offset, endrec):
+    """
+    Read the ZIP64 end-of-archive records and use that to update endrec
+
+    *offset* is the position of the regular end-of-archive record relative to
+    the end of the file (negative).  *endrec* is returned unchanged when the
+    ZIP64 locator or the ZIP64 record signature is not found.  Raises
+    BadZipfile for archives spanning several disks.
+    """
+    # The ZIP64 locator sits immediately before the regular end record.
+    locatorSize = struct.calcsize(structEndArchive64Locator)
+    fpin.seek(offset - locatorSize, 2)
+    data = fpin.read(locatorSize)
+    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
+    if sig != stringEndArchive64Locator:
+        return endrec
+    if diskno != 0 or disks != 1:
+        raise BadZipfile("zipfiles that span multiple disks are not supported")
+    # Assume no 'zip64 extensible data'
+    endArchiveSize = struct.calcsize(structEndArchive64)
+    fpin.seek(offset - locatorSize - endArchiveSize, 2)
+    data = fpin.read(endArchiveSize)
+    sig, sz, create_version, read_version, disk_num, disk_dir, \
+            dircount, dircount2, dirsize, diroffset = \
+            struct.unpack(structEndArchive64, data)
+    if sig != stringEndArchive64:
+        return endrec
+    # Update the original endrec using data from the ZIP64 record
+    endrec[1] = disk_num
+    endrec[2] = disk_dir
+    endrec[3] = dircount
+    endrec[4] = dircount2
+    endrec[5] = dirsize
+    endrec[6] = diroffset
+    return endrec
+def _EndRecData(fpin):
+    """Return data from the "End of Central Directory" record, or None.
+    The data is a list of the eight unpacked fields of the ZIP "End of
+    central dir" record, followed by the archive comment and then by the
+    file seek offset of this record (ten items in total)."""
+    # Fast path: the record is the last 22 bytes of the file.
+    fpin.seek(-22, 2)               # Assume no archive comment.
+    filesize = fpin.tell() + 22     # Get file size
+    data = fpin.read()
+    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
+        endrec = struct.unpack(structEndArchive, data)
+        endrec = list(endrec)
+        endrec.append("")               # Append the archive comment
+        endrec.append(filesize - 22)    # Append the record start offset
+        # endrec[-4] is the central directory offset; all ones means ZIP64.
+        if endrec[-4] == 0xffffffff:
+            return _EndRecData64(fpin, -22, endrec)
+        return endrec
+    # Search the last END_BLOCK bytes of the file for the record signature.
+    # The comment is appended to the ZIP file and has a 16 bit length.
+    # So the comment may be up to 64K long.  We limit the search for the
+    # signature to a few Kbytes at the end of the file for efficiency.
+    # also, the signature must not appear in the comment.
+    END_BLOCK = min(filesize, 1024 * 4)
+    fpin.seek(filesize - END_BLOCK, 0)
+    data = fpin.read()
+    start = data.rfind(stringEndArchive)
+    if start >= 0:     # Correct signature string was found
+        endrec = struct.unpack(structEndArchive, data[start:start+22])
+        endrec = list(endrec)
+        comment = data[start+22:]
+        if endrec[7] == len(comment):     # Comment length checks out
+            # Append the archive comment and start offset
+            endrec.append(comment)
+            endrec.append(filesize - END_BLOCK + start)
+            if endrec[-4] == 0xffffffff:
+                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
+            return endrec
+    return      # Error, return None
+class ZipInfo (object):
+    """Class with attributes describing each file in the ZIP archive."""
+    # Fixed attribute set: no per-instance __dict__ is created.
+    __slots__ = (
+            'orig_filename',
+            'filename',
+            'date_time',
+            'compress_type',
+            'comment',
+            'extra',
+            'create_system',
+            'create_version',
+            'extract_version',
+            'reserved',
+            'flag_bits',
+            'volume',
+            'internal_attr',
+            'external_attr',
+            'header_offset',
+            'CRC',
+            'compress_size',
+            'file_size',
+            '_raw_time',
+        )
+    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
+        """Describe the member *filename* last modified at *date_time*.
+
+        *date_time* is a (year, month, day, hour, min, sec) tuple.  The name
+        is cut at the first null byte and uses "/" as separator.
+        """
+        self.orig_filename = filename   # Original file name in archive
+        # Terminate the file name at the first null byte.  Null bytes in file
+        # names are used as tricks by viruses in archives.
+        null_byte = filename.find(chr(0))
+        if null_byte >= 0:
+            filename = filename[0:null_byte]
+        # This is used to ensure paths in generated ZIP files always use
+        # forward slashes as the directory separator, as required by the
+        # ZIP format specification.
+        if os.sep != "/" and os.sep in filename:
+            filename = filename.replace(os.sep, "/")
+        self.filename = filename        # Normalized file name
+        self.date_time = date_time      # year, month, day, hour, min, sec
+        # Standard values:
+        self.compress_type = ZIP_STORED # Type of compression for the file
+        self.comment = ""               # Comment for each file
+        self.extra = ""                 # ZIP extra data
+        if sys.platform == 'win32':
+            self.create_system = 0          # System which created ZIP archive
+        else:
+            # Assume everything else is unix-y
+            self.create_system = 3          # System which created ZIP archive
+        self.create_version = 20        # Version which created ZIP archive
+        self.extract_version = 20       # Version needed to extract archive
+        self.reserved = 0               # Must be zero
+        self.flag_bits = 0              # ZIP flag bits
+        self.volume = 0                 # Volume number of file header
+        self.internal_attr = 0          # Internal attributes
+        self.external_attr = 0          # External file attributes
+        # Other attributes are set by class ZipFile:
+        # header_offset         Byte offset to the file header
+        # CRC                   CRC-32 of the uncompressed file
+        # compress_size         Size of the compressed file
+        # file_size             Size of the uncompressed file
+    def FileHeader(self):
+        """Return the per-file header as a string.
+
+        When a size exceeds ZIP64_LIMIT the real sizes go into a ZIP64 extra
+        record and the instance's extract/create versions are raised to 45.
+        """
+        dt = self.date_time
+        # Pack the date and the time into the two 16-bit DOS fields
+        # (seconds are stored halved).
+        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+        if self.flag_bits & 0x08:
+            # Set these to zero because we write them after the file data
+            CRC = compress_size = file_size = 0
+        else:
+            CRC = self.CRC
+            compress_size = self.compress_size
+            file_size = self.file_size
+        extra = self.extra
+        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
+            # File is larger than what fits into a 4 byte integer,
+            # fall back to the ZIP64 extension
+            fmt = '<HHQQ'
+            extra = extra + struct.pack(fmt,
+                    1, struct.calcsize(fmt)-4, file_size, compress_size)
+            file_size = 0xffffffff # -1
+            compress_size = 0xffffffff # -1
+            self.extract_version = max(45, self.extract_version)
+            self.create_version = max(45, self.extract_version)
+        header = struct.pack(structFileHeader, stringFileHeader,
+                 self.extract_version, self.reserved, self.flag_bits,
+                 self.compress_type, dostime, dosdate, CRC,
+                 compress_size, file_size,
+                 len(self.filename), len(extra))
+        return header + self.filename + extra
+    def _decodeExtra(self):
+        """Update sizes and header offset from the ZIP64 record of ``self.extra``.
+
+        The extra field is scanned record by record; only records of type 1
+        are interpreted.  Raises RuntimeError for a type 1 record whose
+        length is not one of the supported values.
+        """
+        # Try to decode the extra field.
+        extra = self.extra
+        unpack = struct.unpack
+        while extra:
+            # Each record starts with a 2-byte type and a 2-byte payload length.
+            tp, ln = unpack('<HH', extra[:4])
+            if tp == 1:
+                if ln >= 24:
+                    counts = unpack('<QQQ', extra[4:28])
+                elif ln == 16:
+                    counts = unpack('<QQ', extra[4:20])
+                elif ln == 8:
+                    counts = unpack('<Q', extra[4:12])
+                elif ln == 0:
+                    counts = ()
+                else:
+                    raise RuntimeError, "Corrupt extra field %s"%(ln,)
+                idx = 0
+                # ZIP64 extension (large files and/or large archives)
+                # XXX Is this correct? won't this exclude 2**32-1 byte files?
+                # Only the fields holding a placeholder value consume a
+                # 64-bit count, in this fixed order.
+                if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
+                    self.file_size = counts[idx]
+                    idx += 1
+                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
+                    self.compress_size = counts[idx]
+                    idx += 1
+                if self.header_offset == -1 or self.header_offset == 0xffffffffL:
+                    # NOTE(review): `old` is assigned but never read afterwards.
+                    old = self.header_offset
+                    self.header_offset = counts[idx]
+                    idx+=1
+            extra = extra[ln+4:]
+class _ZipDecrypter:
+    """
+    Class to handle decryption of files stored within a ZIP archive.
+    ZIP supports a password-based form of encryption. Even though known
+    plaintext attacks have been found against it, it is still useful
+    to be able to get data out of such a file.
+    Usage ::
+        zd = _ZipDecrypter(mypwd)
+        plain_char = zd(cypher_char)
+        plain_text = map(zd, cypher_text)
+    """
+    # Plain function (no self): it is called once, below, while the class
+    # body is executed, to build the shared table.
+    def _GenerateCRCTable():
+        """Generate a CRC-32 table.
+        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
+        internal keys. We noticed that a direct implementation is faster than
+        relying on binascii.crc32().
+        """
+        poly = 0xedb88320
+        table = [0] * 256
+        for i in range(256):
+            crc = i
+            for j in range(8):
+                if crc & 1:
+                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
+                else:
+                    crc = ((crc >> 1) & 0x7FFFFFFF)
+            table[i] = crc
+        return table
+    crctable = _GenerateCRCTable()
+    def _crc32(self, ch, crc):
+        """Compute the CRC32 primitive on one byte."""
+        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
+    def __init__(self, pwd):
+        """Initialise the three keys and mix in every character of *pwd*."""
+        self.key0 = 305419896
+        self.key1 = 591751049
+        self.key2 = 878082192
+        for p in pwd:
+            self._UpdateKeys(p)
+    def _UpdateKeys(self, c):
+        """Advance the three keys with the plain character *c*."""
+        self.key0 = self._crc32(c, self.key0)
+        # key1 is kept within 32 bits by the masks.
+        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
+        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
+        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
+    def __call__(self, c):
+        """Decrypt a single character."""
+        c = ord(c)
+        k = self.key2 | 2
+        c = c ^ (((k * (k^1)) >> 8) & 255)
+        c = chr(c)
+        # The keys evolve with the decrypted (plain) character.
+        self._UpdateKeys(c)
+        return c
+class ZipExtFile:
+    """File-like object for reading an archive member.
+       Is returned by ZipFile.open().
+    """
+    def __init__(self, fileobj, zipinfo, decrypt=None):
+        """Wrap *fileobj*, from which the data of the member *zipinfo* is read.
+
+        *decrypt*, when given, is called on every raw character before
+        decompression.
+        """
+        self.fileobj = fileobj
+        self.decrypter = decrypt
+        self.bytes_read = 0L
+        self.rawbuffer = ''             # raw data not yet decompressed
+        self.readbuffer = ''            # decoded data not yet returned by read()
+        self.linebuffer = ''            # data held back by readline()
+        # NOTE(review): eof is never set to True in this class, so the
+        # flush branch of read() cannot be reached from here -- confirm.
+        self.eof = False
+        self.univ_newlines = False
+        self.nlSeps = ("\n", )
+        self.lastdiscard = ''
+        self.compress_type = zipinfo.compress_type
+        self.compress_size = zipinfo.compress_size
+        self.closed  = False
+        self.mode    = "r"
+        self.name = zipinfo.filename
+        # read from compressed files in 64k blocks
+        self.compreadsize = 64*1024
+        if self.compress_type == ZIP_DEFLATED:
+            self.dc = zlib.decompressobj(-15)
+    def set_univ_newlines(self, univ_newlines):
+        """Enable or disable the recognition of "\\r\\n" and "\\r" as line ends."""
+        self.univ_newlines = univ_newlines
+        # pick line separator char(s) based on universal newlines flag
+        self.nlSeps = ("\n", )
+        if self.univ_newlines:
+            self.nlSeps = ("\r\n", "\r", "\n")
+    def __iter__(self):
+        return self
+    def next(self):
+        """Return the next line; raise StopIteration when readline() returns nothing."""
+        nextline = self.readline()
+        if not nextline:
+            raise StopIteration()
+        return nextline
+    def close(self):
+        """Mark the object as closed (the underlying file object is left untouched)."""
+        self.closed = True
+    def _checkfornewline(self):
+        """Return (position, length) of the first separator found in the line buffer.
+
+        Separators are tried in the order of ``self.nlSeps``.  (-1, -1) is
+        returned when the buffer is empty or contains none of them.
+        """
+        nl, nllen = -1, -1
+        if self.linebuffer:
+            # ugly check for cases where half of an \r\n pair was
+            # read on the last pass, and the \r was discarded.  In this
+            # case we just throw away the \n at the start of the buffer.
+            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
+                self.linebuffer = self.linebuffer[1:]
+            for sep in self.nlSeps:
+                nl = self.linebuffer.find(sep)
+                if nl >= 0:
+                    nllen = len(sep)
+                    return nl, nllen
+        return nl, nllen
+    def readline(self, size = -1):
+        """Read a line with approx. size. If size is negative,
+           read a whole line.
+        """
+        if size < 0:
+            size = sys.maxint
+        elif size == 0:
+            return ''
+        # check for a newline already in buffer
+        nl, nllen = self._checkfornewline()
+        if nl >= 0:
+            # the next line was already in the buffer
+            nl = min(nl, size)
+        else:
+            # no line break in buffer - try to read more
+            size -= len(self.linebuffer)
+            while nl < 0 and size > 0:
+                buf = self.read(min(size, 100))
+                if not buf:
+                    break
+                self.linebuffer += buf
+                size -= len(buf)
+                # check for a newline in buffer
+                nl, nllen = self._checkfornewline()
+            # we either ran out of bytes in the file, or
+            # met the specified size limit without finding a newline,
+            # so return current buffer
+            if nl < 0:
+                s = self.linebuffer
+                self.linebuffer = ''
+                return s
+        buf = self.linebuffer[:nl]
+        # Remember the separator just consumed so that a "\r\n" split across
+        # two reads can be recognised by _checkfornewline().
+        self.lastdiscard = self.linebuffer[nl:nl + nllen]
+        self.linebuffer = self.linebuffer[nl + nllen:]
+        # line is always returned with \n as newline char (except possibly
+        # for a final incomplete line in the file, which is handled above).
+        return buf + "\n"
+    def readlines(self, sizehint = -1):
+        """Return a list with all (following) lines. The sizehint parameter
+        is ignored in this implementation.
+        """
+        result = []
+        while True:
+            line = self.readline()
+            if not line: break
+            result.append(line)
+        return result
+    def read(self, size = None):
+        """Return at most *size* characters of decoded data.
+
+        With *size* None (or negative) all the remaining compressed data is
+        read and everything decoded so far is returned.  An empty string is
+        returned when *size* is 0 or when nothing is left.
+        """
+        # act like file() obj and return empty string if size is 0
+        if size == 0:
+            return ''
+        # determine read size
+        bytesToRead = self.compress_size - self.bytes_read
+        # adjust read size for encrypted files since the first 12 bytes
+        # are for the encryption/password information
+        if self.decrypter is not None:
+            bytesToRead -= 12
+        if size is not None and size >= 0:
+            if self.compress_type == ZIP_STORED:
+                lr = len(self.readbuffer)
+                bytesToRead = min(bytesToRead, size - lr)
+            elif self.compress_type == ZIP_DEFLATED:
+                if len(self.readbuffer) > size:
+                    # the user has requested fewer bytes than we've already
+                    # pulled through the decompressor; don't read any more
+                    bytesToRead = 0
+                else:
+                    # user will use up the buffer, so read some more
+                    lr = len(self.rawbuffer)
+                    bytesToRead = min(bytesToRead, self.compreadsize - lr)
+        # avoid reading past end of file contents
+        if bytesToRead + self.bytes_read > self.compress_size:
+            bytesToRead = self.compress_size - self.bytes_read
+        # try to read from file (if necessary)
+        if bytesToRead > 0:
+            bytes = self.fileobj.read(bytesToRead)
+            self.bytes_read += len(bytes)
+            self.rawbuffer += bytes
+            # handle contents of raw buffer
+            if self.rawbuffer:
+                newdata = self.rawbuffer
+                self.rawbuffer = ''
+                # decrypt new data if we were given an object to handle that
+                if newdata and self.decrypter is not None:
+                    newdata = ''.join(map(self.decrypter, newdata))
+                # decompress newly read data if necessary
+                if newdata and self.compress_type == ZIP_DEFLATED:
+                    newdata = self.dc.decompress(newdata)
+                    self.rawbuffer = self.dc.unconsumed_tail
+                    if self.eof and len(self.rawbuffer) == 0:
+                        # we're out of raw bytes (both from the file and
+                        # the local buffer); flush just to make sure the
+                        # decompressor is done
+                        newdata += self.dc.flush()
+                        # prevent decompressor from being used again
+                        self.dc = None
+                self.readbuffer += newdata
+        # return what the user asked for
+        if size is None or len(self.readbuffer) <= size:
+            bytes = self.readbuffer
+            self.readbuffer = ''
+        else:
+            bytes = self.readbuffer[:size]
+            self.readbuffer = self.readbuffer[size:]
+        return bytes
+class ZipFile:
+    """ Class with methods to open, read, write, close, list zip files.
+    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
+    @var file: Either the path to the file, or a file-like object.
+          If it is a path, the file will be opened and closed by ZipFile.
+    @var mode: The mode can be either read "r", write "w" or append "a".
+    @var compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
+    @var allowZip64: if True ZipFile will create files with ZIP64 extensions when
+                needed, otherwise it will raise an exception when this would
+                be necessary.
+    """
+    fp = None                   # Set here since __del__ checks it
+    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
+        """Open the ZIP file with mode read "r", write "w" or append "a"."""
+        if mode not in ("r", "w", "a"):
+            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
+        if compression == ZIP_STORED:
+            pass
+        elif compression == ZIP_DEFLATED:
+            if not zlib:
+                raise RuntimeError,\
+                      "Compression requires the (missing) zlib module"
+        else:
+            raise RuntimeError, "That compression method is not supported"
+        self._allowZip64 = allowZip64
+        self._didModify = False
+        self.debug = 0  # Level of printing: 0 through 3
+        self.NameToInfo = {}    # Find file info given name
+        self.filelist = []      # List of ZipInfo instances for archive
+        self.compression = compression  # Method of compression
+        self.mode = key = mode.replace('b', '')[0]
+        self.pwd = None
+        # Check if we were passed a file-like object
+        if isinstance(file, basestring):
+            self._filePassed = 0
+            self.filename = file
+            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
+            try:
+                self.fp = open(file, modeDict[mode])
+            except IOError:
+                if mode == 'a':
+                    mode = key = 'w'
+                    self.fp = open(file, modeDict[mode])
+                else:
+                    raise
+        else:
+            self._filePassed = 1
+            self.fp = file
+            self.filename = getattr(file, 'name', None)
+        if key == 'r':
+            self._GetContents()
+        elif key == 'w':
+            pass
+        elif key == 'a':
+            try:                        # See if file is a zip file
+                self._RealGetContents()
+                # seek to start of directory and overwrite
+                self.fp.seek(self.start_dir, 0)
+            except BadZipfile:          # file is not a zip file, just append
+                self.fp.seek(0, 2)
+        else:
+            if not self._filePassed:
+                self.fp.close()
+                self.fp = None
+            raise RuntimeError, 'Mode must be "r", "w" or "a"'
+    def _GetContents(self):
+        """Read the directory, making sure we close the file if the format
+        is bad."""
+        try:
+            self._RealGetContents()
+        except BadZipfile:
+            if not self._filePassed:
+                self.fp.close()
+                self.fp = None
+            raise
+    def _RealGetContents(self):
+        """Read in the table of contents for the ZIP file."""
+        fp = self.fp
+        endrec = _EndRecData(fp)
+        if not endrec:
+            raise BadZipfile, "File is not a zip file"
+        if self.debug > 1:
+            print endrec
+        size_cd = endrec[5]             # bytes in central directory
+        offset_cd = endrec[6]   # offset of central directory
+        self.comment = endrec[8]        # archive comment
+        # endrec[9] is the offset of the "End of Central Dir" record
+        if endrec[9] > ZIP64_LIMIT:
+            x = endrec[9] - size_cd - 56 - 20
+        else:
+            x = endrec[9] - size_cd
+        # "concat" is zero, unless zip was concatenated to another file
+        concat = x - offset_cd
+        if self.debug > 2:
+            print "given, inferred, offset", offset_cd, x, concat
+        # self.start_dir:  Position of start of central directory
+        self.start_dir = offset_cd + concat
+        fp.seek(self.start_dir, 0)
+        data = fp.read(size_cd)
+        fp = cStringIO.StringIO(data)
+        total = 0
+        while total < size_cd:
+            centdir = fp.read(46)
+            total = total + 46
+            if centdir[0:4] != stringCentralDir:
+                raise BadZipfile, "Bad magic number for central directory"
+            centdir = struct.unpack(structCentralDir, centdir)
+            if self.debug > 2:
+                print centdir
+            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
+            # Create ZipInfo instance to store file information
+            x = ZipInfo(filename)
+            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
+            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
+            total = (total + centdir[_CD_FILENAME_LENGTH]
+                     + centdir[_CD_EXTRA_FIELD_LENGTH]
+                     + centdir[_CD_COMMENT_LENGTH])
+            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
+            (x.create_version, x.create_system, x.extract_version, x.reserved,
+                x.flag_bits, x.compress_type, t, d,
+                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
+            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
+            # Convert date/time code to (year, month, day, hour, min, sec)
+            x._raw_time = t
+            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
+                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
+            x._decodeExtra()
+            x.header_offset = x.header_offset + concat
+            self.filelist.append(x)
+            self.NameToInfo[x.filename] = x
+            if self.debug > 2:
+                print "total", total
+    def namelist(self):
+        """Return a list of file names in the archive."""
+        l = []
+        for data in self.filelist:
+            l.append(data.filename)
+        return l
+    def infolist(self):
+        """Return a list of class ZipInfo instances for files in the
+        archive."""
+        return self.filelist
+    def printdir(self):
+        """Print a table of contents for the zip file."""
+        print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
+        for zinfo in self.filelist:
+            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
+            print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
+    def testzip(self):
+        """Read all the files and check the CRC."""
+        for zinfo in self.filelist:
+            try:
+                self.read(zinfo.filename)       # Check CRC-32
+            except BadZipfile:
+                return zinfo.filename
+    def getinfo(self, name):
+        """Return the instance of ZipInfo given 'name'."""
+        info = self.NameToInfo.get(name)
+        if info is None:
+            raise KeyError(
+                'There is no item named %r in the archive' % name)
+        return info
+    def setpassword(self, pwd):
+        """Set default password for encrypted files."""
+        self.pwd = pwd
+    def read(self, name, pwd=None):
+        """Return file bytes (as a string) for name."""
+        return self.open(name, "r", pwd).read()
+    def open(self, name, mode="r", pwd=None):
+        """Return file-like object for 'name'."""
+        if mode not in ("r", "U", "rU"):
+            raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
+        if not self.fp:
+            raise RuntimeError, \
+                  "Attempt to read ZIP archive that was already closed"
+        # Only open a new file for instances where we were not
+        # given a file object in the constructor
+        if self._filePassed:
+            zef_file = self.fp
+        else:
+            zef_file = open(self.filename, 'rb')
+        # Get info object for name
+        zinfo = self.getinfo(name)
+        filepos = zef_file.tell()
+        zef_file.seek(zinfo.header_offset, 0)
+        # Skip the file header:
+        fheader = zef_file.read(30)
+        if fheader[0:4] != stringFileHeader:
+            raise BadZipfile, "Bad magic number for file header"
+        fheader = struct.unpack(structFileHeader, fheader)
+        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
+        if fheader[_FH_EXTRA_FIELD_LENGTH]:
+            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+        if fname != zinfo.orig_filename:
+            raise BadZipfile, \
+                      'File name in directory "%s" and header "%s" differ.' % (
+                          zinfo.orig_filename, fname)
+        # check for encrypted flag & handle password
+        is_encrypted = zinfo.flag_bits & 0x1
+        zd = None
+        if is_encrypted:
+            if not pwd:
+                pwd = self.pwd
+            if not pwd:
+                raise RuntimeError, "File %s is encrypted, " \
+                      "password required for extraction" % name
+            zd = _ZipDecrypter(pwd)
+            # The first 12 bytes in the cypher stream is an encryption header
+            #  used to strengthen the algorithm. The first 11 bytes are
+            #  completely random, while the 12th contains the MSB of the CRC,
+            #  or the MSB of the file time depending on the header type
+            #  and is used to check the correctness of the password.
+            bytes = zef_file.read(12)
+            h = map(zd, bytes[0:12])
+            if zinfo.flag_bits & 0x8:
+                # compare against the file type from extended local headers
+                check_byte = (zinfo._raw_time >> 8) & 0xff
+            else:
+                # compare against the CRC otherwise
+                check_byte = (zinfo.CRC >> 24) & 0xff
+            if ord(h[11]) != check_byte:
+                raise RuntimeError("Bad password for file", name)
+        # build and return a ZipExtFile
+        if zd is None:
+            zef = ZipExtFile(zef_file, zinfo)
+        else:
+            zef = ZipExtFile(zef_file, zinfo, zd)
+        # set universal newlines on ZipExtFile if necessary
+        if "U" in mode:
+            zef.set_univ_newlines(True)
+        return zef
+    def extract(self, member, path=None, pwd=None):
+        """Extract a member from the archive to the current working directory,
+           using its full name. Its file information is extracted as accurately
+           as possible. `member' may be a filename or a ZipInfo object. You can
+           specify a different directory using `path'.
+        """
+        if not isinstance(member, ZipInfo):
+            member = self.getinfo(member)
+        if path is None:
+            path = os.getcwd()
+        return self._extract_member(member, path, pwd)
+    def extractall(self, path=None, members=None, pwd=None):
+        """Extract all members from the archive to the current working
+           directory. `path' specifies a different directory to extract to.
+           `members' is optional and must be a subset of the list returned
+           by namelist().
+        """
+        if members is None:
+            members = self.namelist()
+        for zipinfo in members:
+            self.extract(zipinfo, path, pwd)
+    def _extract_member(self, member, targetpath, pwd):
+        """Extract the ZipInfo object 'member' to a physical
+           file on the path targetpath.
+        """
+        # build the destination pathname, replacing
+        # forward slashes to platform specific separators.
+        if targetpath[-1:] == "/":
+            targetpath = targetpath[:-1]
+        # don't include leading "/" from file name if present
+        if os.path.isabs(member.filename):
+            targetpath = os.path.join(targetpath, member.filename[1:])
+        else:
+            targetpath = os.path.join(targetpath, member.filename)
+        targetpath = os.path.normpath(targetpath)
+        # Create all upper directories if necessary.
+        upperdirs = os.path.dirname(targetpath)
+        if upperdirs and not os.path.exists(upperdirs):
+            os.makedirs(upperdirs)
+        source = self.open(member.filename, pwd=pwd)
+        target = file(targetpath, "wb")
+        shutil.copyfileobj(source, target)
+        source.close()
+        target.close()
+        return targetpath
+    def _writecheck(self, zinfo):
+        """Check for errors before writing a file to the archive."""
+        if zinfo.filename in self.NameToInfo:
+            if self.debug:      # Warning for duplicate names
+                print "Duplicate name:", zinfo.filename
+        if self.mode not in ("w", "a"):
+            raise RuntimeError, 'write() requires mode "w" or "a"'
+        if not self.fp:
+            raise RuntimeError, \
+                  "Attempt to write ZIP archive that was already closed"
+        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
+            raise RuntimeError, \
+                  "Compression requires the (missing) zlib module"
+        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
+            raise RuntimeError, \
+                  "That compression method is not supported"
+        if zinfo.file_size > ZIP64_LIMIT:
+            if not self._allowZip64:
+                raise LargeZipFile("Filesize would require ZIP64 extensions")
+        if zinfo.header_offset > ZIP64_LIMIT:
+            if not self._allowZip64:
+                raise LargeZipFile("Zipfile size would require ZIP64 extensions")
+    def write(self, filename, arcname=None, compress_type=None):
+        """Put the bytes from filename into the archive under the name
+        arcname."""
+        if not self.fp:
+            raise RuntimeError(
+                  "Attempt to write to ZIP archive that was already closed")
+        st = os.stat(filename)
+        mtime = time.localtime(st.st_mtime)
+        date_time = mtime[0:6]
+        # Create ZipInfo instance to store file information
+        if arcname is None:
+            arcname = filename
+        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
+        while arcname[0] in (os.sep, os.altsep):
+            arcname = arcname[1:]
+        zinfo = ZipInfo(arcname, date_time)
+        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
+        if compress_type is None:
+            zinfo.compress_type = self.compression
+        else:
+            zinfo.compress_type = compress_type
+        zinfo.file_size = st.st_size
+        zinfo.flag_bits = 0x00
+        zinfo.header_offset = self.fp.tell()    # Start of header bytes
+        self._writecheck(zinfo)
+        self._didModify = True
+        fp = open(filename, "rb")
+        # Must overwrite CRC and sizes with correct data later
+        zinfo.CRC = CRC = 0
+        zinfo.compress_size = compress_size = 0
+        zinfo.file_size = file_size = 0
+        self.fp.write(zinfo.FileHeader())
+        if zinfo.compress_type == ZIP_DEFLATED:
+            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
+                 zlib.DEFLATED, -15)
+        else:
+            cmpr = None
+        while 1:
+            buf = fp.read(1024 * 8)
+            if not buf:
+                break
+            file_size = file_size + len(buf)
+            CRC = crc32(buf, CRC) & 0xffffffff
+            if cmpr:
+                buf = cmpr.compress(buf)
+                compress_size = compress_size + len(buf)
+            self.fp.write(buf)
+        fp.close()
+        if cmpr:
+            buf = cmpr.flush()
+            compress_size = compress_size + len(buf)
+            self.fp.write(buf)
+            zinfo.compress_size = compress_size
+        else:
+            zinfo.compress_size = file_size
+        zinfo.CRC = CRC
+        zinfo.file_size = file_size
+        # Seek backwards and write CRC and file sizes
+        position = self.fp.tell()       # Preserve current position in file
+        self.fp.seek(zinfo.header_offset + 14, 0)
+        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
+              zinfo.file_size))
+        self.fp.seek(position, 0)
+        self.filelist.append(zinfo)
+        self.NameToInfo[zinfo.filename] = zinfo
+    def writestr(self, zinfo_or_arcname, bytes):
+        """Write a file into the archive.  The contents is the string
+        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
+        the name of the file in the archive."""
+        if not isinstance(zinfo_or_arcname, ZipInfo):
+            zinfo = ZipInfo(filename=zinfo_or_arcname,
+                            date_time=time.localtime(time.time())[:6])
+            zinfo.compress_type = self.compression
+        else:
+            zinfo = zinfo_or_arcname
+        if not self.fp:
+            raise RuntimeError(
+                  "Attempt to write to ZIP archive that was already closed")
+        zinfo.file_size = len(bytes)            # Uncompressed size
+        zinfo.header_offset = self.fp.tell()    # Start of header bytes
+        self._writecheck(zinfo)
+        self._didModify = True
+        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
+        if zinfo.compress_type == ZIP_DEFLATED:
+            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
+                 zlib.DEFLATED, -15)
+            bytes = co.compress(bytes) + co.flush()
+            zinfo.compress_size = len(bytes)    # Compressed size
+        else:
+            zinfo.compress_size = zinfo.file_size
+        zinfo.header_offset = self.fp.tell()    # Start of header bytes
+        self.fp.write(zinfo.FileHeader())
+        self.fp.write(bytes)
+        self.fp.flush()
+        if zinfo.flag_bits & 0x08:
+            # Write CRC and file sizes after the file data
+            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
+                  zinfo.file_size))
+        self.filelist.append(zinfo)
+        self.NameToInfo[zinfo.filename] = zinfo
+    def __del__(self):
+        """Call the "close()" method in case the user forgot."""
+        self.close()
+    def close(self):
+        """Close the file, and for mode "w" and "a" write the ending
+        records."""
+        if self.fp is None:
+            return
+        if self.mode in ("w", "a") and self._didModify: # write ending records
+            count = 0
+            pos1 = self.fp.tell()
+            for zinfo in self.filelist:         # write central directory
+                count = count + 1
+                dt = zinfo.date_time
+                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+                extra = []
+                if zinfo.file_size > ZIP64_LIMIT \
+                        or zinfo.compress_size > ZIP64_LIMIT:
+                    extra.append(zinfo.file_size)
+                    extra.append(zinfo.compress_size)
+                    file_size = 0xffffffff #-1
+                    compress_size = 0xffffffff #-1
+                else:
+                    file_size = zinfo.file_size
+                    compress_size = zinfo.compress_size
+                if zinfo.header_offset > ZIP64_LIMIT:
+                    extra.append(zinfo.header_offset)
+                    header_offset = 0xffffffffL  # -1 32 bit
+                else:
+                    header_offset = zinfo.header_offset
+                extra_data = zinfo.extra
+                if extra:
+                    # Append a ZIP64 field to the extra's
+                    extra_data = struct.pack(
+                            '<HH' + 'Q'*len(extra),
+                            1, 8*len(extra), *extra) + extra_data
+                    extract_version = max(45, zinfo.extract_version)
+                    create_version = max(45, zinfo.create_version)
+                else:
+                    extract_version = zinfo.extract_version
+                    create_version = zinfo.create_version
+                try:
+                    centdir = struct.pack(structCentralDir,
+                     stringCentralDir, create_version,
+                     zinfo.create_system, extract_version, zinfo.reserved,
+                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+                     zinfo.CRC, compress_size, file_size,
+                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
+                     0, zinfo.internal_attr, zinfo.external_attr,
+                     header_offset)
+                except DeprecationWarning:
+                    print >>sys.stderr, (structCentralDir,
+                     stringCentralDir, create_version,
+                     zinfo.create_system, extract_version, zinfo.reserved,
+                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+                     zinfo.CRC, compress_size, file_size,
+                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
+                     0, zinfo.internal_attr, zinfo.external_attr,
+                     header_offset)
+                    raise
+                self.fp.write(centdir)
+                self.fp.write(zinfo.filename)
+                self.fp.write(extra_data)
+                self.fp.write(zinfo.comment)
+            pos2 = self.fp.tell()
+            # Write end-of-zip-archive record
+            if pos1 > ZIP64_LIMIT:
+                # Need to write the ZIP64 end-of-archive records
+                zip64endrec = struct.pack(
+                        structEndArchive64, stringEndArchive64,
+                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
+                self.fp.write(zip64endrec)
+                zip64locrec = struct.pack(
+                        structEndArchive64Locator,
+                        stringEndArchive64Locator, 0, pos2, 1)
+                self.fp.write(zip64locrec)
+                endrec = struct.pack(structEndArchive, stringEndArchive,
+                            0, 0, count, count, pos2 - pos1, 0xffffffffL, 0)
+                self.fp.write(endrec)
+            else:
+                endrec = struct.pack(structEndArchive, stringEndArchive,
+                         0, 0, count, count, pos2 - pos1, pos1, 0)
+                self.fp.write(endrec)
+            self.fp.flush()
+        if not self._filePassed:
+            self.fp.close()
+        self.fp = None
+class PyZipFile(ZipFile):
+    """Class to create ZIP archives with Python library files and packages."""
+    def writepy(self, pathname, basename = ""):
+        """Add all files from "pathname" to the ZIP archive.
+        If pathname is a package directory, search the directory and
+        all package subdirectories recursively for all *.py and enter
+        the modules into the archive.  If pathname is a plain
+        directory, listdir *.py and enter all modules.  Else, pathname
+        must be a Python *.py file and the module will be put into the
+        archive.  Added modules are always module.pyo or module.pyc.
+        This method will compile the module.py into module.pyc if
+        necessary.
+        """
+        dir, name = os.path.split(pathname)
+        if os.path.isdir(pathname):
+            initname = os.path.join(pathname, "__init__.py")
+            if os.path.isfile(initname):
+                # This is a package directory, add it
+                if basename:
+                    basename = "%s/%s" % (basename, name)
+                else:
+                    basename = name
+                if self.debug:
+                    print "Adding package in", pathname, "as", basename
+                fname, arcname = self._get_codename(initname[0:-3], basename)
+                if self.debug:
+                    print "Adding", arcname
+                self.write(fname, arcname)
+                dirlist = os.listdir(pathname)
+                dirlist.remove("__init__.py")
+                # Add all *.py files and package subdirectories
+                for filename in dirlist:
+                    path = os.path.join(pathname, filename)
+                    root, ext = os.path.splitext(filename)
+                    if os.path.isdir(path):
+                        if os.path.isfile(os.path.join(path, "__init__.py")):
+                            # This is a package directory, add it
+                            self.writepy(path, basename)  # Recursive call
+                    elif ext == ".py":
+                        fname, arcname = self._get_codename(path[0:-3],
+                                         basename)
+                        if self.debug:
+                            print "Adding", arcname
+                        self.write(fname, arcname)
+            else:
+                # This is NOT a package directory, add its files at top level
+                if self.debug:
+                    print "Adding files from directory", pathname
+                for filename in os.listdir(pathname):
+                    path = os.path.join(pathname, filename)
+                    root, ext = os.path.splitext(filename)
+                    if ext == ".py":
+                        fname, arcname = self._get_codename(path[0:-3],
+                                         basename)
+                        if self.debug:
+                            print "Adding", arcname
+                        self.write(fname, arcname)
+        else:
+            if pathname[-3:] != ".py":
+                raise RuntimeError, \
+                      'Files added with writepy() must end with ".py"'
+            fname, arcname = self._get_codename(pathname[0:-3], basename)
+            if self.debug:
+                print "Adding file", arcname
+            self.write(fname, arcname)
+    def _get_codename(self, pathname, basename):
+        """Return (filename, archivename) for the path.
+        Given a module name path, return the correct file path and
+        archive name, compiling if necessary.  For example, given
+        /python/lib/string, return (/python/lib/string.pyc, string).
+        """
+        file_py  = pathname + ".py"
+        file_pyc = pathname + ".pyc"
+        file_pyo = pathname + ".pyo"
+        if os.path.isfile(file_pyo) and \
+                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
+            fname = file_pyo    # Use .pyo file
+        elif not os.path.isfile(file_pyc) or \
+             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
+            import py_compile
+            if self.debug:
+                print "Compiling", file_py
+            try:
+                py_compile.compile(file_py, file_pyc, None, True)
+            except py_compile.PyCompileError,err:
+                print err.msg
+            fname = file_pyc
+        else:
+            fname = file_pyc
+        archivename = os.path.split(fname)[1]
+        if basename:
+            archivename = "%s/%s" % (basename, archivename)
+        return (fname, archivename)
+def main(args = None):
+    import textwrap
+    USAGE=textwrap.dedent("""\
+        Usage:
+            zipfile.py -l zipfile.zip        # Show listing of a zipfile
+            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
+            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
+            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
+        """)
+    if args is None:
+        args = sys.argv[1:]
+    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
+        print USAGE
+        sys.exit(1)
+    if args[0] == '-l':
+        if len(args) != 2:
+            print USAGE
+            sys.exit(1)
+        zf = ZipFile(args[1], 'r')
+        zf.printdir()
+        zf.close()
+    elif args[0] == '-t':
+        if len(args) != 2:
+            print USAGE
+            sys.exit(1)
+        zf = ZipFile(args[1], 'r')
+        zf.testzip()
+        print "Done testing"
+    elif args[0] == '-e':
+        if len(args) != 3:
+            print USAGE
+            sys.exit(1)
+        zf = ZipFile(args[1], 'r')
+        out = args[2]
+        for path in zf.namelist():
+            if path.startswith('./'):
+                tgt = os.path.join(out, path[2:])
+            else:
+                tgt = os.path.join(out, path)
+            tgtdir = os.path.dirname(tgt)
+            if not os.path.exists(tgtdir):
+                os.makedirs(tgtdir)
+            fp = open(tgt, 'wb')
+            fp.write(zf.read(path))
+            fp.close()
+        zf.close()
+    elif args[0] == '-c':
+        if len(args) < 3:
+            print USAGE
+            sys.exit(1)
+        def addToZip(zf, path, zippath):
+            if os.path.isfile(path):
+                zf.write(path, zippath, ZIP_DEFLATED)
+            elif os.path.isdir(path):
+                for nm in os.listdir(path):
+                    addToZip(zf,
+                            os.path.join(path, nm), os.path.join(zippath, nm))
+            # else: ignore
+        zf = ZipFile(args[1], 'w', allowZip64=True)
+        for src in args[2:]:
+            addToZip(zf, src, os.path.basename(src))
+        zf.close()
+# Run the command-line interface when this module is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/src/obiuniq.py b/src/obiuniq.py
new file mode 100755
index 0000000..63c41c7
--- /dev/null
+++ b/src/obiuniq.py
@@ -0,0 +1,107 @@
+:py:mod:`obiuniq`: groups and dereplicates sequences  
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+The :py:mod:`obiuniq` command is in some ways analogous to the standard Unix ``uniq -c`` command.
+Instead of working line by line on text as the standard Unix tool does, the processing is done on
+sequence records.
+A sequence record is a complex object composed of an identifier, a set of 
+attributes (``key=value``), a definition, and the sequence itself. 
+The :py:mod:`obiuniq` command groups together sequence records. Then, for each group, a sequence 
+record is printed.
+A group is defined by the sequence and optionally by the values of a set of attributes 
+specified with the ``-c`` option.
+As the identifier, the set of attributes (``key=value``) and the definition of the sequence 
+records that are grouped together may be different, two options (``-m`` and ``-i``) 
+allow refining how these parts of the records are reported.
+    - By default, only attributes with identical values 
+      within a group of sequence records are kept.
+    - A ``count`` attribute is set to the total number of sequence records for each group.
+    - For each attribute specified by the ``-m`` option, a new attribute whose key is prefixed 
+      by ``merged_`` is created. These new attributes contain the number of times each value
+      occurs within the group of sequence records. 
+:py:mod:`obiuniq` and taxonomic information
+When a taxonomy is loaded (``-d`` or ``-t`` options), the ``merged_taxid`` 
+attribute is created and records the number of times each *taxid* has been found in the 
+group (it may be empty if no sequence record has a *taxid* attribute in the group). 
+In addition, a set of taxonomy-related attributes is generated for each group having at
+least one sequence record with a *taxid* attribute. The *taxid* attribute of the sequence
+group is set to the last common ancestor of the *taxids* of the group. All other taxonomy-related 
+attributes created (``species``, ``genus``, ``family``, ``species_name``, ``genus_name``, 
+``family_name``, ``rank``, ``scientific_name``) give information on the last common ancestor.
+from obitools.format.options import addInputFormatOption
+from obitools.fasta import formatFasta
+from obitools.utils.bioseq import uniqSequence,uniqPrefixSequence
+from obitools.options import getOptionManager
+from obitools.options.taxonomyfilter import addTaxonomyDBOptions
+from obitools.options.taxonomyfilter import loadTaxonomyDatabase
+def addUniqOptions(optionManager):
+    """Register the obiuniq-specific command line options.
+
+    An option group titled 'Obiuniq specific options' is created on
+    `optionManager` and receives four options:
+
+    - ``-m/--merge``: repeatable, values are appended to ``merge``.
+    - ``-i/--merge-ids``: flag stored in ``mergeids`` (default False).
+    - ``-c/--category-attribute``: repeatable, values are appended to
+      ``categories``.
+    - ``-p/--prefix``: flag stored in ``prefix`` (default False).
+
+    :param optionManager: parser object providing ``add_option_group``;
+        the keyword arguments used here follow the optparse conventions.
+    """
+    group = optionManager.add_option_group('Obiuniq specific options')
+    group.add_option('-m','--merge',
+                             action="append", dest="merge",
+                             metavar="<TAG NAME>",
+                             type="string",
+                             default=[],
+                             help="Attributes to merge")
+    group.add_option('-i','--merge-ids',
+                             action="store_true", dest="mergeids",
+                             default=False,
+                             help="Add the merged key with all ids of merged sequences")
+    group.add_option('-c','--category-attribute',
+                             action="append", dest="categories",
+                             metavar="<Attribute Name>",
+                             default=[],
+                             help="Add one attribute to the list of attributes "
+                             "used to group sequences before dereplication "
+                             "(option can be used several times)")
+    # The help text is assembled from adjacent string literals: every piece
+    # but the last must end with a space or the words run together.
+    group.add_option('-p','--prefix',
+                             action="store_true", dest="prefix",
+                             default=False,
+                             help="Dereplication is done based on prefix matching: "
+                                  "(i) The shortest sequence of each group is a prefix "
+                                  "of any sequence of its group (ii) Two shortest "
+                                  "sequences of any couple of groups are not the "
+                                  "prefix of the other one")
+if __name__=='__main__':
+    # Assemble the parser from the obiuniq, taxonomy and input-format option sets.
+    optionParser = getOptionManager([addUniqOptions,
+                                     addTaxonomyDBOptions,
+                                     addInputFormatOption],
+                                    progdoc=__doc__)
+    (options, entries) = optionParser()
+    taxonomy = loadTaxonomyDatabase(options)
+    # -p/--prefix switches from uniqSequence to uniqPrefixSequence.
+    dereplicate = uniqPrefixSequence if options.prefix else uniqSequence
+    for seq in dereplicate(entries, taxonomy, options.merge,
+                           options.mergeids, options.categories):
+        print formatFasta(seq)
diff --git a/src/oligotag.py b/src/oligotag.py
new file mode 100755
index 0000000..16a56bb
--- /dev/null
+++ b/src/oligotag.py
@@ -0,0 +1,106 @@
+:py:mod:`oligotag`: Designs a set of oligonucleotides with specified properties
+.. codeauthor:: Eric Coissac <eric.coissac at metabarcoding.org>
+:py:mod:`oligotag` designs a set of oligonucleotides that can be used for tagging a set 
+of samples during PCR reactions, by adding the oligonucleotides on the 5' end of the primers.
+Many options allow designing a set of oligonucleotides according to specified properties.
+import sys
+from obitools.options import getOptionManager
+from obitools import word
+from obitools.word.options import addOligoOptions
+from obitools.word.options import dnaWordIterator
+from obitools.word import wordDist,decodeWord
+from obitools.graph.algorithms.clique import cliqueIterator
+from obitools.graph import Graph
+def addOligoTagOptions(optionManager):
+    """Register the oligotag-specific ``-T/--timeout`` option.
+
+    The value is parsed as an integer and stored in ``timeout``; it
+    defaults to None when the option is not given.
+
+    NOTE: a former ``-E/--bad-pairs <filename>`` option is disabled and is
+    therefore not registered here.
+    """
+    timeout_settings = dict(action="store",
+                            dest="timeout",
+                            metavar="<seconde>",
+                            type="int",
+                            default=None,
+                            help="timeout to identify a clique of good size")
+    optionManager.add_option('-T', '--timeout', **timeout_settings)
+def edgeIterator(words,distmin=1,error=None):
+    """Yield each pair of words whose distance is at least `distmin`.
+
+    `words` is materialised into a list first, so any iterable is accepted.
+    Every word is compared once, through `wordDist`, with each word that
+    follows it. Pairs that do not reach `distmin` are not yielded; when
+    `error` is not None they are printed to it followed by their distance.
+    """
+    pool = list(words)
+    for rank, left in enumerate(pool):
+        for right in pool[rank+1:]:
+            gap = wordDist(left, right)
+            if gap>=distmin:
+                yield left, right
+            elif error is not None:
+                print >>error,left, right,gap
+def readData(edges):
+    """Return a new Graph holding one edge per (x, y) pair found in `edges`."""
+    network = Graph()
+    for first, second in edges:
+        network.addEdge(first, second)
+    return network
+if __name__=='__main__':
+    optionParser = getOptionManager([addOligoOptions, addOligoTagOptions])
+    (options, entries) = optionParser()
+    # Reporting of rejected pairs (the former --bad-pairs option) is disabled,
+    # so no error stream is handed to edgeIterator.
+    error = None
+    goodOligo = dnaWordIterator(options)
+    print >>sys.stderr,"Build good words graph..."
+    graph = readData(edgeIterator(goodOligo, options.oligoDist, error))
+    print >>sys.stderr,"Initial  graph size : %d  edge count : %d" % (len(graph),graph.edgeCount())
+    print >>sys.stderr
+    cliques = cliqueIterator(graph, options.familySize, timeout=options.timeout)
+    try:
+        # Only the first clique produced by the iterator is reported.
+        clique = cliques.next()
+        print >>sys.stderr
+        for node in clique:
+            label = graph.getNode(index=node).getLabel()
+            print decodeWord(label, options.oligoSize)
+    except StopIteration:
+        separator = "-------------------------------------------"
+        banner = ("",
+                  separator,
+                  "",
+                  "No solutions for this parametter set",
+                  "",
+                  separator,
+                  "")
+        for line in banner:
+            print >>sys.stderr, line

