[med-svn] [python-csb] 01/03: Imported Upstream version 1.2.1+dfsg
Tomás Di Domenico
tdido-guest at alioth.debian.org
Thu Oct 10 15:13:34 UTC 2013
This is an automated email from the git hooks/post-receive script.
tdido-guest pushed a commit to branch master
in repository python-csb.
commit de1f8cb0ec5dbe0324b17bded99443029d79d51c
Author: Tomás Di Domenico <tdido at tdido.com.ar>
Date: Wed Oct 9 20:09:05 2013 +0100
Imported Upstream version 1.2.1+dfsg
---
PKG-INFO | 2 +-
csb/__init__.py | 2 +-
csb/apps/__init__.py | 10 +-
csb/apps/buildhmm.py | 63 +-
csb/apps/csfrag.py | 542 ++++++++
csb/apps/hhfrag.py | 302 ++++-
csb/apps/hhsearch.py | 5 +-
csb/apps/precision.py | 30 +-
csb/bio/fragments/__init__.py | 207 ++-
csb/bio/fragments/rosetta.py | 4 +-
csb/bio/io/clans.py | 5 +-
csb/bio/io/cs.py | 206 +++
csb/bio/io/fasta.py | 16 +-
csb/bio/io/hhpred.py | 2 +-
csb/bio/io/noe.py | 269 ++++
csb/bio/io/wwpdb.py | 1096 +++++++++++----
csb/bio/nmr/__init__.py | 1105 ++++++++++++++-
csb/bio/nmr/resources/AtomConnectivity.xml | 812 +++++++++++
csb/bio/sequence/__init__.py | 21 +-
csb/bio/sequence/alignment.py | 617 +++++++++
csb/bio/structure/__init__.py | 52 +-
csb/build.py | 28 +-
csb/io/__init__.py | 5 +
csb/io/tsv.py | 38 +-
csb/numeric/__init__.py | 16 +-
csb/numeric/integrators.py | 10 +-
csb/statistics/__init__.py | 4 +-
csb/statistics/samplers/__init__.py | 12 +-
csb/statistics/samplers/mc/__init__.py | 838 +-----------
csb/statistics/samplers/mc/multichain.py | 1502 +++++++++++++++++++--
csb/statistics/samplers/mc/neqsteppropagator.py | 1239 +++++++++++++++++
csb/statistics/samplers/mc/propagators.py | 55 +-
csb/statistics/samplers/mc/singlechain.py | 311 ++++-
csb/test/__init__.py | 2 +-
csb/test/cases/bio/fragments/__init__.py | 11 +-
csb/test/cases/bio/hmm/__init__.py | 11 +-
csb/test/cases/bio/io/clans/__init__.py | 24 +-
csb/test/cases/bio/io/cs/__init__.py | 87 ++
csb/test/cases/bio/io/fasta/__init__.py | 4 +-
csb/test/cases/bio/io/noe/__init__.py | 148 ++
csb/test/cases/bio/io/wwpdb/__init__.py | 194 ++-
csb/test/cases/bio/nmr/__init__.py | 348 ++++-
csb/test/cases/bio/sequence/alignment/__init__.py | 196 +++
csb/test/cases/bio/structure/__init__.py | 29 +-
csb/test/cases/core/__init__.py | 22 +-
csb/test/cases/io/__init__.py | 87 +-
csb/test/cases/numeric/__init__.py | 59 +-
csb/test/cases/statistics/samplers/__init__.py | 728 +++++++++-
csb/test/data/1d3z.regular.pdb | 342 ++---
csb/test/data/2l01.v2.str | 31 +
csb/test/data/2l01.v3.str | 45 +
csb/test/data/Sparky.peaks | 5 +
csb/test/data/Xeasy1.peaks | 9 +
csb/test/data/Xeasy2.peaks | 8 +
csb/test/data/csb.tsv | 2 +-
csb/test/data/mapping.pdb | 12 +
csb/test/data/mapping2.pdb | 10 +
csb/test/data/mapping3.pdb | 9 +
csb/test/data/modified.pdb | 16 +
csb/test/data/modified2.pdb | 16 +
csb/test/data/out.clans | 14 +-
61 files changed, 10224 insertions(+), 1671 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 9303b87..8aca75a 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: csb
-Version: 1.1.1
+Version: 1.2.1
Summary: Computational Structural Biology Toolbox
Home-page: http://csb.codeplex.com
Author: Michael Habeck et al.
diff --git a/csb/__init__.py b/csb/__init__.py
index d6434c6..ff4c846 100644
--- a/csb/__init__.py
+++ b/csb/__init__.py
@@ -185,7 +185,7 @@ CSB is open source and distributed under OSI-approved MIT license::
"""
-__version__ = '1.1.1.523'
+__version__ = '1.2.1.603'
class Version(object):
diff --git a/csb/apps/__init__.py b/csb/apps/__init__.py
index 5360565..3a5ccc0 100644
--- a/csb/apps/__init__.py
+++ b/csb/apps/__init__.py
@@ -88,6 +88,8 @@ class AppExit(Exception):
self.message = message
self.code = code
self.usage = usage
+
+ super(AppExit, self).__init__(message, code, usage)
class Application(object):
"""
@@ -98,9 +100,6 @@ class Application(object):
"""
__metaclass__ = ABCMeta
- USAGE = ''
- HELP = ''
-
def __init__(self, args, log=sys.stdout):
self.__args = None
@@ -329,7 +328,7 @@ class ArgHandler(object):
self._parser = argparse.ArgumentParser(prog=program, description=description)
- def _add(self, kind, name, shortname, *a, **k):
+ def _add(self, kind, name, shortname, help="", *a, **k):
args = []
kargs = dict(k)
@@ -353,7 +352,8 @@ class ArgHandler(object):
args.append(ArgHandler.LONG_PREFIX + name)
assert len(args) in (1, 2)
- args.extend(a)
+ args.extend(a)
+ kargs["help"] = help.replace("%", "%%") # workaround for a bug in argparse
self.parser.add_argument(*args, **kargs)
diff --git a/csb/apps/buildhmm.py b/csb/apps/buildhmm.py
index 74aafbf..352c043 100644
--- a/csb/apps/buildhmm.py
+++ b/csb/apps/buildhmm.py
@@ -1,5 +1,5 @@
"""
-Build an HMM from a FASTA sequence. This program is a proxy to buildali.pl
+Build an HMM from a FASTA sequence. This program is a proxy to hhblits/addss.pl
and hhmake from the HHpred package.
@note: assuming you have the full HHpred package installed and configured.
@@ -40,7 +40,9 @@ class AppRunner(csb.apps.AppRunner):
'Used for naming the output files. Also, if the input is a PDB file with '
'multiple chains, CHAIN is used to pull the required chain from the file.',
required=True)
- cmd.add_scalar_option('tk_root', 't', str, 'path to the ToolkitRoot folder in your HHpred setup', default='/ebio/abt1_toolkit/share/wye')
+ cmd.add_scalar_option('tk-root', 't', str, 'path to the ToolkitRoot folder in your HHsuite setup', default='/ebio/abt1_toolkit/share/wye')
+ cmd.add_scalar_option('database', 'd', str, 'custom HHblits database; if not defined, toolkit\'s uniprot20 will be used', default=None)
+ cmd.add_scalar_option('tk-config', 'c', str, 'path to a folder containing custom HHsuite configs (e.g. HHPaths.pm)', default='.')
cmd.add_scalar_option('cpu', None, int, 'maximum degree of parallelism', default=1)
cmd.add_boolean_option('no-ss', None, 'do not include secondary structure', default=False)
@@ -54,7 +56,7 @@ class AppRunner(csb.apps.AppRunner):
class BuildProfileApp(csb.apps.Application):
- def main(self):
+ def main(self):
if os.path.isfile(self.args.query_id + '.hhm'):
BuildProfileApp.exit('# Profile "{0}" already exists, skipping'.format(self.args.query_id),
@@ -62,11 +64,10 @@ class BuildProfileApp(csb.apps.Application):
try:
self.log('# Building profile HMM for {0}...'.format(self.args.query))
- pb = ProfileBuilder.create(self.args.query, self.args.query_id, self.args.tk_root,
+ pb = ProfileBuilder.create(self.args.query, self.args.query_id, self.args.database, self.args.tk_root, self.args.tk_config,
pseudo=not self.args.no_pseudo, ss=not self.args.no_ss, cpu=self.args.cpu)
pb.build_alignment()
-
pb.make_hmm()
if not self.args.no_calibration:
@@ -119,7 +120,10 @@ class ProfileBuilder(object):
TRANSITION_PSEUDO = '-gapb 1.0 -gapd 0.15 -gape 1.0 -gapf 0.6 -gapg 0.6 -gapi 0.6'
@staticmethod
- def create(query, target_id, tk_root, pseudo=True, ss=True, cpu=1):
+ def create(query, target_id, database, tk_root, tk_config, pseudo=True, ss=True, cpu=1):
+
+ if database is None:
+ database = os.path.join(tk_root, "databases", "hhblits", "uniprot20")
if not os.path.isfile(query):
raise BuildIOError('File not found: ' + query)
@@ -130,21 +134,28 @@ class ProfileBuilder(object):
continue
if line.startswith('>'):
- return FASTAProfileBuilder(query, target_id, tk_root, pseudo, ss, cpu)
+ return FASTAProfileBuilder(query, target_id, database, tk_root, tk_config, pseudo, ss, cpu)
elif line.startswith('HEADER') or line.startswith('ATOM'):
- return PDBProfileBuilder(query, target_id, tk_root, pseudo, ss, cpu)
+ return PDBProfileBuilder(query, target_id, database, tk_root, tk_config, pseudo, ss, cpu)
else:
raise BuildArgError('Unknown input file format')
- def __init__(self, query, target_id, tk_root, pseudo=True, ss=True, cpu=1):
+ def __init__(self, query, target_id, database, tk_root, tk_config, pseudo=True, ss=True, cpu=1):
self.tk_root = tk_root
+ self.tk_config = tk_config
+ self.hhlib = os.path.join(tk_root, "bioprogs", "hhsuite")
+
if 'TK_ROOT' not in os.environ or not os.environ['TK_ROOT']:
- os.putenv('TK_ROOT', tk_root)
-
+ os.putenv('TK_ROOT', self.tk_root)
+ if 'HHLIB' not in os.environ or not os.environ['HHLIB']:
+ os.putenv('HHLIB', self.hhlib)
+ os.environ["PATH"] += os.pathsep + os.path.join(self.hhlib, "bin")
+
self.query = query
self.accession = target_id[:-1]
self.chain = target_id[-1]
+ self.database = database
self.pseudo = bool(pseudo)
self.ss = bool(ss)
self.cpu = cpu
@@ -172,20 +183,32 @@ class ProfileBuilder(object):
def build_alignment(self):
assert self._input is not None
- program = os.path.join(self.tk_root, 'bioprogs', 'hhpred', 'buildali.pl')
-
- if not self.ss:
- noss = '-noss'
- else:
- noss = ''
- cmd = 'perl {0} {1} -cpu {2} {3}'.format(program, noss, self.cpu, self._input)
+ program = os.path.join(self.tk_root, 'bioprogs', 'hhsuite', 'bin', 'hhblits')
+
+ ali = self.target_id + '.a3m'
+ cmd = '{0} -cpu {1} -i {2} -d {3} -nodiff -oa3m {4}'.format(
+ program, self.cpu, self._input, self.database, ali)
bali = csb.io.Shell.run(cmd)
- ali = self.target_id + '.a3m'
if bali.code != 0:
raise csb.io.ProcessError(bali)
if not os.path.isfile(ali):
- raise NoOutputError(ali, bali)
+ raise NoOutputError(ali, bali)
+
+ if self.ss:
+ program2 = os.path.join(self.tk_root, 'bioprogs', 'hhsuite', 'scripts', 'addss.pl')
+
+ with csb.io.TempFile() as patch:
+ for l in open(program2):
+ if l.lstrip().startswith("use HHPaths"):
+ patch.write('use lib "{0}";\n'.format(self.tk_config))
+ patch.write(l)
+ patch.flush()
+
+ cmd2 = "perl {0} {1}".format(patch.name, ali)
+ addss = csb.io.Shell.run(cmd2)
+ if addss.code != 0:
+ raise csb.io.ProcessError(addss)
self._ali = ali
return ali
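The new ProfileBuilder.create() signature above takes explicit database and tk_config arguments. A minimal, hedged usage sketch mirroring BuildProfileApp.main(); all paths are placeholders, and database=None falls back to $TK_ROOT/databases/hhblits/uniprot20 as shown in the diff:

    from csb.apps.buildhmm import ProfileBuilder

    # Placeholder paths; argument names follow the new create() signature.
    pb = ProfileBuilder.create("d1abcA_.fasta", "d1abcA_", database=None,
                               tk_root="/ebio/abt1_toolkit/share/wye", tk_config=".",
                               pseudo=True, ss=True, cpu=2)
    pb.build_alignment()   # hhblits, then the patched addss.pl when ss=True
    pb.make_hmm()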
diff --git a/csb/apps/csfrag.py b/csb/apps/csfrag.py
new file mode 100644
index 0000000..bd5985d
--- /dev/null
+++ b/csb/apps/csfrag.py
@@ -0,0 +1,542 @@
+"""
+CSfrag: build a dynamic library of analogous fragments, given a list
+of assigned chemical shifts.
+"""
+
+import os
+import numpy
+import multiprocessing
+
+import csb.io
+import csb.apps
+
+from csb.bio.io.wwpdb import FileSystemStructureProvider, StructureNotFoundError, PDBParseError
+from csb.bio.nmr import RandomCoil, ChemShiftScoringModel
+from csb.bio.structure import Chain, Broken3DStructureError
+from csb.bio.fragments import ChemShiftTarget, ChemShiftAssignment, RosettaFragsetFactory
+from csb.bio.io.cs import ChemShiftReader, ChemShiftFormatError
+from csb.bio.io.fasta import SequenceParser, SequenceFormatError
+
+
+class ExitCodes(csb.apps.ExitCodes):
+
+ IO_ERROR = 2
+ INVALID_DATA = 3
+ NO_OUTPUT = 5
+
+class AppRunner(csb.apps.AppRunner):
+
+ @property
+ def target(self):
+ return CSfragApp
+
+ def command_line(self):
+
+ cmd = csb.apps.ArgHandler(self.program, __doc__)
+ cpu = multiprocessing.cpu_count()
+
+ cmd.add_scalar_option('database', 'd', str, 'PDBS25 database directory (containing PDBS25cs.scs)', required=True)
+ cmd.add_scalar_option('shifts', 's', str, 'assigned chemical shifts table (NMR STAR file fragment)', required=True)
+
+ cmd.add_scalar_option('window', 'w', int, 'sliding window size', default=8)
+ cmd.add_scalar_option('top', 't', int, 'maximum number per starting position', default=25)
+ cmd.add_scalar_option('cpu', 'c', int, 'maximum degree of parallelism', default=cpu)
+
+ cmd.add_scalar_option('verbosity', 'v', int, 'verbosity level', default=1)
+ cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
+ cmd.add_boolean_option('filtered-map', 'f', 'make an additional filtered fragment map of centroids', default=False)
+
+ cmd.add_positional_argument('QUERY', str, 'query sequence (FASTA file)')
+
+ return cmd
+
+class CSfragApp(csb.apps.Application):
+
+ def main(self):
+ if not os.path.isdir(self.args.output):
+ CSfragApp.exit('Output directory does not exist', code=ExitCodes.INVALID_DATA, usage=True)
+
+ try:
+ csf = CSfrag(self.args.QUERY, self.args.shifts, self.args.database, self.args.window, logger=self)
+ output = os.path.join(self.args.output, csf.query.accession)
+
+ frags = csf.extract_fragments(self.args.top, self.args.cpu)
+
+ if len(frags) == 0:
+ CSfragApp.exit('No fragments found!', code=ExitCodes.NO_OUTPUT)
+
+ fragmap = csf.build_fragment_map()
+ fragmap.dump(output + '.csfrags.08')
+
+ if self.args.filtered_map:
+ fragmap = csf.build_filtered_map()
+ fragmap.dump(output + '.filtered.08')
+
+ self.log('\nDONE.')
+
+ except ArgumentIOError as ae:
+ CSfragApp.exit(str(ae), code=ExitCodes.IO_ERROR)
+
+ except ArgumentError as ae:
+ CSfragApp.exit(str(ae), code=ExitCodes.INVALID_DATA, usage=True)
+
+ except ChemShiftFormatError as ce:
+ msg = "Can't parse input chemical shifts: " + str(ce)
+ CSfragApp.exit(msg, code=ExitCodes.INVALID_DATA)
+
+
+ def log(self, message, ending='\n', level=1):
+
+ if level <= self.args.verbosity:
+ super(CSfragApp, self).log(message, ending)
+
+
+class SecondaryShiftConverter(object):
+ """
+ Helper, which reads assigned shifts from NMR STAR files and calculates
+ corrected secondary shifts.
+ """
+
+ def convert(self, file, chain):
+ """
+ Compute secondary shifts.
+
+ @param file: NMR STAR path and file name
+ @type file: str
+ @param chain: the protein chain, containing the chemical shifts
+ (L{Chain.from_sequence} may be useful)
+ @type chain: L{Chain}
+
+ @return: dictionary of the form: [rank: [nucleus: sec shift]]
+ @rtype: dict
+ """
+ rc = RandomCoil.get()
+ cs = {}
+
+ for ni in ChemShiftReader().guess(file).read_file(file):
+
+ if ni.name in ChemShiftScoringModel.NUCLEI:
+ ni.shift = rc.secondary_shift(chain, ni.position, ni.name, ni.shift)
+
+ cs.setdefault(ni.position, {})
+ cs[ni.position][ni.name] = ni.shift
+
+ return cs
+
+class SecondaryShiftReader(object):
+ """
+ Reads secondary shifts from files in CSfrag format.
+ """
+
+ DB = 'pdbs25cs.scs'
+
+ def read_shifts(self, string):
+ """
+ Read secondary shifts.
+ @param string: complete secondary shift block
+ @type string: str
+
+ @return: dictionary of the form: [rank: [nucleus: sec shift]]
+ @rtype: dict
+ """
+
+ shifts = {}
+
+ for l in string.splitlines():
+
+ if l.startswith('#') or not l.strip():
+ continue
+
+ l = l.split('\t')
+ rank = int(l[0])
+
+ for n, cs in zip(ChemShiftScoringModel.NUCLEI, l[1:]):
+ if cs != '':
+ shifts.setdefault(rank, {})[n] = float(cs)
+
+ return shifts
+
+ def load_database(self, path, file=DB):
+ """
+ Read the entire PDBS25CS database.
+
+ @return: dictionary of the form: [entry ID: [rank: [nucleus: sec shift]]]
+ @rtype: dict
+ """
+
+ db = {}
+ file = os.path.join(path, file)
+
+ with open(file) as stream:
+ er = csb.io.EntryReader(stream, '#', None)
+
+ for e in er.entries():
+ entry = e[10:15]
+ db[entry] = self.read_shifts(e)
+
+ return db
+
+class ScoringHelper(object):
+
+ def __init__(self, window):
+
+ self._window = window
+ self._model = ChemShiftScoringModel()
+
+ @property
+ def window(self):
+ return self._window
+
+ def score(self, qcs, scs, qstart, qend, sstart, send):
+
+ window = self._window
+
+ if window is None:
+ window = min(qend - qstart + 1, send - sstart + 1)
+
+ off_start, off_end = self.offsets(qstart, qend, window=window)
+ qs = qstart + off_start
+ qe = qend - off_end
+ ss = sstart + off_start
+ se = send - off_end
+
+ assert qe - qs + 1 == se - ss + 1 == window
+
+ score = 0
+
+ for nucleus in ChemShiftScoringModel.NUCLEI:
+ query = []
+ subject = []
+
+ for qr, sr in zip(range(qs, qe + 1), range(ss, se + 1)):
+ try:
+ qshift = qcs[qr][nucleus]
+ sshift = scs[sr][nucleus]
+
+ if qshift is not None and sshift is not None:
+ query.append(qshift)
+ subject.append(sshift)
+
+ except KeyError:
+ continue
+
+ if query and subject:
+ deltas = numpy.array(query) - numpy.array(subject)
+ score += self._model.score(nucleus, deltas).sum()
+
+ return score
+
+ def offsets(self, start, end, window=6):
+
+ if end - start + 1 <= window:
+ return 0, 0
+
+ d1 = ((end - start + 1) - window) / 2
+ ns = start + d1
+ ne = ns + window - 1
+ d2 = end - ne
+
+ return d1, d2
+
+
+class ArgumentError(ValueError):
+ pass
+
+class ArgumentIOError(ArgumentError):
+ pass
+
+class InvalidOperationError(ValueError):
+ pass
+
+
+class CSfrag(object):
+ """
+ @param query: query FASTA sequence path and file name
+ @type query: str
+ @param cstable: file, containing the table of assigned experimental chemical shifts
+ @type cstable: str
+ @param database: path to the PDBS25 directory
+ @type database: str
+ @param logger: logging client (needs to have a C{log} method)
+ @type logger: L{Application}
+ """
+
+ def __init__(self, query, cstable, database, window=8, logger=None):
+
+ self._query = None
+ self._qcs = None
+ self._matches = None
+ self._helper = ScoringHelper(window)
+ self._database = None
+ self._window = None
+ self._app = logger
+ self._pdb = None
+
+ try:
+ fasta = SequenceParser().parse_file(query)
+ if len(fasta) != 1:
+ raise ArgumentError("The input FASTA file should contain one sequence")
+ elif fasta[0].length < 1:
+ raise ArgumentError("Zero-length query sequence")
+
+ self._query = Chain.from_sequence(fasta[0], 'A')
+ self._query.accession = fasta[0].id
+ self._qcs = SecondaryShiftConverter().convert(cstable, self._query)
+
+ if len(self._qcs) == 0:
+ raise ArgumentError("No chemical shifts read; check your input")
+
+ except IOError as io:
+ raise ArgumentIOError(str(io))
+
+ except SequenceFormatError as se:
+ raise ArgumentError("Can't parse FASTA file: {0}".format(str(se)))
+
+
+ self.database = database
+ self.window = window
+
+ @property
+ def query(self):
+ return self._query
+
+ @property
+ def database(self):
+ return self._database
+ @database.setter
+ def database(self, value):
+ database = value
+ pdbs25cs = os.path.join(value, SecondaryShiftReader.DB)
+ if not os.path.isfile(pdbs25cs):
+ raise ArgumentError('PDBS25CS not found here: ' + pdbs25cs)
+ self._database = database
+ self._pdb = FileSystemStructureProvider(database)
+
+ @property
+ def window(self):
+ return self._window
+ @window.setter
+ def window(self, value):
+ value = int(value)
+ if value < 1:
+ raise ValueError("Invalid sliding window: {0}".format(value))
+ self._window = value
+
+ def log(self, *a, **ka):
+ if self._app:
+ self._app.log(*a, **ka)
+
+ def extract_fragments(self, top=25, cpu=2):
+ """
+ Extract fragments with matching chemical shifts using a sliding window.
+
+ @param top: L{MatchTable} capacity per starting position
+ @type top: int
+ @param cpu: degree of parallelism
+ @type cpu: int
+
+ @rtype: tuple of L{ChemShiftAssignment}s
+ """
+ self.log("# Reading chemical shifts...", level=1)
+ db = SecondaryShiftReader().load_database(self.database)
+ matches = MatchTable(self.query.length, capacity=top)
+
+ slices = []
+ fragments = []
+
+ for qs in range(1, self.query.length + 1):
+ qe = qs + self.window - 1
+ if qe > self.query.length:
+ break
+
+ slices.append((qs, qe))
+
+ self.log("\n# Processing target {0}...".format(self.query.accession), level=1)
+ pool = multiprocessing.Pool(cpu)
+
+ try:
+ for subject in db:
+ tasks = []
+
+ for qs, qe in slices:
+ task = pool.apply_async(_task, [self._helper, subject, qs, qe, self._qcs, db[subject]])
+ tasks.append(task)
+
+ for task in tasks:
+ for result in task.get():
+ if result.score > ChemShiftAssignment.BIT_SCORE_THRESHOLD * self.window:
+ matches.add(result)
+
+ except KeyboardInterrupt:
+ pass
+ finally:
+ pool.terminate()
+
+ for rank in matches:
+ msg = '{0:3} {1:3} ({2:2} aa) {3:3} fragments'
+ self.log(msg.format(rank, rank + self.window - 1, self.window, len(matches[rank])),
+ level=1)
+
+
+ self.log("\n# Extracting fragments...")
+
+ for group in matches.by_source:
+ try:
+ source_id = group[0].entry_id
+ source = self._pdb.get(source_id).first_chain
+ source.compute_torsion()
+
+ for match in group:
+ try:
+ row = ' {0.entry_id:5} L{0.qs:3} {0.qe:3} {1}aa S:{0.score:5.1f}'
+ self.log(row.format(match, self.window), ending='', level=2)
+
+ fragment = ChemShiftAssignment(source=source, start=match.ss, end=match.se,
+ qstart=match.qs, qend=match.qe,
+ window=self.window, rmsd=None, score=match.score)
+ fragments.append(fragment)
+ self.log('', level=2)
+
+ except Broken3DStructureError:
+ self.log(' corrupt', level=2)
+ continue
+ except PDBParseError:
+ continue
+ except StructureNotFoundError:
+ self.log(" Warning: Template {0} is missing!".format(source_id))
+
+ self._matches = fragments
+ return tuple(fragments)
+
+ def build_fragment_map(self):
+ """
+ Build a full Rosetta fragset.
+ @rtype: L{RosettaFragmentMap}
+ """
+
+ if self._matches is None:
+ self.extract_fragments()
+
+ self.log('\n# Building fragment map...')
+
+ target = ChemShiftTarget(self.query.accession, self.query.length, self.query.residues)
+ target.assignall(self._matches)
+
+ factory = RosettaFragsetFactory()
+ return factory.make_fragset(target)
+
+ def build_filtered_map(self):
+ """
+ Build a filtered fragset of centroids.
+ @rtype: L{RosettaFragmentMap}
+ """
+
+ if self._matches is None:
+ self.extract_fragments()
+
+ self.log('\n# Building filtered map...')
+
+ target = ChemShiftTarget(self.query.accession, self.query.length, self.query.residues)
+ target.assignall(self._matches)
+
+ factory = RosettaFragsetFactory()
+ return factory.make_filtered(target, extend=False)
+
+class MatchInfo(object):
+
+ def __init__(self, entry_id, qs, qe, ss, se, score):
+
+ self.entry_id = entry_id
+ self.qs = qs
+ self.qe = qe
+ self.ss = ss
+ self.se = se
+ self.score = score
+
+ def __str__(self):
+ return '{0.qs:4} {0.qe:4} {0.ss:4} {0.se:4} {0.score:10.3f}'.format(self)
+
+ def __cmp__(self, other):
+ return cmp(self.score, other.score)
+
+class MatchTable(object):
+
+ def __init__(self, length, capacity=25):
+
+ if capacity < 1:
+ capacity = 1
+
+ self._capacity = capacity
+ self._length = length
+ self._t = {}
+
+ for i in range(1, length + 1):
+ self._t[i] = []
+
+ def add(self, m):
+
+ matches = self._t[m.qs]
+
+ if len(matches) < self._capacity:
+
+ matches.append(m)
+ matches.sort()
+
+ elif m.score > matches[-1].score:
+
+ matches.pop()
+ matches.append(m)
+ matches.sort()
+
+ def __getitem__(self, rank):
+ return tuple(self._t[rank])
+
+ def __iter__(self):
+ return iter(self._t)
+
+ @property
+ def by_source(self):
+
+ matches = {}
+
+ for rank in self:
+ for m in self[rank]:
+ if m.entry_id not in matches:
+ matches[m.entry_id] = []
+
+ matches[m.entry_id].append(m)
+
+ for entry_id in matches:
+ yield tuple(matches[entry_id])
+
+def _task(helper, subject, qs, qe, qcs, scs):
+
+ try:
+ results = []
+ slength = max(scs or [0])
+
+ for ss in range(1, slength + 1, 3):
+ se = ss + helper.window - 1
+
+ if se > slength:
+ break
+
+ score = helper.score(qcs, scs, qs, qe, ss, se)
+
+ if score is not None:
+ info = MatchInfo(subject, qs, qe, ss, se, score)
+ results.append(info)
+
+ return results
+
+ except KeyboardInterrupt:
+ return []
+
+
+
+
+if __name__ == '__main__':
+
+ args = "cs.py -v 1 -f -t 12 -d /home/ivan/Desktop/cstest/db -s /home/ivan/Desktop/cstest/t.str -o /home/ivan/Desktop/cstest /home/ivan/Desktop/cstest/t.fa".split()
+ AppRunner().run()
+
\ No newline at end of file
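A hedged sketch of driving the new CSfrag class directly, mirroring what CSfragApp.main() does above; the query FASTA, the chemical shift table and the PDBS25 directory are placeholders:

    from csb.apps.csfrag import CSfrag

    csf = CSfrag("query.fa", "shifts.str", "/path/to/pdbs25", window=8)
    fragments = csf.extract_fragments(top=25, cpu=2)        # tuple of ChemShiftAssignment

    if fragments:
        csf.build_fragment_map().dump("query.csfrags.08")   # full fragset
        csf.build_filtered_map().dump("query.filtered.08")  # centroids only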
diff --git a/csb/apps/hhfrag.py b/csb/apps/hhfrag.py
index 456c227..51e0b86 100644
--- a/csb/apps/hhfrag.py
+++ b/csb/apps/hhfrag.py
@@ -14,7 +14,7 @@ import csb.bio.fragments
import csb.bio.fragments.rosetta as rosetta
import csb.bio.structure
-import csb.io
+import csb.io.tsv
import csb.core
@@ -44,14 +44,18 @@ class AppRunner(csb.apps.AppRunner):
cmd.add_scalar_option('max', 'M', int, 'maximum query segment length', default=21)
cmd.add_scalar_option('step', 's', int, 'query segmentation step', default=3)
cmd.add_scalar_option('cpu', 'c', int, 'maximum degree of parallelism', default=cpu)
+
+ cmd.add_scalar_option('gap-filling', 'g', str, 'path to a fragment file (e.g. CSfrag or Rosetta NNmake), which will be used '
+ 'to complement low-confidence regions (when specified, a hybrid fragment library will be produced)')
+ cmd.add_scalar_option('filtered-filling', 'F', str, 'path to a filtered fragment file (e.g. filtered CSfrag-ments), which will '
+ 'be mixed with the HHfrag-set and then filtered, resulting in a double-filtered library')
+ cmd.add_boolean_option('filtered-map', 'f', 'make an additional filtered fragment map of centroids and predict torsion angles', default=False)
+ cmd.add_boolean_option('c-alpha', None, 'include also C-alpha vectors in the output', default=False)
+ cmd.add_scalar_option('confidence-threshold', 't', float, 'confidence threshold for gap filling', default=0.7)
cmd.add_scalar_option('verbosity', 'v', int, 'verbosity level', default=2)
- cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
- cmd.add_scalar_option('gap-filling', 'g', str, 'path to a Rosetta 9-mer fragment file, that will be used '
- 'to complement gaps in the fragment map (if specified, a joint fragment file will be produced)')
- cmd.add_boolean_option('filtered-map', 'f', 'make an additional filtered fragment map', default=False)
- cmd.add_boolean_option('c-alpha', None, 'include also C-alpha vectors in the output', default=False)
-
+ cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
+
cmd.add_positional_argument('QUERY', str, 'query profile HMM (e.g. created with csb.apps.buildhmm)')
return cmd
@@ -82,20 +86,29 @@ class HHfragApp(csb.apps.Application):
fragmap.dump(output + '.hhfrags.09', builder)
if self.args.filtered_map:
- fragmap = hhf.build_filtered_map()
+ fragmap, events = hhf.build_filtered_map()
fragmap.dump(output + '.filtered.09', builder)
-
+ tsv = PredictionBuilder.create(events).product
+ tsv.dump(output + '.centroids.tsv')
+
+ if self.args.filtered_filling:
+ fragmap, events = hhf.build_hybrid_filtered_map(self.args.filtered_filling)
+ fragmap.dump(output + '.hybrid.filtered.09', builder)
+ tsv = PredictionBuilder.create(events).product
+ tsv.dump(output + '.hybrid.centroids.tsv')
+
if self.args.gap_filling:
- fragmap = hhf.build_combined_map(self.args.gap_filling)
+ fragmap = hhf.build_combined_map(self.args.gap_filling, self.args.confidence_threshold)
fragmap.dump(output + '.complemented.09', builder)
+
self.log('\nDONE.')
except ArgumentIOError as ae:
HHfragApp.exit(str(ae), code=ExitCodes.IO_ERROR)
except ArgumentError as ae:
- HHfragApp.exit(str(ae), code=ExitCodes.INVALID_DATA, usage=True)
+ HHfragApp.exit(str(ae), code=ExitCodes.INVALID_DATA)
except csb.io.InvalidCommandError as ose:
msg = '{0!s}: {0.program}'.format(ose)
@@ -107,7 +120,9 @@ class HHfragApp(csb.apps.Application):
except csb.io.ProcessError as pe:
message = 'Bad exit code from HHsearch: #{0.code}.\nSTDERR: {0.stderr}\nSTDOUT: {0.stdout}'.format(pe.context)
- HHfragApp.exit(message, ExitCodes.HHSEARCH_FAILURE)
+ HHfragApp.exit(message, ExitCodes.HHSEARCH_FAILURE)
+
+
def log(self, message, ending='\n', level=1):
@@ -125,6 +140,18 @@ class InvalidOperationError(ValueError):
class HHfrag(object):
+ """
+ The HHfrag dynamic fragment detection protocol.
+
+ @param query: query HMM path and file name
+ @type query: str
+ @param binary: the HHsearch binary
+ @type binary: str
+ @param database: path to the PDBS25 directory
+ @type database: str
+ @param logger: logging client (needs to have a C{log} method)
+ @type logger: L{Application}
+ """
PDBS = 'pdbs25.hhm'
@@ -140,11 +167,13 @@ class HHfrag(object):
self._app = logger
self._database = None
self._pdbs25 = None
- self._output = None
self._aligner = None
self.database = database
self.aligner = hhsearch.HHsearch(binary, self.pdbs25, cpu=2)
+
+ if self.query.layers.length < 1:
+ raise ArgumentError("Zero-length sequence profile")
@property
def query(self):
@@ -182,6 +211,20 @@ class HHfrag(object):
self._app.log(*a, **ka)
def slice_query(self, min=6, max=21, step=3, cpu=None):
+ """
+ Run the query slicer and collect the optimal query segments.
+
+ @param min: min segment length
+ @type min: int
+ @param max: max segment length
+ @type max: int
+ @param step: slicing step
+ @type step: int
+ @param cpu: degree of parallelism
+ @type cpu: int
+
+ @rtype: tuple of L{SliceContext}
+ """
if not 0 < min <= max:
raise ArgumentError('min and max must be positive numbers, with max >= min')
@@ -221,12 +264,17 @@ class HHfrag(object):
return tuple(hsqs)
def extract_fragments(self):
-
- self.log('\n# Extracting fragments...')
+ """
+ Extract all matching fragment instances, given the list of optimal
+ query slices, generated during the first stage.
+
+ @rtype: tuple of L{Assignment}s
+ """
if self._hsqs is None:
- raise InvalidOperationError('The query has to be sliced first')
-
+ self.slice_query()
+
+ self.log('\n# Extracting fragments...')
fragments = []
for si in self._hsqs:
@@ -249,10 +297,9 @@ class HHfrag(object):
source.compute_torsion()
try:
- fragment = csb.bio.fragments.Assignment(source,
- chunk.start, chunk.end, hit.id,
- chunk.qstart, chunk.qend, chunk.probability,
- rmsd=None, tm_score=None)
+ fragment = csb.bio.fragments.Assignment(source, chunk.start, chunk.end,
+ chunk.qstart, chunk.qend,
+ probability=chunk.probability)
fragments.append(fragment)
if cn > 1:
self.log(' (chunk #{0})'.format(cn), level=2)
@@ -282,46 +329,90 @@ class HHfrag(object):
self.log(bar, level=2)
def build_fragment_map(self):
+ """
+ Build a full Rosetta fragset.
+ @rtype: L{RosettaFragmentMap}
+ """
- self.log('\n# Building dynamic fragment map...')
-
if self._matches is None:
- raise InvalidOperationError('You need to extract some fragments first')
-
+ self.extract_fragments()
+
+ self.log('\n# Building dynamic fragment map...')
self._plot_lengths()
target = csb.bio.fragments.Target.from_profile(self.query)
target.assignall(self._matches)
-
+
factory = csb.bio.fragments.RosettaFragsetFactory()
return factory.make_fragset(target)
- def _filter_event_handler(self, ri):
- if ri.rep is None:
- self.log('{0.rank:3}. {0.confidence:5.3f} {0.count:3} - - -'.format(ri, ri.rep), level=2)
+ def _filter_event_handler(self, ri):
+
+ if ri.gap is True or ri.confident is False:
+ self.log('{0.rank:3}. {0.confidence:5.3f} {0.count:3}'.format(ri), level=2)
+
else:
- self.log('{0.rank:3}. {0.confidence:5.3f} {0.count:3} {1.id:5} {1.start:3} {1.end:3}'.format(ri, ri.rep), level=2)
+ phi = PredictionBuilder.format_angle(ri.torsion.phi)
+ psi = PredictionBuilder.format_angle(ri.torsion.psi)
+ omega = PredictionBuilder.format_angle(ri.torsion.omega)
+
+ pred = "{0.source_id:5} {0.start:3} {0.end:3} {1} {2} {3}".format(ri.rep, phi, psi, omega)
+ self.log('{0.rank:3}. {0.confidence:5.3f} {0.count:3} {1}'.format(ri, pred), level=2)
def build_filtered_map(self):
-
+ """
+ Build a filtered fragset of centroids.
+ @return: filtered fragset and a list of residue-wise predictions
+ (centroid and torsion angles)
+ @rtype: L{RosettaFragmentMap}, list of L{ResidueEventInfo}
+ """
+
+ if self._matches is None:
+ self.extract_fragments()
+
self.log('\n# Building filtered map...')
- self.log('\n Confidence Count Representative', level=2)
+ self.log('\n Confidence Recurrence Representative Phi Psi Omega', level=2)
+
+ events = []
+ def logger(ri):
+ events.append(ri)
+ self._filter_event_handler(ri)
target = csb.bio.fragments.Target.from_profile(self.query)
target.assignall(self._matches)
factory = csb.bio.fragments.RosettaFragsetFactory()
- return factory.make_filtered(target, extend=True,
- callback=self._filter_event_handler)
+ fragset = factory.make_filtered(target, extend=True, callback=logger)
+
+ return fragset, events
- def _merge_event_handler(self, rei):
- if rei.confidence is None:
- self.log('{0.rank:3}. - {0.count:3}'.format(rei), level=2)
- else:
- self.log('{0.rank:3}. {0.confidence:5.3f} {0.count:3}'.format(rei), level=2)
+ def _merge_event_handler(self, ri):
+
+ marked = ""
+
+ if ri.gap is True or ri.confident is False:
+ marked = "*"
+
+ self.log('{0.rank:3}. {0.confidence:5.3f} {0.count:3} {1:>3}'.format(ri, marked), level=2)
- def build_combined_map(self, fragfile, top=25):
-
+ def build_combined_map(self, fragfile, threshold=0.7, top=25):
+ """
+ Build a hybrid map, where low-confidence regions are complemented
+ with the specified filling.
+
+ @param threshold: confidence threshold
+ @type threshold: float
+ @param fragfile: filling fragset (Rosetta fragment file)
+ @type fragfile: str
+
+ @return: filtered fragset and a list of residue-wise predictions
+ (centroid and torsion angles)
+ @rtype: L{RosettaFragmentMap}, list of L{ResidueEventInfo}
+ """
+
+ if self._matches is None:
+ self.extract_fragments()
+
self.log('\n# Building complemented map...')
try:
@@ -329,17 +420,69 @@ class HHfrag(object):
except IOError as io:
raise ArgumentIOError(str(io))
- self.log('\n {0} rosetta fragments loaded'.format(filling.size))
- self.log(' Confidence Count', level=2)
+ self.log('\n {0} supplementary fragments loaded'.format(filling.size))
+ self.log(' Confidence Recurrence Fill?', level=2)
target = csb.bio.fragments.Target.from_profile(self.query)
target.assignall(self._matches)
factory = csb.bio.fragments.RosettaFragsetFactory()
- return factory.make_combined(target, filling, threshold=0.5,
+ return factory.make_combined(target, filling, threshold=threshold,
callback=self._merge_event_handler)
+ def build_hybrid_filtered_map(self, fragfile):
+ """
+ Mix the fragset with the specified (filtered) filling and then filter
+ the mixture. If the filling is a filtered CSfrag library, this will
+ produce a double-filtered map.
+
+ @param fragfile: filtered filling (filtered CSfrag fragment file)
+ @type fragfile: str
+
+ @rtype: L{RosettaFragmentMap}
+ """
+
+ if self._matches is None:
+ self.extract_fragments()
+
+ self.log('\n# Building hybrid filtered map...')
+
+ filling = []
+ events = []
+
+ def logger(ri):
+ events.append(ri)
+ self._filter_event_handler(ri)
+
+ try:
+ db = csb.bio.io.wwpdb.FileSystemStructureProvider(self.database)
+
+ for f in rosetta.RosettaFragmentMap.read(fragfile):
+ filling.append(csb.bio.fragments.Assignment.from_fragment(f, db))
+
+ except IOError as io:
+ raise ArgumentIOError(str(io))
+ except csb.bio.io.wwpdb.StructureNotFoundError as sne:
+ msg = "{0} is not a PDBS25-derived fragset (template {1} not found)"
+ raise ArgumentIOError(msg.format(fragfile, str(sne)))
+
+ self.log('\n {0} supplementary fragments loaded'.format(len(filling)))
+ self.log('\n Confidence Recurrence Representative Phi Psi Omega', level=2)
+
+ if len(filling) > self.query.layers.length:
+ msg = "{0} does not look like a filtered fragset (too many centroids)"
+ raise ArgumentError(msg.format(fragfile))
+
+ target = csb.bio.fragments.Target.from_profile(self.query)
+ target.assignall(self._matches)
+ target.assignall(filling)
+
+ factory = csb.bio.fragments.RosettaFragsetFactory()
+ fragset = factory.make_filtered(target, extend=False, callback=logger)
+
+ return fragset, events
+
class SliceContext(hhsearch.Context):
def __init__(self, segment, start, end):
@@ -372,8 +515,75 @@ class SliceContext(hhsearch.Context):
return self.recurrence < other.recurrence
-
-
+class PredictionBuilder(object):
+
+ HEADER = "rank:int residue:str confidence:float centroid:str phi:float psi:float omega:float"
+
+ @staticmethod
+ def format_angle(angle):
+ """
+ @param angle: torsion angle value
+ @type angle: float
+ """
+
+ if angle is None:
+ return '{0:>6}'.format("-")
+ else:
+ return '{0:6.1f}'.format(angle)
+
+ @staticmethod
+ def create(ri):
+ """
+ @param ri: all predictions
+ @type ri: list of L{ResidueEventInfo}
+ """
+ builder = PredictionBuilder()
+ builder.addall(ri)
+ return builder
+
+ def __init__(self):
+ self._tsv = csb.io.tsv.Table(PredictionBuilder.HEADER)
+
+ @property
+ def product(self):
+ """
+ @rtype: L{Table}
+ """
+ return self._tsv
+
+ def add(self, ri):
+ """
+ @param ri: single residue prediction
+ @type ri: L{ResidueEventInfo}
+ """
+
+ row = [ri.rank, repr(ri.type), ri.confidence]
+
+ if ri.rep:
+ row.append(ri.rep.id)
+ row.append(ri.torsion.phi)
+ row.append(ri.torsion.psi)
+ row.append(ri.torsion.omega)
+
+ else:
+ row.extend([None] * 4)
+
+ self.product.insert(row)
+
+ def addall(self, ri):
+ """
+ @param ri: all predictions
+ @type ri: list of L{ResidueEventInfo}
+ """
+
+ ri = list(ri)
+ ri.sort(key=lambda i: i.rank)
+
+ for i in ri:
+ self.add(i)
+
+
+
if __name__ == '__main__':
AppRunner().run()
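The filtered-map path now returns per-residue predictions alongside the fragset; a hedged sketch of that flow, following HHfragApp.main() above. The HHfrag constructor arguments are only inferred from its new docstring (query HMM, HHsearch binary, PDBS25 directory) and, like the paths, are assumptions:

    from csb.apps.hhfrag import HHfrag, PredictionBuilder

    hhf = HHfrag("query.hhm", "/usr/bin/hhsearch", "/path/to/pdbs25")

    fragmap, events = hhf.build_filtered_map()      # fragset plus list of ResidueEventInfo
    tsv = PredictionBuilder.create(events).product  # csb.io.tsv.Table: rank/residue/confidence/centroid/phi/psi/omega
    tsv.dump("query.centroids.tsv")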
diff --git a/csb/apps/hhsearch.py b/csb/apps/hhsearch.py
index fc5d1be..eeab329 100644
--- a/csb/apps/hhsearch.py
+++ b/csb/apps/hhsearch.py
@@ -266,10 +266,9 @@ class HHsearch(object):
for c in pool.map(_task, taskargs):
results.append(c)
except KeyboardInterrupt:
+ pass
+ finally:
pool.terminate()
- except:
- pool.terminate()
- raise
return results
diff --git a/csb/apps/precision.py b/csb/apps/precision.py
index 030eb65..cbe8632 100644
--- a/csb/apps/precision.py
+++ b/csb/apps/precision.py
@@ -138,7 +138,7 @@ class LibrarySuperimposer(object):
self._library = library
self._output = os.path.abspath(output)
self._tab = os.path.join(self._output, native.entry_id + '.fragments.tab')
- self._figure = os.path.join(self._output, native.entry_id + '.precision.png')
+ self._figure = os.path.join(self._output, native.entry_id + '.precision.pdf')
self._out = open(self._tab, 'w')
self._save = bool(save)
self._cutoff = float(cutoff)
@@ -184,6 +184,7 @@ class LibrarySuperimposer(object):
self._out.write(line)
matches.append(match)
+ pool.terminate()
return matches
def plot(self, matches):
@@ -229,8 +230,8 @@ class LibrarySuperimposer(object):
with csb.io.plots.Chart() as chart:
- chart.plot.bar(residues, background, color='#FFB0B0', linewidth=None, edgecolor='#FFB0B0')
- chart.plot.bar(residues, precision2, color='#50A6DA', linewidth=None, edgecolor='#50A6DA')
+ chart.plot.bar(residues, background, color='#f5f5f5', linewidth=None, edgecolor='#f5f5f5')
+ chart.plot.bar(residues, precision2, color='#5ba9da', linewidth=None, edgecolor='#5ba9da')
chart.plot.set_title(self._native.entry_id)
chart.plot.set_xlabel('Residue')
@@ -238,15 +239,28 @@ class LibrarySuperimposer(object):
chart.plot.set_ylabel('Precision, %')
chart.plot.set_ylim(0, 100)
- xaxis = chart.plot.axes.xaxis
- xaxis.set_minor_locator(matplotlib.ticker.IndexLocator(1, 0))
- xaxis.set_major_locator(matplotlib.ticker.IndexLocator(5, 0))
-
+ xaxis = chart.plot.axes.xaxis
+ yaxis = chart.plot.axes.yaxis
+ #xaxis.set_minor_locator(matplotlib.ticker.IndexLocator(1, 0))
+ xaxis.set_major_locator(matplotlib.ticker.IndexLocator(10, 0))
+ xaxis.tick_bottom()
+ yaxis.tick_left()
+ for t in xaxis.get_major_ticks():
+ t.tick1On = False
+ t.tick2On = False
+ for t in xaxis.get_ticklabels():
+ t.set_fontsize(16)
+ for t in yaxis.get_ticklabels():
+ t.set_fontsize(16)
+
+ chart.plot.spines["right"].set_visible(False)
+ chart.plot.spines["top"].set_visible(False)
+
try:
chart.width = 15
chart.height = 5.5
- chart.save(self._figure)
+ chart.save(self._figure, chart.formats.PDF)
except IOError as io:
raise ArgumentIOError("Can't save figure: " + str(io))
diff --git a/csb/bio/fragments/__init__.py b/csb/bio/fragments/__init__.py
index a9582e8..68735c3 100644
--- a/csb/bio/fragments/__init__.py
+++ b/csb/bio/fragments/__init__.py
@@ -18,6 +18,7 @@ import csb.core
import csb.bio.utils
import csb.bio.structure
import csb.bio.sequence
+
from csb.bio.structure import SecondaryStructure
@@ -384,6 +385,17 @@ class AssignmentFactory(object):
def assignment(self, *a, **k):
return Assignment(*a, **k)
+
+class ChemShiftAssignmentFactory(object):
+
+ def target(self, *a, **k):
+ return ChemShiftTarget(*a, **k)
+
+ def residue(self, *a, **k):
+ return ChemShiftTargetResidue(*a, **k)
+
+ def assignment(self, *a, **k):
+ return ChemShiftAssignment(*a, **k)
class Target(csb.core.AbstractNIContainer):
"""
@@ -570,7 +582,28 @@ class Target(csb.core.AbstractNIContainer):
overlap=self._overlap, segments=segments)
return target
-
+
+class ChemShiftTarget(Target):
+
+ def __init__(self, id, length, residues, overlap=None):
+
+ super(ChemShiftTarget, self).__init__(id, length, residues, overlap=overlap,
+ segments=None, factory=ChemShiftAssignmentFactory())
+
+ def assign(self, fragment):
+
+ if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
+ raise ValueError("Fragment out of range")
+
+ self._assignments._append_item(fragment)
+
+ rank = fragment.qstart
+ ai = ResidueAssignmentInfo(fragment, rank)
+ self._residues[rank].assign(ai)
+
+ def clone(self):
+ return self._factory.target(self.id, self.length, [r.native for r in self.residues],
+ overlap=self._overlap)
class TargetResidue(object):
"""
@@ -683,6 +716,25 @@ class TargetResidue(object):
pos = len(positive) * 100.0 / self.assignments.length
return pos
+
+class ChemShiftTargetResidue(TargetResidue):
+
+ def verybest(self):
+
+ best = None
+
+ for ai in self.assignments:
+ a = ai.fragment
+
+ if a.score < ChemShiftAssignment.BIT_SCORE_THRESHOLD * a.window:
+ continue
+
+ if best is None or a.score > best.score:
+ best = a
+ elif a.score == best.score and a.length > best.length:
+ best = a
+
+ return best
class TargetSegment(object):
@@ -949,8 +1001,8 @@ class Assignment(FragmentMatch):
@type rmsd: float
"""
- def __init__(self, source, start, end, id, qstart, qend, probability, rmsd, tm_score=None,
- score=None, neff=None, segment=None, internal_id=None):
+ def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
+ tm_score=None, score=None, neff=None, segment=None, internal_id=None):
assert source.has_torsion
sub = source.subregion(start, end, clone=True)
@@ -975,6 +1027,9 @@ class Assignment(FragmentMatch):
self._segment_start = segment
self.internal_id = internal_id
+ if id is None:
+ id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)
+
super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)
self._ss = SecondaryStructure('-' * self.length)
@@ -991,14 +1046,17 @@ class Assignment(FragmentMatch):
@rtype: L{Assignment}
"""
- structure = provider.get(fragment.accession)
+ try:
+ structure = provider.get(fragment.accession)
+ except KeyError:
+ structure = provider.get(fragment.source_id)
source = structure.chains[fragment.chain]
source.compute_torsion()
id = "{0}:{1}-{2}".format(fragment.source_id, fragment.start, fragment.end)
- return Assignment(source, fragment.start, fragment.end, id,
- fragment.qstart, fragment.qend, 0, 0)
+ return Assignment(source, fragment.start, fragment.end,
+ fragment.qstart, fragment.qend, id, 0, 0)
@property
def backbone(self):
@@ -1164,6 +1222,8 @@ class Assignment(FragmentMatch):
@rtype: float
"""
+ if self is other:
+ return 0
common = self.overlap(other)
@@ -1251,6 +1311,22 @@ class Assignment(FragmentMatch):
stream.write(' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(acc, ch, start, aa, ss, phi, psi, omega, weight))
return stream.getvalue()
+
+class ChemShiftAssignment(Assignment):
+
+ BIT_SCORE_THRESHOLD = 1.1
+
+ def __init__(self, source, start, end, qstart, qend, window, score, rmsd):
+
+ self._window = window
+
+ super(ChemShiftAssignment, self).__init__(
+ source, start, end, qstart, qend, id=None, probability=1.0,
+ rmsd=rmsd, tm_score=None, score=score, neff=None, segment=None, internal_id=None)
+
+ @property
+ def window(self):
+ return self._window
class ClusterExhaustedError(ValueError):
pass
@@ -1277,8 +1353,9 @@ class FragmentCluster(object):
@param threshold: RMSD threshold; continue shrinking until the mean distance
drops below this value (default=1.5)
@type threshold: float
- @param connectedness: use only nodes which are connected to at least c% of all
- initial nodes (default=0.5, that means 50%)
+ @param connectedness: when calculating centroids, consider only nodes
+ connected to at least c% of all surviving vertices
+ (default=0.5)
@type connectedness: float
"""
@@ -1291,26 +1368,34 @@ class FragmentCluster(object):
self._matrix = {}
self._threshold = float(threshold)
self._connectedness = float(connectedness)
-
+ self._weight = 0
+ self._edges = 0
+
+ visited = set()
+
for i in items:
-
- self._matrix[i] = {}
- conn = 0.0
+ self._matrix.setdefault(i, {})
for j in items:
- distance = i.distance(j)
- if distance is not None:
- conn += 1
- self._matrix[i][j] = distance
-
- if conn / len(items) < self.connectedness:
- # reject i as a first class node
- del self._matrix[i]
+ self._matrix.setdefault(j, {})
+ if (j, i) not in visited:
+ visited.add((i, j))
+ distance = i.distance(j)
+
+ if distance is not None:
+ self._matrix[i][j] = distance
+ self._matrix[j][i] = distance
+ i.weight += distance
+ j.weight += distance
+
+ self._weight += distance
+ self._edges += 1
+
self._items = set(self._matrix.keys())
if len(self._items) < 1:
- raise ClusterEmptyError()
+ raise ClusterEmptyError()
self._initcount = self.count
@@ -1363,14 +1448,21 @@ class FragmentCluster(object):
@return: the current mean distance in the cluster
@rtype: float
"""
-
- d = self._distances(skip=skip)
+ if self._edges == 0:
+ raise ClusterExhaustedError()
- if len(d) > 0:
- return numpy.mean(d)
+ if not skip:
+ return float(self._weight) / self._edges
+
else:
- raise ClusterExhaustedError()
-
+ weight = self._weight - skip.weight
+ edges = self._edges - len(self._matrix[skip])
+
+ if edges < 1:
+ return 0
+ else:
+ return float(weight) / edges
+
def centroid(self):
"""
@return: the current representative fragment
@@ -1387,10 +1479,11 @@ class FragmentCluster(object):
avg = None
for i in self._matrix:
+ edges = len(self._matrix[i]) or (1.0 / self.count)
+ curravg = float(i.weight) / edges
+ conn = len(self._matrix[i]) / float(self.count)
- curravg = numpy.mean(list(self._matrix[i].values()))
-
- if avg is None or curravg < avg:
+ if avg is None or (curravg < avg and conn >= self.connectedness):
avg = curravg
cen = i
elif curravg == avg:
@@ -1398,7 +1491,7 @@ class FragmentCluster(object):
cen = i
d = self._distances()
- mean = numpy.mean(d)
+ mean = numpy.mean(d)
cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
for i in self._matrix:
@@ -1418,16 +1511,20 @@ class FragmentCluster(object):
@type item: L{ClusterNode}
@raise ClusterExhaustedError: if this is the last remaining item
"""
-
if self.count == 1:
raise ClusterExhaustedError()
assert not item.fixed
for i in self._matrix:
- if item in self._matrix[i]:
+ if item in self._matrix[i]:
+ distance = self._matrix[i][item]
+ self._weight -= distance
+ i.weight -= distance
+
del self._matrix[i][item]
-
+ self._edges -= 1
+
del self._matrix[item]
self._items.remove(item)
@@ -1526,6 +1623,7 @@ class ClusterNode(object):
self.fragment = fragment
self.fixed = bool(fixed)
+ self.weight = 0
self._distance = getattr(self.fragment, distance)
@@ -1743,14 +1841,31 @@ class SmoothFragmentMap(csb.core.AbstractContainer):
class ResidueEventInfo(object):
- def __init__(self, rank, confidence=None, count=None, confident=True, rep=None):
+ def __init__(self, residue, confidence=0, count=0, confident=True, gap=False, rep=None):
- self.rank = rank
+ self.residue = residue
self.confidence = confidence
self.confident = confident
+ self.gap = gap
self.count = count
self.rep = rep
+
+ @property
+ def rank(self):
+ return self.residue.rank
+ @property
+ def type(self):
+ return self.residue.type
+
+ @property
+ def torsion(self):
+ if self.rep:
+ return self.rep.torsion_at(self.rank, self.rank)[0]
+ else:
+ return None
+
+
class RosettaFragsetFactory(object):
"""
Simplifies the construction of fragment libraries.
@@ -1824,19 +1939,23 @@ class RosettaFragsetFactory(object):
if r.assignments.length == 0:
if callback:
- callback(ResidueEventInfo(r.native.rank, None, 0, False))
+ callback(ResidueEventInfo(r.native, gap=True))
continue
cluster = r.filter()
if cluster is None:
if callback:
- callback(ResidueEventInfo(r.native.rank, 0, 0, False))
+ callback(ResidueEventInfo(r.native, 0, 0, confident=False))
continue
if cluster.confidence >= threshold:
covered.add(r.native.rank)
- elif callback:
- callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, False))
+ confident = True
+ else:
+ confident = False
+
+ if callback:
+ callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, confident))
for r in target.residues:
if r.native.rank not in covered: # true for gaps and low-conf residues
@@ -1865,10 +1984,14 @@ class RosettaFragsetFactory(object):
for r in target.residues:
if r.assignments.length == 0:
- continue
+ if callback:
+ callback(ResidueEventInfo(r.native, gap=True))
+ continue
cluster = r.filter(extend=extend)
if cluster is None:
+ if callback:
+ callback(ResidueEventInfo(r.native, 0, 0, confident=False))
continue
if extend and cluster.has_alternative:
@@ -1879,7 +2002,7 @@ class RosettaFragsetFactory(object):
fragment = self.rosetta.RosettaFragment.from_object(best)
fragments.append(fragment)
if callback:
- callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, rep=cluster.centroid))
+ callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))
fragments.sort()
return self.rosetta.RosettaFragmentMap(fragments, target.length)
diff --git a/csb/bio/fragments/rosetta.py b/csb/bio/fragments/rosetta.py
index 27b46ca..d727fbc 100644
--- a/csb/bio/fragments/rosetta.py
+++ b/csb/bio/fragments/rosetta.py
@@ -156,7 +156,7 @@ class RosettaFragment(object):
for rank, aa, torsion, calpha in zip(range(a.start, a.end + 1), a.sequence, a.torsion, a.backbone):
residues.append(ResidueInfo(rank, aa, 'L', torsion, calpha))
- return RosettaFragment(a.source_id, a.qstart, a.qend, a.start, a.end, 1 - a.probability, residues)
+ return RosettaFragment(a.source_id, a.qstart, a.qend, a.start, a.end, 1 - (a.probability or 0.0), residues)
@property
def length(self):
@@ -176,7 +176,7 @@ class RosettaFragment(object):
@property
def id(self):
- return '{0.source_id}.{0.start}-{0.end}'.format(self)
+ return '{0.source_id}:{0.start}-{0.end}'.format(self)
@property
def qstart(self):
diff --git a/csb/bio/io/clans.py b/csb/bio/io/clans.py
index 611d767..18abd0c 100644
--- a/csb/bio/io/clans.py
+++ b/csb/bio/io/clans.py
@@ -477,11 +477,12 @@ class ClansParser(object):
groups = []
for line in block:
- p, v = line.split('=')
+ p, v = line.split('=', 1)
if p == 'name':
groups.append({'name': v})
elif p == 'numbers':
- groups[-1][p] = [int(val) for val in v.split(';')[:-1]]
+ ## empty groups with terminal semicolon ("numbers=;") contained an empty string in v.split(';')
+ groups[-1][p] = [int(val) for val in [member for member in v.split(';')[:-1] if member != '']]
else:
groups[-1][p] = v
return groups
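The extra filter above guards against empty groups serialized as "numbers=;". A small, self-contained illustration of the difference (values made up):

    v = ";"                                          # value part of "numbers=;"
    old = v.split(';')[:-1]                          # [''] -- int('') used to raise ValueError
    new = [m for m in v.split(';')[:-1] if m != '']  # []
    print(old, new)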
diff --git a/csb/bio/io/cs.py b/csb/bio/io/cs.py
new file mode 100644
index 0000000..db36d96
--- /dev/null
+++ b/csb/bio/io/cs.py
@@ -0,0 +1,206 @@
+"""
+Simple NMR STAR chemical shift readers.
+"""
+
+from csb.bio.nmr import ChemShiftInfo
+
+
+class ChemShiftFormatError(ValueError):
+ pass
+
+
+class ChemShiftReader(object):
+ """
+ Simple NMR STAR v2 chemical shift reader.
+
+ @note: This is not a full-fledged, semantic NMR STAR parser. It handles
+ only the chemical shift table.
+ """
+
+ FRAME = 'save_assigned_chemical_shifts'
+
+ RANK = '_Residue_seq_code'
+ RESIDUE = '_Residue_label'
+ ATOM = '_Atom_name'
+ ELEMENT = '_Atom_type'
+ SHIFT = '_Chem_shift_value'
+
+ @staticmethod
+ def create(frame=FRAME, version=2):
+ """
+ Parser factory: create a new parser, given a saveframe name
+ and format version.
+
+ @param frame: name of the saveframe to read
+ @type frame: str
+ @param version: NMR STAR format version
+ @type version: int
+
+ @return: an instance of any L{ChemShiftReader} class
+ @rtype: L{ChemShiftReader}
+ """
+
+ if version == 3:
+ return ChemShift3Reader(frame=frame)
+ elif version == 2:
+ return ChemShiftReader(frame=frame)
+ else:
+ raise ValueError('Unknown NMR-STAR version')
+
+ @staticmethod
+ def guess(file, frame=FRAME):
+ """
+ Parser factory: try to guess the correct NMR STAR version from a given
+ file and create an appropriate parser.
+
+ @param file: NMR STAR path and file name
+ @type file: str
+ @param frame: name of the saveframe to read
+ @type frame: str
+
+ @return: an instance of any L{ChemShiftReader} class
+ @rtype: L{ChemShiftReader}
+
+ @raise ChemShiftFormatError: on failure to determine the NMR STAR version
+ """
+
+ with open(file) as cs:
+ content = cs.read()
+
+ if not content.strip():
+ return ChemShiftReader.create()
+ elif ChemShift3Reader.SHIFT3 in content:
+ return ChemShiftReader.create(frame, version=3)
+ elif ChemShiftReader.SHIFT in content:
+ return ChemShiftReader.create(frame, version=2)
+ else:
+ raise ChemShiftFormatError("Can't guess NMR-STAR version")
+
+ def __init__(self, frame=FRAME):
+ self._frame = frame
+
+ def read_file(self, filename):
+ """
+ Parse the specified file.
+
+ @param filename: file path and name
+ @type filename: str
+
+ @rtype: tuple of L{ChemShiftInfo}
+ """
+ with open(filename) as input:
+ return self.read_shifts(input.read())
+
+ def read_shifts(self, star_table):
+ """
+ Parse a given NMR STAR chemical shift table.
+
+ @param star_table: NMR STAR chemical shift table
+ @type star_table: str
+
+ @rtype: tuple of L{ChemShiftInfo}
+ @raise ChemShiftFormatError: on parse error
+ """
+
+ shifts = []
+
+ init = False
+ in_shifts = False
+ fields = []
+ lines = iter(star_table.splitlines())
+
+ if self._frame in star_table:
+ self._scroll(lines, self._frame)
+
+
+ for l in lines:
+ ls = l.strip()
+
+ if not in_shifts:
+
+ if ls == 'loop_':
+ assert in_shifts is False and not fields and init is False
+ init = True
+ continue
+
+ elif init and ls.startswith('_'):
+ assert in_shifts is False
+ fields.append(l.strip())
+ continue
+
+ elif init and not ls:
+ if len(fields) < 1:
+ raise ChemShiftFormatError("No fields found in the CS table")
+ in_shifts = True
+ continue
+
+ else:
+
+ if ls == 'stop_':
+ break
+
+ elif ls.startswith('#'):
+ continue
+
+ elif ls:
+ values = l.split()
+ if len(values) < len(fields):
+ raise ChemShiftFormatError("Insufficient number of values: {0}".format(l))
+ data = dict(zip(fields, values))
+
+ shifts.append(self._create_shift(data))
+
+ return tuple(shifts)
+
+ def _scroll(self, iterator, field):
+
+ for line in iterator:
+ if line.lstrip().startswith(field):
+ break
+
+ def _create_shift(self, data):
+
+ try:
+ position = int(data[ChemShiftReader.RANK])
+ residue = data[ChemShiftReader.RESIDUE]
+ name = data[ChemShiftReader.ATOM]
+ element = data[ChemShiftReader.ELEMENT]
+ shift = float(data[ChemShiftReader.SHIFT])
+
+ except KeyError as ke:
+ raise ChemShiftFormatError("Required field {0} not found".format(str(ke)))
+ except ValueError as ve:
+ raise ChemShiftFormatError("Can't parse value: {0}".format(str(ve)))
+
+ return ChemShiftInfo(position, residue, name, element, shift)
+
+
+class ChemShift3Reader(ChemShiftReader):
+ """
+ Simple NMR STAR v3 chemical shift reader.
+
+ @note: This is not a full-fledged, semantic NMR STAR parser. It handles
+ only the chemical shift table.
+ """
+
+ RANK3 = '_Atom_chem_shift.Seq_ID'
+ RESIDUE3 = '_Atom_chem_shift.Comp_ID'
+ ATOM3 = '_Atom_chem_shift.Atom_ID'
+ ELEMENT3 = '_Atom_chem_shift.Atom_type'
+ SHIFT3 = '_Atom_chem_shift.Val'
+
+ def _create_shift(self, data):
+
+ try:
+ position = data[ChemShift3Reader.RANK3]
+ residue = data[ChemShift3Reader.RESIDUE3]
+ name = data[ChemShift3Reader.ATOM3]
+ element = data[ChemShift3Reader.ELEMENT3]
+ shift = data[ChemShift3Reader.SHIFT3]
+
+ except KeyError as ke:
+ raise ChemShiftFormatError("Required field {0} not found".format(str(ke)))
+ except ValueError as ve:
+ raise ChemShiftFormatError("Can't parse value: {0}".format(str(ve)))
+
+ return ChemShiftInfo(position, residue, name, element, shift)
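A minimal usage sketch for the new csb.bio.io.cs readers, assuming "shifts.str" is an NMR-STAR file; guess() picks the v2 or v3 parser as implemented above:

    from csb.bio.io.cs import ChemShiftReader

    reader = ChemShiftReader.guess("shifts.str")    # ChemShiftReader or ChemShift3Reader
    for cs in reader.read_file("shifts.str"):       # tuple of ChemShiftInfo
        print(cs.position, cs.name, cs.shift)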
diff --git a/csb/bio/io/fasta.py b/csb/bio/io/fasta.py
index 8fd85ed..c46e4c0 100644
--- a/csb/bio/io/fasta.py
+++ b/csb/bio/io/fasta.py
@@ -44,6 +44,10 @@ from csb.bio.sequence import SequenceAlignment, StructureAlignment, A3MAlignment
from csb.bio.sequence import SequenceCollection, AbstractSequence, Sequence, RichSequence, ChainSequence
+class SequenceFormatError(ValueError):
+ pass
+
+
class BaseSequenceParser(object):
"""
FASTA parser template. Subclasses must implement the way FASTA strings are
@@ -92,6 +96,8 @@ class BaseSequenceParser(object):
@return: a new sequence, created with L{BaseSequenceParser.product_factory}
@rtype: L{AbstractSequence}
+
+ @raise SequenceFormatError: on parse error
"""
pass
@@ -104,6 +110,8 @@ class BaseSequenceParser(object):
@return: a list of L{Sequence}s
@rtype: L{SequenceCollection}
+
+ @raise SequenceFormatError: on parse error
"""
stream = csb.io.MemoryStream()
@@ -120,6 +128,8 @@ class BaseSequenceParser(object):
@return: a list of L{Sequence}s
@rtype: L{SequenceCollection}
+
+ @raise SequenceFormatError: on parse error
"""
if isinstance(fasta_file, csb.core.string):
stream = open(fasta_file)
@@ -143,6 +153,8 @@ class BaseSequenceParser(object):
@return: efficient cursor over all L{Sequence}s (parse on demand)
@rtype: iterator
+
+ @raise SequenceFormatError: on parse error
"""
if isinstance(fasta_file, csb.core.string):
stream = open(fasta_file)
@@ -166,7 +178,7 @@ class SequenceParser(BaseSequenceParser):
if not lines[0].startswith(AbstractSequence.DELIMITER):
lines = [''] + lines
if len(lines) < 2:
- raise ValueError('Empty FASTA entry')
+ raise SequenceFormatError('Empty FASTA entry')
header = lines[0]
id = header[1:].split()[0]
@@ -185,7 +197,7 @@ class PDBSequenceParser(SequenceParser):
seq = super(PDBSequenceParser, self).read_sequence(string)
if not (seq.header and seq.id) or not (len(seq.id) in(5, 6) and seq.header.find('mol:') != -1):
- raise ValueError('Does not look like a PDB header: {0}'.format(seq.header))
+ raise SequenceFormatError('Does not look like a PDB header: {0}'.format(seq.header))
seq.id = seq.id.replace('_', '')
stype = seq.header.partition('mol:')[2].partition(' ')[0]
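A minimal sketch of the new exception in use (the FASTA string below is made
up); since SequenceFormatError subclasses ValueError, existing error handling
keeps working:

    from csb.bio.io.fasta import SequenceParser, SequenceFormatError

    try:
        seq = SequenceParser().read_sequence('>seq1 test\nMKVL')
    except SequenceFormatError as error:
        print('Malformed FASTA entry: {0}'.format(error))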
diff --git a/csb/bio/io/hhpred.py b/csb/bio/io/hhpred.py
index f8e1367..044bd28 100644
--- a/csb/bio/io/hhpred.py
+++ b/csb/bio/io/hhpred.py
@@ -99,7 +99,7 @@ class HHProfileParser(object):
s = csb.bio.io.StructureParser(input_pdb).parse_structure()
chain = s.chains[chain_id]
- if s.first_chain.length != hmm.layers.length:
+ if chain.length != hmm.layers.length:
raise StructureFormatError(
"{0}: Incorrect number of residues".format(chain.entry_id))
diff --git a/csb/bio/io/noe.py b/csb/bio/io/noe.py
new file mode 100644
index 0000000..7863861
--- /dev/null
+++ b/csb/bio/io/noe.py
@@ -0,0 +1,269 @@
+"""
+Simple XEASY and Sparky peak list parsers.
+"""
+
+from abc import ABCMeta, abstractmethod
+from csb.bio.nmr import NOESpectrum
+
+
+class PeakListFormatError(ValueError):
+ pass
+
+class BasePeakListReader(object):
+
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def read(self, table):
+ """
+ Parse a peak list table.
+
+ @param table: input peak list table
+ @type table: str
+ @rtype: L{NOESpectrum}
+ """
+ pass
+
+ def read_file(self, filename):
+ """
+ Parse a peak list file.
+
+ @param filename: input file name
+ @type filename: str
+ @rtype: L{NOESpectrum}
+ """
+ with open(filename) as input:
+ return self.read(input.read())
+
+ def read_all(self, filenames):
+ """
+ Parse a list of peak list files and merge the resulting spectra.
+ All spectra must have identical dimensions.
+
+ @param filenames: input file names
+ @type filenames: iterable of str
+
+ @return: joint spectrum
+ @rtype: L{NOESpectrum}
+ """
+ spectra = [self.read_file(f) for f in filenames]
+ return NOESpectrum.join(*spectra)
+
+class SparkyPeakListReader(BasePeakListReader):
+ """
+ Sparky NOE peak list parser.
+
+ @param elements: list of element names for each dimension
+ @type elements: list of (str or L{EnumItem})
+ @param connected: list of covalently connected dimension indices in the
+ format: [(i1,i2),...]
+ @type connected: list of (int,int) tuples
+ """
+
+ def __init__(self, elements, connected):
+
+ self._elements = list(elements)
+ self._connected = [(d1, d2) for d1, d2 in connected]
+
+ if len(self._elements) < 1:
+ raise ValueError("Can't parse a 0-dimensional peak list")
+
+ def read(self, table):
+ """
+ Parse a Sparky peak list table.
+
+ @param table: input peak list
+ @type table: str
+ @rtype: L{NOESpectrum}
+ """
+ offset = 0
+ spectrum = NOESpectrum(self._elements)
+
+ for d1, d2 in self._connected:
+ spectrum.connect(d1, d2)
+
+ for l in table.splitlines():
+ if not l.strip() or ('w1' in l and 'w2' in l):
+ if l.lstrip().lower().startswith('assignment'):
+ offset = 1
+ continue
+
+ line = l.split()[offset:]
+ try:
+ float(line[-1]) # last item may or may not be a comment
+ except ValueError:
+ if len(line) > 0:
+ line.pop()
+
+ items = list(map(float, line))
+ intensity = items[-1]
+ dimensions = items[:-1]
+
+ if len(dimensions) != len(self._elements):
+ raise PeakListFormatError("Expected {0} dimensional spectrum, got {1}".format(
+ len(self._elements), len(dimensions)))
+
+ spectrum.add(intensity, dimensions)
+
+ return spectrum
+
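A minimal usage sketch (file names and dimension layout are hypothetical; the
connected dimension indices are assumed to be zero-based, as in the XEASY
reader below):

    from csb.bio.io.noe import SparkyPeakListReader

    reader = SparkyPeakListReader(['H', 'N', 'H'], [(0, 1)])
    spectrum = reader.read_file('noesy.peaks')                 # single peak list
    merged = reader.read_all(['part1.peaks', 'part2.peaks'])   # joint spectrum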
+class XeasyPeakListReader(BasePeakListReader):
+ """
+ XEASY NOE peak list parser.
+ """
+
+ def __init__(self):
+ pass
+
+ def read(self, table):
+ """
+ Parse an XEASY peak list table.
+
+ @param table: input peak list
+ @type table: str
+ @rtype: L{NOESpectrum}
+ """
+ lines = table.splitlines()
+ spectrum = self._read_header(lines)
+
+ for l in lines:
+ if not l.strip() or l.startswith('#'):
+ continue
+
+ parts = l.split()[1:]
+ peak = parts[:spectrum.num_dimensions]
+ height = parts[spectrum.num_dimensions + 2]
+
+ intensity = float(height)
+            dimensions = list(map(float, peak))
+
+ spectrum.add(intensity, dimensions)
+
+ return spectrum
+
+
+ def _read_header(self, lines):
+
+ num = 0
+ dim = {}
+ el = {}
+ el2 = {}
+ connectivity = None
+
+ for l in lines:
+ if l.startswith('#'):
+ if l[1:].lstrip().lower().startswith('number of dimensions'):
+ num = int(l.split()[-1])
+
+ if l.startswith('#INAME'):
+ parts = l.split()[1:]
+ if len(parts) != 2:
+ raise PeakListFormatError("Invalid Xeasy header")
+
+ index = int(parts[0]) - 1
+ if index < 0:
+ raise PeakListFormatError("Invalid Xeasy header: dimension index < 1")
+
+ element = ''.join(i for i in parts[1] if i.isalpha())
+ el[parts[1]] = index
+ el2[element] = index
+
+ dim[index] = element
+
+ if l.startswith('#CYANAFORMAT'):
+ connectivity = l.split()[1]
+
+ if len(dim) != num or num == 0:
+ raise PeakListFormatError("Invalid Xeasy header")
+
+ elements = tuple(dim[i] for i in sorted(dim))
+ spectrum = NOESpectrum(elements)
+
+ if connectivity:
+ # cyanaformat - explicitly defines connected dimensions:
+ # upper case dimensions are connected, e.g. "#CYANAFORMAT hCH" => 2-3
+ if connectivity.upper() != ''.join(elements).upper():
+ raise ValueError("Invalid XEASY/CYANA header")
+ for i1 in range(len(connectivity)):
+ for i2 in range(len(connectivity)):
+ e1, e2 = connectivity[i1], connectivity[i2]
+ if i1 != i2 and e1.isupper() and e2.isupper():
+ spectrum.connect(i1, i2)
+ else:
+ # dimension labels starting with a number are connected, e.g. "1A B2 3C" => 1-3
+ if len(el) != num:
+ raise PeakListFormatError("Invalid XEASY header")
+ for e1 in el:
+ for e2 in el:
+ if e1 != e2:
+ element1 = dim[el[e1]]
+ element2 = dim[el[e2]]
+
+ num1 = e1.replace(element1, '')
+ num2 = e2.replace(element2, '')
+
+ if e1.startswith(num1) and e2.startswith(num2):
+ spectrum.connect(el[e1], el[e2])
+
+ return spectrum
+
+
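A minimal parsing sketch (the peak values are made up); the #CYANAFORMAT line
marks both upper-case dimensions as covalently connected:

    from csb.bio.io.noe import XeasyPeakListReader

    table = ("# Number of dimensions 2\n"
             "#INAME 1 H\n"
             "#INAME 2 C\n"
             "#CYANAFORMAT HC\n"
             "    1   4.120   56.300 1 U  1.500000e+05  0.00e+00 m 0 0 0 0 0\n")

    spectrum = XeasyPeakListReader().read(table)   # 2-dimensional NOESpectrum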
+class XeasyFileBuilder(object):
+ """
+ XEASY output format builder.
+
+    @param stream: destination stream, where the output is written
+ @type stream: file
+ """
+
+ def __init__(self, stream):
+ self._out = stream
+
+ def add_spectrum(self, spectrum):
+
+ self.add_header(spectrum)
+ self.add_peaks(spectrum)
+
+ def add_header(self, spectrum):
+ """
+ Write the XEASY header.
+
+ @param spectrum: NOE spectrum
+ @type spectrum: L{NOESpectrum}
+ """
+
+ self._out.write(
+ '# Number of dimensions {0}\n'.format(spectrum.num_dimensions))
+
+ conn = ''
+
+ for en, e in enumerate(spectrum.dimensions, start=1):
+ element = repr(e).upper()
+ self._out.write('#INAME {0} {1}{0}\n'.format(en, element))
+
+ if spectrum.has_connected_dimensions(en - 1):
+ conn += element.upper()
+ else:
+ conn += element.lower()
+
+ self._out.write(
+ '#CYANAFORMAT {0}\n'.format(conn))
+
+ def add_peaks(self, spectrum):
+ """
+ Write all peaks from C{spectrum}.
+
+ @param spectrum: NOE spectrum
+ @type spectrum: L{NOESpectrum}
+ """
+
+ for pn, peak in enumerate(spectrum, start=1):
+ self._out.write("{0:5} ".format(pn))
+
+ for dim in range(spectrum.num_dimensions):
+ data = "{0:7.3f} ".format(peak.get(dim))
+ self._out.write(data)
+
+ self._out.write("2 U ")
+ self._out.write("{0:18e} ".format(peak.intensity))
+ self._out.write("0.00e+00 m 0 0 0 0 0\n")
diff --git a/csb/bio/io/wwpdb.py b/csb/bio/io/wwpdb.py
index 47cef14..3070714 100644
--- a/csb/bio/io/wwpdb.py
+++ b/csb/bio/io/wwpdb.py
@@ -29,6 +29,20 @@ input PDB file. If the input file looks like a regular PDB file, the factory
returns a L{RegularStructureParser}, otherwise it instantiates L{LegacyStructureParser}.
L{StructureParser} is in fact an alias for L{AbstractStructureParser.create_parser}.
+Writing your own, customized PDB parser is easy. Suppose that you are trying to
+parse a PDB-like file which misuses the charge column to store custom info. This
+will certainly crash L{RegularStructureParser} (for good), but you can create your
+own parser as a workaround. All you need to do is to override the virtual
+C{_read_charge} hook method::
+
+ class CustomParser(RegularStructureParser):
+
+ def _read_charge(self, line):
+ try:
+ return super(CustomParser, self)._read_charge(line)
+ except StructureFormatError:
+ return None
+
Another important abstraction in this module is L{StructureProvider}. It has several
implementations which can be used to retrieve PDB L{Structure}s from various sources:
file system directories, remote URLs, etc. You can easily create your own provider as
@@ -54,6 +68,8 @@ import datetime
import multiprocessing
import csb.bio.structure
+import csb.bio.sequence
+import csb.bio.sequence.alignment as alignment
import csb.core
import csb.io
@@ -135,6 +151,9 @@ class StructureNotFoundError(KeyError):
class InvalidEntryIDError(StructureFormatError):
pass
+class ResidueMappingError(StructureFormatError):
+ pass
+
class EntryID(object):
"""
@@ -268,7 +287,10 @@ class AbstractStructureParser(object):
@type structure_file: str
@param check_ss: if True, secondary structure errors in the file will cause
L{SecStructureFormatError} exceptions
- @type check_ss: bool
+ @type check_ss: bool
+ @param mapper: residue mapper, used to align ATOM records to SEQRES.
+ If None, use the default (L{CombinedResidueMapper})
+ @type mapper: L{AbstractResidueMapper}
@raise IOError: when the input file cannot be found
"""
@@ -276,7 +298,7 @@ class AbstractStructureParser(object):
__metaclass__ = ABCMeta
@staticmethod
- def create_parser(structure_file, check_ss=False):
+ def create_parser(structure_file, check_ss=False, mapper=None):
"""
A StructureParser factory, which instantiates and returns the proper parser
object based on the contents of the PDB file.
@@ -287,6 +309,12 @@ class AbstractStructureParser(object):
@param structure_file: the PDB file to parse
@type structure_file: str
+ @param check_ss: if True, secondary structure errors in the file will cause
+ L{SecStructureFormatError} exceptions
+ @type check_ss: bool
+ @param mapper: residue mapper, used to align ATOM records to SEQRES.
+ If None, use the default (L{CombinedResidueMapper})
+ @type mapper: L{AbstractResidueMapper}
@rtype: L{AbstractStructureParser}
"""
@@ -298,23 +326,39 @@ class AbstractStructureParser(object):
break
if has_seqres:
- return RegularStructureParser(structure_file)
+ return RegularStructureParser(structure_file, check_ss, mapper)
else:
- return LegacyStructureParser(structure_file)
+ return LegacyStructureParser(structure_file, check_ss, mapper)
- def __init__(self, structure_file, check_ss=False):
+ def __init__(self, structure_file, check_ss=False, mapper=None):
self._file = None
self._stream = None
- self._check_ss = bool(check_ss)
+ self._mapper = CombinedResidueMapper()
+ self._check_ss = bool(check_ss)
self.filename = structure_file
+ if mapper is not None:
+ self.mapper = mapper
def __del__(self):
try:
self._stream.close()
except:
pass
+
+ @property
+ def mapper(self):
+ """
+ Current residue mapping strategy
+ @rtype: L{AbstractResidueMapper}
+ """
+ return self._mapper
+ @mapper.setter
+ def mapper(self, value):
+ if not isinstance(value, AbstractResidueMapper):
+ raise TypeError(value)
+ self._mapper = value
@property
def filename(self):
@@ -363,13 +407,44 @@ class AbstractStructureParser(object):
return models
else:
return []
+
+ def guess_chain_type(self, residue_labels):
+ """
+ Try to guess what is the sequence type of a chunk of PDB
+ C{residue_label}s. The list of labels is probed starting from the middle
+ first, because PDB chains often contain modified / unknown residues at
+ the termini. If none of the probed residues can be used to determine
+ chain's type, just give up and return L{SequenceTypes.Unknown}.
+
+ @param residue_labels: an iterable of PDB residue labels
+ @type residue_labels: iterable
+
+ @return: a L{SequenceTypes} enum member
+ @rtype: L{csb.core.EnumItem}
+ """
+
+ labels = list(residue_labels)
+ middle = int(len(labels) / 2)
+
+ reordered = labels[middle:] + list(reversed(labels[:middle]))
+
+ for label in reordered:
+ try:
+ type = self.guess_sequence_type(label)
+ if type != SequenceTypes.Unknown:
+ return type
+
+ except UnknownPDBResidueError:
+ continue
+
+ return SequenceTypes.Unknown
- def guess_sequence_type(self, residue_name):
+ def guess_sequence_type(self, residue_label):
"""
- Try to guess what is the sequence type of a PDB C{residue_name}.
+        Try to guess the sequence type of a PDB C{residue_label}.
- @param residue_name: a PDB-conforming name of a residue
- @type residue_name: str
+ @param residue_label: a PDB-conforming name of a residue
+ @type residue_label: str
@return: a L{SequenceTypes} enum member
@rtype: L{csb.core.EnumItem}
@@ -377,22 +452,22 @@ class AbstractStructureParser(object):
@raise UnknownPDBResidueError: when there is no such PDB residue name
in the catalog tables
"""
- if residue_name in PDB_AMINOACIDS:
+ if residue_label in PDB_AMINOACIDS:
return SequenceTypes.Protein
- elif residue_name in PDB_NUCLEOTIDES:
+ elif residue_label in PDB_NUCLEOTIDES:
return SequenceTypes.NucleicAcid
else:
- raise UnknownPDBResidueError(residue_name)
+ raise UnknownPDBResidueError(residue_label)
- def parse_residue(self, residue_name, as_type=None):
+ def parse_residue(self, residue_label, as_type=None):
"""
- Try to parse a PDB C{residue_name} and return its closest 'normal'
+ Try to parse a PDB C{residue_label} and return its closest 'normal'
string representation. If a sequence type (C{as_type}) is defined,
guess the alphabet based on that information, otherwise try first to
parse it as a protein residue.
- @param residue_name: a PDB-conforming name of a residue
- @type residue_name: str
+ @param residue_label: a PDB-conforming name of a residue
+ @type residue_label: str
@param as_type: suggest a sequence type (L{SequenceTypes} member)
@type L{scb.core.EnumItem}
@@ -403,25 +478,25 @@ class AbstractStructureParser(object):
in the catalog table(s)
"""
if as_type is None:
- as_type = self.guess_sequence_type(residue_name)
+ as_type = self.guess_sequence_type(residue_label)
try:
if as_type == SequenceTypes.Protein:
- return PDB_AMINOACIDS[residue_name]
+ return PDB_AMINOACIDS[residue_label]
elif as_type == SequenceTypes.NucleicAcid:
- return PDB_NUCLEOTIDES[residue_name]
+ return PDB_NUCLEOTIDES[residue_label]
else:
- raise UnknownPDBResidueError(residue_name)
+ raise UnknownPDBResidueError(residue_label)
except KeyError:
- raise UnknownPDBResidueError(residue_name)
+ raise UnknownPDBResidueError(residue_label)
- def parse_residue_safe(self, residue_name, as_type):
+ def parse_residue_safe(self, residue_label, as_type):
"""
Same as C{parse_residue}, but returns UNK/Any instead of raising
UnknownPDBResidueError.
- @param residue_name: a PDB-conforming name of a residue
- @type residue_name: str
+ @param residue_label: a PDB-conforming name of a residue
+ @type residue_label: str
@param as_type: suggest a sequence type (L{SequenceTypes} member)
@type L{scb.core.EnumItem}
@@ -429,7 +504,7 @@ class AbstractStructureParser(object):
@rtype: str
"""
try:
- return self.parse_residue(residue_name, as_type)
+ return self.parse_residue(residue_label, as_type)
except UnknownPDBResidueError:
if as_type == SequenceTypes.Protein:
return repr(SequenceAlphabets.Protein.UNK)
@@ -464,8 +539,13 @@ class AbstractStructureParser(object):
"""
if model is not None:
model = int(model)
-
- structure = self._parse_header(model)
+
+ try:
+ structure = self._parse_header(model)
+ except PDBParseError:
+ raise
+ except ValueError as ex:
+ raise HeaderFormatError("Malformed header: {0}".format(ex))
self._parse_atoms(structure, model)
self._parse_ss(structure)
@@ -582,6 +662,11 @@ class AbstractStructureParser(object):
molecule identifiers, chains and residues. This structure object is
then internally passed to the C{_parse_atoms} hook, responsible for
attachment of the atoms to the residues in the structure.
+
+ @param model: model ID to parse
+        @type model: int
+
+ @rtype: L{Structure}
"""
pass
@@ -597,7 +682,7 @@ class AbstractStructureParser(object):
raise ValueError('No such model {0} in the structure.'.format(model))
if line.startswith('MODEL'):
- model_id = int(line[10:14])
+ model_id = self._read_model(line)
if model == model_id:
return model_id
@@ -605,16 +690,24 @@ class AbstractStructureParser(object):
"""
Parse the ATOMs from the specified C{model} and attach them to the
C{structure}.
+
+ @param structure: L{Structure} being constructed
+        @type structure: L{Structure}
+        @param model: model ID to parse
+        @type model: int
"""
structure.model_id = None
- atoms = dict( (chain, []) for chain in structure.chains )
chains = set()
total_chains = len([c for c in structure.items if c.length > 0])
- het_residues = dict( (chain, set()) for chain in structure.chains )
+
+ residues = dict( (chain, []) for chain in structure.chains )
+ seen_residues = dict( (chain, {}) for chain in structure.chains )
+
in_ligands = False
in_atom = False
+ read_model = False
self._stream.seek(0)
while True:
@@ -624,87 +717,28 @@ class AbstractStructureParser(object):
except StopIteration:
break
- if line.startswith('HET '):
- het_residue, het_chain, het_residue_id = line[7:10].strip(), line[12], line[13:18].strip()
+ if line.startswith('MODEL'):
+ if read_model:
+ break
+ else:
+ self._parse_model_line(line, structure, model)
+ model = structure.model_id
+ read_model = True
+
+ elif line.startswith('ATOM') or \
+ (line.startswith('HETATM') and not in_ligands):
+ in_atom = True
- if het_chain in structure:
- chain = structure.chains[het_chain]
- if chain.type == SequenceTypes.Protein and het_residue in PDB_AMINOACIDS:
- het_residues[het_chain].add(het_residue_id)
- elif chain.type == SequenceTypes.NucleicAcid and het_residue in PDB_NUCLEOTIDES:
- het_residues[het_chain].add(het_residue_id)
+ info = self._parse_atom_line(line, structure)
+ chains.add(info.chain)
- elif line.startswith('MODEL'):
- if model and model != int(line[10:14]):
- self._scroll_model(model, self._stream)
- structure.model_id = model
+ if info.id not in seen_residues[info.chain]:
+ residues[info.chain].append(info)
+ seen_residues[info.chain][info.id] = info
else:
- model = int(line[10:14])
- structure.model_id = model
-
- elif line.startswith('ATOM') \
- or (line.startswith('HETATM') and not in_ligands):
- in_atom = True
+ atom = info.atoms[0]
+ seen_residues[info.chain][info.id].atoms.append(atom)
- rank = int(line[22:26])
- serial_number = int(line[6:11])
- name = line[12:16]
- x, y, z = line[30:38], line[38:46], line[46:54]
- vector = numpy.array([float(x), float(y), float(z)])
- element = line[76:78].strip()
- if element:
- try:
- element = csb.core.Enum.parsename(ChemElements, element)
- except csb.core.EnumMemberError:
- if element in ('D', 'X'):
- element = ChemElements.x
- else:
- raise StructureFormatError('Unknown chemical element: {0}'.format(element))
- else:
- element = None
-
- atom = csb.bio.structure.Atom(serial_number, name, element,
- vector)
-
- atom._het = line.startswith('HETATM')
- atom._rank = rank
- atom._sequence_number = int(line[22:26].strip())
- atom._residue_id = str(atom._sequence_number)
- atom._insertion_code = line[26].strip()
- if not atom._insertion_code:
- atom._insertion_code = None
- else:
- atom._residue_id += atom._insertion_code
-
- atom.alternate = line[16].strip()
- if not atom.alternate:
- atom.alternate = None
-
- atom._chain = line[21].strip()
- if atom._chain not in structure.chains:
- raise StructureFormatError(
- 'Atom {0}: chain {1} is undefined'.format(serial_number, atom._chain))
- chains.add(atom._chain)
- residue_name = line[17:20].strip()
- residue_name = self.parse_residue_safe(residue_name, as_type=structure.chains[atom._chain].type)
- if structure.chains[atom._chain].type == SequenceTypes.NucleicAcid:
- atom._residue_name = csb.core.Enum.parsename(SequenceAlphabets.Nucleic, residue_name)
- else:
- atom._residue_name = csb.core.Enum.parsename(SequenceAlphabets.Protein, residue_name)
-
- atom.occupancy = float(line[54:60].strip() or 0)
- atom.bfactor = float(line[60:66].strip() or 0)
-
- charge = line[78:80].strip()
- if charge:
- if charge in ('+', '-'):
- charge += '1'
- if charge[-1] in ('+', '-'):
- charge = charge[::-1]
- atom.charge = int(charge)
-
- atoms[atom._chain].append(atom)
-
elif in_atom and line.startswith('TER'):
in_atom = False
if len(chains) == total_chains:
@@ -718,97 +752,265 @@ class AbstractStructureParser(object):
if structure.model_id != model:
raise ValueError('No such model {0} in the structure.'.format(model))
+
+ self._map_residues(structure, residues)
+
+ def _parse_model_line(self, line, structure, model):
+ """
+ Handle a new MODEL line. The default implementation will read the model
+ ID and attach it to the C{structure}.
+
+ @param line: raw string line to parse
+ @type line: str
+ @param structure: L{Structure} being constructed
+        @type structure: L{Structure}
+
+ @note: this method may have side effects: scrolls the current stream
+
+ @return: read model ID
+ @rtype: int
+ """
+ if model and model != self._read_model(line):
+ self._scroll_model(model, self._stream)
+ structure.model_id = model
+ else:
+ model = self._read_model(line)
+ structure.model_id = model
+
+ return model
+
+ def _parse_atom_line(self, line, structure):
+ """
+ Handle a new ATOM or HETATM line. The default implementation will read
+ all data fields and create a new L{Atom}.
+
+ @param line: raw string line to parse
+ @type line: str
+ @param structure: L{Structure} being constructed
+        @type structure: L{Structure}
+
+        @return: a L{ResidueInfo} container holding the newly constructed atom
+ @rtype: L{ResidueInfo}
+ """
- self._map_residues(structure, atoms, het_residues)
-
- def _map_residues(self, structure, atoms, het_residues):
-
- assert set(structure.chains) == set(atoms.keys())
-
- for chain in structure.chains:
-
- subject = structure.chains[chain].sequence
- query = ''
- fragments = []
-
- seq_numbers = []
- lookup = {}
-
- i = -1
- for a in atoms[chain]:
- if a._residue_id not in lookup:
- i += 1
- lookup[a._residue_id] = [a._sequence_number, a._insertion_code]
- seq_numbers.append(a._residue_id)
- res_name = a._residue_name.value
- res_id = '{0}{1}'.format(a._sequence_number or '', a._insertion_code or '').strip()
- if a._het and res_id not in het_residues[chain]:
- # if it is a HETATM, but not a modified residue, initiate an optional fragment
- fragments.append([res_name, '?'])
- elif a._insertion_code and not (i > 0 and lookup[seq_numbers[i - 1]][1]):
- fragments.append([res_name])
- elif i == 0 or a._sequence_number - lookup[seq_numbers[i - 1]][0] not in (0, 1, -1):
- # if residues [i, i-1] are not consecutive or 'overlapping', initiate a new fragment:
- fragments.append([res_name])
- else:
- # then they are consecutive
- if fragments[-1][-1] == '?':
- # but the prev was optional (maybe HET), all optionals *MUST* reside in
- # singleton fragments, so start a new fragment
- fragments.append([res_name])
- else:
- # append the new residue to the end of the last fragment
- fragments[-1].append(res_name)
+ atom = self._read_atom(line)
+
+ rank = self._read_sequence_number(line)
+ sequence_number = rank
+ insertion_code = self._read_insertion_code(line)
+
+ id = str(sequence_number)
+ if insertion_code:
+ id += insertion_code
+
+ chain = self._read_chain_id(line)
+ if chain not in structure.chains:
+ raise StructureFormatError("Chain {0} is undefined".format(chain))
+
+ type = self._read_residue(line, structure.chains[chain])
+ label = self._read_residue_raw(line)
+
+ atom.alternate = self._read_alternate(line)
+ atom.occupancy = self._read_occupancy(line)
+ atom.bfactor = self._read_bfactor(line)
+ atom.charge = self._read_charge(line)
+
+ info = ResidueInfo(chain, rank, id, sequence_number, insertion_code, type, label)
+ info.atoms = [atom]
+
+ return info
+
+ def _read_model(self, line):
+ """
+ @return: model identifier
+ @rtype: int
+ """
+ try:
+ return int(line[10:14])
+ except ValueError:
+ raise StructureFormatError("Invalid model ID: {0}".format(line))
+
+ def _read_atom(self, line):
+ """
+ @return: a new atom (serial_number, name, element, vector)
+ @rtype: L{Atom}
+ """
+ try:
+ serial_number = int(line[6:11])
+ name = line[12:16]
+ x, y, z = line[30:38], line[38:46], line[46:54]
+ vector = numpy.array([float(x), float(y), float(z)])
+ except ValueError as ve:
+ raise StructureFormatError("Invalid ATOM line: {0}".format(ve))
+
+ element = self._read_element(line)
+ return csb.bio.structure.Atom(serial_number, name, element, vector)
+
+ def _read_sequence_number(self, line):
+ """
+ @return: PDB sequence number
+ @rtype: int
+ """
+ try:
+ return int(line[22:26])
+ except ValueError:
+ raise StructureFormatError("Invalid sequence number")
+
+ def _read_insertion_code(self, line):
+ """
+ @return: PDB insertion code
+ @rtype: str or None
+ """
+ code = line[26].strip()
+
+ if code:
+ return code
+ else:
+ return None
+
+ def _read_chain_id(self, line):
+ """
+ @return: chain identifier
+ @rtype: str
+ """
+ return line[21].strip()
+
+ def _read_residue(self, line, chain):
+ """
+ @param chain: owning L{Chain} object
+ @type chain: L{Chain}
+
+ @return: a member of any alphabet (e.g. L{SequenceAlphabets.Protein})
+ @rtype: L{EnumItem}
+ """
+ raw = self._read_residue_raw(line)
+ residue = self.parse_residue_safe(raw, as_type=chain.type)
+
+ try:
+ if chain.type == SequenceTypes.NucleicAcid:
+ return csb.core.Enum.parsename(SequenceAlphabets.Nucleic, residue)
+ else:
+ return csb.core.Enum.parsename(SequenceAlphabets.Protein, residue)
+ except csb.core.EnumMemberError:
+ raise StructureFormatError("{0} is not a valid {1} residue".format(raw, chain.type))
+
+ def _read_residue_raw(self, line):
+ """
+ @rtype: str
+ """
+ return line[17:20].strip()
+
+ def _read_element(self, line):
+ """
+ @return: a member of L{ChemElements}
+ @rtype: L{EnumItem} or None
+ """
+ element = line[76:78].strip()
+ if element:
+ try:
+ element = csb.core.Enum.parsename(ChemElements, element)
+ except csb.core.EnumMemberError:
+ if element in ('D', 'X'):
+ element = ChemElements.x
+ else:
+ raise StructureFormatError('Unknown chemical element: {0}'.format(element))
+ else:
+ element = None
+
+ return element
+
+ def _read_alternate(self, line):
+ """
+ @return: alt identifier
+ @rtype: str or None
+ """
+ alternate = line[16].strip()
+
+ if not alternate:
+ return None
+ else:
+ return alternate
+
+ def _read_occupancy(self, line):
+ """
+ @return: occupancy
+ @rtype: float or None
+ """
+ try:
+ return float(line[54:60].strip() or 0)
+ except ValueError:
+ raise StructureFormatError("Malformed occupancy field")
+
+ def _read_bfactor(self, line):
+ """
+ @return: b-factor
+ @rtype: float or None
+ """
+ try:
+ return float(line[60:66].strip() or 0)
+ except ValueError:
+ raise StructureFormatError("Malformed bfactor field")
+
+ def _read_charge(self, line):
+ """
+ @return: charge
+ @rtype: int or None
+ """
+ charge = line[78:80].strip()
+
+ if charge:
+ if charge in ('+', '-'):
+ charge += '1'
+ if charge[-1] in ('+', '-'):
+ charge = charge[::-1]
+ try:
+ return int(charge)
+ except ValueError:
+ raise StructureFormatError("Malformed charge field")
+ else:
+ return None
+
+ def _map_residues(self, structure, residues):
+ """
+ Attach each L{Atom} to its corresponding L{Residue}.
+
+ So far we have constructed a sparse (fragmented) chain given the information
+ we have read from the ATOM/HETATM records. That includes PDB sequence
+ identifiers and insertion codes, which cover only residues with XYZ coordinates
+ and often do not correspond to our L{Residue} ranks.
+ Our job is to find the right correspondence by matching this unreal sequence
+ to what we have got from the SEQRES fields (the complete, gap-free protein
+ sequence). Here we delegate this task to the current L{AbstractResidueMapper}
+ strategy, which does all the magic.
+
+ @param structure: L{Structure} being constructed
+        @type structure: L{Structure}
+        @param residues: all L{ResidueInfo} objects constructed so far.
+                         This must be a map of the form:
+                         <chainID: [L{ResidueInfo}1, L{ResidueInfo}2...]>
+        @type residues: dict of list of L{ResidueInfo}
+ """
+
+ if set(structure.chains) != set(residues.keys()):
+ raise PDBParseError("Corrupt PDB file")
+
+ for chain in structure.items:
+ if chain.length == 0 or len(residues[chain.id]) == 0:
+ continue
- for i, frag in enumerate(fragments):
- fragments[i] = ''.join(frag)
- if len(fragments) > 100:
- # Python's regex engine will crash with more than 100 groups
- raise StructureFormatError("Can't map structure with more than 100 fragments in ATOMS")
- query = '^.*?({0}).*?$'.format(').*?('.join(fragments))
+ reference = SparseChainSequence.create(chain)
+ sparse = SparseChainSequence(
+ "atoms", "", residues[chain.id], chain.type)
- matches = re.match(query, subject)
+ aligned = self.mapper.map(sparse, reference)
+ assert aligned.length == chain.length
- if matches:
- seq_numitem = -1
- for frag_number, frag in enumerate(matches.groups(), start=1):
- if frag is '':
- # optional fragment, which did not match (NOTE: all optionals must occupy
- # their own fragments of length 1 residue)
- seq_numitem += 1 # this 1 implies that all optional fragments are 1 residue long
- else:
- for i, dummy in enumerate(frag, start=1):
- seq_numitem += 1
- # lookup[res_id] is finally set to give the final rank of residue under id res_id:
- try:
- lookup[ seq_numbers[seq_numitem] ] = matches.start(frag_number) + i
- except:
- raise
-
- fixed_residue = None
- for atom in atoms[chain]:
- if not isinstance(lookup[atom._residue_id], int):
- continue # this atom was not mapped (e.g. HET)
- atom._rank = lookup[atom._residue_id]
- residue = structure.chains[chain].residues[atom._rank]
- if residue is not fixed_residue:
- residue.id = atom._sequence_number, atom._insertion_code
- fixed_residue = residue
-
- assert str(residue.type) == subject[atom._rank - 1]
- residue.atoms.append(atom)
+ for residue, mapped in zip(chain.residues, aligned.residues):
+ if mapped.type != sparse.alphabet.GAP:
+ residue.id = (mapped.sequence_number, mapped.insertion_code)
- del atom._rank
- del atom._insertion_code
- del atom._sequence_number
- del atom._chain
- del atom._residue_id
- del atom._residue_name
- else:
- if structure.chains[chain].length == 0 and len(atoms[chain]) > 0:
- raise StructureFormatError("Can't add atoms: chain {0} has no residues".format(chain))
- else:
- raise StructureFormatError("Can't map atoms")
+ for atom in mapped.atoms:
+ residue.atoms.append(atom)
def _parse_ss(self, structure):
"""
@@ -979,56 +1181,84 @@ class RegularStructureParser(AbstractStructureParser):
chains += ', ' + line[11:].strip()
break
- chain_name = chain_name.strip()[:-1]
- for chain in chains.replace(';', ' ').replace(',', ' ').split() or ['']: # the second part deals with an empty chain id
- new_chain = csb.bio.structure.Chain(chain, type=SequenceTypes.Unknown,
- name=chain_name, accession=structure.accession)
- new_chain.molecule_id = mol_id
- try:
- structure.chains.append(new_chain)
- except csb.core.DuplicateKeyError:
- raise HeaderFormatError('Chain {0} is already defined.'.format(new_chain.id))
+ chain_ids = chains.replace(';', ' ').replace(',', ' ').split() or [''] # the second part deals with an empty chain id
+ self._add_chains(structure, chain_name, mol_id, *chain_ids)
elif line.startswith('REMARK 2 RESOLUTION'):
- res = re.search("(\d+(?:\.\d+)?)\s+ANGSTROM", line)
- if res and res.groups():
- structure.resolution = float(res.group(1))
+ structure.resolution = self._read_resolution(line)
elif line.startswith('SEQRES'):
- # Correct handling of empty chain id
- seq_fields = [line[7:10], line[11], line[13:17] ]
- seq_fields.extend(line[18:].split())
-
- rank_base = int(seq_fields[0])
- chain_id = seq_fields[1].strip()
-
- if chain_id not in structure.chains:
- raise HeaderFormatError('Chain {0} is undefined'.format(chain_id))
-
+ chain_id, residues = self._parse_seqres_line(line, structure)
chain = structure.chains[chain_id]
-
- if chain.type == SequenceTypes.Unknown:
- inner_residuerank = int(len(seq_fields[3:]) / 2) + 3
- for i in [inner_residuerank, 3, -1]:
- try:
- chain.type = self.guess_sequence_type(seq_fields[i])
- break
- except UnknownPDBResidueError:
- pass
-
- for i, residue_name in enumerate(seq_fields[3:]):
- rank = rank_base * 13 - (13 - (i + 1))
- rname = self.parse_residue_safe(residue_name, as_type=chain.type)
- residue = csb.bio.structure.Residue.create(chain.type, rank=rank, type=rname)
- residue._pdb_name = residue_name
- structure.chains[chain_id].residues.append(residue)
- assert structure.chains[chain_id].residues.last_index == rank
+
+ for residue in residues:
+ chain.residues.append(residue)
+ if chain.residues.last_index != residue.rank:
+ raise HeaderFormatError("Malformed SEQRES")
elif line.startswith('MODEL') or line.startswith('ATOM'):
break
- return structure
+ return structure
+
+ def _add_chains(self, structure, name, mol_id, *chain_ids):
+ name = name.strip().rstrip(";")
+
+ for chain in chain_ids:
+ new_chain = csb.bio.structure.Chain(chain, type=SequenceTypes.Unknown,
+ name=name, accession=structure.accession)
+ new_chain.molecule_id = mol_id
+ try:
+ structure.chains.append(new_chain)
+ except csb.bio.structure.DuplicateChainIDError:
+ raise HeaderFormatError('Chain {0} is already defined.'.format(new_chain.id))
+
+ def _read_resolution(self, line):
+ """
+ @return: resolution
+ @rtype: float or None
+ """
+ res = re.search("(\d+(?:\.\d+)?)\s+ANGSTROM", line)
+
+ if res and res.groups():
+ return float(res.group(1))
+ else:
+ return None
+
+ def _parse_seqres_line(self, line, structure):
+ """
+ Parse a SEQRES line, build and return newly constructed residues.
+ If the current sequence type of the chain is unknown, try to guess it
+ before parsing the residues.
+
+ @return: parsed chain_id and L{Residue}s
+ @rtype: 2-tuple: (str, iterable of L{Residue})
+ """
+ residues = []
+
+ rownum = int(line[7:10])
+ chain_id = line[11].strip()
+ labels = line[18:].split()
+
+ if chain_id not in structure.chains:
+ raise HeaderFormatError('Chain {0} is undefined'.format(chain_id))
+
+ chain = structure.chains[chain_id]
+
+ if chain.type == SequenceTypes.Unknown:
+ chain.type = self.guess_chain_type(labels)
+
+ for rn, label in enumerate(labels):
+ rank = rownum * 13 - (13 - (rn + 1))
+ rtype = self.parse_residue_safe(label, as_type=chain.type)
+
+ residue = csb.bio.structure.Residue.create(chain.type, rank=rank, type=rtype)
+ residue.label = label
+ residues.append(residue)
+
+ return chain_id, residues
+
class PDBHeaderParser(RegularStructureParser):
"""
@@ -1067,6 +1297,7 @@ class LegacyStructureParser(AbstractStructureParser):
self._stream.seek(0)
in_atom = False
has_atoms = False
+ has_model = False
chains = csb.core.OrderedDict()
header = next(self._stream)
@@ -1086,73 +1317,396 @@ class LegacyStructureParser(AbstractStructureParser):
break
if line.startswith('MODEL'):
- if model and model != int(line[10:14]):
- self._scroll_model(model, self._stream)
- structure.model_id = model
+ if has_model:
+ break
else:
- model = int(line[10:14])
- structure.model_id = model
+ self._parse_model_line(line, structure, model)
+ model = structure.model_id
+ has_model = True
elif line.startswith('ATOM') \
or (in_atom and line.startswith('HETATM')):
in_atom = True
has_atoms = True
- residue_id = line[22:27].strip()
- residue_name = line[17:20].strip()
- chain_id = line[21].strip()
+ seq_number = self._read_sequence_number(line)
+ ins_code = self._read_insertion_code(line)
+ residue_id = (seq_number, ins_code)
+ label = self._read_residue_raw(line)
+ chain_id = self._read_chain_id(line)
if chain_id not in chains:
chains[chain_id] = csb.core.OrderedDict()
-
- new_chain = csb.bio.structure.Chain(
- chain_id,
- type=SequenceTypes.Unknown,
- accession=structure.accession)
- new_chain.molecule_id = '1'
- structure.chains.append(new_chain)
+ self._add_chain(structure, chain_id)
if residue_id not in chains[chain_id]:
- chains[chain_id][residue_id] = residue_name
-
- if structure.chains[chain_id].type == SequenceTypes.Unknown:
- try:
- structure.chains[chain_id].type = self.guess_sequence_type(residue_name)
- except UnknownPDBResidueError:
- pass
+ chains[chain_id][residue_id] = label
+ chain = structure.chains[chain_id]
+ if chain.type == SequenceTypes.Unknown:
+ self._fix_chain(chain, label)
elif in_atom and line.startswith('TER'):
in_atom = False
-
elif line.startswith('ENDMDL'):
break
-
elif line.startswith('END'):
break
if not has_atoms:
raise HeaderFormatError("Can't parse legacy structure: no ATOMs found")
+
+ for chain in structure.items:
+ self._build_chain(chain, chains[chain.id])
- for chain_id in structure.chains:
- chain = structure.chains[chain_id]
+ return structure
+
+ def _add_chain(self, structure, chain_id):
+
+ new_chain = csb.bio.structure.Chain(chain_id,
+ type=SequenceTypes.Unknown,
+ accession=structure.accession)
+ new_chain.molecule_id = '1'
+ structure.chains.append(new_chain)
+
+ def _build_chain(self, chain, residues):
+
+ for residue_id, label in residues.items():
+ rank = (chain.residues.last_index or 0) + 1
- for residue_id in chains[chain_id]:
- residue_name = chains[chain_id][residue_id]
- rank = (chain.residues.last_index or 0) + 1
+ rname = self.parse_residue_safe(label, as_type=chain.type)
+ residue = csb.bio.structure.Residue.create(chain.type, rank=rank, type=rname)
+ residue.label = label
+ residue.id = residue_id
+ chain.residues.append(residue)
+
+ def _fix_chain(self, chain, probe):
- rname = self.parse_residue_safe(residue_name, as_type=structure.chains[chain_id].type)
- residue = csb.bio.structure.Residue.create(chain.type, rank=rank, type=rname)
- residue._pdb_name = residue_name
- chain.residues.append(residue)
-
- return structure
+ try:
+ chain.type = self.guess_sequence_type(probe)
+ except UnknownPDBResidueError:
+ pass
+
+ def _map_residues(self, structure, residues):
+ for chain in structure.items:
+ for residue_info in residues[chain.id]:
+ try:
+ residue = chain.find(residue_info.sequence_number, residue_info.insertion_code)
+ for atom in residue_info.atoms:
+ residue.atoms.append(atom)
+
+ except csb.bio.structure.EntityNotFoundError:
+ pass
+
StructureParser = AbstractStructureParser.create_parser
"""
Alias for L{AbstractStructureParser.create_parser}.
"""
+
+class ResidueInfo(object):
+ """
+ High-performance struct, which functions as a container for unmapped
+ L{Atom}s.
+
+ @note: This object must implement the L{csb.bio.sequence.ResidueInfo}
+ interface. This is not enforced through inheritance solely
+ to save some CPU (by exposing public fields and no properties).
+ However, on an abstract level this object is_a ResidueInfo
+ and is used to build L{AbstractSequence}s.
+ """
+ __slots__ = ['chain', 'rank', 'id' , 'sequence_number', 'insertion_code', 'type', 'label', 'atoms']
+
+ def __init__(self, chain, rank, id, seq_number, ins_code, type, label):
+
+ self.chain = chain
+ self.rank = rank
+ self.id = id
+ self.sequence_number = seq_number
+ self.insertion_code = ins_code
+ self.type = type
+ self.label = label
+ self.atoms = []
+
+ @property
+ def is_modified(self):
+
+ if self.type.enum is SequenceAlphabets.Nucleic:
+ return self.label != str(self.type)
+ else:
+ return self.label != repr(self.type)
+
+class SparseChainSequence(csb.bio.sequence.ChainSequence):
+ """
+ Sequence view for reference (SEQRES) or sparse (ATOM) PDB chains.
+ The residue instances passed to the constructor must be
+ L{csb.bio.structure.Residue} or L{csb.bio.io.wwpdb.ResidueInfo} objects.
+
+ See L{csb.bio.sequence.AbstractSequence} for details.
+ """
+
+ def _add(self, residue):
+
+ if not isinstance(residue, (csb.bio.structure.Residue, ResidueInfo)):
+ raise TypeError(residue)
+ else:
+ self._residues.append(residue)
+
+ def _get(self, rank):
+ return self._residues[rank - 1]
+
+ @staticmethod
+ def create(chain):
+ """
+ Create a new L{SparseChainSequence} from existing L{Chain}.
+
+ @type chain: L{csb.bio.structure.Chain}
+ @rtype: L{SparseChainSequence}
+ """
+ return SparseChainSequence(
+ chain.entry_id, chain.header, chain.residues, chain.type)
+
+
+class AbstractResidueMapper(object):
+ """
+ Defines the base interface of all residue mappers, used to align PDB ATOM
+ records to the real (SEQRES) sequence of a chain.
+ """
+
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def map(self, sparse, reference):
+ """
+ Map L{sparse}'s residues to L{reference}. Return all C{sparse} residues,
+ aligned over C{reference}, with artificial gap residues inserted at
+ relevant positions. The resulting sequence of sparse residues will
+ always have the same length as the C{reference} sequence.
+
+ @note: C{sparse}'s ranks won't be touched because the C{rank} property
+ of the underlying L{ResidueInfo} implementation is not necessarily r/w.
+
+ @param sparse: sparse sequence (e.g. derived from ATOMS records)
+ @type sparse: L{SparseChainSequence}
+ @param reference: reference, complete sequence
+ (e.g. derived from SEQRES records)
+ @type reference: L{SparseChainSequence}
+
+ @return: all C{sparse} residues, optimally aligned over C{reference}
+ (with gaps)
+ @rtype: L{SparseChainSequence}
+
+ @raise ResidueMappingError: if the specified sequences are not alignable
+ """
+ pass
+
+ def create_gap(self, alphabet=SequenceAlphabets.Protein):
+ """
+ Create and return a new gap residue.
+
+ @param alphabet: sequence alphabet; a member of L{SequenceAlphabets}
+ which has GAP item
+ @type alphabet: L{enum}
+
+ @rtype: L{ResidueInfo}
+ """
+ return ResidueInfo(None, -1, None, None, None, alphabet.GAP, "-")
+
+ def _build(self, sparse, aligned):
+
+ return SparseChainSequence(
+ sparse.id, sparse.header, aligned, sparse.type)
+
+class FastResidueMapper(AbstractResidueMapper):
+ """
+ RegExp-based residue mapper. Fails on heavily malformed input (i.e. it cannot
+ insert gaps in the C{reference}), but it is very fast (linear) and memory
+ efficient.
+ """
+
+ MAX_FRAGMENTS = 20
+
+ MIN_UNICODE_CHAR = 300
+ FORBIDDEN_CHARS = set('^.*?()-')
+
+ CODEC = "utf-8"
+ DELIMITER = ").*?(".encode(CODEC).decode(CODEC)
+ PATTERN = "^.*?({0}).*?$".encode(CODEC).decode(CODEC)
+
+ def __init__(self):
+ self._charcode = FastResidueMapper.MIN_UNICODE_CHAR
+ self._cache = {}
+
+ def map(self, sparse, reference):
+
+ aligned = []
+ mapping = {}
+
+ residues = list(sparse.residues)
+
+ pattern = self._build_pattern(residues)
+ seqres = self._encode_sequence(reference)
+
+ matches = re.match(pattern, seqres)
+
+ if matches:
+ unmapped_item = -1
+
+ for fn, fragment in enumerate(matches.groups(), start=1):
+ assert fragment != ''
+
+ for offset in range(1, len(fragment) + 1):
+ unmapped_item += 1
+ rank = matches.start(fn) + offset
+
+ mapped_residue = residues[unmapped_item]
+ real_residue = reference.residues[rank]
+
+ assert real_residue.type == mapped_residue.type
+ mapping[real_residue] = mapped_residue
+
+ else:
+ raise ResidueMappingError("Can't map ATOM records")
+
+ for rank, residue in enumerate(reference.residues, start=1):
+ if residue in mapping:
+ aligned.append(mapping[residue])
+ else:
+ aligned.append(self.create_gap(sparse.alphabet))
+
+ assert len(aligned) == reference.length
+ return self._build(sparse, aligned)
+
+ def _build_pattern(self, residues):
+ """
+        Build and return a sparse regular expression for C{residues}.
+ """
+
+ fragments = []
+
+ for rn, r in enumerate(residues):
+ res_name = self._encode(r)
+
+ if rn == 0:
+ # First residue, start a new fragment:
+ fragments.append([res_name])
+ elif r.insertion_code: # and not residues[rn - 1].insertion_code:
+ # If residue i has an insertion code, initiate a new fragment:
+ fragments.append([res_name])
+ elif r.sequence_number - residues[rn - 1].sequence_number in (0, 1, -1):
+ # If the seq numbers of residues [i-1, i] are consecutive, extend the last fragment:
+ fragments[-1].append(res_name)
+ else:
+ # They are not consecutive, so we better start a new fragment:
+ fragments.append([res_name])
+
+ for i, frag in enumerate(fragments):
+ fragments[i] = ''.join(frag)
+ if len(fragments) > FastResidueMapper.MAX_FRAGMENTS:
+ # Wow, that's a lot of fragments. Better use a different mapper
+ raise ResidueMappingError("Can't map chain with large number of fragments")
+
+ blocks = FastResidueMapper.DELIMITER.join(fragments)
+ pattern = FastResidueMapper.PATTERN.format(blocks)
+
+ return pattern
+
+ def _encode(self, r):
+ """
+ Return a unique single-letter representation of C{r.type}.
+ """
+ if not r.is_modified:
+ return str(r.type)
+ else:
+ return self._register_label(r.label)
+
+ def _encode_sequence(self, s):
+ return ''.join(map(self._encode, s.residues))
+
+ def _register_label(self, label):
+ """
+ Assign a new unicode character to C{label} and cache it.
+
+ @return: cached single-letter representation of label.
+ @rtype: unicode char
+ """
+
+ if label not in self._cache:
+ if set(label).intersection(FastResidueMapper.FORBIDDEN_CHARS):
+ raise ResidueMappingError("Invalid residue label")
+
+ self._charcode += 1
+ code = self._charcode
+ self._cache[label] = csb.io.unichr(code)
+
+ return self._cache[label]
+
+
+class RobustResidueMapper(AbstractResidueMapper):
+ """
+ Exhaustive residue mapper, which uses Needleman-Wunsch global alignment.
+ Much slower (quadratic), but fail-proof even with incompatible sequences
+ (can insert gaps in both the C{sparse} and the C{reference} sequence).
+
+ @param match: score for a match
+ @type match: float
+ @param mismatch: score for a mismatch (by default mismatches are heavily
+ penalized, while gaps are allowed)
+ @type mismatch: float
+ @param gap: gap penalty
+ @type gap: float
+ """
+
+ class GlobalAligner(alignment.GlobalAlignmentAlgorithm):
+
+ def _sequence(self, s):
+ return [r.label for r in s.residues]
+
+
+ def __init__(self, match=1, mismatch=-10, gap=0):
+
+ scoring = alignment.IdentityMatrix(match=match, mismatch=mismatch)
+ aligner = RobustResidueMapper.GlobalAligner(scoring=scoring, gap=gap)
+
+ self._aligner = aligner
+
+ def map(self, sparse, reference):
+
+ aligned = []
+ ali = self._aligner.align(sparse, reference)
+
+ if ali.is_empty:
+ raise ResidueMappingError("Global alignment failed")
+
+ for mapped, residue in zip(ali.query, ali.subject):
+
+ if residue.type == reference.alphabet.GAP:
+ continue
+ elif mapped.type == sparse.alphabet.GAP:
+ aligned.append(self.create_gap(sparse.alphabet))
+ else:
+ aligned.append(mapped)
+
+ return self._build(sparse, aligned)
+
+class CombinedResidueMapper(AbstractResidueMapper):
+ """
+ The best of both worlds: attempts to map the residues using
+ L{FastResidueMapper}, but upon failure secures success by switching to
+ L{RobustResidueMapper}.
+ """
+ FAST = FastResidueMapper()
+ ROBUST = RobustResidueMapper()
+
+ def map(self, sparse, reference):
+
+ try:
+ return CombinedResidueMapper.FAST.map(sparse, reference)
+ except ResidueMappingError:
+ return CombinedResidueMapper.ROBUST.map(sparse, reference)
+
+
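A minimal sketch (the PDB file name is hypothetical): parse a structure with
the slower but fail-proof mapper instead of the default CombinedResidueMapper:

    from csb.bio.io.wwpdb import RegularStructureParser, RobustResidueMapper

    parser = RegularStructureParser('protein.pdb', mapper=RobustResidueMapper())
    structure = parser.parse_structure()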
class FileBuilder(object):
"""
Base abstract files for all structure file formatters.
@@ -1260,7 +1814,7 @@ class PDBFileBuilder(FileBuilder):
for chain_id in master.chains:
chain = master.chains[chain_id]
- res = [ r._pdb_name for r in chain.residues ]
+ res = [ r.label for r in chain.residues ]
rn = 0
for j in range(0, chain.length, 13):
@@ -1306,7 +1860,7 @@ class PDBFileBuilder(FileBuilder):
element = ' '
self.writeline('ATOM {0:>5} {1:>4}{2}{3:>3} {4}{5:>4}{6} {7:>8.3f}{8:>8.3f}{9:>8.3f}{10:>6.2f}{11:>6.2f}{12:>12}{13:2}'.format(
atom.serial_number, atom._full_name, isnull(alt, ' '),
- residue._pdb_name, chain.id,
+ residue.label, chain.id,
isnull(residue.sequence_number, residue.rank), isnull(residue.insertion_code, ' '),
atom.vector[0], atom.vector[1], atom.vector[2], isnull(atom.occupancy, 0.0), isnull(atom.bfactor, 0.0),
element, isnull(atom.charge, ' ') ))
diff --git a/csb/bio/nmr/__init__.py b/csb/bio/nmr/__init__.py
index 893b7af..7c23f67 100644
--- a/csb/bio/nmr/__init__.py
+++ b/csb/bio/nmr/__init__.py
@@ -3,10 +3,15 @@ NMR related objects.
"""
import os
+import numpy.linalg
+import xml.dom.minidom
+
import csb.io.tsv
import csb.core as pu
-from csb.bio.sequence import SequenceAlphabets
+from csb.statistics.pdf import GeneralizedNormal
+from csb.bio.sequence import ProteinAlphabet
+from csb.bio.structure import ChemElements
class InvalidResidueError(ValueError):
@@ -71,7 +76,7 @@ class RandomCoil(object):
header = 'Residue:str Nucleus:str Value:float'
for row in csb.io.tsv.Table.from_tsv(ref, header):
- residue = pu.Enum.parsename(SequenceAlphabets.Protein, row[0])
+ residue = pu.Enum.parsename(ProteinAlphabet, row[0])
nucleus, value = row[1:]
if residue not in self._reference:
@@ -82,7 +87,7 @@ class RandomCoil(object):
header = 'Residue:str Nucleus:str CS1:float CS2:float CS3:float CS4:float'
for row in csb.io.tsv.Table.from_tsv(cor, header):
- residue = pu.Enum.parsename(SequenceAlphabets.Protein, row[0])
+ residue = pu.Enum.parsename(ProteinAlphabet, row[0])
nucleus = row[1]
values = row[2:]
@@ -111,11 +116,11 @@ class RandomCoil(object):
try:
if isinstance(residue, pu.string):
if len(residue) == 1:
- residue = pu.Enum.parse(SequenceAlphabets.Protein, residue)
+ residue = pu.Enum.parse(ProteinAlphabet, residue)
else:
- residue = pu.Enum.parsename(SequenceAlphabets.Protein, residue)
+ residue = pu.Enum.parsename(ProteinAlphabet, residue)
else:
- if residue.enum is not SequenceAlphabets.Protein:
+ if residue.enum is not ProteinAlphabet:
raise TypeError(residue)
return value - self._reference[residue][nucleus]
@@ -165,4 +170,1090 @@ class RandomCoil(object):
continue
return shift
-
\ No newline at end of file
+
+
+class AtomConnectivity(object):
+
+ RESOURCES = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'resources')
+
+ _instance = None
+
+ @staticmethod
+ def get():
+ """
+ Get the current L{AtomConnectivity} instance (and create it if this
+ method is invoked for the first time).
+ @rtype: L{AtomConnectivity}
+ """
+ if AtomConnectivity._instance is None:
+ AtomConnectivity._instance = AtomConnectivity()
+ return AtomConnectivity._instance
+
+ def __init__(self):
+
+ self._table = {}
+ self._initialize()
+
+ def _initialize(self):
+
+ resource = os.path.join(AtomConnectivity.RESOURCES, 'AtomConnectivity.xml')
+ root = xml.dom.minidom.parse(resource)
+
+ for r in root.documentElement.getElementsByTagName('residue'):
+ residue = pu.Enum.parsename(ProteinAlphabet, r.getAttribute('type'))
+ self._table[residue] = {}
+
+ for a in r.getElementsByTagName('atom'):
+ atom = a.getAttribute('name')
+ self._table[residue][atom] = set()
+
+ for b in r.getElementsByTagName('bond'):
+ atom1 = b.getAttribute('atom1')
+ atom2 = b.getAttribute('atom2')
+ self._table[residue][atom1].add(atom2)
+ self._table[residue][atom2].add(atom1)
+
+ def connected(self, residue, atom1, atom2):
+ """
+ Return True if C{atom1} is covalently connected to C{atom2} in C{residue}
+
+ @param residue: residue type (a member of L{ProteinAlphabet})
+ @type residue: L{EnumItem}
+ @param atom1: first atom name (IUPAC)
+ @type atom1: str
+ @param atom2: second atom name (IUPAC)
+ @type atom2: str
+
+ @rtype: boolean
+ """
+ if residue in self._table:
+ r = self._table[residue]
+ if atom1 in r:
+ return atom2 in r[atom1]
+
+ return False
+
+ def connected_atoms(self, residue, atom):
+ """
+ Return all atoms covalently connected to C{atom} in C{residue}.
+
+ @param residue: residue type (a member of L{ProteinAlphabet})
+ @type residue: L{EnumItem}
+ @param atom: source atom name (IUPAC)
+ @type atom: str
+
+ @rtype: tuple of str
+ """
+ if residue in self._table:
+ r = self._table[residue]
+ if atom in r:
+ return tuple(r[atom])
+
+ return tuple()
+
+ def contains(self, residue, atom):
+ """
+ Return True if C{atom} name is contained in C{residue}.
+
+ @param residue: residue type (a member of L{ProteinAlphabet})
+ @type residue: L{EnumItem}
+ @param atom: atom name (IUPAC)
+ @type atom: str
+
+ @rtype: bool
+ """
+ if residue in self._table:
+ return atom in self._table[residue]
+
+ return False
+
+ def get_atoms(self, residue, prefix=''):
+ """
+ Get all atoms contained in C{residue}.
+
+ @param residue: residue type (a member of L{ProteinAlphabet})
+ @type residue: L{EnumItem}
+ @param prefix: atom name prefix wildcard (IUPAC)
+ @type prefix: str
+
+ @return: set of atom names
+ @rtype: frozenset of str
+ """
+        if residue in self._table:
+            t = self._table[residue]
+            return frozenset(a for a in t if a.startswith(prefix))
+
+ return frozenset()
+
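A minimal usage sketch (ProteinAlphabet.ALA is assumed to be the alanine enum
member; atom names follow the IUPAC convention used by the connectivity table):

    from csb.bio.nmr import AtomConnectivity
    from csb.bio.sequence import ProteinAlphabet

    conn = AtomConnectivity.get()
    conn.connected(ProteinAlphabet.ALA, 'CA', 'CB')    # True
    conn.connected_atoms(ProteinAlphabet.ALA, 'CA')    # e.g. ('N', 'C', 'CB', 'HA')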
+
+class Filters(object):
+ """
+ Pre-built atom filters for L{ContactMap}s.
+ """
+
+ @staticmethod
+ def ALL(a):
+ return True
+
+ @staticmethod
+ def HYDROGENS(a):
+ return a.element == ChemElements.H
+
+ @staticmethod
+ def CARBONS(a):
+ return a.element == ChemElements.C
+
+ @staticmethod
+ def CALPHAS(a):
+ return a.name == 'CA'
+
+class ContactMap(object):
+ """
+    Describes a protein contact map. Atoms positioned at a distance below
+ a given cutoff are considered to be in contact.
+
+ @param chain: source protein chain
+ @type chain: L{csb.bio.structure.Chain}
+ @param cutoff: distance cutoff in angstroms
+ @type cutoff: float
+ @param filter: a callable with signature 'bool def(csb.bio.structure.Atom)',
+ invoked for every atom, which determines whether a given atom
+ should be skipped (False) or considered (True). See L{Filters}
+    @type filter: callable
+ """
+
+ DISTANCE_CUTOFF = 6.0
+
+ @staticmethod
+ def load(filename):
+ """
+ Deserialize from a pickle.
+ """
+ with open(filename, 'rb') as stream:
+ return csb.io.Pickle.load(stream)
+
+ def __init__(self, chain, cutoff=DISTANCE_CUTOFF, filter=None):
+
+ self._cutoff = float(cutoff)
+ self._chain = chain
+ self._atoms = []
+ self._atomset = set()
+ self._map = {}
+ self._coords = {}
+
+ if filter is None:
+ filter = lambda i: True
+
+ for residue in chain.residues:
+ self._coords[residue.rank] = {}
+ atoms = [a for a in residue.items if filter(a)]
+
+ if len(atoms) == 0:
+ continue
+
+ step = 1.0 / len(atoms)
+ n = 0
+
+ for atom in atoms:
+ self._atoms.append(atom)
+ self._atomset.add(atom)
+ self._coords[residue.rank][atom.name] = residue.rank + n * step
+ n += 1
+
+ def __iter__(self):
+ return self.contacts
+
+ def __contains__(self, atom):
+ return atom in self._atomset
+
+ @property
+ def cutoff(self):
+ """
+ Distance cutoff in Angstroms
+ @rtype: float
+ """
+ return self._cutoff
+
+ @property
+ def chain(self):
+ """
+ Source protein chain
+ @rtype: L{Chain}
+ """
+ return self._chain
+
+ @property
+ def atoms(self):
+ """
+ All atoms involved in this map, sorted by residue number
+ @rtype: tuple of L{Atom}
+ """
+ return tuple(self._atoms)
+
+ @property
+ def contacts(self):
+ """
+ All atom contacts: an iterator over all contacting
+ (L{Atom}, L{Atom}) pairs.
+ @rtype: iterator of 2-tuples
+ """
+ visited = set()
+
+ for a1 in self._map:
+ for a2 in self._map[a1]:
+ if (a1, a2) not in visited:
+ visited.add((a1, a2))
+ visited.add((a2, a1))
+ yield (a1, a2)
+
+ def build(self):
+ """
+ Extract all contacts from the chain using the current distance cutoff.
+ """
+
+ self._map = {}
+
+ for atom1 in self._atoms:
+ for atom2 in self._atoms:
+ if atom1 is not atom2:
+ distance = numpy.linalg.norm(atom1.vector - atom2.vector)
+ if distance <= self._cutoff:
+ self._connect(atom1, atom2)
+
+ def connect(self, atom1, atom2):
+ """
+ Define a contact between C{atom1} and C{atom2}.
+
+ @param atom1: first atom
+ @type atom1: L{Atom}
+ @param atom2: second atom
+ @type atom2: L{Atom}
+ """
+ for atom in [atom1, atom2]:
+ if atom not in self._atomset:
+ raise ValueError("No such atom in contact map: {0}".format(atom))
+
+ self._connect(atom1, atom2)
+
+ def _connect(self, atom1, atom2):
+
+ if atom1 not in self._map:
+ self._map[atom1] = set()
+ self._map[atom1].add(atom2)
+
+ if atom2 not in self._map:
+ self._map[atom2] = set()
+ self._map[atom2].add(atom1)
+
+ def connected(self, atom1, atom2):
+ """
+ Return True if the specified atoms are in contact.
+
+ @param atom1: first atom
+ @type atom1: L{Atom}
+ @param atom2: second atom
+ @type atom2: L{Atom}
+ """
+ if atom1 in self._map:
+ return atom2 in self._map[atom1]
+ return False
+
+ def atom_contacts(self, atom):
+ """
+ Return all atoms within C{self.cutoff} angstroms of C{atom}.
+
+ @param atom: anchor atom
+ @type atom: L{Atom}
+
+ @rtype: frozenset of L{Atom}
+ """
+
+ if atom in self._map:
+ return frozenset(self._map[atom])
+ else:
+ return frozenset()
+
+ def residue_contacts(self, residue):
+ """
+        Return all residues which have atoms within C{self.cutoff}
+        angstroms of any of C{residue}'s atoms.
+
+ @param residue: anchor residue
+ @type residue: L{Residue}
+
+ @rtype: frozenset of L{Residue}
+ """
+
+ partners = set()
+
+ for atom in residue.items:
+ if atom in self._map:
+ for partner in self._map[atom]:
+ partners.add(partner.residue)
+
+ return frozenset(partners)
+
+ def position(self, rank, atom_name):
+ """
+        Compute the location of the specified atom on the contact map.
+
+ @param rank: residue rank (1-based)
+ @type rank: int
+ @param atom_name: atom name
+ @type atom_name: str
+
+ @rtype: float
+ """
+ residue = self._chain.residues[rank]
+ atom = residue.atoms[atom_name]
+
+ try:
+ return self._coords[residue.rank][atom.name]
+ except KeyError:
+ msg = "No atom {0} at #{1} in contact map: {2}"
+ raise ValueError(msg.format(atom_name, rank, self._coords[residue.rank].values()))
+
+ def atom_matrix(self):
+ """
+        Build a 2D binary contact matrix (0=no contact, 1=contact). The order of
+        elements in each dimension matches the order of atoms in the contact map
+        (see L{ContactMap.atoms} and C{iter(ContactMap)}), i.e. the atoms in each
+        dimension are sorted by residue number.
+
+        @deprecated: this method may be removed in future versions
+
+ @rtype: numpy.array (2D)
+ """
+
+ matrix = []
+
+ for i, atom1 in enumerate(self.atoms):
+ matrix.append([])
+
+ for atom2 in self.atoms:
+ if atom1 in self._map and atom2 in self._map[atom1]:
+ matrix[i].append(1)
+ else:
+ matrix[i].append(0)
+
+ return numpy.array(matrix)
+
+ def draw(self, plot, color="black"):
+ """
+ Visualize this contact map.
+
+ @param plot: L{csb.io.plots.Chart}'s plot to draw on
+ @type plot: matplotlib.AxesSubplot
+ @param color: pixel color (must be a matplotlib color constant)
+ @type color: str
+ """
+
+ x, y = [], []
+
+ for atom1 in self.atoms:
+ for atom2 in self.atom_contacts(atom1):
+ pos1 = self.position(atom1.residue.rank, atom1.name)
+ pos2 = self.position(atom2.residue.rank, atom2.name)
+
+ assert None not in (pos1, pos2), (atom1, atom2)
+ x.append(pos1)
+ y.append(pos2)
+
+ plot.plot(x, y, color=color, marker=",", linestyle='none')
+
+ plot.set_xlim(0, self.chain.length)
+ plot.set_ylim(0, self.chain.length)
+
+ return plot
+
+ @staticmethod
+ def compare(query, reference, min_distance=0):
+ """
+ Compare a query contact map against a reference.
+
+ @type query: L{ContactMap}
+ @type reference: L{ContactMap}
+
+ @param min_distance: consider only contacts between atoms, separated by
+ the given minimum number of residues
+ @type min_distance: int
+
+ @return: precision and coverage
+ @rtype: L{ContactMapComparisonInfo}
+ """
+ if query.chain is not reference.chain:
+ raise ValueError("Contact maps are not comparable")
+ if not query._map and not reference._map:
+ raise ValueError("Can't compare empty contact maps")
+
+ true_pos = 0.0
+ false_pos = 0.0
+ false_neg = 0.0
+
+ for a1, a2 in query.contacts:
+ if abs(a1.residue.rank - a2.residue.rank) >= min_distance:
+ if reference.connected(a1, a2):
+ true_pos += 1.0
+ else:
+ false_pos += 1.0
+
+ for a1, a2 in reference.contacts:
+ if abs(a1.residue.rank - a2.residue.rank) >= min_distance:
+ if not query.connected(a1, a2):
+ false_neg += 1.0
+
+ try:
+ precision = true_pos / (true_pos + false_pos)
+ coverage = true_pos / (true_pos + false_neg)
+ return ContactMapComparisonInfo(precision, coverage)
+
+ except ZeroDivisionError:
+ return ContactMapComparisonInfo(0, 0)
+
+class ContactMapComparisonInfo(object):
+
+ def __init__(self, precision, coverage):
+
+ self.precision = precision
+ self.coverage = coverage
+
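A minimal usage sketch for the contact map API above, assuming these classes live in
csb.bio.nmr (as the diff suggests) and that C{chain} is an already parsed L{Chain};
the CB-only filter and cutoffs are purely illustrative:

    from csb.bio.nmr import ContactMap

    cb_only = lambda atom: atom.name == "CB"     # any callable returning bool works

    strict = ContactMap(chain, cutoff=6.0, filter=cb_only)
    strict.build()

    loose = ContactMap(chain, cutoff=8.0, filter=cb_only)
    loose.build()

    for a1, a2 in strict.contacts:
        print(a1.residue.rank, a1.name, "<->", a2.residue.rank, a2.name)

    # both maps were built over the same chain object, so they are comparable
    info = ContactMap.compare(strict, loose, min_distance=6)
    print(info.precision, info.coverage)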
+
+class Label(object):
+ """
+ Utility class for working with chemical shift labels.
+
+ @param residue: residue type
+ @type residue: L{EnumItem}
+ @param rank: residue position (1-based)
+ @type rank: int
+ @param atom_name: nucleus name
+ @type atom_name: str
+ """
+
+ @staticmethod
+ def build(residue_type, position, atom_name):
+ """
+ Build a new string label by specifying its components.
+ @rtype: str
+ """
+ return '{0!s}#{1}:{2}'.format(residue_type, position, atom_name)
+
+ @staticmethod
+ def from_shift(shift):
+ """
+ Build a new string label from a L{ChemShiftInfo}.
+ @rtype: str
+ """
+ return Label.build(shift.residue, shift.position, shift.name)
+
+ @staticmethod
+ def from_atom(atom):
+ """
+ Build a new string label from an L{Atom}.
+ @rtype: str
+ """
+ return Label.build(atom.residue.type, atom.residue.rank, atom.name)
+
+ @staticmethod
+ def match(shift, atom):
+ """
+ Return True if the labels of a L{ChemShiftInfo} and an L{Atom} match.
+ @rtype: bool
+ """
+
+ l = Label.from_shift(shift)
+ r = Label.from_atom(atom)
+
+ return r == l
+
+ @staticmethod
+ def get_atom(chain, label):
+ """
+ Get the L{Atom} in a L{Chain}, designated by a given string label.
+ @rtype: L{Atom}
+ """
+ dummy, rank, atom = Label.parse(label)
+ return chain.residues[rank].atoms[atom]
+
+ @staticmethod
+ def parse(label):
+ """
+ Parse the components of a string nucleus label.
+ @return: (residue, rank, atom)
+ @rtype: 3-tuple
+ """
+ parts = label.split("#")
+ residue = parts[0]
+
+ subparts = parts[1].split(":")
+ rank = int(subparts[0])
+ atom = subparts[1]
+
+ return (residue, rank, atom)
+
+ @staticmethod
+ def from_string(label):
+ """
+        Parse a string nucleus label and create a new L{Label}.
+ @rtype: L{Label}
+ """
+ residue, rank, atom = Label.parse(label)
+ return Label(residue, rank, atom)
+
+ def __init__(self, residue, rank, atom_name):
+
+ self._residue = residue
+ self._rank = rank
+ self._atom = atom_name
+
+ @property
+ def residue(self):
+ """
+ Residue type (a L{ProteinAlphabet} member)
+ """
+ return self._residue
+
+ @property
+ def rank(self):
+ """
+ Residue rank (1-based)
+ """
+ return self._rank
+
+ @property
+ def atom_name(self):
+ """
+ Nucleus name
+ """
+ return self._atom
+
+ def __str__(self):
+ return Label.build(self._residue, self._rank, self._atom)
+
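For reference, the label format built and parsed above is C{residue#rank:atom}; a quick
round trip with arbitrary values:

    label = Label.build("ALA", 12, "CA")        # -> "ALA#12:CA"
    residue, rank, atom = Label.parse(label)    # -> ("ALA", 12, "CA")

    parsed = Label.from_string(label)
    assert (parsed.rank, parsed.atom_name) == (12, "CA")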
+
+class ChemShiftInfo(object):
+ """
+ Chemical shift struct.
+
+ @param position: residue rank (1-based)
+ @type position: int
+ @param residue: amino acid type (a member of L{ProteinAlphabet})
+ @type residue: str or L{EnumItem}
+ @param name: nucleus label
+ @type name: str
+ @param element: nucleus type (a member of L{ChemElements})
+ @type element: str or L{EnumItem}
+ @param shift: chemical shift value
+ @type shift: float
+ """
+
+ def __init__(self, position, residue, name, element, shift):
+
+ if not isinstance(residue, pu.EnumItem) or residue.enum is not ProteinAlphabet:
+ residue = pu.Enum.parsename(ProteinAlphabet, str(residue))
+
+ if not isinstance(element, pu.EnumItem) or element.enum is not ChemElements:
+ element = pu.Enum.parsename(ChemElements, str(element))
+
+ self.position = int(position)
+ self.residue = residue
+ self.name = str(name)
+ self.element = element
+ self.shift = float(shift)
+
+ def clone(self, name):
+ """
+ Clone the current shift and create a new one with the specified
+ nucleus label.
+
+ @rtype: L{ChemShiftInfo}
+ """
+ ni = self
+ return ChemShiftInfo(ni.position, repr(ni.residue), name, repr(ni.element), ni.shift)
+
+ def __str__(self):
+ return "{0!s}#{1}:{2}".format(self.residue, self.position, self.name)
+
+ @property
+ def label(self):
+ """
+ String label representation
+ @rtype: str
+ """
+ return str(self)
+
+class ChemicalShiftNetwork(object):
+ """
+ Describes a network of covalently connected, chemical shift visible nuclei.
+
+ @param shifts: chemical shift instances
+ @type shifts: iterable of L{ChemShiftInfo}
+ """
+
+ def __init__(self, shifts):
+
+ self._neighbors = {}
+
+ labels = {}
+
+ for cs in shifts:
+ self._neighbors[cs] = set()
+ id = Label.from_shift(cs)
+ labels[id] = cs
+
+ conn = AtomConnectivity.get()
+
+ for cs in shifts:
+ for atom_name in conn.connected_atoms(cs.residue, cs.name):
+ target = Label.build(cs.residue, cs.position, atom_name)
+ if target in labels:
+ self.connect(cs, labels[target])
+
+ def connect(self, cs1, cs2):
+ """
+ Connect two nuclei.
+
+ @param cs1: first chemical shift instance
+ @type cs1: L{ChemShiftInfo}
+ @param cs2: second chemical shift instance
+ @type cs2: L{ChemShiftInfo}
+ """
+
+ try:
+ self._neighbors[cs1].add(cs2)
+ self._neighbors[cs2].add(cs1)
+ except KeyError:
+ raise ValueError("Unknown chemical shift")
+
+ def connected_shifts(self, source, element=None):
+ """
+        Return an iterator over all nuclei covalently connected to a given
+        C{source}.
+
+        @param source: source chemical shift
+        @type source: L{ChemShiftInfo}
+        @param element: if specified, yield only nuclei of the given type
+        (a member of L{ChemElements}); otherwise yield all connected nuclei
+        @type element: L{EnumItem}
+
+        @rtype: iterator of L{ChemShiftInfo}
+        """
+
+ if source not in self._neighbors:
+ raise ValueError("No such chemical shift in this network")
+
+ for cs in self._neighbors[source]:
+ if element is None or cs.element == element:
+ yield cs
+
+ def __iter__(self):
+ return iter(self._neighbors)
+
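A sketch of populating and querying a network; the shift values are invented, and the
covalent connectivity is taken from the AtomConnectivity resource added later in this
commit:

    shifts = [ChemShiftInfo(1, "ALA", "N",  "N", 120.3),
              ChemShiftInfo(1, "ALA", "H",  "H",   8.2),
              ChemShiftInfo(1, "ALA", "CA", "C",  52.5),
              ChemShiftInfo(1, "ALA", "HA", "H",   4.3)]

    network = ChemicalShiftNetwork(shifts)

    for cs in network:
        partners = [p.label for p in network.connected_shifts(cs)]
        print(cs.label, "->", partners)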
+class ChemShiftScoringModel(object):
+ """
+    Chemical shift similarity scoring model. See C{ChemShiftScoringModel.NUCLEI}
+    for a list of supported nucleus types.
+ """
+
+ NUCLEI = ('CA', 'CB', 'C', 'N', 'HA')
+
+
+ def __init__(self):
+
+ self._pos = {}
+ self._neg = {}
+
+ self._pos['CA'] = GeneralizedNormal(0.02, 1.32, 1.1)
+ self._neg['CA'] = GeneralizedNormal(-0.08, 4.23, 2.2)
+
+ self._pos['CB'] = GeneralizedNormal(0.06, 1.32, 1.0)
+ self._neg['CB'] = GeneralizedNormal(0.08, 2.41, 1.2)
+
+ self._pos['C'] = GeneralizedNormal(0.12, 1.52, 1.4)
+ self._neg['C'] = GeneralizedNormal(-0.13, 3.42, 2.1)
+
+ self._pos['N'] = GeneralizedNormal(0.23, 4.39, 1.4)
+ self._neg['N'] = GeneralizedNormal(0.17, 7.08, 1.9)
+
+ self._pos['HA'] = GeneralizedNormal(0.00, 0.27, 1.0)
+ self._neg['HA'] = GeneralizedNormal(-0.01, 0.66, 1.4)
+
+ assert set(self._pos) == set(ChemShiftScoringModel.NUCLEI)
+ assert set(self._neg) == set(ChemShiftScoringModel.NUCLEI)
+
+ def positive(self, nucleus, deltas):
+ """
+ Return the probability that a given chemical shift difference
+ indicates structural similarity (true positive match).
+
+        @param nucleus: nucleus type (a member of C{ChemShiftScoringModel.NUCLEI})
+ @type nucleus: str
+ @param deltas: chemical shift difference(s): q-s
+ @type deltas: float or list of floats
+
+ @return: the raw value of the probability density function
+ @rtype: float or array of floats
+ """
+ results = self._pos[nucleus].evaluate([deltas])
+ return results[0]
+
+ def negative(self, nucleus, deltas):
+ """
+ Return the probability that a given chemical shift difference
+ indicates no structural similarity (true negative match).
+
+        @param nucleus: nucleus type (a member of C{ChemShiftScoringModel.NUCLEI})
+ @type nucleus: str
+ @param deltas: chemical shift difference(s): q-s
+ @type deltas: float or list of floats
+
+ @return: the raw value of the probability density function
+ @rtype: float or array of floats
+ """
+ results = self._neg[nucleus].evaluate([deltas])
+ return results[0]
+
+ def score(self, nucleus, deltas):
+ """
+ Return the bit score for a given chemical shift difference.
+
+        @param nucleus: nucleus type (a member of C{ChemShiftScoringModel.NUCLEI})
+ @type nucleus: str
+ @param deltas: chemical shift difference(s): q-s
+ @type deltas: float or list of floats
+
+ @return: bit score
+ @rtype: float or array of floats
+ """
+ pos = self.positive(nucleus, deltas)
+ neg = self.negative(nucleus, deltas)
+
+ return numpy.log2(pos / neg)
+
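The model maps a query-minus-subject shift difference (in ppm) to a log-odds bit score,
where positive values favour structural similarity; for example, with made-up deltas:

    model = ChemShiftScoringModel()

    print(model.positive('CA', 0.5))        # density under the "similar" model
    print(model.negative('CA', 0.5))        # density under the "dissimilar" model
    print(model.score('CA', 0.5))           # log2 of their ratio (bit score)

    # deltas can also be passed as a list, yielding an array of scores
    print(model.score('CA', [0.1, 1.5, 4.0]))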
+
+class NOEPeak(object):
+ """
+ Describes a single NOE peak.
+
+ @param intensity: peak intensity
+ @type intensity: float
+ @param dimensions: list of dimension values
+ @type dimensions: iterable of float
+ @param spectrum: owning NOE spectrum
+ @type spectrum: L{NOESpectrum}
+ """
+
+ def __init__(self, intensity, dimensions, spectrum):
+
+ self._dimensions = list(dimensions)
+ self._intensity = float(intensity)
+ self._spectrum = spectrum
+
+ @property
+ def intensity(self):
+ """
+ Peak intensity
+ @rtype: float
+ """
+ return self._intensity
+
+ @property
+ def num_dimensions(self):
+ """
+ Number of dimensions
+ @rtype: int
+ """
+ return len(self._dimensions)
+
+ def has_element(self, e):
+ """
+ Return True if the owning spectrum contains a dimension of the specified type
+
+ @param e: element (dimension) type (see L{ChemElements})
+ @type e: L{EnumItem}
+
+ @rtype: bool
+ """
+ return self._spectrum.has_element(e)
+
+ def __getitem__(self, column):
+ return self.get(column)
+
+ def __iter__(self):
+ return iter(self._dimensions)
+
+ def __str__(self):
+ return '<NOEPeak: {0}, I={1}>'.format(self._dimensions, self._intensity)
+
+ def element(self, i):
+ """
+ Return the dimension (nucleus) type at dimension index i
+
+ @param i: dimension index (0-based)
+ @type i: int
+
+ @return: nucleus type
+ @rtype: L{EnumItem}
+ """
+ return self._spectrum.element(i)
+
+ def get(self, column):
+ """
+ Get the value of the specified dimension.
+
+ @param column: dimension index (0-based)
+ @type column: int
+
+ @return: dimension value
+ @rtype: float
+ """
+ if 0 <= column < len(self._dimensions):
+ return self._dimensions[column]
+ else:
+ raise IndexError("Dimension index out of range")
+
+ def has_connected_dimensions(self, i):
+ """
+        Return True if dimension index C{i} has covalently connected dimensions.
+
+ @param i: dimension index (0-based)
+ @type i: int
+
+ @rtype: bool
+ """
+ return self._spectrum.has_connected_dimensions(i)
+
+ def connected_dimensions(self, i):
+ """
+        Return a list of all dimension indices covalently connected to
+        dimension C{i}.
+
+        @param i: dimension index (0-based)
+        @type i: int
+
+        @rtype: iterable of int
+ """
+ return self._spectrum.connected_dimensions(i)
+
+
+class NOESpectrum(object):
+ """
+ Describes an NOE spectrum.
+
+ @param elements: list of dimension (nucleus) types for each dimension
+ @type elements: iterable of L{EnumItem} (L{ChemElements}) or str
+ """
+ def __init__(self, elements):
+
+ self._elements = []
+ self._elemset = set()
+ self._connected = {}
+ self._protondim = set()
+ self._peaks = []
+ self._min = float("inf")
+ self._max = float("-inf")
+
+ for i, n in enumerate(elements):
+
+ if not isinstance(n, pu.EnumItem) or n.enum is not ChemElements:
+ element = pu.Enum.parsename(ChemElements, n)
+ else:
+ element = n
+ self._elements.append(element)
+
+ if element == ChemElements.H:
+ self._protondim.add(i)
+
+ self._elemset = set(self._elements)
+
+ @staticmethod
+ def join(spectrum, *spectra):
+ """
+ Merge multiple L{NOESpectrum} instances. All C{spectra} must have matching
+ dimensions according to the master C{spectrum}.
+
+ @return: merged spectrum
+ @rtype: L{NOESpectrum}
+ """
+ elements = tuple(spectrum.dimensions)
+ joint = NOESpectrum(map(repr, elements))
+
+ for i, dummy in enumerate(elements):
+ for j in spectrum.connected_dimensions(i):
+ joint.connect(i, j)
+
+ for s in [spectrum] + list(spectra):
+ if tuple(s.dimensions) != elements:
+ raise ValueError("Incompatible spectrum: {0}".format(s))
+ for p in s:
+ joint.add(p.intensity, list(p))
+
+ return joint
+
+
+ def __iter__(self):
+ return iter(self._peaks)
+
+ def __len__(self):
+ return len(self._peaks)
+
+ def __str__(self):
+ return '<NOESpectrum: {0}>'.format(self._elements)
+
+ def __getitem__(self, i):
+ try:
+ return self._peaks[i]
+ except IndexError:
+ raise IndexError("Peak index out of range")
+
+ @property
+ def min_intensity(self):
+ """
+ Minimum intensity
+ @rtype: float
+ """
+ return self._min
+
+ @property
+ def max_intensity(self):
+ """
+ Maximum intensity
+ @rtype: float
+ """
+ return self._max
+
+ @property
+ def dimensions(self):
+ """
+ Tuple of all dimensions (nucleus types)
+ @rtype: tuple of L{EnumItem}
+ """
+ return tuple(self._elements)
+
+ @property
+ def proton_dimensions(self):
+ """
+ Tuple of all proton dimension indices
+ @rtype: tuple of int
+ """
+ return tuple(self._protondim)
+
+ @property
+ def num_dimensions(self):
+ """
+ Number of dimensions
+ @rtype: int
+ """
+ return len(self._elements)
+
+ @property
+ def num_proton_dimensions(self):
+ """
+ Number of proton dimensions
+ @rtype: int
+ """
+ return len(self._protondim)
+
+ def has_element(self, e):
+ """
+ Return True if the spectrum contains a dimension of the specified type
+
+ @param e: element (dimension) type (see L{ChemElements})
+ @type e: L{EnumItem}
+
+ @rtype: bool
+ """
+ return e in self._elemset
+
+ def connect(self, i1, i2):
+ """
+ Mark dimensions with indices C{i1} and C{i2} as covalently connected.
+
+ @param i1: dimension index 1 (0-based)
+ @type i1: int
+ @param i2: dimension index 2 (0-based)
+ @type i2: int
+ """
+
+ for i in [i1, i2]:
+ if not 0 <= i < self.num_dimensions:
+ raise IndexError("Dimension index out of range")
+
+ if i1 == i2:
+ raise ValueError("Can't connect a dimension to itself")
+ if not self._can_connect(i1, i2):
+ raise ValueError("Only proton-nonproton bonds are allowed")
+
+ self._connected.setdefault(i1, set()).add(i2)
+ self._connected.setdefault(i2, set()).add(i1)
+
+ def _can_connect(self, i1, i2):
+
+ pair = set()
+
+ for i in [i1, i2]:
+ is_proton = self.element(i) == ChemElements.H
+ pair.add(is_proton)
+
+ if True in pair and False in pair:
+ return True
+
+ return False
+
+ def has_connected_dimensions(self, i):
+ """
+        Return True if dimension index C{i} has covalently connected dimensions.
+
+ @param i: dimension index (0-based)
+ @type i: int
+
+ @rtype: bool
+ """
+ if i in self._connected:
+ return len(self._connected[i]) > 0
+
+ return False
+
+ def connected_dimensions(self, i):
+ """
+        Return a list of all dimension indices covalently connected to
+        dimension C{i}.
+
+ @param i: dimension index (0-based)
+ @type i: int
+
+ @rtype: iterable of int
+ """
+ if i in self._connected:
+ return tuple(self._connected[i])
+
+ return tuple()
+
+ def add(self, intensity, dimensions):
+ """
+ Add a new NOE peak.
+
+ @param intensity: peak intensity
+ @type intensity: float
+ @param dimensions: list of dimension values
+        @type dimensions: iterable of float
+ """
+
+ dimensions = list(dimensions)
+ if len(dimensions) != self.num_dimensions:
+ raise ValueError("Invalid number of dimensions")
+
+ peak = NOEPeak(intensity, dimensions, self)
+ self._peaks.append(peak)
+
+ if peak.intensity < self._min:
+ self._min = peak.intensity
+ if peak.intensity > self._max:
+ self._max = peak.intensity
+
+ def element(self, i):
+ """
+ Return the chemical element (nucleus) type at dimension index C{i}.
+ @rtype: L{EnumItem}
+ """
+ return self._elements[i]
+
+
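Tying the two NOE classes together, a three-dimensional (H, N, H) spectrum with a
covalent bond between dimensions 0 and 1 could be assembled as follows (peak values
are invented):

    spectrum = NOESpectrum(['H', 'N', 'H'])
    spectrum.connect(0, 1)                   # only proton-nonproton bonds are allowed

    spectrum.add(1.5e6, [8.21, 120.3, 4.32])
    spectrum.add(3.2e5, [8.21, 120.3, 2.11])

    for peak in spectrum:
        print(peak.intensity, peak.get(0), peak.connected_dimensions(0))

    print(spectrum.min_intensity, spectrum.max_intensity)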
diff --git a/csb/bio/nmr/resources/AtomConnectivity.xml b/csb/bio/nmr/resources/AtomConnectivity.xml
new file mode 100644
index 0000000..b1f0f34
--- /dev/null
+++ b/csb/bio/nmr/resources/AtomConnectivity.xml
@@ -0,0 +1,812 @@
+<!DOCTYPE connectivity SYSTEM "connectivity1.0.dtd">
+<connectivity>
+ <residue type="ALA">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="H"/>
+ <atom name="H1"/>
+ <atom name="H2"/>
+ <atom name="H3"/>
+ <atom name="HA"/>
+ <atom name="HB1"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="O'"/>
+ <atom name="O''"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="C" atom2="O'"/>
+ <bond atom1="C" atom2="O''"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="HB1"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="N" atom2="H1"/>
+ <bond atom1="N" atom2="H2"/>
+ <bond atom1="N" atom2="H3"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="HB1"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="ARG">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD"/>
+ <atom name="CG"/>
+ <atom name="CZ"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD2"/>
+ <atom name="HD3"/>
+ <atom name="HE"/>
+ <atom name="HG2"/>
+ <atom name="HG3"/>
+ <atom name="HH11"/>
+ <atom name="HH21"/>
+ <atom name="HH22"/>
+ <atom name="N"/>
+ <atom name="NE"/>
+ <atom name="NH1"/>
+ <atom name="NH2"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD" atom2="HD2"/>
+ <bond atom1="CD" atom2="HD3"/>
+ <bond atom1="CD" atom2="NE"/>
+ <bond atom1="CG" atom2="CD"/>
+ <bond atom1="CG" atom2="HG2"/>
+ <bond atom1="CG" atom2="HG3"/>
+ <bond atom1="CZ" atom2="NH1"/>
+ <bond atom1="CZ" atom2="NH2"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="NE" atom2="CZ"/>
+ <bond atom1="NE" atom2="HE"/>
+ <bond atom1="NH1" atom2="HH11"/>
+ <bond atom1="NH2" atom2="HH21"/>
+ <bond atom1="NH2" atom2="HH22"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD"/>
+ <dihedral name="chi3" atom1="CB" atom2="CG" atom3="CD" atom4="NE"/>
+ <dihedral name="chi4" atom1="CG" atom2="CD" atom3="NE" atom4="CZ"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="ASN">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD21"/>
+ <atom name="HD22"/>
+ <atom name="N"/>
+ <atom name="ND2"/>
+ <atom name="O"/>
+ <atom name="OD1"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CG" atom2="ND2"/>
+ <bond atom1="CG" atom2="OD1"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="ND2" atom2="HD21"/>
+ <bond atom1="ND2" atom2="HD22"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="ND2"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="ASP">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD2"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="OD1"/>
+ <atom name="OD2"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CG" atom2="OD1"/>
+ <bond atom1="CG" atom2="OD2"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="OD2" atom2="HD2"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="OD1"/>
+ <dihedral name="chi32" atom1="CB" atom2="CG" atom3="OD2" atom4="HD2"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="CYS">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HG"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="SG"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="CB" atom2="SG"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="SG" atom2="HG"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="SG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="SG" atom4="HG"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="GLN">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HE21"/>
+ <atom name="HE22"/>
+ <atom name="HG2"/>
+ <atom name="HG3"/>
+ <atom name="N"/>
+ <atom name="NE2"/>
+ <atom name="O"/>
+ <atom name="OE1"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD" atom2="NE2"/>
+ <bond atom1="CD" atom2="OE1"/>
+ <bond atom1="CG" atom2="CD"/>
+ <bond atom1="CG" atom2="HG2"/>
+ <bond atom1="CG" atom2="HG3"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="NE2" atom2="HE21"/>
+ <bond atom1="NE2" atom2="HE22"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD"/>
+ <dihedral name="chi3" atom1="CB" atom2="CG" atom3="CD" atom4="NE2"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="GLU">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HE2"/>
+ <atom name="HG2"/>
+ <atom name="HG3"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="OE1"/>
+ <atom name="OE2"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD" atom2="OE1"/>
+ <bond atom1="CD" atom2="OE2"/>
+ <bond atom1="CG" atom2="CD"/>
+ <bond atom1="CG" atom2="HG2"/>
+ <bond atom1="CG" atom2="HG3"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="OE2" atom2="HE2"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD"/>
+ <dihedral name="chi3" atom1="CB" atom2="CG" atom3="CD" atom4="OE1"/>
+ <dihedral name="chi42" atom1="CG" atom2="CD" atom3="OE2" atom4="HE2"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="GLY">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="H"/>
+ <atom name="HA2"/>
+ <atom name="HA3"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="HA3"/>
+ <bond atom1="CA" atom2="HA2"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="HIS">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD2"/>
+ <atom name="CE1"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD1"/>
+ <atom name="HD2"/>
+ <atom name="HE1"/>
+ <atom name="N"/>
+ <atom name="ND1"/>
+ <atom name="NE2"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD2" atom2="HD2"/>
+ <bond atom1="CD2" atom2="CG"/>
+ <bond atom1="CE1" atom2="HE1"/>
+ <bond atom1="CE1" atom2="NE2"/>
+ <bond atom1="CG" atom2="ND1"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="ND1" atom2="CE1"/>
+ <bond atom1="ND1" atom2="HD1"/>
+ <bond atom1="NE2" atom2="CD2"/>
+ <cut atom1="CD2" atom2="CG"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="ND1"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="ILE">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD1"/>
+ <atom name="CG1"/>
+ <atom name="CG2"/>
+ <atom name="H"/>
+ <atom name="H1"/>
+ <atom name="H2"/>
+ <atom name="H3"/>
+ <atom name="HA"/>
+ <atom name="HB"/>
+ <atom name="HD11"/>
+ <atom name="HD12"/>
+ <atom name="HD13"/>
+ <atom name="HG12"/>
+ <atom name="HG13"/>
+ <atom name="HG21"/>
+ <atom name="HG22"/>
+ <atom name="HG23"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG1"/>
+ <bond atom1="CB" atom2="CG2"/>
+ <bond atom1="CB" atom2="HB"/>
+ <bond atom1="CD1" atom2="HD11"/>
+ <bond atom1="CD1" atom2="HD12"/>
+ <bond atom1="CD1" atom2="HD13"/>
+ <bond atom1="CG1" atom2="CD1"/>
+ <bond atom1="CG1" atom2="HG12"/>
+ <bond atom1="CG1" atom2="HG13"/>
+ <bond atom1="CG2" atom2="HG21"/>
+ <bond atom1="CG2" atom2="HG22"/>
+ <bond atom1="CG2" atom2="HG23"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="N" atom2="H1"/>
+ <bond atom1="N" atom2="H2"/>
+ <bond atom1="N" atom2="H3"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG1"/>
+ <dihedral name="chi21" atom1="CA" atom2="CB" atom3="CG1" atom4="CD1"/>
+ <dihedral name="chi22" atom1="CA" atom2="CB" atom3="CG2" atom4="HG21"/>
+ <dihedral name="chi31" atom1="CB" atom2="CG1" atom3="CD1" atom4="HD11"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="LEU">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD1"/>
+ <atom name="CD2"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD11"/>
+ <atom name="HD12"/>
+ <atom name="HD13"/>
+ <atom name="HD21"/>
+ <atom name="HD22"/>
+ <atom name="HD23"/>
+ <atom name="HG"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD1" atom2="HD11"/>
+ <bond atom1="CD1" atom2="HD12"/>
+ <bond atom1="CD1" atom2="HD13"/>
+ <bond atom1="CD2" atom2="HD21"/>
+ <bond atom1="CD2" atom2="HD22"/>
+ <bond atom1="CD2" atom2="HD23"/>
+ <bond atom1="CG" atom2="CD1"/>
+ <bond atom1="CG" atom2="CD2"/>
+ <bond atom1="CG" atom2="HG"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD1"/>
+ <dihedral name="chi31" atom1="CB" atom2="CG" atom3="CD1" atom4="HD11"/>
+ <dihedral name="chi32" atom1="CB" atom2="CG" atom3="CD2" atom4="HD21"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="LYS">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD"/>
+ <atom name="CE"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD2"/>
+ <atom name="HD3"/>
+ <atom name="HE2"/>
+ <atom name="HE3"/>
+ <atom name="HG2"/>
+ <atom name="HG3"/>
+ <atom name="HZ1"/>
+ <atom name="HZ2"/>
+ <atom name="N"/>
+ <atom name="NZ"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD" atom2="CE"/>
+ <bond atom1="CD" atom2="HD2"/>
+ <bond atom1="CD" atom2="HD3"/>
+ <bond atom1="CE" atom2="HE2"/>
+ <bond atom1="CE" atom2="HE3"/>
+ <bond atom1="CE" atom2="NZ"/>
+ <bond atom1="CG" atom2="CD"/>
+ <bond atom1="CG" atom2="HG2"/>
+ <bond atom1="CG" atom2="HG3"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="NZ" atom2="HZ1"/>
+ <bond atom1="NZ" atom2="HZ2"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD"/>
+ <dihedral name="chi3" atom1="CB" atom2="CG" atom3="CD" atom4="CE"/>
+ <dihedral name="chi4" atom1="CG" atom2="CD" atom3="CE" atom4="NZ"/>
+ <dihedral name="chi5" atom1="CD" atom2="CE" atom3="NZ" atom4="HZ1"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="MET">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CE"/>
+ <atom name="CG"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HE1"/>
+ <atom name="HE2"/>
+ <atom name="HE3"/>
+ <atom name="HG2"/>
+ <atom name="HG3"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="SD"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CE" atom2="HE1"/>
+ <bond atom1="CE" atom2="HE2"/>
+ <bond atom1="CE" atom2="HE3"/>
+ <bond atom1="CG" atom2="HG2"/>
+ <bond atom1="CG" atom2="HG3"/>
+ <bond atom1="CG" atom2="SD"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="SD" atom2="CE"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="SD"/>
+ <dihedral name="chi3" atom1="CB" atom2="CG" atom3="SD" atom4="CE"/>
+ <dihedral name="chi4" atom1="CG" atom2="SD" atom3="CE" atom4="HE1"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="PHE">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD1"/>
+ <atom name="CD2"/>
+ <atom name="CE1"/>
+ <atom name="CE2"/>
+ <atom name="CG"/>
+ <atom name="CZ"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD1"/>
+ <atom name="HD2"/>
+ <atom name="HE1"/>
+ <atom name="HE2"/>
+ <atom name="HZ"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD1" atom2="CE1"/>
+ <bond atom1="CD1" atom2="HD1"/>
+ <bond atom1="CD2" atom2="HD2"/>
+ <bond atom1="CD2" atom2="CG"/>
+ <bond atom1="CE1" atom2="CZ"/>
+ <bond atom1="CE1" atom2="HE1"/>
+ <bond atom1="CE2" atom2="CD2"/>
+ <bond atom1="CE2" atom2="HE2"/>
+ <bond atom1="CG" atom2="CD1"/>
+ <bond atom1="CZ" atom2="CE2"/>
+ <bond atom1="CZ" atom2="HZ"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <cut atom1="CD2" atom2="CG"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD1"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="PRO">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD"/>
+ <atom name="CG"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD2"/>
+ <atom name="HD3"/>
+ <atom name="HG2"/>
+ <atom name="HG3"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD" atom2="CG"/>
+ <bond atom1="CD" atom2="HD2"/>
+ <bond atom1="CD" atom2="HD3"/>
+ <bond atom1="CG" atom2="CB"/>
+ <bond atom1="CG" atom2="HG2"/>
+ <bond atom1="CG" atom2="HG3"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="CD"/>
+ <cut atom1="CA" atom2="CB"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ </residue>
+ <residue type="SER">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HG"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="OG"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CB" atom2="OG"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="OG" atom2="HG"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="OG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="OG" atom4="HG"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="THR">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CG2"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB"/>
+ <atom name="HG1"/>
+ <atom name="HG21"/>
+ <atom name="HG22"/>
+ <atom name="HG23"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="OG1"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG2"/>
+ <bond atom1="CB" atom2="HB"/>
+ <bond atom1="CB" atom2="OG1"/>
+ <bond atom1="CG2" atom2="HG21"/>
+ <bond atom1="CG2" atom2="HG22"/>
+ <bond atom1="CG2" atom2="HG23"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="OG1" atom2="HG1"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="OG1"/>
+ <dihedral name="chi21" atom1="CA" atom2="CB" atom3="OG1" atom4="HG1"/>
+ <dihedral name="chi22" atom1="CA" atom2="CB" atom3="CG2" atom4="HG21"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="TRP">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD1"/>
+ <atom name="CD2"/>
+ <atom name="CE2"/>
+ <atom name="CE3"/>
+ <atom name="CG"/>
+ <atom name="CH2"/>
+ <atom name="CZ2"/>
+ <atom name="CZ3"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD1"/>
+ <atom name="HE1"/>
+ <atom name="HE3"/>
+ <atom name="HH2"/>
+ <atom name="HZ2"/>
+ <atom name="HZ3"/>
+ <atom name="N"/>
+ <atom name="NE1"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD1" atom2="HD1"/>
+ <bond atom1="CD1" atom2="NE1"/>
+ <bond atom1="CE2" atom2="CZ2"/>
+ <bond atom1="CE3" atom2="CD2"/>
+ <bond atom1="CE3" atom2="HE3"/>
+ <bond atom1="CG" atom2="CD1"/>
+ <bond atom1="CD2" atom2="CG"/>
+ <bond atom1="CD2" atom2="CE2"/>
+ <bond atom1="CH2" atom2="CZ3"/>
+ <bond atom1="CH2" atom2="HH2"/>
+ <bond atom1="CZ2" atom2="CH2"/>
+ <bond atom1="CZ2" atom2="HZ2"/>
+ <bond atom1="CZ3" atom2="CE3"/>
+ <bond atom1="CZ3" atom2="HZ3"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="NE1" atom2="CE2"/>
+ <bond atom1="NE1" atom2="HE1"/>
+ <cut atom1="CD2" atom2="CG"/>
+ <cut atom1="CD2" atom2="CE2"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD1"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="TYR">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CD1"/>
+ <atom name="CD2"/>
+ <atom name="CE1"/>
+ <atom name="CE2"/>
+ <atom name="CG"/>
+ <atom name="CZ"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB2"/>
+ <atom name="HB3"/>
+ <atom name="HD1"/>
+ <atom name="HD2"/>
+ <atom name="HE1"/>
+ <atom name="HE2"/>
+ <atom name="HH"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <atom name="OH"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG"/>
+ <bond atom1="CB" atom2="HB2"/>
+ <bond atom1="CB" atom2="HB3"/>
+ <bond atom1="CD1" atom2="CE1"/>
+ <bond atom1="CD1" atom2="HD1"/>
+ <bond atom1="CD2" atom2="HD2"/>
+ <bond atom1="CD2" atom2="CG"/>
+ <bond atom1="CE1" atom2="CZ"/>
+ <bond atom1="CE1" atom2="HE1"/>
+ <bond atom1="CE2" atom2="CD2"/>
+ <bond atom1="CE2" atom2="HE2"/>
+ <bond atom1="CG" atom2="CD1"/>
+ <bond atom1="CZ" atom2="CE2"/>
+ <bond atom1="CZ" atom2="OH"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <bond atom1="OH" atom2="HH"/>
+ <cut atom1="CD2" atom2="CG"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG"/>
+ <dihedral name="chi2" atom1="CA" atom2="CB" atom3="CG" atom4="CD1"/>
+ <dihedral name="chi6" atom1="CE1" atom2="CZ" atom3="OH" atom4="HH"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+ <residue type="VAL">
+ <atom name="C"/>
+ <atom name="CA"/>
+ <atom name="CB"/>
+ <atom name="CG1"/>
+ <atom name="CG2"/>
+ <atom name="H"/>
+ <atom name="HA"/>
+ <atom name="HB"/>
+ <atom name="HG11"/>
+ <atom name="HG12"/>
+ <atom name="HG13"/>
+ <atom name="HG21"/>
+ <atom name="HG22"/>
+ <atom name="HG23"/>
+ <atom name="N"/>
+ <atom name="O"/>
+ <bond atom1="C" atom2="O"/>
+ <bond atom1="CA" atom2="C"/>
+ <bond atom1="CA" atom2="CB"/>
+ <bond atom1="CA" atom2="HA"/>
+ <bond atom1="CB" atom2="CG1"/>
+ <bond atom1="CB" atom2="CG2"/>
+ <bond atom1="CB" atom2="HB"/>
+ <bond atom1="CG1" atom2="HG11"/>
+ <bond atom1="CG1" atom2="HG12"/>
+ <bond atom1="CG1" atom2="HG13"/>
+ <bond atom1="CG2" atom2="HG21"/>
+ <bond atom1="CG2" atom2="HG22"/>
+ <bond atom1="CG2" atom2="HG23"/>
+ <bond atom1="N" atom2="CA"/>
+ <bond atom1="N" atom2="H"/>
+ <dihedral name="chi1" atom1="N" atom2="CA" atom3="CB" atom4="CG1"/>
+ <dihedral name="chi21" atom1="CA" atom2="CB" atom3="CG1" atom4="HG11"/>
+ <dihedral name="chi22" atom1="CA" atom2="CB" atom3="CG2" atom4="HG21"/>
+ <dihedral name="omega" atom1="O-" atom2="C-" atom3="N" atom4="CA"/>
+ <dihedral name="phi" atom1="C-" atom2="N" atom3="CA" atom4="C"/>
+ <dihedral name="psi" atom1="N" atom2="CA" atom3="C" atom4="N+"/>
+ </residue>
+</connectivity>
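The resource above is consumed through the module's AtomConnectivity loader, but its
layout is simple enough to inspect directly; purely as an illustration (assuming the
XML file is available locally), using the standard library:

    import xml.etree.ElementTree as ET

    root = ET.parse("AtomConnectivity.xml").getroot()
    ala = next(r for r in root.findall("residue") if r.get("type") == "ALA")

    # atoms covalently bonded to CA in alanine, regardless of bond direction
    partners = [b.get("atom2") for b in ala.findall("bond") if b.get("atom1") == "CA"]
    partners += [b.get("atom1") for b in ala.findall("bond") if b.get("atom2") == "CA"]
    print(sorted(partners))                  # ['C', 'CB', 'HA', 'N']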
diff --git a/csb/bio/sequence/__init__.py b/csb/bio/sequence/__init__.py
index 650e17a..70b6313 100644
--- a/csb/bio/sequence/__init__.py
+++ b/csb/bio/sequence/__init__.py
@@ -129,6 +129,8 @@ class SequenceAlphabets(object):
SequenceTypes.DNA: NucleicAlphabet,
SequenceTypes.RNA: NucleicAlphabet,
SequenceTypes.Unknown: UnknownAlphabet }
+
+ ALL_ALPHABETS = set([ProteinAlphabet, NucleicAlphabet, UnknownAlphabet])
assert set(MAP) == csb.core.Enum.members(SequenceTypes)
@@ -136,8 +138,20 @@ class SequenceAlphabets(object):
def get(type):
"""
Get the alphabet corresponding to the specified sequence C{type}
+ @param type: a member of L{SequenceTypes}
+ @type type: L{csb.core.EnumItem}
+ @rtype: L{csb.core.enum}
+ """
+ return SequenceAlphabets.MAP[type]
+
+ @staticmethod
+ def contains(alphabet):
"""
- return SequenceAlphabets.MAP[type]
+ Return True if C{alphabet} is a sequence alphabet
+ @type alphabet: L{csb.core.enum}
+ @rtype: bool
+ """
+ return alphabet in SequenceAlphabets.ALL_ALPHABETS
class SequenceError(ValueError):
@@ -195,7 +209,7 @@ class ResidueInfo(object):
return self._type
@type.setter
def type(self, type):
- if type.enum not in (ProteinAlphabet, NucleicAlphabet, UnknownAlphabet):
+ if not SequenceAlphabets.contains(type.enum):
raise TypeError(type)
self._type = type
@@ -1313,5 +1327,6 @@ class A3MAlignment(AbstractAlignment):
@rtype: L{A3MAlignment}
"""
from csb.bio.io.fasta import SequenceAlignmentReader
- return SequenceAlignmentReader(strict=strict).read_a3m(string)
+ return SequenceAlignmentReader(strict=strict).read_a3m(string)
+
\ No newline at end of file
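The new C{ALL_ALPHABETS} set and C{contains()} helper reduce the alphabet check in
C{ResidueInfo.type} to a single call; in short:

    from csb.bio.sequence import SequenceAlphabets, SequenceTypes, NucleicAlphabet

    assert SequenceAlphabets.get(SequenceTypes.DNA) is NucleicAlphabet
    assert SequenceAlphabets.contains(NucleicAlphabet)
    assert not SequenceAlphabets.contains(SequenceTypes)    # not a sequence alphabet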
diff --git a/csb/bio/sequence/alignment.py b/csb/bio/sequence/alignment.py
new file mode 100644
index 0000000..6593997
--- /dev/null
+++ b/csb/bio/sequence/alignment.py
@@ -0,0 +1,617 @@
+"""
+Collection of sequence alignment algorithms.
+
+@note: The classes in this module have been optimized for performance.
+ Think twice before switching a field to a generally nicer property
+ access, because it turns out that these things often add significant
+ constants to the running time of a dynamic programming algorithm.
+"""
+
+from csb.bio.sequence import AbstractSequence, SequenceAlignment, RichSequence, ResidueInfo
+from abc import ABCMeta, abstractmethod
+
+
+class ResidueNotFoundError(KeyError):
+ pass
+
+
+class AbstractScoringMatrix(object):
+ """
+ Defines a pairwise sequence alignment scoring function.
+ """
+
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def score(self, x, y):
+ """
+ Return the pairwise score of residues C{x} and C{y}.
+ C{x} and C{y} must be comparable (e.g. implement __eq__ and __hash__).
+
+ @param x: first residue
+ @type x: object
+ @param y: second residue
+ @type y: object
+
+ @rtype: float
+
+ @raise ResidueNotFoundError: if C{x} or C{y} cannot be handled by this
+ scoring matrix
+ """
+ pass
+
+class IdentityMatrix(AbstractScoringMatrix):
+ """
+ Simple identity-based scoring matrix.
+
+ @param match: score for a match
+ @type match: float
+ @param mismatch: score for a mismatch
+ @type mismatch: float
+ """
+
+ def __init__(self, match=1, mismatch=-1):
+
+ self._match = float(match)
+ self._mismatch = float(mismatch)
+
+ @property
+ def match(self):
+ """
+ Score for a match
+ @rtype: float
+ """
+ return self._match
+
+ @property
+ def mismatch(self):
+ """
+ Score for a mismatch
+ @rtype: float
+ """
+ return self._mismatch
+
+ def score(self, x, y):
+
+ if x == y:
+ return self._match
+ else:
+ return self._mismatch
+
+class SimilarityMatrix(AbstractScoringMatrix):
+ """
+ Similarity scoring matrix.
+
+    @param matrix: pairwise residue scores as a nested dictionary, e.g.
+    C{matrix['A']['C']}; defaults to C{SimilarityMatrix.BLOSUM62}
+    @type matrix: dict of dict
+ """
+
+ BLOSUM62 = {
+ 'A': { 'A': 4.0, 'R':-1.0, 'N':-2.0, 'D':-2.0, 'C': 0.0, 'Q':-1.0, 'E':-1.0, 'G': 0.0, 'H':-2.0, 'I':-1.0, 'L':-1.0, 'K':-1.0, 'M':-1.0, 'F':-2.0, 'P':-1.0, 'S': 1.0, 'T': 0.0, 'W':-3.0, 'Y':-2.0, 'V': 0.0, 'B':-2.0, 'Z':-1.0, 'X': 0.0, '*':-4.0 },
+ 'R': { 'A':-1.0, 'R': 5.0, 'N': 0.0, 'D':-2.0, 'C':-3.0, 'Q': 1.0, 'E': 0.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-2.0, 'K': 2.0, 'M':-1.0, 'F':-3.0, 'P':-2.0, 'S':-1.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-3.0, 'B':-1.0, 'Z': 0.0, 'X':-1.0, '*':-4.0 },
+ 'N': { 'A':-2.0, 'R': 0.0, 'N': 6.0, 'D': 1.0, 'C':-3.0, 'Q': 0.0, 'E': 0.0, 'G': 0.0, 'H': 1.0, 'I':-3.0, 'L':-3.0, 'K': 0.0, 'M':-2.0, 'F':-3.0, 'P':-2.0, 'S': 1.0, 'T': 0.0, 'W':-4.0, 'Y':-2.0, 'V':-3.0, 'B': 3.0, 'Z': 0.0, 'X':-1.0, '*':-4.0 },
+ 'D': { 'A':-2.0, 'R':-2.0, 'N': 1.0, 'D': 6.0, 'C':-3.0, 'Q': 0.0, 'E': 2.0, 'G':-1.0, 'H':-1.0, 'I':-3.0, 'L':-4.0, 'K':-1.0, 'M':-3.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-4.0, 'Y':-3.0, 'V':-3.0, 'B': 4.0, 'Z': 1.0, 'X':-1.0, '*':-4.0 },
+ 'C': { 'A': 0.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C': 9.0, 'Q':-3.0, 'E':-4.0, 'G':-3.0, 'H':-3.0, 'I':-1.0, 'L':-1.0, 'K':-3.0, 'M':-1.0, 'F':-2.0, 'P':-3.0, 'S':-1.0, 'T':-1.0, 'W':-2.0, 'Y':-2.0, 'V':-1.0, 'B':-3.0, 'Z':-3.0, 'X':-2.0, '*':-4.0 },
+ 'Q': { 'A':-1.0, 'R': 1.0, 'N': 0.0, 'D': 0.0, 'C':-3.0, 'Q': 5.0, 'E': 2.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-2.0, 'K': 1.0, 'M': 0.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-2.0, 'Y':-1.0, 'V':-2.0, 'B': 0.0, 'Z': 3.0, 'X':-1.0, '*':-4.0 },
+ 'E': { 'A':-1.0, 'R': 0.0, 'N': 0.0, 'D': 2.0, 'C':-4.0, 'Q': 2.0, 'E': 5.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-3.0, 'K': 1.0, 'M':-2.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 1.0, 'Z': 4.0, 'X':-1.0, '*':-4.0 },
+ 'G': { 'A': 0.0, 'R':-2.0, 'N': 0.0, 'D':-1.0, 'C':-3.0, 'Q':-2.0, 'E':-2.0, 'G': 6.0, 'H':-2.0, 'I':-4.0, 'L':-4.0, 'K':-2.0, 'M':-3.0, 'F':-3.0, 'P':-2.0, 'S': 0.0, 'T':-2.0, 'W':-2.0, 'Y':-3.0, 'V':-3.0, 'B':-1.0, 'Z':-2.0, 'X':-1.0, '*':-4.0 },
+ 'H': { 'A':-2.0, 'R': 0.0, 'N': 1.0, 'D':-1.0, 'C':-3.0, 'Q': 0.0, 'E': 0.0, 'G':-2.0, 'H': 8.0, 'I':-3.0, 'L':-3.0, 'K':-1.0, 'M':-2.0, 'F':-1.0, 'P':-2.0, 'S':-1.0, 'T':-2.0, 'W':-2.0, 'Y': 2.0, 'V':-3.0, 'B': 0.0, 'Z': 0.0, 'X':-1.0, '*':-4.0 },
+ 'I': { 'A':-1.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C':-1.0, 'Q':-3.0, 'E':-3.0, 'G':-4.0, 'H':-3.0, 'I': 4.0, 'L': 2.0, 'K':-3.0, 'M': 1.0, 'F': 0.0, 'P':-3.0, 'S':-2.0, 'T':-1.0, 'W':-3.0, 'Y':-1.0, 'V': 3.0, 'B':-3.0, 'Z':-3.0, 'X':-1.0, '*':-4.0 },
+ 'L': { 'A':-1.0, 'R':-2.0, 'N':-3.0, 'D':-4.0, 'C':-1.0, 'Q':-2.0, 'E':-3.0, 'G':-4.0, 'H':-3.0, 'I': 2.0, 'L': 4.0, 'K':-2.0, 'M': 2.0, 'F': 0.0, 'P':-3.0, 'S':-2.0, 'T':-1.0, 'W':-2.0, 'Y':-1.0, 'V': 1.0, 'B':-4.0, 'Z':-3.0, 'X':-1.0, '*':-4.0 },
+ 'K': { 'A':-1.0, 'R': 2.0, 'N': 0.0, 'D':-1.0, 'C':-3.0, 'Q': 1.0, 'E': 1.0, 'G':-2.0, 'H':-1.0, 'I':-3.0, 'L':-2.0, 'K': 5.0, 'M':-1.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 0.0, 'Z': 1.0, 'X':-1.0, '*':-4.0 },
+ 'M': { 'A':-1.0, 'R':-1.0, 'N':-2.0, 'D':-3.0, 'C':-1.0, 'Q': 0.0, 'E':-2.0, 'G':-3.0, 'H':-2.0, 'I': 1.0, 'L': 2.0, 'K':-1.0, 'M': 5.0, 'F': 0.0, 'P':-2.0, 'S':-1.0, 'T':-1.0, 'W':-1.0, 'Y':-1.0, 'V': 1.0, 'B':-3.0, 'Z':-1.0, 'X':-1.0, '*':-4.0 },
+ 'F': { 'A':-2.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C':-2.0, 'Q':-3.0, 'E':-3.0, 'G':-3.0, 'H':-1.0, 'I': 0.0, 'L': 0.0, 'K':-3.0, 'M': 0.0, 'F': 6.0, 'P':-4.0, 'S':-2.0, 'T':-2.0, 'W': 1.0, 'Y': 3.0, 'V':-1.0, 'B':-3.0, 'Z':-3.0, 'X':-1.0, '*':-4.0 },
+ 'P': { 'A':-1.0, 'R':-2.0, 'N':-2.0, 'D':-1.0, 'C':-3.0, 'Q':-1.0, 'E':-1.0, 'G':-2.0, 'H':-2.0, 'I':-3.0, 'L':-3.0, 'K':-1.0, 'M':-2.0, 'F':-4.0, 'P': 7.0, 'S':-1.0, 'T':-1.0, 'W':-4.0, 'Y':-3.0, 'V':-2.0, 'B':-2.0, 'Z':-1.0, 'X':-2.0, '*':-4.0 },
+ 'S': { 'A': 1.0, 'R':-1.0, 'N': 1.0, 'D': 0.0, 'C':-1.0, 'Q': 0.0, 'E': 0.0, 'G': 0.0, 'H':-1.0, 'I':-2.0, 'L':-2.0, 'K': 0.0, 'M':-1.0, 'F':-2.0, 'P':-1.0, 'S': 4.0, 'T': 1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 0.0, 'Z': 0.0, 'X': 0.0, '*':-4.0 },
+ 'T': { 'A': 0.0, 'R':-1.0, 'N': 0.0, 'D':-1.0, 'C':-1.0, 'Q':-1.0, 'E':-1.0, 'G':-2.0, 'H':-2.0, 'I':-1.0, 'L':-1.0, 'K':-1.0, 'M':-1.0, 'F':-2.0, 'P':-1.0, 'S': 1.0, 'T': 5.0, 'W':-2.0, 'Y':-2.0, 'V': 0.0, 'B':-1.0, 'Z':-1.0, 'X': 0.0, '*':-4.0 },
+ 'W': { 'A':-3.0, 'R':-3.0, 'N':-4.0, 'D':-4.0, 'C':-2.0, 'Q':-2.0, 'E':-3.0, 'G':-2.0, 'H':-2.0, 'I':-3.0, 'L':-2.0, 'K':-3.0, 'M':-1.0, 'F': 1.0, 'P':-4.0, 'S':-3.0, 'T':-2.0, 'W': 11.0,'Y': 2.0, 'V':-3.0, 'B':-4.0, 'Z':-3.0, 'X':-2.0, '*':-4.0 },
+ 'Y': { 'A':-2.0, 'R':-2.0, 'N':-2.0, 'D':-3.0, 'C':-2.0, 'Q':-1.0, 'E':-2.0, 'G':-3.0, 'H': 2.0, 'I':-1.0, 'L':-1.0, 'K':-2.0, 'M':-1.0, 'F': 3.0, 'P':-3.0, 'S':-2.0, 'T':-2.0, 'W': 2.0, 'Y': 7.0, 'V':-1.0, 'B':-3.0, 'Z':-2.0, 'X':-1.0, '*':-4.0 },
+ 'V': { 'A': 0.0, 'R':-3.0, 'N':-3.0, 'D':-3.0, 'C':-1.0, 'Q':-2.0, 'E':-2.0, 'G':-3.0, 'H':-3.0, 'I': 3.0, 'L': 1.0, 'K':-2.0, 'M': 1.0, 'F':-1.0, 'P':-2.0, 'S':-2.0, 'T': 0.0, 'W':-3.0, 'Y':-1.0, 'V': 4.0, 'B':-3.0, 'Z':-2.0, 'X':-1.0, '*':-4.0 },
+ 'B': { 'A':-2.0, 'R':-1.0, 'N': 3.0, 'D': 4.0, 'C':-3.0, 'Q': 0.0, 'E': 1.0, 'G':-1.0, 'H': 0.0, 'I':-3.0, 'L':-4.0, 'K': 0.0, 'M':-3.0, 'F':-3.0, 'P':-2.0, 'S': 0.0, 'T':-1.0, 'W':-4.0, 'Y':-3.0, 'V':-3.0, 'B': 4.0, 'Z': 1.0, 'X':-1.0, '*':-4.0 },
+ 'Z': { 'A':-1.0, 'R': 0.0, 'N': 0.0, 'D': 1.0, 'C':-3.0, 'Q': 3.0, 'E': 4.0, 'G':-2.0, 'H': 0.0, 'I':-3.0, 'L':-3.0, 'K': 1.0, 'M':-1.0, 'F':-3.0, 'P':-1.0, 'S': 0.0, 'T':-1.0, 'W':-3.0, 'Y':-2.0, 'V':-2.0, 'B': 1.0, 'Z': 4.0, 'X':-1.0, '*':-4.0 },
+ 'X': { 'A': 0.0, 'R':-1.0, 'N':-1.0, 'D':-1.0, 'C':-2.0, 'Q':-1.0, 'E':-1.0, 'G':-1.0, 'H':-1.0, 'I':-1.0, 'L':-1.0, 'K':-1.0, 'M':-1.0, 'F':-1.0, 'P':-2.0, 'S': 0.0, 'T': 0.0, 'W':-2.0, 'Y':-1.0, 'V':-1.0, 'B':-1.0, 'Z':-1.0, 'X':-1.0, '*':-4.0 },
+ '*': { 'A':-4.0, 'R':-4.0, 'N':-4.0, 'D':-4.0, 'C':-4.0, 'Q':-4.0, 'E':-4.0, 'G':-4.0, 'H':-4.0, 'I':-4.0, 'L':-4.0, 'K':-4.0, 'M':-4.0, 'F':-4.0, 'P':-4.0, 'S':-4.0, 'T':-4.0, 'W':-4.0, 'Y':-4.0, 'V':-4.0, 'B':-4.0, 'Z':-4.0, 'X':-4.0, '*': 1.0 }
+ }
+
+ def __init__(self, matrix=BLOSUM62):
+ self._matrix = matrix
+
+ def score(self, x, y):
+ try:
+ return self._matrix[x][y]
+ except KeyError as ke:
+            # KeyError has no "message" attribute on Python 3; use args instead
+            raise ResidueNotFoundError(ke.args[0])
+
+ @staticmethod
+ def parse(string):
+ """
+ Parse a standard scoring matrix file, where the first row and
+ column are residue labels.
+
+ @param string: scoring matrix string
+ @type string: str
+
+ @rtype: L{SimilarityMatrix}
+ """
+
+ residues = {}
+ matrix = {}
+
+ for line in string.splitlines():
+ if not line.strip() or line.startswith("#"):
+ continue
+
+ if not residues:
+ residues = line.split()
+
+ else:
+ items = line.split()
+ if len(items) != len(residues) + 1:
+ raise ValueError("{0} scoring columns expected".format(len(residues)))
+
+ try:
+ aa, scores = items[0], map(float, items[1:])
+ matrix[aa] = dict((residues[i], s) for i, s in enumerate(scores))
+ except (KeyError, ValueError):
+ raise ValueError("Corrupt scoring matrix")
+
+ return SimilarityMatrix(matrix)
+
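These scoring matrices feed the alignment algorithms defined below, but they can also
be used on their own; a brief sketch:

    blosum = SimilarityMatrix()              # defaults to SimilarityMatrix.BLOSUM62
    identity = IdentityMatrix(match=2, mismatch=-1)

    print(blosum.score('A', 'A'))            # 4.0
    print(blosum.score('W', 'C'))            # -2.0
    print(identity.score('A', 'G'))          # -1.0

    try:
        blosum.score('A', 'U')               # 'U' is not a BLOSUM62 residue
    except ResidueNotFoundError:
        print("unknown residue")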
+
+class AbstractAlignmentAlgorithm(object):
+ """
+ Base class for all sequence alignment algorithms.
+
+ This class was designed with simple sequence alignment algorithms in mind.
+ Implementors have full control over the behavior of the scoring function and
+ the dynamic programming matrix, but implementing things that require
+ additional matrices (such as affine gap penalties) might be trickier.
+
+ @param scoring: scoring matrix
+ @type scoring: L{AbstractScoringMatrix}
+ @param gap: simple gap penalty
+ @type gap: float
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, scoring=IdentityMatrix(), gap=0):
+
+ if not isinstance(scoring, AbstractScoringMatrix):
+ raise TypeError(scoring)
+
+ self._gap = float(gap)
+ self._scoring = scoring
+
+ @property
+ def gap(self):
+ """
+ Simple gap penalty
+ @rtype: float
+ """
+ return self._gap
+
+ @property
+ def scoring_matrix(self):
+ """
+ Scoring matrix
+ @rtype: L{AbstractScoringMatrix}
+ """
+ return self._scoring
+
+ def align(self, query, subject):
+ """
+ Align two sequences and return the optimal alignment.
+
+ @param query: first sequence
+ @type query: L{AbstractSequence}
+ @param subject: second sequence
+ @type subject: L{AbstractSequence}
+
+ @rtype: L{AlignmentResult}
+ """
+ if query.length == 0 or subject.length == 0:
+ raise ValueError("Can't align zero length sequence")
+
+ # working with string sequences results in a massive speed-up
+ qseq = ["*"] + self._sequence(query)
+ sseq = ["*"] + self._sequence(subject)
+
+ # 1. create a dynamic programming matrix
+ matrix = []
+ rows, cols = len(query), len(subject)
+
+ for i in range(rows + 1):
+ matrix.append([])
+ for j in range(cols + 1):
+ matrix[i].append(None)
+
+ # 2. initialize the first row and column
+ self._initialize(matrix)
+
+        # 3. fill in the rest of the matrix
+ for i in range(1, rows + 1):
+ for j in range(1, cols + 1):
+ score = self._score(qseq[i], sseq[j])
+ self._fill(matrix, i, j, score)
+
+        # 4. compute the alignment by tracing back
+ return self._traceback(matrix, query, subject)
+
+ def _sequence(self, s):
+ """
+        Extract and return the string sequence of C{s}.
+
+ @param s: sequence object
+ @type s: L{AbstractSequence}
+
+ @rtype: list of str
+ """
+ return list(s.sequence)
+
+ @abstractmethod
+ def _initialize(self, matrix):
+ """
+        Initialize the dynamic programming matrix (typically its first
+        row and column).
+
+ @param matrix: list (2D)
+ @type matrix: list
+ """
+ pass
+
+ def _score(self, residue1, residue2):
+ """
+ Retrieve the pairwise score of two residues using the current
+ scoring matrix.
+
+ @rtype: float
+ """
+ return self._scoring.score(residue1, residue2)
+
+ def _fill(self, matrix, i, j, score):
+ """
+ Compute and set the best score that leads to cell i,j in the dynamic
+ programming matrix: right, down or diagonal.
+
+ See also L{AbstractAlignmentAlgorithm._max}.
+
+ @param score: pairwise score at matrix[i][j]
+ @type score: float
+ @return: the best score
+ @rtype: float
+ """
+
+ match = matrix[i-1][j-1] + score
+ insertion = matrix[i][j-1] + self._gap
+ deletion = matrix[i-1][j] + self._gap
+
+ best = self._max(match, insertion, deletion)
+ matrix[i][j] = best
+
+ return best
+
+ @abstractmethod
+ def _max(self, match, insertion, deletion):
+ """
+ Choose the best score among all given possibilities:
+ scores for match, insertion and deletion. This will determine
+ the direction taken at each step while building the dynamic programming
+ matrix (diagonal, down or right).
+
+        This is typically where concrete sequence alignment algorithms
+        (e.g. global vs. local alignment) differ.
+ """
+ pass
+
+ def _traceback(self, m, seq1, seq2):
+ """
+ Trace back and return the optimal alignment.
+ """
+
+ query = []
+ subject = []
+
+ # working with string sequences results in a massive speed-up
+ qseq = ["*"] + self._sequence(seq1)
+ sseq = ["*"] + self._sequence(seq2)
+
+ i, j = self._terminus(m)
+ qstart, start = i, j
+ qend, end = i, j
+ score = m[i][j]
+
+ while self._expandable(m, i, j):
+
+ if i > 0 and j > 0 and m[i][j] == (m[i-1][j-1] + self._score(qseq[i], sseq[j])):
+ query.append(seq1.residues[i])
+ subject.append(seq2.residues[j])
+ qstart, start = i, j
+ i, j = i - 1, j - 1
+
+ elif i > 0 and m[i][j] == (m[i-1][j] + self._gap):
+ query.append(seq1.residues[i])
+ subject.append(ResidueInfo(-1, seq2.alphabet.GAP))
+ qstart = i
+ i = i - 1
+
+ elif j > 0 and m[i][j] == (m[i][j-1] + self._gap):
+ query.append(ResidueInfo(-1, seq1.alphabet.GAP))
+ subject.append(seq2.residues[j])
+ start = j
+ j = j - 1
+
+ else:
+ assert False
+
+ query.reverse()
+ subject.reverse()
+
+ aligned_query = RichSequence(seq1.id, seq1.header, query, seq1.type)
+ aligned_subject = RichSequence(seq2.id, seq2.header, subject, seq2.type)
+
+ return AlignmentResult(score, aligned_query, aligned_subject, qstart, qend, start, end)
+
+ @abstractmethod
+ def _terminus(self, matrix):
+ """
+ Find the coordinates of the optimal alignment's right endpoint in the
+ dynamic programming matrix. This is the starting point of a traceback.
+
+ @param matrix: the complete dynamic programming matrix
+ @type matrix: 2D list
+
+ @rtype: 2-tuple (i, j)
+ """
+ pass
+
+ @abstractmethod
+    def _expandable(self, matrix, i, j):
+ """
+ Return True if the traceback procedure must not terminate at
+ position C{i,j} in the dynamic programming matrix.
+
+ @rtype: bool
+ """
+ pass
+
+
+class GlobalAlignmentAlgorithm(AbstractAlignmentAlgorithm):
+ """
+ Needleman-Wunsch global sequence alignment.
+ """
+
+ def __init__(self, scoring=IdentityMatrix(), gap=0):
+ super(GlobalAlignmentAlgorithm, self).__init__(scoring, gap)
+
+ def _initialize(self, matrix):
+
+ for i in range(len(matrix)):
+ matrix[i][0] = self._gap * i
+ for j in range(len(matrix[0])):
+ matrix[0][j] = self._gap * j
+
+ def _max(self, match, insertion, deletion):
+ return max(match, insertion, deletion)
+
+ def _terminus(self, matrix):
+
+ i = len(matrix) - 1
+ j = len(matrix[0]) - 1
+
+ return (i, j)
+
+ def _expandable(self, matrix, i, j):
+ return i > 0 or j > 0
+
+class LocalAlignmentAlgorithm(AbstractAlignmentAlgorithm):
+ """
+ Smith-Waterman local sequence alignment.
+ """
+
+ START = 0
+ """
+ Score for initiation of a new local alignment
+ """
+
+ def __init__(self, scoring=IdentityMatrix(), gap=-1):
+ super(LocalAlignmentAlgorithm, self).__init__(scoring, gap)
+
+ def _initialize(self, matrix):
+
+ for i in range(len(matrix)):
+ matrix[i][0] = LocalAlignmentAlgorithm.START
+ for j in range(len(matrix[0])):
+ matrix[0][j] = LocalAlignmentAlgorithm.START
+
+ def _max(self, match, insertion, deletion):
+ return max(match, insertion, deletion, LocalAlignmentAlgorithm.START)
+
+ def _terminus(self, matrix):
+
+ maxi, maxj = 0, 0
+
+ for i in range(len(matrix)):
+ for j in range(len(matrix[i])):
+ if matrix[i][j] > matrix[maxi][maxj]:
+ maxi, maxj = i, j
+
+ return (maxi, maxj)
+
+ def _expandable(self, matrix, i, j):
+ return matrix[i][j] != LocalAlignmentAlgorithm.START
+
+
+class AlignmentResult(object):
+ """
+ Represents a pairwise sequence alignment result.
+
+ @param score: raw alignment score
+ @type score: float
+ @param query: aligned query sequence (with gaps)
+ @type query: L{AbstractSequence}
+ @param subject: aligned subject sequence (with gaps)
+ @type subject: L{AbstractSequence}
+ @param qstart: query start position
+ @type qstart: int
+ @param qend: query end position
+ @type qend: int
+ @param start: subject start position
+ @type start: int
+ @param end: subject end position
+ @type end: int
+ """
+
+ def __init__(self, score, query, subject, qstart, qend, start, end):
+
+ if not isinstance(query, AbstractSequence):
+ raise TypeError(query)
+ if not isinstance(subject, AbstractSequence):
+            raise TypeError(subject)
+
+ if not (len(query) == len(subject)):
+ raise ValueError("Corrupt alignment")
+
+ self._score = float(score)
+ self._query = query
+ self._subject = subject
+ self._qstart = int(qstart)
+ self._qend = int(qend)
+ self._start = int(start)
+ self._end = int(end)
+ self._identicals = 0
+ self._gaps = 0
+ self._length = 0
+
+ if query.length > 0 and subject.length > 0:
+
+ if not 1 <= qstart <= qend:
+ raise ValueError("Invalid query start/end positions")
+ if not 1 <= start <= end:
+ raise ValueError("Invalid subject start/end positions")
+
+ qgap = query.alphabet.GAP
+ sgap = subject.alphabet.GAP
+
+ for q, s in zip(query, subject):
+ if q.type == qgap or s.type == sgap:
+ self._gaps += 1
+ elif q.type == s.type:
+ self._identicals += 1
+
+            self._length = (self._gaps + (qend - qstart + 1) + (end - start + 1)) // 2
+
+ else:
+ if (score + qstart + qend + start + end) != 0:
+ raise ValueError("Corrupt alignment")
+ self._length = 0
+
+
+ def __str__(self):
+ string = "{0.qstart:5} {0.query.sequence:} {0.qend:<5}\n"
+ string += "{0.start:5} {0.subject.sequence:} {0.end:<5}"
+ return string.format(self)
+
+ @property
+ def is_empty(self):
+ """
+ Return True if this is an empty alignment (i.e. no matches)
+ @rtype: bool
+ """
+ return self.length == 0 or self.gaps == self.length
+
+ @property
+ def score(self):
+ """
+ Raw alignment score
+ @rtype: float
+ """
+ return self._score
+
+ @property
+ def query(self):
+ """
+ Aligned query sequence (with gaps)
+ @rtype: L{AbstractSequence}
+ """
+ return self._query
+
+ @property
+ def subject(self):
+ """
+ Aligned subject sequence (with gaps)
+ @rtype: L{AbstractSequence}
+ """
+ return self._subject
+
+ @property
+ def qstart(self):
+ """
+ Query start position
+ @rtype: int
+ """
+ return self._qstart
+
+ @property
+ def qend(self):
+ """
+ Query end position
+ @rtype: int
+ """
+ return self._qend
+
+ @property
+ def start(self):
+ """
+ Subject start position
+ @rtype: int
+ """
+ return self._start
+
+ @property
+ def end(self):
+ """
+ Subject end position
+ @rtype: int
+ """
+ return self._end
+
+ @property
+ def identicals(self):
+ """
+ Number of identical residues
+ @rtype: int
+ """
+ return self._identicals
+
+ @property
+ def identity(self):
+ """
+        Fraction of identical residues (identicals / alignment length)
+        @rtype: float
+ """
+ return float(self._identicals) / self._length
+
+ @property
+ def gaps(self):
+ """
+ Total number of gaps (query + subject)
+ @rtype: int
+ """
+ return self._gaps
+
+ @property
+ def length(self):
+ """
+        Alignment length: (query span + subject span + gaps) / 2
+ @rtype: int
+ """
+ return self._length
+
+ def alignment(self):
+ """
+        @return: this alignment result as a sequence alignment object
+ @rtype: L{SequenceAlignment}
+ """
+ return SequenceAlignment([self.query, self.subject])
+
+
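
A minimal usage sketch of the new csb.bio.sequence.alignment module (not taken from the upstream sources); the Sequence constructor arguments and the SequenceTypes enum are assumed from the wider csb.bio.sequence API, so treat the exact names as illustrative.

    from csb.bio.sequence import Sequence, SequenceTypes
    from csb.bio.sequence.alignment import (GlobalAlignmentAlgorithm,
                                            LocalAlignmentAlgorithm,
                                            SimilarityMatrix)

    # two short protein sequences (assumed constructor: id, header, residues, type)
    query   = Sequence('q', 'query',   'THISLINE',  SequenceTypes.Protein)
    subject = Sequence('s', 'subject', 'ISALIGNED', SequenceTypes.Protein)

    # Needleman-Wunsch with BLOSUM62 scores (the default matrix) and gap penalty -1
    nw = GlobalAlignmentAlgorithm(scoring=SimilarityMatrix(), gap=-1)
    ali = nw.align(query, subject)
    print(ali)                          # two-row alignment with start/end positions
    print(ali.identity, ali.gaps)       # identity fraction and total gap count

    # Smith-Waterman keeps only the best-scoring local block
    sw = LocalAlignmentAlgorithm(scoring=SimilarityMatrix(), gap=-1)
    print(sw.align(query, subject).score)
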
diff --git a/csb/bio/structure/__init__.py b/csb/bio/structure/__init__.py
index 9289d17..8a3ac39 100644
--- a/csb/bio/structure/__init__.py
+++ b/csb/bio/structure/__init__.py
@@ -533,6 +533,23 @@ class Structure(csb.core.AbstractNIContainer, AbstractEntity):
with csb.io.EntryWriter(output_file, close=False) as out:
out.write(data)
+ @staticmethod
+ def from_chain(chain):
+ """
+        A Structure factory: instantiates and returns a new Structure
+        containing a deep copy of C{chain}.
+
+ @param chain: the chain which will comprise the new structure
+ @type chain: L{Chain}
+
+ @rtype: L{Structure}
+ """
+ structure = Structure("NONE")
+ structure.chains.append(chain.clone())
+
+ return structure
+
+
class StructureChainsTable(csb.core.DictionaryContainer):
def __init__(self, structure=None, chains=None):
@@ -581,6 +598,7 @@ class StructureChainsTable(csb.core.DictionaryContainer):
@param id: ID of the chain to be detached
@type id: str
+ @raise ChainNotFoundError: if C{id} is not a valid chain ID
"""
chain = self[id]
self._remove(id)
@@ -914,7 +932,7 @@ class Chain(csb.core.AbstractNIContainer, AbstractEntity):
@return: the residue object with such an ID
@rtype: L{Residue}
- @raise csb.core.ItemNotFoundError: if no residue with that ID exists
+ @raise EntityNotFoundError: if no residue with that ID exists
"""
res_id = str(sequence_number).strip()
@@ -1153,7 +1171,7 @@ class ChainResiduesCollection(csb.core.CollectionContainer):
try:
return self.__lookup[id]
except KeyError:
- raise csb.core.ItemNotFoundError(id)
+ raise EntityNotFoundError(id)
class Residue(csb.core.AbstractNIContainer, AbstractEntity):
"""
@@ -1179,7 +1197,7 @@ class Residue(csb.core.AbstractNIContainer, AbstractEntity):
def __init__(self, rank, type, sequence_number=None, insertion_code=None):
self._type = None
- self._pdb_name = None
+ self._label = None
self._rank = int(rank)
self._atoms = ResidueAtomsTable(self)
self._secondary_structure = None
@@ -1190,7 +1208,7 @@ class Residue(csb.core.AbstractNIContainer, AbstractEntity):
self.type = type
self.id = sequence_number, insertion_code
- self._pdb_name = repr(type)
+ self.label = repr(type)
@property
def _children(self):
@@ -1198,6 +1216,26 @@ class Residue(csb.core.AbstractNIContainer, AbstractEntity):
def __repr__(self):
return '<{1} [{0.rank}]: {0.type!r} {0.id}>'.format(self, self.__class__.__name__)
+
+ @property
+ def label(self):
+ """
+ Original residue label (different from C{Residue.type} for modified
+ residues)
+ @rtype: str
+ """
+ return self._label
+ @label.setter
+ def label(self, value):
+ self._label = str(value)
+
+ @property
+ def is_modified(self):
+ """
+        Return True if this is a modified residue
+ @rtype: bool
+ """
+ return self.label != repr(self.type)
@property
def type(self):
@@ -1492,7 +1530,11 @@ class NucleicResidue(Residue):
raise TypeError(type)
super(NucleicResidue, self).__init__(rank, type, sequence_number, insertion_code)
- self._pdb_name = str(type)
+ self.label = str(type)
+
+ @property
+ def is_modified(self):
+ return self.label != str(self.type)
class UnknownResidue(Residue):
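
A short sketch of the new Structure.from_chain factory and the Residue.label / is_modified properties (not taken from the upstream sources); chain is assumed to be a csb.bio.structure.Chain obtained from one of the csb.bio.io parsers.

    from csb.bio.structure import Structure

    single = Structure.from_chain(chain)    # new Structure (id "NONE") holding a clone of chain

    for residue in chain.residues:
        if residue.is_modified:
            # label keeps the original PDB residue name, type the parent residue type
            print(residue.rank, residue.label, repr(residue.type))
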
diff --git a/csb/build.py b/csb/build.py
index d21de45..e533ee3 100644
--- a/csb/build.py
+++ b/csb/build.py
@@ -28,6 +28,7 @@ import os
import sys
import getopt
import traceback
+import compileall
if os.path.basename(__file__) == '__init__.py':
PARENT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -128,6 +129,8 @@ Options:
if os.path.join(SOURCETREE, ROOT) != PARENT:
raise IOError('{0} must be a sub-package or sub-module of {1}'.format(__file__, ROOT))
self._input = SOURCETREE
+
+ self._success = True
self.output = output
self.verbosity = verbosity
@@ -165,9 +168,15 @@ Options:
v = self._revision()
self._doc(v)
self._test()
+
+ self._compile()
vn = self._package()
- self.log('\n# Done ({0}).\n'.format(vn.full))
+ if self._success:
+ self.log('\n# Done ({0}).\n'.format(vn.full))
+ else:
+ self.log('\n# Build failed.\n')
+
def log(self, message, level=1, ending='\n'):
@@ -283,9 +292,23 @@ Options:
self.log('\n DID NOT PASS: The docs might be broken')
else:
self.log('\n FAIL: Epydoc returned "#{0.code}: {0}"'.format(ex))
+ self._success = False
self.log('\n# Restoring the previous ARGV...', level=2)
sys.argv = argv
+
+ def _compile(self):
+ """
+ Byte-compile all modules and packages.
+ """
+ self.log('\n# Byte-compiling all *.py files...')
+
+ quiet = self.verbosity <= 1
+ valid = compileall.compile_dir(self._root, quiet=quiet, force=True)
+
+ if not valid:
+ self.log('\n FAIL: Compilation error(s)\n')
+ self._success = False
def _package(self):
"""
@@ -314,8 +337,9 @@ Options:
except SystemExit as ex:
if ex.code is not 0:
- package = 'FAIL'
self.log('\n FAIL: Setup returned: \n\n{0}\n'.format(ex))
+ self._success = False
+ package = 'FAIL'
self.log('\n# Restoring the previous CWD and ARGV...', level=2)
os.chdir(cwd)
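
The new _compile() step relies on compileall.compile_dir reporting failure when any module does not byte-compile; a standalone sketch of the same check (the directory name is illustrative):

    import compileall

    # force recompilation of every .py file below the build root;
    # compile_dir returns a false value if any module fails to compile
    ok = compileall.compile_dir("build/csb", quiet=True, force=True)
    if not ok:
        raise SystemExit("byte-compilation failed")
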
diff --git a/csb/io/__init__.py b/csb/io/__init__.py
index 57358b9..ab7d06c 100644
--- a/csb/io/__init__.py
+++ b/csb/io/__init__.py
@@ -43,6 +43,11 @@ try:
import urllib.request as urllib
except ImportError:
import urllib2 as urllib
+
+try:
+ from __builtin__ import unichr
+except ImportError:
+ from builtins import chr as unichr
NEWLINE = "\n"
diff --git a/csb/io/tsv.py b/csb/io/tsv.py
index e2cbcc8..bbcb805 100644
--- a/csb/io/tsv.py
+++ b/csb/io/tsv.py
@@ -366,7 +366,7 @@ class DataRow(object):
@param delimiter: column separator (defaults to tab)
@type delimiter: str
"""
- return delimiter.join(map(str, self._row))
+ return delimiter.join(map(Table._isnull, self._row))
@property
def columns(self):
@@ -412,6 +412,7 @@ class Table(object):
Table header string, used when saving and restoring TSV files.
"""
HEADER = '# @TSV '
+ NULL = ''
def __init__(self, definition, name='TSV', backend=SQLiteRepository):
@@ -421,6 +422,7 @@ class Table(object):
self._name = name
self._backend = backend
self._imp = backend(name)
+ self._metadata = []
try:
if isinstance(definition[0], ColumnInfo):
@@ -439,7 +441,8 @@ class Table(object):
self._imp.create(self._metadata)
@staticmethod
- def from_tsv(tsv, definition=None, delimiter='\t', skip=0, name='TSV', backend=SQLiteRepository):
+ def from_tsv(tsv, definition=None, delimiter='\t', skip=0, name='TSV',
+ backend=SQLiteRepository):
"""
Table factory: build a L{Table} from a TSV file.
@@ -479,7 +482,8 @@ class Table(object):
for i, line in enumerate(tsvfile, start=1):
if (skip and i <= skip) or line.startswith(Table.HEADER):
continue
- table.insert(line.rstrip(os.linesep).split(delimiter))
+ data = line.rstrip(os.linesep).split(delimiter)
+ table.insert(data)
return table
@@ -518,7 +522,32 @@ class Table(object):
return Table.from_iterable(table, table._metadata, name=name, backend=backend)
else:
return Table(table._metadata, name=name, backend=backend)
+
+ @staticmethod
+ def _isnull(value):
+ if value is None or str(value) == "":
+ return Table.NULL
+ else:
+ return str(value)
+
+ def _convert(self, row):
+
+ if len(row) != len(self._metadata):
+ raise ValueError("{0} columns expected, got {1}".format(
+ len(self._metadata), len(row)))
+ data = []
+
+ for value, ci in zip(row, self._metadata):
+ if value == Table.NULL:
+ data.append(None)
+ elif isinstance(value, csb.core.string):
+ data.append(ci.type(value))
+ else:
+ data.append(value)
+
+ return data
+
def __del__(self):
self._imp.close()
@@ -739,7 +768,8 @@ class Table(object):
@param row: a tuple of the appropriate length
@type row: tuple
"""
- self._imp.insert(row)
+ data = self._convert(row)
+ self._imp.insert(data)
def _project(self, columns):
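
The NULL convention introduced above can be summarized standalone: None or an empty value is written as an empty field, and an empty field read back becomes None, while everything else is passed through the declared column type. The helper names below are illustrative, not part of csb.io.tsv.

    NULL = ''

    def isnull(value):
        # mirrors Table._isnull: None and empty values serialize as an empty field
        return NULL if value is None or str(value) == '' else str(value)

    def convert(value, column_type):
        # mirrors Table._convert for one cell: empty field -> None, strings are cast
        return None if value == NULL else column_type(value)

    print(repr(isnull(None)), repr(isnull(3.5)))          # ''  '3.5'
    print(convert('', float), convert('3.5', float))      # None 3.5
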
diff --git a/csb/numeric/__init__.py b/csb/numeric/__init__.py
index 7d55e9f..cffbeea 100644
--- a/csb/numeric/__init__.py
+++ b/csb/numeric/__init__.py
@@ -287,7 +287,7 @@ def dihedral_angle(a, b, c, d):
m = numpy.cross((a - b), v)
m /= norm(m)
n = numpy.cross((d - c), v)
- n /= norm(m)
+ n /= norm(n)
c = numpy.dot(m, n)
s = numpy.dot(numpy.cross(n, m), v) / norm(v)
@@ -608,7 +608,9 @@ class InvertibleMatrix(object):
def _check_diagonal(self, matrix):
- return (matrix.T == matrix).all()
+ i, j = numpy.nonzero(matrix)
+
+ return numpy.array_equal(i, j)
def _check_unity_multiple(self, matrix):
@@ -706,6 +708,16 @@ class InvertibleMatrix(object):
__idiv__ = __itruediv__
+ def __eq__(self, other):
+
+ if self._matrix is not None and other._matrix is not None:
+
+ return self._matrix == other._matrix
+
+ if self._inverse_matrix is not None and other._inverse_matrix is not None:
+
+ return self._inverse_matrix == other._inverse_matrix
+
def __str__(self):
if self._matrix is not None and self._inverse_matrix is not None:
return "csb.numeric.InvertibleMatrix object holding the following numpy matrix:\n"\
diff --git a/csb/numeric/integrators.py b/csb/numeric/integrators.py
index 7ef07d0..58b38a1 100644
--- a/csb/numeric/integrators.py
+++ b/csb/numeric/integrators.py
@@ -67,7 +67,7 @@ class AbstractIntegrator(object):
return builder.product
@abstractmethod
- def integrate_once(self, state, current_step, inverse_mass_matrix=None):
+ def integrate_once(self, state, current_step, mass_matrix=None):
"""
Integrates one step starting from an initial state and an initial time
given by the product of the timestep and the current_step parameter.
@@ -79,10 +79,10 @@ class AbstractIntegrator(object):
@param current_step: Current integration step
@type current_step: int
- @param inverse_mass_matrix: Inverse mass matrix
- @type inverse_mass_matrix: n-dimensional numpy array with n being the dimension
- of the configuration space, that is, the dimension of
- the position / momentum vectors
+ @param mass_matrix: mass matrix
+ @type mass_matrix: n-dimensional numpy array with n being the dimension
+ of the configuration space, that is, the dimension of
+ the position / momentum vectors
@return: the altered state
@rtype: L{State}
"""
diff --git a/csb/statistics/__init__.py b/csb/statistics/__init__.py
index 69d54e0..8e20b03 100644
--- a/csb/statistics/__init__.py
+++ b/csb/statistics/__init__.py
@@ -256,7 +256,9 @@ def histogram_nd(x, nbins=100, axes=None, nbatch=1000, normalize=True):
shape = tuple(map(len, axes))
H = np.zeros(shape)
- s = np.multiply.accumulate(np.array((1,) + H.shape[:-1]))[::-1]
+ ## MH: was like that before...
+ ## s = np.multiply.accumulate(np.array((1,) + H.shape[:-1]))[::-1]
+ s = np.multiply.accumulate(np.array((1,) + H.shape[1:]))[::-1]
H = H.flatten()
while len(x):
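
The one-line stride fix in histogram_nd is easiest to see on a small 2-D example: with unequal bin counts per axis the old expression built from H.shape[:-1] produced wrong flat indices, which is also why the bug could go unnoticed for square histograms.

    import numpy as np

    shape = (3, 4)                 # 3 bins on the first axis, 4 on the second
    s_new = np.multiply.accumulate(np.array((1,) + shape[1:]))[::-1]    # [4 1]
    s_old = np.multiply.accumulate(np.array((1,) + shape[:-1]))[::-1]   # [3 1]

    multi = np.array([2, 3])                      # per-axis bin indices
    print(np.dot(s_new, multi))                   # 11
    print(np.ravel_multi_index(multi, shape))     # 11 -- agrees with numpy's own mapping
    print(np.dot(s_old, multi))                   # 9  -- the old, wrong flat index
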
diff --git a/csb/statistics/samplers/__init__.py b/csb/statistics/samplers/__init__.py
index 478b452..f821101 100644
--- a/csb/statistics/samplers/__init__.py
+++ b/csb/statistics/samplers/__init__.py
@@ -76,6 +76,10 @@ class State(AbstractState):
self.position = position
self.momentum = momentum
+ def __eq__(self, other):
+
+ return self.position == other.position and self.momentum == other.momentum
+
@property
def position(self):
return self._position.copy()
@@ -100,7 +104,11 @@ class State(AbstractState):
self._momentum = None
def clone(self):
- return State(self.position, self.momentum)
+ if self.momentum is not None:
+ return self.__class__(self.position.copy(), self.momentum.copy())
+ else:
+ return self.__class__(self.position.copy())
+
class EnsembleState(csb.core.BaseCollectionContainer, AbstractState):
"""
@@ -116,7 +124,7 @@ class EnsembleState(csb.core.BaseCollectionContainer, AbstractState):
@property
def position(self):
- return np.array([s.positions for s in self])
+ return np.array([s.position for s in self])
@property
def momentum(self):
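
A small sketch of the corrected State.clone (not from the upstream sources); the position-only constructor call assumes that momentum defaults to None, as suggested by the code above.

    import numpy
    from csb.statistics.samplers import State

    s = State(numpy.array([0.0, 1.0]))     # position-only state
    c = s.clone()                          # copy built via self.__class__, so subclasses clone correctly
    print(c.position, c.momentum)          # [ 0.  1.] None
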
diff --git a/csb/statistics/samplers/mc/__init__.py b/csb/statistics/samplers/mc/__init__.py
index 52d4f00..133b3e9 100644
--- a/csb/statistics/samplers/mc/__init__.py
+++ b/csb/statistics/samplers/mc/__init__.py
@@ -118,6 +118,10 @@ class PropagationResult(AbstractPropagationResult):
self._heat = None
self.heat = heat
+
+ def __iter__(self):
+
+ return iter([self._initial, self.final])
@property
def initial(self):
@@ -262,144 +266,6 @@ class ShortTrajectoryBuilder(TrajectoryBuilder):
initial, final = self._states
return PropagationResult(initial, final, heat=self._heat)
-class SimpleProposalCommunicator(object):
- """
- With the exception of the current state of the Markov chain, this
- holds all the information needed to calculate the acceptance
- probability in both the L{RWMCSampler} and L{HMCSampler} classes,
- that is, only the proposal state.
- For more advanced algorithms, one may derive classes capable of
- holding the neccessary additional information from this class.
-
- @param proposal: Proposal state
- @type proposal: L{State}
- """
-
- __metaclass__ = ABCMeta
-
- def __init__(self, proposal):
-
- self._proposal = proposal
-
- @property
- def proposal(self):
- return self._proposal
-
-class AbstractSingleChainMC(AbstractMC):
- """
- Abstract class for Monte Carlo sampling algorithms simulating
- only one ensemble.
-
- @param pdf: probability density function to sample from
- @type pdf: subclass of L{csb.statistics.pdf.AbstractDensity}
-
- @param state: Initial state
- @type state: L{State}
-
- @param temperature: Pseudo-temperature of the Boltzmann ensemble
- M{p(x) = 1/N * exp(-1/T * E(x))} with the
- pseudo-energy defined as M{E(x) = -log(p(x))}
- where M{p(x)} is the PDF under consideration
- @type temperature: float
- """
-
- __metaclass__ = ABCMeta
-
- def __init__(self, pdf, state, temperature=1.):
-
- super(AbstractSingleChainMC, self).__init__(state)
-
- self._pdf = pdf
- self._temperature = temperature
- self._nmoves = 0
- self._accepted = 0
- self._last_move_accepted = None
-
- def _checkstate(self, state):
- if not isinstance(state, State):
- raise TypeError(state)
-
- def sample(self):
- """
- Draw a sample.
- @rtype: L{State}
- """
-
- proposal_communicator = self._propose()
- pacc = self._calc_pacc(proposal_communicator)
- self._accept(proposal_communicator.proposal, pacc)
-
- return self.state
-
- @abstractmethod
- def _propose(self):
- """
- Calculate a new proposal state and gather additional information
- needed to calculate the acceptance probability.
-
- @rtype: L{SimpleProposalCommunicator}
- """
- pass
-
- @abstractmethod
- def _calc_pacc(self, proposal_communicator):
- """
- Calculate probability with which to accept the proposal.
-
- @param proposal_communicator: Contains information about the proposal
- and additional information needed to
- calculate the acceptance probability
- @type proposal_communicator: L{SimpleProposalCommunicator}
- """
- pass
-
- def _accept(self, proposal, pacc):
- """
- Accept / reject proposal with given acceptance probability pacc.
-
- @param proposal: proposal state
- @type proposal: L{State}
-
- @param pacc: acceptance probability
- @type pacc: float
- """
-
- self._nmoves += 1
-
- if numpy.random.random() < pacc:
- self.state = proposal
- self._accepted += 1
- self._last_move_accepted = True
- return True
- else:
- self._last_move_accepted = False
- return False
-
- @property
- def energy(self):
- """
- Log-likelihood of the current state.
- @rtype: float
- """
- return self._pdf.log_prob(self.state.position)
-
- @property
- def acceptance_rate(self):
- """
- Acceptance rate.
- """
- return float(self._accepted) / float(self._nmoves)
-
- @property
- def last_move_accepted(self):
- """
- Information whether the last MC move was accepted or not.
- """
- return self._last_move_accepted
-
- @property
- def temperature(self):
- return self._temperature
class MCCollection(csb.core.BaseCollectionContainer):
"""
@@ -410,685 +276,49 @@ class MCCollection(csb.core.BaseCollectionContainer):
"""
def __init__(self, items):
-
- super(MCCollection, self).__init__(items, type=AbstractSingleChainMC)
-
-class AbstractEnsembleMC(AbstractMC):
- """
- Abstract class for Monte Carlo sampling algorithms simulating several ensembles.
-
- @param samplers: samplers which sample from their respective equilibrium distributions
- @type samplers: list of L{AbstractSingleChainMC}
- """
-
- __metaclass__ = ABCMeta
-
- def __init__(self, samplers):
-
- self._samplers = MCCollection(samplers)
- state = EnsembleState([x.state for x in self._samplers])
-
- super(AbstractEnsembleMC, self).__init__(state)
-
- def sample(self):
- """
- Draw an ensemble sample.
-
- @rtype: L{EnsembleState}
- """
-
- sample = EnsembleState([sampler.sample() for sampler in self._samplers])
- self.state = sample
-
- return sample
-
- @property
- def energy(self):
- """
- Total ensemble energy.
- """
- return sum([x.energy for x in self._samplers])
-
-class AbstractSwapParameterInfo(object):
- """
- Subclass instances hold all parameters necessary for performing a swap
- between two given samplers.
- """
-
- __metaclass__ = ABCMeta
-
- def __init__(self, sampler1, sampler2):
- """
- @param sampler1: First sampler
- @type sampler1: L{AbstractSingleChainMC}
-
- @param sampler2: Second sampler
- @type sampler2: L{AbstractSingleChainMC}
- """
-
- self._sampler1 = sampler1
- self._sampler2 = sampler2
-
- @property
- def sampler1(self):
- return self._sampler1
-
- @property
- def sampler2(self):
- return self._sampler2
-
-class RESwapParameterInfo(AbstractSwapParameterInfo):
- """
- Holds parameters for a standard Replica Exchange swap.
- """
- pass
-
-class MDRENSSwapParameterInfo(RESwapParameterInfo):
- """
- Holds parameters for a MDRENS swap.
-
- @param sampler1: First sampler
- @type sampler1: L{AbstractSingleChainMC}
-
- @param sampler2: Second sampler
- @type sampler2: L{AbstractSingleChainMC}
-
- @param timestep: Integration timestep
- @type timestep: float
- @param mass_matrix: Mass matrix
- @type mass_matrix: n-dimensional matrix of type L{InvertibleMatrix} with n being the dimension
- of the configuration space, that is, the dimension of
- the position / momentum vectors
-
- @param traj_length: Trajectory length in number of timesteps
- @type traj_length: int
-
- @param gradient: Gradient which determines the dynamics during a trajectory
- @type gradient: L{AbstractGradient}
- """
-
- def __init__(self, sampler1, sampler2, timestep, traj_length, gradient, mass_matrix=None):
-
- super(MDRENSSwapParameterInfo, self).__init__(sampler1, sampler2)
+ from csb.statistics.samplers.mc.singlechain import AbstractSingleChainMC
- self._mass_matrix = mass_matrix
- if self.mass_matrix is None:
- d = len(sampler1.state.position)
- self.mass_matrix = csb.numeric.InvertibleMatrix(numpy.eye(d), numpy.eye(d))
-
- self._traj_length = traj_length
- self._gradient = gradient
- self._timestep = timestep
-
- @property
- def timestep(self):
- """
- Integration timestep.
- """
- return self._timestep
- @timestep.setter
- def timestep(self, value):
- self._timestep = float(value)
-
- @property
- def traj_length(self):
- """
- Trajectory length in number of integration steps.
- """
- return self._traj_length
- @traj_length.setter
- def traj_length(self, value):
- self._traj_length = int(value)
-
- @property
- def gradient(self):
- """
- Gradient which governs the equations of motion.
- """
- return self._gradient
-
- @property
- def mass_matrix(self):
- return self._mass_matrix
- @mass_matrix.setter
- def mass_matrix(self, value):
- self._mass_matrix = value
-
-class ThermostattedMDRENSSwapParameterInfo(MDRENSSwapParameterInfo):
- """
- @param sampler1: First sampler
- @type sampler1: subclass instance of L{AbstractSingleChainMC}
-
- @param sampler2: Second sampler
- @type sampler2: subclass instance of L{AbstractSingleChainMC}
-
- @param timestep: Integration timestep
- @type timestep: float
-
- @param mass_matrix: Mass matrix
- @type mass_matrix: n-dimensional L{InvertibleMatrix} with n being the dimension
- of the configuration space, that is, the dimension of
- the position / momentum vectors
-
- @param traj_length: Trajectory length in number of timesteps
- @type traj_length: int
-
- @param gradient: Gradient which determines the dynamics during a trajectory
- @type gradient: subclass instance of L{AbstractGradient}
-
- @param temperature: Temperature interpolation function.
- @type temperature: Real-valued function mapping from [0,1] to R.
- T(0) = temperature of the ensemble sampler1 samples from, T(1) = temperature
- of the ensemble sampler2 samples from
-
- @param collision_probability: Probability for a collision with the heatbath during one timestep
- @type collision_probability: float
-
- @param collision_interval: Interval during which collision may occur with probability
- collision_probability
- @type collision_interval: int
- """
-
- def __init__(self, sampler1, sampler2, timestep, traj_length, gradient, mass_matrix=None,
- temperature=lambda l: 1., collision_probability=0.1, collision_interval=1):
-
- super(ThermostattedMDRENSSwapParameterInfo, self).__init__(sampler1, sampler2, timestep,
- traj_length, gradient,
- mass_matrix=mass_matrix)
-
- self._collision_probability = None
- self._collision_interval = None
- self._temperature = temperature
- self.collision_probability = collision_probability
- self.collision_interval = collision_interval
-
- @property
- def collision_probability(self):
- """
- Probability for a collision with the heatbath during one timestep.
- """
- return self._collision_probability
- @collision_probability.setter
- def collision_probability(self, value):
- self._collision_probability = float(value)
-
- @property
- def collision_interval(self):
- """
- Interval during which collision may occur with probability
- C{collision_probability}.
- """
- return self._collision_interval
- @collision_interval.setter
- def collision_interval(self, value):
- self._collision_interval = int(value)
-
- @property
- def temperature(self):
- return self._temperature
-
-class AbstractSwapCommunicator(object):
- """
- Holds all the information which needs to be communicated between
- distinct swap substeps.
-
- @param param_info: ParameterInfo instance holding swap parameters
- @type param_info: L{AbstractSwapParameterInfo}
-
- @param traj12: Forward trajectory
- @type traj12: L{Trajectory}
-
- @param traj21: Reverse trajectory
- @type traj21: L{Trajectory}
- """
-
- __metaclass__ = ABCMeta
-
- def __init__(self, param_info, traj12, traj21):
-
- self._sampler1 = param_info.sampler1
- self._sampler2 = param_info.sampler2
-
- self._traj12 = traj12
- self._traj21 = traj21
-
- self._param_info = param_info
-
- self._acceptance_probability = None
- self._accepted = False
-
- @property
- def sampler1(self):
- return self._sampler1
-
- @property
- def sampler2(self):
- return self._sampler2
-
- @property
- def traj12(self):
- return self._traj12
-
- @property
- def traj21(self):
- return self._traj21
-
- @property
- def acceptance_probability(self):
- return self._acceptance_probability
- @acceptance_probability.setter
- def acceptance_probability(self, value):
- self._acceptance_probability = value
-
- @property
- def accepted(self):
- return self._accepted
- @accepted.setter
- def accepted(self, value):
- self._accepted = value
-
- @property
- def param_info(self):
- return self._param_info
-
-class RESwapCommunicator(AbstractSwapCommunicator):
- """
- Holds all the information which needs to be communicated between distinct
- RE swap substeps.
-
- See L{AbstractSwapCommunicator} for constructor signature.
- """
- pass
-
-class RENSSwapCommunicator(AbstractSwapCommunicator):
- """
- Holds all the information which needs to be communicated between distinct
- RENS swap substeps.
-
- See L{AbstractSwapCommunicator} for constructor signature.
- """
-
- pass
-
-class SingleSwapStatistics(object):
- """
- Tracks swap statistics of a single sampler pair.
-
- @param param_info: ParameterInfo instance holding swap parameters
- @type param_info: L{AbstractSwapParameterInfo}
- """
-
- def __init__(self, param_info):
- self._total_swaps = 0
- self._accepted_swaps = 0
-
- @property
- def total_swaps(self):
- return self._total_swaps
-
- @property
- def accepted_swaps(self):
- return self._accepted_swaps
-
- @property
- def acceptance_rate(self):
- """
- Acceptance rate of the sampler pair.
- """
- if self.total_swaps > 0:
- return float(self.accepted_swaps) / float(self.total_swaps)
- else:
- return 0.
-
- def update(self, accepted):
- """
- Updates swap statistics.
- """
- self._total_swaps += 1
- self._accepted_swaps += int(accepted)
-
-class SwapStatistics(object):
- """
- Tracks swap statistics for an AbstractExchangeMC subclass instance.
-
- @param param_infos: list of ParameterInfo instances providing information
- needed for performing swaps
- @type param_infos: list of L{AbstractSwapParameterInfo}
- """
-
- def __init__(self, param_infos):
- self._stats = [SingleSwapStatistics(x) for x in param_infos]
-
- @property
- def stats(self):
- return tuple(self._stats)
-
- @property
- def acceptance_rates(self):
- """
- Returns acceptance rates for all swaps.
- """
- return [x.acceptance_rate for x in self._stats]
-
-class AbstractExchangeMC(AbstractEnsembleMC):
- """
- Abstract class for Monte Carlo sampling algorithms employing some replica exchange method.
+ super(MCCollection, self).__init__(items, type=AbstractSingleChainMC)
- @param samplers: samplers which sample from their respective equilibrium distributions
- @type samplers: list of L{AbstractSingleChainMC}
- @param param_infos: list of ParameterInfo instances providing information needed
- for performing swaps
- @type param_infos: list of L{AbstractSwapParameterInfo}
+def augment_state(state, temperature=1.0, mass_matrix=None):
"""
+    Augments a position-only state with momenta drawn from the
+    Maxwell-Boltzmann distribution.
- __metaclass__ = ABCMeta
-
- def __init__(self, samplers, param_infos):
- super(AbstractExchangeMC, self).__init__(samplers)
-
- self._swaplist1 = []
- self._swaplist2 = []
- self._currentswaplist = self._swaplist1
-
- self._param_infos = param_infos
- self._statistics = SwapStatistics(self._param_infos)
-
- def _checkstate(self, state):
- if not isinstance(state, EnsembleState):
- raise TypeError(state)
-
- def swap(self, index):
- """
- Perform swap between sampler pair described by param_infos[index]
- and return outcome (true = accepted, false = rejected).
-
- @param index: index of swap pair in param_infos
- @type index: int
-
- @rtype: boolean
- """
- param_info = self._param_infos[index]
- swapcom = self._propose_swap(param_info)
- swapcom = self._calc_pacc_swap(swapcom)
- result = self._accept_swap(swapcom)
-
- self.state = EnsembleState([x.state for x in self._samplers])
-
- self.statistics.stats[index].update(result)
-
- return result
-
- @abstractmethod
- def _propose_swap(self, param_info):
- """
- Calculate proposal states for a swap between two samplers.
-
- @param param_info: ParameterInfo instance holding swap parameters
- @type param_info: L{AbstractSwapParameterInfo}
-
- @rtype: L{AbstractSwapCommunicator}
- """
- pass
-
- @abstractmethod
- def _calc_pacc_swap(self, swapcom):
- """
- Calculate probability to accept a swap given initial and proposal states.
-
- @param swapcom: SwapCommunicator instance holding information to be communicated
- between distinct swap substeps
- @type swapcom: L{AbstractSwapCommunicator}
-
- @rtype: L{AbstractSwapCommunicator}
- """
- pass
-
- def _accept_swap(self, swapcom):
- """
- Accept / reject an exchange between two samplers given proposal states and
- the acceptance probability and returns the outcome (true = accepted, false = rejected).
-
- @param swapcom: SwapCommunicator instance holding information to be communicated
- between distinct swap substeps
- @type swapcom: L{AbstractSwapCommunicator}
-
- @rtype: boolean
- """
-
- if numpy.random.random() < swapcom.acceptance_probability:
- swapcom.sampler1.state = swapcom.traj21.final
- swapcom.sampler2.state = swapcom.traj12.final
- return True
- else:
- return False
-
- @property
- def acceptance_rates(self):
- """
- Return swap acceptance rates.
-
- @rtype: list of floats
- """
- return self.statistics.acceptance_rates
-
- @property
- def param_infos(self):
- """
- List of SwapParameterInfo instances holding all necessary parameters.
+ @param state: State to be augmented
+ @type state: L{State}
- @rtype: list of L{AbstractSwapParameterInfo}
- """
- return self._param_infos
-
- @property
- def statistics(self):
- return self._statistics
+ @param temperature: Temperature of the desired Maxwell-Boltzmann
+ distribution
+ @type temperature: float
- def _update_statistics(self, index, accepted):
- """
- Update statistics of a given swap process.
-
- @param index: position of swap statistics to be updated
- @type index: int
-
- @param accepted: outcome of the swap
- @type accepted: boolean
- """
-
- self._stats[index][0] += 1
- self._stats[index][1] += int(accepted)
+ @param mass_matrix: Mass matrix to be used in the Maxwell-Boltzmann
+ distribution; None defaults to a unity matrix
+ @type mass_matrix: L{InvertibleMatrix}
-class RENSTrajInfo(object):
+ @return: The initial state augmented with momenta
+ @rtype: L{State}
"""
- Holds information necessary for calculating a RENS trajectory.
- @param param_info: ParameterInfo instance holding swap parameters
- @type param_info: L{AbstractSwapParameterInfo}
-
- @param init_state: state from which the trajectory is supposed to start
- @type init_state: L{State}
-
- @param protocol: Protocol to be used to generate nonequilibrium trajectories
- @type protocol: Real-valued function that maps [0, switching time] to [0, 1]
- """
+ d = len(state.position)
+ mm_unity = None
- def __init__(self, param_info, init_state, protocol):
-
- self._param_info = param_info
- self._protocol = protocol
- self._init_state = init_state
-
- @property
- def param_info(self):
- return self._param_info
-
- @property
- def protocol(self):
- return self._protocol
-
- @property
- def init_state(self):
- return self._init_state
-
-class AbstractRENS(AbstractExchangeMC):
- """
- Abstract Replica Exchange with Nonequilibrium Switches
- (RENS, Ballard & Jarzynski 2009) class.
- Subclasses implement various ways of generating trajectories
- (deterministic or stochastic).
- """
-
- __metaclass__ = ABCMeta
-
- def _propose_swap(self, param_info):
-
- T1 = param_info.sampler1.temperature
- T2 = param_info.sampler2.temperature
-
- momentum_covariance_matrix1 = T1 * param_info.mass_matrix
- momentum_covariance_matrix2 = T2 * param_info.mass_matrix
-
- d = len(param_info.sampler1.state.position)
-
- if param_info.mass_matrix.is_unity_multiple:
- momentum1 = numpy.random.normal(scale=numpy.sqrt(T1 * param_info.mass_matrix[0][0]),
- size=d)
- momentum2 = numpy.random.normal(scale=numpy.sqrt(T2 * param_info.mass_matrix[0][0]),
- size=d)
- else:
- momentum1 = numpy.random.multivariate_normal(mean=numpy.zeros(d),
- cov=momentum_covariance_matrix1)
- momentum2 = numpy.random.multivariate_normal(mean=numpy.zeros(d),
- cov=momentum_covariance_matrix2)
-
- init_state1 = State(param_info.sampler1.state.position, momentum1)
- init_state2 = State(param_info.sampler2.state.position, momentum2)
-
- param_info.sampler1.state = init_state1
- param_info.sampler2.state = init_state2
-
- trajinfo12 = RENSTrajInfo(param_info, init_state1, protocol=lambda t, tau: t / tau)
- trajinfo21 = RENSTrajInfo(param_info, init_state2, protocol=lambda t, tau: (tau - t) / tau)
-
- traj12 = self._run_traj_generator(trajinfo12)
- traj21 = self._run_traj_generator(trajinfo21)
-
- return RENSSwapCommunicator(param_info, traj12, traj21)
+ if mass_matrix is None:
+ mm_unity = True
- def _calc_pacc_swap(self, swapcom):
-
- T1 = swapcom.param_info.sampler1.temperature
- T2 = swapcom.param_info.sampler2.temperature
-
- heat12 = swapcom.traj12.heat
- heat21 = swapcom.traj21.heat
-
- proposal1 = swapcom.traj21.final
- proposal2 = swapcom.traj12.final
-
- state1 = swapcom.traj12.initial
- state2 = swapcom.traj21.initial
-
- if swapcom.param_info.mass_matrix.is_unity_multiple:
- inverse_mass_matrix = 1. / swapcom.param_info.mass_matrix[0][0]
- else:
- inverse_mass_matrix = swapcom.param_info.mass_matrix.inverse
-
- E1 = lambda x:-swapcom.sampler1._pdf.log_prob(x)
- E2 = lambda x:-swapcom.sampler2._pdf.log_prob(x)
- K = lambda x: 0.5 * numpy.dot(x.T, numpy.dot(inverse_mass_matrix, x))
-
- w12 = (K(proposal2.momentum) + E2(proposal2.position)) / T2 - \
- (K(state1.momentum) + E1(state1.position)) / T1 - heat12
- w21 = (K(proposal1.momentum) + E1(proposal1.position)) / T1 - \
- (K(state2.momentum) + E2(state2.position)) / T2 - heat21
-
- swapcom.acceptance_probability = csb.numeric.exp(-w12 - w21)
-
- return swapcom
-
- @abstractmethod
- def _run_traj_generator(self, traj_info):
- """
- Run the trajectory generator which generates a trajectory
- of a given length between the states of two samplers.
-
- @param traj_info: TrajectoryInfo instance holding information
- needed to generate a nonequilibrium trajectory
- @type traj_info: L{RENSTrajInfo}
+    if mm_unity is None:
+ mm_unity = mass_matrix.is_unity_multiple
- @rtype: L{Trajectory}
- """
- pass
-
-class AbstractSwapScheme(object):
- """
- Provides the interface for classes defining schemes according to which swaps in
- Replica Exchange-like simulations are performed.
-
- @param algorithm: Exchange algorithm that performs the swaps
- @type algorithm: L{AbstractExchangeMC}
- """
-
- __metaclass__ = ABCMeta
+    if mm_unity:
+ momentum = numpy.random.normal(scale=numpy.sqrt(temperature),
+ size=d)
+ else:
+ covariance_matrix = temperature * mass_matrix
+ momentum = numpy.random.multivariate_normal(mean=numpy.zeros(d),
+ cov=covariance_matrix)
- def __init__(self, algorithm):
+ state.momentum = momentum
- self._algorithm = algorithm
-
- @abstractmethod
- def swap_all(self):
- """
- Advises the Replica Exchange-like algorithm to perform swaps according to
- the some schedule defined here.
- """
-
- pass
-
-class AlternatingAdjacentSwapScheme(AbstractSwapScheme):
- """
- Provides a swapping scheme in which tries exchanges between neighbours only
- following the scheme 1 <-> 2, 3 <-> 4, ... and after a sampling period 2 <-> 3, 4 <-> 5, ...
-
- @param algorithm: Exchange algorithm that performs the swaps
- @type algorithm: L{AbstractExchangeMC}
- """
-
- def __init__(self, algorithm):
-
- super(AlternatingAdjacentSwapScheme, self).__init__(algorithm)
-
- self._current_swap_list = None
- self._swap_list1 = []
- self._swap_list2 = []
- self._create_swap_lists()
-
- def _create_swap_lists(self):
-
- if len(self._algorithm.param_infos) == 1:
- self._swap_list1.append(0)
- self._swap_list2.append(0)
- else:
- i = 0
- while i < len(self._algorithm.param_infos):
- self._swap_list1.append(i)
- i += 2
-
- i = 1
- while i < len(self._algorithm.param_infos):
- self._swap_list2.append(i)
- i += 2
-
- self._current_swap_list = self._swap_list1
-
- def swap_all(self):
-
- for x in self._current_swap_list:
- self._algorithm.swap(x)
-
- if self._current_swap_list == self._swap_list1:
- self._current_swap_list = self._swap_list2
- else:
- self._current_swap_list = self._swap_list1
+ return state
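
A brief sketch of the new augment_state helper (not from the upstream sources), which equips a position-only state with Maxwell-Boltzmann momenta before a RENS trajectory is generated; passing mass_matrix=None amounts to a unit mass matrix.

    import numpy
    from csb.statistics.samplers import State
    from csb.statistics.samplers.mc import augment_state

    state = State(numpy.zeros(3))            # position-only state
    augment_state(state, temperature=2.0)    # momenta drawn with standard deviation sqrt(T) per dimension
    print(state.momentum)                    # three Gaussian momenta, e.g. [ 1.2 -0.3  0.7]
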
diff --git a/csb/statistics/samplers/mc/multichain.py b/csb/statistics/samplers/mc/multichain.py
index 0333102..8e7f5fb 100644
--- a/csb/statistics/samplers/mc/multichain.py
+++ b/csb/statistics/samplers/mc/multichain.py
@@ -12,8 +12,8 @@ being generated by the HMC algorithm:
>>> from csb.io.plots import Chart
>>> from csb.statistics.pdf import Normal
>>> from csb.statistics.samplers import State
- >>> from csb.statistics.samplers.mc import ThermostattedMDRENSSwapParameterInfo, AlternatingAdjacentSwapScheme
- >>> from csb.statistics.samplers.mc.multichain import ThermostattedMDRENS
+ >>> from csb.statistics.samplers.mc.multichain import ThermostattedMDRENSSwapParameterInfo
+ >>> from csb.statistics.samplers.mc.multichain import ThermostattedMDRENS, AlternatingAdjacentSwapScheme
>>> from csb.statistics.samplers.mc.singlechain import HMCSampler
>>> # Pick some initial state for the different Markov chains:
@@ -97,14 +97,1265 @@ L{AbstractSwapScheme.swap_all} method may be called for example after sampling
intervals of a fixed length or randomly.
"""
+import numpy
+
import csb.numeric
-from csb.statistics.samplers.mc import AbstractExchangeMC, AbstractRENS, RESwapCommunicator
-from csb.statistics.samplers.mc.propagators import MDPropagator, ThermostattedMDPropagator
-from csb.statistics.samplers.mc import Trajectory
-from csb.numeric.integrators import AbstractGradient
+from abc import ABCMeta, abstractmethod
+
+from csb.statistics.samplers import EnsembleState
+from csb.statistics.samplers.mc import AbstractMC, Trajectory, MCCollection, augment_state
+from csb.statistics.samplers.mc.propagators import MDPropagator, ThermostattedMDPropagator
+from csb.statistics.samplers.mc.neqsteppropagator import NonequilibriumStepPropagator
+from csb.statistics.samplers.mc.neqsteppropagator import Protocol, Step, ReducedHamiltonian
+from csb.statistics.samplers.mc.neqsteppropagator import ReducedHamiltonianPerturbation
+from csb.statistics.samplers.mc.neqsteppropagator import HMCPropagation, HMCPropagationParam
+from csb.statistics.samplers.mc.neqsteppropagator import HamiltonianSysInfo, NonequilibriumTrajectory
+from csb.numeric.integrators import AbstractGradient, FastLeapFrog
+
+
+class AbstractEnsembleMC(AbstractMC):
+ """
+ Abstract class for Monte Carlo sampling algorithms simulating several ensembles.
+
+ @param samplers: samplers which sample from their respective equilibrium distributions
+ @type samplers: list of L{AbstractSingleChainMC}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, samplers):
+
+ self._samplers = MCCollection(samplers)
+ state = EnsembleState([x.state for x in self._samplers])
+
+ super(AbstractEnsembleMC, self).__init__(state)
+
+ def sample(self):
+ """
+ Draw an ensemble sample.
+
+ @rtype: L{EnsembleState}
+ """
+
+ sample = EnsembleState([sampler.sample() for sampler in self._samplers])
+ self.state = sample
+
+ return sample
+
+ @property
+ def energy(self):
+ """
+ Total ensemble energy.
+ """
+ return sum([x.energy for x in self._samplers])
+
+
+class AbstractExchangeMC(AbstractEnsembleMC):
+ """
+ Abstract class for Monte Carlo sampling algorithms employing some replica exchange method.
+
+ @param samplers: samplers which sample from their respective equilibrium distributions
+ @type samplers: list of L{AbstractSingleChainMC}
+
+ @param param_infos: list of ParameterInfo instances providing information needed
+ for performing swaps
+ @type param_infos: list of L{AbstractSwapParameterInfo}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, samplers, param_infos):
+ super(AbstractExchangeMC, self).__init__(samplers)
+
+ self._swaplist1 = []
+ self._swaplist2 = []
+ self._currentswaplist = self._swaplist1
+
+ self._param_infos = param_infos
+ self._statistics = SwapStatistics(self._param_infos)
+
+ def _checkstate(self, state):
+ if not isinstance(state, EnsembleState):
+ raise TypeError(state)
+
+ def swap(self, index):
+ """
+ Perform swap between sampler pair described by param_infos[index]
+ and return outcome (true = accepted, false = rejected).
+
+ @param index: index of swap pair in param_infos
+ @type index: int
+
+ @rtype: boolean
+ """
+ param_info = self._param_infos[index]
+ swapcom = self._propose_swap(param_info)
+ swapcom = self._calc_pacc_swap(swapcom)
+ result = self._accept_swap(swapcom)
+
+ self.state = EnsembleState([x.state for x in self._samplers])
+
+ self.statistics.stats[index].update(result)
+
+ return result
+
+ @abstractmethod
+ def _propose_swap(self, param_info):
+ """
+ Calculate proposal states for a swap between two samplers.
+
+ @param param_info: ParameterInfo instance holding swap parameters
+ @type param_info: L{AbstractSwapParameterInfo}
+
+ @rtype: L{AbstractSwapCommunicator}
+ """
+ pass
+
+ @abstractmethod
+ def _calc_pacc_swap(self, swapcom):
+ """
+ Calculate probability to accept a swap given initial and proposal states.
+
+ @param swapcom: SwapCommunicator instance holding information to be communicated
+ between distinct swap substeps
+ @type swapcom: L{AbstractSwapCommunicator}
+
+ @rtype: L{AbstractSwapCommunicator}
+ """
+ pass
+
+ def _accept_swap(self, swapcom):
+ """
+ Accept / reject an exchange between two samplers given proposal states and
+ the acceptance probability and returns the outcome (true = accepted, false = rejected).
+
+ @param swapcom: SwapCommunicator instance holding information to be communicated
+ between distinct swap substeps
+ @type swapcom: L{AbstractSwapCommunicator}
+
+ @rtype: boolean
+ """
+
+ if numpy.random.random() < swapcom.acceptance_probability:
+ if swapcom.sampler1.state.momentum is None and swapcom.sampler2.state.momentum is None:
+ swapcom.traj12.final.momentum = None
+ swapcom.traj21.final.momentum = None
+ swapcom.sampler1.state = swapcom.traj21.final
+ swapcom.sampler2.state = swapcom.traj12.final
+ return True
+ else:
+ return False
+
+ @property
+ def acceptance_rates(self):
+ """
+ Return swap acceptance rates.
+
+ @rtype: list of floats
+ """
+ return self.statistics.acceptance_rates
+
+ @property
+ def param_infos(self):
+ """
+ List of SwapParameterInfo instances holding all necessary parameters.
+
+ @rtype: list of L{AbstractSwapParameterInfo}
+ """
+ return self._param_infos
+
+ @property
+ def statistics(self):
+ return self._statistics
+
+ def _update_statistics(self, index, accepted):
+ """
+ Update statistics of a given swap process.
+
+ @param index: position of swap statistics to be updated
+ @type index: int
+
+ @param accepted: outcome of the swap
+ @type accepted: boolean
+ """
+
+ self._stats[index][0] += 1
+ self._stats[index][1] += int(accepted)
+
+
+class AbstractSwapParameterInfo(object):
+ """
+ Subclass instances hold all parameters necessary for performing a swap
+ between two given samplers.
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, sampler1, sampler2):
+ """
+ @param sampler1: First sampler
+ @type sampler1: L{AbstractSingleChainMC}
+
+ @param sampler2: Second sampler
+ @type sampler2: L{AbstractSingleChainMC}
+ """
+
+ self._sampler1 = sampler1
+ self._sampler2 = sampler2
+
+ @property
+ def sampler1(self):
+ return self._sampler1
+
+ @property
+ def sampler2(self):
+ return self._sampler2
+
+
+class AbstractSwapCommunicator(object):
+ """
+ Holds all the information which needs to be communicated between
+ distinct swap substeps.
+
+ @param param_info: ParameterInfo instance holding swap parameters
+ @type param_info: L{AbstractSwapParameterInfo}
+
+ @param traj12: Forward trajectory
+ @type traj12: L{Trajectory}
+
+ @param traj21: Reverse trajectory
+ @type traj21: L{Trajectory}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, param_info, traj12, traj21):
+
+ self._sampler1 = param_info.sampler1
+ self._sampler2 = param_info.sampler2
+
+ self._traj12 = traj12
+ self._traj21 = traj21
+
+ self._param_info = param_info
+
+ self._acceptance_probability = None
+ self._accepted = False
+
+ @property
+ def sampler1(self):
+ return self._sampler1
+
+ @property
+ def sampler2(self):
+ return self._sampler2
+
+ @property
+ def traj12(self):
+ return self._traj12
+
+ @property
+ def traj21(self):
+ return self._traj21
+
+ @property
+ def acceptance_probability(self):
+ return self._acceptance_probability
+ @acceptance_probability.setter
+ def acceptance_probability(self, value):
+ self._acceptance_probability = value
+
+ @property
+ def accepted(self):
+ return self._accepted
+ @accepted.setter
+ def accepted(self, value):
+ self._accepted = value
+
+ @property
+ def param_info(self):
+ return self._param_info
+
+
+class ReplicaExchangeMC(AbstractExchangeMC):
+ """
+    Replica Exchange (RE, Swendsen & Wang 1986) implementation.
+ """
+
+ def _propose_swap(self, param_info):
+
+ return RESwapCommunicator(param_info, Trajectory([param_info.sampler1.state,
+ param_info.sampler1.state]),
+ Trajectory([param_info.sampler2.state,
+ param_info.sampler2.state]))
+
+ def _calc_pacc_swap(self, swapcom):
+
+ E1 = lambda x:-swapcom.sampler1._pdf.log_prob(x)
+ E2 = lambda x:-swapcom.sampler2._pdf.log_prob(x)
+
+ T1 = swapcom.sampler1.temperature
+ T2 = swapcom.sampler2.temperature
+
+ state1 = swapcom.traj12.initial
+ state2 = swapcom.traj21.initial
+
+ proposal1 = swapcom.traj21.final
+ proposal2 = swapcom.traj12.final
+
+ swapcom.acceptance_probability = csb.numeric.exp(-E1(proposal1.position) / T1
+ + E1(state1.position) / T1
+ - E2(proposal2.position) / T2
+ + E2(state2.position) / T2)
+
+ return swapcom
+
+
+class RESwapParameterInfo(AbstractSwapParameterInfo):
+ """
+ Holds parameters for a standard Replica Exchange swap.
+ """
+ pass
+
+
+class RESwapCommunicator(AbstractSwapCommunicator):
+ """
+ Holds all the information which needs to be communicated between distinct
+ RE swap substeps.
+
+ See L{AbstractSwapCommunicator} for constructor signature.
+ """
+ pass
+
+
+class AbstractRENS(AbstractExchangeMC):
+ """
+ Abstract Replica Exchange with Nonequilibrium Switches
+ (RENS, Ballard & Jarzynski 2009) class.
+ Subclasses implement various ways of generating trajectories
+ (deterministic or stochastic).
+ """
+
+ __metaclass__ = ABCMeta
+
+ def _propose_swap(self, param_info):
+
+ init_state1 = param_info.sampler1.state
+ init_state2 = param_info.sampler2.state
+
+ trajinfo12 = RENSTrajInfo(param_info, init_state1, direction="fw")
+ trajinfo21 = RENSTrajInfo(param_info, init_state2, direction="bw")
+
+ traj12 = self._run_traj_generator(trajinfo12)
+ traj21 = self._run_traj_generator(trajinfo21)
+
+ return RENSSwapCommunicator(param_info, traj12, traj21)
+
+ def _setup_protocol(self, traj_info):
+ """
+ Sets the protocol lambda(t) to either the forward or the reverse protocol.
+
+        @param traj_info: TrajectoryInfo object holding information necessary to
+                          calculate the RENS trajectories.
+ @type traj_info: L{RENSTrajInfo}
+ """
+
+ if traj_info.direction == "fw":
+ return traj_info.param_info.protocol
+ else:
+ return lambda t, tau: traj_info.param_info.protocol(tau - t, tau)
+
+
+ def _get_init_temperature(self, traj_info):
+ """
+ Determine the initial temperature of a RENS trajectory.
+
+        @param traj_info: TrajectoryInfo object holding information necessary to
+ calculate the RENS trajectory.
+ @type traj_info: L{RENSTrajInfo}
+ """
+
+ if traj_info.direction == "fw":
+ return traj_info.param_info.sampler1.temperature
+ else:
+ return traj_info.param_info.sampler2.temperature
+
+ @abstractmethod
+ def _calc_works(self, swapcom):
+ """
+        Calculates the work expended during the nonequilibrium
+ trajectories.
+
+ @param swapcom: Swap communicator object holding all the
+                        necessary information.
+ @type swapcom: L{RENSSwapCommunicator}
+
+        @return: The work expended during the forward and the backward
+ trajectory.
+ @rtype: 2-tuple of floats
+ """
+
+ pass
+
+ def _calc_pacc_swap(self, swapcom):
+
+ work12, work21 = self._calc_works(swapcom)
+ swapcom.acceptance_probability = csb.numeric.exp(-work12 - work21)
+
+ return swapcom
+
+ @abstractmethod
+ def _propagator_factory(self, traj_info):
+ """
+ Factory method which produces the propagator object used to calculate
+ the RENS trajectories.
+
+        @param traj_info: TrajectoryInfo object holding information necessary to
+                          calculate the RENS trajectories.
+ @type traj_info: L{RENSTrajInfo}
+ @rtype: L{AbstractPropagator}
+ """
+ pass
+
+ def _run_traj_generator(self, traj_info):
+ """
+ Run the trajectory generator which generates a trajectory
+ of a given length between the states of two samplers.
+
+ @param traj_info: TrajectoryInfo instance holding information
+ needed to generate a nonequilibrium trajectory
+ @type traj_info: L{RENSTrajInfo}
+
+ @rtype: L{Trajectory}
+ """
+
+ init_temperature = self._get_init_temperature(traj_info)
+
+ init_state = traj_info.init_state.clone()
+
+ if init_state.momentum is None:
+ init_state = augment_state(init_state,
+ init_temperature,
+ traj_info.param_info.mass_matrix)
+
+ gen = self._propagator_factory(traj_info)
+
+ traj = gen.generate(init_state, int(traj_info.param_info.traj_length))
+
+ return traj
+
+
+class AbstractRENSSwapParameterInfo(RESwapParameterInfo):
+ """
+ Holds parameters for a RENS swap.
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, sampler1, sampler2, protocol):
+
+ super(AbstractRENSSwapParameterInfo, self).__init__(sampler1, sampler2)
+
+ ## Can't pass the linear protocol as a default argument because of a reported bug
+ ## in epydoc parsing which makes it fail building the docs.
+ self._protocol = None
+ if protocol is None:
+ self._protocol = lambda t, tau: t / tau
+ else:
+ self._protocol = protocol
+
+ @property
+ def protocol(self):
+ """
+ Switching protocol determining the time dependence
+ of the switching parameter.
+ """
+ return self._protocol
+ @protocol.setter
+ def protocol(self, value):
+ self._protocol = value
+
+
+class RENSSwapCommunicator(AbstractSwapCommunicator):
+ """
+ Holds all the information which needs to be communicated between distinct
+ RENS swap substeps.
+
+ See L{AbstractSwapCommunicator} for constructor signature.
+ """
+
+ pass
+
+
+class RENSTrajInfo(object):
+ """
+ Holds information necessary for calculating a RENS trajectory.
+
+ @param param_info: ParameterInfo instance holding swap parameters
+ @type param_info: L{AbstractSwapParameterInfo}
+
+ @param init_state: state from which the trajectory is supposed to start
+ @type init_state: L{State}
+
+ @param direction: Either "fw" or "bw", indicating a forward or backward
+ trajectory. This is necessary to pick either the protocol or
+ the reversed protocol, respectively.
+ @type direction: string, either "fw" or "bw"
+ """
+
+ def __init__(self, param_info, init_state, direction):
+
+ self._param_info = param_info
+ self._init_state = init_state
+ self._direction = direction
+
+ @property
+ def param_info(self):
+ return self._param_info
+
+ @property
+ def init_state(self):
+ return self._init_state
+
+ @property
+ def direction(self):
+ return self._direction
+
+
+class MDRENS(AbstractRENS):
+ """
+ Replica Exchange with Nonequilibrium Switches (RENS, Ballard & Jarzynski 2009)
+ with Molecular Dynamics (MD) trajectories.
+
+ @param samplers: Samplers which sample their
+ respective equilibrium distributions
+ @type samplers: list of L{AbstractSingleChainMC}
+
+ @param param_infos: ParameterInfo instance holding
+ information required to perform a MDRENS swap
+ @type param_infos: list of L{MDRENSSwapParameterInfo}
+
+ @param integrator: Subclass of L{AbstractIntegrator} to be used to
+ calculate the non-equilibrium trajectories
+ @type integrator: type
+ """
+
+ def __init__(self, samplers, param_infos,
+ integrator=csb.numeric.integrators.FastLeapFrog):
+
+ super(MDRENS, self).__init__(samplers, param_infos)
+
+ self._integrator = integrator
+
+ def _propagator_factory(self, traj_info):
+
+ protocol = self._setup_protocol(traj_info)
+ tau = traj_info.param_info.traj_length * traj_info.param_info.timestep
+ factory = InterpolationFactory(protocol, tau)
+ gen = MDPropagator(factory.build_gradient(traj_info.param_info.gradient),
+ traj_info.param_info.timestep,
+ mass_matrix=traj_info.param_info.mass_matrix,
+ integrator=self._integrator)
+
+ return gen
+
+ def _calc_works(self, swapcom):
+
+ T1 = swapcom.param_info.sampler1.temperature
+ T2 = swapcom.param_info.sampler2.temperature
+
+ heat12 = swapcom.traj12.heat
+ heat21 = swapcom.traj21.heat
+
+ proposal1 = swapcom.traj21.final
+ proposal2 = swapcom.traj12.final
+
+ state1 = swapcom.traj12.initial
+ state2 = swapcom.traj21.initial
+
+ if swapcom.param_info.mass_matrix.is_unity_multiple:
+ inverse_mass_matrix = 1.0 / swapcom.param_info.mass_matrix[0][0]
+ else:
+ inverse_mass_matrix = swapcom.param_info.mass_matrix.inverse
+
+ E1 = lambda x: -swapcom.sampler1._pdf.log_prob(x)
+ E2 = lambda x: -swapcom.sampler2._pdf.log_prob(x)
+ K = lambda x: 0.5 * numpy.dot(x.T, numpy.dot(inverse_mass_matrix, x))
+
+ w12 = (K(proposal2.momentum) + E2(proposal2.position)) / T2 - \
+ (K(state1.momentum) + E1(state1.position)) / T1 - heat12
+ w21 = (K(proposal1.momentum) + E1(proposal1.position)) / T1 - \
+ (K(state2.momentum) + E2(state2.position)) / T2 - heat21
+
+ return w12, w21
+
+
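A minimal usage sketch of MDRENS together with the AlternatingAdjacentSwapScheme defined further below. Here sampler1, sampler2 (already-constructed AbstractSingleChainMC instances) and gradient (an AbstractGradient) are placeholders, not part of this commit, and the ensemble's sample() method is assumed to be inherited from the multichain base class:

    params = [MDRENSSwapParameterInfo(sampler1, sampler2,
                                      timestep=0.01, traj_length=100,
                                      gradient=gradient)]
    algorithm = MDRENS([sampler1, sampler2], params)
    swapper = AlternatingAdjacentSwapScheme(algorithm)

    for i in range(1000):
        if i > 0 and i % 5 == 0:
            swapper.swap_all()      # every 5th step: attempt a RENS swap
        else:
            algorithm.sample()      # regular single-chain MC steps
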
+class MDRENSSwapParameterInfo(RESwapParameterInfo):
+ """
+ Holds parameters for a MDRENS swap.
+
+ @param sampler1: First sampler
+ @type sampler1: L{AbstractSingleChainMC}
+
+ @param sampler2: Second sampler
+ @type sampler2: L{AbstractSingleChainMC}
+
+ @param timestep: Integration timestep
+ @type timestep: float
+
+ @param traj_length: Trajectory length in number of timesteps
+ @type traj_length: int
+
+ @param gradient: Gradient which determines the dynamics during a trajectory
+ @type gradient: L{AbstractGradient}
+
+ @param protocol: Switching protocol determining the time dependence of the
+ switching parameter. It is a function M{f} taking the running
+ time t and the switching time tau to yield a value in M{[0, 1]}
+ with M{f(0, tau) = 0} and M{f(tau, tau) = 1}. Default is a linear
+ protocol, which is being set manually due to an epydoc bug
+ @type protocol: callable
+
+ @param mass_matrix: Mass matrix
+ @type mass_matrix: n-dimensional matrix of type L{InvertibleMatrix} with n being the dimension
+ of the configuration space, that is, the dimension of
+ the position / momentum vectors
+ """
+
+ def __init__(self, sampler1, sampler2, timestep, traj_length, gradient,
+ protocol=None, mass_matrix=None):
+
+ super(MDRENSSwapParameterInfo, self).__init__(sampler1, sampler2)
+
+ self._mass_matrix = mass_matrix
+ if self.mass_matrix is None:
+ d = len(sampler1.state.position)
+ self.mass_matrix = csb.numeric.InvertibleMatrix(numpy.eye(d), numpy.eye(d))
+
+ self._traj_length = traj_length
+ self._gradient = gradient
+ self._timestep = timestep
+
+ ## Can't pass the linear protocol as a default argument because of a reported bug
+ ## in epydoc parsing which makes it fail building the docs.
+ self._protocol = None
+ if protocol is None:
+ self._protocol = lambda t, tau: t / tau
+ else:
+ self._protocol = protocol
+
+ @property
+ def timestep(self):
+ """
+ Integration timestep.
+ """
+ return self._timestep
+ @timestep.setter
+ def timestep(self, value):
+ self._timestep = float(value)
+
+ @property
+ def traj_length(self):
+ """
+ Trajectory length in number of integration steps.
+ """
+ return self._traj_length
+ @traj_length.setter
+ def traj_length(self, value):
+ self._traj_length = int(value)
+
+ @property
+ def gradient(self):
+ """
+ Gradient which governs the equations of motion.
+ """
+ return self._gradient
+
+ @property
+ def mass_matrix(self):
+ return self._mass_matrix
+ @mass_matrix.setter
+ def mass_matrix(self, value):
+ self._mass_matrix = value
+
+ @property
+ def protocol(self):
+ """
+ Switching protocol determining the time dependence
+ of the switching parameter.
+ """
+ return self._protocol
+ @protocol.setter
+ def protocol(self, value):
+ self._protocol = value
+
+
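The protocol argument documented above need not be linear; any callable f with f(0, tau) = 0 and f(tau, tau) = 1 qualifies. A small sketch of a smooth alternative that could be passed in (numpy assumed):

    import numpy

    # Smooth switching protocol with vanishing slope at both endpoints:
    # f(0, tau) = 0, f(tau, tau) = 1
    smooth = lambda t, tau: numpy.sin(0.5 * numpy.pi * t / tau) ** 2

    # e.g. MDRENSSwapParameterInfo(sampler1, sampler2, 0.01, 100, gradient,
    #                              protocol=smooth)
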
+class ThermostattedMDRENS(MDRENS):
+ """
+ Replica Exchange with Nonequilibrium Switches (RENS, Ballard & Jarzynski, 2009)
+ with Andersen-thermostatted Molecular Dynamics (MD) trajectories.
+
+ @param samplers: Samplers which sample their
+ respective equilibrium distributions
+ @type samplers: list of L{AbstractSingleChainMC}
+
+ @param param_infos: ParameterInfo instance holding
+ information required to perform a MDRENS swap
+ @type param_infos: list of L{ThermostattedMDRENSSwapParameterInfo}
+
+ @param integrator: Subclass of L{AbstractIntegrator} to be used to
+ calculate the non-equilibrium trajectories
+ @type integrator: type
+ """
+
+ def __init__(self, samplers, param_infos, integrator=csb.numeric.integrators.LeapFrog):
+
+ super(ThermostattedMDRENS, self).__init__(samplers, param_infos, integrator)
+
+ def _propagator_factory(self, traj_info):
+
+ protocol = self._setup_protocol(traj_info)
+ tau = traj_info.param_info.traj_length * traj_info.param_info.timestep
+ factory = InterpolationFactory(protocol, tau)
+
+ grad = factory.build_gradient(traj_info.param_info.gradient)
+ temp = factory.build_temperature(traj_info.param_info.temperature)
+
+ gen = ThermostattedMDPropagator(grad,
+ traj_info.param_info.timestep, temperature=temp,
+ collision_probability=traj_info.param_info.collision_probability,
+ update_interval=traj_info.param_info.collision_interval,
+ mass_matrix=traj_info.param_info.mass_matrix,
+ integrator=self._integrator)
+
+ return gen
+
+class ThermostattedMDRENSSwapParameterInfo(MDRENSSwapParameterInfo):
+ """
+ @param sampler1: First sampler
+ @type sampler1: subclass instance of L{AbstractSingleChainMC}
+
+ @param sampler2: Second sampler
+ @type sampler2: subclass instance of L{AbstractSingleChainMC}
+
+ @param timestep: Integration timestep
+ @type timestep: float
+
+ @param traj_length: Trajectory length in number of timesteps
+ @type traj_length: int
+
+ @param gradient: Gradient which determines the dynamics during a trajectory
+ @type gradient: subclass instance of L{AbstractGradient}
+
+ @param mass_matrix: Mass matrix
+ @type mass_matrix: n-dimensional L{InvertibleMatrix} with n being the dimension
+ of the configuration space, that is, the dimension of
+ the position / momentum vectors
+
+ @param protocol: Switching protocol determining the time dependence of the
+ switching parameter. It is a function f taking the running
+ time t and the switching time tau to yield a value in [0, 1]
+ with f(0, tau) = 0 and f(tau, tau) = 1
+ @type protocol: callable
+
+ @param temperature: Temperature interpolation function.
+ @type temperature: real-valued function on [0, 1] with T(0) being the temperature
+ of the ensemble sampler1 samples from and T(1) the temperature
+ of the ensemble sampler2 samples from
+
+ @param collision_probability: Probability for a collision with the heatbath during one timestep
+ @type collision_probability: float
+
+ @param collision_interval: Interval during which collision may occur with probability
+ collision_probability
+ @type collision_interval: int
+ """
+
+ def __init__(self, sampler1, sampler2, timestep, traj_length, gradient, mass_matrix=None,
+ protocol=None, temperature=lambda l: 1.0,
+ collision_probability=0.1, collision_interval=1):
+
+ super(ThermostattedMDRENSSwapParameterInfo, self).__init__(sampler1, sampler2, timestep,
+ traj_length, gradient,
+ mass_matrix=mass_matrix,
+ protocol=protocol)
+
+ self._collision_probability = None
+ self._collision_interval = None
+ self._temperature = temperature
+ self.collision_probability = collision_probability
+ self.collision_interval = collision_interval
+
+ @property
+ def collision_probability(self):
+ """
+ Probability for a collision with the heatbath during one timestep.
+ """
+ return self._collision_probability
+ @collision_probability.setter
+ def collision_probability(self, value):
+ self._collision_probability = float(value)
+
+ @property
+ def collision_interval(self):
+ """
+ Interval during which collision may occur with probability
+ C{collision_probability}.
+ """
+ return self._collision_interval
+ @collision_interval.setter
+ def collision_interval(self, value):
+ self._collision_interval = int(value)
+
+ @property
+ def temperature(self):
+ return self._temperature
+
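The temperature argument of ThermostattedMDRENSSwapParameterInfo is an interpolation on [0, 1] between the two ensemble temperatures; a minimal sketch, assuming placeholder temperatures T1 and T2 for sampler1 and sampler2:

    T1, T2 = 1.0, 2.0

    # Linear temperature interpolation: T(0) = T1, T(1) = T2
    temperature = lambda l: T1 + l * (T2 - T1)

    # e.g. ThermostattedMDRENSSwapParameterInfo(sampler1, sampler2, 0.01, 100, gradient,
    #                                           temperature=temperature)
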
+
+class AbstractStepRENS(AbstractRENS):
+ """
+ Replica Exchange with Nonequilibrium Switches (RENS, Ballard & Jarzynski 2009)
+ with stepwise trajectories as described in Nilmeier et al., "Nonequilibrium candidate
+ Monte Carlo is an efficient tool for equilibrium simulation", PNAS 2011.
+ The switching parameter dependence of the Hamiltonian is a linear interpolation
+ between the PDFs of the sampler objects,
+ M{H(S{lambda}) = H_2 * S{lambda} + (1 - S{lambda}) * H_1}.
+ The perturbation kernel is a thermodynamic perturbation and the propagation is subclass
+ responsibility.
+ Note that due to the linear interpolations between the two Hamiltonians, the
+ log-probability has to be evaluated four times per perturbation step which can be
+ costly. In this case it is advisable to define the intermediate log probabilities
+ in _run_traj_generator differently.
+
+ @param samplers: Samplers which sample their respective equilibrium distributions
+ @type samplers: list of L{AbstractSingleChainMC}
+
+ @param param_infos: ParameterInfo instances holding
+ information required to perform HMCStepRENS swaps
+ @type param_infos: list of L{AbstractSwapParameterInfo}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, samplers, param_infos):
+
+ super(AbstractStepRENS, self).__init__(samplers, param_infos)
+
+ self._evaluate_im_works = True
+
+ @abstractmethod
+ def _setup_propagations(self, im_sys_infos, param_info):
+ """
+ Set up the propagation steps using the information about the current system
+ setup and parameters from the SwapParameterInfo object.
+
+ @param im_sys_infos: Information about the intermediate system setups
+ @type im_sys_infos: List of L{AbstractSystemInfo}
+
+ @param param_info: SwapParameterInfo object containing parameters for the
+ propagations like timesteps, trajectory lengths etc.
+ @type param_info: L{AbstractSwapParameterInfo}
+ """
+
+ pass
+
+ @abstractmethod
+ def _add_gradients(self, im_sys_infos, param_info, t_prot):
+ """
+ If needed, set im_sys_infos.hamiltonian.gradient.
+
+ @param im_sys_infos: Information about the intermediate system setups
+ @type im_sys_infos: List of L{AbstractSystemInfo}
+
+ @param param_info: SwapParameterInfo object containing parameters for the
+ propagations like timesteps, trajectory lengths etc.
+ @type param_info: L{AbstractSwapParameterInfo}
+
+ @param t_prot: Switching protocol defining the time dependence of the switching
+ parameter.
+ @type t_prot: callable
+ """
+
+ pass
+
+ def _setup_stepwise_protocol(self, traj_info):
+ """
+ Sets up the stepwise protocol consisting of perturbation and relaxation steps.
+
+ @param traj_info: TrajectoryInfo instance holding information
+ needed to generate a nonequilibrium trajectory
+ @type traj_info: L{RENSTrajInfo}
+
+ @rtype: L{Protocol}
+ """
+
+ pdf1 = traj_info.param_info.sampler1._pdf
+ pdf2 = traj_info.param_info.sampler2._pdf
+ T1 = traj_info.param_info.sampler1.temperature
+ T2 = traj_info.param_info.sampler2.temperature
+ traj_length = traj_info.param_info.intermediate_steps
+ prot = self._setup_protocol(traj_info)
+ t_prot = lambda i: prot(float(i), float(traj_length))
+
+ im_log_probs = [lambda x, i=i: pdf2.log_prob(x) * t_prot(i) + \
+ (1 - t_prot(i)) * pdf1.log_prob(x)
+ for i in range(traj_length + 1)]
+
+ im_temperatures = [T2 * t_prot(i) + (1 - t_prot(i)) * T1
+ for i in range(traj_length + 1)]
+ im_reduced_hamiltonians = [ReducedHamiltonian(im_log_probs[i],
+ temperature=im_temperatures[i])
+ for i in range(traj_length + 1)]
+ im_sys_infos = [HamiltonianSysInfo(im_reduced_hamiltonians[i])
+ for i in range(traj_length + 1)]
+ perturbations = [ReducedHamiltonianPerturbation(im_sys_infos[i], im_sys_infos[i+1])
+ for i in range(traj_length)]
+ if not self._evaluate_im_works:
+ for p in perturbations:
+ p.evaluate_work = False
+ im_sys_infos = self._add_gradients(im_sys_infos, traj_info.param_info, t_prot)
+ propagations = self._setup_propagations(im_sys_infos, traj_info.param_info)
+
+ steps = [Step(perturbations[i], propagations[i]) for i in range(traj_length)]
+
+ return Protocol(steps)
+
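The intermediate log-probabilities assembled above interpolate linearly between the two target densities; for illustration, a self-contained sketch with two hypothetical one-dimensional Gaussians (numpy assumed):

    import numpy

    log_p1 = lambda x: -0.5 * numpy.sum(x ** 2)          # unit-variance Gaussian
    log_p2 = lambda x: -0.125 * numpy.sum(x ** 2)        # variance-4 Gaussian

    n_steps = 10
    lam = lambda i: float(i) / n_steps                   # linear protocol value at step i
    im_log_probs = [lambda x, i=i: lam(i) * log_p2(x) + (1.0 - lam(i)) * log_p1(x)
                    for i in range(n_steps + 1)]

    # im_log_probs[0] coincides with log_p1, im_log_probs[-1] with log_p2
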
+ def _propagator_factory(self, traj_info):
+
+ protocol = self._setup_stepwise_protocol(traj_info)
+ gen = NonequilibriumStepPropagator(protocol)
+
+ return gen
+
+ def _run_traj_generator(self, traj_info):
+
+ init_temperature = self._get_init_temperature(traj_info)
+
+ gen = self._propagator_factory(traj_info)
+
+ traj = gen.generate(traj_info.init_state)
+
+ return NonequilibriumTrajectory([traj_info.init_state, traj.final], jacobian=1.0,
+ heat=traj.heat, work=traj.work, deltaH=traj.deltaH)
+
+
+class HMCStepRENS(AbstractStepRENS):
+ """
+ Replica Exchange with Nonequilibrium Switches (RENS, Ballard & Jarzynski 2009)
+ with stepwise trajectories as described in Nilmeier et al., "Nonequilibrium candidate
+ Monte Carlo is an efficient tool for equilibrium simulation", PNAS 2011.
+ The switching parameter dependence of the Hamiltonian is a linear interpolation
+ between the PDFs of the sampler objects,
+ M{H(S{lambda}) = H_2 * S{lambda} + (1 - S{lambda}) * H_1}.
+ The perturbation kernel is a thermodynamic perturbation and the propagation is done using HMC.
+
+ Note that due to the linear interpolations between the two Hamiltonians, the
+ log-probability and its gradient has to be evaluated four times per perturbation step which
+ can be costly. In this case it is advisable to define the intermediate log probabilities
+ in _run_traj_generator differently.
+
+ @param samplers: Samplers which sample their respective equilibrium distributions
+ @type samplers: list of L{AbstractSingleChainMC}
+
+ @param param_infos: ParameterInfo instances holding
+ information required to perform HMCStepRENS swaps
+ @type param_infos: list of L{HMCStepRENSSwapParameterInfo}
+ """
+
+ def __init__(self, samplers, param_infos):
+
+ super(HMCStepRENS, self).__init__(samplers, param_infos)
+
+ def _add_gradients(self, im_sys_infos, param_info, t_prot):
+
+ im_gradients = [lambda x, t, i=i: param_info.gradient(x, t_prot(i))
+ for i in range(param_info.intermediate_steps + 1)]
+
+ for i, s in enumerate(im_sys_infos):
+ s.hamiltonian.gradient = im_gradients[i]
+
+ return im_sys_infos
+
+ def _setup_propagations(self, im_sys_infos, param_info):
+
+ propagation_params = [HMCPropagationParam(param_info.timestep,
+ param_info.hmc_traj_length,
+ im_sys_infos[i+1].hamiltonian.gradient,
+ param_info.hmc_iterations,
+ mass_matrix=param_info.mass_matrix,
+ integrator=param_info.integrator)
+ for i in range(param_info.intermediate_steps)]
+
+ propagations = [HMCPropagation(im_sys_infos[i+1], propagation_params[i], evaluate_heat=False)
+ for i in range(param_info.intermediate_steps)]
+
+ return propagations
+
+ def _calc_works(self, swapcom):
+
+ return swapcom.traj12.work, swapcom.traj21.work
+
+
+class HMCStepRENSSwapParameterInfo(AbstractRENSSwapParameterInfo):
+ """
+ Holds all required information for performing HMCStepRENS swaps.
+
+ @param sampler1: First sampler
+ @type sampler1: subclass instance of L{AbstractSingleChainMC}
+
+ @param sampler2: Second sampler
+ @type sampler2: subclass instance of L{AbstractSingleChainMC}
+
+ @param timestep: integration timestep
+ @type timestep: float
+
+ @param hmc_traj_length: HMC trajectory length
+ @type hmc_traj_length: int
+
+ @param hmc_iterations: number of HMC iterations in the propagation step
+ @type hmc_iterations: int
+
+ @param gradient: gradient governing the equations of motion, function of
+ position array and switching protocol
+ @type gradient: callable
+
+ @param intermediate_steps: number of steps in the protocol; this is a discrete version
+ of the switching time in "continuous" RENS implementations
+ @type intermediate_steps: int
+
+ @param protocol: Switching protocol determining the time dependence of the
+ switching parameter. It is a function f taking the running
+ time t and the switching time tau to yield a value in [0, 1]
+ with f(0, tau) = 0 and f(tau, tau) = 1
+ @type protocol: callable
+
+ @param mass_matrix: mass matrix for kinetic energy definition
+ @type mass_matrix: L{InvertibleMatrix}
+
+ @param integrator: Integration scheme to be utilized
+ @type integrator: L{AbstractIntegrator}
+ """
+
+ def __init__(self, sampler1, sampler2, timestep, hmc_traj_length, hmc_iterations,
+ gradient, intermediate_steps, parametrization=None, protocol=None,
+ mass_matrix=None, integrator=FastLeapFrog):
+
+ super(HMCStepRENSSwapParameterInfo, self).__init__(sampler1, sampler2, protocol)
+
+ self._mass_matrix = None
+ self.mass_matrix = mass_matrix
+ if self.mass_matrix is None:
+ d = len(sampler1.state.position)
+ self.mass_matrix = csb.numeric.InvertibleMatrix(numpy.eye(d), numpy.eye(d))
+
+ self._hmc_traj_length = None
+ self.hmc_traj_length = hmc_traj_length
+ self._gradient = None
+ self.gradient = gradient
+ self._timestep = None
+ self.timestep = timestep
+ self._hmc_iterations = None
+ self.hmc_iterations = hmc_iterations
+ self._intermediate_steps = None
+ self.intermediate_steps = intermediate_steps
+ self._integrator = None
+ self.integrator = integrator
+
+ @property
+ def timestep(self):
+ """
+ Integration timestep.
+ """
+ return self._timestep
+ @timestep.setter
+ def timestep(self, value):
+ self._timestep = float(value)
+
+ @property
+ def hmc_traj_length(self):
+ """
+ HMC trajectory length in number of integration steps.
+ """
+ return self._hmc_traj_length
+ @hmc_traj_length.setter
+ def hmc_traj_length(self, value):
+ self._hmc_traj_length = int(value)
+
+ @property
+ def gradient(self):
+ """
+ Gradient which governs the equations of motion.
+ """
+ return self._gradient
+ @gradient.setter
+ def gradient(self, value):
+ self._gradient = value
+
+ @property
+ def mass_matrix(self):
+ return self._mass_matrix
+ @mass_matrix.setter
+ def mass_matrix(self, value):
+ self._mass_matrix = value
+
+ @property
+ def hmc_iterations(self):
+ return self._hmc_iterations
+ @hmc_iterations.setter
+ def hmc_iterations(self, value):
+ self._hmc_iterations = value
+
+ @property
+ def intermediate_steps(self):
+ return self._intermediate_steps
+ @intermediate_steps.setter
+ def intermediate_steps(self, value):
+ self._intermediate_steps = value
+
+ @property
+ def integrator(self):
+ return self._integrator
+ @integrator.setter
+ def integrator(self, value):
+ self._integrator = value
+
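A hedged usage sketch of HMCStepRENS with the parameter class above; sampler1 and sampler2 are again placeholder AbstractSingleChainMC instances, and grad is a placeholder gradient taking a position array and the switching parameter:

    param = HMCStepRENSSwapParameterInfo(sampler1, sampler2,
                                         timestep=0.05,
                                         hmc_traj_length=20,
                                         hmc_iterations=1,
                                         gradient=grad,          # grad(x, lambda)
                                         intermediate_steps=10)
    algorithm = HMCStepRENS([sampler1, sampler2], [param])
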
+
+class AbstractSwapScheme(object):
+ """
+ Provides the interface for classes defining schemes according to which swaps in
+ Replica Exchange-like simulations are performed.
+
+ @param algorithm: Exchange algorithm that performs the swaps
+ @type algorithm: L{AbstractExchangeMC}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, algorithm):
+
+ self._algorithm = algorithm
+
+ @abstractmethod
+ def swap_all(self):
+ """
+ Advises the Replica Exchange-like algorithm to perform swaps according to
+ the schedule defined here.
+ """
+
+ pass
+
+
+class AlternatingAdjacentSwapScheme(AbstractSwapScheme):
+ """
+ Provides a swapping scheme which attempts exchanges between neighbours only,
+ following the scheme 1 <-> 2, 3 <-> 4, ... and, after a sampling period, 2 <-> 3, 4 <-> 5, ...
+
+ @param algorithm: Exchange algorithm that performs the swaps
+ @type algorithm: L{AbstractExchangeMC}
+ """
+
+ def __init__(self, algorithm):
+
+ super(AlternatingAdjacentSwapScheme, self).__init__(algorithm)
+
+ self._current_swap_list = None
+ self._swap_list1 = []
+ self._swap_list2 = []
+ self._create_swap_lists()
+
+ def _create_swap_lists(self):
+
+ if len(self._algorithm.param_infos) == 1:
+ self._swap_list1.append(0)
+ self._swap_list2.append(0)
+ else:
+ i = 0
+ while i < len(self._algorithm.param_infos):
+ self._swap_list1.append(i)
+ i += 2
+
+ i = 1
+ while i < len(self._algorithm.param_infos):
+ self._swap_list2.append(i)
+ i += 2
+
+ self._current_swap_list = self._swap_list1
+
+ def swap_all(self):
+
+ for x in self._current_swap_list:
+ self._algorithm.swap(x)
+
+ if self._current_swap_list == self._swap_list1:
+ self._current_swap_list = self._swap_list2
+ else:
+ self._current_swap_list = self._swap_list1
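For illustration, with five replicas (four adjacent pairs) the two alternating swap lists built by _create_swap_lists would be [0, 2] and [1, 3], that is, swaps 1 <-> 2, 3 <-> 4 alternating with 2 <-> 3, 4 <-> 5; the same bookkeeping in a few lines:

    n_param_infos = 4                                # five replicas -> four adjacent pairs
    swap_list1 = list(range(0, n_param_infos, 2))    # [0, 2]
    swap_list2 = list(range(1, n_param_infos, 2))    # [1, 3]
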
+
+
+class SingleSwapStatistics(object):
+ """
+ Tracks swap statistics of a single sampler pair.
+
+ @param param_info: ParameterInfo instance holding swap parameters
+ @type param_info: L{AbstractSwapParameterInfo}
+ """
+
+ def __init__(self, param_info):
+ self._total_swaps = 0
+ self._accepted_swaps = 0
+
+ @property
+ def total_swaps(self):
+ return self._total_swaps
+
+ @property
+ def accepted_swaps(self):
+ return self._accepted_swaps
+
+ @property
+ def acceptance_rate(self):
+ """
+ Acceptance rate of the sampler pair.
+ """
+ if self.total_swaps > 0:
+ return float(self.accepted_swaps) / float(self.total_swaps)
+ else:
+ return 0.
+
+ def update(self, accepted):
+ """
+ Updates swap statistics.
+ """
+ self._total_swaps += 1
+ self._accepted_swaps += int(accepted)
+
+
+class SwapStatistics(object):
+ """
+ Tracks swap statistics for an AbstractExchangeMC subclass instance.
+
+ @param param_infos: list of ParameterInfo instances providing information
+ needed for performing swaps
+ @type param_infos: list of L{AbstractSwapParameterInfo}
+ """
+
+ def __init__(self, param_infos):
+ self._stats = [SingleSwapStatistics(x) for x in param_infos]
+
+ @property
+ def stats(self):
+ return tuple(self._stats)
+
+ @property
+ def acceptance_rates(self):
+ """
+ Returns acceptance rates for all swaps.
+ """
+ return [x.acceptance_rate for x in self._stats]
+
+
class InterpolationFactory(object):
"""
Produces interpolations for functions changed during non-equilibrium
@@ -160,6 +1411,7 @@ class InterpolationFactory(object):
"""
return lambda t: temperature(self.protocol(t, self.tau))
+
class Gradient(AbstractGradient):
def __init__(self, gradient, protocol, tau):
@@ -171,115 +1423,169 @@ class Gradient(AbstractGradient):
def evaluate(self, q, t):
return self._gradient(q, self._protocol(t, self._tau))
-class ReplicaExchangeMC(AbstractExchangeMC):
- """
- Replica Exchange (RE, Swendsen & Yang 1986) implementation.
- """
-
- def _propose_swap(self, param_info):
-
- return RESwapCommunicator(param_info, Trajectory([param_info.sampler1.state,
- param_info.sampler1.state]),
- Trajectory([param_info.sampler2.state,
- param_info.sampler2.state]))
+
+class ReplicaHistory(object):
+ '''
+ Replica history object, works with both RE and RENS for
+ the AlternatingAdjacentSwapScheme.
+
+ @param samples: list holding ensemble states
+ @type samples: list
+
+ @param swap_interval: interval with which swaps were attempted, e.g.,
+ 5 means that every 5th regular MC step is replaced
+ by a swap
+ @type swap_interval: int
+
+ @param first_swap: sample index of the first sample generated by a swap attempt.
+ If None, the first sample produced by a swap attempt is assumed to
+ have sample index swap_interval. If specified, it has to be greater than zero
+ @type first_swap: int
+ '''
- def _calc_pacc_swap(self, swapcom):
-
- E1 = lambda x:-swapcom.sampler1._pdf.log_prob(x)
- E2 = lambda x:-swapcom.sampler2._pdf.log_prob(x)
-
- T1 = swapcom.sampler1.temperature
- T2 = swapcom.sampler2.temperature
+ def __init__(self, samples, swap_interval, first_swap=None):
+ self.samples = samples
+ self.swap_interval = swap_interval
+ if first_swap is None:
+ self.first_swap = swap_interval - 1
+ elif first_swap > 0:
+ self.first_swap = first_swap - 1
+ else:
+ raise ValueError("Sample index of first swap has to be greater than zero!")
+ self.n_replicas = len(samples[0])
+
+ @staticmethod
+ def _change_direction(x):
+ if x == 1:
+ return -1
+ if x == -1:
+ return 1
+
+ def calculate_history(self, start_ensemble):
+ '''
+ Calculates the replica history of the first state of ensemble #start_ensemble.
+
+ @param start_ensemble: index of the ensemble to start at, zero-indexed
+ @type start_ensemble: int
+
+ @return: replica history as a list of ensemble indices
+ @rtype: list of ints
+ '''
- state1 = swapcom.traj12.initial
- state2 = swapcom.traj21.initial
+ sample_counter = 0
+
+ # determine the direction (up = 1, down = -1) in the "temperature ladder" of
+ # the first swap attempt. Remember: first swap series is always 0 <-> 1, 2 <-> 3, ...
+ if start_ensemble % 2 == 0:
+ direction = +1
+ else:
+ direction = -1
+
+ # if number of replicas is not even and the start ensemble is the highest-temperature-
+ # ensemble, the first swap will be attempted "downwards"
+ if start_ensemble % 2 == 0 and start_ensemble == self.n_replicas - 1:
+ direction = -1
+
+ # will store the indices of the ensembles the state will visit in chronological order
+ history = []
+
+ # the ensemble the state is currently in
+ ens = start_ensemble
- proposal1 = swapcom.traj21.final
- proposal2 = swapcom.traj12.final
+ while sample_counter < len(self.samples):
+ if self.n_replicas == 2:
+ if (sample_counter - self.first_swap - 1) % self.swap_interval == 0 and \
+ sample_counter >= self.first_swap:
+ ## swap attempt: determine whether it was successful or not
+ # state after swap attempt
+ current_sample = self.samples[sample_counter][ens]
- swapcom.acceptance_probability = csb.numeric.exp(-E1(proposal1.position) / T1
- + E1(state1.position) / T1
- - E2(proposal2.position) / T2
- + E2(state2.position) / T2)
-
- return swapcom
+ # state before swap attempt
+ previous_sample = self.samples[sample_counter - 1][history[-1]]
-class MDRENS(AbstractRENS):
- """
- Replica Exchange with Nonequilibrium Switches (RENS, Ballard & Jarzynski 2009)
- with Molecular Dynamics (MD) trajectories.
+ # swap was accepted when position of the current state doesn't equal
+ # the position of the state before the swap attempt, that is, the last
+ # state in the history
+ swap_accepted = not numpy.all(current_sample.position ==
+ previous_sample.position)
- @param samplers: Samplers which sample their
- respective equilibrium distributions
- @type samplers: list of L{AbstractSingleChainMC}
+ if swap_accepted:
+ if ens == 0:
+ ens = 1
+ else:
+ ens = 0
+ history.append(ens)
+ else:
+ history.append(ens)
- @param param_infos: ParameterInfo instance holding
- information required to perform a MDRENS swap
- @type param_infos: list of L{MDRENSSwapParameterInfo}
+ else:
+
+ if (sample_counter - self.first_swap - 1) % self.swap_interval == 0 and \
+ sample_counter >= self.first_swap:
+ # state after swap attempt
+ current_sample = self.samples[sample_counter][ens]
- @param integrator: Subclass of L{AbstractIntegrator} to be used to
- calculate the non-equilibrium trajectories
- @type integrator: type
- """
+ # state before swap attempt
+ previous_sample = self.samples[sample_counter - 1][ens]
- def __init__(self, samplers, param_infos,
- integrator=csb.numeric.integrators.FastLeapFrog):
-
- super(MDRENS, self).__init__(samplers, param_infos)
-
- self._integrator = integrator
-
- def _run_traj_generator(self, traj_info):
-
- tau = traj_info.param_info.traj_length * traj_info.param_info.timestep
- factory = InterpolationFactory(traj_info.protocol, tau)
+ # swap was accepted when position of the current state doesn't equal
+ # the position of the state before the swap attempt, that is, the last
+ # state in the history
+ swap_accepted = not numpy.all(current_sample.position == previous_sample.position)
- gen = MDPropagator(factory.build_gradient(traj_info.param_info.gradient),
- traj_info.param_info.timestep,
- mass_matrix=traj_info.param_info.mass_matrix,
- integrator=self._integrator)
-
- traj = gen.generate(traj_info.init_state, int(traj_info.param_info.traj_length))
- return traj
+ if swap_accepted:
+ ens += direction
+ else:
+ if ens == self.n_replicas - 1:
+ # if at the top of the ladder, go downwards again
+ direction = -1
+ elif ens == 0:
+ # if at the bottom of the ladder, go upwards
+ direction = +1
+ else:
+ # in between, reverse the direction of the trajectory
+ # in temperature space
+ direction = self._change_direction(direction)
-class ThermostattedMDRENS(MDRENS):
- """
- Replica Exchange with Nonequilibrium Switches (RENS, Ballard & Jarzynski, 2009)
- with Andersen-thermostatted Molecular Dynamics (MD) trajectories.
+ history.append(ens)
- @param samplers: Samplers which sample their
- respective equilibrium distributions
- @type samplers: list of L{AbstractSingleChainMC}
+ sample_counter += 1
- @param param_infos: ParameterInfo instance holding
- information required to perform a MDRENS swap
- @type param_infos: list of L{ThermostattedMDRENSSwapParameterInfo}
+ return history
- @param integrator: Subclass of L{AbstractIntegrator} to be used to
- calculate the non-equilibrium trajectories
- @type integrator: type
- """
+ def calculate_projected_trajectories(self, ensemble):
+ '''
+ Calculates sequentially correlated trajectories projected on a specific ensemble.
- def __init__(self, samplers, param_infos, integrator=csb.numeric.integrators.LeapFrog):
-
- super(ThermostattedMDRENS, self).__init__(samplers, param_infos, integrator)
+ @param ensemble: ensemble index of ensemble of interest, zero-indexed
+ @type ensemble: int
- def _run_traj_generator(self, traj_info):
-
- tau = traj_info.param_info.traj_length * traj_info.param_info.timestep
- factory = InterpolationFactory(traj_info.protocol, tau)
+ @return: list of Trajectory objects containing sequentially correlated trajectories
+ @rtype: list of L{Trajectory} objects.
+ '''
- grad = factory.build_gradient(traj_info.param_info.gradient)
- temp = factory.build_temperature(traj_info.param_info.temperature)
-
- gen = ThermostattedMDPropagator(grad,
- traj_info.param_info.timestep, temperature=temp,
- collision_probability=traj_info.param_info.collision_probability,
- update_interval=traj_info.param_info.collision_interval,
- mass_matrix=traj_info.param_info.mass_matrix,
- integrator=self._integrator)
+ trajectories = []
+
+ for i in range(self.n_replicas):
+ history = self.calculate_history(i)
+ traj = [x[ensemble] for k, x in enumerate(self.samples) if history[k] == ensemble]
+ trajectories.append(Trajectory(traj))
+ return trajectories
+
+ def calculate_trajectories(self):
+ '''
+ Calculates sequentially correlated trajectories.
+
+ @return: list of Trajectory objects containing sequentially correlated trajectories
+ @rtype: list of L{Trajectory} objects.
+ '''
- traj = gen.generate(traj_info.init_state, traj_info.param_info.traj_length)
+ trajectories = []
- return traj
+ for i in range(self.n_replicas):
+ history = self.calculate_history(i)
+ traj = [x[history[k]] for k, x in enumerate(self.samples)]
+ trajectories.append(Trajectory(traj))
+
+ return trajectories
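A hedged sketch of how ReplicaHistory might be used after a RE/RENS run with the AlternatingAdjacentSwapScheme; here samples is an assumed list of per-step ensemble states (one entry per MC step, each holding one state per replica) and swaps were attempted every 5th step:

    history = ReplicaHistory(samples, swap_interval=5)

    # Ensemble indices visited by the replica that started in ensemble 0
    visited = history.calculate_history(0)

    # Sequentially correlated trajectories projected on the target ensemble 0
    target_trajs = history.calculate_projected_trajectories(0)
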
diff --git a/csb/statistics/samplers/mc/neqsteppropagator.py b/csb/statistics/samplers/mc/neqsteppropagator.py
new file mode 100644
index 0000000..11087b0
--- /dev/null
+++ b/csb/statistics/samplers/mc/neqsteppropagator.py
@@ -0,0 +1,1239 @@
+"""
+Propagator class employing stepwise trajectories as used in the NCMC
+algorithm (Nilmeier et al., "Nonequilibrium candidate Monte Carlo is
+an efficient tool for equilibrium simulation", PNAS 2011)
+"""
+
+import csb
+
+import numpy
+
+from abc import ABCMeta, abstractmethod
+from csb.statistics.samplers.mc import TrajectoryBuilder, Trajectory, augment_state, PropagationResult
+from csb.statistics.samplers.mc.propagators import AbstractPropagator, MDPropagator, HMCPropagator
+from csb.numeric import InvertibleMatrix
+from csb.numeric.integrators import FastLeapFrog
+
+class NonequilibriumTrajectory(Trajectory):
+ """
+ Trajectory holding additional information, namely the heat, the work,
+ the energy difference between initial and final states and the Jacobian.
+
+ @param items: sequence of trajectory states
+ @type items: list of L{State}s
+
+ @param heat: heat produced during the trajectory
+ @type heat: float
+
+ @param work: work expended during the trajectory
+ @type work: float
+
+ @param deltaH: energy difference between initial and final states
+ @type deltaH: float
+
+ @param jacobian: product of Jacobians of perturbations applied in the
+ calculation of the trajectory
+ @type jacobian: float
+ """
+
+ def __init__(self, items, heat=0.0, work=0.0, deltaH=0.0, jacobian=1.0, stats=None):
+
+ super(NonequilibriumTrajectory, self).__init__(items, heat=heat, work=work)
+
+ self._deltaH = None
+ self.deltaH = deltaH
+ self._jacobian = None
+ self.jacobian = jacobian
+ self._stats = None
+ self.stats = stats
+
+ @property
+ def jacobian(self):
+ return self._jacobian
+ @jacobian.setter
+ def jacobian(self, value):
+ self._jacobian = value
+
+ @property
+ def deltaH(self):
+ return self._deltaH
+ @deltaH.setter
+ def deltaH(self, value):
+ self._deltaH = value
+
+ @property
+ def stats(self):
+ return self._stats
+ @stats.setter
+ def stats(self, value):
+ self._stats = value
+
+
+class AbstractSystemInfo(object):
+ """
+ Subclasses hold all information describing a current system setup
+ (Hamiltonian, boundaries, ...)
+ """
+
+ pass
+
+class PerturbationResult(Trajectory):
+ """
+ Instances hold the result of a perturbation.
+
+ @param items: list of states defining a phase-space trajectory
+ @type items: list of L{AbstractState}s
+
+ @param work: work performed on the system during perturbation
+ @type work: float
+
+ @param jacobian: jacobian of the perturbation
+ @type jacobian: float
+
+ @param perturbed_sys: L{AbstractSystemInfo} instance
+ describing the perturbed system
+ @type perturbed_sys: L{AbstractSystemInfo}
+ """
+
+ def __init__(self, items, perturbed_sys, work, heat=0.0, jacobian=1.0):
+
+ super(PerturbationResult, self).__init__(items, heat, work)
+
+ self._jacobian = None
+ self.jacobian = jacobian
+ self._perturbed_sys = None
+ self.perturbed_sys = perturbed_sys
+
+ @property
+ def jacobian(self):
+ return self._jacobian
+ @jacobian.setter
+ def jacobian(self, value):
+ self._jacobian = value
+
+ @property
+ def perturbed_sys(self):
+ return self._perturbed_sys
+ @perturbed_sys.setter
+ def perturbed_sys(self, value):
+ self._perturbed_sys = value
+
+
+class Protocol(object):
+ """
+ Describes a stepwise protocol as in Nilmeier et al. (2011).
+
+ @param steps: the steps making up the protocol
+ @type steps: list of L{Step}s
+ """
+
+ def __init__(self, steps):
+
+ self._steps = None
+ self.steps = steps
+
+ @property
+ def steps(self):
+ """
+ The steps making up the protocol
+ """
+ return self._steps
+ @steps.setter
+ def steps(self, value):
+ self._steps = value
+
+class Step(object):
+ """
+ Defines a step in an NCMC-like stepwise protocol.
+
+ @param perturbation: The perturbation of the system
+ @type perturbation: L{AbstractPerturbation}
+
+ @param propagation: The propagation of the perturbed system
+ @type propagation: L{AbstractPropagation}
+ """
+
+ def __init__(self, perturbation, propagation):
+
+ self._perturbation = None
+ self.perturbation = perturbation
+ self._propagation = None
+ self.propagation = propagation
+ self._perform = None
+ self.perform = self._perform_pert_prop
+
+ def _perform_pert_prop(self, state, extra_info=None):
+ '''
+ First, perform the perturbation, and then the propagation.
+ Override this in a subclass if you want to pass on extra
+ information to the next step in the protocol or if you want
+ to gather some statistics on what happens in the intermediate steps.
+
+ @param state: state to be evolved
+ @type state: L{State}
+ @param extra_info: possible extra information resulting
+ from previous steps
+ @type extra_info: any type
+
+ @rtype: 3-tuple containing a short trajectory consisting of
+ the initial and the evolved state, possibly extra information
+ which will be passed on to the next step in the protocol, and
+ possibly an L{AbstractStepStatistics} subclass instance holding
+ information on what happened in the step.
+ '''
+
+ perturbation_result = self.perturbation(state)
+ propagation_result = self.propagation(perturbation_result.final)
+ result_state = propagation_result.final
+
+ shorttraj = NonequilibriumTrajectory([state, result_state],
+ heat=propagation_result.heat,
+ work=perturbation_result.work,
+ jacobian=perturbation_result.jacobian)
+
+ return shorttraj, None, None
+
+ def _perform_prop_pert(self, state, extra_info=None):
+ '''
+ First, perform the propagation, and then the perturbation.
+ Override this in a subclass if you want to pass on extra
+ information to the next step in the protocol or if you want
+ to gather some statistics on what happens in the intermediate steps.
+
+ @param state: state to be evolved
+ @type state: L{State}
+ @param extra_info: possible extra information resulting
+ from previous steps
+ @type extra_info: any type
+
+ @rtype: 3-tuple containing a short trajectory consisting of
+ the initial and the evolved state, possibly extra information
+ which will be passed on to the next step in the protocol, and
+ possibly an L{AbstractStepStatistics} subclass instance holding
+ information on what happened in the step.
+ '''
+
+ propagation_result = self.propagation(state)
+ perturbation_result = self.perturbation(propagation_result.final)
+ result_state = perturbation_result.final
+
+ shorttraj = NonequilibriumTrajectory([state, result_state],
+ heat=propagation_result.heat,
+ work=perturbation_result.work,
+ jacobian=perturbation_result.jacobian)
+
+ return shorttraj, None, None
+
+ def set_perturbation_first(self):
+ """
+ Perform first perturbation, then propagation
+ """
+
+ self.perform = self._perform_pert_prop
+
+ def set_propagation_first(self):
+ """
+ Perform first propagation, then perturbation
+ """
+
+ self.perform = self._perform_prop_pert
+
+ @property
+ def perturbation(self):
+ return self._perturbation
+ @perturbation.setter
+ def perturbation(self, value):
+ self._perturbation = value
+
+ @property
+ def propagation(self):
+ return self._propagation
+ @propagation.setter
+ def propagation(self, value):
+ self._propagation = value
+
+
+class ReducedHamiltonian(object):
+ """
+ Describes a reduced Hamiltonian (Hamiltonian, its position gradient
+ and the system temperature)
+
+ @param log_prob: log probability of the PDF under consideration, that is,
+ the negative potential energy of the system
+ @type log_prob: callable
+
+ @param gradient: gradient of the negative log probability of the PDF under
+ consideration, that is, the gradient of the potential energy;
+ function of position array and time
+ @type gradient: callable
+
+ @param temperature: system temperature
+ @type temperature: float
+
+ @param mass_matrix: system mass matrix
+ @type mass_matrix: L{InvertibleMatrix}
+ """
+
+ def __init__(self, log_prob, gradient=None, temperature=1.0, mass_matrix=None):
+ self._log_prob = None
+ self.log_prob = log_prob
+ self._gradient = None
+ self.gradient = gradient
+ self._temperature = None
+ self.temperature = temperature
+ self._mass_matrix = None
+ self.mass_matrix = mass_matrix
+
+ def E(self, x):
+ """
+ Potential energy of the system, aka negative log probability
+
+ @param x: position vector
+ @type x: 1D numpy array
+ """
+
+ return -self.log_prob(x)
+
+ def kinetic_energy(self, p):
+ """
+ Kinetic energy of the system
+
+ @param p: system momentum vector
+ @type p: 1D numpy array
+ """
+
+ if p is not None:
+ if self.mass_matrix is None:
+ return 0.5 * sum(p ** 2)
+ else:
+ if self.mass_matrix.is_unity_multiple:
+ return 0.5 * sum(p ** 2) / self.mass_matrix[0][0]
+ else:
+ return 0.5 * numpy.dot(p, numpy.dot(self.mass_matrix.inverse, p))
+ else:
+ return 0.0
+
+ def rlog_prob(self, x):
+ """
+ Reduced log probability
+
+ @param x: position vector
+ @type x: 1D numpy array
+ """
+
+ return self.log_prob(x) / self.temperature
+
+ def rkinetic_energy(self, p):
+ """
+ Reduced kinetic energy
+
+ @param p: system momentum vector
+ @type p: 1D numpy array
+ """
+
+ return self.kinetic_energy(p) / self.temperature
+
+ def __call__(self, x):
+ """
+ Evaluates the reduced Hamiltonian at the state x
+
+ @param x: system state
+ @type x: L{State}
+ """
+
+ return -self.rlog_prob(x.position) + self.rkinetic_energy(x.momentum)
+
+ @property
+ def log_prob(self):
+ return self._log_prob
+ @log_prob.setter
+ def log_prob(self, value):
+ self._log_prob = value
+
+ @property
+ def gradient(self):
+ return self._gradient
+ @gradient.setter
+ def gradient(self, value):
+ self._gradient = value
+
+ @property
+ def temperature(self):
+ return self._temperature
+ @temperature.setter
+ def temperature(self, value):
+ self._temperature = value
+
+ @property
+ def mass_matrix(self):
+ return self._mass_matrix
+ @mass_matrix.setter
+ def mass_matrix(self, value):
+ self._mass_matrix = value
+
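A small numerical sketch of the reduced quantities defined above, for a one-dimensional standard Gaussian at temperature 2 (values chosen for illustration only; State is assumed to be the state class from csb.statistics.samplers, taking a position and an optional momentum array):

    import numpy
    from csb.statistics.samplers import State

    rh = ReducedHamiltonian(lambda x: -0.5 * numpy.sum(x ** 2), temperature=2.0)

    x = numpy.array([2.0])
    p = numpy.array([1.0])

    rh.E(x)                   # potential energy: 2.0
    rh.kinetic_energy(p)      # 0.5 (unit mass implied when no mass matrix is set)
    rh.rlog_prob(x)           # reduced log probability: -1.0
    rh(State(x, p))           # reduced Hamiltonian: 2.0 / 2 + 0.5 / 2 = 1.25
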
+
+class AbstractPerturbation(object):
+ """
+ Describes an abstract system perturbation
+
+ @param sys_before: information about the system before the perturbation
+ @type sys_before: L{AbstractSystemInfo}
+
+ @param sys_after: information about the system after the perturbation
+ @type sys_after: L{AbstractSystemInfo}
+
+ @param param: parameters necessary for system perturbation
+ @type param: L{AbstractPerturbationParam}
+
+ @param evaluate_work: Allows switching off the work evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_work: boolean
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, sys_before, sys_after, param=None, evaluate_work=True):
+ self._sys_before = None
+ self.sys_before = sys_before
+ self._sys_after = None
+ self.sys_after = sys_after
+ self.param = param
+ self._evaluate_work = None
+ self.evaluate_work = evaluate_work
+
+ @abstractmethod
+ def _run_perturbator(self, state):
+ """
+ Calculates the trajectory of the system while it is being perturbed.
+
+ @param state: The initial system state
+ @type state: L{State}
+
+ @return: The trajectory of the system while it is being perturbed
+ @rtype: L{Trajectory}
+ """
+
+ pass
+
+ @abstractmethod
+ def _calculate_work(self, traj):
+ """
+ Calculates the work expended during perturbation of the system.
+
+ @param traj: The trajectory of the system while being perturbed
+ @type traj: L{Trajectory}
+
+ @return: The work expended during perturbation
+ @rtype: float
+ """
+
+ pass
+
+ @abstractmethod
+ def _calculate_jacobian(self, traj):
+ """
+ Calculates the Jacobian determinant which reflects phase
+ space compression during perturbation.
+
+ @param traj: The trajectory of the system while being perturbed
+ @type traj: L{Trajectory}
+
+ @return: The Jacobian determinant
+ @rtype: float
+ """
+
+ pass
+
+ def _evaluate(self, state):
+ """
+ Performs the perturbation of the system and / or the state
+
+ @param state: system state
+ @type state: L{State}
+ """
+
+ traj = self._run_perturbator(state)
+ work = self._calculate_work(traj)
+ jacobian = self._calculate_jacobian(traj)
+
+ return PerturbationResult([traj.initial, traj.final], self.sys_after,
+ work, jacobian=jacobian)
+
+ def __call__(self, state):
+ """
+ Performs the perturbation of the system and / or the state
+
+ @param state: system state
+ @type state: L{State}
+ """
+
+ return self._evaluate(state)
+
+ @property
+ def sys_before(self):
+ return self._sys_before
+ @sys_before.setter
+ def sys_before(self, value):
+ self._sys_before = value
+
+ @property
+ def sys_after(self):
+ return self._sys_after
+ @sys_after.setter
+ def sys_after(self, value):
+ self._sys_after = value
+
+ @property
+ def param(self):
+ return self._param
+ @param.setter
+ def param(self, value):
+ self._param = value
+
+ @property
+ def evaluate_work(self):
+ return self._evaluate_work
+ @evaluate_work.setter
+ def evaluate_work(self, value):
+ self._evaluate_work = value
+
+
+class AbstractPropagation(object):
+ """
+ Describes an abstract system propagation
+
+ @param sys: information about the current system setup
+ @type sys: L{AbstractSystemInfo}
+
+ @param param: parameters necessary for propagating the system
+ @type param: L{AbstractPropagationParam}
+
+ @param evaluate_heat: Allows switching off the heat evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_heat: boolean
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, sys, param, evaluate_heat=True):
+
+ self._sys = None
+ self.sys = sys
+ self._param = None
+ self.param = param
+ self._evaluate_heat = None
+ self.evaluate_heat = evaluate_heat
+
+ @abstractmethod
+ def _propagator_factory(self):
+ """
+ Factory method which returns the propagator to be used for
+ propagating the system.
+
+ @return: Some propagator object
+ @rtype: L{AbstractPropagator}
+ """
+
+ @abstractmethod
+ def _run_propagator(self, state):
+ """
+ Propagates the system using the propagator instance returned
+ by _propagator_factory().
+
+ @param state: Initial state
+ @type state: L{State}
+
+ @return: The result of the propagation
+ @rtype: L{PropagationResult}
+ """
+
+ pass
+
+ @abstractmethod
+ def _calculate_heat(self, traj):
+ """
+ Calculates the heat resulting from system propagation.
+
+ @param traj: The trajectory of the system during propagation
+ @type traj: L{Trajectory}
+
+ @return: The heat resulting from system propagation.
+ @rtype: float
+ """
+
+ pass
+
+ def _evaluate(self, state):
+ """
+ Performs the propagation of a state in the specified system
+
+ @param state: system state
+ @type state: L{State}
+ """
+
+ traj = self._run_propagator(state)
+ heat = self._calculate_heat(traj)
+
+ return PropagationResult(traj.initial, traj.final, heat=heat)
+
+ def __call__(self, state):
+ """
+ Performs the propagation of a state in the specified system
+
+ @param state: system state
+ @type state: L{State}
+ """
+
+ return self._evaluate(state)
+
+ @property
+ def sys(self):
+ return self._sys
+ @sys.setter
+ def sys(self, value):
+ self._sys = value
+
+ @property
+ def param(self):
+ return self._param
+ @param.setter
+ def param(self, value):
+ self._param = value
+
+ @property
+ def evaluate_heat(self):
+ return self._evaluate_heat
+ @evaluate_heat.setter
+ def evaluate_heat(self, value):
+ self._evaluate_heat = value
+
+
+class ReducedHamiltonianPerturbation(AbstractPerturbation):
+ """
+ System perturbation by changing the reduced Hamiltonian
+
+ @param sys_before: information about the system before the perturbation
+ @type sys_before: L{AbstractSystemInfo}
+
+ @param sys_after: information about the system after the perturbation
+ @type sys_after: L{AbstractSystemInfo}
+
+ @param evaluate_work: Allows switching off the work evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_work: boolean
+ """
+
+ def __init__(self, sys_before, sys_after, evaluate_work=True):
+
+ super(ReducedHamiltonianPerturbation, self).__init__(sys_before, sys_after,
+ evaluate_work=evaluate_work)
+
+ def _calculate_work(self, traj):
+
+ work = 0.0
+ if self.evaluate_work:
+ work = self.sys_after.hamiltonian(traj.final) - \
+ self.sys_before.hamiltonian(traj.initial)
+
+ return work
+
+ def _calculate_jacobian(self, traj):
+
+ return 1.0
+
+ def _run_perturbator(self, state):
+
+ return Trajectory([state, state])
+
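The work computed by ReducedHamiltonianPerturbation is simply the difference of the two reduced Hamiltonians evaluated at the unchanged state; a quick sketch, reusing hypothetical Gaussian Hamiltonians and the assumed State class (HamiltonianSysInfo is defined further below in this module):

    import numpy
    from csb.statistics.samplers import State

    h1 = ReducedHamiltonian(lambda x: -0.5 * numpy.sum(x ** 2))
    h2 = ReducedHamiltonian(lambda x: -0.125 * numpy.sum(x ** 2))

    pert = ReducedHamiltonianPerturbation(HamiltonianSysInfo(h1), HamiltonianSysInfo(h2))
    result = pert(State(numpy.array([2.0])))

    result.work         # h2(x) - h1(x) = 0.5 - 2.0 = -1.5
    result.jacobian     # 1.0 for a pure Hamiltonian switch
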
+
+class AbstractMCPropagation(AbstractPropagation):
+ """
+ System propagation by some MC algorithm.
+
+ @param sys: information about the current system setup
+ @type sys: L{AbstractSystemInfo}
+
+ @param param: parameters necessary for propagating the system
+ @type param: L{AbstractPropagationParam}
+
+ @param evaluate_heat: Allows switching off the heat evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_heat: boolean
+ """
+
+ ## Not necessary, but otherwise epydoc complains
+ def __init__(self, sys, param, evaluate_heat=True):
+
+ super(AbstractMCPropagation, self).__init__(sys, param, evaluate_heat=evaluate_heat)
+
+ def _calculate_heat(self, traj):
+
+ heat = 0.0
+ if self.evaluate_heat:
+ heat = self.sys.hamiltonian.E(traj.final.position) - \
+ self.sys.hamiltonian.E(traj.initial.position)
+
+ return heat
+
+ def _run_propagator(self, state):
+
+ gen = self._propagator_factory()
+
+ return gen.generate(state, self.param.iterations, False)
+
+
+class HMCPropagation(AbstractMCPropagation):
+ """
+ System propagation by HMC
+
+ @param sys: information about the current system setup
+ @type sys: L{AbstractSystemInfo}
+
+ @param param: parameters necessary for propagating the system
+ @type param: L{HMCPropagationParam}
+
+ @param evaluate_heat: Allows switching off the heat evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_heat: boolean
+ """
+
+ def __init__(self, sys, param, evaluate_heat=True):
+
+ super(HMCPropagation, self).__init__(sys, param, evaluate_heat)
+
+ if self.param.gradient is None:
+ self.param.gradient = self.sys.hamiltonian.gradient
+
+ def _set_mass_matrix(self, state):
+ """
+ Sets the mass matrix in the param object.
+
+ @param state: The initial state which is used to determine the dimension
+ of the mass matrix
+ @type state: L{State}
+ """
+
+ if self.param.mass_matrix is None:
+ d = len(state.position)
+ self.param.mass_matrix = InvertibleMatrix(numpy.eye(d))
+
+ def _propagator_factory(self):
+
+ gen = HMCPropagator(self.sys.hamiltonian, self.param.gradient,
+ self.param.timestep, self.param.traj_length,
+ temperature=self.sys.hamiltonian.temperature,
+ integrator=self.param.integrator,
+ mass_matrix=self.param.mass_matrix)
+
+ return gen
+
+ def _evaluate(self, state):
+
+ self._set_mass_matrix(state)
+
+ return super(HMCPropagation, self)._evaluate(state)
+
+ @property
+ def param(self):
+ return self._param
+ @param.setter
+ def param(self, value):
+ self._param = value
+
+
+class AbstractMDPropagation(AbstractPropagation):
+ """
+ System propagation by some MD algorithm
+
+ @param sys: information about the current system setup
+ @type sys: L{AbstractSystemInfo}
+
+ @param param: parameters necessary for propagating the system
+ @type param: L{MDPropagationParam}
+
+ @param evaluate_heat: Allows switching off the heat evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_heat: boolean
+ """
+
+ __metaclass__ = ABCMeta
+
+ ## Not necessary, but otherwise epydoc complains
+ def __init__(self, sys, param, evaluate_heat=True):
+
+ super(AbstractMDPropagation, self).__init__(sys, param, evaluate_heat=evaluate_heat)
+
+ def _set_mass_matrix(self, state):
+ """
+ Sets the mass matrix in the param object.
+
+ @param state: The initial state which is used to determine the dimension
+ of the mass matrix
+ @type state: L{State}
+ """
+
+ if self.param.mass_matrix is None:
+ d = len(state.position)
+ self.param.mass_matrix = InvertibleMatrix(numpy.eye(d))
+
+ def _augment_state(self, state):
+ """
+ Augments the initial state by a momentum if none is defined.
+
+ @param state: Initial state
+ @type state: L{State}
+ """
+
+ if state.momentum is None:
+ state = augment_state(state, self.sys.hamiltonian.temperature,
+ self.param.mass_matrix)
+
+ return state
+
+ def _run_propagator(self, state):
+
+ gen = self._propagator_factory()
+ state = self._augment_state(state)
+ traj = gen.generate(state, self.param.traj_length)
+
+ return traj
+
+
+class PlainMDPropagation(AbstractMDPropagation):
+ """
+ System propagation by plain, microcanonical MD
+
+ @param sys: information about the current system setup
+ @type sys: L{AbstractSystemInfo}
+
+ @param param: parameters necessary for propagating the system
+ @type param: L{PlainMDPropagationParam}
+
+ @param evaluate_heat: Allows switching off the heat evaluation,
+ which might not always be needed, in order to
+ save computation time.
+ @type evaluate_heat: boolean
+ """
+
+ ## Not necessary, but otherwise epydoc complains
+ def __init__(self, sys, param, evaluate_heat=True):
+
+ super(PlainMDPropagation, self).__init__(sys, param, evaluate_heat=evaluate_heat)
+
+ def _propagator_factory(self):
+
+ return MDPropagator(self.param.gradient, self.param.timestep,
+ mass_matrix=self.param.mass_matrix,
+ integrator=self.param.integrator)
+
+ def _calculate_heat(self, traj):
+
+ heat = 0.0
+ if self.evaluate_heat:
+ heat = self.sys.hamiltonian(traj.final) - self.sys.hamiltonian(traj.initial)
+
+ return heat
+
+ def _evaluate(self, state):
+
+ self._set_mass_matrix(state)
+
+ return super(PlainMDPropagation, self)._evaluate(state)
+
+
+class AbstractPerturbationParam(object):
+ """
+ Subclasses hold information required for different kinds
+ of system perturbation
+ """
+
+ pass
+
+
+class AbstractPropagationParam(object):
+ """
+ Subclasses hold information required for different kinds
+ of system propagation
+ """
+
+ pass
+
+
+class MDParam(object):
+ """
+ Holds all required information for calculating a MD trajectory.
+
+ @param timestep: integration timestep
+ @type timestep: float
+
+ @param traj_length: MD trajectory length
+ @type traj_length: int
+
+ @param gradient: gradient governing the equations of motion, function of
+ position array and time
+ @type gradient: callable
+
+ @param temperature: System temperature for drawing momenta from the
+ Maxwell distribution
+ @type temperature: float
+
+ @param integrator: Integration scheme to be utilized
+ @type integrator: L{AbstractIntegrator}
+
+ @param mass_matrix: mass matrix for kinetic energy definition
+ @type mass_matrix: L{InvertibleMatrix}
+ """
+
+ def __init__(self, timestep, traj_length, gradient, temperature=1.0,
+ integrator=FastLeapFrog, mass_matrix=None):
+
+ self._timestep = None
+ self.timestep = timestep
+ self._traj_length = None
+ self.traj_length = traj_length
+ self._gradient = None
+ self.gradient = gradient
+ self._temperature = None
+ self.temperature = temperature
+ self._integrator = None
+ self.integrator = integrator
+ self._mass_matrix = None
+ self.mass_matrix = mass_matrix
+
+ @property
+ def timestep(self):
+ return self._timestep
+ @timestep.setter
+ def timestep(self, value):
+ self._timestep = value
+
+ @property
+ def traj_length(self):
+ return self._traj_length
+ @traj_length.setter
+ def traj_length(self, value):
+ self._traj_length = value
+
+ @property
+ def gradient(self):
+ return self._gradient
+ @gradient.setter
+ def gradient(self, value):
+ self._gradient = value
+
+ @property
+ def temperature(self):
+ return self._temperature
+ @temperature.setter
+ def temperature(self, value):
+ self._temperature = value
+
+ @property
+ def integrator(self):
+ return self._integrator
+ @integrator.setter
+ def integrator(self, value):
+ self._integrator = value
+
+ @property
+ def mass_matrix(self):
+ return self._mass_matrix
+ @mass_matrix.setter
+ def mass_matrix(self, value):
+ self._mass_matrix = value
+
+
+class HMCPropagationParam(MDParam, AbstractPropagationParam):
+ """
+ Holds all required information for propagating a system by HMC.
+ The system temperature is taken from the
+ HMCPropagation.sys.hamiltonian.temperature property.
+
+ @param timestep: integration timestep
+ @type timestep: float
+
+ @param traj_length: MD trajectory length
+ @type traj_length: int
+
+ @param gradient: gradient governing the equations of motion, function of
+ position array and time
+ @type gradient: callable
+
+ @param iterations: number of HMC iterations to be performed
+ @type iterations: int
+
+ @param integrator: Integration scheme to be utilized
+    @param integrator: Integration scheme to be utilized
+    @type integrator: L{AbstractIntegrator}
+
+ @param mass_matrix: mass matrix for kinetic energy definition
+ @type mass_matrix: L{InvertibleMatrix}
+ """
+
+ def __init__(self, timestep, traj_length, gradient, iterations=1,
+ integrator=FastLeapFrog, mass_matrix=None):
+
+ super(HMCPropagationParam, self).__init__(timestep, traj_length, gradient,
+ integrator=integrator, mass_matrix=mass_matrix)
+
+ self._iterations = None
+ self.iterations = iterations
+
+ @property
+ def iterations(self):
+ return self._iterations
+ @iterations.setter
+ def iterations(self, value):
+ self._iterations = value
+
+
+class MDPropagationParam(MDParam, AbstractPropagationParam):
+
+ pass
+
+
+class PlainMDPropagationParam(MDParam, AbstractPropagationParam):
+ """
+ Holds all required information for propagating a system by MD.
+ The system temperature is taken from the
+ MDPropagation.sys.hamiltonian.temperature property.
+
+ @param timestep: integration timestep
+ @type timestep: float
+
+ @param traj_length: MD trajectory length
+ @type traj_length: int
+
+ @param gradient: gradient governing the equations of motion, function of
+ position array and time
+ @type gradient: callable
+
+ @param integrator: Integration scheme to be utilized
+    @param integrator: Integration scheme to be utilized
+    @type integrator: L{AbstractIntegrator}
+
+ @param mass_matrix: mass matrix for kinetic energy definition
+ @type mass_matrix: L{InvertibleMatrix}
+ """
+
+ def __init__(self, timestep, traj_length, gradient,
+ integrator=FastLeapFrog, mass_matrix=None):
+
+ super(PlainMDPropagationParam, self).__init__(timestep, traj_length, gradient,
+ integrator=integrator, mass_matrix=mass_matrix)
+
+
+class HamiltonianSysInfo(AbstractSystemInfo):
+ """
+ Holds information describing a system by its Hamiltonian only.
+
+ @param hamiltonian: the Hamiltonian of the system to be described
+ @type hamiltonian: L{ReducedHamiltonian}
+ """
+
+ def __init__(self, hamiltonian):
+
+ self._hamiltonian = None
+ self.hamiltonian = hamiltonian
+
+ @property
+ def hamiltonian(self):
+ return self._hamiltonian
+ @hamiltonian.setter
+ def hamiltonian(self, value):
+ self._hamiltonian = value
+
+
+class AbstractStepStatistics(object):
+ '''
+    Abstract class defining a minimal interface for objects that store statistics
+    of what happens in L{Step} instances.
+ '''
+
+ @abstractmethod
+ def update(self, step_index, shorttraj, stats_data):
+ pass
+
+
+class DummyStepStatistics(AbstractStepStatistics):
+
+ def update(self, step_index, shorttraj, stats_data):
+ pass
+
+
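The statistics interface is deliberately thin: update() receives the step index, the short
trajectory produced by that step, and whatever step-specific data the protocol provides. A
hypothetical collector (a sketch only; the exact content of stats_data depends on the step types
used) could record the per-step work like this:

    class WorkPerStepStatistics(AbstractStepStatistics):
        """Hypothetical example: record the work contributed by each step."""

        def __init__(self):
            self.work_per_step = []

        def update(self, step_index, shorttraj, stats_data):
            # shorttraj is the short trajectory produced by this step and
            # carries its own work contribution
            self.work_per_step.append((step_index, shorttraj.work))
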
+class AbstractHeatWorkJacobianLogger(object):
+ '''
+ Abstract class defining the interface for objects keeping track of and accumulating
+ heat, work and the Jacobian during a nonequilibrium trajectory.
+ '''
+
+ def __init__(self):
+
+ self._heat = 0.0
+ self._work = 0.0
+ self._jacobian = 1.0
+
+ @abstractmethod
+ def accumulate(self, heat=0.0, work=0.0, jacobian=1.0):
+ '''
+        Adds the heat and work contributions to the values accumulated so far and
+        multiplies the Jacobian into the Jacobian accumulated so far.
+ '''
+ pass
+
+ @property
+ def heat(self):
+ return self._heat
+
+ @property
+ def work(self):
+ return self._work
+
+ @property
+ def jacobian(self):
+ return self._jacobian
+
+
+class TrivialHeatWorkJacobianLogger(AbstractHeatWorkJacobianLogger):
+
+ def accumulate(self, heat=0.0, work=0.0, jacobian=1.0):
+
+ self._heat += heat
+ self._work += work
+ self._jacobian *= jacobian
+
+
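Note the accumulation semantics: heat and work are additive, while Jacobian contributions
multiply. A toy usage of TrivialHeatWorkJacobianLogger makes this explicit:

    logger = TrivialHeatWorkJacobianLogger()
    logger.accumulate(heat=0.2, work=1.0, jacobian=0.5)
    logger.accumulate(heat=0.3, work=0.5, jacobian=2.0)

    assert abs(logger.heat - 0.5) < 1e-12      # 0.2 + 0.3
    assert abs(logger.work - 1.5) < 1e-12      # 1.0 + 0.5
    assert abs(logger.jacobian - 1.0) < 1e-12  # 0.5 * 2.0
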
+class NonequilibriumStepPropagator(AbstractPropagator):
+ """
+    Propagator class which propagates a system using NCMC-like
+    stepwise trajectories.
+
+ @param protocol: stepwise protocol to be followed
+ @type protocol: L{Protocol}
+ """
+
+ def __init__(self, protocol):
+
+ self._protocol = None
+ self.protocol = protocol
+
+ def _calculate_deltaH(self, traj):
+ """
+ Calculate the difference of the Hamiltonian between the initial and
+ the final state of a NCMC trajectory.
+
+ @param traj: The NCMC trajectory between whose initial and final states
+ the Hamiltonian difference should be calculated
+ @type traj: L{NonequilibriumTrajectory}
+ """
+
+ return self.protocol.steps[-1].perturbation.sys_after.hamiltonian(traj.final) - \
+ self.protocol.steps[0].perturbation.sys_before.hamiltonian(traj.initial)
+
+ def _create_heat_work_jacobian_logger(self):
+ '''
+ Factory method for the L{AbstractHeatWorkJacobianLogger} subclass instance
+ which keeps track of work, heat and Jacobian contributions during the nonequilibrium
+ trajectory.
+
+ @rtype: instance of an L{AbstractHeatWorkJacobianLogger}-derived class
+ '''
+
+ return TrivialHeatWorkJacobianLogger()
+
+ def _create_step_stats(self):
+ '''
+ Factory method for the L{AbstractStepStatistics} subclass instance
+ which can be used to collect statistics of what happens during steps.
+
+ @rtype: instance of an L{AbstractStepStatistics}-derived class
+ '''
+
+ return DummyStepStatistics()
+
+ def _set_initial_extra_info(self, init_state):
+ '''
+ Provides additional information for the first step in the protocol.
+
+ @rtype: any type
+ '''
+
+ return None
+
+ def _perform_step_iteration(self, estate, hwj_logger, step_stats, builder, extra_info):
+ '''
+ Performs the iteration over all steps in the nonequilibrium protocol.
+
+ @param estate: the state which will be evolved
+ @type estate: L{State}
+ @param hwj_logger: an instance of an L{AbstractHeatWorkJacobianLogger}-derived class,
+ which keeps track of work, heat and Jacobian contributions
+ @type hwj_logger: subclass of L{AbstractHeatWorkJacobianLogger}
+ @param step_stats: an instance of an L{AbstractStepStatistics}-derived class,
+ which may collect some statistics of what happens during steps
+ @type step_stats: subclass of L{AbstractStepStatistics}
+ @param builder: L{TrajectoryBuilder} instance in charge of building the L{Trajectory} object
+ @type builder: L{TrajectoryBuilder}
+    @param extra_info: any additional information that needs to be passed
+                       on from one step to the next
+ @type extra_info: any type
+
+ @rtype: L{Trajectory}
+ '''
+
+ for i in range(len(self.protocol.steps)):
+
+ shorttraj, extra_info, stats_data = self.protocol.steps[i].perform(estate, extra_info)
+
+ step_stats.update(i, shorttraj, stats_data)
+ hwj_logger.accumulate(shorttraj.heat, shorttraj.work, shorttraj.jacobian)
+
+ estate = shorttraj.final
+
+ if i == 0:
+ builder.add_initial_state(shorttraj.initial)
+ elif i != len(self.protocol.steps) - 1:
+ builder.add_intermediate_state(estate)
+ else:
+ builder.add_final_state(estate)
+
+ return builder.product
+
+
+ def generate(self, init_state, return_trajectory=False):
+
+ estate = init_state.clone()
+
+ hwj_logger = self._create_heat_work_jacobian_logger()
+ step_stats = self._create_step_stats()
+ builder = TrajectoryBuilder.create(full=return_trajectory)
+ extra_info = self._set_initial_extra_info(estate)
+
+ traj = self._perform_step_iteration(estate, hwj_logger, step_stats,
+ builder, extra_info)
+
+ reduced_deltaH = self._calculate_deltaH(traj)
+
+ if init_state.momentum is None:
+ for s in traj:
+ s.momentum = None
+
+ result = NonequilibriumTrajectory([x for x in traj],
+ heat=hwj_logger.heat,
+ work=hwj_logger.work,
+ deltaH=reduced_deltaH,
+ jacobian=hwj_logger.jacobian,
+ stats=step_stats)
+
+ return result
+
+ @property
+ def protocol(self):
+ return self._protocol
+ @protocol.setter
+ def protocol(self, value):
+ self._protocol = value
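Taken together, generate() walks through protocol.steps, lets every step transform the current
state, and wraps the result in a NonequilibriumTrajectory carrying the accumulated heat, work,
Jacobian and Hamiltonian difference. A usage sketch (illustrative only; protocol stands for a
fully configured L{Protocol} with its perturbation/propagation steps already set up, and
init_state is a L{State}):

    propagator = NonequilibriumStepPropagator(protocol)

    traj = propagator.generate(init_state)

    final_state = traj.final  # end point of the nonequilibrium trajectory
    # heat, work, Jacobian and the Hamiltonian difference are attached to
    # the returned NonequilibriumTrajectory when it is constructed above
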
diff --git a/csb/statistics/samplers/mc/propagators.py b/csb/statistics/samplers/mc/propagators.py
index e7fc55c..ed2e26f 100644
--- a/csb/statistics/samplers/mc/propagators.py
+++ b/csb/statistics/samplers/mc/propagators.py
@@ -10,7 +10,6 @@ from csb.statistics.samplers.mc import TrajectoryBuilder
from csb.numeric.integrators import FastLeapFrog, VelocityVerlet
from csb.numeric import InvertibleMatrix
-
class AbstractPropagator(object):
"""
Abstract propagator class. Subclasses serve to propagate
@@ -243,7 +242,7 @@ class ThermostattedMDPropagator(MDPropagator):
integrator = self._integrator(self.timestep, self.gradient)
builder = TrajectoryBuilder.create(full=return_trajectory)
-
+
builder.add_initial_state(init_state)
heat = 0.
@@ -309,8 +308,11 @@ class AbstractMCPropagator(AbstractPropagator):
for i in range(length):
self._sampler.sample()
- builder.add_intermediate_state(self._sampler.state)
+ if i != length - 1:
+ builder.add_intermediate_state(self._sampler.state)
+ builder.add_final_state(self._sampler.state)
+
self._acceptance_rate = self._sampler.acceptance_rate
return builder.product
@@ -331,6 +333,7 @@ class AbstractMCPropagator(AbstractPropagator):
trajectory.
"""
return self._acceptance_rate
+
class RWMCPropagator(AbstractMCPropagator):
"""
@@ -396,7 +399,7 @@ class HMCPropagator(AbstractMCPropagator):
@param temperature: See documentation of L{AbstractSingleChainMC}
@type temperature: float
"""
-
+
def __init__(self, pdf, gradient, timestep, nsteps, mass_matrix=None,
integrator=FastLeapFrog, temperature=1.):
@@ -411,7 +414,7 @@ class HMCPropagator(AbstractMCPropagator):
def _init_sampler(self, init_state):
from csb.statistics.samplers.mc.singlechain import HMCSampler
-
+
self._sampler = HMCSampler(self._pdf, init_state, self._gradient,
self._timestep, self._nsteps,
mass_matrix=self.mass_matrix,
@@ -423,3 +426,45 @@ class HMCPropagator(AbstractMCPropagator):
@mass_matrix.setter
def mass_matrix(self, value):
self._mass_matrix = value
+
+
+class AbstractNCMCPropagator(AbstractMCPropagator):
+ """
+ Draws a number of samples from a PDF using the L{AbstractNCMCSampler}.
+
+ @param protocol: The nonequilibrium protocol specifying a sequence of
+ perturbation and propagation steps
+ @type protocol: L{Protocol}
+
+ @param reverse_protocol: The protocol with the order of perturbation and
+ propagation reversed in each step.
+ @type reverse_protocol: L{Protocol}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, protocol, reverse_protocol):
+
+ self._protocol = None
+ self.protocol = protocol
+ self._reverse_protocol = None
+ self.reverse_protocol = reverse_protocol
+
+ pdf = self.protocol.steps[0].perturbation.sys_before.hamiltonian
+ temperature = self.protocol.steps[0].perturbation.sys_before.hamiltonian.temperature
+
+ super(AbstractNCMCPropagator, self).__init__(pdf, temperature)
+
+ @property
+ def protocol(self):
+ return self._protocol
+ @protocol.setter
+ def protocol(self, value):
+ self._protocol = value
+
+ @property
+ def reverse_protocol(self):
+ return self._reverse_protocol
+ @reverse_protocol.setter
+ def reverse_protocol(self, value):
+ self._reverse_protocol = value
diff --git a/csb/statistics/samplers/mc/singlechain.py b/csb/statistics/samplers/mc/singlechain.py
index 34033b1..77a56f6 100644
--- a/csb/statistics/samplers/mc/singlechain.py
+++ b/csb/statistics/samplers/mc/singlechain.py
@@ -54,14 +54,147 @@ L{State} objects in states, a histogram is created and finally plotted.
import numpy
import csb.numeric
+import csb.core
+
+from abc import ABCMeta, abstractmethod
from csb.statistics.samplers import State
-from csb.statistics.samplers.mc import AbstractSingleChainMC
-from csb.statistics.samplers.mc import SimpleProposalCommunicator
+from csb.statistics.samplers.mc import AbstractMC, MCCollection, augment_state
from csb.statistics.samplers.mc.propagators import MDPropagator
+from csb.statistics.samplers.mc.neqsteppropagator import NonequilibriumStepPropagator
from csb.numeric.integrators import FastLeapFrog
from csb.numeric import InvertibleMatrix
+
+class AbstractSingleChainMC(AbstractMC):
+ """
+ Abstract class for Monte Carlo sampling algorithms simulating
+ only one ensemble.
+
+ @param pdf: probability density function to sample from
+ @type pdf: subclass of L{csb.statistics.pdf.AbstractDensity}
+
+ @param state: Initial state
+ @type state: L{State}
+
+ @param temperature: Pseudo-temperature of the Boltzmann ensemble
+ M{p(x) = 1/N * exp(-1/T * E(x))} with the
+ pseudo-energy defined as M{E(x) = -log(p(x))}
+ where M{p(x)} is the PDF under consideration
+ @type temperature: float
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, pdf, state, temperature=1.):
+
+ super(AbstractSingleChainMC, self).__init__(state)
+
+ self._pdf = pdf
+ self._temperature = temperature
+ self._nmoves = 0
+ self._accepted = 0
+ self._last_move_accepted = None
+
+ def _checkstate(self, state):
+ if not isinstance(state, State):
+ raise TypeError(state)
+
+ def sample(self):
+ """
+ Draw a sample.
+
+ @rtype: L{State}
+ """
+
+ proposal_communicator = self._propose()
+ pacc = self._calc_pacc(proposal_communicator)
+
+        accepted = bool(numpy.random.random() < pacc)
+
+        if accepted:
+ self._accept_proposal(proposal_communicator.proposal_state)
+
+ self._update_statistics(accepted)
+ self._last_move_accepted = accepted
+
+ return self.state
+
+ @abstractmethod
+ def _propose(self):
+ """
+ Calculate a new proposal state and gather additional information
+ needed to calculate the acceptance probability.
+
+ @rtype: L{SimpleProposalCommunicator}
+ """
+ pass
+
+ @abstractmethod
+ def _calc_pacc(self, proposal_communicator):
+ """
+ Calculate probability with which to accept the proposal.
+
+ @param proposal_communicator: Contains information about the proposal
+ and additional information needed to
+ calculate the acceptance probability
+ @type proposal_communicator: L{SimpleProposalCommunicator}
+ """
+ pass
+
+ def _accept_proposal(self, proposal_state):
+ """
+ Accept the proposal state by setting it as the current state of the sampler
+ object
+
+ @param proposal_state: The proposal state
+ @type proposal_state: L{State}
+ """
+
+ self.state = proposal_state
+
+ def _update_statistics(self, accepted):
+ """
+ Update the sampling statistics.
+
+ @param accepted: Whether or not the proposal state has been accepted
+ @type accepted: boolean
+ """
+
+ self._nmoves += 1
+ self._accepted += int(accepted)
+
+ @property
+ def energy(self):
+ """
+ Log-likelihood of the current state.
+ @rtype: float
+ """
+ return self._pdf.log_prob(self.state.position)
+
+ @property
+ def acceptance_rate(self):
+ """
+ Acceptance rate.
+ """
+ return float(self._accepted) / float(self._nmoves)
+
+ @property
+ def last_move_accepted(self):
+ """
+        Whether or not the last MC move was accepted.
+ """
+ return self._last_move_accepted
+
+ @property
+ def temperature(self):
+ return self._temperature
+
+
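sample() above fixes the Metropolis skeleton (propose, compute the acceptance probability, accept
or reject, update statistics); concrete samplers only have to supply _propose() and _calc_pacc().
A minimal sketch of such a subclass (illustrative only, essentially a stripped-down random-walk
sampler, assumed to live in this module next to SimpleProposalCommunicator below):

    import numpy
    import csb.numeric

    class ToyGaussianRWSampler(AbstractSingleChainMC):
        """Toy sampler: symmetric Gaussian proposals around the current position."""

        def __init__(self, pdf, state, stepsize=0.5, temperature=1.):
            super(ToyGaussianRWSampler, self).__init__(pdf, state, temperature)
            self._stepsize = stepsize

        def _propose(self):
            current = self.state.clone()
            proposal = self.state.clone()
            proposal.position = current.position + \
                self._stepsize * numpy.random.normal(size=current.position.shape)
            return SimpleProposalCommunicator(current, proposal)

        def _calc_pacc(self, communicator):
            E = lambda x: -self._pdf.log_prob(x)
            dE = E(communicator.proposal_state.position) - \
                 E(communicator.current_state.position)
            return csb.numeric.exp(-dE / self.temperature)
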
class HMCSampler(AbstractSingleChainMC):
"""
Hamilton Monte Carlo (HMC, also called Hybrid Monte Carlo by the inventors,
@@ -101,7 +234,7 @@ class HMCSampler(AbstractSingleChainMC):
def __init__(self, pdf, state, gradient, timestep, nsteps,
mass_matrix=None, integrator=FastLeapFrog, temperature=1.):
-
+
super(HMCSampler, self).__init__(pdf, state, temperature)
self._timestep = None
@@ -126,26 +259,28 @@ class HMCSampler(AbstractSingleChainMC):
def _propose(self):
- if not self.mass_matrix.is_unity_multiple:
- momenta = numpy.random.multivariate_normal(mean=numpy.zeros(self._d),
- cov=self._momentum_covariance_matrix)
- else:
- mass = self.mass_matrix[0][0]
- momenta = numpy.random.normal(size=self._d, scale=numpy.sqrt(self.temperature * mass))
-
- self.state = State(self.state.position, momenta)
- proposal = self._propagator.generate(self.state, self._nsteps).final
+ current_state = self.state.clone()
+ current_state = augment_state(current_state, self.temperature, self.mass_matrix)
+ proposal_state = self._propagator.generate(current_state, self._nsteps).final
- return SimpleProposalCommunicator(proposal)
+ return SimpleProposalCommunicator(current_state, proposal_state)
def _calc_pacc(self, proposal_communicator):
- proposal = proposal_communicator.proposal
+ current_state = proposal_communicator.current_state
+ proposal_state = proposal_communicator.proposal_state
+
E = lambda x: -self._pdf.log_prob(x)
K = lambda x: 0.5 * numpy.dot(x.T, numpy.dot(self.mass_matrix.inverse, x))
- pacc = csb.numeric.exp((-K(proposal.momentum) - E(proposal.position)
- + K(self.state.momentum) + E(self.state.position)) / self.temperature)
+ pacc = csb.numeric.exp((-K(proposal_state.momentum) - E(proposal_state.position)
+ + K(current_state.momentum) + E(current_state.position)) /
+ self.temperature)
+
+ if self.state.momentum is None:
+ proposal_communicator.proposal_state.momentum = None
+ else:
+ proposal_communicator.proposal_state.momentum = self.state.momentum
return pacc
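For reference, the quantity computed here is the standard Hamiltonian Monte Carlo acceptance
ratio: with M{H(q, p) = E(q) + K(p)}, the code evaluates M{exp(-(H(q', p') - H(q, p)) / T)} for
the proposal M{(q', p')}, and sample() then accepts with probability
M{min(1, exp(-(H(q', p') - H(q, p)) / T))} through the uniform-random comparison.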
@@ -176,6 +311,7 @@ class HMCSampler(AbstractSingleChainMC):
if "_propagator" in dir(self):
self._propagator.mass_matrix = self._mass_matrix
+
class RWMCSampler(AbstractSingleChainMC):
"""
Random Walk Metropolis Monte Carlo implementation
@@ -211,20 +347,27 @@ class RWMCSampler(AbstractSingleChainMC):
self._stepsize = None
self.stepsize = stepsize
if proposal_density == None:
- self._proposal_density = lambda x, s: x.position + s * numpy.random.uniform(size=x.position.shape, low=-1., high=1.)
+ self._proposal_density = lambda x, s: x.position + \
+ s * numpy.random.uniform(size=x.position.shape, low=-1., high=1.)
else:
self._proposal_density = proposal_density
def _propose(self):
+
+ current_state = self.state.clone()
+ proposal_state = self.state.clone()
+ proposal_state.position = self._proposal_density(current_state, self.stepsize)
- return SimpleProposalCommunicator(State(self._proposal_density(self._state, self.stepsize)))
+ return SimpleProposalCommunicator(current_state, proposal_state)
def _calc_pacc(self, proposal_communicator):
- proposal = proposal_communicator.proposal
+ current_state = proposal_communicator.current_state
+ proposal_state = proposal_communicator.proposal_state
E = lambda x:-self._pdf.log_prob(x)
- pacc = csb.numeric.exp((-(E(proposal.position) - E(self.state.position))) / self.temperature)
+ pacc = csb.numeric.exp((-(E(proposal_state.position) - E(current_state.position))) /
+ self.temperature)
return pacc
@property
@@ -234,3 +377,131 @@ class RWMCSampler(AbstractSingleChainMC):
@stepsize.setter
def stepsize(self, value):
self._stepsize = float(value)
+
+
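To see the refactored proposal handling in action, here is an illustrative sampling loop (a
sketch only: GaussianStandIn is a simplified stand-in for a csb.statistics.pdf.AbstractDensity
subclass, since the sampler shown above only ever calls pdf.log_prob(), and the
RWMCSampler(pdf, state, stepsize) signature is assumed from the attributes handled above):

    import numpy
    from csb.statistics.samplers import State

    class GaussianStandIn(object):
        # simplified stand-in for a density object: unnormalized standard
        # normal log-density, which is all the sampler needs
        def log_prob(self, x):
            return -0.5 * float(numpy.dot(x, x))

    sampler = RWMCSampler(GaussianStandIn(), State(numpy.array([1.0])), stepsize=0.5)

    samples = [sampler.sample().position.copy() for _ in range(1000)]
    print(sampler.acceptance_rate)
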
+class AbstractNCMCSampler(AbstractSingleChainMC):
+ """
+ Implementation of the NCMC sampling algorithm (Nilmeier et al., "Nonequilibrium candidate Monte
+ Carlo is an efficient tool for equilibrium simulation", PNAS 2011) for sampling from one
+ ensemble only.
+ Subclasses have to specify the acceptance probability, which depends on the kind of
+ perturbations and propagations in the protocol.
+
+    @param state: Initial state
+ @type state: L{State}
+
+ @param protocol: Nonequilibrium protocol with alternating perturbations and propagations
+ @type protocol: L{Protocol}
+
+ @param reverse_protocol: The reversed version of the protocol, that is, the order of
+ perturbations and propagations in each step is reversed
+ @type reverse_protocol: L{Protocol}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, state, protocol, reverse_protocol):
+
+ self._protocol = None
+ self.protocol = protocol
+ self._reverse_protocol = None
+ self.reverse_protocol = reverse_protocol
+
+ pdf = self.protocol.steps[0].perturbation.sys_before.hamiltonian
+ temperature = self.protocol.steps[0].perturbation.sys_before.hamiltonian.temperature
+
+ super(AbstractNCMCSampler, self).__init__(pdf, state, temperature)
+
+ def _pick_protocol(self):
+ """
+ Picks either the protocol or the reversed protocol with equal probability.
+
+ @return: Either the protocol or the reversed protocol
+ @rtype: L{Protocol}
+ """
+
+ if numpy.random.random() < 0.5:
+ return self.protocol
+ else:
+ return self.reverse_protocol
+
+ def _propose(self):
+
+ protocol = self._pick_protocol()
+
+ gen = NonequilibriumStepPropagator(protocol)
+
+ traj = gen.generate(self.state)
+
+ return NCMCProposalCommunicator(traj)
+
+ def _accept_proposal(self, proposal_state):
+
+ if self.state.momentum is not None:
+ proposal_state.momentum *= -1.0
+ else:
+ proposal_state.momentum = None
+
+ super(AbstractNCMCSampler, self)._accept_proposal(proposal_state)
+
+ @property
+ def protocol(self):
+ return self._protocol
+ @protocol.setter
+ def protocol(self, value):
+ self._protocol = value
+
+ @property
+ def reverse_protocol(self):
+ return self._reverse_protocol
+ @reverse_protocol.setter
+ def reverse_protocol(self, value):
+ self._reverse_protocol = value
+
+
+class SimpleProposalCommunicator(object):
+ """
+    This holds all the information needed to calculate the acceptance
+    probability in both the L{RWMCSampler} and L{HMCSampler} classes,
+    that is, the current state and the proposal state.
+    For more advanced algorithms, one may derive classes capable of
+    holding the necessary additional information from this class.
+
+ @param current_state: Current state
+ @type current_state: L{State}
+
+ @param proposal_state: Proposal state
+ @type proposal_state: L{State}
+ """
+
+ __metaclass__ = ABCMeta
+
+ def __init__(self, current_state, proposal_state):
+
+ self._current_state = current_state
+ self._proposal_state = proposal_state
+
+ @property
+ def current_state(self):
+ return self._current_state
+
+ @property
+ def proposal_state(self):
+ return self._proposal_state
+
+
+class NCMCProposalCommunicator(SimpleProposalCommunicator):
+ """
+    Holds all information (that is, the trajectory with heat, work, Hamiltonian difference
+    and Jacobian) needed to calculate the acceptance probability in the L{AbstractNCMCSampler} class.
+
+ @param traj: Non-equilibrium trajectory stemming from a stepwise protocol
+    @type traj: L{NonequilibriumTrajectory}
+ """
+
+ def __init__(self, traj):
+
+ self._traj = None
+ self.traj = traj
+
+ super(NCMCProposalCommunicator, self).__init__(traj.initial, traj.final)
diff --git a/csb/test/__init__.py b/csb/test/__init__.py
index d407100..c9a48e2 100644
--- a/csb/test/__init__.py
+++ b/csb/test/__init__.py
@@ -330,7 +330,7 @@ class Config(object):
Pickle.dump(ensemble, open(os.path.join(self.data, '1nz9.full.pickle'), 'wb'))
mse = model1.chains['A'].find(164)
- mse._pdb_name = 'MSE'
+ mse.label = 'MSE'
mse.atoms['SD']._element = ChemElements.Se
mse.atoms['SD']._full_name = 'SE '
Pickle.dump(model1, open(os.path.join(self.data, '1nz9.model1.pickle'), 'wb'))
diff --git a/csb/test/cases/bio/fragments/__init__.py b/csb/test/cases/bio/fragments/__init__.py
index 070057d..3c4dacd 100644
--- a/csb/test/cases/bio/fragments/__init__.py
+++ b/csb/test/cases/bio/fragments/__init__.py
@@ -48,7 +48,7 @@ class SampleTarget(object):
rmsd = chain.subregion(start, end).rmsd(source.subregion(start, end))
- a = Assignment(source, start, end, id, start, end, probability=1.0, rmsd=rmsd)
+ a = Assignment(source, start, end, start, end, id=id, probability=1.0, rmsd=rmsd)
a.secondary_structure = ss[start - 1: end]
target.assign(a)
@@ -225,7 +225,7 @@ class TestTorsionAnglesPredictor(test.Case):
# add more fragments at location 2..9; this will also alter the confidence
for i in range(20):
# these fragments come from 12..19
- target.assign(Assignment(source, 2 + 10, 9 + 10, 't', 2, 9, 1, 1))
+ target.assign(Assignment(source, 2 + 10, 9 + 10, 2, 9, 't', 1, 1))
predictor = TorsionAnglesPredictor(target)
pred = predictor.flat_torsion_map()
@@ -348,7 +348,7 @@ class TestFragmentCluster(test.Case):
self.assertEqual(rep.count, 3)
self.assertEqual(rep.rejections, 3)
self.assertAlmostEqual(rep.mean, 0.0444, places=4)
- self.assertAlmostEqual(self.c.mean(), 0.0444, places=4)
+ #self.assertAlmostEqual(self.c.mean(), 0.0333, places=4)
self.assertAlmostEqual(rep.confidence, 0.2651, places=4)
self.assertEqual(rep.centroid, self.c.centroid().centroid)
@@ -358,7 +358,7 @@ class TestFragmentCluster(test.Case):
self.assertGreater(item.r, 1.1)
def testMean(self):
- self.assertAlmostEqual(self.c.mean(), 0.5639, places=4)
+ self.assertAlmostEqual(self.c.mean(), 0.4833, places=4)
def testShrinkOne(self):
@@ -565,5 +565,6 @@ class TestRosettaFragmentMap(test.Case):
if __name__ == '__main__':
- test.Console()
+ TestFragmentCluster.execute()
+ #test.Console()
\ No newline at end of file
diff --git a/csb/test/cases/bio/hmm/__init__.py b/csb/test/cases/bio/hmm/__init__.py
index 3c8f3da..514dc71 100644
--- a/csb/test/cases/bio/hmm/__init__.py
+++ b/csb/test/cases/bio/hmm/__init__.py
@@ -1,6 +1,6 @@
import csb.test as test
-from csb.core import Enum
+from csb.core import Enum, OrderedDict
from csb.bio.hmm import State, Transition, ProfileHMM, HMMLayer, ProfileLength, StateFactory, ProfileHMMSegment
from csb.bio.hmm import StateExistsError, UnobservableStateError, EmissionExistsError
@@ -17,7 +17,10 @@ def build_hmm():
hmm = ProfileHMM(units=ScoreUnits.Probability)
factory = StateFactory()
- background = { ProteinAlphabet.ALA: 0.02457563, ProteinAlphabet.CYS: 0.00325358, ProteinAlphabet.GLU: 0.01718016 }
+ background = OrderedDict([ (ProteinAlphabet.ALA, 0.02457563),
+ (ProteinAlphabet.CYS, 0.00325358),
+ (ProteinAlphabet.GLU, 0.01718016) ])
+
emission = dict( (aa, 1.0 / i) for i, aa in enumerate(background, start=1) )
# States
@@ -280,7 +283,7 @@ class TestState(test.Case):
def testEmission(self):
# also covers EmissionTable
self.assertEqual(self.m.emission.length, 3)
- self.assertEqual(list(self.m.emission), [ProteinAlphabet.ALA, ProteinAlphabet.CYS, ProteinAlphabet.GLU])
+ self.assertEqual(set(self.m.emission), set([ProteinAlphabet.ALA, ProteinAlphabet.CYS, ProteinAlphabet.GLU]))
self.assertEqual(self.m.emission[ProteinAlphabet.CYS], 0.5)
self.assertRaises(lambda: self.m.emission[ProteinAlphabet.GAP])
self.assertRaises(UnobservableStateError, lambda: self.d.emission)
@@ -298,7 +301,7 @@ class TestState(test.Case):
def testBackground(self):
# also covers EmissionTable
self.assertEqual(self.m.background.length, 3)
- self.assertEqual(list(self.m.background), [ProteinAlphabet.ALA, ProteinAlphabet.CYS, ProteinAlphabet.GLU])
+ self.assertEqual(set(self.m.background), set([ProteinAlphabet.ALA, ProteinAlphabet.CYS, ProteinAlphabet.GLU]))
self.assertEqual(self.m.background[ProteinAlphabet.CYS], 0.00325358)
self.assertRaises(lambda: self.m.background[ProteinAlphabet.GAP])
diff --git a/csb/test/cases/bio/io/clans/__init__.py b/csb/test/cases/bio/io/clans/__init__.py
index 0dc2df3..57b0f86 100644
--- a/csb/test/cases/bio/io/clans/__init__.py
+++ b/csb/test/cases/bio/io/clans/__init__.py
@@ -345,11 +345,13 @@ class TestClansParser(test.Case):
self.assertTrue(
(self.clans_instance.rotmtx - correct_rotmtx < 1e-6).all())
- self.assertEqual(len(self.clans_instance.seqgroups), 2)
+ self.assertEqual(len(self.clans_instance.seqgroups), 4)
seqgroup_names = ('insect hypoth. protein (2 copies, C term)',
- 'allergens')
- seqgroup_sizes = (20, 17)
+ 'allergens >= xyz',
+ 'empty group WITH terminal semicolon in numbers line',
+ 'empty group WITHOUT terminal semicolon in numbers line')
+ seqgroup_sizes = (20, 17, 0, 0)
for i, seqgroup in enumerate(self.clans_instance.seqgroups):
self.assertEqual(len(seqgroup), seqgroup_sizes[i])
@@ -384,6 +386,11 @@ class TestClansFileWriter(test.Case):
in_hsps = False
start_tag_hsp = '<hsp>\n'
end_tag_hsp = '</hsp>\n'
+
+ in_seqgroups = False
+ start_tag_seqgroups = '<seqgroups>\n'
+ end_tag_seqgroups = '</seqgroups>\n'
+
colorarr_tag = 'colorarr='
color_tag = 'color='
@@ -396,6 +403,13 @@ class TestClansFileWriter(test.Case):
in_hsps = False
continue
+ if original_line == start_tag_seqgroups:
+ in_seqgroups = True
+ continue
+ if original_line == end_tag_seqgroups:
+ in_seqgroups = False
+ continue
+
if original_line.startswith(colorarr_tag):
## remove colorarr_tag from beginning of line
original_line = original_line[len(colorarr_tag):].strip().strip(':')
@@ -439,6 +453,10 @@ class TestClansFileWriter(test.Case):
self.assertEqual(original_start_end, written_start_end)
self.assertTrue((float(original_value) - float(written_value)) < 1e-6)
+ elif in_seqgroups and (original_line == 'numbers=\n'):
+ ## a terminal semicolon is added by the ClansWriter
+ self.assertEqual(original_line.strip() + ';', written_lines[i].strip())
+
else:
self.assertEqual(original_line, written_lines[i])
diff --git a/csb/test/cases/bio/io/cs/__init__.py b/csb/test/cases/bio/io/cs/__init__.py
new file mode 100644
index 0000000..a4d6574
--- /dev/null
+++ b/csb/test/cases/bio/io/cs/__init__.py
@@ -0,0 +1,87 @@
+import csb.test as test
+
+from csb.bio.io.cs import ChemShiftReader, ChemShift3Reader, ChemShiftFormatError
+from csb.bio.structure import ChemElements
+from csb.bio.sequence import ProteinAlphabet
+
+
+
+@test.unit
+class TestChemShiftReader(test.Case):
+
+ def setUp(self):
+
+ super(TestChemShiftReader, self).setUp()
+
+ self.parser = self.klass()
+ self.file2 = self.config.getTestFile('2l01.v2.str')
+ self.file3 = self.config.getTestFile('2l01.v3.str')
+
+ @property
+ def file(self):
+ return self.file2
+
+ @property
+ def klass(self):
+ return ChemShiftReader
+
+ def testCreate(self):
+
+ klass = self.klass
+
+ self.assertTrue(isinstance(klass.create(version=2), ChemShiftReader))
+ self.assertTrue(isinstance(klass.create(version=3), ChemShift3Reader))
+
+ self.assertRaises(ValueError, klass.create, version=1)
+
+ def testGuess(self):
+
+ klass = self.klass
+
+ self.assertTrue(isinstance(klass.guess(self.file2), ChemShiftReader))
+ self.assertTrue(isinstance(klass.guess(self.file3), ChemShift3Reader))
+
+ dummy = self.config.getTestFile("2JZC.sum")
+ self.assertRaises(ChemShiftFormatError, klass.guess, dummy)
+
+ def testReadShifts(self):
+
+ content = open(self.file).read()
+ cs = self.parser.read_shifts(content)
+
+ self.assertEqual(len(cs), 11)
+
+ self.assertEqual(cs[0].name, "HA")
+ self.assertEqual(cs[0].element, ChemElements.H)
+ self.assertEqual(cs[0].shift, 3.977)
+
+ self.assertEqual(cs[1].name, "HB2")
+ self.assertEqual(cs[1].shift, 2.092)
+
+ self.assertEqual(cs[7].element, ChemElements.C)
+ self.assertEqual(cs[7].residue, ProteinAlphabet.MET)
+
+ self.assertEqual(cs[10].residue, ProteinAlphabet.LYS)
+ self.assertEqual(cs[10].shift, 4.423)
+
+ def testReadFile(self):
+
+ cs = self.parser.read_file(self.file)
+ self.assertEqual(len(cs), 11)
+
+@test.unit
+class TestChemShift3Reader(TestChemShiftReader):
+
+ @property
+ def file(self):
+ return self.file3
+
+ @property
+ def klass(self):
+ return ChemShift3Reader
+
+
+if __name__ == '__main__':
+
+ test.Console()
+
\ No newline at end of file
diff --git a/csb/test/cases/bio/io/fasta/__init__.py b/csb/test/cases/bio/io/fasta/__init__.py
index 1b4f555..17d5eb4 100644
--- a/csb/test/cases/bio/io/fasta/__init__.py
+++ b/csb/test/cases/bio/io/fasta/__init__.py
@@ -22,7 +22,7 @@ class TestSequenceParser(test.Case):
s = self.parser.parse_file(self.file)
- self.assertEquals(len(s), 3)
+ self.assertEqual(len(s), 3)
self.assertTrue(isinstance(s[0], Sequence))
self.assertEqual(s[0].id, 'gi|148654187')
@@ -33,7 +33,7 @@ class TestSequenceParser(test.Case):
s = self.parser.parse_string(self.data)
- self.assertEquals(len(s), 3)
+ self.assertEqual(len(s), 3)
self.assertTrue(isinstance(s[0], Sequence))
self.assertEqual(s[0].id, 'gi|148654187')
diff --git a/csb/test/cases/bio/io/noe/__init__.py b/csb/test/cases/bio/io/noe/__init__.py
new file mode 100644
index 0000000..495d4d4
--- /dev/null
+++ b/csb/test/cases/bio/io/noe/__init__.py
@@ -0,0 +1,148 @@
+import csb.test as test
+import csb.io
+
+from csb.bio.io.noe import SparkyPeakListReader, XeasyPeakListReader, XeasyFileBuilder
+from csb.bio.structure import ChemElements
+from csb.bio.sequence import ProteinAlphabet
+
+
+@test.unit
+class TestSparkyPeakListReader(test.Case):
+
+ def setUp(self):
+
+ super(TestSparkyPeakListReader, self).setUp()
+
+ self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
+ self.parser = SparkyPeakListReader(self.elements, [(1, 2)])
+ self.file = self.config.getTestFile('Sparky.peaks')
+
+ def testRead(self):
+
+ content = open(self.file).read()
+ spectrum = self.parser.read(content)
+
+ self.assertEqual(len(spectrum), 3)
+
+ self.assertEqual(spectrum.min_intensity, 147454)
+ self.assertEqual(spectrum.max_intensity, 204746)
+
+ self.assertEqual(spectrum.element(0), self.elements[0])
+ self.assertEqual(spectrum.element(1), self.elements[1])
+
+ self.assertEqual(spectrum.dimensions, self.elements)
+ self.assertEqual(spectrum.proton_dimensions, (0, 2))
+ self.assertEqual(spectrum.num_dimensions, 3)
+ self.assertEqual(spectrum.num_proton_dimensions, 2)
+
+ self.assertFalse(spectrum.has_element(ChemElements.Ca))
+ self.assertTrue(spectrum.has_element(ChemElements.C))
+
+ self.assertFalse(spectrum.has_connected_dimensions(0))
+ self.assertEqual(spectrum.connected_dimensions(0), ())
+ self.assertTrue(spectrum.has_connected_dimensions(1))
+ self.assertEqual(spectrum.connected_dimensions(1), (2,))
+ self.assertTrue(spectrum.has_connected_dimensions(2))
+ self.assertEqual(spectrum.connected_dimensions(2), (1,))
+
+ peaks = list(spectrum)
+ self.assertEqual(peaks[0].intensity, 157921)
+ self.assertEqual(peaks[0].get(0), 3.418)
+ self.assertEqual(peaks[0].get(1), 114.437)
+ self.assertEqual(peaks[0].get(2), 7.440)
+
+ def testReadFile(self):
+
+ spectrum = self.parser.read_file(self.file)
+ self.assertEqual(len(spectrum), 3)
+
+ def testReadAll(self):
+
+ spectrum = self.parser.read_all([self.file, self.file])
+ self.assertEqual(len(spectrum), 6)
+
+
+@test.unit
+class TestXeasyPeakListReader(test.Case):
+
+ def setUp(self):
+
+ super(TestXeasyPeakListReader, self).setUp()
+
+ self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
+ self.parser = XeasyPeakListReader()
+ self.file = self.config.getTestFile('Xeasy1.peaks')
+
+ def testRead(self):
+
+ content = open(self.file).read()
+ spectrum = self.parser.read(content)
+
+ self.assertEqual(len(spectrum), 3)
+
+ self.assertEqual(spectrum.min_intensity, 1.291120e05)
+ self.assertEqual(spectrum.max_intensity, 4.243830e05)
+
+ self.assertEqual(spectrum.element(0), self.elements[0])
+ self.assertEqual(spectrum.element(1), self.elements[1])
+
+ self.assertEqual(spectrum.dimensions, self.elements)
+ self.assertEqual(spectrum.proton_dimensions, (0, 2))
+ self.assertEqual(spectrum.num_dimensions, 3)
+ self.assertEqual(spectrum.num_proton_dimensions, 2)
+
+ self.assertFalse(spectrum.has_element(ChemElements.Ca))
+ self.assertTrue(spectrum.has_element(ChemElements.C))
+
+ self.assertFalse(spectrum.has_connected_dimensions(0))
+ self.assertEqual(spectrum.connected_dimensions(0), ())
+ self.assertTrue(spectrum.has_connected_dimensions(1))
+ self.assertEqual(spectrum.connected_dimensions(1), (2,))
+ self.assertTrue(spectrum.has_connected_dimensions(2))
+ self.assertEqual(spectrum.connected_dimensions(2), (1,))
+
+ peaks = list(spectrum)
+ self.assertEqual(peaks[0].intensity, 1.565890e05)
+ self.assertEqual(peaks[0].get(0), 7.050)
+ self.assertEqual(peaks[0].get(1), 10.374)
+ self.assertEqual(peaks[0].get(2), 0.889)
+
+@test.unit
+class TestXeasyPeakListReader2(TestXeasyPeakListReader):
+
+ def setUp(self):
+
+ super(TestXeasyPeakListReader2, self).setUp()
+
+ self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
+ self.parser = XeasyPeakListReader()
+ self.file = self.config.getTestFile('Xeasy2.peaks')
+
+
+@test.unit
+class TestXeasyFileBuilder(test.Case):
+
+ def setUp(self):
+ super(TestXeasyFileBuilder, self).setUp()
+
+ def testBuild(self):
+
+ content = self.config.getContent("Xeasy1.peaks")
+ spectrum = XeasyPeakListReader().read(content)
+
+ stream = csb.io.MemoryStream()
+
+ builder = XeasyFileBuilder(stream)
+ builder.add_header(spectrum)
+ builder.add_peaks(spectrum)
+
+ self.assertEqual(stream.getvalue().strip(), content.strip())
+
+
+
+
+if __name__ == '__main__':
+
+ test.Console()
+
+
\ No newline at end of file
diff --git a/csb/test/cases/bio/io/wwpdb/__init__.py b/csb/test/cases/bio/io/wwpdb/__init__.py
index 3614475..6d7dc27 100644
--- a/csb/test/cases/bio/io/wwpdb/__init__.py
+++ b/csb/test/cases/bio/io/wwpdb/__init__.py
@@ -1,11 +1,13 @@
import os
+import sys
import csb.test as test
from csb.bio.io.wwpdb import EntryID, StandardID, DegenerateID, SeqResID, InvalidEntryIDError, HeaderFormatError
+from csb.bio.io.wwpdb import RobustResidueMapper, FastResidueMapper, CombinedResidueMapper, ResidueMappingError, SparseChainSequence
from csb.bio.io.wwpdb import StructureParser, RegularStructureParser, LegacyStructureParser, UnknownPDBResidueError
from csb.bio.io.wwpdb import get, find, FileSystemStructureProvider, RemoteStructureProvider, CustomStructureProvider, StructureNotFoundError
-from csb.bio.sequence import SequenceAlphabets, SequenceTypes
-from csb.bio.structure import ChemElements, SecStructures, Structure
+from csb.bio.sequence import SequenceAlphabets, ProteinAlphabet, SequenceTypes, RichSequence
+from csb.bio.structure import ChemElements, SecStructures, Structure, Chain
@test.regression
@@ -50,9 +52,8 @@ class TestMappingRegressions(test.Case):
self.assertTrue(residue.atoms.length > 0)
for an in residue.atoms:
- self.assertTrue(residue[an]._het)
self.assertTrue(residue[an].vector.tolist())
-
+
def testNonStandardResidueMapping(self):
"""
@see: [CSB 0000052]
@@ -105,15 +106,15 @@ class TestLegacyStructureParser(test.Case):
def testParseModels(self):
ensemble = self.parser.parse_models()
- self.assertEquals(ensemble.models.length, 10)
- self.assertEquals(ensemble[0].model_id, 1)
- self.assertEquals(ensemble.models[1].model_id, 1)
+ self.assertEqual(ensemble.models.length, 10)
+ self.assertEqual(ensemble[0].model_id, 1)
+ self.assertEqual(ensemble.models[1].model_id, 1)
def testParseStructure(self):
structure = self.parser.parse(model=1)
- self.assertEquals(self.parser.parse_structure().model_id, 1)
+ self.assertEqual(self.parser.parse_structure().model_id, 1)
self.assertEqual(structure.accession, '1d3z')
self.assertEqual(structure.model_id, 1)
@@ -130,8 +131,10 @@ class TestLegacyStructureParser(test.Case):
# Residue level
self.assertEqual(len(structure['A'][1:10]), 9)
self.assertEqual(structure['A'][0].type, SequenceAlphabets.Protein.MET)
- self.assertEqual(structure['A'][0]._pdb_name, 'MSE')
- self.assertEqual(structure['A'][1]._pdb_name, 'GLN')
+ self.assertEqual(structure['A'][0].label, 'MSE')
+ self.assertEqual(structure['A'][1].label, 'GLN')
+ self.assertTrue(structure['A'][0].is_modified)
+ self.assertFalse(structure['A'][1].is_modified)
# Atom level
self.assertEqual(structure['A'][1].atoms['CA'].element, None)
@@ -156,8 +159,8 @@ class TestLegacyStructureParser(test.Case):
def testGuessSequenceType(self):
- self.assertEquals(self.parser.guess_sequence_type('AGM'), SequenceTypes.Protein)
- self.assertEquals(self.parser.guess_sequence_type('DOC'), SequenceTypes.NucleicAcid)
+ self.assertEqual(self.parser.guess_sequence_type('AGM'), SequenceTypes.Protein)
+ self.assertEqual(self.parser.guess_sequence_type('DOC'), SequenceTypes.NucleicAcid)
self.assertRaises(UnknownPDBResidueError, self.parser.guess_sequence_type, 'junk')
def testFileName(self):
@@ -197,29 +200,72 @@ class TestRegularStructureParser(test.Case):
super(TestRegularStructureParser, self).setUp()
self.pdb = self.config.getTestFile('1d3z.regular.pdb')
+ self.mapping = self.config.getTestFile('mapping.pdb')
self.parser = RegularStructureParser(self.pdb)
+ def testMapper(self):
+
+ p = RegularStructureParser(self.pdb, mapper=None)
+ self.assertTrue(isinstance(p.mapper, CombinedResidueMapper))
+
+ p.mapper = FastResidueMapper()
+ self.assertTrue(isinstance(p.mapper, FastResidueMapper))
+
+ def testCombinedMapping(self):
+
+ # default mapper
+ c = self.parser.parse(self.mapping)['E']
+ self.assertEqual(c.residues[14].type, ProteinAlphabet.GLU)
+ self.assertEqual(c.residues[15].type, ProteinAlphabet.GLU)
+ self.assertEqual(c.residues[16].type, ProteinAlphabet.THR)
+ self.assertEqual(4, sum([1 for r in c if r.has_structure]))
+
+ # explicit combined mapper
+ self.parser.mapper = CombinedResidueMapper()
+ c = self.parser.parse(self.mapping)['E']
+ self.assertEqual(4, sum([1 for r in c if r.has_structure]))
+
+ def testFastMapping(self):
+
+ self.parser.mapper = FastResidueMapper()
+ self.assertRaises(ResidueMappingError, self.parser.parse, self.mapping)
+
+ mapping2 = self.config.getTestFile('mapping2.pdb')
+
+ c = self.parser.parse(mapping2)['E']
+ self.assertEqual(2, sum([1 for r in c if r.has_structure]))
+
+ def testRobustMapping(self):
+
+ mapping3 = self.config.getTestFile('mapping3.pdb')
+
+ self.parser.mapper = RobustResidueMapper()
+ self.assertRaises(ResidueMappingError, self.parser.parse, mapping3)
+
+ c = self.parser.parse(self.mapping)['E']
+ self.assertEqual(4, sum([1 for r in c if r.has_structure]))
+
def testParseModels(self):
ensemble = self.parser.parse_models()
- self.assertEquals(ensemble.models.length, 10)
- self.assertEquals(ensemble[0].model_id, 1)
- self.assertEquals(ensemble.models[1].model_id, 1)
+ self.assertEqual(ensemble.models.length, 10)
+ self.assertEqual(ensemble[0].model_id, 1)
+ self.assertEqual(ensemble.models[1].model_id, 1)
self.assertRaises(ValueError, self.parser.parse_models, (999, 1000))
pdb = self.config.getTestFile('3p1u.pdb')
ensemble = RegularStructureParser(pdb).parse_models()
- self.assertEquals(ensemble.models.length, 1)
- self.assertEquals(ensemble[0].model_id, 1)
- self.assertEquals(ensemble[0].resolution, 2.05)
+ self.assertEqual(ensemble.models.length, 1)
+ self.assertEqual(ensemble[0].model_id, 1)
+ self.assertEqual(ensemble[0].resolution, 2.05)
def testParseStructure(self):
structure = self.parser.parse(model=2)
- self.assertEquals(self.parser.parse_structure().model_id, 1)
- self.assertEquals(structure.resolution, None)
+ self.assertEqual(self.parser.parse_structure().model_id, 1)
+ self.assertEqual(structure.resolution, None)
self.assertEqual(structure.accession, '1d3z')
self.assertEqual(structure.model_id, 2)
@@ -228,6 +274,7 @@ class TestRegularStructureParser(test.Case):
self.assertEqual(structure.chains.length, 1)
self.assertEqual(len(structure.chains), 1)
self.assertEqual(structure.chains['A'].sequence, 'MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG')
+ self.assertEqual(structure.chains['A'].sequence, ''.join([str(r.type) for r in structure.chains['A'] if r.has_structure]))
ss = structure.chains['A'].secondary_structure
self.assertEqual(ss.to_string(), '-EEEEE-----EEEEE-----HHHHHHHHHHHHHH-HHH-EEEEE--EE------HHHHH-----EEEEEE')
@@ -241,13 +288,12 @@ class TestRegularStructureParser(test.Case):
# Residue level
self.assertEqual(len(structure['A'][1:10]), 9)
self.assertEqual(structure['A'][0].type, SequenceAlphabets.Protein.MET)
- self.assertEqual(structure['A'][0]._pdb_name, 'MSE')
- self.assertEqual(structure['A'][1]._pdb_name, 'GLN')
+ self.assertEqual(structure['A'][0].label, 'MSE')
+ self.assertEqual(structure['A'][1].label, 'GLN')
# Atom
vector = [52.647, -87.443, 9.674]
self.assertEqual(structure['A'][0]['CA'].vector.tolist(), vector)
- self.assertEqual(structure['A'][0]['CA']._het, True)
def testParseResidue(self):
@@ -264,8 +310,8 @@ class TestRegularStructureParser(test.Case):
def testGuessSequenceType(self):
- self.assertEquals(self.parser.guess_sequence_type('AGM'), SequenceTypes.Protein)
- self.assertEquals(self.parser.guess_sequence_type('DOC'), SequenceTypes.NucleicAcid)
+ self.assertEqual(self.parser.guess_sequence_type('AGM'), SequenceTypes.Protein)
+ self.assertEqual(self.parser.guess_sequence_type('DOC'), SequenceTypes.NucleicAcid)
self.assertRaises(UnknownPDBResidueError, self.parser.guess_sequence_type, 'junk')
def testFileName(self):
@@ -276,6 +322,100 @@ class TestRegularStructureParser(test.Case):
@test.unit
+class TestFastResidueMapper(test.Case):
+
+ def setUp(self):
+ super(TestFastResidueMapper, self).setUp()
+ self.mapper = FastResidueMapper()
+
+ def _build(self, string):
+
+ id = str(hash(string))
+ seq = RichSequence(id, "", string, SequenceTypes.Protein)
+
+ return SparseChainSequence.create(Chain.from_sequence(seq))
+
+ def testMap(self):
+
+ ref = self._build("ZABCD")
+ sparse = self._build("AC")
+
+ self.assertRaises(ResidueMappingError, self.mapper.map, sparse, ref)
+
+ sparse.residues[2].id = (22, None)
+ result = self.mapper.map(sparse, ref)
+
+ self.assertEqual(result.sequence, "-A-C-")
+
+ def testModifiedResidueMapping(self):
+ """
+        Strictly speaking, this is a regression test. But it is so essential that
+ we should keep it here.
+
+ @see: [csb: 19]
+ """
+ pdb = self.config.getTestFile('modified.pdb')
+
+ structure = StructureParser(pdb, mapper=self.mapper).parse_structure()
+ chain = structure.first_chain
+
+ self.assertFalse(chain.residues[1].has_structure)
+ self.assertEqual(chain.residues[1].label, "MET")
+
+ self.assertTrue(chain.residues[19].has_structure)
+ self.assertEqual(chain.residues[19].label, "MSE")
+
+
+@test.unit
+class TestRobustResidueMapper(TestFastResidueMapper):
+
+ def setUp(self):
+ super(TestRobustResidueMapper, self).setUp()
+ self.mapper = RobustResidueMapper()
+
+ def testMap(self):
+
+ ref = self._build("ABCD")
+ sparse = self._build("EF")
+
+ self.assertRaises(ResidueMappingError, self.mapper.map, sparse, ref)
+
+ ref = self._build("ZABCD")
+ sparse = self._build("AC")
+ result = self.mapper.map(sparse, ref)
+
+ self.assertEqual(result.sequence, "-A-C-")
+
+ def testModifiedResidueMapping(self):
+
+ pdb = self.config.getTestFile('modified2.pdb')
+
+ structure = StructureParser(pdb, mapper=self.mapper).parse_structure()
+ chain = structure.first_chain
+
+ self.assertTrue(chain.residues[1].has_structure)
+ self.assertEqual(chain.residues[1].label, "MSE")
+
+ self.assertFalse(chain.residues[19].has_structure)
+ self.assertEqual(chain.residues[19].label, "MET")
+
+
+@test.unit
+class TestCombinedResidueMapper(TestFastResidueMapper):
+
+ def setUp(self):
+ super(TestCombinedResidueMapper, self).setUp()
+ self.mapper = CombinedResidueMapper()
+
+ def testMap(self):
+
+ ref = self._build("ZABCD")
+ sparse = self._build("AC")
+ result = self.mapper.map(sparse, ref)
+
+ self.assertEqual(result.sequence, "-A-C-")
+
+@test.unit
class TestFileSystemProvider(test.Case):
def setUp(self):
@@ -485,6 +625,7 @@ def TestPDB():
try:
StructureParser(self.entry).parse_structure()
except:
+ sys.stdout.write("\n{0}\n".format(self.entry))
self.reRaise([self.entry])
var = 'PDBMASK'
@@ -499,6 +640,7 @@ def TestPDB():
return suite
+
if __name__ == '__main__':
-
+
test.Console()
diff --git a/csb/test/cases/bio/nmr/__init__.py b/csb/test/cases/bio/nmr/__init__.py
index 65de64d..7c5b4ea 100644
--- a/csb/test/cases/bio/nmr/__init__.py
+++ b/csb/test/cases/bio/nmr/__init__.py
@@ -1,37 +1,45 @@
-import csb.bio.nmr as nmr
-import csb.bio.sequence as sequence
import csb.test as test
+from csb.bio.nmr import RandomCoil, AtomConnectivity, ContactMap, Filters, Label
+from csb.bio.nmr import ChemShiftInfo, ChemShiftScoringModel, NOESpectrum, NOEPeak
+from csb.bio.nmr import InvalidResidueError, EntityNotSupportedError
+from csb.bio.sequence import ProteinAlphabet
+from csb.bio.structure import ChemElements, Atom
+
+
+def get_chain():
+ s = test.Config().getPickle('1nz9.model1.pickle')
+ return s.first_chain.subregion(1, 5, clone=True)
+
@test.unit
class TestRandomCoil(test.Case):
def setUp(self):
-
+
super(TestRandomCoil, self).setUp()
- self.rc = nmr.RandomCoil.get()
- self.chain = self.config.getPickle('1nz9.model1.pickle').first_chain
+ self.rc = RandomCoil.get()
+ self.chain = get_chain()
self.residue = self.chain.residues[2]
def testFactory(self):
- self.assertTrue(nmr.RandomCoil.get() is nmr.RandomCoil.get())
+ self.assertTrue(RandomCoil.get() is RandomCoil.get())
def testSimpleSecondaryShift(self):
raw = 200.0
- for r in ['A', 'ALA', sequence.SequenceAlphabets.Protein.ALA]:
-
+ for r in ['A', 'ALA', ProteinAlphabet.ALA]:
self.assertEqual(
self.rc.simple_secondary_shift(r, 'N', raw),
raw - 125)
self.assertRaises(
- nmr.InvalidResidueError,
+ InvalidResidueError,
lambda:self.rc.simple_secondary_shift('$', 'N', 0))
self.assertRaises(
- nmr.EntityNotSupportedError,
+ EntityNotSupportedError,
lambda:self.rc.simple_secondary_shift('A', '$', 0))
def testSecondaryShift(self):
@@ -39,11 +47,329 @@ class TestRandomCoil(test.Case):
raw = 200.0
for r in [self.residue, self.residue.rank, self.residue.id]:
-
self.assertEqual(
self.rc.secondary_shift(self.chain, r, 'H', raw),
raw - (8.44 + 0.07 - 0.05 - 0.01))
+
+
+@test.unit
+class TestAtomConnectivity(test.Case):
+
+ def setUp(self):
+ super(TestAtomConnectivity, self).setUp()
+ self.ac = AtomConnectivity.get()
+
+ def testFactory(self):
+ self.assertTrue(AtomConnectivity.get() is AtomConnectivity.get())
+
+ def testConnected(self):
+
+ self.assertTrue(self.ac.connected(ProteinAlphabet.CYS, "SG", "HG"))
+ self.assertTrue(self.ac.connected(ProteinAlphabet.CYS, "SG", "CB"))
+ self.assertFalse(self.ac.connected(ProteinAlphabet.CYS, "SG", "H"))
+
+ self.assertTrue(self.ac.connected(ProteinAlphabet.CYS, "CA", "C"))
+ self.assertTrue(self.ac.connected(ProteinAlphabet.CYS, "CA", "HA"))
+ self.assertTrue(self.ac.connected(ProteinAlphabet.CYS, "CA", "CB"))
+ self.assertFalse(self.ac.connected(ProteinAlphabet.CYS, "CA", "HB"))
+
+ def testConnectedAtoms(self):
+
+ partners = self.ac.connected_atoms(ProteinAlphabet.CYS, "CA")
+ self.assertTrue(partners, ("N", "C", "CB", "HA"))
+
+ partners = self.ac.connected_atoms(ProteinAlphabet.CYS, "SG")
+ self.assertTrue(partners, ("CB"))
+
+ def testContains(self):
+
+ self.assertTrue(self.ac.contains(ProteinAlphabet.CYS, "SG"))
+ self.assertTrue(self.ac.contains(ProteinAlphabet.ASP, "CG"))
+ self.assertFalse(self.ac.contains(ProteinAlphabet.ALA, "CG"))
+
+ def testGetAtoms(self):
+
+ atoms = frozenset(['C', 'HD2', 'CB', 'CA', 'CG', 'O', 'N',
+ 'H', 'OD1', 'HA', 'OD2', 'HB3', 'HB2'])
+ self.assertEqual(self.ac.get_atoms(ProteinAlphabet.ASP), atoms)
+
+
+@test.unit
+class TestContactMap(test.Case):
+
+ CHAIN = get_chain()
+ MAP = ContactMap(CHAIN, cutoff=1.75, filter=Filters.HYDROGENS)
+ MAP.build()
+
+ def setUp(self):
+ super(TestContactMap, self).setUp()
+ self.chain = TestContactMap.CHAIN
+ self.cm = TestContactMap.MAP
+
+ def testBuild(self):
+ self.assertEqual(len(self.cm.atoms), 38)
+
+ def testContains(self):
+ self.assertFalse(self.chain[0]['C'] in self.cm)
+ self.assertTrue(self.chain[0]['HA'] in self.cm)
+
+ def testIterator(self):
+ self.assertEqual(list(self.cm), list(self.cm.contacts))
+
+ def testCutoff(self):
+ self.assertEqual(self.cm.cutoff, 1.75)
+
+ def testChain(self):
+ self.assertTrue(self.cm.chain is self.chain)
+
+ def testAtoms(self):
+ self.assertTrue(self.cm.atoms[0] is self.chain[0]['H1'])
+ self.assertTrue(self.cm.atoms[-1] is self.chain[-1]['HZ'])
+
+ def testContacts(self):
+
+ c = self.chain
+ contacts = set([
+ (c[0]['H2'], c[0]['H3']),
+ (c[0]['H2'], c[0]['H1']),
+ (c[0]['H1'], c[0]['H3']),
+ (c[1]['HE22'], c[1]['HE21']) ])
+
+ self.assertEqual(len(contacts), len(set(self.cm.contacts)))
+ for a1, a2 in contacts:
+ self.assertTrue((a1, a2) in contacts or (a2, a1) in contacts)
+
+ def testConnect(self):
+ # covers also cm.connected()
+
+ c = self.chain
+ cm = ContactMap(TestContactMap.CHAIN, cutoff=1.75, filter=Filters.CALPHAS)
+
+ self.assertFalse(cm.connected(c[0]['CA'], c[1]['CA']))
+ cm.connect(c[0]['CA'], c[1]['CA'])
+ self.assertTrue(cm.connected(c[0]['CA'], c[1]['CA']))
+ self.assertTrue(cm.connected(c[1]['CA'], c[0]['CA']))
+
+ def testConnected(self):
+ c = self.chain
+ self.assertTrue(self.cm.connected(c[0]['H3'], c[0]['H2']))
+ self.assertTrue(self.cm.connected(c[0]['H2'], c[0]['H3']))
+
+ def testAtomContacts(self):
+ atoms = set([self.chain[0]['H2'], self.chain[0]['H3']])
+ self.assertEqual(self.cm.atom_contacts(self.chain[0]['H1']), atoms)
+
+ def testResidueContacts(self):
+ residues = set([self.chain[0]])
+ self.assertEqual(self.cm.residue_contacts(self.chain[0]), residues)
+
+ def testPosition(self):
+ self.assertEqual(self.cm.position(1, 'H1'), 1.0)
+ self.assertEqual(self.cm.position(2, 'HA'), 2.125)
+
+ def testCompare(self):
+
+ ci = ContactMap.compare(self.cm, self.cm, 0)
+ self.assertEqual(ci.precision, 1)
+ self.assertEqual(ci.coverage, 1)
+
+ ci = ContactMap.compare(self.cm, self.cm, 1)
+ self.assertEqual(ci.precision, 0)
+ self.assertEqual(ci.coverage, 0)
+
+
+@test.unit
+class TestLabel(test.Case):
+
+ def testBuild(self):
+ self.assertEqual(Label.build(ProteinAlphabet.ALA, 2, 'CA'), "A#2:CA")
+
+ def testFromShift(self):
+ shift = ChemShiftInfo(2, ProteinAlphabet.ALA, 'CA', ChemElements.C, 12)
+ self.assertEqual(Label.from_shift(shift), "A#2:CA")
+
+ def testFromAtom(self):
+ atom = get_chain()[1]['CA']
+ self.assertEqual(Label.from_atom(atom), "Q#2:CA")
+
+ def testGetAtom(self):
+ chain = get_chain()
+ self.assertEqual(Label.get_atom(chain, "Q#2:CA"), chain[1]['CA'])
+
+ def testMatch(self):
+ atom = get_chain()[1]['CA']
+ shift = ChemShiftInfo(2, ProteinAlphabet.GLN, 'CA', ChemElements.C, 12)
+ self.assertTrue(Label.match(shift, atom))
+ def testParse(self):
+ self.assertEqual(Label.parse("Q#2:CA"), ("Q", 2, 'CA'))
+
+ def testFromString(self):
+
+ label = Label.from_string("Q#2:CA")
+
+ self.assertEqual(label.residue, ProteinAlphabet.GLN)
+ self.assertEqual(label.rank, 2)
+ self.assertEqual(label.atom_name, 'CA')
+
+
+@test.unit
+class TestChemShiftInfo(test.Case):
+
+ def testConstructor(self):
+
+ shift = ChemShiftInfo(2, 'ALA', 'CA', 'C', 12)
+
+ self.assertEqual(shift.element, ChemElements.C)
+ self.assertEqual(shift.residue, ProteinAlphabet.ALA)
+ self.assertEqual(shift.position, 2)
+ self.assertEqual(shift.shift, 12)
+
+ def testLabel(self):
+
+ shift = ChemShiftInfo(2, 'ALA', 'CA', 'C', 12)
+ self.assertEqual(shift.label, Label.from_shift(shift))
+
+
+@test.unit
+class TestChemShiftScoringModel(test.Case):
+
+ def setUp(self):
+ super(TestChemShiftScoringModel, self).setUp()
+ self.model = ChemShiftScoringModel()
+
+ def testPositive(self):
+ self.assertAlmostEqual(self.model.positive('CA', 1), 0.191, places=3)
+ self.assertAlmostEqual(self.model.positive('CA', [1, 1])[1], 0.191, places=3)
+
+ def testNegative(self):
+ self.assertAlmostEqual(self.model.negative('CA', 1), 0.127, places=3)
+ self.assertAlmostEqual(self.model.negative('CA', [1, 1])[1], 0.127, places=3)
+
+ def testScore(self):
+ self.assertAlmostEqual(self.model.score('CA', 1), 0.588, places=3)
+ self.assertAlmostEqual(self.model.score('CA', [1, 1])[1], 0.588, places=3)
+
+
+@test.unit
+class TestNOESpectrum(test.Case):
+
+ def setUp(self):
+ super(TestNOESpectrum, self).setUp()
+
+ self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
+ self.spectrum = NOESpectrum(self.elements)
+ self.spectrum.connect(1, 2)
+ self.spectrum.add(11, [1, 2, 3])
+ self.spectrum.add(12, [11, 22, 33])
+
+ def testGetitem(self):
+ self.assertEqual(self.spectrum[0].intensity, 11)
+ self.assertRaises(IndexError, lambda i: self.spectrum[i], 3)
+
+ def testLen(self):
+ self.assertEqual(len(self.spectrum), 2)
+
+ def testMinIntensity(self):
+ self.assertEqual(self.spectrum.min_intensity, 11)
+
+ def testMaxIntensity(self):
+ self.assertEqual(self.spectrum.max_intensity, 12)
+
+ def testElement(self):
+ self.assertEqual(self.spectrum.element(0), self.elements[0])
+ self.assertEqual(self.spectrum.element(1), self.elements[1])
+ self.assertRaises(IndexError, self.spectrum.element, 3)
+
+ def testDimensions(self):
+ self.assertEqual(self.spectrum.dimensions, self.elements)
+
+ def testProtonDimensions(self):
+ self.assertEqual(self.spectrum.proton_dimensions, (0, 2))
+
+ def testNumDimensions(self):
+ self.assertEqual(self.spectrum.num_dimensions, 3)
+
+ def testNumProtonDimensions(self):
+ self.assertEqual(self.spectrum.num_proton_dimensions, 2)
+
+ def testHasElement(self):
+ self.assertFalse(self.spectrum.has_element(ChemElements.Ca))
+ self.assertTrue(self.spectrum.has_element(ChemElements.C))
+
+ def testHasConnectedDimensions(self):
+ self.assertFalse(self.spectrum.has_connected_dimensions(0))
+ self.assertTrue(self.spectrum.has_connected_dimensions(1))
+ self.assertTrue(self.spectrum.has_connected_dimensions(2))
+
+ def testConnectedDimensions(self):
+ self.assertEqual(self.spectrum.connected_dimensions(0), ())
+ self.assertEqual(self.spectrum.connected_dimensions(1), (2,))
+ self.assertEqual(self.spectrum.connected_dimensions(2), (1,))
+
+ def testConnect(self):
+ self.assertRaises(ValueError, self.spectrum.connect, 0, 2) # H-H
+ self.assertRaises(IndexError, self.spectrum.connect, 0, 3)
+
+ def testIterator(self):
+
+ peaks = list(self.spectrum)
+
+ self.assertEqual(peaks[0].intensity, 11)
+ self.assertEqual(peaks[0].get(0), 1)
+ self.assertEqual(peaks[0].get(1), 2)
+ self.assertEqual(peaks[0].get(2), 3)
+
+
+@test.unit
+class TestNOEPeak(test.Case):
+
+ def setUp(self):
+ super(TestNOEPeak, self).setUp()
+
+ self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
+ self.spectrum = NOESpectrum(self.elements)
+ self.spectrum.connect(1, 2)
+ self.spectrum.add(11, [1, 2, 3])
+ self.spectrum.add(12, [11, 22, 33])
+
+ self.peaks = list(self.spectrum)
+
+ def testIntensity(self):
+ self.assertEqual(self.peaks[0].intensity, 11)
+ self.assertEqual(self.peaks[1].intensity, 12)
+
+ def testNumDimensions(self):
+ self.assertEqual(self.peaks[0].num_dimensions, 3)
+
+ def testHasConnectedDimensions(self):
+ self.assertFalse(self.peaks[0].has_connected_dimensions(0))
+ self.assertTrue(self.peaks[0].has_connected_dimensions(1))
+ self.assertTrue(self.peaks[0].has_connected_dimensions(2))
+
+ def testConnectedDimensions(self):
+ self.assertEqual(self.peaks[0].connected_dimensions(0), ())
+ self.assertEqual(self.peaks[0].connected_dimensions(1), (2,))
+ self.assertEqual(self.peaks[0].connected_dimensions(2), (1,))
+
+ def testElement(self):
+ self.assertEqual(self.peaks[0].element(0), self.elements[0])
+ self.assertEqual(self.peaks[0].element(1), self.elements[1])
+
+ def testHasElement(self):
+ self.assertTrue(self.peaks[0].has_element(self.elements[0]))
+ self.assertTrue(self.peaks[0].has_element(self.elements[1]))
+
+ def testGet(self):
+ self.assertEqual(self.peaks[0].get(0), 1)
+ self.assertEqual(self.peaks[0][0], 1)
+ self.assertEqual(self.peaks[1].get(1), 22)
+ self.assertEqual(self.peaks[1][1], 22)
+
+ self.assertRaises(IndexError, lambda i: self.peaks[0][i], 4)
+ self.assertRaises(IndexError, self.peaks[0].get, -1)
+
+
if __name__ == '__main__':
test.Console()
diff --git a/csb/test/cases/bio/sequence/alignment/__init__.py b/csb/test/cases/bio/sequence/alignment/__init__.py
new file mode 100644
index 0000000..fa7e33d
--- /dev/null
+++ b/csb/test/cases/bio/sequence/alignment/__init__.py
@@ -0,0 +1,196 @@
+import csb.test as test
+
+from csb.bio.sequence import RichSequence, SequenceTypes
+from csb.bio.sequence.alignment import IdentityMatrix, SimilarityMatrix
+from csb.bio.sequence.alignment import GlobalAlignmentAlgorithm, LocalAlignmentAlgorithm, AlignmentResult
+
+
+@test.unit
+class TestIdentityMatrix(test.Case):
+
+ def setUp(self):
+
+ super(TestIdentityMatrix, self).setUp()
+ self.matrix = IdentityMatrix(2, -3)
+
+ def testScore(self):
+ self.assertEqual(self.matrix.score("a", "a"), 2)
+ self.assertEqual(self.matrix.score("a", "b"), -3)
+
+@test.unit
+class TestSimilarityMatrix(test.Case):
+
+ def setUp(self):
+
+ super(TestSimilarityMatrix, self).setUp()
+ self.matrix = SimilarityMatrix(SimilarityMatrix.BLOSUM62)
+
+ def testScore(self):
+ self.assertEqual(self.matrix.score("A", "A"), 4)
+ self.assertEqual(self.matrix.score("A", "R"), -1)
+ self.assertEqual(self.matrix.score("R", "A"), -1)
+
+
+@test.unit
+class TestGlobalAlignmentAlgorithm(test.Case):
+
+ def setUp(self):
+
+ super(TestGlobalAlignmentAlgorithm, self).setUp()
+
+ self.seq1 = RichSequence('s1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', SequenceTypes.Protein)
+ self.seq2 = RichSequence('s1', '', 'AZCBBABAABCCEF', SequenceTypes.Protein)
+ self.algorithm = GlobalAlignmentAlgorithm(scoring=IdentityMatrix(1, -1), gap=0)
+
+ def testAlign(self):
+
+ ali = self.algorithm.align(self.seq1, self.seq2)
+
+ self.assertEqual(ali.query.sequence, "CCA-BBBCBBCABAABCCEAAAAAAAAAAAAFAA")
+ self.assertEqual(ali.subject.sequence, "--AZ---CBB-ABAABCCE------------F--")
+
+ self.assertEqual(ali.query.residues[3], self.seq1.residues[3])
+ self.assertTrue(ali.query.residues[3] is self.seq1.residues[3])
+
+ self.assertEqual(ali.qstart, 1)
+ self.assertEqual(ali.qend, 33)
+ self.assertEqual(ali.start, 1)
+ self.assertEqual(ali.end, 14)
+
+ self.assertEqual(ali.length, 34)
+ self.assertEqual(ali.gaps, 21)
+ self.assertEqual(ali.identicals, 13)
+ self.assertEqual(ali.identity, 13 / 34.0 )
+ self.assertEqual(ali.score, 13)
+
+ def testEmptyAlignment(self):
+
+ seq1 = RichSequence('s1', '', 'AAAA', SequenceTypes.Protein)
+ seq2 = RichSequence('s2', '', 'BBBB', SequenceTypes.Protein)
+
+ ali = self.algorithm.align(seq1, seq2)
+ self.assertTrue(ali.is_empty)
+
+@test.unit
+class TestLocalAlignmentAlgorithm(test.Case):
+
+ def setUp(self):
+
+ super(TestLocalAlignmentAlgorithm, self).setUp()
+
+ self.seq1 = RichSequence('s1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', SequenceTypes.Protein)
+ self.seq2 = RichSequence('s1', '', 'AZCBBABAACBCCEF', SequenceTypes.Protein)
+ self.algorithm = LocalAlignmentAlgorithm(scoring=IdentityMatrix(1, -1), gap=-1)
+
+ def testAlign(self):
+
+ ali = self.algorithm.align(self.seq1, self.seq2)
+
+ self.assertEqual(ali.query.sequence, "CBBCABAA-BCCE")
+ self.assertEqual(ali.subject.sequence, "CBB-ABAACBCCE")
+
+ self.assertEqual(ali.qstart, 7)
+ self.assertEqual(ali.qend, 18)
+ self.assertEqual(ali.start, 3)
+ self.assertEqual(ali.end, 14)
+
+ self.assertEqual(ali.length, 13)
+ self.assertEqual(ali.gaps, 2)
+ self.assertEqual(ali.identicals, 11)
+ self.assertEqual(ali.identity, 11 / 13.0 )
+ self.assertEqual(ali.score, 9)
+
+ def testEmptyAlignment(self):
+
+ seq1 = RichSequence('s1', '', 'AAAA', SequenceTypes.Protein)
+ seq2 = RichSequence('s2', '', 'BBBB', SequenceTypes.Protein)
+
+ ali = self.algorithm.align(seq1, seq2)
+ self.assertTrue(ali.is_empty)
+
+
+@test.unit
+class TestAlignmentResult(test.Case):
+
+ def setUp(self):
+
+ super(TestAlignmentResult, self).setUp()
+
+ self.seq1 = RichSequence('s1', '', 'AB-D', SequenceTypes.Protein)
+ self.seq2 = RichSequence('s2', '', 'A-CD', SequenceTypes.Protein)
+ self.ali = AlignmentResult(5.5, self.seq1, self.seq2, 10, 12, 20, 22)
+
+ self.es = RichSequence('s1', '', '')
+ self.empty = AlignmentResult(0, self.es, self.es, 0, 0, 0, 0)
+
+ def testConstructor(self):
+
+ self.assertRaises(ValueError, AlignmentResult, 1, self.es, self.es, 0, 0, 0, 0)
+ self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 1, 0, 0, 0)
+ self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 1, 0, 0)
+ self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 0, 1, 0)
+ self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 0, 0, 1)
+
+ self.assertRaises(ValueError, AlignmentResult, 1, self.seq1, self.seq2, 0, 0, 0, 0)
+
+ def testStr(self):
+
+ string = r"""
+ 10 AB-D 12
+ 20 A-CD 22 """.strip("\r\n")
+ self.assertEqual(string, str(self.ali))
+
+ def testAlignment(self):
+
+ ali = self.ali.alignment()
+ self.assertEqual(ali.rows[1].sequence, self.seq1.sequence)
+ self.assertEqual(ali.rows[2].sequence, self.seq2.sequence)
+
+ def testQuery(self):
+ self.assertEqual(self.ali.query.sequence, self.seq1.sequence)
+ self.assertEqual(self.ali.query.residues[2], self.seq1.residues[2])
+ self.assertTrue(self.ali.query.residues[2] is self.seq1.residues[2])
+
+ def testSubject(self):
+ self.assertEqual(self.ali.subject.sequence, self.seq2.sequence)
+ self.assertEqual(self.ali.subject.residues[3], self.seq2.residues[3])
+ self.assertTrue(self.ali.subject.residues[3] is self.seq2.residues[3])
+
+ def testQstart(self):
+ self.assertEqual(self.ali.qstart, 10)
+
+ def testQend(self):
+ self.assertEqual(self.ali.qend, 12)
+
+ def testStart(self):
+ self.assertEqual(self.ali.start, 20)
+
+ def testEnd(self):
+ self.assertEqual(self.ali.end, 22)
+
+ def testLength(self):
+ self.assertEqual(self.ali.length, 4)
+
+ def testScore(self):
+ self.assertEqual(self.ali.score, 5.5)
+
+ def testGaps(self):
+ self.assertEqual(self.ali.gaps, 2)
+
+ def testIdenticals(self):
+ self.assertEqual(self.ali.identicals, 2)
+
+ def testIdentity(self):
+ self.assertEqual(self.ali.identity, 0.5)
+
+ def testIsEmpty(self):
+ self.assertFalse(self.ali.is_empty)
+
+ es = RichSequence('s1', '', '')
+ empty = AlignmentResult(0, es, es, 0, 0, 0, 0)
+ self.assertTrue(empty.is_empty)
+
+
+if __name__ == '__main__':
+
+ test.Console()
diff --git a/csb/test/cases/bio/structure/__init__.py b/csb/test/cases/bio/structure/__init__.py
index f2e722d..8bca0fe 100644
--- a/csb/test/cases/bio/structure/__init__.py
+++ b/csb/test/cases/bio/structure/__init__.py
@@ -285,7 +285,7 @@ class TestStructure(test.Case):
self.assertFalse('A' in self.structure2.chains)
for c in ['.', 'A']:
- self.assertRaises(csb.core.ItemNotFoundError, self.structure2.chains.remove, c)
+ self.assertRaises(structure.ChainNotFoundError, self.structure2.chains.remove, c)
def testAccession(self):
@@ -356,6 +356,19 @@ class TestStructure(test.Case):
with self.config.getTempStream() as tmp:
self.structure.to_pdb(tmp.name)
self.assertEqual(pdbraw, open(tmp.name).read())
+
+ def testFromChain(self):
+ new_structure = structure.Structure.from_chain(self.structure.first_chain)
+
+ self.assertEqual(new_structure.chains.length,1)
+ self.assertEqual(new_structure.first_chain.get_coordinates(('CA',)).tolist(),
+ self.structure.first_chain.get_coordinates(('CA',)).tolist(),)
+
+ self.assertEqual(new_structure.first_chain.sequence,
+ self.structure.first_chain.sequence)
+
+
+
@test.unit
class TestChain(test.Case):
@@ -464,8 +477,8 @@ class TestChain(test.Case):
self.assertEqual(self.chain.find('127', 'X'), self.chain[0])
self.chain[0].id = 127, None
- self.assertRaises(csb.core.ItemNotFoundError, self.chain.find, 127, 'X')
- self.assertRaises(csb.core.ItemNotFoundError, self.chain.find, 999999)
+ self.assertRaises(structure.EntityNotFoundError, self.chain.find, 127, 'X')
+ self.assertRaises(structure.EntityNotFoundError, self.chain.find, 999999)
def testComputeTorsion(self):
@@ -683,8 +696,16 @@ class TestResidue(test.Case):
self.assertEqual(self.residue.insertion_code, None)
def testType(self):
- self.assertEqual(self.residue.type, structure.SequenceAlphabets.Protein.ALA)
+ self.assertEqual(self.residue.type, structure.SequenceAlphabets.Protein.ALA)
+
+ def testIsModified(self):
+ self.assertFalse(self.residue.is_modified)
+ self.assertTrue(self.chain[37].is_modified)
+ def testLabel(self):
+ self.assertEqual(self.residue.label, repr(self.residue.type))
+ self.assertEqual(self.chain[37].label, 'MSE')
+
def testTorsion(self):
self.assertEqual(self.residue.torsion.phi, None)
self.assertNotEqual(self.residue.torsion.psi, None)
diff --git a/csb/test/cases/core/__init__.py b/csb/test/cases/core/__init__.py
index c48dd4e..8eaebb7 100644
--- a/csb/test/cases/core/__init__.py
+++ b/csb/test/cases/core/__init__.py
@@ -17,7 +17,7 @@ class TestDeepCopy(test.Case):
obj = ['X']
copy = utils.deepcopy(obj, recursion=(rec + 1))
- self.assertEquals(obj, copy)
+ self.assertEqual(obj, copy)
self.assertNotEquals(id(obj), id(copy))
@test.unit
@@ -54,7 +54,7 @@ class TestEnum(test.Case):
result = list(self.enum)
members = [self.enum.A, self.enum.B, self.enum.C]
- self.assertEquals(set(result), set(members))
+ self.assertEqual(set(result), set(members))
def testInOperator(self):
self.assertTrue(self.enum.C in self.enum)
@@ -85,7 +85,7 @@ class TestEnum(test.Case):
def testMembers(self):
result = utils.Enum.members(self.enum)
members = [self.enum.A, self.enum.B, self.enum.C]
- self.assertEquals(set(result), set(members))
+ self.assertEqual(set(result), set(members))
def testCreate(self):
new = utils.Enum.create('NewEnum', A=11, B=22)
@@ -96,15 +96,15 @@ class TestEnum(test.Case):
result = utils.Enum.names(self.enum)
names = map(repr, [self.enum.A, self.enum.B, self.enum.C])
names2 = map(operator.attrgetter('name'), [self.enum.A, self.enum.B, self.enum.C])
- self.assertEquals(set(result), set(names))
- self.assertEquals(set(result), set(names2))
+ self.assertEqual(set(result), set(names))
+ self.assertEqual(set(result), set(names2))
def testValues(self):
result = utils.Enum.values(self.enum)
values = map(int, [self.enum.A, self.enum.B, self.enum.C])
values2 = map(operator.attrgetter('value'), [self.enum.A, self.enum.B, self.enum.C])
- self.assertEquals(set(result), set(values))
- self.assertEquals(set(result), set(values2))
+ self.assertEqual(set(result), set(values))
+ self.assertEqual(set(result), set(values2))
def testParseValue(self):
item = utils.Enum.parse(self.enum, 66)
@@ -140,7 +140,7 @@ class TestDictionaryContainer(test.Case):
super(TestDictionaryContainer, self).setUp()
- self.dict = utils.OrderedDict({'A': 1, 'B': 2})
+ self.dict = utils.OrderedDict([('A', 1), ('B', 2)])
self.keys = ('A', 'B', 'C', 'D', 'Z')
self.new = utils.DictionaryContainer
self.test = utils.DictionaryContainer(items=self.dict, restrict=self.keys)
@@ -186,7 +186,7 @@ class TestDictionaryContainer(test.Case):
new._update({'A': 7})
self.assertTrue('A' in new)
- self.assertEquals(new['A'], 7)
+ self.assertEqual(new['A'], 7)
self.assertRaises(utils.ItemNotFoundError, new._update, {'Z': 0})
@@ -251,7 +251,7 @@ class TestCollectionContainer(test.Case):
new._update([98, 99])
self.assertTrue(98 in new)
- self.assertEquals(new.length, 2)
+ self.assertEqual(new.length, 2)
self.assertRaises(TypeError, new._update, ['S'])
@@ -336,7 +336,7 @@ class TestOrderedDict(test.Case):
items = [('G', 4), ('A', 2), ('C', 8), ('B', 7)]
odict = utils.OrderedDict(items)
- self.assertEquals(list(odict.items()), items)
+ self.assertEqual(list(odict.items()), items)
for i, k in enumerate(odict):
self.assertEqual(k, items[i][0])
diff --git a/csb/test/cases/io/__init__.py b/csb/test/cases/io/__init__.py
index 338e067..e303a70 100644
--- a/csb/test/cases/io/__init__.py
+++ b/csb/test/cases/io/__init__.py
@@ -1,5 +1,6 @@
import os
+import sys
import numpy
import types
import csb.io
@@ -18,16 +19,24 @@ class TestShell(test.Case):
def setUp(self):
super(TestShell, self).setUp()
- self.output = csb.io.Shell.run('echo TeST')
+
+ cmd = '{0.python} -c "print({0.text})"'
+
+ self.python = sys.executable.replace("\\", "/") # "\" fails on win7
+ self.text = "123"
+ self.output = csb.io.Shell.run(cmd.format(self))
+
+ if not self.python:
+ self.skipTest("Can't get interpreter's path")
def testSTDOUT(self):
- self.assertEquals(self.output.stdout.strip(), 'TeST')
+ self.assertEqual(self.output.stdout.strip(), self.text)
def testSTDERR(self):
- self.assertEquals(self.output.stderr, '')
+ self.assertEqual(self.output.stderr, '')
def testExitCode(self):
- self.assertEquals(self.output.code, 0)
+ self.assertEqual(self.output.code, 0)
@test.unit
@@ -82,7 +91,7 @@ class TestTable(test.Case):
def testInsert(self):
self.t.insert([333, 333.0, '3 3 3'])
- self.assertEquals(self.t[3, 2], '3 3 3')
+ self.assertEqual(self.t[3, 2], '3 3 3')
self.assertRaises(Exception, lambda:self.t.insert([1]))
@@ -90,20 +99,20 @@ class TestTable(test.Case):
self.assertRaises(csb.io.tsv.InvalidColumnError, lambda:self.t.where('ID').equals(11).update('Missing', 0))
self.t.where('ID').equals(11).update('ID', 133)
- self.assertEquals(self.t[0, 'ID'], 133)
- self.assertEquals(self.t[1, 'ID'], 12)
+ self.assertEqual(self.t[0, 'ID'], 133)
+ self.assertEqual(self.t[1, 'ID'], 12)
self.assertRaises(csb.io.tsv.InvalidColumnError, lambda:self.t.update('Missing', 0))
self.assertRaises(csb.io.tsv.InvalidColumnError, lambda:self.t[0, 'Missing'])
self.t.update('ID', 13)
for r in self.t[:, 'ID']:
- self.assertEquals(r['ID'], 13)
+ self.assertEqual(r['ID'], 13)
self.t[0, 'ID'] = 11
self.t[1, 'ID'] = 12
- self.assertEquals(self.t[0, 'ID'], 11)
- self.assertEquals(self.t[2, 'ID'], 13)
+ self.assertEqual(self.t[0, 'ID'], 11)
+ self.assertEqual(self.t[2, 'ID'], 13)
def testDump(self):
@@ -116,9 +125,9 @@ class TestTable(test.Case):
def testScalar(self):
- self.assertEquals(self.table.scalar(0, 'ID'), 11)
- self.assertEquals(self.table[0, 'ID'], 11)
- self.assertEquals(self.table[0, 0], 11)
+ self.assertEqual(self.table.scalar(0, 'ID'), 11)
+ self.assertEqual(self.table[0, 'ID'], 11)
+ self.assertEqual(self.table[0, 0], 11)
self.assertTrue(isinstance(self.table[:, :], csb.io.tsv.Table))
self.assertTrue(isinstance(self.table[:, ('A',)], csb.io.tsv.Table))
@@ -129,46 +138,46 @@ class TestTable(test.Case):
def testSelect(self):
# test column selection
- self.assertEquals(self.table.select().columns, ('ID', 'A', 'B'))
- self.assertEquals(self.table.select('*').columns, ('ID', 'A', 'B'))
- self.assertEquals(self.table[:, :].columns, ('ID', 'A', 'B'))
- self.assertEquals(self.table[:, :'A'].columns, ('ID',))
- self.assertEquals(self.table[:, 'ID':'B'].columns, ('ID', 'A'))
+ self.assertEqual(self.table.select().columns, ('ID', 'A', 'B'))
+ self.assertEqual(self.table.select('*').columns, ('ID', 'A', 'B'))
+ self.assertEqual(self.table[:, :].columns, ('ID', 'A', 'B'))
+ self.assertEqual(self.table[:, :'A'].columns, ('ID',))
+ self.assertEqual(self.table[:, 'ID':'B'].columns, ('ID', 'A'))
- self.assertEquals(self.table.select(['B', 'A']).columns, ('B', 'A'))
- self.assertEquals(self.table.select('A', 'B').columns, ('A', 'B'))
- self.assertEquals(self.table[:, ('B', 'A')].columns, ('B', 'A'))
+ self.assertEqual(self.table.select(['B', 'A']).columns, ('B', 'A'))
+ self.assertEqual(self.table.select('A', 'B').columns, ('A', 'B'))
+ self.assertEqual(self.table[:, ('B', 'A')].columns, ('B', 'A'))
- self.assertEquals(self.table.select(['A']).columns, ('A',))
- self.assertEquals(self.table.select(['A']).columns, ('A',))
+ self.assertEqual(self.table.select(['A']).columns, ('A',))
+ self.assertEqual(self.table.select(['A']).columns, ('A',))
def fr(t):
return list(t)[0]
# test data
- self.assertEquals(len(list(self.table[1:2, :])), 1)
- self.assertEquals(len(list(self.table[1:, :])), 2)
- self.assertEquals(len(list(self.table[(1, 2), :])), 2)
- self.assertEquals(len(list(self.table[(0, 1, 2), :])), 3)
- self.assertEquals(len(list(self.table[:, :])), 3)
+ self.assertEqual(len(list(self.table[1:2, :])), 1)
+ self.assertEqual(len(list(self.table[1:, :])), 2)
+ self.assertEqual(len(list(self.table[(1, 2), :])), 2)
+ self.assertEqual(len(list(self.table[(0, 1, 2), :])), 3)
+ self.assertEqual(len(list(self.table[:, :])), 3)
firstrow = fr(self.table.select('B', 'A'))
- self.assertEquals(firstrow[0], 'Row eleven')
- self.assertEquals(firstrow[1], 11.1)
+ self.assertEqual(firstrow[0], 'Row eleven')
+ self.assertEqual(firstrow[1], 11.1)
- self.assertEquals(fr(self.table[:, :])[0], 11)
+ self.assertEqual(fr(self.table[:, :])[0], 11)
- self.assertEquals(fr(self.table[:, 'A'])[0], 11.1)
- self.assertEquals(fr(self.table[:, 'B':])[0], 'Row eleven')
+ self.assertEqual(fr(self.table[:, 'A'])[0], 11.1)
+ self.assertEqual(fr(self.table[:, 'B':])[0], 'Row eleven')
- self.assertEquals(fr(self.table[2, :])[0], 13)
- self.assertEquals(fr(self.table[(1, 2), :])[0], 12)
- self.assertEquals(fr(self.table[1:9, :])[0], 12)
+ self.assertEqual(fr(self.table[2, :])[0], 13)
+ self.assertEqual(fr(self.table[(1, 2), :])[0], 12)
+ self.assertEqual(fr(self.table[1:9, :])[0], 12)
def testWhere(self):
self.assertEqual(self.table.where('ID').equals(11).select('A').scalar(), 11.1)
- self.assertEqual(self.table.where('ID').greater(12).select('A').scalar(), 13.3)
+ self.assertEqual(self.table.where('ID').greater(12).select('A').scalar(), None)
self.assertEqual(self.table.where('ID').between(11, 12).select('A').scalar(), 11.1)
self.assertEqual(self.table.where('ID').in_(11).select('A').scalar(), 11.1)
self.assertEqual(self.table.where('ID').in_(11, 12).select('A').scalar(), 11.1)
@@ -265,7 +274,7 @@ class TestTempFile(test.Case):
self.assertTrue(os.path.isdir(name))
del temp
- self.assertEquals(os.path.isdir(name), not dispose)
+ self.assertEqual(os.path.isdir(name), not dispose)
def testMultipleHandles(self):
@@ -309,7 +318,7 @@ class TestTempFolder(test.Case):
self.assertTrue(os.path.isdir(name))
del temp
- self.assertEquals(os.path.isdir(name), not dispose)
+ self.assertEqual(os.path.isdir(name), not dispose)
@test.unit
diff --git a/csb/test/cases/numeric/__init__.py b/csb/test/cases/numeric/__init__.py
index 8e687dd..145c51c 100644
--- a/csb/test/cases/numeric/__init__.py
+++ b/csb/test/cases/numeric/__init__.py
@@ -1,6 +1,27 @@
import csb.test as test
import numpy as np
+
+@test.regression
+class MathRegressions(test.Case):
+
+ def testDihedralAngles(self):
+ """
+ r526
+ """
+ from csb.numeric import dihedral_angle
+
+ a = np.array([2, 0., 0.])
+ b = np.array([0, 0., 0.])
+ c = np.array([0, 2., 0.])
+ d = np.array([0, 4., -4.])
+
+ self.assertEqual(dihedral_angle(a, b, c, d), 90.0)
+ self.assertEqual(dihedral_angle(a, b, c, a), 0.0)
+ self.assertEqual(dihedral_angle(a, b, c, -d), -90.0)
+
+
+
@test.unit
class TestMath(test.Case):
@@ -217,48 +238,48 @@ class InvertibleMatrixTest(test.Case):
# Initialize with matrix
testmatrix = InvertibleMatrix(self.m_general)
# Check if it worked
- self.assertListEqual(testmatrix._matrix.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix._matrix.flatten().tolist(),
self.m_general.flatten().tolist())
# Because the testmatrix.inverse hasn't been accessed yet, testmatrix._inverse should be None
self.assertEqual(testmatrix._inverse_matrix, None)
# Now we access testmatrix.inverse, which only now actually calculates the inverse
- self.assertListEqual(testmatrix.inverse.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix.inverse.flatten().tolist(),
self.m_general_inv.flatten().tolist())
# Let's change testmatrix via testmatrix.__imul__
testmatrix *= 2.
# Check if that worked
- self.assertListEqual(testmatrix._matrix.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix._matrix.flatten().tolist(),
(2.0 * self.m_general.flatten()).tolist())
# This operation should not have changed the testmatrix._inverse_matrix field, as
# we didn't access testmatrix.inverse again
- self.assertListEqual(testmatrix._inverse_matrix.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix._inverse_matrix.flatten().tolist(),
self.m_general_inv.flatten().tolist())
# Now we access testmatrix.inverse, which calculates the inverse and updates the field
- self.assertListEqual(testmatrix.inverse.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix.inverse.flatten().tolist(),
(self.m_general_inv / 2.0).flatten().tolist())
# The same again for testmatrix.__idiv__
testmatrix /= 2.
# Check if that worked
- self.assertListEqual(testmatrix._matrix.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix._matrix.flatten().tolist(),
(self.m_general.flatten()).tolist())
# This operation should not have changed the testmatrix._inverse_matrix field, as
# we didn't access testmatrix.inverse again
- self.assertListEqual(testmatrix._inverse_matrix.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix._inverse_matrix.flatten().tolist(),
(self.m_general_inv / 2.0).flatten().tolist())
# Now we access testmatrix.inverse, which calculates the inverse and updates the field
- self.assertListEqual(testmatrix.inverse.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix.inverse.flatten().tolist(),
self.m_general_inv.flatten().tolist())
# Initialize with inverse matrix
testmatrix = InvertibleMatrix(inverse_matrix=self.m_general_inv)
# Let's see if that worked, e.g. if the testmatrix._inverse_matrix field has been
# set correctly
- self.assertListEqual(testmatrix._inverse_matrix.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix._inverse_matrix.flatten().tolist(),
self.m_general_inv.flatten().tolist())
# Check if the property returns what it's supposed to be
- self.assertListEqual(testmatrix.inverse.flatten().tolist(),
+ self.assertListAlmostEqual(testmatrix.inverse.flatten().tolist(),
self.m_general_inv.flatten().tolist())
# We didn't call testmatrix.__getitem__() yet, so testmatrix._matrix should be None
self.assertEqual(testmatrix._matrix, None)
@@ -267,7 +288,7 @@ class InvertibleMatrixTest(test.Case):
# Now we access testmatrix by its __getitem__ method, which calculates the
# testmatrix._matrix field from the testmatrix._inverse_matrix by inversion
for i in range(len(testmatrix)):
- self.assertListEqual(testmatrix[i].tolist(), invinv[i].tolist())
+ self.assertListAlmostEqual(testmatrix[i].tolist(), invinv[i].tolist())
testmatrix = InvertibleMatrix(inverse_matrix=self.m_general_inv)
# Let's change testmatrix via testmatrix.__imul__
@@ -275,13 +296,13 @@ class InvertibleMatrixTest(test.Case):
# That shouldn't have changed the testmatrix._matrix field (which currently
# should be None), but the testmatrix._inverse_matrix field by a factor of 1/2.0 = 0.5
self.assertEqual(testmatrix._matrix, None)
- self.assertListEqual(testmatrix._inverse_matrix.flatten().tolist(),
- (self.m_general_inv / 2.0).flatten().tolist())
+ self.assertListAlmostEqual(testmatrix._inverse_matrix.flatten().tolist(),
+ (self.m_general_inv / 2.0).flatten().tolist())
# Now we access testmatrix by __getitem__, which calculates the matrix
# from the inverse and updates the field testmatrix._matrix
invinv *= 2.0
for i in range(len(testmatrix)):
- self.assertListEqual(testmatrix[i].tolist(), invinv[i].tolist())
+ self.assertListAlmostEqual(testmatrix[i].tolist(), invinv[i].tolist())
# The same again for testmatrix.__idiv__
testmatrix = InvertibleMatrix(inverse_matrix=self.m_general_inv)
@@ -289,16 +310,20 @@ class InvertibleMatrixTest(test.Case):
# Check if testmatrix._matrix is None and if the testmatrix._inverse field
# has been multiplied by a factor of 2.0
self.assertEqual(testmatrix._matrix, None)
- self.assertListEqual(testmatrix.inverse.flatten().tolist(),
- (self.m_general_inv * 2.0).flatten().tolist())
+ self.assertListAlmostEqual(testmatrix.inverse.flatten().tolist(),
+ (self.m_general_inv * 2.0).flatten().tolist())
# All that is supposed to leave testmatrix._matrix with None:
self.assertEqual(testmatrix._matrix, None)
# Now we access testmatrix by __getitem__ again, which calculates the matrix from
# its inverse and updates the field
invinv /= 4.0
for i in range(len(testmatrix)):
- self.assertListEqual(testmatrix[i].tolist(), invinv[i].tolist())
+ self.assertListAlmostEqual(testmatrix[i].tolist(), invinv[i].tolist())
+
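+ # Helper: element-wise assertAlmostEqual over two flat sequences.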
+ def assertListAlmostEqual(self, first, second, places=None, msg=None, delta=0.00000001):
+ for i, j in zip(first, second):
+ self.assertAlmostEqual(i, j, places=places, msg=msg, delta=delta)
if __name__ == '__main__':
test.Console()
diff --git a/csb/test/cases/statistics/samplers/__init__.py b/csb/test/cases/statistics/samplers/__init__.py
index 003bd2b..219648d 100644
--- a/csb/test/cases/statistics/samplers/__init__.py
+++ b/csb/test/cases/statistics/samplers/__init__.py
@@ -1,5 +1,6 @@
import numpy as np
import csb.test as test
+import csb
from csb.statistics.pdf import Normal, AbstractDensity
@@ -7,19 +8,35 @@ from csb.numeric.integrators import AbstractGradient, VelocityVerlet, LeapFrog,
from csb.numeric import InvertibleMatrix
from csb.statistics.samplers import State
-from csb.statistics.samplers.mc import MDRENSSwapParameterInfo, ThermostattedMDRENSSwapParameterInfo
-from csb.statistics.samplers.mc import RESwapParameterInfo
-from csb.statistics.samplers.mc import AlternatingAdjacentSwapScheme
-from csb.statistics.samplers.mc.singlechain import HMCSampler, RWMCSampler
-from csb.statistics.samplers.mc.multichain import ReplicaExchangeMC, MDRENS, ThermostattedMDRENS
-from csb.statistics.samplers.mc.propagators import RWMCPropagator, HMCPropagator
-
+from csb.statistics.samplers.mc import Trajectory
+from csb.statistics.samplers.mc.multichain import MDRENSSwapParameterInfo, MDRENS
+from csb.statistics.samplers.mc.multichain import ThermostattedMDRENSSwapParameterInfo
+from csb.statistics.samplers.mc.multichain import RESwapParameterInfo, AlternatingAdjacentSwapScheme
+from csb.statistics.samplers.mc.multichain import ReplicaExchangeMC, ThermostattedMDRENS
+from csb.statistics.samplers.mc.multichain import HMCStepRENS, HMCStepRENSSwapParameterInfo
+from csb.statistics.samplers.mc.multichain import AbstractSwapCommunicator, AbstractExchangeMC
+from csb.statistics.samplers.mc.multichain import AbstractSwapParameterInfo, ReplicaHistory
+from csb.statistics.samplers.mc.singlechain import HMCSampler, RWMCSampler, AbstractNCMCSampler
+from csb.statistics.samplers.mc.singlechain import AbstractSingleChainMC
+from csb.statistics.samplers.mc.propagators import RWMCPropagator, HMCPropagator, MDPropagator
+from csb.statistics.samplers.mc.propagators import AbstractNCMCPropagator, AbstractPropagator
+from csb.statistics.samplers.mc.neqsteppropagator import ReducedHamiltonian, HamiltonianSysInfo
+from csb.statistics.samplers.mc.neqsteppropagator import PlainMDPropagation, PlainMDPropagationParam
+from csb.statistics.samplers.mc.neqsteppropagator import AbstractMDPropagation, HMCPropagation
+from csb.statistics.samplers.mc.neqsteppropagator import Protocol, Step, AbstractPerturbation
+from csb.statistics.samplers.mc.neqsteppropagator import ReducedHamiltonianPerturbation, AbstractPropagation
+from csb.statistics.samplers.mc.neqsteppropagator import NonequilibriumStepPropagator
+from csb.statistics.samplers.mc.neqsteppropagator import NonequilibriumTrajectory
+from csb.statistics.samplers.mc.neqsteppropagator import HMCPropagationParam
class SamplePDF(Normal):
def log_prob(self, x):
return sum(map(super(SamplePDF, self).log_prob, x))
+ def grad(self, x, t):
+ return x / (self.sigma ** 2)
+
class MultimodalPDF(AbstractDensity):
def log_prob(self, x):
@@ -45,7 +62,6 @@ class Multimodal2DPDF(AbstractDensity):
return np.array([(-6.25 * np.sin(2.5 * x[0]) + 0.08 * x[0]) * self._E2(x),
self._E1(x) * self.k * x[1]])
-
@test.functional
class TestMCPropagators(test.Case):
@@ -95,8 +111,55 @@ class TestMCPropagators(test.Case):
gen = HMCPropagator(self.pdf, self.gradient, self.timestep * 1.5, self.nsteps, mass_matrix=mm)
self.checkResult(gen.generate(init_state, self.nits))
+
+ @test.skip("Takes quite a long time to run.")
+ def testNCMCPropagator(self):
+
+ Nhalf = 5
+ dt = 0.1
+ md_tl = 5
+ ks = np.linspace(1.0, 0.2, Nhalf).tolist()
+ sigmas = [1/np.sqrt(k) for k in ks]
+ sigmas += sigmas[::-1][1:]
+ N = len(sigmas)
+ pdfs = [SamplePDF(sigma=s) for s in sigmas]
+ hamiltonians = [ReducedHamiltonian(pdfs[i].log_prob, pdfs[i].grad) for i in range(N)]
+ sys_infos = [HamiltonianSysInfo(hamiltonians[i]) for i in range(N)]
+ steps = [Step(ReducedHamiltonianPerturbation(sys_infos[i], sys_infos[i+1],
+ evaluate_work=False),
+ PlainMDPropagation(sys_infos[i+1],
+ PlainMDPropagationParam(dt, md_tl, pdfs[i+1].grad),
+ evaluate_heat=False))
+ for i in range(N - 1)]
+ rv_steps = [Step(ReducedHamiltonianPerturbation(sys_infos[i], sys_infos[i+1],
+ evaluate_work=False),
+ PlainMDPropagation(sys_infos[i],
+ PlainMDPropagationParam(dt, md_tl, pdfs[i].grad),
+ evaluate_heat=False))
+ for i in range(N - 1)]
+
+ for s in rv_steps:
+ s.set_propagation_first()
+ protocol = Protocol(steps)
+ rv_protocol = Protocol(rv_steps)
+
+ class MDProbStepNCMCSampler(AbstractNCMCSampler):
+ def _calc_pacc(self, proposal_communicator):
+ return np.exp(-proposal_communicator.traj.deltaH)
+
+ class MDPropStepNCMCPropagator(AbstractNCMCPropagator):
+ def _init_sampler(self, init_state):
+ self._sampler = MDProbStepNCMCSampler(init_state, self.protocol,
+ self.reverse_protocol)
+
+ gen = MDPropStepNCMCPropagator(protocol, rv_protocol)
+
+ init_state = State(np.array([1.0]))
+ traj = gen.generate(init_state, self.nits, return_trajectory=True)
+ self.checkResult(traj)
+
@test.functional
class TestMultichain(test.Case):
@@ -167,13 +230,14 @@ class TestMultichain(test.Case):
self.assertAlmostEqual(p_occ_sampled1, p_occ, delta=4.0 * 0.035)
self.assertAlmostEqual(p_occ_sampled2, p_occ, delta=4.0 * 0.035)
-
+
+ @test.skip("Takes some time, rendered optional by a unit test.")
def testReplicaExchangeMC(self):
self.set1pParams()
params = [RESwapParameterInfo(self.samplers[0], self.samplers[1])]
algorithm = ReplicaExchangeMC(self.samplers, params)
self._run(algorithm)
-
+
def testMDRENS(self):
self.set1pParams()
@@ -181,7 +245,7 @@ class TestMultichain(test.Case):
0.025, 15, self.grad)]
algorithm = MDRENS(self.samplers, params, integrator=VelocityVerlet)
self._run(algorithm)
-
+
def testThermostattedMDRens(self):
self.set1pParams()
@@ -190,7 +254,7 @@ class TestMultichain(test.Case):
temperature=self.Ts[0])]
algorithm = ThermostattedMDRENS(self.samplers, params)
self._run(algorithm)
-
+
def testThermostattedMDRensMM(self):
self.set2pParams()
@@ -208,7 +272,645 @@ class TestMultichain(test.Case):
algorithm = ThermostattedMDRENS(self.samplers, params)
self._run(algorithm)
-
+
+ def testHMCStepRENS(self):
+
+ self.set1pParams()
+ params = [HMCStepRENSSwapParameterInfo(self.samplers[0], self.samplers[1], 0.05, 3, 1,
+ self.grad, 5)]
+
+ algorithm = HMCStepRENS(self.samplers, params)
+
+ self._run(algorithm)
+
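+# Minimal mocks of the exchange machinery: they hold just enough state for the
+# tests below to exercise AbstractExchangeMC's swap bookkeeping without running
+# a real sampler.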
+class MockSwapCommunicator(AbstractSwapCommunicator):
+
+ pass
+
+class MockSwapParameterInfo(AbstractSwapParameterInfo):
+
+ pass
+
+class MockSampler(AbstractSingleChainMC):
+
+ def __init__(self, pdf, state, temperature=1.0):
+
+ self._state = state
+ self._pdf = pdf
+ self._temperature = temperature
+
+ def _propose(self):
+
+ pass
+
+ def _calc_pacc(self):
+
+ pass
+
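+# AbstractExchangeMC subclass with a hard-wired swap proposal and a fixed
+# acceptance probability of 0.75, so _accept_swap and swap() can be tested
+# against known random seeds.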
+class MockedAbstractExchangeMC(AbstractExchangeMC):
+
+ def _propose_swap(self, param_info):
+
+ return MockSwapCommunicator(param_info, Trajectory([State(np.array([1.0])),
+ State(np.array([2.0]))]),
+ Trajectory([State(np.array([2.0])),
+ State(np.array([1.0]))]))
+
+ def _calc_pacc_swap(self, swapcom):
+
+ swapcom.acceptance_probability = 0.75
+
+ return swapcom
+
+@test.unit
+class TestAbstractExchangeMC(test.Case):
+
+ def setUp(self):
+
+ self.samplers = [MockSampler(None, State(np.array([3.0]))),
+ MockSampler(None, State(np.array([5.0])))]
+
+ self.param_info = MockSwapParameterInfo(self.samplers[0], self.samplers[1])
+
+ self.algo = MockedAbstractExchangeMC(self.samplers, [self.param_info])
+
+
+ def testAcceptSwap(self):
+
+ swapcom = MockSwapCommunicator(self.param_info,
+ Trajectory([State(np.array([1.0])),
+ State(np.array([2.0]))]),
+ Trajectory([State(np.array([2.0])),
+ State(np.array([1.0]))]))
+
+ np.random.seed(5)
+
+ swapcom.acceptance_probability = 0.75
+ res = self.algo._accept_swap(swapcom)
+ assert(res)
+
+ swapcom.acceptance_probability = 0.15
+ res = self.algo._accept_swap(swapcom)
+ assert(not res)
+
+ def testSwap(self):
+
+ np.random.seed(5)
+
+ res = self.algo.swap(0)
+
+ assert(res)
+ self.assertEqual(self.samplers[0].state.position[0], 1.0)
+ self.assertEqual(self.samplers[1].state.position[0], 2.0)
+ self.assertEqual(self.algo.statistics.stats[0].total_swaps, 1)
+ self.assertEqual(self.algo.statistics.stats[0].accepted_swaps, 1)
+
+ np.random.seed(4)
+
+ res = self.algo.swap(0)
+
+ assert(not res)
+ self.assertEqual(self.samplers[0].state.position[0], 1.0)
+ self.assertEqual(self.samplers[1].state.position[0], 2.0)
+ self.assertEqual(self.algo.statistics.stats[0].total_swaps, 2)
+ self.assertEqual(self.algo.statistics.stats[0].accepted_swaps, 1)
+
+@test.unit
+class TestReplicaExchangeMC(test.Case):
+
+ def setUp(self):
+
+ pdf1 = HO()
+ pdf2 = HO(k1=2.0, k2=2.0)
+
+ self.samplers = [MockSampler(pdf1, State(np.array([3.0]))),
+ MockSampler(pdf2, State(np.array([5.0])))]
+
+ self.param_info = RESwapParameterInfo(self.samplers[0], self.samplers[1])
+
+ self.algo = ReplicaExchangeMC(self.samplers, [self.param_info])
+
+ def testProposeSwap(self):
+
+ res = self.algo._propose_swap(self.param_info)
+ self.assertEqual(res.traj12.initial.position[0], 3.0)
+ self.assertEqual(res.traj12.final.position[0], 3.0)
+ self.assertEqual(res.traj21.initial.position[0], 5.0)
+ self.assertEqual(res.traj21.final.position[0], 5.0)
+
+ def testCalcPaccSwap(self):
+
+ swapcom = self.algo._propose_swap(self.param_info)
+ res = self.algo._calc_pacc_swap(swapcom)
+
+ self.assertEqual(res.acceptance_probability, csb.numeric.exp(-12.5 + 4.5 - 9.0 + 25.0))
+
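+# One-dimensional harmonic oscillator used as a simple test Hamiltonian; its
+# stiffness k and center x interpolate linearly in time t from (k1, x1) to
+# (k2, x2) over the switching time tau.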
+class HO(object):
+
+ def __init__(self, k1=1.0, k2=1.0, x1=0.0, x2=0.0, tau=1.0):
+ self.k1 = k1
+ self.k2 = k2
+ self.x1 = x1
+ self.x2 = x2
+ self.tau = tau
+ self.kt = lambda t: self.k2 * t / self.tau + (1 - t / self.tau) * self.k1
+ self.xt = lambda t: self.x2 * t / self.tau + (1 - t / self.tau) * self.x1
+
+ def log_prob(self, x, t=0.0):
+ return -0.5 * self.kt(t) * sum((x - self.xt(t)) ** 2)
+
+ def gradient(self, x, t):
+ return self.kt(t) * (x - self.xt(t))
+
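+# Deterministic propagator stub: the final state doubles position and momentum,
+# so trajectory-derived heat and work can be verified by hand.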
+class MockPropagator(AbstractPropagator):
+
+ def __init__(self):
+ pass
+
+ def generate(self, init_state, length, return_trajectory=False):
+
+ final_state = State(init_state.position * 2, init_state.momentum * 2)
+
+ return Trajectory([init_state, final_state])
+
+class MDPropagationMocked(AbstractMDPropagation):
+
+ def _propagator_factory(self):
+
+ return MockPropagator()
+
+class PlainMDPropagationMocked(PlainMDPropagation):
+
+ def _propagator_factory(self):
+
+ return MockPropagator()
+
+class HMCPropagationMocked(HMCPropagation):
+
+ def _propagator_factory(self):
+
+ return MockPropagator()
+
+class MockPerturbation(AbstractPerturbation):
+
+ @property
+ def sys_before(self):
+
+ pdf = HO()
+
+ return HamiltonianSysInfo(ReducedHamiltonian(pdf.log_prob, pdf.gradient))
+
+ @property
+ def sys_after(self):
+
+ pdf = HO()
+
+ return HamiltonianSysInfo(ReducedHamiltonian(pdf.log_prob, pdf.gradient))
+
+ def __init__(self):
+ pass
+
+ def _run_perturbator(self, state):
+
+ final = State(state.position * 2, state.momentum * 2)
+
+ return Trajectory([state, final])
+
+ def _calculate_work(self, traj):
+
+ return 42.0
+
+ def _calculate_jacobian(self, traj):
+
+ return 1.1
+
+
+class MockPropagation(AbstractPropagation):
+
+ def __init__(self):
+ pass
+
+ @property
+ def sys(self):
+
+ pdf = HO()
+
+ return HamiltonianSysInfo(ReducedHamiltonian(pdf.log_prob, pdf.gradient))
+
+ def _run_propagator(self, state):
+
+ final = State(state.position * 2, state.momentum * 2)
+
+ return Trajectory([state, final])
+
+ def _calculate_heat(self, traj):
+
+ return -42.0
+
+ def _propagator_factory(self):
+
+ return None
+
+
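+# Step stub that doubles the state and reports fixed heat/work of -42/+42
+# (signs swapped when propagation runs first) and a Jacobian of 1.1, letting
+# the propagator and sampler tests check the accumulated quantities exactly.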
+class MockStep(Step):
+
+ def __init__(self, return_momentum=True):
+
+ self._return_momentum = return_momentum
+ self._perform = None
+ self.perform = self._perform_pert_prop
+
+ @property
+ def perturbation(self):
+
+ return MockPerturbation()
+
+ def _perform_pert_prop(self, state, extra_info=None):
+
+ if self._return_momentum == True:
+ final = State(state.position * 2, state.momentum * 2)
+ else:
+ final = State(state.position * 2)
+
+ res = NonequilibriumTrajectory([state, final], heat=-42.0, work=42.0, jacobian=1.1)
+
+ return res, None, None
+
+ def _perform_prop_pert(self, state, extra_info=None):
+
+ if self._return_momentum == True:
+ final = State(state.position * 2, state.momentum * 2)
+ else:
+ final = State(state.position * 2)
+
+ res = NonequilibriumTrajectory([state, final], heat=42.0, work=-42.0, jacobian=1.1)
+
+ return res, None, None
+
+
+class MockProtocol(Protocol):
+
+ def __init__(self, momentum=True):
+
+ self._momentum = momentum
+ self.steps = [MockStep(self._momentum), MockStep(self._momentum)]
+
+
+@test.unit
+class TestNeqsteppropagator(test.Case):
+
+ def testReducedHamiltonian(self):
+ pdf = HO(k1=2.0, k2=2.0)
+ init = State(np.array([2.0]), np.array([-2.0]))
+ ham = ReducedHamiltonian(lambda x: pdf.log_prob(x, 0.0), pdf.gradient, temperature=4.0)
+
+ self.assertEqual(4.0, ham.E(init.position))
+ self.assertEqual(2.0, ham.kinetic_energy(init.momentum))
+ self.assertEqual(0.0, ham.kinetic_energy(None))
+ self.assertEqual(-1.0, ham.rlog_prob(init.position))
+ self.assertEqual(0.5, ham.rkinetic_energy(init.momentum))
+ self.assertEqual(1.5, ham(init))
+
+ def testHMCPropagation(self):
+
+ pdf = HO()
+ sys = HamiltonianSysInfo(ReducedHamiltonian(pdf.log_prob, pdf.gradient))
+ param = HMCPropagationParam(None, None, None)
+ hmcprop = HMCPropagationMocked(sys, param)
+
+ init = State(np.array([2.0]), np.array([2.0]))
+
+ ## Test _set_mass_matrix
+ d = len(init.position)
+ param = HMCPropagationParam(None, None, None, mass_matrix=InvertibleMatrix(np.eye(d)))
+ hmcprop = HMCPropagationMocked(sys, param)
+ hmcprop._set_mass_matrix(init)
+ self.assertEqual(hmcprop.param.mass_matrix,
+ InvertibleMatrix(np.eye(len(init.position))))
+
+
+ param = HMCPropagationParam(None, None, None)
+ hmcprop = HMCPropagationMocked(sys, param)
+ hmcprop._set_mass_matrix(init)
+ self.assertEqual(hmcprop.param.mass_matrix,
+ InvertibleMatrix(np.eye(len(init.position))))
+
+ ## Test _calculate_heat
+ final = State(init.position * 2, init.momentum * 2)
+ traj = Trajectory([init, final])
+ self.assertEqual(hmcprop._calculate_heat(traj), 6.0)
+
+ ## Test __call__
+ result = hmcprop(init)
+ self.assertEqual(init.position, result.initial.position)
+ self.assertEqual(init.momentum, result.initial.momentum)
+ self.assertEqual(result.final.position, init.position * 2)
+ self.assertEqual(result.final.momentum, init.momentum * 2)
+ self.assertEqual(result.heat, 6.0)
+
+ def testPlainMDPropagation(self):
+
+ pdf = HO()
+ sys = HamiltonianSysInfo(ReducedHamiltonian(pdf.log_prob, pdf.gradient))
+
+ init = State(np.array([2.0]), np.array([2.0]))
+
+ ## Test _set_mass_matrix
+ d = len(init.position)
+ param = PlainMDPropagationParam(None, None, None,
+ mass_matrix=InvertibleMatrix(np.eye(d)))
+ mdprop = PlainMDPropagationMocked(sys, param)
+ mdprop._set_mass_matrix(init)
+ self.assertEqual(mdprop.param.mass_matrix,
+ InvertibleMatrix(np.eye(d)))
+
+ param = PlainMDPropagationParam(None, None, None)
+ mdprop = PlainMDPropagationMocked(sys, param)
+ mdprop._set_mass_matrix(init)
+ self.assertEqual(mdprop.param.mass_matrix,
+ InvertibleMatrix(np.eye(d)))
+
+ ## Test _calculate_heat
+ final = State(init.position * 2, init.momentum * 2)
+ traj = Trajectory([init, final])
+ self.assertEqual(mdprop._calculate_heat(traj), 12.0)
+
+ ## Test __call__
+ result = mdprop(init)
+ self.assertEqual(init.position, result.initial.position)
+ self.assertEqual(init.momentum, result.initial.momentum)
+ self.assertEqual(result.final.position, init.position * 2)
+ self.assertEqual(result.final.momentum, init.momentum * 2)
+ self.assertEqual(result.heat, 12.0)
+
+
+ def testReducedHamiltonianPerturbation(self):
+
+ pdf = HO(k1=1.0, k2=2.0)
+ redham1 = ReducedHamiltonian(lambda x: pdf.log_prob(x, 0.0))
+ redham2 = ReducedHamiltonian(lambda x: pdf.log_prob(x, 1.0))
+ sys1 = HamiltonianSysInfo(redham1)
+ sys2 = HamiltonianSysInfo(redham2)
+ init = State(np.array([2.0]), np.array([2.0]))
+ traj = Trajectory([init, init])
+
+ hampert = ReducedHamiltonianPerturbation(sys1, sys2)
+
+ ## Test _calculate_work
+ self.assertEqual(hampert._calculate_work(traj), 2.0)
+
+ ## Test __call__
+ result = hampert(init)
+ self.assertEqual(result.initial.position[0], init.position[0])
+ self.assertEqual(result.initial.momentum[0], init.momentum[0])
+ self.assertEqual(result.initial.position[0], result.final.position[0])
+ self.assertEqual(result.initial.momentum[0], result.final.momentum[0])
+ self.assertEqual(result.work, 2.0)
+ self.assertEqual(result.jacobian, 1.0)
+
+
+ def testStep(self):
+
+ step = Step(MockPerturbation(), MockPropagation())
+ init = State(np.array([2.0]), np.array([2.0]))
+
+ ## Test step with first perturbation, then propagation
+ res = step.perform(init)[0]
+
+ self.assertEqual(res.final.position, init.position * 4)
+ self.assertEqual(res.final.momentum, init.momentum * 4)
+ self.assertEqual(res.heat, -42.0)
+ self.assertEqual(res.work, 42.0)
+ self.assertEqual(res.jacobian, 1.1)
+
+ ## Test step with propagation first, then perturbation
+ step.set_propagation_first()
+ res = step.perform(init)[0]
+
+ self.assertEqual(step.perform, step._perform_prop_pert)
+ self.assertEqual(res.final.position, init.position * 4)
+ self.assertEqual(res.final.momentum, init.momentum * 4)
+ self.assertEqual(res.heat, -42.0)
+ self.assertEqual(res.work, 42.0)
+ self.assertEqual(res.jacobian, 1.1)
+
+ def testNonequilibriumStepPropagator(self):
+
+ protocol = Protocol([MockStep(True) for i in range(10)])
+
+ gen = NonequilibriumStepPropagator(protocol)
+
+ ## Test generate()
+ init = State(np.array([2.0]), np.array([2.0]))
+
+ res = gen.generate(init)
+
+ self.assertEqual(res.final.position, init.position * (2 ** 10))
+ self.assertEqual(res.final.momentum, init.momentum * (2 ** 10))
+ self.assertEqual(res.work, 10 * 42)
+ self.assertEqual(res.heat, -10 * 42)
+ self.assertEqual(res.jacobian, 1.1 ** 10)
+
+
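+# NCMC sampler stub whose acceptance probability is simply the final position
+# of the proposal, which keeps acceptance deterministic for the seeds used below.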
+class MockedNCMCSampler(AbstractNCMCSampler):
+
+ def _calc_pacc(self, proposal_communicator):
+
+ return proposal_communicator.traj.final.position[0]
+
+
+@test.unit
+class TestNCMCSampler(test.Case):
+
+ def testProposeWithMomentum(self):
+
+ self.protocol = MockProtocol(True)
+ self.reverse_protocol = MockProtocol(True)
+ for s in self.reverse_protocol.steps:
+ s.set_propagation_first()
+
+ ## Test with momentum
+ init = State(np.array([2.0]), np.array([2.0]))
+
+ sampler = MockedNCMCSampler(init, self.protocol, self.reverse_protocol)
+
+ ## Test _propose
+ # Make sure the first random number is < 0.5
+ np.random.seed(5)
+ result = sampler._propose()
+
+ self.assertEqual(result.traj.heat, - 2 * 42)
+ self.assertEqual(result.traj.work, 2 * 42)
+ self.assertEqual(result.traj.initial.position[0], init.position[0])
+ self.assertEqual(result.traj.final.position[0], init.position[0] * 4)
+ self.assertEqual(result.traj.initial.momentum[0], init.momentum[0])
+ self.assertEqual(result.traj.final.momentum[0], init.momentum[0] * 4)
+
+ # Make sure the first random number is > 0.5
+ np.random.seed(4)
+ result = sampler._propose()
+
+ self.assertEqual(result.traj.heat, 2 * 42)
+ self.assertEqual(result.traj.work, - 2 * 42)
+ self.assertEqual(result.traj.initial.position[0], init.position[0])
+ self.assertEqual(result.traj.final.position[0], init.position[0] * 4)
+ self.assertEqual(result.traj.initial.momentum[0], init.momentum[0])
+ self.assertEqual(result.traj.final.momentum[0], init.momentum[0] * 4)
+
+
+ def testProposeWithoutMomentum(self):
+
+ self.protocol = MockProtocol(False)
+ self.reverse_protocol = MockProtocol(False)
+ for s in self.reverse_protocol.steps:
+ s.set_propagation_first()
+
+ ## Test without momentum
+ init = State(np.array([2.0]))
+
+ sampler = MockedNCMCSampler(init, self.protocol, self.reverse_protocol)
+
+ ## Test _propose
+ # Make sure the first random number is < 0.5
+ np.random.seed(5)
+ result = sampler._propose()
+
+ self.assertEqual(result.traj.heat, - 2 * 42)
+ self.assertEqual(result.traj.work, 2 * 42)
+ self.assertEqual(result.traj.initial.position[0], init.position[0])
+ self.assertEqual(result.traj.final.position[0], init.position[0] * 4)
+ self.assertEqual(result.traj.initial.momentum, None)
+ self.assertEqual(result.traj.final.momentum, None)
+
+ # Make sure the first random number is > 0.5
+ np.random.seed(4)
+ result = sampler._propose()
+
+ self.assertEqual(result.traj.heat, 2 * 42)
+ self.assertEqual(result.traj.work, - 2 * 42)
+ self.assertEqual(result.traj.initial.position[0], init.position[0])
+ self.assertEqual(result.traj.final.position[0], init.position[0] * 4)
+ self.assertEqual(result.traj.initial.momentum, None)
+ self.assertEqual(result.traj.final.momentum, None)
+
+
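+# State subclass whose clone() preserves the 'history' attribute, used by the
+# replica-history tests to record the sequence of replica indices a state visits.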
+class HState(State):
+
+ def clone(self):
+ s = super(HState, self).clone()
+ s.history = self.history
+
+ return s
+
+
+@test.functional
+class TestReplicaHistory(test.Case):
+
+ def setUp(self):
+
+ pass
+
+ def _runSimulation(self, n_replicas, swap_interval, first_swap):
+
+ temperatures = np.linspace(1.0, 5.0, n_replicas)
+
+ init_states = [HState(np.array([1.0])) for T in temperatures]
+
+ for i, x in enumerate(init_states):
+ x.history = []
+
+ samplers = [RWMCSampler(SamplePDF(), init_states[i], stepsize=1.0, temperature=T)
+ for i, T in enumerate(temperatures)]
+
+ params = [RESwapParameterInfo(samplers[i], samplers[i+1]) for i in range(len(samplers) - 1)]
+
+ algo = ReplicaExchangeMC(samplers, params)
+ swapper = AlternatingAdjacentSwapScheme(algo)
+ samples = []
+
+ for i in range(500):
+ if (i - first_swap) % swap_interval == 0 and i > 0 and i >= first_swap:
+ swapper.swap_all()
+ else:
+ algo.sample()
+
+ for j, s in enumerate(algo.state):
+ s.history.append(j)
+
+ samples.append(algo.state)
+
+ return samples
+
+
+ def _assertIdenticalHistories(self, samples, interval, first_swap=None):
+
+ rh = ReplicaHistory(samples, interval, first_swap)
+
+ for i in range(len(samples[0])):
+ h = rh.calculate_history(i)
+ for j, x in enumerate(samples[-1]):
+ if x.history == h:
+ return True
+
+ return False
+
+ def _assertIdenticalProjTrajs(self, samples, interval, first_swap=None):
+
+ rh = ReplicaHistory(samples, interval, first_swap)
+
+ ## Calculate projected trajectories directly from test data history
+ trajs1 = [Trajectory([x for x in [y[j] for y in samples] if x.history[0] == j])
+ for j in range(len(samples[0]))]
+
+ ok = []
+ for i in range(len(samples[0])):
+ trajs2 = rh.calculate_projected_trajectories(i)
+ ok.append(True in [np.all(np.array(t1) == np.array(t2)) for t1 in trajs1
+ for t2 in trajs2])
+
+ return np.all(ok)
+
+ def testTwoReplicas(self):
+
+ swap_interval = 5
+ first_swap = 5
+
+ samples = self._runSimulation(2, swap_interval, first_swap)
+
+ assert(self._assertIdenticalHistories(samples, swap_interval))
+ assert(self._assertIdenticalProjTrajs(samples, swap_interval))
+
+ def testFourReplicas(self):
+
+ swap_interval = 5
+ first_swap = 5
+
+ samples = self._runSimulation(4, swap_interval, first_swap)
+
+ assert(self._assertIdenticalHistories(samples, swap_interval))
+ assert(self._assertIdenticalProjTrajs(samples, swap_interval))
+
+ def testFiveReplicas(self):
+
+ swap_interval = 5
+ first_swap = 5
+
+ samples = self._runSimulation(5, swap_interval, first_swap)
+
+ assert(self._assertIdenticalHistories(samples, swap_interval))
+ assert(self._assertIdenticalProjTrajs(samples, swap_interval))
+
+ def testFiveReplicasOffset(self):
+
+ swap_interval = 6
+ first_swap = 7
+
+ samples = self._runSimulation(5, swap_interval, first_swap)
+
+ assert(self._assertIdenticalHistories(samples, swap_interval, first_swap))
+ assert(self._assertIdenticalProjTrajs(samples, swap_interval, first_swap))
+
+
if __name__ == '__main__':
test.Console()
diff --git a/csb/test/data/1d3z.regular.pdb b/csb/test/data/1d3z.regular.pdb
index 084e89d..2307f7a 100644
--- a/csb/test/data/1d3z.regular.pdb
+++ b/csb/test/data/1d3z.regular.pdb
@@ -1403,25 +1403,25 @@ ATOM 1231 HA3 GLY A 76 45.541 -70.558 -22.406 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 2
-ATOM 1 N MET A 1 54.015 -88.009 9.498 1.00 9.67 N
-HETATM 2 CA MET A 1 52.647 -87.443 9.674 1.00 10.38 C
-ATOM 3 C MET A 1 51.877 -87.557 8.364 1.00 9.62 C
-ATOM 4 O MET A 1 52.467 -87.554 7.283 1.00 9.62 O
-ATOM 5 CB MET A 1 52.755 -85.973 10.093 1.00 13.77 C
-ATOM 6 CG MET A 1 53.573 -85.198 9.060 1.00 16.29 C
-ATOM 7 SD MET A 1 53.941 -83.545 9.709 1.00 17.17 S
-ATOM 8 CE MET A 1 53.585 -82.596 8.207 1.00 16.11 C
-ATOM 9 H1 MET A 1 54.679 -87.516 10.127 1.00 0.00 H
-ATOM 10 H2 MET A 1 54.316 -87.887 8.509 1.00 0.00 H
-ATOM 11 H3 MET A 1 54.004 -89.021 9.735 1.00 0.00 H
-ATOM 12 HA MET A 1 52.131 -87.997 10.444 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.765 -85.547 10.160 1.00 0.00 H
-ATOM 14 HB3 MET A 1 53.240 -85.907 11.053 1.00 0.00 H
-ATOM 15 HG2 MET A 1 54.497 -85.722 8.868 1.00 0.00 H
-ATOM 16 HG3 MET A 1 53.010 -85.113 8.143 1.00 0.00 H
-ATOM 17 HE1 MET A 1 53.656 -81.540 8.426 1.00 0.00 H
-ATOM 18 HE2 MET A 1 52.590 -82.823 7.863 1.00 0.00 H
-ATOM 19 HE3 MET A 1 54.299 -82.859 7.439 1.00 0.00 H
+ATOM 1 N MSE A 1 54.015 -88.009 9.498 1.00 9.67 N
+HETATM 2 CA MSE A 1 52.647 -87.443 9.674 1.00 10.38 C
+ATOM 3 C MSE A 1 51.877 -87.557 8.364 1.00 9.62 C
+ATOM 4 O MSE A 1 52.467 -87.554 7.283 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.755 -85.973 10.093 1.00 13.77 C
+ATOM 6 CG MSE A 1 53.573 -85.198 9.060 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.941 -83.545 9.709 1.00 17.17 SE
+ATOM 8 CE MSE A 1 53.585 -82.596 8.207 1.00 16.11 C
+ATOM 9 H1 MSE A 1 54.679 -87.516 10.127 1.00 0.00 H
+ATOM 10 H2 MSE A 1 54.316 -87.887 8.509 1.00 0.00 H
+ATOM 11 H3 MSE A 1 54.004 -89.021 9.735 1.00 0.00 H
+ATOM 12 HA MSE A 1 52.131 -87.997 10.444 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.765 -85.547 10.160 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 53.240 -85.907 11.053 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 54.497 -85.722 8.868 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 53.010 -85.113 8.143 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 53.656 -81.540 8.426 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 52.590 -82.823 7.863 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 54.299 -82.859 7.439 1.00 0.00 H
ATOM 20 N GLN A 2 50.554 -87.663 8.468 1.00 9.27 N
ATOM 21 CA GLN A 2 49.701 -87.785 7.286 1.00 9.07 C
ATOM 22 C GLN A 2 49.120 -86.428 6.916 1.00 8.72 C
@@ -2637,25 +2637,25 @@ ATOM 1231 HA3 GLY A 76 44.645 -76.671 -23.946 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 3
-ATOM 1 N MET A 1 52.857 -89.463 8.836 1.00 9.67 N
-ATOM 2 CA MET A 1 51.617 -88.687 9.117 1.00 10.38 C
-ATOM 3 C MET A 1 50.812 -88.526 7.834 1.00 9.62 C
-ATOM 4 O MET A 1 51.362 -88.563 6.733 1.00 9.62 O
-ATOM 5 CB MET A 1 51.991 -87.313 9.677 1.00 13.77 C
-ATOM 6 CG MET A 1 52.898 -86.575 8.693 1.00 16.29 C
-ATOM 7 SD MET A 1 53.560 -85.095 9.501 1.00 17.17 S
-ATOM 8 CE MET A 1 53.593 -84.022 8.045 1.00 16.11 C
-ATOM 9 H1 MET A 1 52.791 -90.398 9.287 1.00 0.00 H
-ATOM 10 H2 MET A 1 53.679 -88.951 9.217 1.00 0.00 H
-ATOM 11 H3 MET A 1 52.967 -89.583 7.810 1.00 0.00 H
-ATOM 12 HA MET A 1 51.021 -89.219 9.844 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.094 -86.735 9.838 1.00 0.00 H
-ATOM 14 HB3 MET A 1 52.510 -87.437 10.613 1.00 0.00 H
-ATOM 15 HG2 MET A 1 53.713 -87.220 8.401 1.00 0.00 H
-ATOM 16 HG3 MET A 1 52.333 -86.288 7.818 1.00 0.00 H
-ATOM 17 HE1 MET A 1 54.176 -83.138 8.264 1.00 0.00 H
-ATOM 18 HE2 MET A 1 52.587 -83.731 7.789 1.00 0.00 H
-ATOM 19 HE3 MET A 1 54.034 -84.556 7.215 1.00 0.00 H
+ATOM 1 N MSE A 1 52.857 -89.463 8.836 1.00 9.67 N
+ATOM 2 CA MSE A 1 51.617 -88.687 9.117 1.00 10.38 C
+ATOM 3 C MSE A 1 50.812 -88.526 7.834 1.00 9.62 C
+ATOM 4 O MSE A 1 51.362 -88.563 6.733 1.00 9.62 O
+ATOM 5 CB MSE A 1 51.991 -87.313 9.677 1.00 13.77 C
+ATOM 6 CG MSE A 1 52.898 -86.575 8.693 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.560 -85.095 9.501 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.593 -84.022 8.045 1.00 16.11 C
+ATOM 9 H1 MSE A 1 52.791 -90.398 9.287 1.00 0.00 H
+ATOM 10 H2 MSE A 1 53.679 -88.951 9.217 1.00 0.00 H
+ATOM 11 H3 MSE A 1 52.967 -89.583 7.810 1.00 0.00 H
+ATOM 12 HA MSE A 1 51.021 -89.219 9.844 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.094 -86.735 9.838 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 52.510 -87.437 10.613 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 53.713 -87.220 8.401 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 52.333 -86.288 7.818 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 54.176 -83.138 8.264 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 52.587 -83.731 7.789 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 54.034 -84.556 7.215 1.00 0.00 H
ATOM 20 N GLN A 2 49.502 -88.353 7.985 1.00 9.27 N
ATOM 21 CA GLN A 2 48.610 -88.190 6.839 1.00 9.07 C
ATOM 22 C GLN A 2 48.346 -86.712 6.590 1.00 8.72 C
@@ -3871,25 +3871,25 @@ ATOM 1231 HA3 GLY A 76 48.862 -74.289 -28.625 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 4
-ATOM 1 N MET A 1 52.843 -89.493 8.945 1.00 9.67 N
-ATOM 2 CA MET A 1 51.702 -88.594 9.278 1.00 10.38 C
-ATOM 3 C MET A 1 50.840 -88.389 8.038 1.00 9.62 C
-ATOM 4 O MET A 1 51.323 -88.488 6.911 1.00 9.62 O
-ATOM 5 CB MET A 1 52.242 -87.249 9.774 1.00 13.77 C
-ATOM 6 CG MET A 1 53.169 -86.641 8.722 1.00 16.29 C
-ATOM 7 SD MET A 1 54.026 -85.211 9.437 1.00 17.17 S
-ATOM 8 CE MET A 1 53.884 -84.095 8.019 1.00 16.11 C
-ATOM 9 H1 MET A 1 52.860 -89.666 7.920 1.00 0.00 H
-ATOM 10 H2 MET A 1 52.730 -90.397 9.449 1.00 0.00 H
-ATOM 11 H3 MET A 1 53.735 -89.044 9.233 1.00 0.00 H
-ATOM 12 HA MET A 1 51.106 -89.049 10.055 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.415 -86.577 9.954 1.00 0.00 H
-ATOM 14 HB3 MET A 1 52.789 -87.397 10.689 1.00 0.00 H
-ATOM 15 HG2 MET A 1 53.894 -87.379 8.412 1.00 0.00 H
-ATOM 16 HG3 MET A 1 52.587 -86.326 7.868 1.00 0.00 H
-ATOM 17 HE1 MET A 1 54.122 -84.634 7.112 1.00 0.00 H
-ATOM 18 HE2 MET A 1 54.570 -83.272 8.137 1.00 0.00 H
-ATOM 19 HE3 MET A 1 52.874 -83.713 7.963 1.00 0.00 H
+ATOM 1 N MSE A 1 52.843 -89.493 8.945 1.00 9.67 N
+ATOM 2 CA MSE A 1 51.702 -88.594 9.278 1.00 10.38 C
+ATOM 3 C MSE A 1 50.840 -88.389 8.038 1.00 9.62 C
+ATOM 4 O MSE A 1 51.323 -88.488 6.911 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.242 -87.249 9.774 1.00 13.77 C
+ATOM 6 CG MSE A 1 53.169 -86.641 8.722 1.00 16.29 C
+ATOM 7 SD MSE A 1 54.026 -85.211 9.437 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.884 -84.095 8.019 1.00 16.11 C
+ATOM 9 H1 MSE A 1 52.860 -89.666 7.920 1.00 0.00 H
+ATOM 10 H2 MSE A 1 52.730 -90.397 9.449 1.00 0.00 H
+ATOM 11 H3 MSE A 1 53.735 -89.044 9.233 1.00 0.00 H
+ATOM 12 HA MSE A 1 51.106 -89.049 10.055 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.415 -86.577 9.954 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 52.789 -87.397 10.689 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 53.894 -87.379 8.412 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 52.587 -86.326 7.868 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 54.122 -84.634 7.112 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 54.570 -83.272 8.137 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 52.874 -83.713 7.963 1.00 0.00 H
ATOM 20 N GLN A 2 49.558 -88.108 8.255 1.00 9.27 N
ATOM 21 CA GLN A 2 48.622 -87.893 7.152 1.00 9.07 C
ATOM 22 C GLN A 2 48.443 -86.406 6.891 1.00 8.72 C
@@ -5105,25 +5105,25 @@ ATOM 1231 HA3 GLY A 76 50.941 -75.217 -29.204 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 5
-ATOM 1 N MET A 1 53.726 -88.517 9.362 1.00 9.67 N
-ATOM 2 CA MET A 1 52.381 -87.906 9.564 1.00 10.38 C
-ATOM 3 C MET A 1 51.600 -87.939 8.255 1.00 9.62 C
-ATOM 4 O MET A 1 52.184 -87.930 7.172 1.00 9.62 O
-ATOM 5 CB MET A 1 52.547 -86.459 10.032 1.00 13.77 C
-ATOM 6 CG MET A 1 53.368 -85.671 9.011 1.00 16.29 C
-ATOM 7 SD MET A 1 53.795 -84.057 9.716 1.00 17.17 S
-ATOM 8 CE MET A 1 53.641 -83.079 8.201 1.00 16.11 C
-ATOM 9 H1 MET A 1 53.617 -89.519 9.109 1.00 0.00 H
-ATOM 10 H2 MET A 1 54.277 -88.437 10.241 1.00 0.00 H
-ATOM 11 H3 MET A 1 54.221 -88.020 8.595 1.00 0.00 H
-ATOM 12 HA MET A 1 51.847 -88.466 10.317 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.573 -86.004 10.134 1.00 0.00 H
-ATOM 14 HB3 MET A 1 53.052 -86.444 10.983 1.00 0.00 H
-ATOM 15 HG2 MET A 1 54.273 -86.215 8.782 1.00 0.00 H
-ATOM 16 HG3 MET A 1 52.792 -85.532 8.109 1.00 0.00 H
-ATOM 17 HE1 MET A 1 52.651 -83.214 7.786 1.00 0.00 H
-ATOM 18 HE2 MET A 1 54.377 -83.405 7.483 1.00 0.00 H
-ATOM 19 HE3 MET A 1 53.801 -82.035 8.431 1.00 0.00 H
+ATOM 1 N MSE A 1 53.726 -88.517 9.362 1.00 9.67 N
+ATOM 2 CA MSE A 1 52.381 -87.906 9.564 1.00 10.38 C
+ATOM 3 C MSE A 1 51.600 -87.939 8.255 1.00 9.62 C
+ATOM 4 O MSE A 1 52.184 -87.930 7.172 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.547 -86.459 10.032 1.00 13.77 C
+ATOM 6 CG MSE A 1 53.368 -85.671 9.011 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.795 -84.057 9.716 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.641 -83.079 8.201 1.00 16.11 C
+ATOM 9 H1 MSE A 1 53.617 -89.519 9.109 1.00 0.00 H
+ATOM 10 H2 MSE A 1 54.277 -88.437 10.241 1.00 0.00 H
+ATOM 11 H3 MSE A 1 54.221 -88.020 8.595 1.00 0.00 H
+ATOM 12 HA MSE A 1 51.847 -88.466 10.317 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.573 -86.004 10.134 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 53.052 -86.444 10.983 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 54.273 -86.215 8.782 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 52.792 -85.532 8.109 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 52.651 -83.214 7.786 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 54.377 -83.405 7.483 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 53.801 -82.035 8.431 1.00 0.00 H
ATOM 20 N GLN A 2 50.273 -87.975 8.361 1.00 9.27 N
ATOM 21 CA GLN A 2 49.411 -88.007 7.178 1.00 9.07 C
ATOM 22 C GLN A 2 48.888 -86.612 6.866 1.00 8.72 C
@@ -6339,25 +6339,25 @@ ATOM 1231 HA3 GLY A 76 52.706 -74.521 -29.054 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 6
-ATOM 1 N MET A 1 54.021 -89.121 8.946 1.00 9.67 N
-ATOM 2 CA MET A 1 52.663 -88.554 9.180 1.00 10.38 C
-ATOM 3 C MET A 1 51.876 -88.563 7.876 1.00 9.62 C
-ATOM 4 O MET A 1 52.453 -88.508 6.789 1.00 9.62 O
-ATOM 5 CB MET A 1 52.793 -87.120 9.700 1.00 13.77 C
-ATOM 6 CG MET A 1 53.582 -86.271 8.702 1.00 16.29 C
-ATOM 7 SD MET A 1 53.916 -84.648 9.438 1.00 17.17 S
-ATOM 8 CE MET A 1 53.710 -83.651 7.941 1.00 16.11 C
-ATOM 9 H1 MET A 1 54.709 -88.639 9.558 1.00 0.00 H
-ATOM 10 H2 MET A 1 54.286 -88.985 7.948 1.00 0.00 H
-ATOM 11 H3 MET A 1 54.016 -90.137 9.167 1.00 0.00 H
-ATOM 12 HA MET A 1 52.148 -89.156 9.914 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.808 -86.696 9.830 1.00 0.00 H
-ATOM 14 HB3 MET A 1 53.308 -87.126 10.647 1.00 0.00 H
-ATOM 15 HG2 MET A 1 54.517 -86.762 8.475 1.00 0.00 H
-ATOM 16 HG3 MET A 1 53.010 -86.148 7.795 1.00 0.00 H
-ATOM 17 HE1 MET A 1 54.477 -83.913 7.226 1.00 0.00 H
-ATOM 18 HE2 MET A 1 53.796 -82.605 8.190 1.00 0.00 H
-ATOM 19 HE3 MET A 1 52.734 -83.838 7.515 1.00 0.00 H
+ATOM 1 N MSE A 1 54.021 -89.121 8.946 1.00 9.67 N
+ATOM 2 CA MSE A 1 52.663 -88.554 9.180 1.00 10.38 C
+ATOM 3 C MSE A 1 51.876 -88.563 7.876 1.00 9.62 C
+ATOM 4 O MSE A 1 52.453 -88.508 6.789 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.793 -87.120 9.700 1.00 13.77 C
+ATOM 6 CG MSE A 1 53.582 -86.271 8.702 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.916 -84.648 9.438 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.710 -83.651 7.941 1.00 16.11 C
+ATOM 9 H1 MSE A 1 54.709 -88.639 9.558 1.00 0.00 H
+ATOM 10 H2 MSE A 1 54.286 -88.985 7.948 1.00 0.00 H
+ATOM 11 H3 MSE A 1 54.016 -90.137 9.167 1.00 0.00 H
+ATOM 12 HA MSE A 1 52.148 -89.156 9.914 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.808 -86.696 9.830 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 53.308 -87.126 10.647 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 54.517 -86.762 8.475 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 53.010 -86.148 7.795 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 54.477 -83.913 7.226 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 53.796 -82.605 8.190 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 52.734 -83.838 7.515 1.00 0.00 H
ATOM 20 N GLN A 2 50.553 -88.637 7.992 1.00 9.27 N
ATOM 21 CA GLN A 2 49.678 -88.658 6.819 1.00 9.07 C
ATOM 22 C GLN A 2 49.126 -87.265 6.549 1.00 8.72 C
@@ -7573,25 +7573,25 @@ ATOM 1231 HA3 GLY A 76 54.369 -78.311 -20.102 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 7
-ATOM 1 N MET A 1 53.488 -87.741 9.771 1.00 9.67 N
-ATOM 2 CA MET A 1 52.191 -87.034 9.969 1.00 10.38 C
-ATOM 3 C MET A 1 51.362 -87.117 8.693 1.00 9.62 C
-ATOM 4 O MET A 1 51.902 -87.262 7.596 1.00 9.62 O
-ATOM 5 CB MET A 1 52.458 -85.568 10.323 1.00 13.77 C
-ATOM 6 CG MET A 1 53.289 -84.909 9.222 1.00 16.29 C
-ATOM 7 SD MET A 1 53.806 -83.262 9.773 1.00 17.17 S
-ATOM 8 CE MET A 1 53.714 -82.432 8.167 1.00 16.11 C
-ATOM 9 H1 MET A 1 53.894 -87.472 8.852 1.00 0.00 H
-ATOM 10 H2 MET A 1 53.329 -88.770 9.793 1.00 0.00 H
-ATOM 11 H3 MET A 1 54.148 -87.476 10.529 1.00 0.00 H
-ATOM 12 HA MET A 1 51.647 -87.504 10.774 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.517 -85.047 10.422 1.00 0.00 H
-ATOM 14 HB3 MET A 1 52.996 -85.516 11.255 1.00 0.00 H
-ATOM 15 HG2 MET A 1 54.163 -85.511 9.022 1.00 0.00 H
-ATOM 16 HG3 MET A 1 52.698 -84.820 8.322 1.00 0.00 H
-ATOM 17 HE1 MET A 1 52.694 -82.456 7.809 1.00 0.00 H
-ATOM 18 HE2 MET A 1 54.353 -82.938 7.462 1.00 0.00 H
-ATOM 19 HE3 MET A 1 54.040 -81.406 8.275 1.00 0.00 H
+ATOM 1 N MSE A 1 53.488 -87.741 9.771 1.00 9.67 N
+ATOM 2 CA MSE A 1 52.191 -87.034 9.969 1.00 10.38 C
+ATOM 3 C MSE A 1 51.362 -87.117 8.693 1.00 9.62 C
+ATOM 4 O MSE A 1 51.902 -87.262 7.596 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.458 -85.568 10.323 1.00 13.77 C
+ATOM 6 CG MSE A 1 53.289 -84.909 9.222 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.806 -83.262 9.773 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.714 -82.432 8.167 1.00 16.11 C
+ATOM 9 H1 MSE A 1 53.894 -87.472 8.852 1.00 0.00 H
+ATOM 10 H2 MSE A 1 53.329 -88.770 9.793 1.00 0.00 H
+ATOM 11 H3 MSE A 1 54.148 -87.476 10.529 1.00 0.00 H
+ATOM 12 HA MSE A 1 51.647 -87.504 10.774 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.517 -85.047 10.422 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 52.996 -85.516 11.255 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 54.163 -85.511 9.022 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 52.698 -84.820 8.322 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 52.694 -82.456 7.809 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 54.353 -82.938 7.462 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 54.040 -81.406 8.275 1.00 0.00 H
ATOM 20 N GLN A 2 50.043 -87.023 8.847 1.00 9.27 N
ATOM 21 CA GLN A 2 49.129 -87.088 7.708 1.00 9.07 C
ATOM 22 C GLN A 2 48.742 -85.683 7.268 1.00 8.72 C
@@ -8807,25 +8807,25 @@ ATOM 1231 HA3 GLY A 76 57.091 -80.711 -19.305 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 8
-ATOM 1 N MET A 1 52.860 -87.633 9.893 1.00 9.67 N
-ATOM 2 CA MET A 1 51.639 -86.791 10.042 1.00 10.38 C
-ATOM 3 C MET A 1 50.836 -86.822 8.749 1.00 9.62 C
-ATOM 4 O MET A 1 51.381 -87.059 7.671 1.00 9.62 O
-ATOM 5 CB MET A 1 52.051 -85.353 10.371 1.00 13.77 C
-ATOM 6 CG MET A 1 52.971 -84.809 9.279 1.00 16.29 C
-ATOM 7 SD MET A 1 53.676 -83.234 9.829 1.00 17.17 S
-ATOM 8 CE MET A 1 53.745 -82.427 8.211 1.00 16.11 C
-ATOM 9 H1 MET A 1 53.262 -87.497 8.944 1.00 0.00 H
-ATOM 10 H2 MET A 1 52.608 -88.634 10.023 1.00 0.00 H
-ATOM 11 H3 MET A 1 53.563 -87.356 10.607 1.00 0.00 H
-ATOM 12 HA MET A 1 51.032 -87.181 10.845 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.168 -84.734 10.435 1.00 0.00 H
-ATOM 14 HB3 MET A 1 52.569 -85.334 11.316 1.00 0.00 H
-ATOM 15 HG2 MET A 1 53.768 -85.515 9.095 1.00 0.00 H
-ATOM 16 HG3 MET A 1 52.408 -84.656 8.370 1.00 0.00 H
-ATOM 17 HE1 MET A 1 52.755 -82.413 7.776 1.00 0.00 H
-ATOM 18 HE2 MET A 1 54.413 -82.972 7.564 1.00 0.00 H
-ATOM 19 HE3 MET A 1 54.107 -81.415 8.331 1.00 0.00 H
+ATOM 1 N MSE A 1 52.860 -87.633 9.893 1.00 9.67 N
+ATOM 2 CA MSE A 1 51.639 -86.791 10.042 1.00 10.38 C
+ATOM 3 C MSE A 1 50.836 -86.822 8.749 1.00 9.62 C
+ATOM 4 O MSE A 1 51.381 -87.059 7.671 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.051 -85.353 10.371 1.00 13.77 C
+ATOM 6 CG MSE A 1 52.971 -84.809 9.279 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.676 -83.234 9.829 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.745 -82.427 8.211 1.00 16.11 C
+ATOM 9 H1 MSE A 1 53.262 -87.497 8.944 1.00 0.00 H
+ATOM 10 H2 MSE A 1 52.608 -88.634 10.023 1.00 0.00 H
+ATOM 11 H3 MSE A 1 53.563 -87.356 10.607 1.00 0.00 H
+ATOM 12 HA MSE A 1 51.032 -87.181 10.845 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.168 -84.734 10.435 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 52.569 -85.334 11.316 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 53.768 -85.515 9.095 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 52.408 -84.656 8.370 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 52.755 -82.413 7.776 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 54.413 -82.972 7.564 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 54.107 -81.415 8.331 1.00 0.00 H
ATOM 20 N GLN A 2 49.530 -86.590 8.866 1.00 9.27 N
ATOM 21 CA GLN A 2 48.640 -86.598 7.704 1.00 9.07 C
ATOM 22 C GLN A 2 48.377 -85.177 7.228 1.00 8.72 C
@@ -10041,25 +10041,25 @@ ATOM 1231 HA3 GLY A 76 46.758 -68.642 -18.268 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 9
-ATOM 1 N MET A 1 55.070 -87.882 9.224 1.00 9.67 N
-ATOM 2 CA MET A 1 53.685 -87.417 9.516 1.00 10.38 C
-ATOM 3 C MET A 1 52.823 -87.562 8.269 1.00 9.62 C
-ATOM 4 O MET A 1 53.330 -87.557 7.147 1.00 9.62 O
-ATOM 5 CB MET A 1 53.722 -85.953 9.962 1.00 13.77 C
-ATOM 6 CG MET A 1 54.376 -85.095 8.879 1.00 16.29 C
-ATOM 7 SD MET A 1 54.661 -83.429 9.532 1.00 17.17 S
-ATOM 8 CE MET A 1 54.358 -82.517 7.998 1.00 16.11 C
-ATOM 9 H1 MET A 1 55.134 -88.181 8.231 1.00 0.00 H
-ATOM 10 H2 MET A 1 55.303 -88.684 9.845 1.00 0.00 H
-ATOM 11 H3 MET A 1 55.740 -87.106 9.394 1.00 0.00 H
-ATOM 12 HA MET A 1 53.266 -88.021 10.308 1.00 0.00 H
-ATOM 13 HB2 MET A 1 52.714 -85.605 10.136 1.00 0.00 H
-ATOM 14 HB3 MET A 1 54.291 -85.870 10.874 1.00 0.00 H
-ATOM 15 HG2 MET A 1 55.321 -85.535 8.595 1.00 0.00 H
-ATOM 16 HG3 MET A 1 53.730 -85.039 8.016 1.00 0.00 H
-ATOM 17 HE1 MET A 1 55.067 -82.835 7.246 1.00 0.00 H
-ATOM 18 HE2 MET A 1 54.475 -81.460 8.177 1.00 0.00 H
-ATOM 19 HE3 MET A 1 53.351 -82.714 7.658 1.00 0.00 H
+ATOM 1 N MSE A 1 55.070 -87.882 9.224 1.00 9.67 N
+ATOM 2 CA MSE A 1 53.685 -87.417 9.516 1.00 10.38 C
+ATOM 3 C MSE A 1 52.823 -87.562 8.269 1.00 9.62 C
+ATOM 4 O MSE A 1 53.330 -87.557 7.147 1.00 9.62 O
+ATOM 5 CB MSE A 1 53.722 -85.953 9.962 1.00 13.77 C
+ATOM 6 CG MSE A 1 54.376 -85.095 8.879 1.00 16.29 C
+ATOM 7 SD MSE A 1 54.661 -83.429 9.532 1.00 17.17 S
+ATOM 8 CE MSE A 1 54.358 -82.517 7.998 1.00 16.11 C
+ATOM 9 H1 MSE A 1 55.134 -88.181 8.231 1.00 0.00 H
+ATOM 10 H2 MSE A 1 55.303 -88.684 9.845 1.00 0.00 H
+ATOM 11 H3 MSE A 1 55.740 -87.106 9.394 1.00 0.00 H
+ATOM 12 HA MSE A 1 53.266 -88.021 10.308 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 52.714 -85.605 10.136 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 54.291 -85.870 10.874 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 55.321 -85.535 8.595 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 53.730 -85.039 8.016 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 55.067 -82.835 7.246 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 54.475 -81.460 8.177 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 53.351 -82.714 7.658 1.00 0.00 H
ATOM 20 N GLN A 2 51.516 -87.697 8.473 1.00 9.27 N
ATOM 21 CA GLN A 2 50.577 -87.852 7.362 1.00 9.07 C
ATOM 22 C GLN A 2 49.916 -86.521 7.030 1.00 8.72 C
@@ -11275,25 +11275,25 @@ ATOM 1231 HA3 GLY A 76 51.135 -74.673 -29.075 1.00 0.00 H
TER 1232 GLY A 76
ENDMDL
MODEL 10
-ATOM 1 N MET A 1 53.579 -87.805 9.683 1.00 9.67 N
-ATOM 2 CA MET A 1 52.285 -87.087 9.857 1.00 10.38 C
-ATOM 3 C MET A 1 51.483 -87.158 8.563 1.00 9.62 C
-ATOM 4 O MET A 1 52.047 -87.267 7.475 1.00 9.62 O
-ATOM 5 CB MET A 1 52.558 -85.625 10.218 1.00 13.77 C
-ATOM 6 CG MET A 1 53.411 -84.970 9.131 1.00 16.29 C
-ATOM 7 SD MET A 1 53.926 -83.325 9.690 1.00 17.17 S
-ATOM 8 CE MET A 1 53.868 -82.498 8.081 1.00 16.11 C
-ATOM 9 H1 MET A 1 53.399 -88.823 9.576 1.00 0.00 H
-ATOM 10 H2 MET A 1 54.179 -87.644 10.519 1.00 0.00 H
-ATOM 11 H3 MET A 1 54.063 -87.450 8.835 1.00 0.00 H
-ATOM 12 HA MET A 1 51.725 -87.553 10.654 1.00 0.00 H
-ATOM 13 HB2 MET A 1 51.620 -85.096 10.303 1.00 0.00 H
-ATOM 14 HB3 MET A 1 53.082 -85.578 11.158 1.00 0.00 H
-ATOM 15 HG2 MET A 1 54.286 -85.577 8.947 1.00 0.00 H
-ATOM 16 HG3 MET A 1 52.836 -84.881 8.222 1.00 0.00 H
-ATOM 17 HE1 MET A 1 52.879 -82.605 7.658 1.00 0.00 H
-ATOM 18 HE2 MET A 1 54.592 -82.945 7.419 1.00 0.00 H
-ATOM 19 HE3 MET A 1 54.098 -81.449 8.209 1.00 0.00 H
+ATOM 1 N MSE A 1 53.579 -87.805 9.683 1.00 9.67 N
+ATOM 2 CA MSE A 1 52.285 -87.087 9.857 1.00 10.38 C
+ATOM 3 C MSE A 1 51.483 -87.158 8.563 1.00 9.62 C
+ATOM 4 O MSE A 1 52.047 -87.267 7.475 1.00 9.62 O
+ATOM 5 CB MSE A 1 52.558 -85.625 10.218 1.00 13.77 C
+ATOM 6 CG MSE A 1 53.411 -84.970 9.131 1.00 16.29 C
+ATOM 7 SD MSE A 1 53.926 -83.325 9.690 1.00 17.17 S
+ATOM 8 CE MSE A 1 53.868 -82.498 8.081 1.00 16.11 C
+ATOM 9 H1 MSE A 1 53.399 -88.823 9.576 1.00 0.00 H
+ATOM 10 H2 MSE A 1 54.179 -87.644 10.519 1.00 0.00 H
+ATOM 11 H3 MSE A 1 54.063 -87.450 8.835 1.00 0.00 H
+ATOM 12 HA MSE A 1 51.725 -87.553 10.654 1.00 0.00 H
+ATOM 13 HB2 MSE A 1 51.620 -85.096 10.303 1.00 0.00 H
+ATOM 14 HB3 MSE A 1 53.082 -85.578 11.158 1.00 0.00 H
+ATOM 15 HG2 MSE A 1 54.286 -85.577 8.947 1.00 0.00 H
+ATOM 16 HG3 MSE A 1 52.836 -84.881 8.222 1.00 0.00 H
+ATOM 17 HE1 MSE A 1 52.879 -82.605 7.658 1.00 0.00 H
+ATOM 18 HE2 MSE A 1 54.592 -82.945 7.419 1.00 0.00 H
+ATOM 19 HE3 MSE A 1 54.098 -81.449 8.209 1.00 0.00 H
ATOM 20 N GLN A 2 50.160 -87.096 8.691 1.00 9.27 N
ATOM 21 CA GLN A 2 49.270 -87.155 7.530 1.00 9.07 C
ATOM 22 C GLN A 2 48.831 -85.753 7.129 1.00 8.72 C
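Several of the 1d3z test models above now record the N-terminal residue as MSE (selenomethionine) instead of MET. A reader that wants a plain amino-acid sequence from such records has to fold modified residues back onto their parent type; the sketch below is a minimal, hypothetical illustration of that mapping in Python (the dictionaries and the function name are assumptions made for this example, not the package's API).

PARENT_RESIDUE = {'MSE': 'MET'}                        # selenomethionine -> methionine (illustrative subset)
THREE_TO_ONE = {'MET': 'M', 'GLN': 'Q', 'GLY': 'G'}    # subset covering the residues shown above

def one_letter(residue_name):
    """Map a PDB residue name to a one-letter code, normalizing MSE to MET."""
    parent = PARENT_RESIDUE.get(residue_name, residue_name)
    return THREE_TO_ONE.get(parent, 'X')

# one_letter('MSE') == one_letter('MET') == 'M'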
diff --git a/csb/test/data/2l01.v2.str b/csb/test/data/2l01.v2.str
new file mode 100644
index 0000000..372b573
--- /dev/null
+++ b/csb/test/data/2l01.v2.str
@@ -0,0 +1,31 @@
+save_assigned_chem_shift_list
+save_
+
+save_assigned_chem_shift_list_1
+
+ loop_
+ _Atom_shift_assign_ID
+ _Residue_author_seq_code
+ _Residue_seq_code
+ _Residue_label
+ _Atom_name
+ _Atom_type
+ _Chem_shift_value
+ _Chem_shift_value_error
+ _Chem_shift_ambiguity_code
+
+ 1 1 1 MET HA H 3.977 0.020 1
+ 2 1 1 MET HB2 H 2.092 0.020 1
+ 3 1 1 MET HB3 H 2.092 0.020 1
+ 4 1 1 MET HE H 2.111 0.020 1
+ 5 1 1 MET HG2 H 2.580 0.020 1
+ 6 1 1 MET HG3 H 2.580 0.020 1
+ 7 1 1 MET CA C 55.300 0.200 1
+ 8 1 1 MET CB C 33.840 0.200 1
+ 9 1 1 MET CE C 16.841 0.200 1
+ 10 1 1 MET CG C 30.975 0.200 1
+ 11 2 2 LYS HA H 4.423 0.020 1
+
+ stop_
+
+save_
diff --git a/csb/test/data/2l01.v3.str b/csb/test/data/2l01.v3.str
new file mode 100644
index 0000000..a6209cb
--- /dev/null
+++ b/csb/test/data/2l01.v3.str
@@ -0,0 +1,45 @@
+save_assigned_chem_shift_list
+save_
+
+save_assigned_chem_shift_list_1
+
+ loop_
+ _Atom_chem_shift.ID
+ _Atom_chem_shift.Assembly_atom_ID
+ _Atom_chem_shift.Entity_assembly_ID
+ _Atom_chem_shift.Entity_ID
+ _Atom_chem_shift.Comp_index_ID
+ _Atom_chem_shift.Seq_ID
+ _Atom_chem_shift.Comp_ID
+ _Atom_chem_shift.Atom_ID
+ _Atom_chem_shift.Atom_type
+ _Atom_chem_shift.Atom_isotope_number
+ _Atom_chem_shift.Val
+ _Atom_chem_shift.Val_err
+ _Atom_chem_shift.Assign_fig_of_merit
+ _Atom_chem_shift.Ambiguity_code
+ _Atom_chem_shift.Occupancy
+ _Atom_chem_shift.Resonance_ID
+ _Atom_chem_shift.Auth_entity_assembly_ID
+ _Atom_chem_shift.Auth_seq_ID
+ _Atom_chem_shift.Auth_comp_ID
+ _Atom_chem_shift.Auth_atom_ID
+ _Atom_chem_shift.Details
+ _Atom_chem_shift.Entry_ID
+ _Atom_chem_shift.Assigned_chem_shift_list_ID
+
+ 1 . 1 1 1 1 MET HA H 1 3.977 0.020 . 1 . . . 1 MET HA . 17025 1
+ 2 . 1 1 1 1 MET HB2 H 1 2.092 0.020 . 1 . . . 1 MET HB2 . 17025 1
+ 3 . 1 1 1 1 MET HB3 H 1 2.092 0.020 . 1 . . . 1 MET HB3 . 17025 1
+ 4 . 1 1 1 1 MET HE1 H 1 2.111 0.020 . 1 . . . 1 MET HE . 17025 1
+ 5 . 1 1 1 1 MET HG2 H 1 2.580 0.020 . 1 . . . 1 MET HG2 . 17025 1
+ 6 . 1 1 1 1 MET HG3 H 1 2.580 0.020 . 1 . . . 1 MET HG3 . 17025 1
+ 7 . 1 1 1 1 MET CA C 13 55.300 0.200 . 1 . . . 1 MET CA . 17025 1
+ 8 . 1 1 1 1 MET CB C 13 33.840 0.200 . 1 . . . 1 MET CB . 17025 1
+ 9 . 1 1 1 1 MET CE C 13 16.841 0.200 . 1 . . . 1 MET CE . 17025 1
+ 10 . 1 1 1 1 MET CG C 13 30.975 0.200 . 1 . . . 1 MET CG . 17025 1
+ 12 . 1 1 2 2 LYS HA H 1 4.423 0.020 . 1 . . . 2 LYS HA . 17025 1
+
+ stop_
+
+save_
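The two 2l01 files store the same assigned chemical shifts in the NMR-STAR 2.1 and 3.1 loop layouts, respectively. Assuming whitespace-delimited data rows between loop_ and stop_, a generic reader for the 2.1-style table could look like the following sketch; the function name and the chosen column indices are assumptions for illustration, not the parser shipped with this package.

def read_shift_loop_v2(path):
    """Collect (seq, residue, atom, element, shift) tuples from a v2-style loop."""
    shifts = []
    in_loop = False
    with open(path) as star:
        for raw in star:
            line = raw.strip()
            if line == 'loop_':
                in_loop = True
            elif line == 'stop_':
                in_loop = False
            elif in_loop and line and not line.startswith('_'):
                fields = line.split()
                # columns: ID, author seq, seq, residue label, atom name, atom type, value, error, ambiguity
                shifts.append((int(fields[2]), fields[3], fields[4],
                               fields[5], float(fields[6])))
    return shifts

# read_shift_loop_v2('csb/test/data/2l01.v2.str')[0] -> (1, 'MET', 'HA', 'H', 3.977)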
diff --git a/csb/test/data/Sparky.peaks b/csb/test/data/Sparky.peaks
new file mode 100644
index 0000000..0d22cba
--- /dev/null
+++ b/csb/test/data/Sparky.peaks
@@ -0,0 +1,5 @@
+ Assignment w1 w2 w3 Data Height Note
+
+ ?-?-? 3.418 114.437 7.440 157921
+ ?-?-? 0.972 114.476 7.443 204746
+ ?-?-? 1.147 114.481 7.445 147454
diff --git a/csb/test/data/Xeasy1.peaks b/csb/test/data/Xeasy1.peaks
new file mode 100644
index 0000000..06be499
--- /dev/null
+++ b/csb/test/data/Xeasy1.peaks
@@ -0,0 +1,9 @@
+# Number of dimensions 3
+#INAME 1 H1
+#INAME 2 C2
+#INAME 3 H3
+#CYANAFORMAT hCH
+ 1 7.050 10.374 0.889 2 U 1.565890e+05 0.00e+00 m 0 0 0 0 0
+ 2 8.921 10.397 0.892 2 U 1.291120e+05 0.00e+00 m 0 0 0 0 0
+ 3 2.307 10.430 0.891 2 U 4.243830e+05 0.00e+00 m 0 0 0 0 0
+
diff --git a/csb/test/data/Xeasy2.peaks b/csb/test/data/Xeasy2.peaks
new file mode 100644
index 0000000..ffc81e2
--- /dev/null
+++ b/csb/test/data/Xeasy2.peaks
@@ -0,0 +1,8 @@
+# Number of dimensions 3
+#INAME 1 H1
+#INAME 2 2C
+#INAME 3 3H
+ 1 7.050 10.374 0.889 2 U 1.565890e+05 0.00e+00 m 0 0 0 0 0
+ 2 8.921 10.397 0.892 2 U 1.291120e+05 0.00e+00 m 0 0 0 0 0
+ 3 2.307 10.430 0.891 2 U 4.243830e+05 0.00e+00 m 0 0 0 0 0
+
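The Sparky and XEASY peak lists above share the same basic shape: header or comment lines, then one whitespace-delimited row per peak carrying the peak number, one chemical shift per dimension, and an intensity. The sketch below reads the three-dimensional XEASY files generically; the function name, the fixed intensity column, and the dimension count are assumptions for illustration rather than the package's own reader.

def read_xeasy_peaks(path, dimensions=3):
    """Collect (number, shifts, intensity) tuples from an XEASY-style peak list."""
    peaks = []
    with open(path) as source:
        for raw in source:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue   # skip blank lines and '#' header comments
            fields = line.split()
            number = int(fields[0])
            shifts = tuple(float(f) for f in fields[1:1 + dimensions])
            intensity = float(fields[3 + dimensions])   # e.g. 1.565890e+05 in the first row above
            peaks.append((number, shifts, intensity))
    return peaks

# read_xeasy_peaks('csb/test/data/Xeasy1.peaks')[0] -> (1, (7.05, 10.374, 0.889), 156589.0)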
diff --git a/csb/test/data/csb.tsv b/csb/test/data/csb.tsv
index 06464c3..a1944b9 100644
--- a/csb/test/data/csb.tsv
+++ b/csb/test/data/csb.tsv
@@ -1,4 +1,4 @@
# @TSV ID:int A:float B:str
11 11.1 Row eleven
12 12.2 Row twelve
-13 13.3 Row thirteen
+13 Row thirteen
diff --git a/csb/test/data/mapping.pdb b/csb/test/data/mapping.pdb
new file mode 100644
index 0000000..8edb767
--- /dev/null
+++ b/csb/test/data/mapping.pdb
@@ -0,0 +1,12 @@
+HEADER RIBOSOME 30-MAR-01 1GIY
+COMPND MOL_ID: 1;
+COMPND 2 MOLECULE: 50S RIBOSOMAL PROTEIN L3;
+COMPND 3 CHAIN: E;
+SEQRES 1 E 338 LEU VAL ASN ASP GLU PRO ASN SER PRO ARG GLU GLY MET
+SEQRES 2 E 338 GLU GLU THR VAL PRO VAL THR VAL ILE GLU THR PRO PRO
+ATOM 3430 CA MET E 65 -35.315 183.547 344.254 1.00 0.00 C
+ATOM 3431 CA GLU E 66 -31.330 184.145 343.173 1.00 0.00 C
+ATOM 3432 CA THR E 67 -27.574 184.326 344.054 1.00 0.00 C
+ATOM 3433 CA VAL E 68 -25.637 187.772 343.919 1.00 0.00 C
+TER 3634 VAL E 68
+END
diff --git a/csb/test/data/mapping2.pdb b/csb/test/data/mapping2.pdb
new file mode 100644
index 0000000..df7e032
--- /dev/null
+++ b/csb/test/data/mapping2.pdb
@@ -0,0 +1,10 @@
+HEADER RIBOSOME 30-MAR-01 1GIY
+COMPND MOL_ID: 1;
+COMPND 2 MOLECULE: 50S RIBOSOMAL PROTEIN L3;
+COMPND 3 CHAIN: E;
+SEQRES 1 E 338 LEU VAL ASN ASP GLU PRO ASN SER PRO ARG GLU GLY MET
+SEQRES 2 E 338 GLU THR VAL PRO VAL THR VAL ILE GLU THR PRO PRO
+ATOM 3430 CA MET E 65 -35.315 183.547 344.254 1.00 0.00 C
+ATOM 3433 CA VAL E 68 -25.637 187.772 343.919 1.00 0.00 C
+TER 3634 VAL E 68
+END
diff --git a/csb/test/data/mapping3.pdb b/csb/test/data/mapping3.pdb
new file mode 100644
index 0000000..8a02f87
--- /dev/null
+++ b/csb/test/data/mapping3.pdb
@@ -0,0 +1,9 @@
+HEADER RIBOSOME 30-MAR-01 1GIY
+COMPND MOL_ID: 1;
+COMPND 2 MOLECULE: 50S RIBOSOMAL PROTEIN L3;
+COMPND 3 CHAIN: E;
+SEQRES 1 E 338 LEU VAL ASN ASP GLU PRO ASN
+ATOM 3430 CA SER E 65 -35.315 183.547 344.254 1.00 0.00 C
+ATOM 3433 CA GLY E 68 -25.637 187.772 343.919 1.00 0.00 C
+TER 3634 GLY E 68
+END
diff --git a/csb/test/data/modified.pdb b/csb/test/data/modified.pdb
new file mode 100644
index 0000000..97cd795
--- /dev/null
+++ b/csb/test/data/modified.pdb
@@ -0,0 +1,16 @@
+HEADER . 12-Mar-13 TEST
+COMPND 1 MOL_ID: 1;
+COMPND 2 MOLECULE: HYPOTHETICAL PROTEIN RV0983;
+COMPND 3 CHAIN: A;
+SEQRES 1 A 20 MET PRO PRO GLY SER VAL GLU GLN VAL ALA ALA LYS VAL
+SEQRES 2 A 20 VAL PRO SER VAL VAL MSE
+ATOM 95 N MSE A 21 55.075 23.677 19.139 1.00 33.35 N
+ATOM 96 CA MSE A 21 54.672 23.803 17.741 1.00 37.62 C
+ATOM 97 C MSE A 21 54.539 22.403 17.111 1.00 35.23 C
+ATOM 98 O MSE A 21 55.344 21.485 17.419 1.00 34.50 O
+ATOM 99 CB MSE A 21 55.662 24.626 16.915 1.00 34.96 C
+ATOM 100 CG MSE A 21 55.211 24.834 15.458 1.00 34.65 C
+ATOM 101 SE MSE A 21 56.402 26.367 14.841 1.00 52.51 Se
+ATOM 102 CE MSE A 21 56.143 26.398 12.634 1.00 50.46 C
+TER
+END
diff --git a/csb/test/data/modified2.pdb b/csb/test/data/modified2.pdb
new file mode 100644
index 0000000..35cf97a
--- /dev/null
+++ b/csb/test/data/modified2.pdb
@@ -0,0 +1,16 @@
+HEADER . 12-Mar-13 TEST
+COMPND 1 MOL_ID: 1;
+COMPND 2 MOLECULE: HYPOTHETICAL PROTEIN RV0983;
+COMPND 3 CHAIN: A;
+SEQRES 1 A 20 MSE PRO PRO GLY SER VAL GLU GLN VAL ALA ALA LYS VAL
+SEQRES 2 A 20 VAL PRO SER VAL VAL MET
+ATOM 95 N MSE A 21 55.075 23.677 19.139 1.00 33.35 N
+ATOM 96 CA MSE A 21 54.672 23.803 17.741 1.00 37.62 C
+ATOM 97 C MSE A 21 54.539 22.403 17.111 1.00 35.23 C
+ATOM 98 O MSE A 21 55.344 21.485 17.419 1.00 34.50 O
+ATOM 99 CB MSE A 21 55.662 24.626 16.915 1.00 34.96 C
+ATOM 100 CG MSE A 21 55.211 24.834 15.458 1.00 34.65 C
+ATOM 101 SE MSE A 21 56.402 26.367 14.841 1.00 52.51 Se
+ATOM 102 CE MSE A 21 56.143 26.398 12.634 1.00 50.46 C
+TER
+END
diff --git a/csb/test/data/out.clans b/csb/test/data/out.clans
index 131daef..2575cd6 100644
--- a/csb/test/data/out.clans
+++ b/csb/test/data/out.clans
@@ -121,12 +121,24 @@ size=12
hide=0
color=255;204;51
numbers=0;1;2;3;4;5;6;10;13;14;15;16;17;19;20;22;23;26;27;32;
-name=allergens
+name=allergens >= xyz
type=0
size=12
hide=1
color=255;102;51
numbers=7;8;9;11;12;21;24;25;28;29;30;31;36;37;38;39;40;
+name=empty group WITH terminal semicolon in numbers line
+type=0
+size=12
+hide=1
+color=255;102;51
+numbers=;
+name=empty group WITHOUT terminal semicolon in numbers line
+type=0
+size=12
+hide=1
+color=255;102;51
+numbers=
</seqgroups>
<pos>
0 -44.18891000 -32.72951000 -0.98480570
--
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/python-csb.git