[med-svn] [dssp] 02/11: New upstream version 3.0.0
Andreas Tille
tille at debian.org
Wed Sep 27 08:29:45 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository dssp.
commit 340276cc170105e676e302e3cd38fff22b15deed
Author: Andreas Tille <tille at debian.org>
Date: Wed Sep 27 09:37:11 2017 +0200
New upstream version 3.0.0
---
README.txt | 8 +-
changelog | 3 +
src/align-2d.h | 207 +--
src/buffer.h | 61 +-
src/dssp.cpp | 446 +++---
src/dssp.h | 10 +-
src/iocif.cpp | 748 +++++-----
src/iocif.h | 237 +--
src/mas.cpp | 2341 ++++++++++++++---------------
src/mas.h | 38 +-
src/matrix.h | 570 ++++----
src/matrix.inl | 16 -
src/mkdssp.cpp | 289 ++--
src/primitives-3d.cpp | 571 ++++----
src/primitives-3d.h | 176 +--
src/structure.cpp | 3894 +++++++++++++++++++++++++------------------------
src/structure.h | 679 ++++-----
src/utils.cpp | 235 ++-
src/utils.h | 92 +-
src/version.h | 6 +
20 files changed, 5474 insertions(+), 5153 deletions(-)
diff --git a/README.txt b/README.txt
index 4b86de3..71b351b 100644
--- a/README.txt
+++ b/README.txt
@@ -1,14 +1,14 @@
-This README is part of the source code distribution of DSSP 2.0
+This README is part of the source code distribution of DSSP 3.0
To build the mkdssp executable, you need a recent C++ compiler
capable of compiling C++0x code. You also need a recent version
-of the Boost libraries, we are currently using version 1.46.1.
+of the Boost libraries, we are currently using version 1.62.0.
The makefile reads a make.config file containing site specific
settings. This make.config file is created if it doesn't exist
when running make.
Please let me know if there are any problems with this code.
-You can reach me at: mailto:m.hekkelman at cmbi.ru.nl
+You can reach me at: mailto:coos.baakman at radboudumc.nl
--maarten hekkelman
+-Coos Baakman
diff --git a/changelog b/changelog
index cafe62f..f281968 100644
--- a/changelog
+++ b/changelog
@@ -1,3 +1,6 @@
+3.0.0
+- Added support for multiline chain IDs. Adds two new columns for the mmCIF chains
+
2.2.1
- Bug fix in mmCIF import code.
diff --git a/src/align-2d.h b/src/align-2d.h
old mode 100644
new mode 100755
index 96cf973..658734c
--- a/src/align-2d.h
+++ b/src/align-2d.h
@@ -1,57 +1,62 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef XSSP_ALIGN2D_H
+#define XSSP_ALIGN2D_H
#pragma once
+#include "mas.h"
+
+#include <algorithm>
#include <string>
#include <vector>
-#include <algorithm>
typedef std::basic_string<uint8> sec_structure;
// --------------------------------------------------------------------
-// entry is a multiple sequence alignment 'entry', a sequence with an ID and more.
+// entry is a multiple sequence alignment 'entry', a sequence with an ID and
+// more.
struct entry
{
- entry(const entry& e)
- : m_nr(e.m_nr)
- , m_id(e.m_id)
- , m_seq(e.m_seq)
- , m_weight(e.m_weight)
- , m_positions(e.m_positions)
- {
- }
-
- entry(uint32 nr, const std::string& id, const sequence& seq = sequence(), float weight = 1.0f)
- : m_nr(nr)
- , m_id(id)
- , m_seq(seq)
- , m_weight(weight) {}
-
- uint32 nr() const { return m_nr; }
- float weight() const { return m_weight; }
- uint32 length() const { return static_cast<uint32>(m_seq.length()); }
-
- //bool has_gaps() const { return std::find(m_seq.begin(), m_seq.end(), kSignalGapCode) != m_seq.end(); }
-
- void insert_gap(uint32 pos);
- void append_gap();
-
- void remove_gap(uint32 pos);
-
- void remove_gaps();
- void dump_positions() { m_positions.clear(); }
-
- uint32 m_nr;
- std::string m_id;
- sequence m_seq;
- sec_structure m_ss;
- float m_weight;
- std::vector<int16>
- m_positions;
+ entry(const entry& e)
+ : m_nr(e.m_nr)
+ , m_id(e.m_id)
+ , m_seq(e.m_seq)
+ , m_weight(e.m_weight)
+ , m_positions(e.m_positions)
+ {
+ }
+
+ entry(uint32 nr, const std::string& id, const sequence& seq = sequence(),
+ float weight = 1.0f)
+ : m_nr(nr)
+ , m_id(id)
+ , m_seq(seq)
+ , m_weight(weight) {}
+
+ uint32 nr() const { return m_nr; }
+ float weight() const { return m_weight; }
+ uint32 length() const { return static_cast<uint32>(m_seq.length()); }
+
+ void insert_gap(uint32 pos);
+ void append_gap();
+
+ void remove_gap(uint32 pos);
+
+ void remove_gaps();
+ void dump_positions() { m_positions.clear(); }
+
+ uint32 m_nr;
+ std::string m_id;
+ sequence m_seq;
+ sec_structure m_ss;
+ float m_weight;
+ std::vector<int16> m_positions;
};
// --------------------------------------------------------------------
@@ -59,77 +64,79 @@ struct entry
struct base_node
{
- virtual ~base_node() {}
+ virtual ~base_node() {}
- virtual void print(std::ostream& s) = 0;
+ virtual void print(std::ostream& s) = 0;
- virtual base_node* left() const { return 0; }
- virtual base_node* right() const { return 0; }
+ virtual base_node* left() const { return 0; }
+ virtual base_node* right() const { return 0; }
- virtual void add_weight(float w) = 0;
- virtual uint32 leaf_count() const { return 1; }
+ virtual void add_weight(float w) = 0;
+ virtual uint32 leaf_count() const { return 1; }
- virtual uint32 length() const = 0;
- virtual uint32 cost() const { return 0; }
- virtual uint32 cumulative_cost() const
- { return 0; }
+ virtual uint32 length() const = 0;
+ virtual uint32 cost() const { return 0; }
+ virtual uint32 cumulative_cost() const
+ { return 0; }
};
std::ostream& operator<<(std::ostream& lhs, base_node& rhs);
struct joined_node : public base_node
{
- joined_node();
-
- joined_node(base_node* left, base_node* right,
- float d_left, float d_right);
-
- virtual ~joined_node();
-
- virtual void print(std::ostream& s);
-
- virtual base_node* left() const { return m_left; }
- virtual base_node* right() const { return m_right; }
-
- virtual void add_weight(float w)
- {
- m_left->add_weight(w);
- m_right->add_weight(w);
- }
-
- virtual uint32 leaf_count() const { return m_leaf_count; }
- virtual uint32 length() const { return m_length; }
-
- virtual uint32 cost() const { return m_length * m_leaf_count; }
- virtual uint32 cumulative_cost() const
- { return cost() + m_left->cumulative_cost() + m_right->cumulative_cost(); }
-
- base_node* m_left;
- base_node* m_right;
- float m_d_left;
- float m_d_right;
- uint32 m_leaf_count;
- uint32 m_length;
+ joined_node();
+ joined_node(base_node* left, base_node* right, float d_left, float d_right);
+ virtual ~joined_node();
+
+ virtual void print(std::ostream& s);
+
+ virtual base_node* left() const { return m_left; }
+ virtual base_node* right() const { return m_right; }
+
+ virtual void add_weight(float w)
+ {
+ m_left->add_weight(w);
+ m_right->add_weight(w);
+ }
+
+ virtual uint32 leaf_count() const { return m_leaf_count; }
+ virtual uint32 length() const { return m_length; }
+
+ virtual uint32 cost() const { return m_length * m_leaf_count; }
+ virtual uint32 cumulative_cost() const
+ {
+ return cost() + m_left->cumulative_cost() + m_right->cumulative_cost();
+ }
+
+ base_node* m_left;
+ base_node* m_right;
+ float m_d_left;
+ float m_d_right;
+ uint32 m_leaf_count;
+ uint32 m_length;
};
struct leaf_node : public base_node
{
- leaf_node(entry& e)
- : m_entry(e)
- {
- m_entry.m_weight = 0;
- }
+ leaf_node(entry& e)
+ : m_entry(e)
+ {
+ m_entry.m_weight = 0;
+ }
- virtual void print(std::ostream& s);
+ virtual void print(std::ostream& s);
- virtual void add_weight(float w)
- {
- m_entry.m_weight += w;
- }
+ virtual void add_weight(float w)
+ {
+ m_entry.m_weight += w;
+ }
- virtual uint32 length() const { return static_cast<uint32>(m_entry.m_seq.length()); }
+ virtual uint32 length() const
+ {
+ return static_cast<uint32>(m_entry.m_seq.length());
+ }
- entry& m_entry;
+ entry& m_entry;
};
class substitution_matrix_family;
@@ -139,11 +146,13 @@ class matrix;
// prototype
void align(
- const joined_node* node,
- std::vector<entry*>& a, std::vector<entry*>& b, std::vector<entry*>& c,
- const substitution_matrix_family& mat_fam,
- float gop, float gep, float magic,
- bool ignorePositions);
+ const joined_node* node,
+ std::vector<entry*>& a, std::vector<entry*>& b, std::vector<entry*>& c,
+ const substitution_matrix_family& mat_fam,
+ float gop, float gep, float magic,
+ bool ignorePositions);
void print_matrix(std::ostream& os,
- const matrix<int8>& tb, const sequence& sx, const sequence& sy);
+ const matrix<int8>& tb, const sequence& sx, const sequence& sy);
+
+#endif
diff --git a/src/buffer.h b/src/buffer.h
index bcc7a0e..8944325 100755
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -1,60 +1,69 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
//
// buffer is a thread safe queue
+#ifndef XSSP_BUFFER_H
+#define XSSP_BUFFER_H
+
#pragma once
-#include <deque>
+#include "mas.h"
+
#include <boost/thread/mutex.hpp>
#include <boost/thread/condition.hpp>
+#include <deque>
+
template<class T, uint32 N = 100>
class buffer
{
public:
- buffer() {}
+ buffer() {}
- void put(T inValue);
- T get();
+ void put(T inValue);
+ T get();
private:
- buffer(const buffer&);
- buffer& operator=(const buffer&);
+ buffer(const buffer&);
+ buffer& operator=(const buffer&);
- std::deque<T> m_queue;
- boost::mutex m_mutex;
- boost::condition m_empty, m_full;
+ std::deque<T> m_queue;
+ boost::mutex m_mutex;
+ boost::condition m_empty, m_full;
};
template<class T, uint32 N>
void buffer<T,N>::put(T inValue)
{
- boost::mutex::scoped_lock lock(m_mutex);
+ boost::mutex::scoped_lock lock(m_mutex);
- while (m_queue.size() >= N)
- m_full.wait(lock);
-
- m_queue.push_back(inValue);
+ while (m_queue.size() >= N)
+ m_full.wait(lock);
- m_empty.notify_one();
+ m_queue.push_back(inValue);
+
+ m_empty.notify_one();
}
template<class T, uint32 N>
T buffer<T,N>::get()
{
- boost::mutex::scoped_lock lock(m_mutex);
+ boost::mutex::scoped_lock lock(m_mutex);
+
+ while (m_queue.empty())
+ m_empty.wait(lock);
- while (m_queue.empty())
- m_empty.wait(lock);
-
- T result = m_queue.front();
- m_queue.pop_front();
+ T result = m_queue.front();
+ m_queue.pop_front();
- m_full.notify_one();
-
- return result;
+ m_full.notify_one();
+
+ return result;
}
+
+#endif
diff --git a/src/dssp.cpp b/src/dssp.cpp
old mode 100644
new mode 100755
index 6d8ebe1..29c1f82
--- a/src/dssp.cpp
+++ b/src/dssp.cpp
@@ -1,239 +1,249 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
#include "mas.h"
+#include "dssp.h"
+#include "structure.h"
+
+#include <boost/bind.hpp>
+#include <boost/date_time/date_clock_device.hpp>
+#include <boost/date_time/gregorian/gregorian.hpp>
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+
#if defined(_MSC_VER)
#include <conio.h>
#include <ctype.h>
#endif
-
#include <iostream>
-#include <boost/format.hpp>
-#include <boost/foreach.hpp>
-#define foreach BOOST_FOREACH
-#include <boost/bind.hpp>
-#include <boost/date_time/gregorian/gregorian.hpp>
-#include <boost/date_time/date_clock_device.hpp>
-#include "dssp.h"
-#include "structure.h"
+#define foreach BOOST_FOREACH
-using namespace std;
-string ResidueToDSSPLine(const MResidue& residue)
+std::string ResidueToDSSPLine(const MResidue& residue)
{
-/*
- This is the header line for the residue lines in a DSSP file:
-
- # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA
+/*
+ This is the header line for the residue lines in a DSSP file:
+ # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA CHAIN
*/
- boost::format kDSSPResidueLine(
- "%5.5d%5.5d%1.1s%1.1s %c %c %c%c%c%c%c%c%c%4.4d%4.4d%c%4.4d %11s%11s%11s%11s %6.3f%6.1f%6.1f%6.1f%6.1f %6.1f %6.1f %6.1f");
-
- const MAtom& ca = residue.GetCAlpha();
-
- char code = kResidueInfo[residue.GetType()].code;
- if (residue.GetType() == kCysteine and residue.GetSSBridgeNr() != 0)
- code = 'a' + ((residue.GetSSBridgeNr() - 1) % 26);
-
- char ss;
- switch (residue.GetSecondaryStructure())
- {
- case alphahelix: ss = 'H'; break;
- case betabridge: ss = 'B'; break;
- case strand: ss = 'E'; break;
- case helix_3: ss = 'G'; break;
- case helix_5: ss = 'I'; break;
- case turn: ss = 'T'; break;
- case bend: ss = 'S'; break;
- case loop: ss = ' '; break;
- }
-
- char helix[3];
- for (uint32 stride = 3; stride <= 5; ++stride)
- {
- switch (residue.GetHelixFlag(stride))
- {
- case helixNone: helix[stride - 3] = ' '; break;
- case helixStart: helix[stride - 3] = '>'; break;
- case helixEnd: helix[stride - 3] = '<'; break;
- case helixStartAndEnd: helix[stride - 3] = 'X'; break;
- case helixMiddle: helix[stride - 3] = '0' + stride; break;
- }
- }
-
- char bend = ' ';
- if (residue.IsBend())
- bend = 'S';
-
- double alpha;
- char chirality;
- tr1::tie(alpha,chirality) = residue.Alpha();
-
- uint32 bp[2] = {};
- char bridgelabel[2] = { ' ', ' ' };
- for (uint32 i = 0; i < 2; ++i)
- {
- MBridgeParner p = residue.GetBetaPartner(i);
- if (p.residue != nullptr)
- {
- bp[i] = p.residue->GetNumber();
- bp[i] %= 10000; // won't fit otherwise...
- bridgelabel[i] = 'A' + p.ladder % 26;
- if (p.parallel)
- bridgelabel[i] = tolower(bridgelabel[i]);
- }
- }
-
- char sheet = ' ';
- if (residue.GetSheet() != 0)
- sheet = 'A' + (residue.GetSheet() - 1) % 26;
-
- string NHO[2], ONH[2];
- const HBond* acceptors = residue.Acceptor();
- const HBond* donors = residue.Donor();
- for (uint32 i = 0; i < 2; ++i)
- {
- NHO[i] = ONH[i] = "0, 0.0";
-
- if (acceptors[i].residue != nullptr)
- {
- int32 d = acceptors[i].residue->GetNumber() - residue.GetNumber();
- NHO[i] = (boost::format("%d,%3.1f") % d % acceptors[i].energy).str();
- }
-
- if (donors[i].residue != nullptr)
- {
- int32 d = donors[i].residue->GetNumber() - residue.GetNumber();
- ONH[i] = (boost::format("%d,%3.1f") % d % donors[i].energy).str();
- }
- }
-
- return (kDSSPResidueLine % residue.GetNumber() % ca.mResSeq % ca.mICode % ca.mChainID % code %
- ss % helix[0] % helix[1] % helix[2] % bend % chirality % bridgelabel[0] % bridgelabel[1] %
- bp[0] % bp[1] % sheet % floor(residue.Accessibility() + 0.5) %
- NHO[0] % ONH[0] % NHO[1] % ONH[1] %
- residue.TCO() % residue.Kappa() % alpha % residue.Phi() % residue.Psi() %
- ca.mLoc.mX % ca.mLoc.mY % ca.mLoc.mZ).str();
+ boost::format kDSSPResidueLine(
+ "%5.5d%5.5d%1.1s%1.1s %c %c %c%c%c%c%c%c%c%4.4d%4.4d%c%4.4d %11s%11s%11s%11s %6.3f%6.1f%6.1f%6.1f%6.1f %6.1f %6.1f %6.1f %4.4s");
+
+ const MAtom& ca = residue.GetCAlpha();
+
+ char code = kResidueInfo[residue.GetType()].code;
+ if (residue.GetType() == kCysteine and residue.GetSSBridgeNr() != 0)
+ code = 'a' + ((residue.GetSSBridgeNr() - 1) % 26);
+
+ char ss;
+ switch (residue.GetSecondaryStructure())
+ {
+ case alphahelix: ss = 'H'; break;
+ case betabridge: ss = 'B'; break;
+ case strand: ss = 'E'; break;
+ case helix_3: ss = 'G'; break;
+ case helix_5: ss = 'I'; break;
+ case turn: ss = 'T'; break;
+ case bend: ss = 'S'; break;
+ case loop: ss = ' '; break;
+ }
+
+ char helix[3];
+ for (uint32 stride = 3; stride <= 5; ++stride)
+ {
+ switch (residue.GetHelixFlag(stride))
+ {
+ case helixNone: helix[stride - 3] = ' '; break;
+ case helixStart: helix[stride - 3] = '>'; break;
+ case helixEnd: helix[stride - 3] = '<'; break;
+ case helixStartAndEnd: helix[stride - 3] = 'X'; break;
+ case helixMiddle: helix[stride - 3] = '0' + stride; break;
+ }
+ }
+
+ char bend = ' ';
+ if (residue.IsBend())
+ bend = 'S';
+
+ double alpha;
+ char chirality;
+ std::tr1::tie(alpha,chirality) = residue.Alpha();
+
+ uint32 bp[2] = {};
+ char bridgelabel[2] = { ' ', ' ' };
+ for (uint32 i = 0; i < 2; ++i)
+ {
+ MBridgeParner p = residue.GetBetaPartner(i);
+ if (p.residue != nullptr)
+ {
+ bp[i] = p.residue->GetNumber();
+ bp[i] %= 10000; // won't fit otherwise...
+ bridgelabel[i] = 'A' + p.ladder % 26;
+ if (p.parallel)
+ bridgelabel[i] = tolower(bridgelabel[i]);
+ }
+ }
+
+ char sheet = ' ';
+ if (residue.GetSheet() != 0)
+ sheet = 'A' + (residue.GetSheet() - 1) % 26;
+
+ std::string NHO[2], ONH[2];
+ const HBond* acceptors = residue.Acceptor();
+ const HBond* donors = residue.Donor();
+ for (uint32 i = 0; i < 2; ++i)
+ {
+ NHO[i] = ONH[i] = "0, 0.0";
+
+ if (acceptors[i].residue != nullptr)
+ {
+ int32 d = acceptors[i].residue->GetNumber() - residue.GetNumber();
+ NHO[i] = (boost::format("%d,%3.1f") % d % acceptors[i].energy).str();
+ }
+
+ if (donors[i].residue != nullptr)
+ {
+ int32 d = donors[i].residue->GetNumber() - residue.GetNumber();
+ ONH[i] = (boost::format("%d,%3.1f") % d % donors[i].energy).str();
+ }
+ }
+
+ std::string chainChar = ca.mChainID,
+ long_ChainID = "";
+ if (ca.mChainID.length () > 1)
+ {
+ // For mmCIF compatibility
+
+ chainChar = ">";
+ long_ChainID = ca.mChainID;
+ }
+
+ return (kDSSPResidueLine % residue.GetNumber() % ca.mResSeq % ca.mICode % chainChar % code %
+ ss % helix[0] % helix[1] % helix[2] % bend % chirality % bridgelabel[0] % bridgelabel[1] %
+ bp[0] % bp[1] % sheet % floor(residue.Accessibility() + 0.5) %
+ NHO[0] % ONH[0] % NHO[1] % ONH[1] %
+ residue.TCO() % residue.Kappa() % alpha % residue.Phi() % residue.Psi() %
+ ca.mLoc.mX % ca.mLoc.mY % ca.mLoc.mZ % long_ChainID).str();
}
-void WriteDSSP(MProtein& protein, ostream& os)
+void WriteDSSP(MProtein& protein, std::ostream& os)
{
- const string kFirstLine("==== Secondary Structure Definition by the program DSSP, CMBI version by M.L. Hekkelman/2010-10-21 ==== ");
- boost::format kHeaderLine("%1% %|127t|%2%");
-
- using namespace boost::gregorian;
-
- uint32 nrOfResidues, nrOfChains, nrOfSSBridges, nrOfIntraChainSSBridges, nrOfHBonds;
- uint32 nrOfHBondsPerDistance[11] = {};
-
- protein.GetStatistics(nrOfResidues, nrOfChains, nrOfSSBridges, nrOfIntraChainSSBridges, nrOfHBonds, nrOfHBondsPerDistance);
-
- date today = day_clock::local_day();
-
- os << kHeaderLine % (kFirstLine + "DATE=" + to_iso_extended_string(today)) % '.' << endl;
- os << kHeaderLine % "REFERENCE W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637" % '.' << endl;
- os << kHeaderLine % protein.GetHeader() % '.' << endl;
- if (not protein.GetCompound().empty())
- os << kHeaderLine % protein.GetCompound() % '.' << endl;
- if (not protein.GetSource().empty())
- os << kHeaderLine % protein.GetSource() % '.' << endl;
- if (not protein.GetAuthor().empty())
- os << kHeaderLine % protein.GetAuthor() % '.' << endl;
-
- double accessibleSurface = 0; // calculate accessibility as
- foreach (const MChain* chain, protein.GetChains())
- {
- foreach (const MResidue* residue, chain->GetResidues())
- accessibleSurface += residue->Accessibility();
- }
-
- os << boost::format("%5.5d%3.3d%3.3d%3.3d%3.3d TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) %|127t|%c") %
- nrOfResidues % nrOfChains % nrOfSSBridges % nrOfIntraChainSSBridges % (nrOfSSBridges - nrOfIntraChainSSBridges) % '.' << endl;
- os << kHeaderLine % (boost::format("%8.1f ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2)") % accessibleSurface) % '.' << endl;
-
- // hydrogenbond summary
-
- os << kHeaderLine % (
- boost::format("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES")
- % nrOfHBonds % (nrOfHBonds * 100.0 / nrOfResidues)) % '.' << endl;
-
- uint32 nrOfHBondsInParallelBridges = protein.GetNrOfHBondsInParallelBridges();
- os << kHeaderLine % (
- boost::format("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES")
- % nrOfHBondsInParallelBridges % (nrOfHBondsInParallelBridges * 100.0 / nrOfResidues)) % '.' << endl;
-
- uint32 nrOfHBondsInAntiparallelBridges = protein.GetNrOfHBondsInAntiparallelBridges();
- os << kHeaderLine % (
- boost::format("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES")
- % nrOfHBondsInAntiparallelBridges % (nrOfHBondsInAntiparallelBridges * 100.0 / nrOfResidues)) % '.' << endl;
-
- boost::format kHBondsLine("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I%c%1.1d), SAME NUMBER PER 100 RESIDUES");
- for (int32 k = 0; k < 11; ++k)
- {
- os << kHeaderLine % (kHBondsLine % nrOfHBondsPerDistance[k] % (nrOfHBondsPerDistance[k] * 100.0 / nrOfResidues) % (k - 5 < 0 ? '-' : '+') % abs(k - 5)) % '.' << endl;
- }
-
- // histograms...
-
- uint32 histogram[kHistogramSize];
- os << " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** ." << endl;
-
- protein.GetResiduesPerAlphaHelixHistogram(histogram);
- for (uint32 i = 0; i < kHistogramSize; ++i)
- os << boost::format("%3.3d") % histogram[i];
- os << " RESIDUES PER ALPHA HELIX ." << endl;
-
- protein.GetParallelBridgesPerLadderHistogram(histogram);
- for (uint32 i = 0; i < kHistogramSize; ++i)
- os << boost::format("%3.3d") % histogram[i];
- os << " PARALLEL BRIDGES PER LADDER ." << endl;
-
- protein.GetAntiparallelBridgesPerLadderHistogram(histogram);
- for (uint32 i = 0; i < kHistogramSize; ++i)
- os << boost::format("%3.3d") % histogram[i];
- os << " ANTIPARALLEL BRIDGES PER LADDER ." << endl;
-
- protein.GetLaddersPerSheetHistogram(histogram);
- for (uint32 i = 0; i < kHistogramSize; ++i)
- os << boost::format("%3.3d") % histogram[i];
- os << " LADDERS PER SHEET ." << endl;
-
- // per residue information
-
- os << " # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA " << endl;
- boost::format kDSSPResidueLine(
- "%5.5d !%c 0 0 0 0, 0.0 0, 0.0 0, 0.0 0, 0.0 0.000 360.0 360.0 360.0 360.0 0.0 0.0 0.0");
-
- vector<const MResidue*> residues;
-
- foreach (const MChain* chain, protein.GetChains())
- {
- foreach (const MResidue* residue, chain->GetResidues())
- residues.push_back(residue);
- }
-
- // keep residues sorted by residue number as assigned during reading the PDB file
- sort(residues.begin(), residues.end(), boost::bind(&MResidue::GetNumber, _1) < boost::bind(&MResidue::GetNumber, _2));
-
- const MResidue* last = nullptr;
- foreach (const MResidue* residue, residues)
- {
- // insert a break line whenever we detect missing residues
- // can be the transition to a different chain, or missing residues in the current chain
- if (last != nullptr and last->GetNumber() + 1 != residue->GetNumber())
- {
- char breaktype = ' ';
- if (last->GetChainID() != residue->GetChainID())
- breaktype = '*';
- os << (kDSSPResidueLine % (last->GetNumber() + 1) % breaktype) << endl;
- }
- os << ResidueToDSSPLine(*residue) << endl;
- last = residue;
- }
+ const std::string kFirstLine("==== Secondary Structure Definition by the program DSSP, CMBI version 2.0 ==== ");
+ boost::format kHeaderLine("%1% %|127t|%2%");
+
+ using namespace boost::gregorian;
+
+ uint32 nrOfResidues, nrOfChains, nrOfSSBridges, nrOfIntraChainSSBridges, nrOfHBonds;
+ uint32 nrOfHBondsPerDistance[11] = {};
+
+ protein.GetStatistics(nrOfResidues, nrOfChains, nrOfSSBridges, nrOfIntraChainSSBridges, nrOfHBonds, nrOfHBondsPerDistance);
+
+ date today = day_clock::local_day();
+
+ os << kHeaderLine % (kFirstLine + "DATE=" + to_iso_extended_string(today)) % '.' << std::endl;
+ os << kHeaderLine % "REFERENCE W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637" % '.' << std::endl;
+ os << kHeaderLine % protein.GetHeader() % '.' << std::endl;
+ if (not protein.GetCompound().empty())
+ os << kHeaderLine % protein.GetCompound() % '.' << std::endl;
+ if (not protein.GetSource().empty())
+ os << kHeaderLine % protein.GetSource() % '.' << std::endl;
+ if (not protein.GetAuthor().empty())
+ os << kHeaderLine % protein.GetAuthor() % '.' << std::endl;
+
+ double accessibleSurface = 0; // calculate accessibility as
+ foreach (const MChain* chain, protein.GetChains())
+ {
+ foreach (const MResidue* residue, chain->GetResidues())
+ accessibleSurface += residue->Accessibility();
+ }
+
+ os << boost::format("%5.5d%3.3d%3.3d%3.3d%3.3d TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) %|127t|%c") %
+ nrOfResidues % nrOfChains % nrOfSSBridges % nrOfIntraChainSSBridges % (nrOfSSBridges - nrOfIntraChainSSBridges) % '.' << std::endl;
+ os << kHeaderLine % (boost::format("%8.1f ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2)") % accessibleSurface) % '.' << std::endl;
+
+ // hydrogenbond summary
+
+ os << kHeaderLine % (
+ boost::format("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES")
+ % nrOfHBonds % (nrOfHBonds * 100.0 / nrOfResidues)) % '.' << std::endl;
+
+ uint32 nrOfHBondsInParallelBridges = protein.GetNrOfHBondsInParallelBridges();
+ os << kHeaderLine % (
+ boost::format("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES")
+ % nrOfHBondsInParallelBridges % (nrOfHBondsInParallelBridges * 100.0 / nrOfResidues)) % '.' << std::endl;
+
+ uint32 nrOfHBondsInAntiparallelBridges = protein.GetNrOfHBondsInAntiparallelBridges();
+ os << kHeaderLine % (
+ boost::format("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES")
+ % nrOfHBondsInAntiparallelBridges % (nrOfHBondsInAntiparallelBridges * 100.0 / nrOfResidues)) % '.' << std::endl;
+
+ boost::format kHBondsLine("%5.5d%5.1f TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I%c%1.1d), SAME NUMBER PER 100 RESIDUES");
+ for (int32 k = 0; k < 11; ++k)
+ {
+ os << kHeaderLine % (kHBondsLine % nrOfHBondsPerDistance[k] % (nrOfHBondsPerDistance[k] * 100.0 / nrOfResidues) % (k - 5 < 0 ? '-' : '+') % abs(k - 5)) % '.' << std::endl;
+ }
+
+ // histograms...
+
+ uint32 histogram[kHistogramSize];
+ os << " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** ." << std::endl;
+
+ protein.GetResiduesPerAlphaHelixHistogram(histogram);
+ for (uint32 i = 0; i < kHistogramSize; ++i)
+ os << boost::format("%3.3d") % histogram[i];
+ os << " RESIDUES PER ALPHA HELIX ." << std::endl;
+
+ protein.GetParallelBridgesPerLadderHistogram(histogram);
+ for (uint32 i = 0; i < kHistogramSize; ++i)
+ os << boost::format("%3.3d") % histogram[i];
+ os << " PARALLEL BRIDGES PER LADDER ." << std::endl;
+
+ protein.GetAntiparallelBridgesPerLadderHistogram(histogram);
+ for (uint32 i = 0; i < kHistogramSize; ++i)
+ os << boost::format("%3.3d") % histogram[i];
+ os << " ANTIPARALLEL BRIDGES PER LADDER ." << std::endl;
+
+ protein.GetLaddersPerSheetHistogram(histogram);
+ for (uint32 i = 0; i < kHistogramSize; ++i)
+ os << boost::format("%3.3d") % histogram[i];
+ os << " LADDERS PER SHEET ." << std::endl;
+
+ // per residue information
+
+ os << " # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA CHAIN" << std::endl;
+ boost::format kDSSPResidueLine(
+ "%5.5d !%c 0 0 0 0, 0.0 0, 0.0 0, 0.0 0, 0.0 0.000 360.0 360.0 360.0 360.0 0.0 0.0 0.0");
+
+ std::vector<const MResidue*> residues;
+
+ foreach (const MChain* chain, protein.GetChains())
+ {
+ foreach (const MResidue* residue, chain->GetResidues())
+ residues.push_back(residue);
+ }
+
+ // keep residues sorted by residue number as assigned during reading the PDB file
+ sort(residues.begin(), residues.end(), boost::bind(&MResidue::GetNumber, _1) < boost::bind(&MResidue::GetNumber, _2));
+
+ const MResidue* last = nullptr;
+ foreach (const MResidue* residue, residues)
+ {
+ // insert a break line whenever we detect missing residues
+ // can be the transition to a different chain, or missing residues in the current chain
+ if (last != nullptr and last->GetNumber() + 1 != residue->GetNumber())
+ {
+ char breaktype = ' ';
+ if (last->GetChainID() != residue->GetChainID())
+ breaktype = '*';
+ os << (kDSSPResidueLine % (last->GetNumber() + 1) % breaktype) << std::endl;
+ }
+ os << ResidueToDSSPLine(*residue) << std::endl;
+ last = residue;
+ }
}
diff --git a/src/dssp.h b/src/dssp.h
old mode 100644
new mode 100755
index f0a3b8e..947023f
--- a/src/dssp.h
+++ b/src/dssp.h
@@ -1,7 +1,11 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef XSSP_DSSP_H
+#define XSSP_DSSP_H
#pragma once
@@ -15,3 +19,5 @@ std::string ResidueToDSSPLine(const MResidue& residue);
// Write a complete DSSP file for a protein
void WriteDSSP(MProtein& protein, std::ostream& os);
+
+#endif
diff --git a/src/iocif.cpp b/src/iocif.cpp
old mode 100644
new mode 100755
index e0d5614..e5c8dd7
--- a/src/iocif.cpp
+++ b/src/iocif.cpp
@@ -1,448 +1,450 @@
-#include "mas.h"
+#include "iocif.h"
-#include <cassert>
+#include "utils.h"
+#include <boost/algorithm/string.hpp>
+#include <boost/foreach.hpp>
+#include <boost/iostreams/copy.hpp>
+#include <boost/iostreams/device/back_inserter.hpp>
+
+#include <cassert>
#include <iostream>
#include <string>
-#include <vector>
-#include <boost/iostreams/copy.hpp>
-#include <boost/iostreams/device/back_inserter.hpp>
-#include <boost/foreach.hpp>
#define foreach BOOST_FOREACH
-#include <boost/algorithm/string.hpp>
-
-#include "iocif.h"
-#include "utils.h"
-using namespace std;
namespace io = boost::iostreams;
namespace ba = boost::algorithm;
-// Our CIF implementation consists of flyweight classes.
+// Our CIF implementation consists of flyweight classes.
namespace mmCIF
{
// skip routines to quickly position character pointer p at a next interesting location
const char* skip_line(const char* p, const char* end);
-const char* skip_white(const char* p, const char* end); // skip over white-space and comments
+// skip over white-space and comments
+const char* skip_white(const char* p, const char* end);
const char* skip_value(const char* p, const char* end);
-string row::operator[](const char* inName) const
+std::string row::operator[](const char* inName) const
{
- string result;
-
- foreach (const field& f, m_fields)
- {
- if (strncmp(inName, f.m_name, f.m_name_end - f.m_name) == 0)
- {
- result = f.value();
- break;
- }
- }
-
- return result;
+ std::string result;
+
+ foreach (const field& f, m_fields)
+ {
+ if (strncmp(inName, f.m_name, f.m_name_end - f.m_name) == 0)
+ {
+ result = f.value();
+ break;
+ }
+ }
+
+ return result;
}
row record::front() const
{
- row result;
- result.m_data = m_start;
- result.m_field = 0;
-
- const char* p = m_start;
-
- if (m_loop)
- {
- for (uint32 i = 0; i < m_field_count; ++i)
- {
- assert(*p == '_');
- assert(*(p + m_name.length()) == '.');
-
- field field = {};
-
- field.m_name = p = p + m_name.length() + 1;
- while (p != m_end and not isspace(*p))
- ++p;
-
- field.m_name_end = p;
-
- p = skip_white(p, m_end);
-
- result.m_fields.push_back(field);
- }
-
- foreach (field& fld, result.m_fields)
- {
- fld.m_data = skip_white(p, m_end);
- fld.m_data_end = skip_value(fld.m_data, m_end);
- p = skip_white(fld.m_data_end, m_end);
- }
- }
- else
- {
- for (uint32 i = 0; i < m_field_count; ++i)
- {
- assert(*p == '_');
- assert(*(p + m_name.length()) == '.');
-
- field field;
- field.m_name = p = p + m_name.length() + 1;
- while (p != m_end and not isspace(*p))
- ++p;
-
- field.m_name_end = p;
-
- p = skip_white(p, m_end);
- field.m_data = p;
-
- p = skip_value(p, m_end);
- field.m_data_end = p;
- p = skip_white(p, m_end);
-
- result.m_fields.push_back(field);
- }
- }
-
- return result;
+ row result;
+ result.m_data = m_start;
+ result.m_field = 0;
+
+ const char* p = m_start;
+
+ if (m_loop)
+ {
+ for (uint32 i = 0; i < m_field_count; ++i)
+ {
+ assert(*p == '_');
+ assert(*(p + m_name.length()) == '.');
+
+ field field = {};
+
+ field.m_name = p = p + m_name.length() + 1;
+ while (p != m_end and not isspace(*p))
+ ++p;
+
+ field.m_name_end = p;
+
+ p = skip_white(p, m_end);
+
+ result.m_fields.push_back(field);
+ }
+
+ foreach (field& fld, result.m_fields)
+ {
+ fld.m_data = skip_white(p, m_end);
+ fld.m_data_end = skip_value(fld.m_data, m_end);
+ p = skip_white(fld.m_data_end, m_end);
+ }
+ }
+ else
+ {
+ for (uint32 i = 0; i < m_field_count; ++i)
+ {
+ assert(*p == '_');
+ assert(*(p + m_name.length()) == '.');
+
+ field field;
+ field.m_name = p = p + m_name.length() + 1;
+ while (p != m_end and not isspace(*p))
+ ++p;
+
+ field.m_name_end = p;
+
+ p = skip_white(p, m_end);
+ field.m_data = p;
+
+ p = skip_value(p, m_end);
+ field.m_data_end = p;
+ p = skip_white(p, m_end);
+
+ result.m_fields.push_back(field);
+ }
+ }
+
+ return result;
}
record::iterator record::begin() const
{
- return const_iterator(*this, front());
+ return const_iterator(*this, front());
}
record::iterator record::end() const
{
- row end = { m_start, -1 };
- return const_iterator(*this, end);
+ row end = { m_start, -1 };
+ return const_iterator(*this, end);
}
void record::advance(row& row) const
{
- if (m_loop and not row.m_fields.empty())
- {
- const char* p = skip_white(row.m_fields.back().m_data_end, m_end);
-
- if (p >= m_end)
- {
- row.m_fields.clear();
- row.m_field = -1;
- }
- else
- {
- foreach (field& fld, row.m_fields)
- {
- fld.m_data = skip_white(p, m_end);
- fld.m_data_end = skip_value(fld.m_data, m_end);
- p = skip_white(fld.m_data_end, m_end);
- }
-
- row.m_field += 1;
- }
- }
- else
- {
- row.m_fields.clear();
- row.m_field = -1;
- }
+ if (m_loop and not row.m_fields.empty())
+ {
+ const char* p = skip_white(row.m_fields.back().m_data_end, m_end);
+
+ if (p >= m_end)
+ {
+ row.m_fields.clear();
+ row.m_field = -1;
+ }
+ else
+ {
+ foreach (field& fld, row.m_fields)
+ {
+ fld.m_data = skip_white(p, m_end);
+ fld.m_data_end = skip_value(fld.m_data, m_end);
+ p = skip_white(fld.m_data_end, m_end);
+ }
+
+ row.m_field += 1;
+ }
+ }
+ else
+ {
+ row.m_fields.clear();
+ row.m_field = -1;
+ }
}
-string record::get_joined(const char* inName, const char* inDelimiter) const
+std::string record::get_joined(const char* inName,
+ const char* inDelimiter) const
{
- string result;
-
- for (iterator i = begin(); i != end(); ++i)
- {
- string s = i->operator[](inName);
- ba::trim(s);
- result = (result.empty() ? result : result + inDelimiter) + s;
- }
-
- return result;
+ std::string result;
+
+ for (iterator i = begin(); i != end(); ++i)
+ {
+ std::string s = i->operator[](inName);
+ ba::trim(s);
+ result = (result.empty() ? result : result + inDelimiter) + s;
+ }
+
+ return result;
}
-file::file(istream& is)
+file::file(std::istream& is)
{
- // first extract data into a buffer
- m_buffer.reserve(10 * 1024 * 1024); // reserve 10 MB, should be sufficient for most
-
- io::copy(is, io::back_inserter(m_buffer));
-
- m_data = &m_buffer[0];
- m_end = m_data + m_buffer.size();
-
- m_buffer.push_back(0); // end with a null character, makes coding a bit easier
-
- // CIF files are simple to parse
-
- const char* p = m_data;
-
- if (strncmp(p, "data_", 5) != 0)
- throw mas_exception("Is this an mmCIF file?");
-
- p = skip_line(p, m_end);
-
- record rec = { p };
- bool loop = false;
-
- while (p < m_end)
- {
- if (isspace(*p)) // skip over white space
- {
- ++p;
- continue;
- }
-
- if (*p == '#') // line starting with hash, this is a comment, skip
- {
- p = skip_line(p, m_end);
- continue;
- }
-
- if (strncmp(p, "loop_", 5) == 0)
- {
- if (not m_records.empty() and m_records.back().m_end == nullptr)
- m_records.back().m_end = p;
-
- loop = true;
- rec.m_loop = false;
- p = skip_line(p + 5, m_end);
-
- continue;
- }
-
- const char* s = p;
-
- if (*p == '_') // a label
- {
- // scan for first dot
- bool newName = loop;
- const char* n = rec.m_start;
-
- for (;;)
- {
- if (not newName and *p != *n)
- newName = true;
-
- ++p;
- ++n;
-
- if (p == m_end or *p == '.' or isspace(*p))
- break;
- }
-
- if (*p == '.') // OK, found a record
- {
- if (newName)
- {
- // store start as end for the previous record, if any
- if (not m_records.empty() and m_records.back().m_end == nullptr)
- m_records.back().m_end = s;
-
- rec.m_start = s;
- rec.m_end = nullptr;
- rec.m_loop = loop;
- rec.m_field_count = 1;
- rec.m_name = string(s, p);
-
- m_records.push_back(rec);
- }
- else
- m_records.back().m_field_count += 1;
-
- // skip over field name
- while (p != m_end and not isspace(*p))
- ++p;
- }
- else
- {
- // store start as end for the previous record, if any
- if (not m_records.empty() and m_records.back().m_end == nullptr)
- m_records.back().m_end = s;
-
- // a record without a field (is that possible in mmCIF?)
- cerr << "record without field: " << string(s, p) << endl;
-
- rec.m_start = s;
- rec.m_end = nullptr;
- rec.m_loop = loop;
- rec.m_field_count = 0;
- rec.m_name = string(s, p);
-
- m_records.push_back(rec);
- }
-
- if (not rec.m_loop)
- p = skip_value(p, m_end);
-
- loop = false;
- continue;
- }
-
- if (rec.m_loop == false)
- {
- // guess we should never reach this point
- throw mas_exception("invalid CIF file? (unexpected data, not in loop)");
- }
-
- p = skip_value(p, m_end);
- p = skip_white(p, m_end);
-
- // check for a new data_ block
- if (p != m_end and strncmp(p, "data_", 5) == 0)
- throw mas_exception("Multiple data blocks in CIF file");
- }
-
- if (not m_records.empty() and m_records.back().m_end == nullptr)
- m_records.back().m_end = p;
-
- sort(m_records.begin(), m_records.end());
+ // first extract data into a buffer
+ m_buffer.reserve(10 * 1024 * 1024); // reserve 10 MB, should be sufficient
+
+ io::copy(is, io::back_inserter(m_buffer));
+
+ m_data = &m_buffer[0];
+ m_end = m_data + m_buffer.size();
+
+ m_buffer.push_back(0); // end with a null character, makes coding easier
+
+ // CIF files are simple to parse
+
+ const char* p = m_data;
+
+ if (strncmp(p, "data_", 5) != 0)
+ throw mas_exception("Is this an mmCIF file?");
+
+ p = skip_line(p, m_end);
+
+ record rec = { p };
+ bool loop = false;
+
+ while (p < m_end)
+ {
+ if (isspace(*p)) // skip over white space
+ {
+ ++p;
+ continue;
+ }
+
+ if (*p == '#') // line starting with hash, this is a comment, skip
+ {
+ p = skip_line(p, m_end);
+ continue;
+ }
+
+ if (strncmp(p, "loop_", 5) == 0)
+ {
+ if (not m_records.empty() and m_records.back().m_end == nullptr)
+ m_records.back().m_end = p;
+
+ loop = true;
+ rec.m_loop = false;
+ p = skip_line(p + 5, m_end);
+
+ continue;
+ }
+
+ const char* s = p;
+
+ if (*p == '_') // a label
+ {
+ // scan for first dot
+ bool newName = loop;
+ const char* n = rec.m_start;
+
+ for (;;)
+ {
+ if (not newName and *p != *n)
+ newName = true;
+
+ ++p;
+ ++n;
+
+ if (p == m_end or *p == '.' or isspace(*p))
+ break;
+ }
+
+ if (*p == '.') // OK, found a record
+ {
+ if (newName)
+ {
+ // store start as end for the previous record, if any
+ if (not m_records.empty() and m_records.back().m_end == nullptr)
+ m_records.back().m_end = s;
+
+ rec.m_start = s;
+ rec.m_end = nullptr;
+ rec.m_loop = loop;
+ rec.m_field_count = 1;
+ rec.m_name = std::string(s, p);
+
+ m_records.push_back(rec);
+ }
+ else
+ m_records.back().m_field_count += 1;
+
+ // skip over field name
+ while (p != m_end and not isspace(*p))
+ ++p;
+ }
+ else
+ {
+ // store start as end for the previous record, if any
+ if (not m_records.empty() and m_records.back().m_end == nullptr)
+ m_records.back().m_end = s;
+
+ // a record without a field (is that possible in mmCIF?)
+ std::cerr << "record without field: " << std::string(s, p)
+ << std::endl;
+
+ rec.m_start = s;
+ rec.m_end = nullptr;
+ rec.m_loop = loop;
+ rec.m_field_count = 0;
+ rec.m_name = std::string(s, p);
+
+ m_records.push_back(rec);
+ }
+
+ if (not rec.m_loop)
+ p = skip_value(p, m_end);
+
+ loop = false;
+ continue;
+ }
+
+ if (rec.m_loop == false)
+ {
+ // guess we should never reach this point
+ throw mas_exception("invalid CIF file? (unexpected data, not in loop)");
+ }
+
+ p = skip_value(p, m_end);
+ p = skip_white(p, m_end);
+
+ // check for a new data_ block
+ if (p != m_end and strncmp(p, "data_", 5) == 0)
+ throw mas_exception("Multiple data blocks in CIF file");
+ }
+
+ if (not m_records.empty() and m_records.back().m_end == nullptr)
+ m_records.back().m_end = p;
+
+ sort(m_records.begin(), m_records.end());
}
record file::operator[](const char* inName) const
{
- record result = {};
- result.m_name = inName;
-
- vector<record>::const_iterator i = lower_bound(m_records.begin(), m_records.end(), result);
- if (i != m_records.end() and i->m_name == inName)
- result = *i;
-
- return result;
+ record result = {};
+ result.m_name = inName;
+
+ std::vector<record>::const_iterator i = lower_bound(m_records.begin(),
+ m_records.end(), result);
+ if (i != m_records.end() and i->m_name == inName)
+ result = *i;
+
+ return result;
}
-string file::get(const char* inName) const
+std::string file::get(const char* inName) const
{
- const char* p = strchr(inName, '.');
- assert(p != nullptr);
- if (p == nullptr)
- throw logic_error("incorrect name");
-
- record r = operator[](string(inName, p).c_str());
- return r.front()[string(p + 1).c_str()];
+ const char* p = strchr(inName, '.');
+ assert(p != nullptr);
+ if (p == nullptr)
+ throw std::logic_error("incorrect name");
+
+ record r = operator[](std::string(inName, p).c_str());
+ return r.front()[std::string(p + 1).c_str()];
}
-string file::get_joined(const char* inName, const char* inDelimiter) const
+std::string file::get_joined(const char* inName, const char* inDelimiter) const
{
- const char* p = strchr(inName, '.');
- assert(p != nullptr);
- if (p == nullptr)
- throw logic_error("incorrect name");
-
- record test;
- test.m_name.assign(inName, p);
-
- string result;
-
- vector<record>::const_iterator i = lower_bound(m_records.begin(), m_records.end(), test);
- if (i != m_records.end() and i->m_name == test.m_name)
- result = i->get_joined(p + 1, inDelimiter);
-
- return result;
+ const char* p = strchr(inName, '.');
+ assert(p != nullptr);
+ if (p == nullptr)
+ throw std::logic_error("incorrect name");
+
+ record test;
+ test.m_name.assign(inName, p);
+
+ std::string result;
+
+ std::vector<record>::const_iterator i = lower_bound(m_records.begin(),
+ m_records.end(), test);
+ if (i != m_records.end() and i->m_name == test.m_name)
+ result = i->get_joined(p + 1, inDelimiter);
+
+ return result;
}
// skip to first character after the next NL character
const char* skip_line(const char* p, const char* end)
{
- while (p != end)
- {
- if (*p++ == '\n')
- break;
- }
+ while (p != end)
+ {
+ if (*p++ == '\n')
+ break;
+ }
- return p;
+ return p;
}
// skip over white space and comments
const char* skip_white(const char* p, const char* end)
{
- while (p != end)
- {
- if (isspace(*p))
- {
- ++p;
- continue;
- }
-
- if (*p == '#')
- {
- do ++p; while (p < end and *p != '\n');
- continue;
- }
-
- break;
- }
-
- return p;
+ while (p != end)
+ {
+ if (isspace(*p))
+ {
+ ++p;
+ continue;
+ }
+
+ if (*p == '#')
+ {
+ do ++p; while (p < end and *p != '\n');
+ continue;
+ }
+
+ break;
+ }
+
+ return p;
}
// skip over values for a record
const char* skip_value(const char* p, const char* end)
{
- for (;;)
- {
- if (isspace(*p))
- {
- ++p;
- continue;
- }
-
- if (*p == ';' and *(p - 1) == '\n')
- {
- do p = skip_line(p, end); while (p < end and *p != ';');
- ++p;
- break;
- }
-
- if (*p == '\'')
- {
- do ++p; while (p != end and *p != '\'');
- ++p;
- break;
- }
-
- if (*p == '\"')
- {
- do ++p; while (p != end and *p != '\"');
- ++p;
- break;
- }
-
- if (*p == '#')
- {
- p = skip_line(p, end);
- continue;
- }
-
- while (p != end and not isspace(*p))
- ++p;
-
- break;
- }
-
- return p;
+ for (;;)
+ {
+ if (isspace(*p))
+ {
+ ++p;
+ continue;
+ }
+
+ if (*p == ';' and *(p - 1) == '\n')
+ {
+ do p = skip_line(p, end); while (p < end and *p != ';');
+ ++p;
+ break;
+ }
+
+ if (*p == '\'')
+ {
+ do ++p; while (p != end and *p != '\'');
+ ++p;
+ break;
+ }
+
+ if (*p == '\"')
+ {
+ do ++p; while (p != end and *p != '\"');
+ ++p;
+ break;
+ }
+
+ if (*p == '#')
+ {
+ p = skip_line(p, end);
+ continue;
+ }
+
+ while (p != end and not isspace(*p))
+ ++p;
+
+ break;
+ }
+
+ return p;
}
-
+
}
//void ReadCIF(std::istream& in, MProtein& out)
//{
-// file cif(&buffer[0], buffer.size() - 1);
-//
-//
+// file cif(&buffer[0], buffer.size() - 1);
+//
+//
+//
+// cout << "id: " << cif["_entry"].front()["id"].value() << std::endl;
+//
+// foreach (const row& row, cif["_atom_type"])
+// {
+// cout << row["symbol"].value() << std::endl;
+// }
//
-// cout << "id: " << cif["_entry"].front()["id"].value() << endl;
-//
-// foreach (const row& row, cif["_atom_type"])
-// {
-// cout << row["symbol"].value() << endl;
-// }
-//
-// foreach (const row& row, cif["_atom_site"])
-// {
-// cout << "ATOM " << row["Cartn_x"].value() << ' ' << row["Cartn_y"].value() << ' ' << row["Cartn_z"].value() << endl;
-// }
+// foreach (const row& row, cif["_atom_site"])
+// {
+// cout << "ATOM " << row["Cartn_x"].value() << ' ' << row["Cartn_y"].value() << ' ' << row["Cartn_z"].value() << std::endl;
+// }
//}
//
diff --git a/src/iocif.h b/src/iocif.h
old mode 100644
new mode 100755
index eafdb16..4acbd16
--- a/src/iocif.h
+++ b/src/iocif.h
@@ -1,125 +1,168 @@
+#ifndef XSSP_IOCIF_H
+#define XSSP_IOCIF_H
+
#pragma once
+#include "mas.h"
+
#include <iostream>
+#include <vector>
-// Our CIF implementation consists of flyweight classes.
+// Our CIF implementation consists of flyweight classes.
namespace mmCIF
{
struct field
{
- std::string name() const
- {
- return std::string(m_name, m_name_end);
- }
-
- std::string value() const
- {
- std::string result;
-
- if (m_data[0] == '\'' and m_data_end > m_data and m_data_end[-1] == '\'')
- result = std::string(m_data + 1, m_data_end - 1);
- else if (m_data[0] == '"' and m_data_end > m_data and m_data_end[-1] == '"')
- result = std::string(m_data + 1, m_data_end - 1);
- else if (m_data[0] == ';' and m_data_end > m_data and m_data_end[-1] == ';')
- result = std::string(m_data + 1, m_data_end - 1);
- else
- result = std::string(m_data, m_data_end);
-
- return result;
- }
-
- const char* m_name;
- const char* m_name_end;
- const char* m_data;
- const char* m_data_end;
+ std::string name() const
+ {
+ return std::string(m_name, m_name_end);
+ }
+
+ std::string value() const
+ {
+ std::string result;
+
+ if (m_data[0] == '\'' and m_data_end > m_data and m_data_end[-1] == '\'')
+ result = std::string(m_data + 1, m_data_end - 1);
+ else if (m_data[0] == '"' and
+ m_data_end > m_data and
+ m_data_end[-1] == '"')
+ result = std::string(m_data + 1, m_data_end - 1);
+ else if (m_data[0] == ';' and
+ m_data_end > m_data and
+ m_data_end[-1] == ';')
+ result = std::string(m_data + 1, m_data_end - 1);
+ else
+ result = std::string(m_data, m_data_end);
+
+ return result;
+ }
+
+ const char* m_name;
+ const char* m_name_end;
+ const char* m_data;
+ const char* m_data_end;
};
struct row
{
- std::string operator[](const char* inName) const;
-
- bool operator==(const row& rhs) const
- {
- return m_data == rhs.m_data and m_field == rhs.m_field;
- }
-
- const char* m_data;
- int32 m_field;
- std::vector<field> m_fields;
+ std::string operator[](const char* inName) const;
+
+ bool operator==(const row& rhs) const
+ {
+ return m_data == rhs.m_data and m_field == rhs.m_field;
+ }
+
+ const char* m_data;
+ int32 m_field;
+ std::vector<field> m_fields;
};
struct record
{
- std::string name() const
- {
- return m_name;
- }
-
- struct const_iterator : public std::iterator<std::forward_iterator_tag, const row>
- {
- typedef std::iterator<std::forward_iterator_tag, const row> base_type;
- typedef base_type::reference reference;
- typedef base_type::pointer pointer;
-
- const_iterator(const record& rec, const row& row) : m_rec(rec), m_row(row) {}
- const_iterator(const const_iterator& iter) : m_rec(iter.m_rec), m_row(iter.m_row) {}
- const_iterator& operator=(const const_iterator& iter) { m_row = iter.m_row; return *this; }
-
- reference operator*() const { return m_row; }
- pointer operator->() const { return &m_row; }
-
- const_iterator& operator++() { m_rec.advance(m_row); return *this; }
- const_iterator operator++(int) { const_iterator iter(*this); operator++(); return iter; }
-
- bool operator==(const const_iterator& iter) const { return m_row == iter.m_row; }
- bool operator!=(const const_iterator& iter) const { return not operator==(iter); }
-
- private:
- const record& m_rec;
- row m_row;
- };
-
- typedef const_iterator iterator;
-
- row front() const;
- row back() const;
-
- const_iterator begin() const;
- const_iterator end() const;
-
- void advance(row& row) const; // update pointers to next data row, if any
-
- bool operator<(const record& rhs) const
- {
- return m_name < rhs.m_name;
- }
-
- std::string get_joined(const char* inFieldName, const char* inDelimiter) const;
-
- const char* m_start;
- const char* m_end;
- bool m_loop;
- uint32 m_field_count;
- std::string m_name;
+ std::string name() const
+ {
+ return m_name;
+ }
+
+ struct const_iterator : public std::iterator<std::forward_iterator_tag,
+ const row>
+ {
+ typedef std::iterator<std::forward_iterator_tag, const row> base_type;
+ typedef base_type::reference reference;
+ typedef base_type::pointer pointer;
+
+ const_iterator(const record& rec, const row& row)
+ : m_rec(rec),
+ m_row(row)
+ {}
+
+ const_iterator(const const_iterator& iter)
+ : m_rec(iter.m_rec),
+ m_row(iter.m_row)
+ {}
+
+ const_iterator& operator=(const const_iterator& iter)
+ {
+ m_row = iter.m_row;
+ return *this;
+ }
+
+ reference operator*() const { return m_row; }
+ pointer operator->() const { return &m_row; }
+
+ const_iterator& operator++()
+ {
+ m_rec.advance(m_row);
+ return *this;
+ }
+
+ const_iterator operator++(int)
+ {
+ const_iterator iter(*this);
+ operator++();
+ return iter;
+ }
+
+ bool operator==(const const_iterator& iter) const
+ {
+ return m_row == iter.m_row;
+ }
+
+ bool operator!=(const const_iterator& iter) const
+ {
+ return not operator==(iter);
+ }
+
+ private:
+ const record& m_rec;
+ row m_row;
+ };
+
+ typedef const_iterator iterator;
+
+ row front() const;
+ row back() const;
+
+ const_iterator begin() const;
+ const_iterator end() const;
+
+ void advance(row& row) const; // update pointers to next data row, if any
+
+ bool operator<(const record& rhs) const
+ {
+ return m_name < rhs.m_name;
+ }
+
+ std::string get_joined(const char* inFieldName,
+ const char* inDelimiter) const;
+
+ const char* m_start;
+ const char* m_end;
+ bool m_loop;
+ uint32 m_field_count;
+ std::string m_name;
};
class file
{
public:
- file(std::istream& is);
+ file(std::istream& is);
- record operator[](const char* inName) const;
-
- std::string get(const char* inName) const;
- std::string get_joined(const char* inName, const char* inDelimiter) const;
+ record operator[](const char* inName) const;
+
+ std::string get(const char* inName) const;
+ std::string get_joined(const char* inName, const char* inDelimiter) const;
private:
- std::vector<char> m_buffer;
- std::vector<record> m_records;
- const char* m_data;
- const char* m_end;
+ std::vector<char> m_buffer;
+ std::vector<record> m_records;
+ const char* m_data;
+ const char* m_end;
};
-
+
}
+
+#endif
diff --git a/src/mas.cpp b/src/mas.cpp
old mode 100644
new mode 100755
index a2713b8..f0eb6b8
--- a/src/mas.cpp
+++ b/src/mas.cpp
@@ -1,40 +1,44 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
// mas.cpp - simple attempt to write a multiple sequence alignment application
#include "mas.h"
-#include <algorithm>
#include <boost/foreach.hpp>
+
+#include <algorithm>
+
#define foreach BOOST_FOREACH
-using namespace std;
+int VERBOSE = 0;
+
// 22 real letters and 1 dummy
const char kResidues[] = "ACDEFGHIKLMNPQRSTVWYBZX";
const uint8 kResidueNrTable[] = {
-// A B C D E F G H I K L M N P Q R S T U=X V W X Y Z
-// 0, 1, 2, 3, 4, 5, 6, 7, 8, 23, 9, 10, 11, 12, 23, 13, 14, 15, 16, 17, 22, 18, 19, 22, 20, 21
- 0, 20, 1, 2, 3, 4, 5, 6, 7, 23, 8, 9, 10, 11, 23, 12, 13, 14, 15, 16, 22, 17, 18, 22, 19, 21
+// A B C D E F G H I K L M N P Q R S T U=X V W X Y Z
+// 0, 1, 2, 3, 4, 5, 6, 7, 8, 23, 9, 10, 11, 12, 23, 13, 14, 15, 16, 17, 22, 18, 19, 22, 20, 21
+ 0, 20, 1, 2, 3, 4, 5, 6, 7, 23, 8, 9, 10, 11, 23, 12, 13, 14, 15, 16, 22, 17, 18, 22, 19, 21
};
-sequence encode(const string& s)
+sequence encode(const std::string& s)
{
- sequence result(s.length(), 0);
- for (unsigned int i = 0; i < s.length(); ++i)
- result[i] = is_gap(s[i]) ? '-' : ResidueNr(s[i]);
- return result;
+ sequence result(s.length(), 0);
+ for (unsigned int i = 0; i < s.length(); ++i)
+ result[i] = is_gap(s[i]) ? '-' : ResidueNr(s[i]);
+ return result;
}
-string decode(const sequence& s)
+std::string decode(const sequence& s)
{
- string result(s.length(), 0);
- for (unsigned int i = 0; i < s.length(); ++i)
- result[i] = s[i] >= 23 ? '.' : kResidues[s[i]];
- return result;
+ std::string result(s.length(), 0);
+ for (unsigned int i = 0; i < s.length(); ++i)
+ result[i] = s[i] >= 23 ? '.' : kResidues[s[i]];
+ return result;
}
//#include <iostream>
@@ -68,7 +72,6 @@ string decode(const sequence& s)
//#include "structure.h"
//#include "align-3d.h"
-using namespace std;
//using namespace tr1;
//namespace fs = boost::filesystem;
//namespace po = boost::program_options;
@@ -76,7 +79,7 @@ using namespace std;
//namespace ba = boost::algorithm;
//
//int VERBOSE = 0,
-// MULTI_THREADED = 1;
+// MULTI_THREADED = 1;
//
//// --------------------------------------------------------------------
//
@@ -85,117 +88,117 @@ using namespace std;
//// ah, too bad those lamda's are not supported yet...
//struct sum_weight
//{
-// float operator()(float sum, const entry* e) const { return sum + e->m_weight; }
+// float operator()(float sum, const entry* e) const { return sum + e->m_weight; }
//};
//
//struct max_pdb_nr
//{
-// int16 operator()(int16 a, int16 b) const { return max(a, b); }
+// int16 operator()(int16 a, int16 b) const { return max(a, b); }
//};
//
//// --------------------------------------------------------------------
//
//void entry::insert_gap(uint32 pos)
//{
-// if (pos > m_seq.length())
-// {
-// m_seq += kSignalGapCode;
-// if (not m_ss.empty())
-// m_ss += loop;
-// if (not m_positions.empty())
-// m_positions.push_back(0);
-// }
-// else
-// {
-// aa r = kSignalGapCode;
-// m_seq.insert(pos, &r, 1);
-//
-// ss s = loop;
-// if (not m_ss.empty())
-// m_ss.insert(pos, &s, 1);
-//
-// if (not m_positions.empty())
-// m_positions.insert(m_positions.begin() + pos, 0);
-// }
-//
-// assert(m_positions.size() == m_seq.length() or m_positions.empty());
+// if (pos > m_seq.length())
+// {
+// m_seq += kSignalGapCode;
+// if (not m_ss.empty())
+// m_ss += loop;
+// if (not m_positions.empty())
+// m_positions.push_back(0);
+// }
+// else
+// {
+// aa r = kSignalGapCode;
+// m_seq.insert(pos, &r, 1);
+//
+// ss s = loop;
+// if (not m_ss.empty())
+// m_ss.insert(pos, &s, 1);
+//
+// if (not m_positions.empty())
+// m_positions.insert(m_positions.begin() + pos, 0);
+// }
+//
+// assert(m_positions.size() == m_seq.length() or m_positions.empty());
//}
//
//void entry::append_gap()
//{
-// m_seq += kSignalGapCode;
-// if (not m_ss.empty())
-// m_ss += loop;
-// if (not m_positions.empty())
-// m_positions.push_back(0);
+// m_seq += kSignalGapCode;
+// if (not m_ss.empty())
+// m_ss += loop;
+// if (not m_positions.empty())
+// m_positions.push_back(0);
//
-// assert(m_positions.size() == m_seq.length() or m_positions.empty());
+// assert(m_positions.size() == m_seq.length() or m_positions.empty());
//}
//
//void entry::remove_gap(uint32 pos)
//{
-// assert(pos < m_seq.length());
-// m_seq.erase(m_seq.begin() + pos);
-// if (pos < m_ss.length())
-// m_ss.erase(m_ss.begin() + pos);
-// if (pos < m_positions.size())
-// m_positions.erase(m_positions.begin() + pos);
+// assert(pos < m_seq.length());
+// m_seq.erase(m_seq.begin() + pos);
+// if (pos < m_ss.length())
+// m_ss.erase(m_ss.begin() + pos);
+// if (pos < m_positions.size())
+// m_positions.erase(m_positions.begin() + pos);
//}
//
//void entry::remove_gaps()
//{
-//// if (m_seq.length() == m_positions.size())
-//// assert(false);
-//// else
-//// m_seq.erase(remove(m_seq.begin(), m_seq.end(), kSignalGapCode), m_seq.end());
-//
-// for (uint32 i = 0; i < m_seq.length(); ++i)
-// {
-// if (m_seq[i] == kSignalGapCode)
-// {
-// remove_gap(i);
-// --i;
-// }
-// }
+//// if (m_seq.length() == m_positions.size())
+//// assert(false);
+//// else
+//// m_seq.erase(remove(m_seq.begin(), m_seq.end(), kSignalGapCode), m_seq.end());
+//
+// for (uint32 i = 0; i < m_seq.length(); ++i)
+// {
+// if (m_seq[i] == kSignalGapCode)
+// {
+// remove_gap(i);
+// --i;
+// }
+// }
//}
//
//// --------------------------------------------------------------------
//
//ostream& operator<<(ostream& lhs, base_node& rhs)
//{
-// rhs.print(lhs);
-// return lhs;
+// rhs.print(lhs);
+// return lhs;
//}
//
//joined_node::joined_node(base_node* left, base_node* right,
-// float d_left, float d_right)
-// : m_left(left)
-// , m_right(right)
-// , m_d_left(d_left)
-// , m_d_right(d_right)
-// , m_leaf_count(left->leaf_count() + right->leaf_count())
-// , m_length(max(left->length(), right->length()))
+// float d_left, float d_right)
+// : m_left(left)
+// , m_right(right)
+// , m_d_left(d_left)
+// , m_d_right(d_right)
+// , m_leaf_count(left->leaf_count() + right->leaf_count())
+// , m_length(max(left->length(), right->length()))
//{
-// m_left->add_weight(d_left / m_left->leaf_count());
-// m_right->add_weight(d_right / m_right->leaf_count());
+// m_left->add_weight(d_left / m_left->leaf_count());
+// m_right->add_weight(d_right / m_right->leaf_count());
//}
//
//joined_node::~joined_node()
//{
-// delete m_left;
-// delete m_right;
+// delete m_left;
+// delete m_right;
//}
//
//void joined_node::print(ostream& s)
//{
-// s << '(' << endl
-// << *m_left << (boost::format(":%1.4f") % m_d_left) << ',' << endl
-// << *m_right << (boost::format(":%1.4f") % m_d_right) << ')' << endl;
+// s << '(' << endl
+// << *m_left << (boost::format(":%1.4f") % m_d_left) << ',' << endl
+// << *m_right << (boost::format(":%1.4f") % m_d_right) << ')' << endl;
//}
//
//void leaf_node::print(ostream& s)
//{
-// s << m_entry.m_id;
+// s << m_entry.m_id;
//}
//
//// --------------------------------------------------------------------
@@ -207,1153 +210,1153 @@ using namespace std;
//
//float calculateDistance(const entry& a, const entry& b)
//{
-// int32 x = 0, endX = 0, dimX = a.m_seq.length();
-// int32 y = 0, endY = 0, dimY = b.m_seq.length();
-//
-// const vector<int16>& pa = a.m_positions;
-// const vector<int16>& pb = b.m_positions;
-//
-// matrix<float> B(dimX, dimY);
-// matrix<float> Ix(dimX, dimY);
-// matrix<float> Iy(dimX, dimY);
-// matrix<uint16> id(dimX, dimY);
-//
-// Ix(0, 0) = 0;
-// Iy(0, 0) = 0;
-//
-// uint16 highId = 0;
-//
-// if (pa.empty() or pb.empty())
-// {
-// endX = dimX;
-// endY = dimY;
-// }
-//
-// while (x < dimX and y < dimY)
-// {
-// if (x == endX and y == endY)
-// {
-// if (pa[x] == pb[y] and pa[x] != 0)
-// {
-// if (a.m_seq[x] == b.m_seq[y])
-// ++highId;
-// ++x; ++endX;
-// ++y; ++endY;
-// continue;
-// }
-// }
-//
-// while (endX < dimX or endY < dimY)
-// {
-// if (endX < dimX and pa[endX] == 0)
-// {
-// ++endX;
-// continue;
-// }
-//
-// if (endY < dimY and pb[endY] == 0)
-// {
-// ++endY;
-// continue;
-// }
-//
-// if (endX < dimX and endY < dimY and pa[endX] == pb[endY] and pa[endX] != 0)
-// break;
-//
-// if (endX < dimX)
-// {
-// while (endX < dimX and (endY == dimY or pa[endX] < pb[endY]))
-// ++endX;
-// }
-//
-// if (endY < dimY)
-// {
-// while (endY < dimY and (endX == dimX or pb[endY] < pa[endX]))
-// ++endY;
-// }
-//
-// if (endX < dimX and endY < dimY and pa[endX] != pb[endY])
-// continue;
-//
-// break;
-// }
-//
-// Ix(x, y) = 0;
-// Iy(x, y) = 0;
-// if (x > 0 and y > 0)
-// id(x - 1, y - 1) = highId;
-//
-// int32 startX = x, startY = y;
-// float high = -numeric_limits<float>::max();
-// uint16 highIdSub = 0;
-//
-// for (x = startX; x < endX; ++x)
-// {
-// for (y = startY; y < endY; ++y)
-// {
-// float Ix1 = 0; if (x > startX) Ix1 = Ix(x - 1, y);
-// float Iy1 = 0; if (y > startY) Iy1 = Iy(x, y - 1);
-//
-// // (1)
-// float M = kDistanceMatrix(a.m_seq[x], b.m_seq[y]);
-// if (x > startX and y > startY)
-// M += B(x - 1, y - 1);
-//
-// float s;
-// uint16 i = 0;
-// if (a.m_seq[x] == b.m_seq[y])
-// i = 1;
-//
-// if (M >= Ix1 and M >= Iy1)
-// {
-// if (x > startX and y > startY)
-// i += id(x - 1, y - 1);
-// s = M;
-// }
-// else if (Ix1 >= Iy1)
-// {
-// if (x > startX)
-// i += id(x - 1, y);
-// s = Ix1;
-// }
-// else
-// {
-// if (y > startY)
-// i += id(x, y - 1);
-// s = Iy1;
-// }
-//
-// B(x, y) = s;
-// id(x, y) = i;
-//
-// if ((x == endX - 1 or y == endY - 1) and high < s)
-// {
-// high = s;
-// highIdSub = i;
-// }
-//
-// // (3)
-// Ix(x, y) = max(M - kDistanceGapOpen, Ix1 - kDistanceGapExtend);
-//
-// // (4)
-// Iy(x, y) = max(M - kDistanceGapOpen, Iy1 - kDistanceGapExtend);
-// }
-// }
-//
-// highId += highIdSub;
-//
-// x = endX;
-// y = endY;
-// }
-//
-// float result = 1.0f - float(highId) / max(dimX, dimY);
-//
-// assert(result >= 0.0f);
-// assert(result <= 1.0f);
-//
-// if (VERBOSE)
-// {
-// static boost::mutex sLockCout;
-// boost::mutex::scoped_lock lock(sLockCout);
-// cerr << (boost::format("Sequences (%1$d:%2$d) Aligned. Score: %3$4.2f") % (a.m_nr + 1) % (b.m_nr + 1) % result) << endl;
-// if (VERBOSE >= 2)
-// cerr << " " << a.m_id << ':' << b.m_id << endl;
-// }
-//
-// return result;
+// int32 x = 0, endX = 0, dimX = a.m_seq.length();
+// int32 y = 0, endY = 0, dimY = b.m_seq.length();
+//
+// const vector<int16>& pa = a.m_positions;
+// const vector<int16>& pb = b.m_positions;
+//
+// matrix<float> B(dimX, dimY);
+// matrix<float> Ix(dimX, dimY);
+// matrix<float> Iy(dimX, dimY);
+// matrix<uint16> id(dimX, dimY);
+//
+// Ix(0, 0) = 0;
+// Iy(0, 0) = 0;
+//
+// uint16 highId = 0;
+//
+// if (pa.empty() or pb.empty())
+// {
+// endX = dimX;
+// endY = dimY;
+// }
+//
+// while (x < dimX and y < dimY)
+// {
+// if (x == endX and y == endY)
+// {
+// if (pa[x] == pb[y] and pa[x] != 0)
+// {
+// if (a.m_seq[x] == b.m_seq[y])
+// ++highId;
+// ++x; ++endX;
+// ++y; ++endY;
+// continue;
+// }
+// }
+//
+// while (endX < dimX or endY < dimY)
+// {
+// if (endX < dimX and pa[endX] == 0)
+// {
+// ++endX;
+// continue;
+// }
+//
+// if (endY < dimY and pb[endY] == 0)
+// {
+// ++endY;
+// continue;
+// }
+//
+// if (endX < dimX and endY < dimY and pa[endX] == pb[endY] and pa[endX] != 0)
+// break;
+//
+// if (endX < dimX)
+// {
+// while (endX < dimX and (endY == dimY or pa[endX] < pb[endY]))
+// ++endX;
+// }
+//
+// if (endY < dimY)
+// {
+// while (endY < dimY and (endX == dimX or pb[endY] < pa[endX]))
+// ++endY;
+// }
+//
+// if (endX < dimX and endY < dimY and pa[endX] != pb[endY])
+// continue;
+//
+// break;
+// }
+//
+// Ix(x, y) = 0;
+// Iy(x, y) = 0;
+// if (x > 0 and y > 0)
+// id(x - 1, y - 1) = highId;
+//
+// int32 startX = x, startY = y;
+// float high = -numeric_limits<float>::max();
+// uint16 highIdSub = 0;
+//
+// for (x = startX; x < endX; ++x)
+// {
+// for (y = startY; y < endY; ++y)
+// {
+// float Ix1 = 0; if (x > startX) Ix1 = Ix(x - 1, y);
+// float Iy1 = 0; if (y > startY) Iy1 = Iy(x, y - 1);
+//
+// // (1)
+// float M = kDistanceMatrix(a.m_seq[x], b.m_seq[y]);
+// if (x > startX and y > startY)
+// M += B(x - 1, y - 1);
+//
+// float s;
+// uint16 i = 0;
+// if (a.m_seq[x] == b.m_seq[y])
+// i = 1;
+//
+// if (M >= Ix1 and M >= Iy1)
+// {
+// if (x > startX and y > startY)
+// i += id(x - 1, y - 1);
+// s = M;
+// }
+// else if (Ix1 >= Iy1)
+// {
+// if (x > startX)
+// i += id(x - 1, y);
+// s = Ix1;
+// }
+// else
+// {
+// if (y > startY)
+// i += id(x, y - 1);
+// s = Iy1;
+// }
+//
+// B(x, y) = s;
+// id(x, y) = i;
+//
+// if ((x == endX - 1 or y == endY - 1) and high < s)
+// {
+// high = s;
+// highIdSub = i;
+// }
+//
+// // (3)
+// Ix(x, y) = max(M - kDistanceGapOpen, Ix1 - kDistanceGapExtend);
+//
+// // (4)
+// Iy(x, y) = max(M - kDistanceGapOpen, Iy1 - kDistanceGapExtend);
+// }
+// }
+//
+// highId += highIdSub;
+//
+// x = endX;
+// y = endY;
+// }
+//
+// float result = 1.0f - float(highId) / max(dimX, dimY);
+//
+// assert(result >= 0.0f);
+// assert(result <= 1.0f);
+//
+// if (VERBOSE)
+// {
+// static boost::mutex sLockCout;
+// boost::mutex::scoped_lock lock(sLockCout);
+// cerr << (boost::format("Sequences (%1$d:%2$d) Aligned. Score: %3$4.2f") % (a.m_nr + 1) % (b.m_nr + 1) % result) << endl;
+// if (VERBOSE >= 2)
+// cerr << " " << a.m_id << ':' << b.m_id << endl;
+// }
+//
+// return result;
//}
//
//// we use as many threads as is useful to do the distance calculation
//// which is quite easy to do using a thread safe queue
-//typedef buffer<tr1::tuple<uint32,uint32> > distance_queue;
-//const tr1::tuple<uint32,uint32> kSentinel = tr1::make_tuple(numeric_limits<uint32>::max(), 0);
+//typedef buffer<tr1::tuple<uint32,uint32> > distance_queue;
+//const tr1::tuple<uint32,uint32> kSentinel = tr1::make_tuple(numeric_limits<uint32>::max(), 0);
//
//void calculateDistance(distance_queue& queue, symmetric_matrix<float>& d, vector<entry>& data,
-// progress& pr)
+// progress& pr)
//{
-// for (;;)
-// {
-// uint32 a, b;
-// tr1::tie(a, b) = queue.get();
-//
-// if (a == numeric_limits<uint32>::max()) // sentinel found, quit loop
-// break;
-//
-// d(a, b) = calculateDistance(data[a], data[b]);
-// pr.step();
-// }
-//
-// queue.put(kSentinel);
+// for (;;)
+// {
+// uint32 a, b;
+// tr1::tie(a, b) = queue.get();
+//
+// if (a == numeric_limits<uint32>::max()) // sentinel found, quit loop
+// break;
+//
+// d(a, b) = calculateDistance(data[a], data[b]);
+// pr.step();
+// }
+//
+// queue.put(kSentinel);
//}
//
//void calculateDistanceMatrix(symmetric_matrix<float>& d, vector<entry>& data)
//{
-// progress pr("calculating guide tree", (data.size() * (data.size() - 1)) / 2);
-// distance_queue queue;
-//
-// boost::thread_group t;
-//
-// uint32 nr_of_threads = boost::thread::hardware_concurrency();
-//
-// if (not MULTI_THREADED)
-// nr_of_threads = 1;
-//
-// for (uint32 ti = 0; ti < nr_of_threads; ++ti)
-// t.create_thread(boost::bind(&calculateDistance,
-// boost::ref(queue), boost::ref(d), boost::ref(data), boost::ref(pr)));
-//
-// for (uint32 a = 0; a < data.size() - 1; ++a)
-// {
-// for (uint32 b = a + 1; b < data.size(); ++b)
-// queue.put(tr1::make_tuple(a, b));
-// }
-//
-// queue.put(kSentinel);
-//
-// t.join_all();
+// progress pr("calculating guide tree", (data.size() * (data.size() - 1)) / 2);
+// distance_queue queue;
+//
+// boost::thread_group t;
+//
+// uint32 nr_of_threads = boost::thread::hardware_concurrency();
+//
+// if (not MULTI_THREADED)
+// nr_of_threads = 1;
+//
+// for (uint32 ti = 0; ti < nr_of_threads; ++ti)
+// t.create_thread(boost::bind(&calculateDistance,
+// boost::ref(queue), boost::ref(d), boost::ref(data), boost::ref(pr)));
+//
+// for (uint32 a = 0; a < data.size() - 1; ++a)
+// {
+// for (uint32 b = a + 1; b < data.size(); ++b)
+// queue.put(tr1::make_tuple(a, b));
+// }
+//
+// queue.put(kSentinel);
+//
+// t.join_all();
//}
//
//// --------------------------------------------------------------------
//
//void joinNeighbours(symmetric_matrix<float>& d, vector<base_node*>& tree)
//{
-// uint32 r = tree.size();
-//
-// while (r > 2)
-// {
-// // calculate the sums first
-// vector<float> sum(r);
-// for (uint32 i = 1; i < r; ++i)
-// {
-// for (uint32 j = 0; j < i; ++j)
-// {
-// float dij = d(i, j);
-// sum[i] += dij;
-// sum[j] += dij;
-// }
-// }
-//
-// // calculate Q, or in fact, the position of the minimum in Q
-// uint32 min_i = 0, min_j = 0;
-// float m = numeric_limits<float>::max();
-//
-// for (uint32 i = 1; i < r; ++i)
-// {
-// for (uint32 j = 0; j < i; ++j)
-// {
-// float v = d(i, j) - (sum[i] + sum[j]) / (r - 2);
-//
-// if (m > v)
-// {
-// min_i = i;
-// min_j = j;
-// m = v;
-// }
-// }
-// }
-//
-//
-// // distance to joined node
-// float d_i, d_j;
-// float half_dij = d(min_i, min_j) / 2;
-//
-// d_i = half_dij + abs(sum[min_i] - sum[min_j]) / (2 * (r - 2));
-// d_j = d(min_i, min_j) - d_i;
-//
-// if (d_i > d_j and tree[min_i]->leaf_count() > tree[min_j]->leaf_count())
-// swap(d_i, d_j);
-//
-// joined_node* jn = new joined_node(tree[min_i], tree[min_j], d_i, d_j);
-// assert(min_j < min_i);
-// tree.erase(tree.begin() + min_i);
-// tree.erase(tree.begin() + min_j);
-// tree.push_back(jn);
-//
-// // distances to other nodes
-// vector<float> dn; dn.reserve(r - 2);
-// for (uint32 x = 0; x < r; ++x)
-// {
-// if (x == min_i or x == min_j)
-// continue;
-// dn.push_back((abs(d(x, min_i) - d_i) + abs(d(x, min_j) - d_j)) / 2);
-// }
-//
-// // fill new distance matrix
-// d.erase_2(min_i, min_j);
-// --r;
-// for (uint32 x = 0; x < r - 1; ++x)
-// d(x, r - 1) = dn[x];
-// }
-//
-// assert(r == 2); assert(tree.size() == 2);
-//
-// joined_node* root = new joined_node(tree[0], tree[1], d(0, 1) / 2, d(0, 1) / 2);
-// tree.clear();
-// tree.push_back(root);
+// uint32 r = tree.size();
+//
+// while (r > 2)
+// {
+// // calculate the sums first
+// vector<float> sum(r);
+// for (uint32 i = 1; i < r; ++i)
+// {
+// for (uint32 j = 0; j < i; ++j)
+// {
+// float dij = d(i, j);
+// sum[i] += dij;
+// sum[j] += dij;
+// }
+// }
+//
+// // calculate Q, or in fact, the position of the minimum in Q
+// uint32 min_i = 0, min_j = 0;
+// float m = numeric_limits<float>::max();
+//
+// for (uint32 i = 1; i < r; ++i)
+// {
+// for (uint32 j = 0; j < i; ++j)
+// {
+// float v = d(i, j) - (sum[i] + sum[j]) / (r - 2);
+//
+// if (m > v)
+// {
+// min_i = i;
+// min_j = j;
+// m = v;
+// }
+// }
+// }
+//
+//
+// // distance to joined node
+// float d_i, d_j;
+// float half_dij = d(min_i, min_j) / 2;
+//
+// d_i = half_dij + abs(sum[min_i] - sum[min_j]) / (2 * (r - 2));
+// d_j = d(min_i, min_j) - d_i;
+//
+// if (d_i > d_j and tree[min_i]->leaf_count() > tree[min_j]->leaf_count())
+// swap(d_i, d_j);
+//
+// joined_node* jn = new joined_node(tree[min_i], tree[min_j], d_i, d_j);
+// assert(min_j < min_i);
+// tree.erase(tree.begin() + min_i);
+// tree.erase(tree.begin() + min_j);
+// tree.push_back(jn);
+//
+// // distances to other nodes
+// vector<float> dn; dn.reserve(r - 2);
+// for (uint32 x = 0; x < r; ++x)
+// {
+// if (x == min_i or x == min_j)
+// continue;
+// dn.push_back((abs(d(x, min_i) - d_i) + abs(d(x, min_j) - d_j)) / 2);
+// }
+//
+// // fill new distance matrix
+// d.erase_2(min_i, min_j);
+// --r;
+// for (uint32 x = 0; x < r - 1; ++x)
+// d(x, r - 1) = dn[x];
+// }
+//
+// assert(r == 2); assert(tree.size() == 2);
+//
+// joined_node* root = new joined_node(tree[0], tree[1], d(0, 1) / 2, d(0, 1) / 2);
+// tree.clear();
+// tree.push_back(root);
//}
//
//// --------------------------------------------------------------------
//
//inline float score(const vector<entry*>& a, const vector<entry*>& b,
-// uint32 ix_a, uint32 ix_b, const substitution_matrix& mat)
+// uint32 ix_a, uint32 ix_b, const substitution_matrix& mat)
//{
-// float result = 0;
-//
-// const float kSSScore[8][8] = {
-// // loop, alphahelix, betabridge, strand, helix_3, helix_5, turn, bend
-// { 0, 0, 0, 0, 0, 0, 0, 0 }, // loop
-// { 0, 3, 0, 0, 0, 0, 0, 0 }, // alphahelix
-// { 0, 0, 3, 2, 0, 0, 0, 0 }, // betabridge
-// { 0, 0, 2, 3, 0, 0, 0, 0 }, // strand
-// { 0, 0, 0, 0, 4, 0, 0, 0 }, // helix_3
-// { 0, 0, 0, 0, 0, 3, 0, 0 }, // helix_5
-// { 0, 0, 0, 0, 0, 0, 2, 0 }, // turn
-// { 0, 0, 0, 0, 0, 0, 0, 1 }, // bend
-// };
-//
-// foreach (const entry* ea, a)
-// {
-// foreach (const entry* eb, b)
-// {
-// assert(ix_a < ea->m_seq.length());
-// assert(ix_b < eb->m_seq.length());
-//
-// aa ra = ea->m_seq[ix_a];
-// aa rb = eb->m_seq[ix_b];
-//
-// if (ra != kSignalGapCode and rb != kSignalGapCode)
-// result += ea->m_weight * eb->m_weight * mat(ra, rb);
-//
-// if (not (ea->m_ss.empty() or eb->m_ss.empty()))
-// {
-// ss ssa = ea->m_ss[ix_a];
-// ss ssb = eb->m_ss[ix_b];
-//
-// assert(ssa < 8); assert(ssb < 8);
-//
-// result += ea->m_weight * eb->m_weight * kSSScore[ssa][ssb];
-// }
-// }
-// }
-//
-// result /= (a.size() * b.size());
-//
-// return result;
+// float result = 0;
+//
+// const float kSSScore[8][8] = {
+// // loop, alphahelix, betabridge, strand, helix_3, helix_5, turn, bend
+// { 0, 0, 0, 0, 0, 0, 0, 0 }, // loop
+// { 0, 3, 0, 0, 0, 0, 0, 0 }, // alphahelix
+// { 0, 0, 3, 2, 0, 0, 0, 0 }, // betabridge
+// { 0, 0, 2, 3, 0, 0, 0, 0 }, // strand
+// { 0, 0, 0, 0, 4, 0, 0, 0 }, // helix_3
+// { 0, 0, 0, 0, 0, 3, 0, 0 }, // helix_5
+// { 0, 0, 0, 0, 0, 0, 2, 0 }, // turn
+// { 0, 0, 0, 0, 0, 0, 0, 1 }, // bend
+// };
+//
+// foreach (const entry* ea, a)
+// {
+// foreach (const entry* eb, b)
+// {
+// assert(ix_a < ea->m_seq.length());
+// assert(ix_b < eb->m_seq.length());
+//
+// aa ra = ea->m_seq[ix_a];
+// aa rb = eb->m_seq[ix_b];
+//
+// if (ra != kSignalGapCode and rb != kSignalGapCode)
+// result += ea->m_weight * eb->m_weight * mat(ra, rb);
+//
+// if (not (ea->m_ss.empty() or eb->m_ss.empty()))
+// {
+// ss ssa = ea->m_ss[ix_a];
+// ss ssb = eb->m_ss[ix_b];
+//
+// assert(ssa < 8); assert(ssb < 8);
+//
+// result += ea->m_weight * eb->m_weight * kSSScore[ssa][ssb];
+// }
+// }
+// }
+//
+// result /= (a.size() * b.size());
+//
+// return result;
//}
//
//// don't ask me, but looking at the clustal code, they substract 0.2 from the table
//// as mentioned in the article in NAR.
//const float kResidueSpecificPenalty[20] = {
-// 1.13f - 0.2f, // A
-// 0.72f - 0.2f, // R
-// 0.63f - 0.2f, // N
-// 0.96f - 0.2f, // D
-// 1.13f - 0.2f, // C
-// 1.07f - 0.2f, // Q
-// 1.31f - 0.2f, // E
-// 0.61f - 0.2f, // G
-// 1.00f - 0.2f, // H
-// 1.32f - 0.2f, // I
-// 1.21f - 0.2f, // L
-// 0.96f - 0.2f, // K
-// 1.29f - 0.2f, // M
-// 1.20f - 0.2f, // F
-// 0.74f - 0.2f, // P
-// 0.76f - 0.2f, // S
-// 0.89f - 0.2f, // T
-// 1.23f - 0.2f, // W
-// 1.00f - 0.2f, // Y
-// 1.25f - 0.2f // V
+// 1.13f - 0.2f, // A
+// 0.72f - 0.2f, // R
+// 0.63f - 0.2f, // N
+// 0.96f - 0.2f, // D
+// 1.13f - 0.2f, // C
+// 1.07f - 0.2f, // Q
+// 1.31f - 0.2f, // E
+// 0.61f - 0.2f, // G
+// 1.00f - 0.2f, // H
+// 1.32f - 0.2f, // I
+// 1.21f - 0.2f, // L
+// 0.96f - 0.2f, // K
+// 1.29f - 0.2f, // M
+// 1.20f - 0.2f, // F
+// 0.74f - 0.2f, // P
+// 0.76f - 0.2f, // S
+// 0.89f - 0.2f, // T
+// 1.23f - 0.2f, // W
+// 1.00f - 0.2f, // Y
+// 1.25f - 0.2f // V
//};
//
//const boost::function<bool(aa)> is_hydrophilic = ba::is_any_of(encode("DEGKNQPRS"));
//
//void adjust_gp(vector<float>& gop, vector<float>& gep, const vector<entry*>& seq)
//{
-// assert(gop.size() == seq.front()->m_seq.length());
-//
-// vector<uint32> gaps(gop.size());
-// vector<bool> hydrophilic_stretch(gop.size(), false);
-// vector<float> residue_specific_penalty(gop.size());
-//
-// foreach (const entry* e, seq)
-// {
-// const sequence& s = e->m_seq;
-// const sec_structure& s2 = e->m_ss;
-//
-// for (uint32 ix = 0; ix < gop.size(); ++ix)
-// {
-// aa r = s[ix];
-//
-// if (r == kSignalGapCode)
-// gaps[ix] += 1;
-//
-// // residue specific gap penalty
-// if (ix < s2.length())
-// {
-// // The output of DSSP is explained extensively under 'explanation'. The very short summary of the output is:
-// // H = alpha helix
-// // B = residue in isolated beta-bridge
-// // E = extended strand, participates in beta ladder
-// // G = 3-helix (3/10 helix)
-// // I = 5 helix (pi helix)
-// // T = hydrogen bonded turn
-// // S = bend
-//
-// switch (s2[ix])
-// {
-// case alphahelix:
-// case helix_5:
-// case helix_3:
-// residue_specific_penalty[ix] += 5.0f;
-// break;
-//
-// case betabridge:
-// case strand:
-// residue_specific_penalty[ix] += 5.0f;
-// break;
-//
-// default:
-// residue_specific_penalty[ix] += 1.0f;
-// break;
-// }
-// }
-// else if (r < 20)
-// residue_specific_penalty[ix] += kResidueSpecificPenalty[r];
-// else
-// residue_specific_penalty[ix] += 1.0f;
-// }
-//
-// // find a run of 5 hydrophilic residues
-// for (uint32 si = 0, i = 0; i <= gop.size(); ++i)
-// {
-// if (i == gop.size() or is_hydrophilic(s[i]) == false)
-// {
-// if (i >= si + 5)
-// {
-// for (uint32 j = si; j < i; ++j)
-// hydrophilic_stretch[j] = true;
-// }
-// si = i + 1;
-// }
-// }
-// }
-//
-// for (int32 ix = 0; ix < static_cast<int32>(gop.size()); ++ix)
-// {
-// // if there is a gap, lower gap open cost
-// if (gaps[ix] > 0)
-// {
-// gop[ix] *= 0.3f * ((seq.size() - gaps[ix]) / float(seq.size()));
-// gep[ix] /= 2;
-// }
-//
-// // else if there is a gap within 8 residues, increase gap cost
-// else
-// {
-// for (int32 d = 0; d < 8; ++d)
-// {
-// if (ix + d >= int32(gaps.size()) or gaps[ix + d] > 0 or
-// ix - d < 0 or gaps[ix - d] > 0)
-// {
-// gop[ix] *= (2 + ((8 - d) * 2)) / 8.f;
-// break;
-// }
-// }
-//
-// if (hydrophilic_stretch[ix])
-// gop[ix] /= 3;
-// else
-// gop[ix] *= (residue_specific_penalty[ix] / seq.size());
-// }
-// }
+// assert(gop.size() == seq.front()->m_seq.length());
+//
+// vector<uint32> gaps(gop.size());
+// vector<bool> hydrophilic_stretch(gop.size(), false);
+// vector<float> residue_specific_penalty(gop.size());
+//
+// foreach (const entry* e, seq)
+// {
+// const sequence& s = e->m_seq;
+// const sec_structure& s2 = e->m_ss;
+//
+// for (uint32 ix = 0; ix < gop.size(); ++ix)
+// {
+// aa r = s[ix];
+//
+// if (r == kSignalGapCode)
+// gaps[ix] += 1;
+//
+// // residue specific gap penalty
+// if (ix < s2.length())
+// {
+// // The output of DSSP is explained extensively under 'explanation'. The very short summary of the output is:
+// // H = alpha helix
+// // B = residue in isolated beta-bridge
+// // E = extended strand, participates in beta ladder
+// // G = 3-helix (3/10 helix)
+// // I = 5 helix (pi helix)
+// // T = hydrogen bonded turn
+// // S = bend
+//
+// switch (s2[ix])
+// {
+// case alphahelix:
+// case helix_5:
+// case helix_3:
+// residue_specific_penalty[ix] += 5.0f;
+// break;
+//
+// case betabridge:
+// case strand:
+// residue_specific_penalty[ix] += 5.0f;
+// break;
+//
+// default:
+// residue_specific_penalty[ix] += 1.0f;
+// break;
+// }
+// }
+// else if (r < 20)
+// residue_specific_penalty[ix] += kResidueSpecificPenalty[r];
+// else
+// residue_specific_penalty[ix] += 1.0f;
+// }
+//
+// // find a run of 5 hydrophilic residues
+// for (uint32 si = 0, i = 0; i <= gop.size(); ++i)
+// {
+// if (i == gop.size() or is_hydrophilic(s[i]) == false)
+// {
+// if (i >= si + 5)
+// {
+// for (uint32 j = si; j < i; ++j)
+// hydrophilic_stretch[j] = true;
+// }
+// si = i + 1;
+// }
+// }
+// }
+//
+// for (int32 ix = 0; ix < static_cast<int32>(gop.size()); ++ix)
+// {
+// // if there is a gap, lower gap open cost
+// if (gaps[ix] > 0)
+// {
+// gop[ix] *= 0.3f * ((seq.size() - gaps[ix]) / float(seq.size()));
+// gep[ix] /= 2;
+// }
+//
+// // else if there is a gap within 8 residues, increase gap cost
+// else
+// {
+// for (int32 d = 0; d < 8; ++d)
+// {
+// if (ix + d >= int32(gaps.size()) or gaps[ix + d] > 0 or
+// ix - d < 0 or gaps[ix - d] > 0)
+// {
+// gop[ix] *= (2 + ((8 - d) * 2)) / 8.f;
+// break;
+// }
+// }
+//
+// if (hydrophilic_stretch[ix])
+// gop[ix] /= 3;
+// else
+// gop[ix] *= (residue_specific_penalty[ix] / seq.size());
+// }
+// }
//}
//
//void print_matrix(ostream& os, const matrix<int8>& tb, const sequence& sx, const sequence& sy)
//{
-// os << ' ';
-// for (uint32 x = 0; x < sx.length(); ++x)
-// os << kAA[sx[x]];
-// os << endl;
-//
-// for (uint32 y = 0; y < sy.length(); ++y)
-// {
-// os << kAA[sy[y]];
-// for (uint32 x = 0; x < sx.length(); ++x)
-// {
-// switch (tb(x, y))
-// {
-// case -1: os << '|'; break;
-// case 0: os << '\\'; break;
-// case 1: os << '-'; break;
-// case 2: os << '.'; break;
-// }
-// }
-// os << endl;
-// }
+// os << ' ';
+// for (uint32 x = 0; x < sx.length(); ++x)
+// os << kAA[sx[x]];
+// os << endl;
+//
+// for (uint32 y = 0; y < sy.length(); ++y)
+// {
+// os << kAA[sy[y]];
+// for (uint32 x = 0; x < sx.length(); ++x)
+// {
+// switch (tb(x, y))
+// {
+// case -1: os << '|'; break;
+// case 0: os << '\\'; break;
+// case 1: os << '-'; break;
+// case 2: os << '.'; break;
+// }
+// }
+// os << endl;
+// }
//}
//
//void align(
-// const joined_node* node,
-// vector<entry*>& a, vector<entry*>& b, vector<entry*>& c,
-// const substitution_matrix_family& mat_fam,
-// float gop, float gep, float magic,
-// bool ignorePositions)
+// const joined_node* node,
+// vector<entry*>& a, vector<entry*>& b, vector<entry*>& c,
+// const substitution_matrix_family& mat_fam,
+// float gop, float gep, float magic,
+// bool ignorePositions)
//{
-// if (VERBOSE > 2)
-// {
-// cerr << "aligning sets" << endl << "a(" << a.front()->m_seq.length() << "): ";
-// foreach (const entry* e, a)
-// cerr << e->m_id << "; ";
-// cerr << endl << "b(" << b.front()->m_seq.length() << "): ";
-// foreach (const entry* e, b)
-// cerr << e->m_id << "; ";
-// cerr << endl << endl;
-// }
-//
-// const float kSentinelValue = -(numeric_limits<float>::max() / 2);
-//
-// const entry* fa = a.front();
-// const entry* fb = b.front();
-//
-// const vector<int16>& pa = fa->m_positions;
-// const vector<int16>& pb = fb->m_positions;
-//
-// int32 x = 0, dimX = fa->m_seq.length(), endX = 0;
-// int32 y = 0, dimY = fb->m_seq.length(), endY = 0;
-//
+// if (VERBOSE > 2)
+// {
+// cerr << "aligning sets" << endl << "a(" << a.front()->m_seq.length() << "): ";
+// foreach (const entry* e, a)
+// cerr << e->m_id << "; ";
+// cerr << endl << "b(" << b.front()->m_seq.length() << "): ";
+// foreach (const entry* e, b)
+// cerr << e->m_id << "; ";
+// cerr << endl << endl;
+// }
+//
+// const float kSentinelValue = -(numeric_limits<float>::max() / 2);
+//
+// const entry* fa = a.front();
+// const entry* fb = b.front();
+//
+// const vector<int16>& pa = fa->m_positions;
+// const vector<int16>& pb = fb->m_positions;
+//
+// int32 x = 0, dimX = fa->m_seq.length(), endX = 0;
+// int32 y = 0, dimY = fb->m_seq.length(), endY = 0;
+//
//#ifdef NDEBUG
-// matrix<float> B(dimX, dimY);
-// matrix<float> Ix(dimX, dimY);
-// matrix<float> Iy(dimX, dimY);
-// matrix<int8> tb(dimX, dimY);
+// matrix<float> B(dimX, dimY);
+// matrix<float> Ix(dimX, dimY);
+// matrix<float> Iy(dimX, dimY);
+// matrix<int8> tb(dimX, dimY);
//#else
-// matrix<float> B(dimX, dimY, kSentinelValue);
-// matrix<float> Ix(dimX, dimY);
-// matrix<float> Iy(dimX, dimY);
-// matrix<int8> tb(dimX, dimY, 2);
+// matrix<float> B(dimX, dimY, kSentinelValue);
+// matrix<float> Ix(dimX, dimY);
+// matrix<float> Iy(dimX, dimY);
+// matrix<int8> tb(dimX, dimY, 2);
//#endif
-//
-// const substitution_matrix& smat = mat_fam(abs(node->m_d_left + node->m_d_right), true);
-//
-// float minLength = static_cast<float>(dimX), maxLength = static_cast<float>(dimY);
-// if (minLength > maxLength)
-// swap(minLength, maxLength);
-//
-// float logmin = 1.0f / log10(minLength);
-// float logdiff = 1.0f + 0.5f * log10(minLength / maxLength);
-//
-// // initial gap open cost, 0.05f is the remaining magical number here...
-// gop = (gop / (logdiff * logmin)) * abs(smat.mismatch_average()) * smat.scale_factor() * magic;
-//
-// float avg_weight_a = accumulate(a.begin(), a.end(), 0.f, sum_weight()) / a.size();
-// float avg_weight_b = accumulate(b.begin(), b.end(), 0.f, sum_weight()) / b.size();
-//
-// // position specific gap penalties
-// // initial gap extend cost is adjusted for difference in sequence lengths
-// vector<float> gop_a(dimX, gop * avg_weight_a),
-// gep_a(dimX, gep * (1 + log10(float(dimX) / dimY)) * avg_weight_a);
-// adjust_gp(gop_a, gep_a, a);
-//
-// vector<float> gop_b(dimY, gop * avg_weight_b),
-// gep_b(dimY, gep * (1 + log10(float(dimY) / dimX)) * avg_weight_b);
-// adjust_gp(gop_b, gep_b, b);
-//
-// // normally, startX is 0 and endX is dimX, however, when there are fixed
-// // positions, we only take into account the sub matrices that are allowed
-//
-// if (ignorePositions or pa.empty() or pb.empty())
-// {
-// endX = dimX;
-// endY = dimY;
-// }
-//
-// int32 highX = 0, highY = 0;
-//
-// while (x < dimX and y < dimY)
-// {
-// if (x == endX and y == endY)
-// {
-// if (pa[x] == pb[y] and pa[x] != 0)
-// {
-// tb(x, y) = 0;
-// highX = x;
-// highY = y;
-// ++x; ++endX;
-// ++y; ++endY;
-// continue;
-// }
-// }
-//
-// while (endX < dimX or endY < dimY)
-// {
-// if (endX < dimX and pa[endX] == 0)
-// {
-// ++endX;
-// continue;
-// }
-//
-// if (endY < dimY and pb[endY] == 0)
-// {
-// ++endY;
-// continue;
-// }
-//
-// if (endX < dimX and endY < dimY and pa[endX] == pb[endY] and pa[endX] != 0)
-// break;
-//
-// if (endX < dimX)
-// {
-// while (endX < dimX and (endY == dimY or pa[endX] < pb[endY]))
-// ++endX;
-// }
-//
-// if (endY < dimY)
-// {
-// while (endY < dimY and (endX == dimX or pb[endY] < pa[endX]))
-// ++endY;
-// }
-//
-// if (endX < dimX and endY < dimY and pa[endX] != pb[endY])
-// continue;
-//
-// break;
-// }
-//
-// Ix(x, y) = 0;
-// Iy(x, y) = 0;
-//
-// float high = 0;
-// int32 startX = x, startY = y;
-//
-// if (y > 0)
-// {
-// for (int32 ix = x; ix < endX; ++ix)
-// tb(ix, y - 1) = 1;
-// }
-//
-// if (x > 0)
-// {
-// for (int32 iy = y; iy < endY; ++iy)
-// tb(x - 1, iy) = -1;
-// }
-//
-// for (x = startX; x < endX; ++x)
-// {
-// for (y = startY; y < endY; ++y)
-// {
-// float Ix1 = 0; if (x > startX) Ix1 = Ix(x - 1, y);
-// float Iy1 = 0; if (y > startY) Iy1 = Iy(x, y - 1);
-//
-// float M = score(a, b, x, y, smat);
-// if (x > startX and y > startY)
-// M += B(x - 1, y - 1);
-//
-// float s;
-// if (M >= Ix1 and M >= Iy1)
-// {
-// tb(x, y) = 0;
-// B(x, y) = s = M;
-// }
-// else if (Ix1 >= Iy1)
-// {
-// tb(x, y) = 1;
-// B(x, y) = s = Ix1;
-// }
-// else
-// {
-// tb(x, y) = -1;
-// B(x, y) = s = Iy1;
-// }
-//
-// if ((x == endX - 1 or y == endY - 1) and high <= s)
-// {
-// high = s;
-// highX = x;
-// highY = y;
-// }
-//
-// Ix(x, y) = max(M - (x < dimX - 1 ? gop_a[x] : 0), Ix1 - gep_a[x]);
-// Iy(x, y) = max(M - (y < dimY - 1 ? gop_b[y] : 0), Iy1 - gep_b[y]);
-// }
-// }
-//
-// if (endY > 0)
-// {
-// for (x = highX + 1; x < endX; ++x)
-// tb(x, endY - 1) = 1;
-// }
-//
-// if (endX > 0)
-// {
-// for (y = highY + 1; y < endY; ++y)
-// tb(endX - 1, y) = -1;
-// }
-//
-// x = endX;
-// y = endY;
-// }
-//
-// if (endY > 0)
-// {
-// for (x = highX + 1; x < dimX; ++x)
-// tb(x, endY - 1) = 1;
-// }
-//
-// if (endX > 0)
-// {
-// for (y = highY + 1; y < dimY; ++y)
-// tb(endX - 1, y) = -1;
-// }
-//
-// // build the alignment
-// x = dimX - 1;
-// y = dimY - 1;
-//
-// if (VERBOSE >= 6)
-// print_matrix(cerr, tb, fa->m_seq, fb->m_seq);
-//
-// // trace back the matrix
-// while (x >= 0 and y >= 0)
-// {
-// switch (tb(x, y))
-// {
-// case -1:
-// foreach (entry* e, a)
-// e->insert_gap(x + 1);
-// --y;
-// break;
-//
-// case 1:
-// foreach (entry* e, b)
-// e->insert_gap(y + 1);
-// --x;
-// break;
-//
-// case 0:
-// --x;
-// --y;
-// break;
-//
-// default:
-// assert(false);
-// break;
-// }
-// }
-//
-// // and finally insert start-gaps
-// while (x >= 0)
-// {
-// foreach (entry* e, b)
-// e->insert_gap(y + 1);
-// --x;
-// }
-//
-// while (y >= 0)
-// {
-// foreach (entry* e, a)
-// e->insert_gap(x + 1);
-// --y;
-// }
-//
-// c.reserve(a.size() + b.size());
-// copy(a.begin(), a.end(), back_inserter(c));
-// copy(b.begin(), b.end(), back_inserter(c));
-//
-// // copy over the pdb_nrs to the first line
-// if (not ignorePositions and not pa.empty())
-// {
-// assert(pa.size() == pb.size());
-// vector<int16>& pc = c.front()->m_positions;
-//
-// transform(
-// pa.begin(), pa.end(),
-// pb.begin(),
-// pc.begin(),
-// max_pdb_nr());
-// }
-//
-// if (VERBOSE >= 2)
-// report(c, cerr, "clustalw");
+//
+// const substitution_matrix& smat = mat_fam(abs(node->m_d_left + node->m_d_right), true);
+//
+// float minLength = static_cast<float>(dimX), maxLength = static_cast<float>(dimY);
+// if (minLength > maxLength)
+// swap(minLength, maxLength);
+//
+// float logmin = 1.0f / log10(minLength);
+// float logdiff = 1.0f + 0.5f * log10(minLength / maxLength);
+//
+// // initial gap open cost, 0.05f is the remaining magical number here...
+// gop = (gop / (logdiff * logmin)) * abs(smat.mismatch_average()) * smat.scale_factor() * magic;
+//
+// float avg_weight_a = accumulate(a.begin(), a.end(), 0.f, sum_weight()) / a.size();
+// float avg_weight_b = accumulate(b.begin(), b.end(), 0.f, sum_weight()) / b.size();
+//
+// // position specific gap penalties
+// // initial gap extend cost is adjusted for difference in sequence lengths
+// vector<float> gop_a(dimX, gop * avg_weight_a),
+// gep_a(dimX, gep * (1 + log10(float(dimX) / dimY)) * avg_weight_a);
+// adjust_gp(gop_a, gep_a, a);
+//
+// vector<float> gop_b(dimY, gop * avg_weight_b),
+// gep_b(dimY, gep * (1 + log10(float(dimY) / dimX)) * avg_weight_b);
+// adjust_gp(gop_b, gep_b, b);
+//
+// // normally, startX is 0 and endX is dimX, however, when there are fixed
+// // positions, we only take into account the sub matrices that are allowed
+//
+// if (ignorePositions or pa.empty() or pb.empty())
+// {
+// endX = dimX;
+// endY = dimY;
+// }
+//
+// int32 highX = 0, highY = 0;
+//
+// while (x < dimX and y < dimY)
+// {
+// if (x == endX and y == endY)
+// {
+// if (pa[x] == pb[y] and pa[x] != 0)
+// {
+// tb(x, y) = 0;
+// highX = x;
+// highY = y;
+// ++x; ++endX;
+// ++y; ++endY;
+// continue;
+// }
+// }
+//
+// while (endX < dimX or endY < dimY)
+// {
+// if (endX < dimX and pa[endX] == 0)
+// {
+// ++endX;
+// continue;
+// }
+//
+// if (endY < dimY and pb[endY] == 0)
+// {
+// ++endY;
+// continue;
+// }
+//
+// if (endX < dimX and endY < dimY and pa[endX] == pb[endY] and pa[endX] != 0)
+// break;
+//
+// if (endX < dimX)
+// {
+// while (endX < dimX and (endY == dimY or pa[endX] < pb[endY]))
+// ++endX;
+// }
+//
+// if (endY < dimY)
+// {
+// while (endY < dimY and (endX == dimX or pb[endY] < pa[endX]))
+// ++endY;
+// }
+//
+// if (endX < dimX and endY < dimY and pa[endX] != pb[endY])
+// continue;
+//
+// break;
+// }
+//
+// Ix(x, y) = 0;
+// Iy(x, y) = 0;
+//
+// float high = 0;
+// int32 startX = x, startY = y;
+//
+// if (y > 0)
+// {
+// for (int32 ix = x; ix < endX; ++ix)
+// tb(ix, y - 1) = 1;
+// }
+//
+// if (x > 0)
+// {
+// for (int32 iy = y; iy < endY; ++iy)
+// tb(x - 1, iy) = -1;
+// }
+//
+// for (x = startX; x < endX; ++x)
+// {
+// for (y = startY; y < endY; ++y)
+// {
+// float Ix1 = 0; if (x > startX) Ix1 = Ix(x - 1, y);
+// float Iy1 = 0; if (y > startY) Iy1 = Iy(x, y - 1);
+//
+// float M = score(a, b, x, y, smat);
+// if (x > startX and y > startY)
+// M += B(x - 1, y - 1);
+//
+// float s;
+// if (M >= Ix1 and M >= Iy1)
+// {
+// tb(x, y) = 0;
+// B(x, y) = s = M;
+// }
+// else if (Ix1 >= Iy1)
+// {
+// tb(x, y) = 1;
+// B(x, y) = s = Ix1;
+// }
+// else
+// {
+// tb(x, y) = -1;
+// B(x, y) = s = Iy1;
+// }
+//
+// if ((x == endX - 1 or y == endY - 1) and high <= s)
+// {
+// high = s;
+// highX = x;
+// highY = y;
+// }
+//
+// Ix(x, y) = max(M - (x < dimX - 1 ? gop_a[x] : 0), Ix1 - gep_a[x]);
+// Iy(x, y) = max(M - (y < dimY - 1 ? gop_b[y] : 0), Iy1 - gep_b[y]);
+// }
+// }
+//
+// if (endY > 0)
+// {
+// for (x = highX + 1; x < endX; ++x)
+// tb(x, endY - 1) = 1;
+// }
+//
+// if (endX > 0)
+// {
+// for (y = highY + 1; y < endY; ++y)
+// tb(endX - 1, y) = -1;
+// }
+//
+// x = endX;
+// y = endY;
+// }
+//
+// if (endY > 0)
+// {
+// for (x = highX + 1; x < dimX; ++x)
+// tb(x, endY - 1) = 1;
+// }
+//
+// if (endX > 0)
+// {
+// for (y = highY + 1; y < dimY; ++y)
+// tb(endX - 1, y) = -1;
+// }
+//
+// // build the alignment
+// x = dimX - 1;
+// y = dimY - 1;
+//
+// if (VERBOSE >= 6)
+// print_matrix(cerr, tb, fa->m_seq, fb->m_seq);
+//
+// // trace back the matrix
+// while (x >= 0 and y >= 0)
+// {
+// switch (tb(x, y))
+// {
+// case -1:
+// foreach (entry* e, a)
+// e->insert_gap(x + 1);
+// --y;
+// break;
+//
+// case 1:
+// foreach (entry* e, b)
+// e->insert_gap(y + 1);
+// --x;
+// break;
+//
+// case 0:
+// --x;
+// --y;
+// break;
+//
+// default:
+// assert(false);
+// break;
+// }
+// }
+//
+// // and finally insert start-gaps
+// while (x >= 0)
+// {
+// foreach (entry* e, b)
+// e->insert_gap(y + 1);
+// --x;
+// }
+//
+// while (y >= 0)
+// {
+// foreach (entry* e, a)
+// e->insert_gap(x + 1);
+// --y;
+// }
+//
+// c.reserve(a.size() + b.size());
+// copy(a.begin(), a.end(), back_inserter(c));
+// copy(b.begin(), b.end(), back_inserter(c));
+//
+// // copy over the pdb_nrs to the first line
+// if (not ignorePositions and not pa.empty())
+// {
+// assert(pa.size() == pb.size());
+// vector<int16>& pc = c.front()->m_positions;
+//
+// transform(
+// pa.begin(), pa.end(),
+// pb.begin(),
+// pc.begin(),
+// max_pdb_nr());
+// }
+//
+// if (VERBOSE >= 2)
+// report(c, cerr, "clustalw");
//}
//
//void createAlignment(joined_node* node, vector<entry*>& alignment,
-// const substitution_matrix_family& mat, float gop, float gep, float magic,
-// progress& pr)
+// const substitution_matrix_family& mat, float gop, float gep, float magic,
+// progress& pr)
//{
-// vector<entry*> a, b;
-//
-// if (MULTI_THREADED)
-// {
-// boost::thread_group t;
-//
-// if (dynamic_cast<leaf_node*>(node->left()) != NULL)
-// a.push_back(&static_cast<leaf_node*>(node->left())->m_entry);
-// else
-// t.create_thread(boost::bind(&createAlignment,
-// static_cast<joined_node*>(node->left()), boost::ref(a), boost::ref(mat), gop, gep, magic,
-// boost::ref(pr)));
-//
-// if (dynamic_cast<leaf_node*>(node->right()) != NULL)
-// b.push_back(&static_cast<leaf_node*>(node->right())->m_entry);
-// else
-// t.create_thread(boost::bind(&createAlignment,
-// static_cast<joined_node*>(node->right()), boost::ref(b), boost::ref(mat), gop, gep, magic,
-// boost::ref(pr)));
-//
-// t.join_all();
-// }
-// else
-// {
-// if (dynamic_cast<leaf_node*>(node->left()) != NULL)
-// a.push_back(&static_cast<leaf_node*>(node->left())->m_entry);
-// else
-// createAlignment(static_cast<joined_node*>(node->left()), a, mat, gop, gep, magic, pr);
-//
-// if (dynamic_cast<leaf_node*>(node->right()) != NULL)
-// b.push_back(&static_cast<leaf_node*>(node->right())->m_entry);
-// else
-// createAlignment(static_cast<joined_node*>(node->right()), b, mat, gop, gep, magic, pr);
-// }
-//
-// align(node, a, b, alignment, mat, gop, gep, magic, false);
-//
-// pr.step(node->cost());
+// vector<entry*> a, b;
+//
+// if (MULTI_THREADED)
+// {
+// boost::thread_group t;
+//
+// if (dynamic_cast<leaf_node*>(node->left()) != NULL)
+// a.push_back(&static_cast<leaf_node*>(node->left())->m_entry);
+// else
+// t.create_thread(boost::bind(&createAlignment,
+// static_cast<joined_node*>(node->left()), boost::ref(a), boost::ref(mat), gop, gep, magic,
+// boost::ref(pr)));
+//
+// if (dynamic_cast<leaf_node*>(node->right()) != NULL)
+// b.push_back(&static_cast<leaf_node*>(node->right())->m_entry);
+// else
+// t.create_thread(boost::bind(&createAlignment,
+// static_cast<joined_node*>(node->right()), boost::ref(b), boost::ref(mat), gop, gep, magic,
+// boost::ref(pr)));
+//
+// t.join_all();
+// }
+// else
+// {
+// if (dynamic_cast<leaf_node*>(node->left()) != NULL)
+// a.push_back(&static_cast<leaf_node*>(node->left())->m_entry);
+// else
+// createAlignment(static_cast<joined_node*>(node->left()), a, mat, gop, gep, magic, pr);
+//
+// if (dynamic_cast<leaf_node*>(node->right()) != NULL)
+// b.push_back(&static_cast<leaf_node*>(node->right())->m_entry);
+// else
+// createAlignment(static_cast<joined_node*>(node->right()), b, mat, gop, gep, magic, pr);
+// }
+//
+// align(node, a, b, alignment, mat, gop, gep, magic, false);
+//
+// pr.step(node->cost());
//}
//
//void shuffle(vector<entry*> alignment, substitution_matrix_family& mat, float gop, float gep, float magic)
//{
-// progress pr("reshuffling alignments", alignment.size());
-// for (uint32 i = 0; i < alignment.size(); ++i)
-// {
-// vector<entry*> a(alignment), b;
-//
-// alignment.clear();
-// b.push_back(a[i]);
-// a.erase(a.begin() + i);
-//
-// b.front()->remove_gaps();
-//
-// for (uint32 j = 0; j < a.front()->m_seq.length(); ++j)
-// {
-// bool empty = true;
-// for (uint32 k = 0; empty and k < a.size(); ++k)
-// empty = a[k]->m_seq[j] == kSignalGapCode;
-//
-// if (empty)
-// {
-// for (uint32 k = 0; empty and k < a.size(); ++k)
-// a[k]->remove_gap(j);
-// }
-// }
-//
-// joined_node node(new leaf_node(*a.front()), new leaf_node(*b.front()), 0.5, 0.5);
-// align(&node, a, b, alignment, mat, gop, gep, magic, true);
-//
-// pr.step(1);
-// }
+// progress pr("reshuffling alignments", alignment.size());
+// for (uint32 i = 0; i < alignment.size(); ++i)
+// {
+// vector<entry*> a(alignment), b;
+//
+// alignment.clear();
+// b.push_back(a[i]);
+// a.erase(a.begin() + i);
+//
+// b.front()->remove_gaps();
+//
+// for (uint32 j = 0; j < a.front()->m_seq.length(); ++j)
+// {
+// bool empty = true;
+// for (uint32 k = 0; empty and k < a.size(); ++k)
+// empty = a[k]->m_seq[j] == kSignalGapCode;
+//
+// if (empty)
+// {
+// for (uint32 k = 0; empty and k < a.size(); ++k)
+// a[k]->remove_gap(j);
+// }
+// }
+//
+// joined_node node(new leaf_node(*a.front()), new leaf_node(*b.front()), 0.5, 0.5);
+// align(&node, a, b, alignment, mat, gop, gep, magic, true);
+//
+// pr.step(1);
+// }
//}
//
//void align3d(const vector<string>& input,
-// uint32 iterations, substitution_matrix_family& mat, float gop, float gep, float magic,
-// vector<entry*>& alignment)
+// uint32 iterations, substitution_matrix_family& mat, float gop, float gep, float magic,
+// vector<entry*>& alignment)
//{
-// if (input.size() != 2)
-// throw mas_exception("Structure alignment is limited to two structures at the moment");
-//
-// char chainA = 0;
-// string fileNameA = input[0];
-// if (fileNameA.length() > 2 and fileNameA[fileNameA.length() - 2] == ',')
-// {
-// chainA = fileNameA[fileNameA.length() - 1];
-// fileNameA.erase(fileNameA.begin() + fileNameA.length() - 2, fileNameA.end());
-// }
-//
-// ifstream fileA(fileNameA.c_str(), ios_base::in | ios_base::binary);
-// if (not fileA.is_open())
-// throw mas_exception(boost::format("Could not open file '%1%'") % fileNameA);
-//
-// io::filtering_stream<io::input> inA;
-// if (ba::ends_with(fileNameA, ".bz2"))
-// inA.push(io::bzip2_decompressor());
-// else if (ba::ends_with(fileNameA, ".gz"))
-// inA.push(io::gzip_decompressor());
-//
-// inA.push(fileA);
-//
-// char chainB = 0;
-// string fileNameB = input[1];
-// if (fileNameB.length() > 2 and fileNameB[fileNameB.length() - 2] == ',')
-// {
-// chainB = fileNameB[fileNameB.length() - 1];
-// fileNameB.erase(fileNameB.begin() + fileNameB.length() - 2, fileNameB.end());
-// }
-//
-// ifstream fileB(fileNameB.c_str(), ios_base::in | ios_base::binary);
-// if (not fileB.is_open())
-// throw mas_exception(boost::format("Could not open file '%1%'") % fileNameB);
-//
-// io::filtering_stream<io::input> inB;
-// if (ba::ends_with(fileNameB, ".bz2"))
-// inB.push(io::bzip2_decompressor());
-// else if (ba::ends_with(fileNameB, ".gz"))
-// inB.push(io::gzip_decompressor());
-//
-// inB.push(fileB);
-//
-// align_structures(fileA, fileB, chainA, chainB, iterations, mat, gop, gep, magic, alignment);
+// if (input.size() != 2)
+// throw mas_exception("Structure alignment is limited to two structures at the moment");
+//
+// char chainA = 0;
+// string fileNameA = input[0];
+// if (fileNameA.length() > 2 and fileNameA[fileNameA.length() - 2] == ',')
+// {
+// chainA = fileNameA[fileNameA.length() - 1];
+// fileNameA.erase(fileNameA.begin() + fileNameA.length() - 2, fileNameA.end());
+// }
+//
+// ifstream fileA(fileNameA.c_str(), ios_base::in | ios_base::binary);
+// if (not fileA.is_open())
+// throw mas_exception(boost::format("Could not open file '%1%'") % fileNameA);
+//
+// io::filtering_stream<io::input> inA;
+// if (ba::ends_with(fileNameA, ".bz2"))
+// inA.push(io::bzip2_decompressor());
+// else if (ba::ends_with(fileNameA, ".gz"))
+// inA.push(io::gzip_decompressor());
+//
+// inA.push(fileA);
+//
+// char chainB = 0;
+// string fileNameB = input[1];
+// if (fileNameB.length() > 2 and fileNameB[fileNameB.length() - 2] == ',')
+// {
+// chainB = fileNameB[fileNameB.length() - 1];
+// fileNameB.erase(fileNameB.begin() + fileNameB.length() - 2, fileNameB.end());
+// }
+//
+// ifstream fileB(fileNameB.c_str(), ios_base::in | ios_base::binary);
+// if (not fileB.is_open())
+// throw mas_exception(boost::format("Could not open file '%1%'") % fileNameB);
+//
+// io::filtering_stream<io::input> inB;
+// if (ba::ends_with(fileNameB, ".bz2"))
+// inB.push(io::bzip2_decompressor());
+// else if (ba::ends_with(fileNameB, ".gz"))
+// inB.push(io::gzip_decompressor());
+//
+// inB.push(fileB);
+//
+// align_structures(fileA, fileB, chainA, chainB, iterations, mat, gop, gep, magic, alignment);
//}
//
//void align2d(const vector<string>& input, char chain, vector<entry>& data,
-// substitution_matrix_family& mat, float gop, float gep, float magic,
-// vector<entry*>& alignment)
+// substitution_matrix_family& mat, float gop, float gep, float magic,
+// vector<entry*>& alignment)
//{
-// fs::path path;
-//
-// joined_node* root;
-//
-// foreach (string infile, input)
-// {
-// char iChain = chain;
-// if (infile.length() > 2 and infile[infile.length() - 2] == ',')
-// {
-// iChain = infile[infile.length() - 1];
-// infile.erase(infile.length() - 2, string::npos);
-// }
-//
-// io::filtering_stream<io::input> in;
-// auto_ptr<ifstream> file;
-//
-// if (infile == "stdin")
-// in.push(cin);
-// else
-// {
-// file.reset(new ifstream(infile.c_str(), ios_base::in | ios_base::binary));
-// if (not file->is_open())
-// throw mas_exception(boost::format("Could not open file '%1%'") % infile);
-//
-// if (ba::ends_with(infile, ".bz2"))
-// {
-// in.push(io::bzip2_decompressor());
-// infile.erase(infile.length() - 4, string::npos);
-// }
-// else if (ba::ends_with(infile, ".gz"))
-// {
-// in.push(io::gzip_decompressor());
-// infile.erase(infile.length() - 3, string::npos);
-// }
-//
-// in.push(*file);
-// }
-//
-// fs::path path(infile);
-// if (path.extension() == ".hssp")
-// readAlignmentFromHsspFile(in, iChain, data);
-//// else if (path.extension() == ".mapping")
-//// readWhatifMappingFile(in, data);
-// else if (path.extension() == ".ids")
-// readFamilyIdsFile(in, path.branch_path(), data);
-// else if (path.extension() == ".pdb" or (ba::starts_with(path.leaf(), "pdb") and path.extension() == ".ent"))
-// {
-// readPDB(in, iChain, data);
-// data.back().m_positions.clear();
-// }
-// else
-// readFasta(in, data);
-// }
-//
-//// if (vm.count("ignore-pos-nr"))
-//// for_each(data.begin(), data.end(), boost::bind(&entry::dump_positions, _1));
-//
-// if (data.size() < 2)
-// throw mas_exception("insufficient number of sequences");
-//
-// if (data.size() == 2)
-// {
-// // no need to do difficult stuff, just align two sequences:
-// float dist = calculateDistance(data[0], data[1]);
-// root = new joined_node(new leaf_node(data[0]), new leaf_node(data[1]), dist / 2, dist / 2);
-// }
-// else
-// {
-// // create the leaf nodes
-// vector<base_node*> tree;
-// tree.reserve(data.size());
-// foreach (entry& e, data)
-// tree.push_back(new leaf_node(e));
-//
-// // calculate guide tree
-//// if (vm.count("guide-tree"))
-//// useGuideTree(vm["guide-tree"].as<string>(), tree);
-//// else
-//// {
-// // a distance matrix
-// symmetric_matrix<float> d(data.size());
-// calculateDistanceMatrix(d, data);
-// joinNeighbours(d, tree);
-//// }
-//
-// root = static_cast<joined_node*>(tree.front());
-// }
-//
-// if (VERBOSE)
-// cerr << *root << ';' << endl;
-//
-// progress pr("calculating alignments", root->cumulative_cost());
-// createAlignment(root, alignment, mat, gop, gep, magic, pr);
+// fs::path path;
+//
+// joined_node* root;
+//
+// foreach (string infile, input)
+// {
+// char iChain = chain;
+// if (infile.length() > 2 and infile[infile.length() - 2] == ',')
+// {
+// iChain = infile[infile.length() - 1];
+// infile.erase(infile.length() - 2, string::npos);
+// }
+//
+// io::filtering_stream<io::input> in;
+// auto_ptr<ifstream> file;
+//
+// if (infile == "stdin")
+// in.push(cin);
+// else
+// {
+// file.reset(new ifstream(infile.c_str(), ios_base::in | ios_base::binary));
+// if (not file->is_open())
+// throw mas_exception(boost::format("Could not open file '%1%'") % infile);
+//
+// if (ba::ends_with(infile, ".bz2"))
+// {
+// in.push(io::bzip2_decompressor());
+// infile.erase(infile.length() - 4, string::npos);
+// }
+// else if (ba::ends_with(infile, ".gz"))
+// {
+// in.push(io::gzip_decompressor());
+// infile.erase(infile.length() - 3, string::npos);
+// }
+//
+// in.push(*file);
+// }
+//
+// fs::path path(infile);
+// if (path.extension() == ".hssp")
+// readAlignmentFromHsspFile(in, iChain, data);
+//// else if (path.extension() == ".mapping")
+//// readWhatifMappingFile(in, data);
+// else if (path.extension() == ".ids")
+// readFamilyIdsFile(in, path.branch_path(), data);
+// else if (path.extension() == ".pdb" or (ba::starts_with(path.leaf(), "pdb") and path.extension() == ".ent"))
+// {
+// readPDB(in, iChain, data);
+// data.back().m_positions.clear();
+// }
+// else
+// readFasta(in, data);
+// }
+//
+//// if (vm.count("ignore-pos-nr"))
+//// for_each(data.begin(), data.end(), boost::bind(&entry::dump_positions, _1));
+//
+// if (data.size() < 2)
+// throw mas_exception("insufficient number of sequences");
+//
+// if (data.size() == 2)
+// {
+// // no need to do difficult stuff, just align two sequences:
+// float dist = calculateDistance(data[0], data[1]);
+// root = new joined_node(new leaf_node(data[0]), new leaf_node(data[1]), dist / 2, dist / 2);
+// }
+// else
+// {
+// // create the leaf nodes
+// vector<base_node*> tree;
+// tree.reserve(data.size());
+// foreach (entry& e, data)
+// tree.push_back(new leaf_node(e));
+//
+// // calculate guide tree
+//// if (vm.count("guide-tree"))
+//// useGuideTree(vm["guide-tree"].as<string>(), tree);
+//// else
+//// {
+// // a distance matrix
+// symmetric_matrix<float> d(data.size());
+// calculateDistanceMatrix(d, data);
+// joinNeighbours(d, tree);
+//// }
+//
+// root = static_cast<joined_node*>(tree.front());
+// }
+//
+// if (VERBOSE)
+// cerr << *root << ';' << endl;
+//
+// progress pr("calculating alignments", root->cumulative_cost());
+// createAlignment(root, alignment, mat, gop, gep, magic, pr);
//}
//
//int main(int argc, char* argv[])
//{
-// try
-// {
-// po::options_description desc("mas options");
-// desc.add_options()
-// ("help,h", "Display help message")
-// ("input,i", po::value<vector<string>>(),
-// "Input file(s) (use stdin for input from STDIN)")
-// ("outfile,o", po::value<string>(), "Output file, use 'stdout' to output to screen")
-// ("format,f", po::value<string>(), "Output format, can be clustalw (default) or fasta")
-// ("outtree", po::value<string>(), "Write guide tree")
-// ("debug,d", po::value<int>(), "Debug output level")
-// ("verbose,v", "Verbose output")
-// ("no-threads,T", "Avoid multi-threading (=debug option)")
-// ("guide-tree,g",po::value<string>(), "use existing guide tree")
-// ("matrix,m", po::value<string>(), "Substitution matrix, default is PAM")
-// ("gap-open", po::value<float>(), "Gap open penalty")
-// ("gap-extend", po::value<float>(), "Gap extend penalty")
-// ("magic", po::value<float>(), "Magical number")
-// ("chain,c", po::value<char>(), "Chain ID to select (from HSSP input)")
-// ("ignore-pos-nr", "Do not use position/PDB nr in scoring")
-// ("3d", "Do a structure alignment")
-// ("iterations,I",po::value<uint32>(), "Number of iterations in 3d alignment [default = 5]")
-// ("ss", "Read secondary structure files")
-// ("shuffle", "Refine alignment")
-// ;
-//
-// po::positional_options_description p;
-// p.add("input", -1);
-//
-// po::variables_map vm;
-// po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
-// po::notify(vm);
-//
-// VERBOSE = vm.count("verbose");
-// if (vm.count("debug"))
-// VERBOSE = vm["debug"].as<int>();
-//
-// if (vm.count("no-threads"))
-// MULTI_THREADED = 0;
-//
-// if (vm.count("help") or (vm.count("input") == 0 and vm.count("3d-a") == 0))
-// {
-// cerr << desc << endl;
-// exit(1);
-// }
-//
-// // matrix
-// string matrix;
-// float gop, gep, magic;
-//
-// if (vm.count("input")) // regular alignments
-// {
-// matrix = "BLOSUM";
-// gop = 10;
-// gep = 0.2;
-// magic = 0.1;
-// }
-// else // 3d alignments
-// {
-// matrix = "GONNET";
-// gop = 2;
-// gep = 0.02;
-// magic = 0.1;
-// }
-//
-// if (vm.count("matrix"))
-// matrix = vm["matrix"].as<string>();
-// substitution_matrix_family mat(matrix);
-//
-// if (vm.count("gap-open")) gop = vm["gap-open"].as<float>();
-// if (vm.count("gap-extend")) gep = vm["gap-extend"].as<float>();
-// if (vm.count("magic")) magic = vm["magic"].as<float>();
-//
-// char chain = 0;
-// if (vm.count("chain"))
-// chain = vm["chain"].as<char>();
-//
-// vector<entry> data;
-// vector<entry*> alignment;
-//
-// if (vm.count("3d"))
-// {
-// uint32 iterations = 5;
-// if (vm.count("iterations"))
-// iterations = vm["iterations"].as<uint32>();
-//
-// align3d(vm["input"].as<vector<string>>(), iterations, mat, gop, gep, magic, alignment);
-// }
-// else
-// {
-// align2d(vm["input"].as<vector<string>>(), chain, data, mat, gop, gep, magic, alignment);
-// }
-//
-// string outfile = "stdout";
-// if (vm.count("outfile") != 0)
-// outfile = vm["outfile"].as<string>();
-//
-//// if (vm.count("outtree"))
-//// {
-//// fs::path treepath = vm["outtree"].as<string>();
-//// treepath = treepath.parent_path() / (treepath.stem() + ".dnd");
-//// fs::ofstream file(treepath);
-//// if (not file.is_open())
-//// throw mas_exception(boost::format("Failed to write guide tree %1%") % treepath.string());
-//// file << *root << ';' << endl;
-//// }
-//
-// sort(alignment.begin(), alignment.end(),
-// boost::bind(&entry::nr, _1) < boost::bind(&entry::nr, _2));
-//
-// if (vm.count("shuffle"))
-// {
-// shuffle(alignment, mat, gop, gep, magic);
-// shuffle(alignment, mat, gop, gep, magic);
-// }
-//
-// string format = "clustalw";
-// if (vm.count("format"))
-// format = vm["format"].as<string>();
-//
-// if (vm.count("output"))
-// {
-// string output = vm["output"].as<string>();
-//
-// ofstream outfile(output.c_str(), ios_base::out|ios_base::trunc|ios_base::binary);
-// if (not outfile.is_open())
-// throw runtime_error("could not create output file");
-//
-// io::filtering_stream<io::output> out;
-// if (ba::ends_with(output, ".bz2"))
-// out.push(io::bzip2_compressor());
-// else if (ba::ends_with(output, ".gz"))
-// out.push(io::gzip_compressor());
-//
-// out.push(outfile);
-//
-// report(alignment, out, format);
-// }
-// else
-// report(alignment, cout, format);
-//
-//// delete root;
-// }
-// catch (const exception& e)
-// {
-// cerr << e.what() << endl;
-// exit(1);
-// }
-//
-// return 0;
+// try
+// {
+// po::options_description desc("mas options");
+// desc.add_options()
+// ("help,h", "Display help message")
+// ("input,i", po::value<vector<string>>(),
+// "Input file(s) (use stdin for input from STDIN)")
+// ("outfile,o", po::value<string>(), "Output file, use 'stdout' to output to screen")
+// ("format,f", po::value<string>(), "Output format, can be clustalw (default) or fasta")
+// ("outtree", po::value<string>(), "Write guide tree")
+// ("debug,d", po::value<int>(), "Debug output level")
+// ("verbose,v", "Verbose output")
+// ("no-threads,T", "Avoid multi-threading (=debug option)")
+// ("guide-tree,g",po::value<string>(), "use existing guide tree")
+// ("matrix,m", po::value<string>(), "Substitution matrix, default is PAM")
+// ("gap-open", po::value<float>(), "Gap open penalty")
+// ("gap-extend", po::value<float>(), "Gap extend penalty")
+// ("magic", po::value<float>(), "Magical number")
+// ("chain,c", po::value<char>(), "Chain ID to select (from HSSP input)")
+// ("ignore-pos-nr", "Do not use position/PDB nr in scoring")
+// ("3d", "Do a structure alignment")
+// ("iterations,I",po::value<uint32>(), "Number of iterations in 3d alignment [default = 5]")
+// ("ss", "Read secondary structure files")
+// ("shuffle", "Refine alignment")
+// ;
+//
+// po::positional_options_description p;
+// p.add("input", -1);
+//
+// po::variables_map vm;
+// po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
+// po::notify(vm);
+//
+// VERBOSE = vm.count("verbose");
+// if (vm.count("debug"))
+// VERBOSE = vm["debug"].as<int>();
+//
+// if (vm.count("no-threads"))
+// MULTI_THREADED = 0;
+//
+// if (vm.count("help") or (vm.count("input") == 0 and vm.count("3d-a") == 0))
+// {
+// cerr << desc << endl;
+// exit(1);
+// }
+//
+// // matrix
+// string matrix;
+// float gop, gep, magic;
+//
+// if (vm.count("input")) // regular alignments
+// {
+// matrix = "BLOSUM";
+// gop = 10;
+// gep = 0.2;
+// magic = 0.1;
+// }
+// else // 3d alignments
+// {
+// matrix = "GONNET";
+// gop = 2;
+// gep = 0.02;
+// magic = 0.1;
+// }
+//
+// if (vm.count("matrix"))
+// matrix = vm["matrix"].as<string>();
+// substitution_matrix_family mat(matrix);
+//
+// if (vm.count("gap-open")) gop = vm["gap-open"].as<float>();
+// if (vm.count("gap-extend")) gep = vm["gap-extend"].as<float>();
+// if (vm.count("magic")) magic = vm["magic"].as<float>();
+//
+// char chain = 0;
+// if (vm.count("chain"))
+// chain = vm["chain"].as<char>();
+//
+// vector<entry> data;
+// vector<entry*> alignment;
+//
+// if (vm.count("3d"))
+// {
+// uint32 iterations = 5;
+// if (vm.count("iterations"))
+// iterations = vm["iterations"].as<uint32>();
+//
+// align3d(vm["input"].as<vector<string>>(), iterations, mat, gop, gep, magic, alignment);
+// }
+// else
+// {
+// align2d(vm["input"].as<vector<string>>(), chain, data, mat, gop, gep, magic, alignment);
+// }
+//
+// string outfile = "stdout";
+// if (vm.count("outfile") != 0)
+// outfile = vm["outfile"].as<string>();
+//
+//// if (vm.count("outtree"))
+//// {
+//// fs::path treepath = vm["outtree"].as<string>();
+//// treepath = treepath.parent_path() / (treepath.stem() + ".dnd");
+//// fs::ofstream file(treepath);
+//// if (not file.is_open())
+//// throw mas_exception(boost::format("Failed to write guide tree %1%") % treepath.string());
+//// file << *root << ';' << endl;
+//// }
+//
+// sort(alignment.begin(), alignment.end(),
+// boost::bind(&entry::nr, _1) < boost::bind(&entry::nr, _2));
+//
+// if (vm.count("shuffle"))
+// {
+// shuffle(alignment, mat, gop, gep, magic);
+// shuffle(alignment, mat, gop, gep, magic);
+// }
+//
+// string format = "clustalw";
+// if (vm.count("format"))
+// format = vm["format"].as<string>();
+//
+// if (vm.count("output"))
+// {
+// string output = vm["output"].as<string>();
+//
+// ofstream outfile(output.c_str(), ios_base::out|ios_base::trunc|ios_base::binary);
+// if (not outfile.is_open())
+// throw runtime_error("could not create output file");
+//
+// io::filtering_stream<io::output> out;
+// if (ba::ends_with(output, ".bz2"))
+// out.push(io::bzip2_compressor());
+// else if (ba::ends_with(output, ".gz"))
+// out.push(io::gzip_compressor());
+//
+// out.push(outfile);
+//
+// report(alignment, out, format);
+// }
+// else
+// report(alignment, cout, format);
+//
+//// delete root;
+// }
+// catch (const exception& e)
+// {
+// cerr << e.what() << endl;
+// exit(1);
+// }
+//
+// return 0;
//}
//
diff --git a/src/mas.h b/src/mas.h
old mode 100644
new mode 100755
index 6920346..7d73356
--- a/src/mas.h
+++ b/src/mas.h
@@ -1,7 +1,11 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef XSSP_MAS_H
+#define XSSP_MAS_H
#pragma once
@@ -27,14 +31,14 @@
#include <boost/version.hpp>
#include <boost/cstdint.hpp>
-typedef int8_t int8;
-typedef uint8_t uint8;
-typedef int16_t int16;
-typedef uint16_t uint16;
-typedef int32_t int32;
-typedef uint32_t uint32;
-typedef int64_t int64;
-typedef uint64_t uint64;
+typedef int8_t int8;
+typedef uint8_t uint8;
+typedef int16_t int16;
+typedef uint16_t uint16;
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int64_t int64;
+typedef uint64_t uint64;
#ifndef nullptr
#define nullptr NULL
@@ -53,19 +57,21 @@ extern const uint8 kResidueNrTable[];
inline uint8 ResidueNr(char inAA)
{
- int result = 23;
+ int result = 23;
- inAA |= 040;
- if (inAA >= 'a' and inAA <= 'z')
- result = kResidueNrTable[inAA - 'a'];
+ inAA |= 040;
+ if (inAA >= 'a' and inAA <= 'z')
+ result = kResidueNrTable[inAA - 'a'];
- return result;
+ return result;
}
inline bool is_gap(char aa)
{
- return aa == ' ' or aa == '.' or aa == '-';
+ return aa == ' ' or aa == '.' or aa == '-';
}
sequence encode(const std::string& s);
std::string decode(const sequence& s);
+
+#endif
diff --git a/src/matrix.h b/src/matrix.h
old mode 100644
new mode 100755
index 52792fa..90baf48
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -1,40 +1,44 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
// substitution matrix for multiple sequence alignments
+#ifndef XSSP_MATRIX_H
+#define XSSP_MATRIX_H
+
#pragma once
#include "mas.h"
-#include <vector>
-#include <string>
-#include <istream>
+#include <algorithm>
#include <cassert>
+#include <istream>
#include <stdexcept>
-#include <algorithm>
+#include <string>
+#include <vector>
// Some predefined matrices
// PAM250 is used by hssp-nt in aligning the sequences
-extern const int8 kMBlosum45[], kMBlosum50[], kMBlosum62[], kMBlosum80[], kMBlosum90[],
- kMPam250[], kMPam30[], kMPam70[];
+extern const int8 kMBlosum45[], kMBlosum50[], kMBlosum62[], kMBlosum80[],
+ kMBlosum90[], kMPam250[], kMPam30[], kMPam70[];
extern const float kMPam250ScalingFactor, kMPam250MisMatchAverage;
struct MMtrxStats
{
- double lambda, kappa, entropy, alpha, beta;
+ double lambda, kappa, entropy, alpha, beta;
};
struct MMatrixData
{
- const char* mName;
- int8 mGapOpen, mGapExtend;
- const int8* mMatrix;
- MMtrxStats mGappedStats, mUngappedStats;
+ const char* mName;
+ int8 mGapOpen, mGapExtend;
+ const int8* mMatrix;
+ MMtrxStats mGappedStats, mUngappedStats;
};
extern const MMatrixData kMMatrixData[];
@@ -47,14 +51,14 @@ extern const float kDayhoffData[];
template<typename T>
inline T score(const T inMatrix[], uint8 inAA1, uint8 inAA2)
{
- T result;
+ T result;
- if (inAA1 >= inAA2)
- result = inMatrix[(inAA1 * (inAA1 + 1)) / 2 + inAA2];
- else
- result = inMatrix[(inAA2 * (inAA2 + 1)) / 2 + inAA1];
+ if (inAA1 >= inAA2)
+ result = inMatrix[(inAA1 * (inAA1 + 1)) / 2 + inAA2];
+ else
+ result = inMatrix[(inAA2 * (inAA2 + 1)) / 2 + inAA1];
- return result;
+ return result;
}
// --------------------------------------------------------------------
@@ -67,158 +71,162 @@ class matrix_base
{
public:
- typedef T value_type;
+ typedef T value_type;
+
+ virtual ~matrix_base() {}
- virtual ~matrix_base() {}
+ virtual uint32 dim_m() const = 0;
+ virtual uint32 dim_n() const = 0;
- virtual uint32 dim_m() const = 0;
- virtual uint32 dim_n() const = 0;
+ virtual value_type& operator()(uint32 i, uint32 j)
+ {
+ throw std::runtime_error("unimplemented method");
+ }
- virtual value_type& operator()(uint32 i, uint32 j) { throw std::runtime_error("unimplemented method"); }
- virtual value_type operator()(uint32 i, uint32 j) const = 0;
-
- matrix_base& operator*=(const value_type& rhs);
+ virtual value_type operator()(uint32 i, uint32 j) const = 0;
- matrix_base& operator-=(const value_type& rhs);
+ matrix_base& operator*=(const value_type& rhs);
+
+ matrix_base& operator-=(const value_type& rhs);
};
template<typename T>
matrix_base<T>& matrix_base<T>::operator*=(const T& rhs)
{
- for (uint32 i = 0; i < dim_m(); ++i)
- {
- for (uint32 j = 0; j < dim_n(); ++j)
- {
- operator()(i, j) *= rhs;
- }
- }
-
- return *this;
+ for (uint32 i = 0; i < dim_m(); ++i)
+ {
+ for (uint32 j = 0; j < dim_n(); ++j)
+ {
+ operator()(i, j) *= rhs;
+ }
+ }
+
+ return *this;
}
template<typename T>
matrix_base<T>& matrix_base<T>::operator-=(const T& rhs)
{
- for (uint32 i = 0; i < dim_m(); ++i)
- {
- for (uint32 j = 0; j < dim_n(); ++j)
- {
- operator()(i, j) -= rhs;
- }
- }
-
- return *this;
+ for (uint32 i = 0; i < dim_m(); ++i)
+ {
+ for (uint32 j = 0; j < dim_n(); ++j)
+ {
+ operator()(i, j) -= rhs;
+ }
+ }
+
+ return *this;
}
template<typename T>
std::ostream& operator<<(std::ostream& lhs, const matrix_base<T>& rhs)
{
- lhs << '[' << rhs.dim_m() << ',' << rhs.dim_n() << ']' << '(';
- for (uint32 i = 0; i < rhs.dim_m(); ++i)
- {
- lhs << '(';
- for (uint32 j = 0; j < rhs.dim_n(); ++j)
- {
- if (j > 0)
- lhs << ',';
- lhs << rhs(i,j);
- }
- lhs << ')';
- }
- lhs << ')';
-
- return lhs;
+ lhs << '[' << rhs.dim_m() << ',' << rhs.dim_n() << ']' << '(';
+ for (uint32 i = 0; i < rhs.dim_m(); ++i)
+ {
+ lhs << '(';
+ for (uint32 j = 0; j < rhs.dim_n(); ++j)
+ {
+ if (j > 0)
+ lhs << ',';
+ lhs << rhs(i,j);
+ }
+ lhs << ')';
+ }
+ lhs << ')';
+
+ return lhs;
}
template<typename T>
class matrix : public matrix_base<T>
{
public:
- typedef T value_type;
-
- template<typename T2>
- matrix(const matrix_base<T2>& m)
- : m_m(m.dim_m())
- , m_n(m.dim_n())
- {
- m_data = new value_type[m_m * m_n];
- for (uint32 i = 0; i < m_m; ++i)
- {
- for (uint32 j = 0; j < m_n; ++j)
- operator()(i, j) = m(i, j);
- }
- }
-
- matrix() : m_data(nullptr), m_m(0), m_n(0) {}
-
- matrix(const matrix& m)
- : m_m(m.m_m)
- , m_n(m.m_n)
- {
- m_data = new value_type[m_m * m_n];
- std::copy(m.m_data, m.m_data + (m_m * m_n), m_data);
- }
-
- matrix& operator=(const matrix& m)
- {
- value_type* t = new value_type[m.m_m * m.m_n];
- std::copy(m.m_data, m.m_data + (m.m_m * m.m_n), t);
-
- delete[] m_data;
- m_data = t;
- m_m = m.m_m;
- m_n = m.m_n;
-
- return *this;
- }
-
- matrix(uint32 m, uint32 n, T v = T())
- : m_m(m)
- , m_n(n)
- {
- m_data = new value_type[m_m * m_n];
- std::fill(m_data, m_data + (m_m * m_n), v);
- }
-
- virtual ~matrix()
- {
- delete [] m_data;
- }
-
- virtual uint32 dim_m() const { return m_m; }
- virtual uint32 dim_n() const { return m_n; }
-
- virtual value_type operator()(uint32 i, uint32 j) const
- {
- assert(i < m_m); assert(j < m_n);
- return m_data[i * m_n + j];
- }
-
- virtual value_type& operator()(uint32 i, uint32 j)
- {
- assert(i < m_m); assert(j < m_n);
- return m_data[i * m_n + j];
- }
-
- template<typename Func>
- void each(Func f)
- {
- for (uint32 i = 0; i < m_m * m_n; ++i)
- f(m_data[i]);
- }
-
- template<typename U>
- matrix& operator/=(U v)
- {
- for (uint32 i = 0; i < m_m * m_n; ++i)
- m_data[i] /= v;
-
- return *this;
- }
+ typedef T value_type;
+
+ template<typename T2>
+ matrix(const matrix_base<T2>& m)
+ : m_m(m.dim_m())
+ , m_n(m.dim_n())
+ {
+ m_data = new value_type[m_m * m_n];
+ for (uint32 i = 0; i < m_m; ++i)
+ {
+ for (uint32 j = 0; j < m_n; ++j)
+ operator()(i, j) = m(i, j);
+ }
+ }
+
+ matrix() : m_data(nullptr), m_m(0), m_n(0) {}
+
+ matrix(const matrix& m)
+ : m_m(m.m_m)
+ , m_n(m.m_n)
+ {
+ m_data = new value_type[m_m * m_n];
+ std::copy(m.m_data, m.m_data + (m_m * m_n), m_data);
+ }
+
+ matrix& operator=(const matrix& m)
+ {
+ value_type* t = new value_type[m.m_m * m.m_n];
+ std::copy(m.m_data, m.m_data + (m.m_m * m.m_n), t);
+
+ delete[] m_data;
+ m_data = t;
+ m_m = m.m_m;
+ m_n = m.m_n;
+
+ return *this;
+ }
+
+ matrix(uint32 m, uint32 n, T v = T())
+ : m_m(m)
+ , m_n(n)
+ {
+ m_data = new value_type[m_m * m_n];
+ std::fill(m_data, m_data + (m_m * m_n), v);
+ }
+
+ virtual ~matrix()
+ {
+ delete [] m_data;
+ }
+
+ virtual uint32 dim_m() const { return m_m; }
+ virtual uint32 dim_n() const { return m_n; }
+
+ virtual value_type operator()(uint32 i, uint32 j) const
+ {
+ assert(i < m_m); assert(j < m_n);
+ return m_data[i * m_n + j];
+ }
+
+ virtual value_type& operator()(uint32 i, uint32 j)
+ {
+ assert(i < m_m); assert(j < m_n);
+ return m_data[i * m_n + j];
+ }
+
+ template<typename Func>
+ void each(Func f)
+ {
+ for (uint32 i = 0; i < m_m * m_n; ++i)
+ f(m_data[i]);
+ }
+
+ template<typename U>
+ matrix& operator/=(U v)
+ {
+ for (uint32 i = 0; i < m_m * m_n; ++i)
+ m_data[i] /= v;
+
+ return *this;
+ }
private:
- value_type* m_data;
- uint32 m_m, m_n;
+ value_type* m_data;
+ uint32 m_m, m_n;
};
// --------------------------------------------------------------------
@@ -227,135 +235,135 @@ template<typename T>
class symmetric_matrix : public matrix_base<T>
{
public:
- typedef typename matrix_base<T>::value_type value_type;
-
- symmetric_matrix(uint32 n, T v = T())
- : m_owner(true)
- , m_n(n)
- {
- uint32 N = (m_n * (m_n + 1)) / 2;
- m_data = new value_type[N];
- std::fill(m_data, m_data + N, v);
- }
-
- symmetric_matrix(const T* data, uint32 n)
- : m_owner(false)
- , m_data(const_cast<T*>(data))
- , m_n(n)
- {
- }
-
-
- virtual ~symmetric_matrix()
- {
- if (m_owner)
- delete[] m_data;
- }
-
- virtual uint32 dim_m() const { return m_n; }
- virtual uint32 dim_n() const { return m_n; }
-
- T operator()(uint32 i, uint32 j) const;
- virtual T& operator()(uint32 i, uint32 j);
-
- // erase two rows, add one at the end (for neighbour joining)
- void erase_2(uint32 i, uint32 j);
-
- template<typename Func>
- void each(Func f)
- {
- uint32 N = (m_n * (m_n + 1)) / 2;
-
- for (uint32 i = 0; i < N; ++i)
- f(m_data[i]);
- }
-
- template<typename U>
- symmetric_matrix& operator/=(U v)
- {
- uint32 N = (m_n * (m_n + 1)) / 2;
-
- for (uint32 i = 0; i < N; ++i)
- m_data[i] /= v;
-
- return *this;
- }
+ typedef typename matrix_base<T>::value_type value_type;
+
+ symmetric_matrix(uint32 n, T v = T())
+ : m_owner(true)
+ , m_n(n)
+ {
+ uint32 N = (m_n * (m_n + 1)) / 2;
+ m_data = new value_type[N];
+ std::fill(m_data, m_data + N, v);
+ }
+
+ symmetric_matrix(const T* data, uint32 n)
+ : m_owner(false)
+ , m_data(const_cast<T*>(data))
+ , m_n(n)
+ {
+ }
+
+
+ virtual ~symmetric_matrix()
+ {
+ if (m_owner)
+ delete[] m_data;
+ }
+
+ virtual uint32 dim_m() const { return m_n; }
+ virtual uint32 dim_n() const { return m_n; }
+
+ T operator()(uint32 i, uint32 j) const;
+ virtual T& operator()(uint32 i, uint32 j);
+
+ // erase two rows, add one at the end (for neighbour joining)
+ void erase_2(uint32 i, uint32 j);
+
+ template<typename Func>
+ void each(Func f)
+ {
+ uint32 N = (m_n * (m_n + 1)) / 2;
+
+ for (uint32 i = 0; i < N; ++i)
+ f(m_data[i]);
+ }
+
+ template<typename U>
+ symmetric_matrix& operator/=(U v)
+ {
+ uint32 N = (m_n * (m_n + 1)) / 2;
+
+ for (uint32 i = 0; i < N; ++i)
+ m_data[i] /= v;
+
+ return *this;
+ }
private:
- bool m_owner;
- value_type* m_data;
- uint32 m_n;
+ bool m_owner;
+ value_type* m_data;
+ uint32 m_n;
};
template<typename T>
inline
T symmetric_matrix<T>::operator()(uint32 i, uint32 j) const
{
- return i < j
- ? m_data[(j * (j + 1)) / 2 + i]
- : m_data[(i * (i + 1)) / 2 + j];
-// if (i > j)
-// std::swap(i, j);
-// assert(j < m_n);
-// return m_data[(j * (j + 1)) / 2 + i];
+ return i < j
+ ? m_data[(j * (j + 1)) / 2 + i]
+ : m_data[(i * (i + 1)) / 2 + j];
+// if (i > j)
+// std::swap(i, j);
+// assert(j < m_n);
+// return m_data[(j * (j + 1)) / 2 + i];
}
template<typename T>
inline
T& symmetric_matrix<T>::operator()(uint32 i, uint32 j)
{
- if (i > j)
- std::swap(i, j);
- assert(j < m_n);
- return m_data[(j * (j + 1)) / 2 + i];
+ if (i > j)
+ std::swap(i, j);
+ assert(j < m_n);
+ return m_data[(j * (j + 1)) / 2 + i];
}
template<typename T>
void symmetric_matrix<T>::erase_2(uint32 di, uint32 dj)
{
- uint32 s = 0, d = 0;
- for (uint32 i = 0; i < m_n; ++i)
- {
- for (uint32 j = 0; j < i; ++j)
- {
- if (i != di and j != dj and i != dj and j != di)
- {
- if (s != d)
- m_data[d] = m_data[s];
- ++d;
- }
-
- ++s;
- }
- }
-
- --m_n;
+ uint32 s = 0, d = 0;
+ for (uint32 i = 0; i < m_n; ++i)
+ {
+ for (uint32 j = 0; j < i; ++j)
+ {
+ if (i != di and j != dj and i != dj and j != di)
+ {
+ if (s != d)
+ m_data[d] = m_data[s];
+ ++d;
+ }
+
+ ++s;
+ }
+ }
+
+ --m_n;
}
template<typename T>
class identity_matrix : public matrix_base<T>
{
public:
- typedef typename matrix_base<T>::value_type value_type;
+ typedef typename matrix_base<T>::value_type value_type;
- identity_matrix(uint32 n)
- : m_n(n)
- {
- }
+ identity_matrix(uint32 n)
+ : m_n(n)
+ {
+ }
- virtual uint32 dim_m() const { return m_n; }
- virtual uint32 dim_n() const { return m_n; }
+ virtual uint32 dim_m() const { return m_n; }
+ virtual uint32 dim_n() const { return m_n; }
- virtual value_type operator()(uint32 i, uint32 j) const
- {
- value_type result = 0;
- if (i == j)
- result = 1;
- return result;
- }
+ virtual value_type operator()(uint32 i, uint32 j) const
+ {
+ value_type result = 0;
+ if (i == j)
+ result = 1;
+ return result;
+ }
private:
- uint32 m_n;
+ uint32 m_n;
};
// --------------------------------------------------------------------
@@ -364,51 +372,55 @@ class identity_matrix : public matrix_base<T>
template<typename T>
matrix<T> operator*(const matrix_base<T>& lhs, const matrix_base<T>& rhs)
{
- matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(), rhs.dim_n()));
-
- for (uint32 i = 0; i < result.dim_m(); ++i)
- {
- for (uint32 j = 0; j < result.dim_n(); ++j)
- {
- for (uint32 li = 0, rj = 0; li < lhs.dim_m() and rj < rhs.dim_n(); ++li, ++rj)
- result(i, j) += lhs(li, j) * rhs(i, rj);
- }
- }
-
- return result;
+ matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(),
+ rhs.dim_n()));
+
+ for (uint32 i = 0; i < result.dim_m(); ++i)
+ {
+ for (uint32 j = 0; j < result.dim_n(); ++j)
+ {
+ for (uint32 li = 0, rj = 0;
+ li < lhs.dim_m() and rj < rhs.dim_n();
+ ++li, ++rj)
+ result(i, j) += lhs(li, j) * rhs(i, rj);
+ }
+ }
+
+ return result;
}
template<typename T>
matrix<T> operator*(const matrix_base<T>& lhs, T rhs)
{
- matrix<T> result(lhs);
- result *= rhs;
+ matrix<T> result(lhs);
+ result *= rhs;
- return result;
+ return result;
}
template<typename T>
matrix<T> operator-(const matrix_base<T>& lhs, const matrix_base<T>& rhs)
{
- matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(), rhs.dim_n()));
-
- for (uint32 i = 0; i < result.dim_m(); ++i)
- {
- for (uint32 j = 0; j < result.dim_n(); ++j)
- {
- result(i, j) = lhs(i, j) - rhs(i, j);
- }
- }
-
- return result;
+ matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(),
+ rhs.dim_n()));
+
+ for (uint32 i = 0; i < result.dim_m(); ++i)
+ {
+ for (uint32 j = 0; j < result.dim_n(); ++j)
+ {
+ result(i, j) = lhs(i, j) - rhs(i, j);
+ }
+ }
+
+ return result;
}
template<typename T>
matrix<T> operator-(const matrix_base<T>& lhs, T rhs)
{
- matrix<T> result(lhs.dim_m(), lhs.dim_n());
- result -= rhs;
- return result;
+ matrix<T> result(lhs.dim_m(), lhs.dim_n());
+ result -= rhs;
+ return result;
}
template<typename T>
@@ -417,4 +429,14 @@ symmetric_matrix<T> hamming_distance(const matrix_base<T>& lhs, T rhs);
template<typename T>
std::vector<T> sum(const matrix_base<T>& m);
-#include "matrix.inl"
+template<typename T>
+symmetric_matrix<T> hamming_distance(const matrix_base<T>& lhs, T rhs)
+{
+}
+
+template<typename T>
+std::vector<T> sum(const matrix_base<T>& m)
+{
+}
+
+#endif
diff --git a/src/matrix.inl b/src/matrix.inl
deleted file mode 100644
index 71b44b9..0000000
--- a/src/matrix.inl
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-
-template<typename T>
-symmetric_matrix<T> hamming_distance(const matrix_base<T>& lhs, T rhs)
-{
-
-}
-
-template<typename T>
-std::vector<T> sum(const matrix_base<T>& m)
-{
-
-}
diff --git a/src/mkdssp.cpp b/src/mkdssp.cpp
old mode 100644
new mode 100755
index f7596e7..11d2c2a
--- a/src/mkdssp.cpp
+++ b/src/mkdssp.cpp
@@ -1,161 +1,186 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
//
// A DSSP reimplementation
-#include "mas.h"
-
-#if defined(_MSC_VER)
-#include <conio.h>
-#include <ctype.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
#endif
-#include <fstream>
+#include "dssp.h"
+#include "iocif.h"
+#include "mas.h"
+#include "structure.h"
+#include "version.h"
#include <boost/program_options.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/copy.hpp>
-#if defined USE_COMPRESSION
+#include <boost/iostreams/filtering_stream.hpp>
+
+#ifdef HAVE_LIBBZ2
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#endif
+
#include <boost/algorithm/string.hpp>
-#include "dssp.h"
-#include "structure.h"
-#include "iocif.h"
+#if defined(_MSC_VER)
+#include <conio.h>
+#include <ctype.h>
+#endif
+#include <fstream>
+
-using namespace std;
namespace po = boost::program_options;
namespace io = boost::iostreams;
namespace ba = boost::algorithm;
-int VERBOSE = 0;
+//int VERBOSE = 0;
int main(int argc, char* argv[])
{
- try
- {
- po::options_description desc("mkdssp " VERSION " options");
- desc.add_options()
- ("help,h", "Display help message")
- ("input,i", po::value<string>(), "Input file")
- ("output,o", po::value<string>(), "Output file, use 'stdout' to output to screen")
- ("verbose,v", "Verbose output")
- ("version", "Print version")
- ("debug,d", po::value<int>(), "Debug level (for even more verbose output)")
- ;
-
- po::positional_options_description p;
- p.add("input", 1);
- p.add("output", 2);
-
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
- po::notify(vm);
-
- if (vm.count("version"))
- {
- cout << "mkdssp version " VERSION << endl;
- exit(0);
- }
-
- if (vm.count("help") or not vm.count("input"))
- {
- cerr << desc << endl
- << endl
- << "Examples: " << endl
- << endl
- << "To calculate the secondary structure for the file 1crn.pdb and" << endl
- << "write the result to a file called 1crn.dssp, you type:" << endl
- << endl
- << " " << argv[0] << " -i 1crn.pdb -o 1crn.dssp" << endl
- << endl;
+ try
+ {
+ po::options_description desc("mkdssp " XSSP_VERSION " options");
+ desc.add_options()
+ ("help,h", "Display help message")
+ ("input,i", po::value<std::string>(), "Input PDB file (.pdb) or mmCIF file (.cif/.mcif), optionally compressed by gzip (.gz) or bzip2 (.bz2)")
+ ("output,o", po::value<std::string>(), "Output file, optionally compressed by gzip (.gz) or bzip2 (.bz2). Use 'stdout' to output to screen")
+ ("verbose,v", "Verbose output")
+ ("version", "Print version and citation info")
+ ("debug,d", po::value<int>(), "Debug level (for even more verbose output)");
+
+ po::positional_options_description p;
+ p.add("input", 1);
+ p.add("output", 2);
+
+ po::variables_map vm;
+ po::store(
+ po::command_line_parser(argc, argv).options(desc).positional(p).run(),
+ vm);
+ po::notify(vm);
+
+ if (vm.count("version")>0)
+ {
+ std::cout << "mkdssp version " XSSP_VERSION << std::endl
+ << std::endl
+ << "If you use DSSP, please cite: " << std::endl
+ << "Touw WG, Baakman C, Black J, te Beek TA, Krieger E, Joosten RP & Vriend G." << std::endl
+ << "A series of PDB-related databanks for everyday needs." << std::endl
+ << "Nucleic Acids Res. (2015) 43, D364-D368. doi: 10.1093/nar/gku1028." << std::endl
+ << std::endl
+ << "The original DSSP reference is: " << std::endl
+ << "Kabsch W & Sander C." << std::endl
+ << "Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features." << std::endl
+ << "Biopolymers (1983) 22, 2577-2637. doi: 10.1002/bip.360221211." << std::endl;
+ exit(0);
+ }
+
+ if (vm.count("help") or not vm.count("input"))
+ {
+ std::cerr << desc << std::endl
+ << std::endl
+ << "Examples: " << std::endl
+ << std::endl
+ << "To calculate the secondary structure for the file 1crn.pdb and"
+ << std::endl
+ << "write the result to a file called 1crn.dssp, you type:"
+ << std::endl
+ << std::endl
+ << " " << argv[0] << " -i 1crn.pdb -o 1crn.dssp"
+ << std::endl
+ << std::endl;
#if defined(_MSC_VER)
- cerr << endl
- << "MKDSSP is a command line application, use the 'Command prompt' application" << endl
- << "to start " << argv[0] << " You can find the 'Command prompt' in the Start menu:" << endl
- << endl
- << "Start => Accessories => Command prompt" << endl
- << endl
- << endl
- << "Press any key to continue..." << endl;
- char ch = _getch();
+ std::cerr << std::endl
+ << "MKDSSP is a command line application, use the 'Command prompt' "
+ << "application" << std::endl
+ << "to start " << argv[0] << " You can find the 'Command prompt' in "
+ << "the Start menu:" << std::endl
+ << std::endl
+ << "Start => Accessories => Command prompt" << std::endl
+ << std::endl
+ << std::endl
+ << "Press any key to continue..." << std::endl;
+ char ch = _getch();
#endif
- exit(1);
- }
-
- VERBOSE = vm.count("verbose") != 0;
- if (vm.count("debug"))
- VERBOSE = vm["debug"].as<int>();
-
- string input = vm["input"].as<string>();
-
- ifstream infile(input.c_str(), ios_base::in | ios_base::binary);
- if (not infile.is_open())
- throw runtime_error("No such file");
-
- io::filtering_stream<io::input> in;
-
-#if defined USE_COMPRESSION
- if (ba::ends_with(input, ".bz2"))
- {
- in.push(io::bzip2_decompressor());
- input.erase(input.length() - 4);
- }
- else if (ba::ends_with(input, ".gz"))
- {
- in.push(io::gzip_decompressor());
- input.erase(input.length() - 3);
- }
+ exit(1);
+ }
+
+ VERBOSE = vm.count("verbose") != 0;
+ if (vm.count("debug"))
+ VERBOSE = vm["debug"].as<int>();
+
+ std::string input = vm["input"].as<std::string>();
+
+ std::ifstream infile(input.c_str(),
+ std::ios_base::in | std::ios_base::binary);
+ if (not infile.is_open())
+ throw std::runtime_error("No such file");
+
+ io::filtering_stream<io::input> in;
+
+#ifdef HAVE_LIBBZ2
+ if (ba::ends_with(input, ".bz2"))
+ {
+ in.push(io::bzip2_decompressor());
+ input.erase(input.length() - 4);
+ }
+ else if (ba::ends_with(input, ".gz"))
+ {
+ in.push(io::gzip_decompressor());
+ input.erase(input.length() - 3);
+ }
#endif
-
- in.push(infile);
-
- // OK, we've got the file, now create a protein
- MProtein a;
-
- if (ba::ends_with(input, ".cif"))
- a.ReadmmCIF(in);
- else
- a.ReadPDB(in);
-
- // then calculate the secondary structure
- a.CalculateSecondaryStructure();
-
- // and finally report the secondary structure in the DSSP format
- // either to cout or an (optionally compressed) file.
- if (vm.count("output"))
- {
- string output = vm["output"].as<string>();
-
- ofstream outfile(output.c_str(), ios_base::out|ios_base::trunc|ios_base::binary);
- if (not outfile.is_open())
- throw runtime_error("could not create output file");
-
- io::filtering_stream<io::output> out;
-#if defined USE_COMPRESSION
- if (ba::ends_with(output, ".bz2"))
- out.push(io::bzip2_compressor());
- else if (ba::ends_with(output, ".gz"))
- out.push(io::gzip_compressor());
+
+ in.push(infile);
+
+ // OK, we've got the file, now create a protein
+ MProtein a;
+
+ if (ba::ends_with(input, ".cif") or ba::ends_with(input, ".mcif"))
+ a.ReadmmCIF(in);
+ else
+ a.ReadPDB(in);
+
+ // then calculate the secondary structure
+ a.CalculateSecondaryStructure();
+
+ // and finally report the secondary structure in the DSSP format
+ // either to cout or an (optionally compressed) file.
+ if (vm.count("output"))
+ {
+ std::string output = vm["output"].as<std::string>();
+
+ std::ofstream outfile(
+ output.c_str(),
+ std::ios_base::out|std::ios_base::trunc|std::ios_base::binary);
+ if (not outfile.is_open())
+ throw std::runtime_error("could not create output file");
+
+ io::filtering_stream<io::output> out;
+#ifdef HAVE_LIBBZ2
+ if (ba::ends_with(output, ".bz2"))
+ out.push(io::bzip2_compressor());
+ else if (ba::ends_with(output, ".gz"))
+ out.push(io::gzip_compressor());
#endif
- out.push(outfile);
-
- WriteDSSP(a, out);
- }
- else
- WriteDSSP(a, cout);
- }
- catch (const exception& e)
- {
- cerr << "DSSP could not be created due to an error:" << endl
- << e.what() << endl;
- exit(1);
- }
-
- return 0;
+ out.push(outfile);
+
+ WriteDSSP(a, out);
+ }
+ else
+ WriteDSSP(a, std::cout);
+ }
+ catch (const std::exception& e)
+ {
+ std::cerr << "DSSP could not be created due to an error:" << std::endl
+ << e.what() << std::endl;
+ exit(1);
+ }
+
+ return 0;
}
diff --git a/src/primitives-3d.cpp b/src/primitives-3d.cpp
old mode 100644
new mode 100755
index b04e7c4..3cfaa77
--- a/src/primitives-3d.cpp
+++ b/src/primitives-3d.cpp
@@ -1,231 +1,230 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
// 3d routines
-#include "mas.h"
+#include "primitives-3d.h"
-#include <valarray>
-#include <cmath>
+#include "mas.h"
+#include "matrix.h"
#include <boost/foreach.hpp>
-#define foreach BOOST_FOREACH
-#include "primitives-3d.h"
-#include "matrix.h"
+#include <cmath>
+#include <valarray>
-using namespace std;
+#define foreach BOOST_FOREACH
-const double
- kPI = 4 * std::atan(1.0);
+const double kPI = 4 * std::atan(1.0);
// --------------------------------------------------------------------
MQuaternion Normalize(MQuaternion q)
{
- valarray<double> t(4);
-
- t[0] = q.R_component_1();
- t[1] = q.R_component_2();
- t[2] = q.R_component_3();
- t[3] = q.R_component_4();
-
- t *= t;
-
- double length = sqrt(t.sum());
-
- if (length > 0.001)
- q /= length;
- else
- q = MQuaternion(1, 0, 0, 0);
-
- return q;
+ std::valarray<double> t(4);
+
+ t[0] = q.R_component_1();
+ t[1] = q.R_component_2();
+ t[2] = q.R_component_3();
+ t[3] = q.R_component_4();
+
+ t *= t;
+
+ double length = sqrt(t.sum());
+
+ if (length > 0.001)
+ q /= length;
+ else
+ q = MQuaternion(1, 0, 0, 0);
+
+ return q;
}
// --------------------------------------------------------------------
double MPoint::Normalize()
{
- double length = mX * mX + mY * mY + mZ * mZ;
- if (length > 0)
- {
- length = sqrt(length);
- mX /= length;
- mY /= length;
- mZ /= length;
- }
- return length;
+ double length = mX * mX + mY * mY + mZ * mZ;
+ if (length > 0)
+ {
+ length = sqrt(length);
+ mX /= length;
+ mY /= length;
+ mZ /= length;
+ }
+ return length;
}
MPoint operator+(const MPoint& lhs, const MPoint& rhs)
{
- return MPoint(lhs.mX + rhs.mX, lhs.mY + rhs.mY, lhs.mZ + rhs.mZ);
+ return MPoint(lhs.mX + rhs.mX, lhs.mY + rhs.mY, lhs.mZ + rhs.mZ);
}
MPoint operator-(const MPoint& lhs, const MPoint& rhs)
{
- return MPoint(lhs.mX - rhs.mX, lhs.mY - rhs.mY, lhs.mZ - rhs.mZ);
+ return MPoint(lhs.mX - rhs.mX, lhs.mY - rhs.mY, lhs.mZ - rhs.mZ);
}
MPoint operator-(const MPoint& pt)
{
- return MPoint(-pt.mX, -pt.mY, -pt.mZ);
+ return MPoint(-pt.mX, -pt.mY, -pt.mZ);
}
MPoint operator*(const MPoint& pt, double f)
{
- MPoint result(pt);
- result *= f;
- return result;
+ MPoint result(pt);
+ result *= f;
+ return result;
}
MPoint operator/(const MPoint& pt, double f)
{
- MPoint result(pt);
- result /= f;
- return result;
+ MPoint result(pt);
+ result /= f;
+ return result;
}
-ostream& operator<<(ostream& os, const MPoint& pt)
+std::ostream& operator<<(std::ostream& os, const MPoint& pt)
{
- os << '(' << pt.mX << ',' << pt.mY << ',' << pt.mZ << ')';
- return os;
+ os << '(' << pt.mX << ',' << pt.mY << ',' << pt.mZ << ')';
+ return os;
}
-ostream& operator<<(ostream& os, const vector<MPoint>& pts)
+std::ostream& operator<<(std::ostream& os, const std::vector<MPoint>& pts)
{
- uint32 n = pts.size();
- os << '[' << n << ']';
-
- foreach (const MPoint& pt, pts)
- {
- os << pt;
- if (n-- > 1)
- os << ',';
- }
-
- return os;
+ uint32 n = pts.size();
+ os << '[' << n << ']';
+
+ foreach (const MPoint& pt, pts)
+ {
+ os << pt;
+ if (n-- > 1)
+ os << ',';
+ }
+
+ return os;
}
// --------------------------------------------------------------------
double DihedralAngle(const MPoint& p1, const MPoint& p2, const MPoint& p3, const MPoint& p4)
{
- MPoint v12 = p1 - p2; // vector from p2 to p1
- MPoint v43 = p4 - p3; // vector from p3 to p4
-
- MPoint z = p2 - p3; // vector from p3 to p2
-
- MPoint p = CrossProduct(z, v12);
- MPoint x = CrossProduct(z, v43);
- MPoint y = CrossProduct(z, x);
-
- double u = DotProduct(x, x);
- double v = DotProduct(y, y);
-
- double result = 360;
- if (u > 0 and v > 0)
- {
- u = DotProduct(p, x) / sqrt(u);
- v = DotProduct(p, y) / sqrt(v);
- if (u != 0 or v != 0)
- result = atan2(v, u) * 180 / kPI;
- }
-
- return result;
+ MPoint v12 = p1 - p2; // vector from p2 to p1
+ MPoint v43 = p4 - p3; // vector from p3 to p4
+
+ MPoint z = p2 - p3; // vector from p3 to p2
+
+ MPoint p = CrossProduct(z, v12);
+ MPoint x = CrossProduct(z, v43);
+ MPoint y = CrossProduct(z, x);
+
+ double u = DotProduct(x, x);
+ double v = DotProduct(y, y);
+
+ double result = 360;
+ if (u > 0 and v > 0)
+ {
+ u = DotProduct(p, x) / sqrt(u);
+ v = DotProduct(p, y) / sqrt(v);
+ if (u != 0 or v != 0)
+ result = atan2(v, u) * 180 / kPI;
+ }
+
+ return result;
}
double CosinusAngle(const MPoint& p1, const MPoint& p2, const MPoint& p3, const MPoint& p4)
{
- MPoint v12 = p1 - p2;
- MPoint v34 = p3 - p4;
-
- double result = 0;
-
- double x = DotProduct(v12, v12) * DotProduct(v34, v34);
- if (x > 0)
- result = DotProduct(v12, v34) / sqrt(x);
-
- return result;
+ MPoint v12 = p1 - p2;
+ MPoint v34 = p3 - p4;
+
+ double result = 0;
+
+ double x = DotProduct(v12, v12) * DotProduct(v34, v34);
+ if (x > 0)
+ result = DotProduct(v12, v34) / sqrt(x);
+
+ return result;
}
// --------------------------------------------------------------------
-tr1::tuple<double,MPoint> QuaternionToAngleAxis(MQuaternion q)
+std::tr1::tuple<double,MPoint> QuaternionToAngleAxis(MQuaternion q)
{
- if (q.R_component_1() > 1)
- q = Normalize(q);
+ if (q.R_component_1() > 1)
+ q = Normalize(q);
- // angle:
- double angle = 2 * acos(q.R_component_1());
- angle = angle * 180 / kPI;
+ // angle:
+ double angle = 2 * acos(q.R_component_1());
+ angle = angle * 180 / kPI;
- // axis:
- double s = sqrt(1 - q.R_component_1() * q.R_component_1());
- if (s < 0.001)
- s = 1;
-
- MPoint axis(q.R_component_2() / s, q.R_component_3() / s, q.R_component_4() / s);
+ // axis:
+ double s = sqrt(1 - q.R_component_1() * q.R_component_1());
+ if (s < 0.001)
+ s = 1;
- return tr1::make_tuple(angle, axis);
+ MPoint axis(q.R_component_2() / s, q.R_component_3() / s, q.R_component_4() / s);
+
+ return std::tr1::make_tuple(angle, axis);
}
-MPoint CenterPoints(vector<MPoint>& points)
+MPoint CenterPoints(std::vector<MPoint>& points)
{
- MPoint t;
-
- foreach (MPoint& pt, points)
- {
- t.mX += pt.mX;
- t.mY += pt.mY;
- t.mZ += pt.mZ;
- }
-
- t.mX /= points.size();
- t.mY /= points.size();
- t.mZ /= points.size();
-
- foreach (MPoint& pt, points)
- {
- pt.mX -= t.mX;
- pt.mY -= t.mY;
- pt.mZ -= t.mZ;
- }
-
- return t;
+ MPoint t;
+
+ foreach (MPoint& pt, points)
+ {
+ t.mX += pt.mX;
+ t.mY += pt.mY;
+ t.mZ += pt.mZ;
+ }
+
+ t.mX /= points.size();
+ t.mY /= points.size();
+ t.mZ /= points.size();
+
+ foreach (MPoint& pt, points)
+ {
+ pt.mX -= t.mX;
+ pt.mY -= t.mY;
+ pt.mZ -= t.mZ;
+ }
+
+ return t;
}
-MPoint Centroid(vector<MPoint>& points)
+MPoint Centroid(std::vector<MPoint>& points)
{
- MPoint result;
-
- foreach (MPoint& pt, points)
- result += pt;
-
- result /= points.size();
-
- return result;
+ MPoint result;
+
+ foreach (MPoint& pt, points)
+ result += pt;
+
+ result /= points.size();
+
+ return result;
}
-double RMSd(const vector<MPoint>& a, const vector<MPoint>& b)
+double RMSd(const std::vector<MPoint>& a, const std::vector<MPoint>& b)
{
- double sum = 0;
- for (uint32 i = 0; i < a.size(); ++i)
- {
- valarray<double> d(3);
-
- d[0] = b[i].mX - a[i].mX;
- d[1] = b[i].mY - a[i].mY;
- d[2] = b[i].mZ - a[i].mZ;
-
- d *= d;
-
- sum += d.sum();
- }
-
- return sqrt(sum / a.size());
+ double sum = 0;
+ for (uint32 i = 0; i < a.size(); ++i)
+ {
+ std::valarray<double> d(3);
+
+ d[0] = b[i].mX - a[i].mX;
+ d[1] = b[i].mY - a[i].mY;
+ d[2] = b[i].mZ - a[i].mZ;
+
+ d *= d;
+
+ sum += d.sum();
+ }
+
+ return sqrt(sum / a.size());
}
// The next function returns the largest solution for a quartic equation
@@ -235,142 +234,142 @@ double RMSd(const vector<MPoint>& a, const vector<MPoint>& b)
// x^4 + ax^2 + bx + c = 0
//
// (since I'm too lazy to find out a better way, I've implemented the
-// routine using complex values to avoid nan's as a result of taking
+// routine using std::complex values to avoid nan's as a result of taking
// sqrt of a negative number)
double LargestDepressedQuarticSolution(double a, double b, double c)
{
- complex<double> P = - (a * a) / 12 - c;
- complex<double> Q = - (a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
- complex<double> R = - Q / 2.0 + sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);
-
- complex<double> U = pow(R, 1 / 3.0);
-
- complex<double> y;
- if (U == 0.0)
- y = -5.0 * a / 6.0 + U - pow(Q, 1.0 / 3.0);
- else
- y = -5.0 * a / 6.0 + U - P / (3.0 * U);
-
- complex<double> W = sqrt(a + 2.0 * y);
-
- // And to get the final result:
- // result = (±W + sqrt(-(3 * alpha + 2 * y ± 2 * beta / W))) / 2;
- // We want the largest result, so:
-
- valarray<double> t(4);
-
- t[0] = (( W + sqrt(-(3.0 * a + 2.0 * y + 2.0 * b / W))) / 2.0).real();
- t[1] = (( W + sqrt(-(3.0 * a + 2.0 * y - 2.0 * b / W))) / 2.0).real();
- t[2] = ((-W + sqrt(-(3.0 * a + 2.0 * y + 2.0 * b / W))) / 2.0).real();
- t[3] = ((-W + sqrt(-(3.0 * a + 2.0 * y - 2.0 * b / W))) / 2.0).real();
-
- return t.max();
+ std::complex<double> P = - (a * a) / 12 - c;
+ std::complex<double> Q = - (a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
+ std::complex<double> R = - Q / 2.0 + sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);
+
+ std::complex<double> U = pow(R, 1 / 3.0);
+
+ std::complex<double> y;
+ if (U == 0.0)
+ y = -5.0 * a / 6.0 + U - pow(Q, 1.0 / 3.0);
+ else
+ y = -5.0 * a / 6.0 + U - P / (3.0 * U);
+
+ std::complex<double> W = sqrt(a + 2.0 * y);
+
+ // And to get the final result:
+ // result = (±W + sqrt(-(3 * a + 2 * y ± 2 * b / W))) / 2;
+ // We want the largest result, so:
+
+ std::valarray<double> t(4);
+
+ t[0] = (( W + sqrt(-(3.0 * a + 2.0 * y + 2.0 * b / W))) / 2.0).real();
+ t[1] = (( W + sqrt(-(3.0 * a + 2.0 * y - 2.0 * b / W))) / 2.0).real();
+ t[2] = ((-W + sqrt(-(3.0 * a + 2.0 * y + 2.0 * b / W))) / 2.0).real();
+ t[3] = ((-W + sqrt(-(3.0 * a + 2.0 * y - 2.0 * b / W))) / 2.0).real();
+
+ return t.max();
}
-MQuaternion AlignPoints(const vector<MPoint>& pa, const vector<MPoint>& pb)
+MQuaternion AlignPoints(const std::vector<MPoint>& pa, const std::vector<MPoint>& pb)
{
- // First calculate M, a 3x3 matrix containing the sums of products of the coordinates of A and B
- matrix<double> M(3, 3, 0);
-
- for (uint32 i = 0; i < pa.size(); ++i)
- {
- const MPoint& a = pa[i];
- const MPoint& b = pb[i];
-
- M(0, 0) += a.mX * b.mX; M(0, 1) += a.mX * b.mY; M(0, 2) += a.mX * b.mZ;
- M(1, 0) += a.mY * b.mX; M(1, 1) += a.mY * b.mY; M(1, 2) += a.mY * b.mZ;
- M(2, 0) += a.mZ * b.mX; M(2, 1) += a.mZ * b.mY; M(2, 2) += a.mZ * b.mZ;
- }
-
- // Now calculate N, a symmetric 4x4 matrix
- symmetric_matrix<double> N(4);
-
- N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
- N(0, 1) = M(1, 2) - M(2, 1);
- N(0, 2) = M(2, 0) - M(0, 2);
- N(0, 3) = M(0, 1) - M(1, 0);
-
- N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
- N(1, 2) = M(0, 1) + M(1, 0);
- N(1, 3) = M(0, 2) + M(2, 0);
-
- N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
- N(2, 3) = M(1, 2) + M(2, 1);
-
- N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);
-
- // det(N - λI) = 0
- // find the largest λ (λm)
- //
- // Aλ4 + Bλ3 + Cλ2 + Dλ + E = 0
- // A = 1
- // B = 0
- // and so this is a so-called depressed quartic
- // solve it using Ferrari's algorithm
-
- double C = -2 * (
- M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
- M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
- M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));
-
- double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
- M(1, 1) * M(2, 0) * M(0, 2) +
- M(2, 2) * M(0, 1) * M(1, 0)) -
- 8 * (M(0, 0) * M(1, 1) * M(2, 2) +
- M(1, 2) * M(2, 0) * M(0, 1) +
- M(2, 1) * M(1, 0) * M(0, 2));
-
- double E =
- (N(0,0) * N(1,1) - N(0,1) * N(0,1)) * (N(2,2) * N(3,3) - N(2,3) * N(2,3)) +
- (N(0,1) * N(0,2) - N(0,0) * N(2,1)) * (N(2,1) * N(3,3) - N(2,3) * N(1,3)) +
- (N(0,0) * N(1,3) - N(0,1) * N(0,3)) * (N(2,1) * N(2,3) - N(2,2) * N(1,3)) +
- (N(0,1) * N(2,1) - N(1,1) * N(0,2)) * (N(0,2) * N(3,3) - N(2,3) * N(0,3)) +
- (N(1,1) * N(0,3) - N(0,1) * N(1,3)) * (N(0,2) * N(2,3) - N(2,2) * N(0,3)) +
- (N(0,2) * N(1,3) - N(2,1) * N(0,3)) * (N(0,2) * N(1,3) - N(2,1) * N(0,3));
-
- // solve quartic
- double lm = LargestDepressedQuarticSolution(C, D, E);
-
- // calculate t = (N - λI)
- matrix<double> li = identity_matrix<double>(4) * lm;
- matrix<double> t = N - li;
-
- // calculate a matrix of cofactors for t
- matrix<double> cf(4, 4);
-
- const uint32 ixs[4][3] =
- {
- { 1, 2, 3 },
- { 0, 2, 3 },
- { 0, 1, 3 },
- { 0, 1, 2 }
- };
-
- uint32 maxR = 0;
- for (uint32 r = 0; r < 4; ++r)
- {
- const uint32* ir = ixs[r];
-
- for (uint32 c = 0; c < 4; ++c)
- {
- const uint32* ic = ixs[c];
-
- cf(r, c) =
- t(ir[0], ic[0]) * t(ir[1], ic[1]) * t(ir[2], ic[2]) +
- t(ir[0], ic[1]) * t(ir[1], ic[2]) * t(ir[2], ic[0]) +
- t(ir[0], ic[2]) * t(ir[1], ic[0]) * t(ir[2], ic[1]) -
- t(ir[0], ic[2]) * t(ir[1], ic[1]) * t(ir[2], ic[0]) -
- t(ir[0], ic[1]) * t(ir[1], ic[0]) * t(ir[2], ic[2]) -
- t(ir[0], ic[0]) * t(ir[1], ic[2]) * t(ir[2], ic[1]);
- }
-
- if (r > maxR and cf(r, 0) > cf(maxR, 0))
- maxR = r;
- }
-
- // NOTE the negation of the y here, why? Maybe I swapped r/c above?
- MQuaternion q(cf(maxR, 0), cf(maxR, 1), -cf(maxR, 2), cf(maxR, 3));
- q = Normalize(q);
-
- return q;
+ // First calculate M, a 3x3 matrix containing the sums of products of the coordinates of A and B
+ matrix<double> M(3, 3, 0);
+
+ for (uint32 i = 0; i < pa.size(); ++i)
+ {
+ const MPoint& a = pa[i];
+ const MPoint& b = pb[i];
+
+ M(0, 0) += a.mX * b.mX; M(0, 1) += a.mX * b.mY; M(0, 2) += a.mX * b.mZ;
+ M(1, 0) += a.mY * b.mX; M(1, 1) += a.mY * b.mY; M(1, 2) += a.mY * b.mZ;
+ M(2, 0) += a.mZ * b.mX; M(2, 1) += a.mZ * b.mY; M(2, 2) += a.mZ * b.mZ;
+ }
+
+ // Now calculate N, a symmetric 4x4 matrix
+ symmetric_matrix<double> N(4);
+
+ N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
+ N(0, 1) = M(1, 2) - M(2, 1);
+ N(0, 2) = M(2, 0) - M(0, 2);
+ N(0, 3) = M(0, 1) - M(1, 0);
+
+ N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
+ N(1, 2) = M(0, 1) + M(1, 0);
+ N(1, 3) = M(0, 2) + M(2, 0);
+
+ N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
+ N(2, 3) = M(1, 2) + M(2, 1);
+
+ N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);
+
+ // det(N - λI) = 0
+ // find the largest λ (λm)
+ //
+ // Aλ^4 + Bλ^3 + Cλ^2 + Dλ + E = 0
+ // A = 1
+ // B = 0
+ // and so this is a so-called depressed quartic
+ // solve it using Ferrari's algorithm
+
+ double C = -2 * (
+ M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
+ M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
+ M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));
+
+ double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
+ M(1, 1) * M(2, 0) * M(0, 2) +
+ M(2, 2) * M(0, 1) * M(1, 0)) -
+ 8 * (M(0, 0) * M(1, 1) * M(2, 2) +
+ M(1, 2) * M(2, 0) * M(0, 1) +
+ M(2, 1) * M(1, 0) * M(0, 2));
+
+ double E =
+ (N(0,0) * N(1,1) - N(0,1) * N(0,1)) * (N(2,2) * N(3,3) - N(2,3) * N(2,3)) +
+ (N(0,1) * N(0,2) - N(0,0) * N(2,1)) * (N(2,1) * N(3,3) - N(2,3) * N(1,3)) +
+ (N(0,0) * N(1,3) - N(0,1) * N(0,3)) * (N(2,1) * N(2,3) - N(2,2) * N(1,3)) +
+ (N(0,1) * N(2,1) - N(1,1) * N(0,2)) * (N(0,2) * N(3,3) - N(2,3) * N(0,3)) +
+ (N(1,1) * N(0,3) - N(0,1) * N(1,3)) * (N(0,2) * N(2,3) - N(2,2) * N(0,3)) +
+ (N(0,2) * N(1,3) - N(2,1) * N(0,3)) * (N(0,2) * N(1,3) - N(2,1) * N(0,3));
+
+ // solve quartic
+ double lm = LargestDepressedQuarticSolution(C, D, E);
+
+ // calculate t = (N - λI)
+ matrix<double> li = identity_matrix<double>(4) * lm;
+ matrix<double> t = N - li;
+
+ // calculate a matrix of cofactors for t
+ matrix<double> cf(4, 4);
+
+ const uint32 ixs[4][3] =
+ {
+ { 1, 2, 3 },
+ { 0, 2, 3 },
+ { 0, 1, 3 },
+ { 0, 1, 2 }
+ };
+
+ uint32 maxR = 0;
+ for (uint32 r = 0; r < 4; ++r)
+ {
+ const uint32* ir = ixs[r];
+
+ for (uint32 c = 0; c < 4; ++c)
+ {
+ const uint32* ic = ixs[c];
+
+ cf(r, c) =
+ t(ir[0], ic[0]) * t(ir[1], ic[1]) * t(ir[2], ic[2]) +
+ t(ir[0], ic[1]) * t(ir[1], ic[2]) * t(ir[2], ic[0]) +
+ t(ir[0], ic[2]) * t(ir[1], ic[0]) * t(ir[2], ic[1]) -
+ t(ir[0], ic[2]) * t(ir[1], ic[1]) * t(ir[2], ic[0]) -
+ t(ir[0], ic[1]) * t(ir[1], ic[0]) * t(ir[2], ic[2]) -
+ t(ir[0], ic[0]) * t(ir[1], ic[2]) * t(ir[2], ic[1]);
+ }
+
+ if (r > maxR and cf(r, 0) > cf(maxR, 0))
+ maxR = r;
+ }
+
+ // NOTE the negation of the y here, why? Maybe I swapped r/c above?
+ MQuaternion q(cf(maxR, 0), cf(maxR, 1), -cf(maxR, 2), cf(maxR, 3));
+ q = Normalize(q);
+
+ return q;
}
diff --git a/src/primitives-3d.h b/src/primitives-3d.h
old mode 100644
new mode 100755
index df920be..a169a17
--- a/src/primitives-3d.h
+++ b/src/primitives-3d.h
@@ -1,45 +1,50 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
// some data types and routines for working with 3d data
-#pragma once
+#ifndef XSSP_PRIMITIVES3D_H
+#define XSSP_PRIMITIVES3D_H
-#include <vector>
+#pragma once
-#include <boost/tr1/tuple.hpp>
#include <boost/math/quaternion.hpp>
+#include <boost/tr1/tuple.hpp>
+
+#include <vector>
typedef boost::math::quaternion<double> MQuaternion;
extern const double kPI;
// --------------------------------------------------------------------
-// The basic point type, can be used to store vectors in 3d space as well of course
+// The basic point type, can be used to store vectors in 3d space as well of
+// course
struct MPoint
{
- MPoint();
- MPoint(double x, double y, double z);
- MPoint(const MPoint& rhs);
+ MPoint();
+ MPoint(double x, double y, double z);
+ MPoint(const MPoint& rhs);
- MPoint& operator=(const MPoint& rhs);
+ MPoint& operator=(const MPoint& rhs);
- MPoint& operator+=(const MPoint& rhs);
- MPoint& operator-=(const MPoint& rhs);
+ MPoint& operator+=(const MPoint& rhs);
+ MPoint& operator-=(const MPoint& rhs);
- MPoint& operator+=(double f);
- MPoint& operator-=(double f);
+ MPoint& operator+=(double f);
+ MPoint& operator-=(double f);
- MPoint& operator*=(double f);
- MPoint& operator/=(double f);
+ MPoint& operator*=(double f);
+ MPoint& operator/=(double f);
- double Normalize();
- void Rotate(const MQuaternion& q);
+ double Normalize();
+ void Rotate(const MQuaternion& q);
- double mX, mY, mZ;
+ double mX, mY, mZ;
};
std::ostream& operator<<(std::ostream& os, const MPoint& pt);
@@ -56,8 +61,10 @@ double Distance(const MPoint& a, const MPoint& b);
double DistanceSquared(const MPoint& a, const MPoint& b);
double DotProduct(const MPoint& p1, const MPoint& p2);
MPoint CrossProduct(const MPoint& p1, const MPoint& p2);
-double DihedralAngle(const MPoint& p1, const MPoint& p2, const MPoint& p3, const MPoint& p4);
-double CosinusAngle(const MPoint& p1, const MPoint& p2, const MPoint& p3, const MPoint& p4);
+double DihedralAngle(const MPoint& p1, const MPoint& p2, const MPoint& p3,
+ const MPoint& p4);
+double CosinusAngle(const MPoint& p1, const MPoint& p2, const MPoint& p3,
+ const MPoint& p4);
// --------------------------------------------------------------------
// We use quaternions to do rotations in 3d space
@@ -67,7 +74,8 @@ MQuaternion Normalize(MQuaternion q);
std::tr1::tuple<double,MPoint> QuaternionToAngleAxis(MQuaternion q);
MPoint Centroid(std::vector<MPoint>& points);
MPoint CenterPoints(std::vector<MPoint>& points);
-MQuaternion AlignPoints(const std::vector<MPoint>& a, const std::vector<MPoint>& b);
+MQuaternion AlignPoints(const std::vector<MPoint>& a,
+ const std::vector<MPoint>& b);
double RMSd(const std::vector<MPoint>& a, const std::vector<MPoint>& b);
// --------------------------------------------------------------------
@@ -75,134 +83,136 @@ double RMSd(const std::vector<MPoint>& a, const std::vector<MPoint>& b);
inline
MPoint::MPoint()
- : mX(0)
- , mY(0)
- , mZ(0)
+ : mX(0)
+ , mY(0)
+ , mZ(0)
{
}
inline
MPoint::MPoint(double x, double y, double z)
- : mX(x)
- , mY(y)
- , mZ(z)
+ : mX(x)
+ , mY(y)
+ , mZ(z)
{
}
inline
MPoint::MPoint(const MPoint& rhs)
- : mX(rhs.mX)
- , mY(rhs.mY)
- , mZ(rhs.mZ)
+ : mX(rhs.mX)
+ , mY(rhs.mY)
+ , mZ(rhs.mZ)
{
}
inline
MPoint& MPoint::operator=(const MPoint& rhs)
{
- mX = rhs.mX;
- mY = rhs.mY;
- mZ = rhs.mZ;
-
- return *this;
+ mX = rhs.mX;
+ mY = rhs.mY;
+ mZ = rhs.mZ;
+
+ return *this;
}
inline
MPoint& MPoint::operator+=(const MPoint& rhs)
{
- mX += rhs.mX;
- mY += rhs.mY;
- mZ += rhs.mZ;
-
- return *this;
+ mX += rhs.mX;
+ mY += rhs.mY;
+ mZ += rhs.mZ;
+
+ return *this;
}
inline
MPoint& MPoint::operator-=(const MPoint& rhs)
{
- mX -= rhs.mX;
- mY -= rhs.mY;
- mZ -= rhs.mZ;
-
- return *this;
+ mX -= rhs.mX;
+ mY -= rhs.mY;
+ mZ -= rhs.mZ;
+
+ return *this;
}
inline
MPoint& MPoint::operator+=(double f)
{
- mX += f;
- mY += f;
- mZ += f;
-
- return *this;
+ mX += f;
+ mY += f;
+ mZ += f;
+
+ return *this;
}
inline
MPoint& MPoint::operator-=(double f)
{
- mX -= f;
- mY -= f;
- mZ -= f;
-
- return *this;
+ mX -= f;
+ mY -= f;
+ mZ -= f;
+
+ return *this;
}
inline
MPoint& MPoint::operator*=(double f)
{
- mX *= f;
- mY *= f;
- mZ *= f;
+ mX *= f;
+ mY *= f;
+ mZ *= f;
- return *this;
+ return *this;
}
inline
MPoint& MPoint::operator/=(double f)
{
- mX /= f;
- mY /= f;
- mZ /= f;
+ mX /= f;
+ mY /= f;
+ mZ /= f;
- return *this;
+ return *this;
}
inline
void MPoint::Rotate(const MQuaternion& q)
{
- MQuaternion p(0, mX, mY, mZ);
-
- p = q * p * conj(q);
+ MQuaternion p(0, mX, mY, mZ);
+
+ p = q * p * conj(q);
- mX = p.R_component_2();
- mY = p.R_component_3();
- mZ = p.R_component_4();
+ mX = p.R_component_2();
+ mY = p.R_component_3();
+ mZ = p.R_component_4();
}
inline double DotProduct(const MPoint& a, const MPoint& b)
{
- return a.mX * b.mX + a.mY * b.mY + a.mZ * b.mZ;
+ return a.mX * b.mX + a.mY * b.mY + a.mZ * b.mZ;
}
inline MPoint CrossProduct(const MPoint& a, const MPoint& b)
{
- return MPoint(a.mY * b.mZ - b.mY * a.mZ,
- a.mZ * b.mX - b.mZ * a.mX,
- a.mX * b.mY - b.mX * a.mY);
+ return MPoint(a.mY * b.mZ - b.mY * a.mZ,
+ a.mZ * b.mX - b.mZ * a.mX,
+ a.mX * b.mY - b.mX * a.mY);
}
inline double DistanceSquared(const MPoint& a, const MPoint& b)
{
- return
- (a.mX - b.mX) * (a.mX - b.mX) +
- (a.mY - b.mY) * (a.mY - b.mY) +
- (a.mZ - b.mZ) * (a.mZ - b.mZ);
+ return
+ (a.mX - b.mX) * (a.mX - b.mX) +
+ (a.mY - b.mY) * (a.mY - b.mY) +
+ (a.mZ - b.mZ) * (a.mZ - b.mZ);
}
inline double Distance(const MPoint& a, const MPoint& b)
{
- return sqrt(
- (a.mX - b.mX) * (a.mX - b.mX) +
- (a.mY - b.mY) * (a.mY - b.mY) +
- (a.mZ - b.mZ) * (a.mZ - b.mZ));
+ return sqrt(
+ (a.mX - b.mX) * (a.mX - b.mX) +
+ (a.mY - b.mY) * (a.mY - b.mY) +
+ (a.mZ - b.mZ) * (a.mZ - b.mZ));
}
+
+#endif
diff --git a/src/structure.cpp b/src/structure.cpp
old mode 100644
new mode 100755
index 305d729..a5aa9bc
--- a/src/structure.cpp
+++ b/src/structure.cpp
@@ -1,52 +1,55 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
// structure related stuff
-#include "mas.h"
+#include "structure.h"
-#include <set>
-#include <numeric>
-#include <functional>
+#include "align-2d.h"
+#include "buffer.h"
+#include "iocif.h"
+#include "utils.h"
+#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
+#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/lexical_cast.hpp>
-#include <boost/foreach.hpp>
-#define foreach BOOST_FOREACH
-#include <boost/algorithm/string.hpp>
#include <boost/math/special_functions/round.hpp>
+#include <boost/optional.hpp>
-#include "align-2d.h"
-#include "utils.h"
-#include "buffer.h"
-#include "structure.h"
-#include "iocif.h"
+#include <set>
+#include <numeric>
+#include <functional>
-using namespace std;
namespace ba = boost::algorithm;
namespace bm = boost::math;
+using boost::none;
+using boost::optional;
+
+#define foreach BOOST_FOREACH
// --------------------------------------------------------------------
const double
- kSSBridgeDistance = 3.0,
- kMinimalDistance = 0.5,
- kMinimalCADistance = 9.0,
- kMinHBondEnergy = -9.9,
- kMaxHBondEnergy = -0.5,
- kCouplingConstant = -27.888, // = -332 * 0.42 * 0.2
- kMaxPeptideBondLength = 2.5;
+ kSSBridgeDistance = 3.0,
+ kMinimalDistance = 0.5,
+ kMinimalCADistance = 9.0,
+ kMinHBondEnergy = -9.9,
+ kMaxHBondEnergy = -0.5,
+ kCouplingConstant = -27.888, // = -332 * 0.42 * 0.2
+ kMaxPeptideBondLength = 2.5;
const double
- kRadiusN = 1.65,
- kRadiusCA = 1.87,
- kRadiusC = 1.76,
- kRadiusO = 1.4,
- kRadiusSideAtom = 1.8,
- kRadiusWater = 1.4;
+ kRadiusN = 1.65,
+ kRadiusCA = 1.87,
+ kRadiusC = 1.76,
+ kRadiusO = 1.4,
+ kRadiusSideAtom = 1.8,
+ kRadiusWater = 1.4;
// --------------------------------------------------------------------
@@ -57,2109 +60,2260 @@ namespace
class MSurfaceDots
{
public:
- static MSurfaceDots& Instance();
-
- size_t size() const { return mPoints.size(); }
- const MPoint& operator[](uint32 inIx) const { return mPoints[inIx]; }
- double weight() const { return mWeight; }
+ static MSurfaceDots& Instance();
+
+ size_t size() const { return mPoints.size(); }
+ const MPoint& operator[](uint32 inIx) const { return mPoints[inIx]; }
+ double weight() const { return mWeight; }
private:
- MSurfaceDots(int32 inN);
+ MSurfaceDots(int32 inN);
- vector<MPoint> mPoints;
- double mWeight;
+ std::vector<MPoint> mPoints;
+ double mWeight;
};
MSurfaceDots& MSurfaceDots::Instance()
{
- const uint32 kN = 200;
-
- static MSurfaceDots sInstance(kN);
- return sInstance;
+ const uint32 kN = 200;
+
+ static MSurfaceDots sInstance(kN);
+ return sInstance;
}
MSurfaceDots::MSurfaceDots(int32 N)
{
- int32 P = 2 * N + 1;
-
- const double
- kGoldenRatio = (1 + sqrt(5.0)) / 2;
-
- mWeight = (4 * kPI) / P;
-
- for (int32 i = -N; i <= N; ++i)
- {
- double lat = asin((2.0 * i) / P);
- double lon = fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
-
- MPoint p;
- p.mX = sin(lon) * cos(lat);
- p.mY = cos(lon) * cos(lat);
- p.mZ = sin(lat);
+ int32 P = 2 * N + 1;
+
+ const double kGoldenRatio = (1 + sqrt(5.0)) / 2;
+
+ mWeight = (4 * kPI) / P;
+
+ for (int32 i = -N; i <= N; ++i)
+ {
+ double lat = asin((2.0 * i) / P);
+ double lon = fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
- mPoints.push_back(p);
- }
+ MPoint p;
+ p.mX = sin(lon) * cos(lat);
+ p.mY = cos(lon) * cos(lat);
+ p.mZ = sin(lat);
+
+ mPoints.push_back(p);
+ }
}
}
// --------------------------------------------------------------------
-MAtomType MapElement(string inElement)
-{
- ba::trim(inElement);
- ba::to_upper(inElement);
-
- MAtomType result = kUnknownAtom;
- if (inElement == "H")
- result = kHydrogen;
- else if (inElement == "C")
- result = kCarbon;
- else if (inElement == "N")
- result = kNitrogen;
- else if (inElement == "O")
- result = kOxygen;
- else if (inElement == "F")
- result = kFluorine;
- else if (inElement == "P")
- result = kPhosphorus;
- else if (inElement == "S")
- result = kSulfur;
- else if (inElement == "CL")
- result = kChlorine;
- else if (inElement == "K")
- result = kPotassium;
- else if (inElement == "MG")
- result = kMagnesium;
- else if (inElement == "CA")
- result = kCalcium;
- else if (inElement == "ZN")
- result = kZinc;
- else if (inElement == "SE")
- result = kSelenium;
- else
- throw mas_exception(boost::format("Unsupported element '%1%'") % inElement);
- return result;
-}
-
-MResidueType MapResidue(string inName)
-{
- ba::trim(inName);
-
- MResidueType result = kUnknownResidue;
-
- for (uint32 i = 0; i < kResidueTypeCount; ++i)
- {
- if (inName == kResidueInfo[i].name)
- {
- result = kResidueInfo[i].type;
- break;
- }
- }
-
- return result;
+MAtomType MapElement(std::string inElement)
+{
+ ba::trim(inElement);
+ ba::to_upper(inElement);
+
+ MAtomType result = kUnknownAtom;
+ if (inElement == "H")
+ result = kHydrogen;
+ else if (inElement == "C")
+ result = kCarbon;
+ else if (inElement == "N")
+ result = kNitrogen;
+ else if (inElement == "O")
+ result = kOxygen;
+ else if (inElement == "F")
+ result = kFluorine;
+ else if (inElement == "P")
+ result = kPhosphorus;
+ else if (inElement == "S")
+ result = kSulfur;
+ else if (inElement == "CL")
+ result = kChlorine;
+ else if (inElement == "K")
+ result = kPotassium;
+ else if (inElement == "MG")
+ result = kMagnesium;
+ else if (inElement == "CA")
+ result = kCalcium;
+ else if (inElement == "ZN")
+ result = kZinc;
+ else if (inElement == "SE")
+ result = kSelenium;
+ else
+ throw mas_exception(boost::format("Unsupported element '%1%'") % inElement);
+ return result;
+}
+
+MResidueType MapResidue(std::string inName)
+{
+ ba::trim(inName);
+
+ MResidueType result = kUnknownResidue;
+
+ for (uint32 i = 0; i < kResidueTypeCount; ++i)
+ {
+ if (inName == kResidueInfo[i].name)
+ {
+ result = kResidueInfo[i].type;
+ break;
+ }
+ }
+
+ return result;
}
MResidueType MapResidue(char inCode)
{
- MResidueType result = kUnknownResidue;
-
- for (uint32 i = 0; i < kResidueTypeCount; ++i)
- {
- if (inCode == kResidueInfo[i].code)
- {
- result = kResidueInfo[i].type;
- break;
- }
- }
-
- return result;
+ MResidueType result = kUnknownResidue;
+
+ for (uint32 i = 0; i < kResidueTypeCount; ++i)
+ {
+ if (inCode == kResidueInfo[i].code)
+ {
+ result = kResidueInfo[i].type;
+ break;
+ }
+ }
+
+ return result;
}
// --------------------------------------------------------------------
// a custom float parser, optimised for speed (and the way floats are represented in a PDB file)
-double ParseFloat(const string& s)
-{
- double result = 0;
- bool negate = false;
- double div = 10;
-
- enum State {
- pStart, pSign, pFirst, pSecond
- } state = pStart;
-
- for (string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
- {
- switch (state)
- {
- case pStart:
- if (isspace(*ch))
- continue;
- if (*ch == '-')
- {
- negate = true;
- state = pSign;
- }
- else if (*ch == '+')
- state = pSign;
- else if (*ch == '.')
- state = pSecond;
- else if (isdigit(*ch))
- {
- result = *ch - '0';
- state = pFirst;
- }
- else
- throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
- break;
-
- case pSign:
- if (*ch == '.')
- state = pSecond;
- else if (isdigit(*ch))
- {
- state = pFirst;
- result = *ch - '0';
- }
- else
- throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
- break;
-
- case pFirst:
- if (*ch == '.')
- state = pSecond;
- else if (isdigit(*ch))
- result = 10 * result + (*ch - '0');
- else
- throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
- break;
-
- case pSecond:
- if (isdigit(*ch))
- {
- result += (*ch - '0') / div;
- div *= 10;
- }
- else
- throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
- break;
- }
- }
-
- if (negate)
- result = -result;
-
- return result;
-}
-
-void MAtom::WritePDB(ostream& os) const
-{
- // 1 - 6 Record name "ATOM "
- // 7 - 11 Integer serial Atom serial number.
- // 13 - 16 Atom name Atom name.
- // 17 Character altLoc Alternate location indicator.
- // 18 - 20 Residue name resName Residue name.
- // 22 Character chainID Chain identifier.
- // 23 - 26 Integer resSeq Residue sequence number.
- // 27 AChar iCode Code for insertion of residues.
- // 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
- // 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
- // 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
- // 55 - 60 Real(6.2) occupancy Occupancy.
- // 61 - 66 Real(6.2) tempFactor Temperature factor.
- // 77 - 78 LString(2) element Element symbol, right-justified.
- // 79 - 80 LString(2) charge Charge on the atom.
- boost::format atom("ATOM %5.5d %3.3s%c%3.3s %1.1s%4.4d%1.1s %8.3f%8.3f%8.3f%6.2f%6.2f %2.2s%2.2s");
-
- string charge;
- if (mCharge != 0)
- {
- charge += boost::lexical_cast<string>(abs(mCharge));
- if (mCharge > 0)
- charge += '+';
- else
- charge += '-';
- }
-
- os << (atom % mSerial % mName % mAltLoc % mResName % mChainID % mResSeq % mICode %
- mLoc.mX % mLoc.mY % mLoc.mZ % mOccupancy % mTempFactor % mElement % charge) << endl;
+double ParseFloat(const std::string& s)
+{
+ double result = 0;
+ bool negate = false;
+ double div = 10;
+
+ enum State {
+ pStart, pSign, pFirst, pSecond
+ } state = pStart;
+
+ for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
+ {
+ switch (state)
+ {
+ case pStart:
+ if (isspace(*ch))
+ continue;
+ if (*ch == '-')
+ {
+ negate = true;
+ state = pSign;
+ }
+ else if (*ch == '+')
+ state = pSign;
+ else if (*ch == '.')
+ state = pSecond;
+ else if (isdigit(*ch))
+ {
+ result = *ch - '0';
+ state = pFirst;
+ }
+ else
+ throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
+ break;
+
+ case pSign:
+ if (*ch == '.')
+ state = pSecond;
+ else if (isdigit(*ch))
+ {
+ state = pFirst;
+ result = *ch - '0';
+ }
+ else
+ throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
+ break;
+
+ case pFirst:
+ if (*ch == '.')
+ state = pSecond;
+ else if (isdigit(*ch))
+ result = 10 * result + (*ch - '0');
+ else
+ throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
+ break;
+
+ case pSecond:
+ if (isdigit(*ch))
+ {
+ result += (*ch - '0') / div;
+ div *= 10;
+ }
+ else
+ throw mas_exception(boost::format("invalid formatted floating point number '%1%'") % s);
+ break;
+ }
+ }
+
+ if (negate)
+ result = -result;
+
+ return result;
+}
+
+void MAtom::WritePDB(std::ostream& os) const
+{
+ // 1 - 6 Record name "ATOM "
+ // 7 - 11 Integer serial Atom serial number.
+ // 13 - 16 Atom name Atom name.
+ // 17 Character altLoc Alternate location indicator.
+ // 18 - 20 Residue name resName Residue name.
+ // 22 Character chainID Chain identifier.
+ // 23 - 26 Integer resSeq Residue sequence number.
+ // 27 AChar iCode Code for insertion of residues.
+ // 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
+ // 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
+ // 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
+ // 55 - 60 Real(6.2) occupancy Occupancy.
+ // 61 - 66 Real(6.2) tempFactor Temperature factor.
+ // 77 - 78 LString(2) element Element symbol, right-justified.
+ // 79 - 80 LString(2) charge Charge on the atom.
+ boost::format atom("ATOM %5.5d %3.3s%c%3.3s %1.1s%4.4d%1.1s %8.3f%8.3f%8.3f%6.2f%6.2f %2.2s%2.2s");
+
+ std::string charge;
+ if (mCharge != 0)
+ {
+ charge += boost::lexical_cast<std::string>(abs(mCharge));
+ if (mCharge > 0)
+ charge += '+';
+ else
+ charge += '-';
+ }
+
+ os << (atom % mSerial % mName % mAltLoc % mResName % mChainID % mResSeq % mICode %
+ mLoc.mX % mLoc.mY % mLoc.mZ % mOccupancy % mTempFactor % mElement % charge) << std::endl;
}
const MResidueInfo kResidueInfo[] = {
- { kUnknownResidue, 'X', "UNK" },
- { kAlanine, 'A', "ALA" },
- { kArginine, 'R', "ARG" },
- { kAsparagine, 'N', "ASN" },
- { kAsparticAcid, 'D', "ASP" },
- { kCysteine, 'C', "CYS" },
- { kGlutamicAcid, 'E', "GLU" },
- { kGlutamine, 'Q', "GLN" },
- { kGlycine, 'G', "GLY" },
- { kHistidine, 'H', "HIS" },
- { kIsoleucine, 'I', "ILE" },
- { kLeucine, 'L', "LEU" },
- { kLysine, 'K', "LYS" },
- { kMethionine, 'M', "MET" },
- { kPhenylalanine, 'F', "PHE" },
- { kProline, 'P', "PRO" },
- { kSerine, 'S', "SER" },
- { kThreonine, 'T', "THR" },
- { kTryptophan, 'W', "TRP" },
- { kTyrosine, 'Y', "TYR" },
- { kValine, 'V', "VAL" }
+ { kUnknownResidue, 'X', "UNK" },
+ { kAlanine, 'A', "ALA" },
+ { kArginine, 'R', "ARG" },
+ { kAsparagine, 'N', "ASN" },
+ { kAsparticAcid, 'D', "ASP" },
+ { kCysteine, 'C', "CYS" },
+ { kGlutamicAcid, 'E', "GLU" },
+ { kGlutamine, 'Q', "GLN" },
+ { kGlycine, 'G', "GLY" },
+ { kHistidine, 'H', "HIS" },
+ { kIsoleucine, 'I', "ILE" },
+ { kLeucine, 'L', "LEU" },
+ { kLysine, 'K', "LYS" },
+ { kMethionine, 'M', "MET" },
+ { kPhenylalanine, 'F', "PHE" },
+ { kProline, 'P', "PRO" },
+ { kSerine, 'S', "SER" },
+ { kThreonine, 'T', "THR" },
+ { kTryptophan, 'W', "TRP" },
+ { kTyrosine, 'Y', "TYR" },
+ { kValine, 'V', "VAL" }
};
struct MBridge
{
- MBridgeType type;
- uint32 sheet, ladder;
- set<MBridge*> link;
- deque<uint32> i, j;
- string chainI, chainJ;
-
- bool operator<(const MBridge& b) const { return chainI < b.chainI or (chainI == b.chainI and i.front() < b.i.front()); }
+ MBridgeType type;
+ uint32 sheet, ladder;
+ std::set<MBridge*> link;
+ std::deque<uint32> i, j;
+ std::string chainI, chainJ;
+
+ bool operator<(const MBridge& b) const {
+ return chainI < b.chainI or (chainI == b.chainI and i.front() < b.i.front());
+ }
};
-ostream& operator<<(ostream& os, const MBridge& b)
+std::ostream& operator<<(std::ostream& os, const MBridge& b)
{
- os << '[' << (b.type == btParallel ? "p" : "a") << ':' << b.i.front() << '-' << b.i.back() << '/' << b.j.front() << '-' << b.j.back() << ']';
- return os;
+ os << '[' << (b.type == btParallel ? "p" : "a") << ':' << b.i.front()
+ << '-' << b.i.back() << '/' << b.j.front() << '-' << b.j.back() << ']';
+ return os;
}
-// return true if any of the residues in bridge a is identical to any of the residues in bridge b
+// return true if any of the residues in bridge a is identical to any of the
+// residues in bridge b
bool Linked(const MBridge& a, const MBridge& b)
{
- return
- find_first_of(a.i.begin(), a.i.end(), b.i.begin(), b.i.end()) != a.i.end() or
- find_first_of(a.i.begin(), a.i.end(), b.j.begin(), b.j.end()) != a.i.end() or
- find_first_of(a.j.begin(), a.j.end(), b.i.begin(), b.i.end()) != a.j.end() or
- find_first_of(a.j.begin(), a.j.end(), b.j.begin(), b.j.end()) != a.j.end();
+ return
+ find_first_of(a.i.begin(), a.i.end(), b.i.begin(), b.i.end()) != a.i.end() or
+ find_first_of(a.i.begin(), a.i.end(), b.j.begin(), b.j.end()) != a.i.end() or
+ find_first_of(a.j.begin(), a.j.end(), b.i.begin(), b.i.end()) != a.j.end() or
+ find_first_of(a.j.begin(), a.j.end(), b.j.begin(), b.j.end()) != a.j.end();
}
// --------------------------------------------------------------------
-MResidue::MResidue(uint32 inNumber,
- MResidue* inPrevious, const vector<MAtom>& inAtoms)
- : mPrev(inPrevious)
- , mNext(nullptr)
- , mSeqNumber(inAtoms.front().mResSeq)
- , mNumber(inNumber)
- , mInsertionCode(inAtoms.front().mICode)
- , mType(MapResidue(inAtoms.front().mResName))
- , mSSBridgeNr(0)
- , mAccessibility(0)
- , mSecondaryStructure(loop)
- , mSheet(0)
-{
- if (mPrev != nullptr)
- mPrev->mNext = this;
-
- fill(mHelixFlags, mHelixFlags + 3, helixNone);
-
- mBetaPartner[0].residue = mBetaPartner[1].residue = nullptr;
-
- mHBondDonor[0].energy = mHBondDonor[1].energy = mHBondAcceptor[0].energy = mHBondAcceptor[1].energy = 0;
- mHBondDonor[0].residue = mHBondDonor[1].residue = mHBondAcceptor[0].residue = mHBondAcceptor[1].residue = nullptr;
-
- static const MAtom kNullAtom = {};
- mN = mCA = mC = mO = kNullAtom;
-
- foreach (const MAtom& atom, inAtoms)
- {
- if (mChainID.empty())
- mChainID = atom.mChainID;
-
- if (MapResidue(atom.mResName) != mType)
- throw mas_exception(
- boost::format("inconsistent residue types in atom records for residue %1% (%2% != %3%)")
- % inNumber % atom.mResName % inAtoms.front().mResName);
-
- if (atom.mResSeq != mSeqNumber)
- throw mas_exception(boost::format("inconsistent residue sequence numbers (%1% != %2%)") % atom.mResSeq % mSeqNumber);
-
- if (atom.GetName() == "N")
- mN = atom;
- else if (atom.GetName() == "CA")
- mCA = atom;
- else if (atom.GetName() == "C")
- mC = atom;
- else if (atom.GetName() == "O")
- mO = atom;
- else
- mSideChain.push_back(atom);
- }
-
- // assign the Hydrogen
- mH = GetN();
-
- if (mType != kProline and mPrev != nullptr)
- {
- const MAtom& pc = mPrev->GetC();
- const MAtom& po = mPrev->GetO();
-
- double CODistance = Distance(pc, po);
-
- mH.mLoc.mX += (pc.mLoc.mX - po.mLoc.mX) / CODistance;
- mH.mLoc.mY += (pc.mLoc.mY - po.mLoc.mY) / CODistance;
- mH.mLoc.mZ += (pc.mLoc.mZ - po.mLoc.mZ) / CODistance;
- }
-
- // update the box containing all atoms
- mBox[0].mX = mBox[0].mY = mBox[0].mZ = numeric_limits<double>::max();
- mBox[1].mX = mBox[1].mY = mBox[1].mZ = -numeric_limits<double>::max();
-
- ExtendBox(mN, kRadiusN + 2 * kRadiusWater);
- ExtendBox(mCA, kRadiusCA + 2 * kRadiusWater);
- ExtendBox(mC, kRadiusC + 2 * kRadiusWater);
- ExtendBox(mO, kRadiusO + 2 * kRadiusWater);
- foreach (const MAtom& atom, mSideChain)
- ExtendBox(atom, kRadiusSideAtom + 2 * kRadiusWater);
-
- mRadius = mBox[1].mX - mBox[0].mX;
- if (mRadius < mBox[1].mY - mBox[0].mY)
- mRadius = mBox[1].mY - mBox[0].mY;
- if (mRadius < mBox[1].mZ - mBox[0].mZ)
- mRadius = mBox[1].mZ - mBox[0].mZ;
-
- mCenter.mX = (mBox[0].mX + mBox[1].mX) / 2;
- mCenter.mY = (mBox[0].mY + mBox[1].mY) / 2;
- mCenter.mZ = (mBox[0].mZ + mBox[1].mZ) / 2;
-
- if (VERBOSE > 3)
- cerr << "Created residue " << mN.mResName << endl;
-}
-
-MResidue::MResidue(uint32 inNumber, char inTypeCode, MResidue* inPrevious)
- : mPrev(nullptr)
- , mNext(nullptr)
- , mSeqNumber(inNumber)
- , mNumber(inNumber)
- , mType(MapResidue(inTypeCode))
- , mSSBridgeNr(0)
- , mAccessibility(0)
- , mSecondaryStructure(loop)
- , mSheet(0)
- , mBend(false)
-{
- fill(mHelixFlags, mHelixFlags + 3, helixNone);
-
- mBetaPartner[0].residue = mBetaPartner[1].residue = nullptr;
-
- mHBondDonor[0].energy = mHBondDonor[1].energy = mHBondAcceptor[0].energy = mHBondAcceptor[1].energy = 0;
- mHBondDonor[0].residue = mHBondDonor[1].residue = mHBondAcceptor[0].residue = mHBondAcceptor[1].residue = nullptr;
-
- static const MAtom kNullAtom = {};
- mN = mCA = mC = mO = kNullAtom;
-
- mCA.mResSeq = inTypeCode;
- mCA.mChainID = "A";
+MResidue::MResidue(int32 inNumber, MResidue* inPrevious,
+ const std::vector<MAtom>& inAtoms)
+ : mPrev(inPrevious)
+ , mNext(nullptr)
+ , mSeqNumber(inAtoms.front().mResSeq)
+ , mNumber(inNumber)
+ , mInsertionCode(inAtoms.front().mICode)
+ , mType(MapResidue(inAtoms.front().mResName))
+ , mSSBridgeNr(0)
+ , mAccessibility(0)
+ , mSecondaryStructure(loop)
+ , mSheet(0)
+ , mBend(false)
+{
+ if (mPrev != nullptr)
+ mPrev->mNext = this;
+
+ std::fill(mHelixFlags, mHelixFlags + 3, helixNone);
+
+ mBetaPartner[0].residue = mBetaPartner[1].residue = nullptr;
+
+ mHBondDonor[0].energy = mHBondDonor[1].energy = mHBondAcceptor[0].energy = mHBondAcceptor[1].energy = 0;
+ mHBondDonor[0].residue = mHBondDonor[1].residue = mHBondAcceptor[0].residue = mHBondAcceptor[1].residue = nullptr;
+
+ static const MAtom kNullAtom = {};
+ mN = mCA = mC = mO = kNullAtom;
+
+ foreach (const MAtom& atom, inAtoms)
+ {
+ if (mChainID.empty())
+ mChainID = atom.mChainID;
+
+ if (MapResidue(atom.mResName) != mType)
+ throw mas_exception(
+ boost::format("inconsistent residue types in atom records for residue %1% (%2% != %3%)")
+ % inNumber % atom.mResName % inAtoms.front().mResName);
+
+ if (atom.mResSeq != mSeqNumber)
+ throw mas_exception(boost::format("inconsistent residue sequence numbers (%1% != %2%)") % atom.mResSeq % mSeqNumber);
+
+ if (atom.GetName() == "N")
+ mN = atom;
+ else if (atom.GetName() == "CA")
+ mCA = atom;
+ else if (atom.GetName() == "C")
+ mC = atom;
+ else if (atom.GetName() == "O")
+ mO = atom;
+ else
+ mSideChain.push_back(atom);
+ }
+
+ // assign the Hydrogen
+ mH = GetN();
+
+ if (mType != kProline and mPrev != nullptr)
+ {
+ const MAtom& pc = mPrev->GetC();
+ const MAtom& po = mPrev->GetO();
+
+ double CODistance = Distance(pc, po);
+
+ mH.mLoc.mX += (pc.mLoc.mX - po.mLoc.mX) / CODistance;
+ mH.mLoc.mY += (pc.mLoc.mY - po.mLoc.mY) / CODistance;
+ mH.mLoc.mZ += (pc.mLoc.mZ - po.mLoc.mZ) / CODistance;
+ }
+
+ // update the box containing all atoms
+ mBox[0].mX = mBox[0].mY = mBox[0].mZ = std::numeric_limits<double>::max();
+ mBox[1].mX = mBox[1].mY = mBox[1].mZ = -std::numeric_limits<double>::max();
+
+ ExtendBox(mN, kRadiusN + 2 * kRadiusWater);
+ ExtendBox(mCA, kRadiusCA + 2 * kRadiusWater);
+ ExtendBox(mC, kRadiusC + 2 * kRadiusWater);
+ ExtendBox(mO, kRadiusO + 2 * kRadiusWater);
+ foreach (const MAtom& atom, mSideChain)
+ ExtendBox(atom, kRadiusSideAtom + 2 * kRadiusWater);
+
+ mRadius = mBox[1].mX - mBox[0].mX;
+ if (mRadius < mBox[1].mY - mBox[0].mY)
+ mRadius = mBox[1].mY - mBox[0].mY;
+ if (mRadius < mBox[1].mZ - mBox[0].mZ)
+ mRadius = mBox[1].mZ - mBox[0].mZ;
+
+ mCenter.mX = (mBox[0].mX + mBox[1].mX) / 2;
+ mCenter.mY = (mBox[0].mY + mBox[1].mY) / 2;
+ mCenter.mZ = (mBox[0].mZ + mBox[1].mZ) / 2;
+
+ if (VERBOSE > 3)
+ std::cerr << "Created residue " << mN.mResName << std::endl;
+}
+
+MResidue::MResidue(int32 inNumber, char inTypeCode, MResidue* inPrevious)
+ : mPrev(nullptr)
+ , mNext(nullptr)
+ , mSeqNumber(inNumber)
+ , mNumber(inNumber)
+ , mType(MapResidue(inTypeCode))
+ , mSSBridgeNr(0)
+ , mAccessibility(0)
+ , mSecondaryStructure(loop)
+ , mSheet(0)
+ , mBend(false)
+ , mRadius(0)
+ , mH(MAtom())
+{
+ std::fill(mHelixFlags, mHelixFlags + 3, helixNone);
+
+ mBetaPartner[0].residue = mBetaPartner[1].residue = nullptr;
+
+ mHBondDonor[0].energy = mHBondDonor[1].energy = mHBondAcceptor[0].energy = mHBondAcceptor[1].energy = 0;
+ mHBondDonor[0].residue = mHBondDonor[1].residue = mHBondAcceptor[0].residue = mHBondAcceptor[1].residue = nullptr;
+
+ static const MAtom kNullAtom = {};
+ mN = mCA = mC = mO = kNullAtom;
+
+ mCA.mResSeq = inTypeCode;
+ mCA.mChainID = "A";
}
MResidue::MResidue(const MResidue& residue)
- : mChainID(residue.mChainID)
- , mPrev(nullptr)
- , mNext(nullptr)
- , mSeqNumber(residue.mSeqNumber)
- , mNumber(residue.mNumber)
- , mType(residue.mType)
- , mSSBridgeNr(residue.mSSBridgeNr)
- , mAccessibility(residue.mAccessibility)
- , mSecondaryStructure(residue.mSecondaryStructure)
- , mC(residue.mC)
- , mN(residue.mN)
- , mCA(residue.mCA)
- , mO(residue.mO)
- , mH(residue.mH)
- , mSideChain(residue.mSideChain)
- , mSheet(residue.mSheet)
- , mBend(residue.mBend)
- , mCenter(residue.mCenter)
- , mRadius(residue.mRadius)
-{
- copy(residue.mHBondDonor, residue.mHBondDonor + 2, mHBondDonor);
- copy(residue.mHBondAcceptor, residue.mHBondAcceptor + 2, mHBondAcceptor);
- copy(residue.mBetaPartner, residue.mBetaPartner + 2, mBetaPartner);
- copy(residue.mHelixFlags, residue.mHelixFlags + 3, mHelixFlags);
- copy(residue.mBox, residue.mBox + 2, mBox);
+ : mChainID(residue.mChainID)
+ , mPrev(nullptr)
+ , mNext(nullptr)
+ , mSeqNumber(residue.mSeqNumber)
+ , mNumber(residue.mNumber)
+ , mType(residue.mType)
+ , mSSBridgeNr(residue.mSSBridgeNr)
+ , mAccessibility(residue.mAccessibility)
+ , mSecondaryStructure(residue.mSecondaryStructure)
+ , mC(residue.mC)
+ , mN(residue.mN)
+ , mCA(residue.mCA)
+ , mO(residue.mO)
+ , mH(residue.mH)
+ , mSideChain(residue.mSideChain)
+ , mSheet(residue.mSheet)
+ , mBend(residue.mBend)
+ , mCenter(residue.mCenter)
+ , mRadius(residue.mRadius)
+{
+ std::copy(residue.mHBondDonor, residue.mHBondDonor + 2, mHBondDonor);
+ std::copy(residue.mHBondAcceptor, residue.mHBondAcceptor + 2, mHBondAcceptor);
+ std::copy(residue.mBetaPartner, residue.mBetaPartner + 2, mBetaPartner);
+ std::copy(residue.mHelixFlags, residue.mHelixFlags + 3, mHelixFlags);
+ std::copy(residue.mBox, residue.mBox + 2, mBox);
}
void MResidue::SetPrev(MResidue* inResidue)
{
- mPrev = inResidue;
- mPrev->mNext = this;
+ mPrev = inResidue;
+ mPrev->mNext = this;
}
bool MResidue::NoChainBreak(const MResidue* from, const MResidue* to)
{
- bool result = true;
- for (const MResidue* r = from; result and r != to; r = r->mNext)
- {
- MResidue* next = r->mNext;
- if (next == nullptr)
- result = false;
- else
- result = next->mNumber == r->mNumber + 1;
- }
- return result;
+ bool result = true;
+ for (const MResidue* r = from; result and r != to; r = r->mNext)
+ {
+ MResidue* next = r->mNext;
+ if (next == nullptr)
+ result = false;
+ else
+ result = next->mNumber == r->mNumber + 1;
+ }
+ return result;
}
-void MResidue::SetChainID(const string& inChainID)
+void MResidue::SetChainID(const std::string& inChainID)
{
- mChainID = inChainID;
-
- mC.SetChainID(inChainID);
- mCA.SetChainID(inChainID);
- mO.SetChainID(inChainID);
- mN.SetChainID(inChainID);
- mH.SetChainID(inChainID);
- for_each(mSideChain.begin(), mSideChain.end(), boost::bind(&MAtom::SetChainID, _1, inChainID));
+ mChainID = inChainID;
+
+ mC.SetChainID(inChainID);
+ mCA.SetChainID(inChainID);
+ mO.SetChainID(inChainID);
+ mN.SetChainID(inChainID);
+ mH.SetChainID(inChainID);
+ for_each(mSideChain.begin(), mSideChain.end(),
+ boost::bind(&MAtom::SetChainID, _1, inChainID));
}
bool MResidue::ValidDistance(const MResidue& inNext) const
{
- return Distance(GetC(), inNext.GetN()) <= kMaxPeptideBondLength;
+ return Distance(GetC(), inNext.GetN()) <= kMaxPeptideBondLength;
}
bool MResidue::TestBond(const MResidue* other) const
{
- return
- (mHBondAcceptor[0].residue == other and mHBondAcceptor[0].energy < kMaxHBondEnergy) or
- (mHBondAcceptor[1].residue == other and mHBondAcceptor[1].energy < kMaxHBondEnergy);
+ return
+ (mHBondAcceptor[0].residue == other and mHBondAcceptor[0].energy < kMaxHBondEnergy) or
+ (mHBondAcceptor[1].residue == other and mHBondAcceptor[1].energy < kMaxHBondEnergy);
}
double MResidue::Phi() const
{
- double result = 360;
- if (mPrev != nullptr and NoChainBreak(mPrev, this))
- result = DihedralAngle(mPrev->GetC(), GetN(), GetCAlpha(), GetC());
- return result;
+ double result = 360;
+ if (mPrev != nullptr and NoChainBreak(mPrev, this))
+ result = DihedralAngle(mPrev->GetC(), GetN(), GetCAlpha(), GetC());
+ return result;
}
double MResidue::Psi() const
{
- double result = 360;
- if (mNext != nullptr and NoChainBreak(this, mNext))
- result = DihedralAngle(GetN(), GetCAlpha(), GetC(), mNext->GetN());
- return result;
+ double result = 360;
+ if (mNext != nullptr and NoChainBreak(this, mNext))
+ result = DihedralAngle(GetN(), GetCAlpha(), GetC(), mNext->GetN());
+ return result;
}
-tr1::tuple<double,char> MResidue::Alpha() const
+std::tr1::tuple<double,char> MResidue::Alpha() const
{
- double alhpa = 360;
- char chirality = ' ';
-
- const MResidue* nextNext = mNext ? mNext->Next() : nullptr;
- if (mPrev != nullptr and nextNext != nullptr and NoChainBreak(mPrev, nextNext))
- {
- alhpa = DihedralAngle(mPrev->GetCAlpha(), GetCAlpha(), mNext->GetCAlpha(), nextNext->GetCAlpha());
- if (alhpa < 0)
- chirality = '-';
- else
- chirality = '+';
- }
- return tr1::make_tuple(alhpa, chirality);
+ double alhpa = 360;
+ char chirality = ' ';
+
+ const MResidue* nextNext = mNext ? mNext->Next() : nullptr;
+ if (mPrev != nullptr and
+ nextNext != nullptr and
+ NoChainBreak(mPrev, nextNext))
+ {
+ alhpa = DihedralAngle(mPrev->GetCAlpha(), GetCAlpha(), mNext->GetCAlpha(),
+ nextNext->GetCAlpha());
+ if (alhpa < 0)
+ chirality = '-';
+ else
+ chirality = '+';
+ }
+ return std::tr1::make_tuple(alhpa, chirality);
}
double MResidue::Kappa() const
{
- double result = 360;
- const MResidue* prevPrev = mPrev ? mPrev->Prev() : nullptr;
- const MResidue* nextNext = mNext ? mNext->Next() : nullptr;
- if (prevPrev != nullptr and nextNext != nullptr and NoChainBreak(prevPrev, nextNext))
- {
- double ckap = CosinusAngle(GetCAlpha(), prevPrev->GetCAlpha(), nextNext->GetCAlpha(), GetCAlpha());
- double skap = sqrt(1 - ckap * ckap);
- result = atan2(skap, ckap) * 180 / kPI;
- }
- return result;
+ double result = 360;
+ const MResidue* prevPrev = mPrev ? mPrev->Prev() : nullptr;
+ const MResidue* nextNext = mNext ? mNext->Next() : nullptr;
+ if (prevPrev != nullptr and
+ nextNext != nullptr and
+ NoChainBreak(prevPrev, nextNext))
+ {
+ double ckap = CosinusAngle(GetCAlpha(), prevPrev->GetCAlpha(),
+ nextNext->GetCAlpha(), GetCAlpha());
+ double skap = sqrt(1 - ckap * ckap);
+ result = atan2(skap, ckap) * 180 / kPI;
+ }
+ return result;
}
double MResidue::TCO() const
{
- double result = 0;
- if (mPrev != nullptr and NoChainBreak(mPrev, this))
- result = CosinusAngle(GetC(), GetO(), mPrev->GetC(), mPrev->GetO());
- return result;
+ double result = 0;
+ if (mPrev != nullptr and NoChainBreak(mPrev, this))
+ result = CosinusAngle(GetC(), GetO(), mPrev->GetC(), mPrev->GetO());
+ return result;
}
void MResidue::SetBetaPartner(uint32 n,
- MResidue* inResidue, uint32 inLadder, bool inParallel)
+ MResidue* inResidue, uint32 inLadder, bool inParallel)
{
- assert(n == 0 or n == 1);
-
- mBetaPartner[n].residue = inResidue;
- mBetaPartner[n].ladder = inLadder;
- mBetaPartner[n].parallel = inParallel;
+ assert(n == 0 or n == 1);
+
+ mBetaPartner[n].residue = inResidue;
+ mBetaPartner[n].ladder = inLadder;
+ mBetaPartner[n].parallel = inParallel;
}
MBridgeParner MResidue::GetBetaPartner(uint32 n) const
{
- assert(n == 0 or n == 1);
- return mBetaPartner[n];
+ assert(n == 0 or n == 1);
+ return mBetaPartner[n];
}
MHelixFlag MResidue::GetHelixFlag(uint32 inHelixStride) const
{
- assert(inHelixStride == 3 or inHelixStride == 4 or inHelixStride == 5);
- return mHelixFlags[inHelixStride - 3];
+ assert(inHelixStride == 3 or inHelixStride == 4 or inHelixStride == 5);
+ return mHelixFlags[inHelixStride - 3];
}
bool MResidue::IsHelixStart(uint32 inHelixStride) const
{
- assert(inHelixStride == 3 or inHelixStride == 4 or inHelixStride == 5);
- return mHelixFlags[inHelixStride - 3] == helixStart or mHelixFlags[inHelixStride - 3] == helixStartAndEnd;
+ assert(inHelixStride == 3 or inHelixStride == 4 or inHelixStride == 5);
+ return mHelixFlags[inHelixStride - 3] == helixStart or
+ mHelixFlags[inHelixStride - 3] == helixStartAndEnd;
}
void MResidue::SetHelixFlag(uint32 inHelixStride, MHelixFlag inHelixFlag)
{
- assert(inHelixStride == 3 or inHelixStride == 4 or inHelixStride == 5);
- mHelixFlags[inHelixStride - 3] = inHelixFlag;
+ assert(inHelixStride == 3 or inHelixStride == 4 or inHelixStride == 5);
+ mHelixFlags[inHelixStride - 3] = inHelixFlag;
}
void MResidue::SetSSBridgeNr(uint8 inBridgeNr)
{
- if (mType != kCysteine)
- throw mas_exception("Only cysteine residues can form sulphur bridges");
- mSSBridgeNr = inBridgeNr;
+ if (mType != kCysteine)
+ throw mas_exception("Only cysteine residues can form sulphur bridges");
+ mSSBridgeNr = inBridgeNr;
}
uint8 MResidue::GetSSBridgeNr() const
{
- if (mType != kCysteine)
- throw mas_exception("Only cysteine residues can form sulphur bridges");
- return mSSBridgeNr;
+ if (mType != kCysteine)
+ throw mas_exception("Only cysteine residues can form sulphur bridges");
+ return mSSBridgeNr;
}
// TODO: use the angle to improve bond energy calculation.
double MResidue::CalculateHBondEnergy(MResidue& inDonor, MResidue& inAcceptor)
{
- double result = 0;
-
- if (inDonor.mType != kProline)
- {
- double distanceHO = Distance(inDonor.GetH(), inAcceptor.GetO());
- double distanceHC = Distance(inDonor.GetH(), inAcceptor.GetC());
- double distanceNC = Distance(inDonor.GetN(), inAcceptor.GetC());
- double distanceNO = Distance(inDonor.GetN(), inAcceptor.GetO());
-
- if (distanceHO < kMinimalDistance or distanceHC < kMinimalDistance or distanceNC < kMinimalDistance or distanceNO < kMinimalDistance)
- result = kMinHBondEnergy;
- else
- result = kCouplingConstant / distanceHO - kCouplingConstant / distanceHC + kCouplingConstant / distanceNC - kCouplingConstant / distanceNO;
-
- // DSSP compatibility mode:
- result = bm::round(result * 1000) / 1000;
-
- if (result < kMinHBondEnergy)
- result = kMinHBondEnergy;
- }
-
- // update donor
- if (result < inDonor.mHBondAcceptor[0].energy)
- {
- inDonor.mHBondAcceptor[1] = inDonor.mHBondAcceptor[0];
- inDonor.mHBondAcceptor[0].residue = &inAcceptor;
- inDonor.mHBondAcceptor[0].energy = result;
- }
- else if (result < inDonor.mHBondAcceptor[1].energy)
- {
- inDonor.mHBondAcceptor[1].residue = &inAcceptor;
- inDonor.mHBondAcceptor[1].energy = result;
- }
-
- // and acceptor
- if (result < inAcceptor.mHBondDonor[0].energy)
- {
- inAcceptor.mHBondDonor[1] = inAcceptor.mHBondDonor[0];
- inAcceptor.mHBondDonor[0].residue = &inDonor;
- inAcceptor.mHBondDonor[0].energy = result;
- }
- else if (result < inAcceptor.mHBondDonor[1].energy)
- {
- inAcceptor.mHBondDonor[1].residue = &inDonor;
- inAcceptor.mHBondDonor[1].energy = result;
- }
-
- return result;
+ double result = 0;
+
+ if (inDonor.mType != kProline)
+ {
+ double distanceHO = Distance(inDonor.GetH(), inAcceptor.GetO());
+ double distanceHC = Distance(inDonor.GetH(), inAcceptor.GetC());
+ double distanceNC = Distance(inDonor.GetN(), inAcceptor.GetC());
+ double distanceNO = Distance(inDonor.GetN(), inAcceptor.GetO());
+
+ if (distanceHO < kMinimalDistance or distanceHC < kMinimalDistance or
+ distanceNC < kMinimalDistance or distanceNO < kMinimalDistance)
+ result = kMinHBondEnergy;
+ else
+ result = kCouplingConstant / distanceHO - kCouplingConstant / distanceHC + kCouplingConstant / distanceNC - kCouplingConstant / distanceNO;
+
+ // DSSP compatibility mode:
+ result = bm::round(result * 1000) / 1000;
+
+ if (result < kMinHBondEnergy)
+ result = kMinHBondEnergy;
+ }
+
+ // update donor
+ if (result < inDonor.mHBondAcceptor[0].energy)
+ {
+ inDonor.mHBondAcceptor[1] = inDonor.mHBondAcceptor[0];
+ inDonor.mHBondAcceptor[0].residue = &inAcceptor;
+ inDonor.mHBondAcceptor[0].energy = result;
+ }
+ else if (result < inDonor.mHBondAcceptor[1].energy)
+ {
+ inDonor.mHBondAcceptor[1].residue = &inAcceptor;
+ inDonor.mHBondAcceptor[1].energy = result;
+ }
+
+ // and acceptor
+ if (result < inAcceptor.mHBondDonor[0].energy)
+ {
+ inAcceptor.mHBondDonor[1] = inAcceptor.mHBondDonor[0];
+ inAcceptor.mHBondDonor[0].residue = &inDonor;
+ inAcceptor.mHBondDonor[0].energy = result;
+ }
+ else if (result < inAcceptor.mHBondDonor[1].energy)
+ {
+ inAcceptor.mHBondDonor[1].residue = &inDonor;
+ inAcceptor.mHBondDonor[1].energy = result;
+ }
+
+ return result;
}
MBridgeType MResidue::TestBridge(MResidue* test) const
-{ // I. a d II. a d parallel
- const MResidue* a = mPrev; // \ /
- const MResidue* b = this; // b e b e
- const MResidue* c = mNext; // / \ ..
- const MResidue* d = test->mPrev; // c f c f
- const MResidue* e = test; //
- const MResidue* f = test->mNext; // III. a <- f IV. a f antiparallel
- //
- MBridgeType result = btNoBridge; // b e b <-> e
- //
- // c -> d c d
-
- if (a and c and NoChainBreak(a, c) and d and f and NoChainBreak(d, f))
- {
- if ((TestBond(c, e) and TestBond(e, a)) or (TestBond(f, b) and TestBond(b, d)))
- result = btParallel;
- else if ((TestBond(c, d) and TestBond(f, a)) or (TestBond(e, b) and TestBond(b, e)))
- result = btAntiParallel;
- }
-
- return result;
+{ // I. a d II. a d parallel
+ const MResidue* a = mPrev; // \ /
+ const MResidue* b = this; // b e b e
+ const MResidue* c = mNext; // / \ ..
+ const MResidue* d = test->mPrev; // c f c f
+ const MResidue* e = test; //
+ const MResidue* f = test->mNext; // III. a <- f IV. a f antiparallel
+ //
+ MBridgeType result = btNoBridge; // b e b <-> e
+ //
+ // c -> d c d
+
+ if (a and c and NoChainBreak(a, c) and d and f and NoChainBreak(d, f))
+ {
+ if ((TestBond(c, e) and TestBond(e, a)) or
+ (TestBond(f, b) and TestBond(b, d)))
+ result = btParallel;
+ else if ((TestBond(c, d) and TestBond(f, a)) or
+ (TestBond(e, b) and TestBond(b, e)))
+ result = btAntiParallel;
+ }
+
+ return result;
}
void MResidue::ExtendBox(const MAtom& atom, double inRadius)
{
- if (mBox[0].mX > atom.mLoc.mX - inRadius)
- mBox[0].mX = atom.mLoc.mX - inRadius;
- if (mBox[0].mY > atom.mLoc.mY - inRadius)
- mBox[0].mY = atom.mLoc.mY - inRadius;
- if (mBox[0].mZ > atom.mLoc.mZ - inRadius)
- mBox[0].mZ = atom.mLoc.mZ - inRadius;
- if (mBox[1].mX < atom.mLoc.mX + inRadius)
- mBox[1].mX = atom.mLoc.mX + inRadius;
- if (mBox[1].mY < atom.mLoc.mY + inRadius)
- mBox[1].mY = atom.mLoc.mY + inRadius;
- if (mBox[1].mZ < atom.mLoc.mZ + inRadius)
- mBox[1].mZ = atom.mLoc.mZ + inRadius;
+ if (mBox[0].mX > atom.mLoc.mX - inRadius)
+ mBox[0].mX = atom.mLoc.mX - inRadius;
+ if (mBox[0].mY > atom.mLoc.mY - inRadius)
+ mBox[0].mY = atom.mLoc.mY - inRadius;
+ if (mBox[0].mZ > atom.mLoc.mZ - inRadius)
+ mBox[0].mZ = atom.mLoc.mZ - inRadius;
+ if (mBox[1].mX < atom.mLoc.mX + inRadius)
+ mBox[1].mX = atom.mLoc.mX + inRadius;
+ if (mBox[1].mY < atom.mLoc.mY + inRadius)
+ mBox[1].mY = atom.mLoc.mY + inRadius;
+ if (mBox[1].mZ < atom.mLoc.mZ + inRadius)
+ mBox[1].mZ = atom.mLoc.mZ + inRadius;
}
inline
bool MResidue::AtomIntersectsBox(const MAtom& atom, double inRadius) const
{
- return
- atom.mLoc.mX + inRadius >= mBox[0].mX and atom.mLoc.mX - inRadius <= mBox[1].mX and
- atom.mLoc.mY + inRadius >= mBox[0].mY and atom.mLoc.mY - inRadius <= mBox[1].mY and
- atom.mLoc.mZ + inRadius >= mBox[0].mZ and atom.mLoc.mZ - inRadius <= mBox[1].mZ;
+ return
+ atom.mLoc.mX + inRadius >= mBox[0].mX and
+ atom.mLoc.mX - inRadius <= mBox[1].mX and
+ atom.mLoc.mY + inRadius >= mBox[0].mY and
+ atom.mLoc.mY - inRadius <= mBox[1].mY and
+ atom.mLoc.mZ + inRadius >= mBox[0].mZ and
+ atom.mLoc.mZ - inRadius <= mBox[1].mZ;
}
-void MResidue::CalculateSurface(const vector<MResidue*>& inResidues)
+void MResidue::CalculateSurface(const std::vector<MResidue*>& inResidues)
{
- vector<MResidue*> neighbours;
-
- foreach (MResidue* r, inResidues)
- {
- MPoint center;
- double radius;
- r->GetCenterAndRadius(center, radius);
-
- if (Distance(mCenter, center) < mRadius + radius)
- neighbours.push_back(r);
- }
+ std::vector<MResidue*> neighbours;
+
+ foreach (MResidue* r, inResidues)
+ {
+ MPoint center;
+ double radius;
+ r->GetCenterAndRadius(center, radius);
- mAccessibility = CalculateSurface(mN, kRadiusN, neighbours) +
- CalculateSurface(mCA, kRadiusCA, neighbours) +
- CalculateSurface(mC, kRadiusC, neighbours) +
- CalculateSurface(mO, kRadiusO, neighbours);
-
- foreach (const MAtom& atom, mSideChain)
- mAccessibility += CalculateSurface(atom, kRadiusSideAtom, neighbours);
+ if (Distance(mCenter, center) < mRadius + radius)
+ neighbours.push_back(r);
+ }
+
+ mAccessibility = CalculateSurface(mN, kRadiusN, neighbours) +
+ CalculateSurface(mCA, kRadiusCA, neighbours) +
+ CalculateSurface(mC, kRadiusC, neighbours) +
+ CalculateSurface(mO, kRadiusO, neighbours);
+
+ foreach (const MAtom& atom, mSideChain)
+ mAccessibility += CalculateSurface(atom, kRadiusSideAtom, neighbours);
}
class MAccumulator
{
public:
- struct candidate
- {
- MPoint location;
- double radius;
- double distance;
-
- bool operator<(const candidate& rhs) const
- { return distance < rhs.distance; }
- };
-
- void operator()(const MPoint& a, const MPoint& b, double d, double r)
- {
- double distance = DistanceSquared(a, b);
-
- d += kRadiusWater;
- r += kRadiusWater;
-
- double test = d + r;
- test *= test;
-
- if (distance < test and distance > 0.0001)
- {
- candidate c = { b - a, r * r, distance };
-
- m_x.push_back(c);
- push_heap(m_x.begin(), m_x.end());
- }
- }
-
- void sort()
- {
- sort_heap(m_x.begin(), m_x.end());
- }
-
- vector<candidate> m_x;
+ struct candidate
+ {
+ MPoint location;
+ double radius;
+ double distance;
+
+ bool operator<(const candidate& rhs) const
+ { return distance < rhs.distance; }
+ };
+
+ void operator()(const MPoint& a, const MPoint& b, double d, double r)
+ {
+ double distance = DistanceSquared(a, b);
+
+ d += kRadiusWater;
+ r += kRadiusWater;
+
+ double test = d + r;
+ test *= test;
+
+ if (distance < test and distance > 0.0001)
+ {
+ candidate c = { b - a, r * r, distance };
+
+ m_x.push_back(c);
+ push_heap(m_x.begin(), m_x.end());
+ }
+ }
+
+ void sort()
+ {
+ sort_heap(m_x.begin(), m_x.end());
+ }
+
+ std::vector<candidate> m_x;
};
-double MResidue::CalculateSurface(const MAtom& inAtom, double inRadius, const vector<MResidue*>& inResidues)
-{
- MAccumulator accumulate;
-
- foreach (MResidue* r, inResidues)
- {
- if (r->AtomIntersectsBox(inAtom, inRadius))
- {
- accumulate(inAtom, r->mN, inRadius, kRadiusN);
- accumulate(inAtom, r->mCA, inRadius, kRadiusCA);
- accumulate(inAtom, r->mC, inRadius, kRadiusC);
- accumulate(inAtom, r->mO, inRadius, kRadiusO);
-
- foreach (const MAtom& atom, r->mSideChain)
- accumulate(inAtom, atom, inRadius, kRadiusSideAtom);
- }
- }
-
- accumulate.sort();
-
- double radius = inRadius + kRadiusWater;
- double surface = 0;
-
- MSurfaceDots& surfaceDots = MSurfaceDots::Instance();
-
- for (uint32 i = 0; i < surfaceDots.size(); ++i)
- {
- MPoint xx = surfaceDots[i] * radius;
-
- bool free = true;
- for (uint32 k = 0; free and k < accumulate.m_x.size(); ++k)
- free = accumulate.m_x[k].radius < DistanceSquared(xx, accumulate.m_x[k].location);
-
- if (free)
- surface += surfaceDots.weight();
- }
-
- return surface * radius * radius;
+double MResidue::CalculateSurface(const MAtom& inAtom, double inRadius,
+ const std::vector<MResidue*>& inResidues)
+{
+ MAccumulator accumulate;
+
+ foreach (MResidue* r, inResidues)
+ {
+ if (r->AtomIntersectsBox(inAtom, inRadius))
+ {
+ accumulate(inAtom, r->mN, inRadius, kRadiusN);
+ accumulate(inAtom, r->mCA, inRadius, kRadiusCA);
+ accumulate(inAtom, r->mC, inRadius, kRadiusC);
+ accumulate(inAtom, r->mO, inRadius, kRadiusO);
+
+ foreach (const MAtom& atom, r->mSideChain)
+ accumulate(inAtom, atom, inRadius, kRadiusSideAtom);
+ }
+ }
+
+ accumulate.sort();
+
+ double radius = inRadius + kRadiusWater;
+ double surface = 0;
+
+ MSurfaceDots& surfaceDots = MSurfaceDots::Instance();
+
+ for (uint32 i = 0; i < surfaceDots.size(); ++i)
+ {
+ MPoint xx = surfaceDots[i] * radius;
+
+ bool free = true;
+ for (uint32 k = 0; free and k < accumulate.m_x.size(); ++k)
+ free = accumulate.m_x[k].radius < DistanceSquared(xx, accumulate.m_x[k].location);
+
+ if (free)
+ surface += surfaceDots.weight();
+ }
+
+ return surface * radius * radius;
}
void MResidue::Translate(const MPoint& inTranslation)
{
- mN.Translate(inTranslation);
- mCA.Translate(inTranslation);
- mC.Translate(inTranslation);
- mO.Translate(inTranslation);
- mH.Translate(inTranslation);
- for_each(mSideChain.begin(), mSideChain.end(), boost::bind(&MAtom::Translate, _1, inTranslation));
+ mN.Translate(inTranslation);
+ mCA.Translate(inTranslation);
+ mC.Translate(inTranslation);
+ mO.Translate(inTranslation);
+ mH.Translate(inTranslation);
+ for_each(mSideChain.begin(), mSideChain.end(),
+ boost::bind(&MAtom::Translate, _1, inTranslation));
}
void MResidue::Rotate(const MQuaternion& inRotation)
{
- mN.Rotate(inRotation);
- mCA.Rotate(inRotation);
- mC.Rotate(inRotation);
- mO.Rotate(inRotation);
- mH.Rotate(inRotation);
- for_each(mSideChain.begin(), mSideChain.end(), boost::bind(&MAtom::Rotate, _1, inRotation));
+ mN.Rotate(inRotation);
+ mCA.Rotate(inRotation);
+ mC.Rotate(inRotation);
+ mO.Rotate(inRotation);
+ mH.Rotate(inRotation);
+ for_each(mSideChain.begin(), mSideChain.end(),
+ boost::bind(&MAtom::Rotate, _1, inRotation));
}
-void MResidue::GetPoints(vector<MPoint>& outPoints) const
+void MResidue::GetPoints(std::vector<MPoint>& outPoints) const
{
- outPoints.push_back(mN);
- outPoints.push_back(mCA);
- outPoints.push_back(mC);
- outPoints.push_back(mO);
- foreach (const MAtom& a, mSideChain)
- outPoints.push_back(a);
+ outPoints.push_back(mN);
+ outPoints.push_back(mCA);
+ outPoints.push_back(mC);
+ outPoints.push_back(mO);
+ foreach (const MAtom& a, mSideChain)
+ outPoints.push_back(a);
}
-void MResidue::WritePDB(ostream& os)
+void MResidue::WritePDB(std::ostream& os)
{
- mN.WritePDB(os);
- mCA.WritePDB(os);
- mC.WritePDB(os);
- mO.WritePDB(os);
-
- for_each(mSideChain.begin(), mSideChain.end(), boost::bind(&MAtom::WritePDB, _1, boost::ref(os)));
+ mN.WritePDB(os);
+ mCA.WritePDB(os);
+ mC.WritePDB(os);
+ mO.WritePDB(os);
+
+ for_each(mSideChain.begin(), mSideChain.end(),
+ boost::bind(&MAtom::WritePDB, _1, boost::ref(os)));
}
// --------------------------------------------------------------------
MChain::MChain(const MChain& chain)
- : mChainID(chain.mChainID)
+ : mChainID(chain.mChainID)
{
- MResidue* previous = nullptr;
-
- foreach (const MResidue* residue, chain.mResidues)
- {
- MResidue* newResidue = new MResidue(*residue);
- newResidue->SetPrev(previous);
- mResidues.push_back(newResidue);
- previous = newResidue;
- }
+ MResidue* previous = nullptr;
+
+ foreach (const MResidue* residue, chain.mResidues)
+ {
+ MResidue* newResidue = new MResidue(*residue);
+ newResidue->SetPrev(previous);
+ mResidues.push_back(newResidue);
+ previous = newResidue;
+ }
}
MChain::~MChain()
{
- foreach (MResidue* residue, mResidues)
- delete residue;
+ foreach (MResidue* residue, mResidues)
+ delete residue;
}
MChain& MChain::operator=(const MChain& chain)
{
- foreach (MResidue* residue, mResidues)
- delete residue;
- mResidues.clear();
+ foreach (MResidue* residue, mResidues)
+ delete residue;
+ mResidues.clear();
+
+ foreach (const MResidue* residue, chain.mResidues)
+ mResidues.push_back(new MResidue(*residue));
- foreach (const MResidue* residue, chain.mResidues)
- mResidues.push_back(new MResidue(*residue));
-
- mChainID = chain.mChainID;
-
- return *this;
+ mChainID = chain.mChainID;
+
+ return *this;
}
-void MChain::SetChainID(const string& inChainID)
+void MChain::SetChainID(const std::string& inChainID)
{
- mChainID = inChainID;
- for_each(mResidues.begin(), mResidues.end(), boost::bind(&MResidue::SetChainID, _1, inChainID));
+ mChainID = inChainID;
+ for_each(mResidues.begin(), mResidues.end(),
+ boost::bind(&MResidue::SetChainID, _1, inChainID));
}
void MChain::Translate(const MPoint& inTranslation)
{
- for_each(mResidues.begin(), mResidues.end(), boost::bind(&MResidue::Translate, _1, inTranslation));
+ for_each(mResidues.begin(), mResidues.end(),
+ boost::bind(&MResidue::Translate, _1, inTranslation));
}
void MChain::Rotate(const MQuaternion& inRotation)
{
- for_each(mResidues.begin(), mResidues.end(), boost::bind(&MResidue::Rotate, _1, inRotation));
+ for_each(mResidues.begin(), mResidues.end(),
+ boost::bind(&MResidue::Rotate, _1, inRotation));
}
-void MChain::WritePDB(ostream& os)
+void MChain::WritePDB(std::ostream& os)
{
- for_each(mResidues.begin(), mResidues.end(), boost::bind(&MResidue::WritePDB, _1, boost::ref(os)));
-
- boost::format ter("TER %4.4d %3.3s %1.1s%4.4d%c");
-
- MResidue* last = mResidues.back();
-
- os << (ter % (last->GetCAlpha().mSerial + 1) % kResidueInfo[last->GetType()].name % mChainID % last->GetNumber() % ' ') << endl;
+ for_each(mResidues.begin(), mResidues.end(),
+ boost::bind(&MResidue::WritePDB, _1, boost::ref(os)));
+
+ boost::format ter("TER %4.4d %3.3s %1.1s%4.4d%c");
+
+ MResidue* last = mResidues.back();
+
+ os << (ter % (last->GetCAlpha().mSerial + 1) % kResidueInfo[last->GetType()].name % mChainID % last->GetNumber() % ' ') << std::endl;
}
-MResidue* MChain::GetResidueBySeqNumber(uint16 inSeqNumber, const string& inInsertionCode)
+const MResidue* MChain::GetResidueBySeqNumber(uint16 inSeqNumber,
+ const std::string& inInsertionCode) const
{
- vector<MResidue*>::iterator r = find_if(mResidues.begin(), mResidues.end(),
- boost::bind(&MResidue::GetSeqNumber, _1) == inSeqNumber and
- boost::bind(&MResidue::GetInsertionCode, _1) == inInsertionCode);
- if (r == mResidues.end())
- throw mas_exception(boost::format("Residue %d%s not found") % inSeqNumber % inInsertionCode);
- return *r;
+ const auto r = find_if(mResidues.begin(), mResidues.end(),
+ boost::bind(&MResidue::GetSeqNumber, _1) == inSeqNumber and
+ boost::bind(&MResidue::GetInsertionCode, _1) == inInsertionCode);
+ if (r == mResidues.end())
+ throw mas_exception(boost::format("Residue %d%s not found") % inSeqNumber % inInsertionCode);
+ return *r;
}
-void MChain::GetSequence(string& outSequence) const
+void MChain::GetSequence(std::string& outSequence) const
{
- foreach (const MResidue* r, GetResidues())
- outSequence += kResidueInfo[r->GetType()].code;
+ foreach (const MResidue* r, GetResidues())
+ outSequence += kResidueInfo[r->GetType()].code;
}
// --------------------------------------------------------------------
struct MResidueID
{
- string chain;
- uint16 seqNumber;
- string insertionCode;
-
- bool operator<(const MResidueID& o) const
- {
- return
- chain < o.chain or
- (chain == o.chain and seqNumber < o.seqNumber) or
- (chain == o.chain and seqNumber == o.seqNumber and insertionCode < o.insertionCode);
- }
-
- bool operator!=(const MResidueID& o) const
- {
- return chain != o.chain or seqNumber != o.seqNumber or insertionCode != o.insertionCode;
- }
+ std::string chain;
+ uint16 seqNumber;
+ std::string insertionCode;
+
+ bool operator<(const MResidueID& o) const
+ {
+ return
+ chain < o.chain or
+ (chain == o.chain and seqNumber < o.seqNumber) or
+ (chain == o.chain and seqNumber == o.seqNumber and
+ insertionCode < o.insertionCode);
+ }
+
+ bool operator!=(const MResidueID& o) const
+ {
+ return chain != o.chain or
+ seqNumber != o.seqNumber or
+ insertionCode != o.insertionCode;
+ }
};
MProtein::MProtein()
- : mResidueCount(0)
- , mChainBreaks(0)
- , mIgnoredWaterMolecules(0)
- , mNrOfHBondsInParallelBridges(0)
- , mNrOfHBondsInAntiparallelBridges(0)
-{
- fill(mParallelBridgesPerLadderHistogram, mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mAntiparallelBridgesPerLadderHistogram, mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mLaddersPerSheetHistogram, mLaddersPerSheetHistogram + kHistogramSize, 0);
-}
-
-MProtein::MProtein(const string& inID, MChain* inChain)
- : mID(inID)
- , mChainBreaks(0)
- , mIgnoredWaterMolecules(0)
- , mNrOfHBondsInParallelBridges(0)
- , mNrOfHBondsInAntiparallelBridges(0)
-{
- fill(mParallelBridgesPerLadderHistogram, mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mAntiparallelBridgesPerLadderHistogram, mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mLaddersPerSheetHistogram, mLaddersPerSheetHistogram + kHistogramSize, 0);
-
- mChains.push_back(inChain);
+ : mResidueCount(0)
+ , mChainBreaks(0)
+ , mIgnoredWaterMolecules(0)
+ , mNrOfHBondsInParallelBridges(0)
+ , mNrOfHBondsInAntiparallelBridges(0)
+{
+ std::fill(mParallelBridgesPerLadderHistogram,
+ mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mAntiparallelBridgesPerLadderHistogram,
+ mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mLaddersPerSheetHistogram,
+ mLaddersPerSheetHistogram + kHistogramSize, 0);
+}
+
+MProtein::MProtein(const std::string& inID, MChain* inChain)
+ : mID(inID)
+ , mChainBreaks(0)
+ , mIgnoredWaterMolecules(0)
+ , mNrOfHBondsInParallelBridges(0)
+ , mNrOfHBondsInAntiparallelBridges(0)
+ , mResidueCount(0)
+{
+ std::fill(mParallelBridgesPerLadderHistogram,
+ mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mAntiparallelBridgesPerLadderHistogram,
+ mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mLaddersPerSheetHistogram,
+ mLaddersPerSheetHistogram + kHistogramSize, 0);
+
+ mChains.push_back(inChain);
}
MProtein::~MProtein()
{
- foreach (MChain* chain, mChains)
- delete chain;
-}
-
-void MProtein::ReadPDB(istream& is, bool cAlphaOnly)
-{
- mResidueCount = 0;
- mChainBreaks = 0;
- mIgnoredWaterMolecules = 0;
- mNrOfHBondsInParallelBridges = 0;
- mNrOfHBondsInAntiparallelBridges = 0;
-
- fill(mParallelBridgesPerLadderHistogram, mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mAntiparallelBridgesPerLadderHistogram, mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mLaddersPerSheetHistogram, mLaddersPerSheetHistogram + kHistogramSize, 0);
-
- vector<pair<MResidueID,MResidueID> > ssbonds;
- set<char> terminatedChains;
-
- bool model = false;
- vector<MAtom> atoms;
- char firstAltLoc = 0;
- bool atomSeen = false;
-
- while (not is.eof())
- {
- string line;
- getline(is, line);
- if (line.empty() and is.eof())
- break;
-
- if (VERBOSE > 3)
- cerr << line << endl;
-
- if (ba::starts_with(line, "HEADER"))
- {
- mHeader = line;
- ba::trim(mHeader);
- if (line.length() >= 66)
- mID = line.substr(62, 4);
- else
- mID = "UNDF";
- continue;
- }
-
- if (ba::starts_with(line, "COMPND"))
- {
- ba::trim_right(line);
- mCompound = mCompound + line.substr(10);
- continue;
- }
-
- if (ba::starts_with(line, "SOURCE"))
- {
- ba::trim_right(line);
- mSource = mSource + line.substr(10);
- continue;
- }
-
- if (ba::starts_with(line, "AUTHOR"))
- {
- ba::trim_right(line);
- mAuthor = mAuthor + line.substr(10);
- continue;
- }
-
- if (ba::starts_with(line, "DBREF"))
- {
- ba::trim(line);
- mDbRef.push_back(line);
- continue;
- }
-
- // brain dead support for only the first model in the file (NMR)
- if (ba::starts_with(line, "MODEL"))
- {
- model = true;
- continue;
- }
-
- if (ba::starts_with(line, "ENDMDL") and model == true)
- break;
-
- if (ba::starts_with(line, "SSBOND"))
- {
- //SSBOND 1 CYS A 6 CYS A 11 1555 1555 2.03
- pair<MResidueID,MResidueID> ssbond;
- ssbond.first.chain = line[15];
- ssbond.first.seqNumber = boost::lexical_cast<uint16>(ba::trim_copy(line.substr(16, 5)));
- ssbond.first.insertionCode = line[21];
- ssbond.second.chain = line[29];
- ssbond.second.seqNumber = boost::lexical_cast<uint16>(ba::trim_copy(line.substr(30, 5)));
- ssbond.second.insertionCode = line[35];
-
- ssbonds.push_back(ssbond);
- continue;
- }
-
- // add ATOMs only if the chain isn't terminated
- if (terminatedChains.count(line[21]))
- continue;
-
- if (ba::starts_with(line, "TER "))
- {
- if (atoms.empty())
- {
- cerr << "no atoms read before TER record " << endl
- << line << endl;
- continue;
- }
-
- AddResidue(atoms);
- atoms.clear();
- firstAltLoc = 0;
- atomSeen = false;
-
- terminatedChains.insert(line[21]);
-
- continue;
- }
-
- if (ba::starts_with(line, "ATOM ") or ba::starts_with(line, "HETATM"))
- // 1 - 6 Record name "ATOM "
- {
- if (cAlphaOnly and line.substr(12, 4) != " CA ")
- continue;
-
- atomSeen = ba::starts_with(line, "ATOM ");
-
- MAtom atom = {};
-
- // 7 - 11 Integer serial Atom serial number.
- atom.mSerial = boost::lexical_cast<uint32>(ba::trim_copy(line.substr(6, 5)));
- // 13 - 16 Atom name Atom name.
- atom.mName = ba::trim_copy(line.substr(12, 4));
- // 17 Character altLoc Alternate location indicator.
- atom.mAltLoc = line[16];
- // 18 - 20 Residue name resName Residue name.
- atom.mResName = ba::trim_copy(line.substr(17, 4));
- // 22 Character chainID Chain identifier.
- atom.mChainID = line[21];
- // 23 - 26 Integer resSeq Residue sequence number.
- atom.mResSeq = boost::lexical_cast<int16>(ba::trim_copy(line.substr(22, 4)));
- // 27 AChar iCode Code for insertion of residues.
- atom.mICode = line.substr(26, 1);
-
- // 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
- atom.mLoc.mX = ParseFloat(line.substr(30, 8));
- // 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
- atom.mLoc.mY = ParseFloat(line.substr(38, 8));
- // 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
- atom.mLoc.mZ = ParseFloat(line.substr(46, 8));
- // 55 - 60 Real(6.2) occupancy Occupancy.
- atom.mOccupancy = ParseFloat(line.substr(54, 6));
- // 61 - 66 Real(6.2) tempFactor Temperature factor.
- atom.mTempFactor = ParseFloat(line.substr(60, 6));
- // 77 - 78 LString(2) element Element symbol, right-justified.
- if (line.length() > 76)
- atom.mElement = ba::trim_copy(line.substr(76, 3));
- // 79 - 80 LString(2) charge Charge on the atom.
- atom.mCharge = 0;
-
-// alternative test, check chain ID as well.
- if (not atoms.empty() and
- (atom.mChainID != atoms.back().mChainID or
- (atom.mResSeq != atoms.back().mResSeq or
- (atom.mResSeq == atoms.back().mResSeq and atom.mICode != atoms.back().mICode))))
-// if (not atoms.empty() and
-// (atom.mResSeq != atoms.back().mResSeq or (atom.mResSeq == atoms.back().mResSeq and atom.mICode != atoms.back().mICode)))
- {
- AddResidue(atoms);
- atoms.clear();
- firstAltLoc = 0;
- }
-
- try
- {
- atom.mType = MapElement(line.substr(76, 2));
- }
- catch (exception& e)
- {
- if (VERBOSE)
- cerr << e.what() << endl;
- atom.mType = kUnknownAtom;
- }
-
- if (atom.mType == kHydrogen)
- continue;
-
- if (atom.mAltLoc != ' ')
- {
- if (firstAltLoc == 0)
- firstAltLoc = atom.mAltLoc;
- if (atom.mAltLoc == firstAltLoc)
- atom.mAltLoc = 'A';
- }
-
- if (firstAltLoc != 0 and atom.mAltLoc != ' ' and atom.mAltLoc != firstAltLoc)
- {
- if (VERBOSE)
- cerr << "skipping alternate atom record " << atom.mResName << endl;
- continue;
- }
-
- atoms.push_back(atom);
- }
- }
-
- if (not atoms.empty()) // we have read atoms without a TER
- {
- if (atomSeen and VERBOSE)
- cerr << "ATOM records not terminated by TER record" << endl;
-
- AddResidue(atoms);
- }
-
- // map the sulfur bridges
- uint32 ssbondNr = 1;
- typedef pair<MResidueID,MResidueID> SSBond;
- foreach (const SSBond& ssbond, ssbonds)
- {
- try
- {
- MResidue* first = GetResidue(ssbond.first.chain, ssbond.first.seqNumber, ssbond.first.insertionCode);
- MResidue* second = GetResidue(ssbond.second.chain, ssbond.second.seqNumber, ssbond.second.insertionCode);
-
- if (first == second)
- throw mas_exception("first and second residue are the same");
-
- first->SetSSBridgeNr(ssbondNr);
- second->SetSSBridgeNr(ssbondNr);
-
- mSSBonds.push_back(make_pair(first, second));
-
- ++ssbondNr;
- }
- catch (exception& e)
- {
- if (VERBOSE)
- cerr << "invalid residue referenced in SSBOND record: " << e.what() << endl;
- }
- }
-
- mChains.erase(
- remove_if(mChains.begin(), mChains.end(), boost::bind(&MChain::Empty, _1)),
- mChains.end());
-
- if (VERBOSE and mIgnoredWaterMolecules)
- cerr << "Ignored " << mIgnoredWaterMolecules << " water molecules" << endl;
-
- if (mChains.empty())
- throw mas_exception("empty protein, or no valid complete residues");
-}
-
-void MProtein::ReadmmCIF(istream& is, bool cAlphaOnly)
-{
- mResidueCount = 0;
- mChainBreaks = 0;
- mIgnoredWaterMolecules = 0;
- mNrOfHBondsInParallelBridges = 0;
- mNrOfHBondsInAntiparallelBridges = 0;
-
- fill(mParallelBridgesPerLadderHistogram, mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mAntiparallelBridgesPerLadderHistogram, mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
- fill(mLaddersPerSheetHistogram, mLaddersPerSheetHistogram + kHistogramSize, 0);
-
- vector<pair<MResidueID,MResidueID> > ssbonds;
- set<char> terminatedChains;
-
- // Read the mmCIF data into a mmCIF file class
- // Using http://mmcif.rcsb.org/dictionaries/pdb-correspondence/pdb2mmcif-2010.html
- // as a reference.
-
- mmCIF::file data(is);
-
- // ID
- mID = data.get("_entry.id");
-
- // HEADER
- string keywords = data.get("_struct_keywords.text").substr(0, 39);
- mHeader = (boost::format("HEADER %s %s%s %s") %
- keywords % string(39 - keywords.length(), ' ') % data.get("_database_PDB_rev.date_original") % mID).str();
-
- // COMPND
- foreach (const mmCIF::row& desc, data["_entity"])
- {
- if (desc["type"] == "polymer")
- {
- string s = desc["pdbx_description"];
- ba::trim(s);
- mCompound = (mCompound.empty() ? mCompound : mCompound + "; ") + s;
- }
- }
-
- if (mCompound.empty())
- mCompound = data.get_joined("_struct.pdbx_descriptor", "; ");
-
- // SOURCE
- mSource = data.get_joined("_entity_src_nat.pdbx_organism_scientific", "; ");
- if (mSource.empty())
- mSource = data.get_joined("_entity_src_gen.pdbx_gene_src_scientific_name", "; ");
- if (mSource.empty())
- mSource = data.get_joined("_pdbx_entity_src_syn.organism_scientific", "; ");
-
- // AUTHOR
- mAuthor = data.get_joined("_audit_author.name", "; ");
-
- // ssbonds
-
- foreach (const mmCIF::row& ss, data["_struct_conn"])
- {
- if (ss["conn_type_id"] != "disulf")
- continue;
-
- pair<MResidueID,MResidueID> ssbond;
-
- ssbond.first.chain = ss["ptnr1_label_asym_id"];
- ssbond.first.seqNumber = boost::lexical_cast<uint16>(ss["ptnr1_label_seq_id"]);
- ssbond.first.insertionCode = ss["pdbx_ptnr1_PDB_ins_code"];
- if (ssbond.first.insertionCode == "?")
- ssbond.first.insertionCode.clear();
-
- ssbond.second.chain = ss["ptnr2_label_asym_id"];
- ssbond.second.seqNumber = boost::lexical_cast<uint16>(ss["ptnr2_label_seq_id"]);
- ssbond.second.insertionCode = ss["pdbx_ptnr2_PDB_ins_code"];
- if (ssbond.second.insertionCode == "?")
- ssbond.second.insertionCode.clear();
-
- ssbonds.push_back(ssbond);
- }
-
- vector<MAtom> atoms;
- char firstAltLoc = 0;
-
- // remap label_seq_id to auth_seq_id
- map<string, map<int,int> > seq_id_map;
-
- foreach (const mmCIF::row& atom, data["_atom_site"])
- {
- // skip over NMR models > 1
- if (atoi(atom["pdbx_PDB_model_num"].c_str()) > 1)
- continue;
-
- string label_seq_id = atom["label_seq_id"];
-
- MAtom a;
-
- a.mSerial = boost::lexical_cast<uint32>(atom["id"]);
- a.mName = atom["auth_atom_id"];
- a.mAltLoc = atom["label_alt_id"] == "." ? ' ' : atom["label_alt_id"][0];
- a.mResName = atom["auth_comp_id"];
- a.mChainID = atom["label_asym_id"];
- a.mResSeq = boost::lexical_cast<int16>(atom["auth_seq_id"]);
- a.mICode = atom["pdbx_PDB_ins_code"] == "?" ? "" : atom["pdbx_PDB_ins_code"];
-
- // map seq_id
- if (label_seq_id == "?" or label_seq_id == ".")
- seq_id_map[a.mChainID][a.mResSeq] = a.mResSeq;
- else
- seq_id_map[a.mChainID][boost::lexical_cast<int16>(label_seq_id)] = a.mResSeq;
-
- a.mLoc.mX = ParseFloat(atom["Cartn_x"]);
- a.mLoc.mY = ParseFloat(atom["Cartn_y"]);
- a.mLoc.mZ = ParseFloat(atom["Cartn_z"]);
-
- a.mOccupancy = ParseFloat(atom["occupancy"]);
- a.mTempFactor = ParseFloat(atom["B_iso_or_equiv"]);
- a.mElement = atom["type_symbol"];
- a.mCharge = atom["pdbx_formal_charge"] != "?" ? boost::lexical_cast<int>(atom["pdbx_formal_charge"]) : 0;
-
- try
- {
- a.mType = MapElement(a.mElement);
- }
- catch (exception& e)
- {
- if (VERBOSE)
- cerr << e.what() << endl;
- a.mType = kUnknownAtom;
- }
-
- if (a.mType == kHydrogen)
- continue;
-
- if (not atoms.empty() and
- (a.mChainID != atoms.back().mChainID or
- (a.mResSeq != atoms.back().mResSeq or
- (a.mResSeq == atoms.back().mResSeq and a.mICode != atoms.back().mICode))))
- {
- AddResidue(atoms);
- atoms.clear();
- firstAltLoc = 0;
- }
-
- if (a.mAltLoc != ' ')
- {
- if (firstAltLoc == 0)
- firstAltLoc = a.mAltLoc;
- if (a.mAltLoc == firstAltLoc)
- a.mAltLoc = 'A';
- }
-
- if (firstAltLoc != 0 and a.mAltLoc != ' ' and a.mAltLoc != firstAltLoc)
- {
- if (VERBOSE)
- cerr << "skipping alternate atom record " << a.mResName << endl;
- continue;
- }
-
- atoms.push_back(a);
- }
-
- if (not atoms.empty())
- AddResidue(atoms);
-
- // map the sulfur bridges
- uint32 ssbondNr = 1;
- typedef pair<MResidueID,MResidueID> SSBond;
- foreach (const SSBond& ssbond, ssbonds)
- {
- try
- {
- MResidue* first = GetResidue(ssbond.first.chain,
- seq_id_map[ssbond.first.chain][ssbond.first.seqNumber], ssbond.first.insertionCode);
- MResidue* second = GetResidue(ssbond.second.chain,
- seq_id_map[ssbond.second.chain][ssbond.second.seqNumber], ssbond.second.insertionCode);
-
- if (first == second)
- throw mas_exception("first and second residue are the same");
-
- first->SetSSBridgeNr(ssbondNr);
- second->SetSSBridgeNr(ssbondNr);
-
- mSSBonds.push_back(make_pair(first, second));
-
- ++ssbondNr;
- }
- catch (exception& e)
- {
- if (VERBOSE)
- cerr << "invalid residue referenced in SSBOND record: " << e.what() << endl;
- }
- }
-
- mChains.erase(
- remove_if(mChains.begin(), mChains.end(), boost::bind(&MChain::Empty, _1)),
- mChains.end());
-
- if (VERBOSE and mIgnoredWaterMolecules)
- cerr << "Ignored " << mIgnoredWaterMolecules << " water molecules" << endl;
-
- if (mChains.empty())
- throw mas_exception("empty protein, or no valid complete residues");
-}
-
-string MProtein::GetCompound() const
-{
- string result("COMPND ");
- result += mCompound;
- return result.substr(0, 80);
-}
-
-string MProtein::GetSource() const
-{
- string result("SOURCE ");
- result += mSource;
- return result.substr(0, 80);
-}
-
-string MProtein::GetAuthor() const
-{
- string result("AUTHOR ");
- result += mAuthor;
- return result.substr(0, 80);
+ foreach (MChain* chain, mChains)
+ delete chain;
+}
+
+void MProtein::ReadPDB(std::istream& is, bool cAlphaOnly)
+{
+ mResidueCount = 0;
+ mChainBreaks = 0;
+ mIgnoredWaterMolecules = 0;
+ mNrOfHBondsInParallelBridges = 0;
+ mNrOfHBondsInAntiparallelBridges = 0;
+
+ std::fill(mParallelBridgesPerLadderHistogram,
+ mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mAntiparallelBridgesPerLadderHistogram,
+ mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mLaddersPerSheetHistogram,
+ mLaddersPerSheetHistogram + kHistogramSize, 0);
+
+ std::vector<std::pair<MResidueID,MResidueID>> ssbonds;
+ std::set<char> terminatedChains;
+
+ bool model = false;
+ std::vector<MAtom> atoms;
+ char firstAltLoc = 0;
+ bool atomSeen = false;
+ optional<MAtom> prevAtom;
+
+ while (not is.eof())
+ {
+ std::string line;
+ getline(is, line);
+ if (line.empty() and is.eof())
+ break;
+
+ if (VERBOSE > 3)
+ std::cerr << line << std::endl;
+
+ if (ba::starts_with(line, "HEADER"))
+ {
+ mHeader = line;
+ ba::trim(mHeader);
+ if (line.length() >= 66)
+ mID = line.substr(62, 4);
+ else
+ mID = "UNDF";
+ continue;
+ }
+
+ if (ba::starts_with(line, "COMPND"))
+ {
+ ba::trim_right(line);
+ if (line.length() >= 10)
+ {
+ mCompound = mCompound + line.substr(10);
+ }
+ continue;
+ }
+
+ if (ba::starts_with(line, "SOURCE"))
+ {
+ ba::trim_right(line);
+ if (line.length() >= 10)
+ {
+ mSource = mSource + line.substr(10);
+ }
+ continue;
+ }
+
+ if (ba::starts_with(line, "AUTHOR"))
+ {
+ ba::trim_right(line);
+ if (line.length() >= 10)
+ {
+ mAuthor = mAuthor + line.substr(10);
+ }
+ continue;
+ }
+
+ if (ba::starts_with(line, "DBREF"))
+ {
+ ba::trim(line);
+ mDbRef.push_back(line);
+ continue;
+ }
+
+ // brain dead support for only the first model in the file (NMR)
+ if (ba::starts_with(line, "MODEL"))
+ {
+ model = true;
+ continue;
+ }
+
+ if (ba::starts_with(line, "ENDMDL") and model == true)
+ break;
+
+ if (ba::starts_with(line, "SSBOND"))
+ {
+ //SSBOND 1 CYS A 6 CYS A 11 1555 1555 2.03
+ std::pair<MResidueID,MResidueID> ssbond;
+ ssbond.first.chain = line[15];
+ ssbond.first.seqNumber = boost::lexical_cast<uint16>(
+ ba::trim_copy(line.substr(16, 5)));
+ ssbond.first.insertionCode = line[21];
+ ssbond.second.chain = line[29];
+ ssbond.second.seqNumber = boost::lexical_cast<uint16>(
+ ba::trim_copy(line.substr(30, 5)));
+ ssbond.second.insertionCode = line[35];
+
+ ssbonds.push_back(ssbond);
+ continue;
+ }
+
+ // add ATOMs only if the chain isn't terminated
+ if (terminatedChains.count(line[21]))
+ continue;
+
+ if (ba::starts_with(line, "TER "))
+ {
+ if (atoms.empty())
+ {
+ std::cerr << "no atoms read before TER record " << std::endl
+ << line << std::endl;
+ continue;
+ }
+
+ AddResidue(atoms);
+ atoms.clear();
+ firstAltLoc = 0;
+ atomSeen = false;
+ prevAtom = none;
+
+ terminatedChains.insert(line[21]);
+
+ continue;
+ }
+
+ if (ba::starts_with(line, "ATOM ") or ba::starts_with(line, "HETATM"))
+ // 1 - 6 Record name "ATOM "
+ {
+ if (cAlphaOnly and line.substr(12, 4) != " CA ")
+ continue;
+
+ atomSeen = ba::starts_with(line, "ATOM ");
+
+ MAtom atom = {};
+
+ // 7 - 11 Integer serial Atom serial number.
+ atom.mSerial = boost::lexical_cast<uint32>(
+ ba::trim_copy(line.substr(6, 5)));
+ // 13 - 16 Atom name Atom name.
+ atom.mName = ba::trim_copy(line.substr(12, 4));
+ // 17 Character altLoc Alternate location indicator.
+ atom.mAltLoc = line[16];
+ // 18 - 20 Residue name resName Residue name.
+ atom.mResName = ba::trim_copy(line.substr(17, 4));
+ // 22 Character chainID Chain identifier.
+ atom.mChainID = line[21];
+ // 23 - 26 Integer resSeq Residue sequence number.
+ atom.mResSeq = boost::lexical_cast<int16>(
+ ba::trim_copy(line.substr(22, 4)));
+ // 27 AChar iCode Code for insertion of residues.
+ atom.mICode = line.substr(26, 1);
+
+ // 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
+ atom.mLoc.mX = ParseFloat(line.substr(30, 8));
+ // 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
+ atom.mLoc.mY = ParseFloat(line.substr(38, 8));
+ // 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
+ atom.mLoc.mZ = ParseFloat(line.substr(46, 8));
+
+ // 55 - 60 Real(6.2) occupancy Occupancy.
+ if (line.length() > 54)
+ {
+ atom.mOccupancy = ParseFloat(line.substr(54, 6));
+ }
+
+ // 61 - 66 Real(6.2) tempFactor Temperature factor.
+ if (line.length() > 60)
+ {
+ atom.mTempFactor = ParseFloat(line.substr(60, 6));
+ }
+
+ // 77 - 78 LString(2) element Element symbol, right-justified.
+ if (line.length() > 76)
+ atom.mElement = ba::trim_copy(line.substr(76, 3));
+ // 79 - 80 LString(2) charge Charge on the atom.
+ atom.mCharge = 0;
+
+// alternative test, check chain ID as well.
+ if (prevAtom
+ &&
+ (
+ atom.mChainID != prevAtom->mChainID
+ ||
+ atom.mResSeq != prevAtom->mResSeq
+ ||
+ atom.mICode != prevAtom->mICode
+ )
+ )
+// if (not atoms.empty() and
+// (atom.mResSeq != atoms.back().mResSeq or (atom.mResSeq == atoms.back().mResSeq and atom.mICode != atoms.back().mICode)))
+ {
+ if ( ! atoms.empty() )
+ {
+ AddResidue(atoms);
+ atoms.clear();
+ }
+ firstAltLoc = 0;
+ prevAtom = none;
+ }
+
+ try
+ {
+ atom.mType = MapElement(line.substr(76, 2));
+ }
+ catch (const std::exception& e)
+ {
+ if (VERBOSE)
+ std::cerr << e.what() << std::endl;
+ atom.mType = kUnknownAtom;
+ }
+
+ if (atom.mType == kHydrogen)
+ continue;
+
+ prevAtom = atom;
+
+ if (atom.mAltLoc != ' ')
+ {
+ if (firstAltLoc == 0)
+ firstAltLoc = atom.mAltLoc;
+ if (atom.mAltLoc == firstAltLoc)
+ atom.mAltLoc = 'A';
+ }
+
+ if (firstAltLoc != 0 and
+ atom.mAltLoc != ' ' and
+ atom.mAltLoc != firstAltLoc)
+ {
+ if (VERBOSE)
+ std::cerr << "skipping alternate atom record " << atom.mResName
+ << std::endl;
+ continue;
+ }
+
+ atoms.push_back(atom);
+ }
+ }
+
+ if (not atoms.empty()) // we have read atoms without a TER
+ {
+ if (atomSeen and VERBOSE)
+ std::cerr << "ATOM records not terminated by TER record" << std::endl;
+
+ AddResidue(atoms);
+ }
+
+ // map the sulfur bridges
+ uint32 ssbondNr = 1;
+ typedef std::pair<MResidueID,MResidueID> SSBond;
+ foreach (const SSBond& ssbond, ssbonds)
+ {
+ try
+ {
+ MResidue* first = GetResidue(ssbond.first.chain,
+ ssbond.first.seqNumber,
+ ssbond.first.insertionCode);
+ MResidue* second = GetResidue(ssbond.second.chain,
+ ssbond.second.seqNumber,
+ ssbond.second.insertionCode);
+
+ if (first == second)
+ throw mas_exception("first and second residue are the same");
+
+ first->SetSSBridgeNr(ssbondNr);
+ second->SetSSBridgeNr(ssbondNr);
+
+ mSSBonds.push_back(std::make_pair(first, second));
+
+ ++ssbondNr;
+ }
+ catch (const std::exception& e)
+ {
+ if (VERBOSE)
+ std::cerr << "invalid residue referenced in SSBOND record: "
+ << e.what() << std::endl;
+ }
+ }
+
+ mChains.erase(
+ remove_if(mChains.begin(), mChains.end(), boost::bind(&MChain::Empty, _1)),
+ mChains.end());
+
+ if (VERBOSE and mIgnoredWaterMolecules)
+ std::cerr << "Ignored " << mIgnoredWaterMolecules << " water molecules"
+ << std::endl;
+
+ if (mChains.empty())
+ throw mas_exception("empty protein, or no valid complete residues");
+}
+
+void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly)
+{
+ mResidueCount = 0;
+ mChainBreaks = 0;
+ mIgnoredWaterMolecules = 0;
+ mNrOfHBondsInParallelBridges = 0;
+ mNrOfHBondsInAntiparallelBridges = 0;
+
+ std::fill(mParallelBridgesPerLadderHistogram,
+ mParallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mAntiparallelBridgesPerLadderHistogram,
+ mAntiparallelBridgesPerLadderHistogram + kHistogramSize, 0);
+ std::fill(mLaddersPerSheetHistogram,
+ mLaddersPerSheetHistogram + kHistogramSize, 0);
+
+ std::vector<std::pair<MResidueID,MResidueID>> ssbonds;
+ std::set<char> terminatedChains;
+
+ // Read the mmCIF data into a mmCIF file class
+ // Using http://mmcif.rcsb.org/dictionaries/pdb-correspondence/pdb2mmcif-2010.html
+ // as a reference.
+
+ mmCIF::file data(is);
+
+ // ID
+ mID = data.get("_entry.id");
+
+ // HEADER
+ std::string keywords = data.get("_struct_keywords.text").substr(0, 39);
+ mHeader = (boost::format("HEADER %s %s%s %s") %
+ keywords % std::string(39 - keywords.length(), ' ') % data.get("_database_PDB_rev.date_original") % mID).str();
+
+ // COMPND
+ foreach (const mmCIF::row& desc, data["_entity"])
+ {
+ if (desc["type"] == "polymer")
+ {
+ std::string s = desc["pdbx_description"];
+ ba::trim(s);
+ mCompound = (mCompound.empty() ? mCompound : mCompound + "; ") + s;
+ }
+ }
+
+ if (mCompound.empty())
+ mCompound = data.get_joined("_struct.pdbx_descriptor", "; ");
+
+ // SOURCE
+ mSource = data.get_joined("_entity_src_nat.pdbx_organism_scientific", "; ");
+ if (mSource.empty())
+ mSource = data.get_joined("_entity_src_gen.pdbx_gene_src_scientific_name", "; ");
+ if (mSource.empty())
+ mSource = data.get_joined("_pdbx_entity_src_syn.organism_scientific", "; ");
+
+ // AUTHOR
+ mAuthor = data.get_joined("_audit_author.name", "; ");
+
+ // ssbonds
+
+ foreach (const mmCIF::row& ss, data["_struct_conn"])
+ {
+ if (ss["conn_type_id"] != "disulf")
+ continue;
+
+ std::pair<MResidueID,MResidueID> ssbond;
+
+ if (ss["ptnr1_label_seq_id"].compare(".") == 0 || ss["ptnr1_label_seq_id"].compare("?") == 0)
+ continue;
+
+ ssbond.first.chain = ss["ptnr1_label_asym_id"];
+ ssbond.first.seqNumber = boost::lexical_cast<uint16>(
+ ss["ptnr1_label_seq_id"]);
+ ssbond.first.insertionCode = ss["pdbx_ptnr1_PDB_ins_code"];
+ if (ssbond.first.insertionCode == "?")
+ ssbond.first.insertionCode.clear();
+
+ if (ss["ptnr2_label_seq_id"].compare(".") == 0 || ss["ptnr2_label_seq_id"].compare("?") == 0)
+ continue;
+
+ ssbond.second.chain = ss["ptnr2_label_asym_id"];
+ ssbond.second.seqNumber = boost::lexical_cast<uint16>(
+ ss["ptnr2_label_seq_id"]);
+ ssbond.second.insertionCode = ss["pdbx_ptnr2_PDB_ins_code"];
+ if (ssbond.second.insertionCode == "?")
+ ssbond.second.insertionCode.clear();
+
+ ssbonds.push_back(ssbond);
+ }
+
+ std::vector<MAtom> atoms;
+ char firstAltLoc = 0;
+
+ // remap label_seq_id to auth_seq_id
+ std::map<std::string, std::map<int,int> > seq_id_map;
+
+ foreach (const mmCIF::row& atom, data["_atom_site"])
+ {
+ // skip over NMR models > 1
+ if (atoi(atom["pdbx_PDB_model_num"].c_str()) > 1)
+ continue;
+
+ std::string label_seq_id = atom["label_seq_id"];
+
+ MAtom a;
+
+ a.mSerial = boost::lexical_cast<uint32>(atom["id"]);
+ a.mName = atom["auth_atom_id"];
+ a.mAltLoc = atom["label_alt_id"] == "." ? ' ' : atom["label_alt_id"][0];
+ a.mResName = atom["auth_comp_id"];
+ a.mChainID = atom["label_asym_id"];
+ a.mResSeq = boost::lexical_cast<uint32>(atom["auth_seq_id"]);
+ a.mICode = atom["pdbx_PDB_ins_code"] == "?" ? "" : atom["pdbx_PDB_ins_code"];
+
+ // map seq_id
+ if (label_seq_id == "?" or label_seq_id == ".")
+ seq_id_map[a.mChainID][a.mResSeq] = a.mResSeq;
+ else
+ seq_id_map[a.mChainID][boost::lexical_cast<int16>(label_seq_id)] = a.mResSeq;
+
+ a.mLoc.mX = ParseFloat(atom["Cartn_x"]);
+ a.mLoc.mY = ParseFloat(atom["Cartn_y"]);
+ a.mLoc.mZ = ParseFloat(atom["Cartn_z"]);
+
+ a.mOccupancy = ParseFloat(atom["occupancy"]);
+ a.mTempFactor = ParseFloat(atom["B_iso_or_equiv"]);
+ a.mElement = atom["type_symbol"];
+ a.mCharge = atom["pdbx_formal_charge"] != "?" ? boost::lexical_cast<int>(
+ atom["pdbx_formal_charge"]) : 0;
+
+ try
+ {
+ a.mType = MapElement(a.mElement);
+ }
+ catch (const std::exception& e)
+ {
+ if (VERBOSE)
+ std::cerr << e.what() << std::endl;
+ a.mType = kUnknownAtom;
+ }
+
+ if (a.mType == kHydrogen)
+ continue;
+
+ if (not atoms.empty() and
+ (a.mChainID != atoms.back().mChainID or
+ (a.mResSeq != atoms.back().mResSeq or
+ (a.mResSeq == atoms.back().mResSeq and
+ a.mICode != atoms.back().mICode))))
+ {
+ AddResidue(atoms);
+ atoms.clear();
+ firstAltLoc = 0;
+ }
+
+ if (a.mAltLoc != ' ')
+ {
+ if (firstAltLoc == 0)
+ firstAltLoc = a.mAltLoc;
+ if (a.mAltLoc == firstAltLoc)
+ a.mAltLoc = 'A';
+ }
+
+ if (firstAltLoc != 0 and a.mAltLoc != ' ' and a.mAltLoc != firstAltLoc)
+ {
+ if (VERBOSE)
+ std::cerr << "skipping alternate atom record " << a.mResName
+ << std::endl;
+ continue;
+ }
+
+ atoms.push_back(a);
+ }
+
+ if (not atoms.empty())
+ AddResidue(atoms);
+
+ // map the sulfur bridges
+ uint32 ssbondNr = 1;
+ typedef std::pair<MResidueID,MResidueID> SSBond;
+ foreach (const SSBond& ssbond, ssbonds)
+ {
+ try
+ {
+ MResidue* first = GetResidue(
+ ssbond.first.chain,
+ seq_id_map[ssbond.first.chain][ssbond.first.seqNumber],
+ ssbond.first.insertionCode);
+ MResidue* second = GetResidue(
+ ssbond.second.chain,
+ seq_id_map[ssbond.second.chain][ssbond.second.seqNumber],
+ ssbond.second.insertionCode);
+
+ if (first == second)
+ throw mas_exception("first and second residue are the same");
+
+ first->SetSSBridgeNr(ssbondNr);
+ second->SetSSBridgeNr(ssbondNr);
+
+ mSSBonds.push_back(std::make_pair(first, second));
+
+ ++ssbondNr;
+ }
+ catch (const std::exception& e)
+ {
+ if (VERBOSE)
+ std::cerr << "invalid residue referenced in SSBOND record: "
+ << e.what() << std::endl;
+ }
+ }
+
+ mChains.erase(remove_if(mChains.begin(), mChains.end(),
+ boost::bind(&MChain::Empty, _1)),
+ mChains.end());
+
+ if (VERBOSE and mIgnoredWaterMolecules)
+ std::cerr << "Ignored " << mIgnoredWaterMolecules << " water molecules"
+ << std::endl;
+
+ if (mChains.empty())
+ throw mas_exception("empty protein, or no valid complete residues");
+}
+
+std::string MProtein::GetCompound() const
+{
+ std::string result("COMPND ");
+ result += mCompound;
+ return result.substr(0, 80);
+}
+
+std::string MProtein::GetSource() const
+{
+ std::string result("SOURCE ");
+ result += mSource;
+ return result.substr(0, 80);
+}
+
+std::string MProtein::GetAuthor() const
+{
+ std::string result("AUTHOR ");
+ result += mAuthor;
+ return result.substr(0, 80);
}
void MProtein::GetStatistics(uint32& outNrOfResidues, uint32& outNrOfChains,
- uint32& outNrOfSSBridges, uint32& outNrOfIntraChainSSBridges,
- uint32& outNrOfHBonds, uint32 outNrOfHBondsPerDistance[11]) const
-{
- outNrOfResidues = mResidueCount;
- outNrOfChains = mChains.size() + mChainBreaks;
- outNrOfSSBridges = mSSBonds.size();
-
- outNrOfIntraChainSSBridges = 0;
- for (vector<pair<MResidue*,MResidue*> >::const_iterator ri = mSSBonds.begin(); ri != mSSBonds.end(); ++ri)
- {
- if (ri->first->GetChainID() == ri->second->GetChainID() and
- (MResidue::NoChainBreak(ri->first, ri->second) or MResidue::NoChainBreak(ri->first, ri->second)))
- {
- ++outNrOfIntraChainSSBridges;
- }
- }
-
- outNrOfHBonds = 0;
- foreach (const MChain* chain, mChains)
- {
- foreach (const MResidue* r, chain->GetResidues())
- {
- const HBond* donor = r->Donor();
-
- for (uint32 i = 0; i < 2; ++i)
- {
- if (donor[i].residue != nullptr and donor[i].energy < kMaxHBondEnergy)
- {
- ++outNrOfHBonds;
- int32 k = donor[i].residue->GetNumber() - r->GetNumber();
- if (k >= -5 and k <= 5)
- outNrOfHBondsPerDistance[k + 5] += 1;
- }
- }
- }
- }
+ uint32& outNrOfSSBridges,
+ uint32& outNrOfIntraChainSSBridges,
+ uint32& outNrOfHBonds,
+ uint32 outNrOfHBondsPerDistance[11]) const
+{
+ outNrOfResidues = mResidueCount;
+ outNrOfChains = mChains.size() + mChainBreaks;
+ outNrOfSSBridges = mSSBonds.size();
+
+ outNrOfIntraChainSSBridges = 0;
+ for (std::vector<std::pair<MResidue*,MResidue*> >::const_iterator ri = mSSBonds.begin(); ri != mSSBonds.end(); ++ri)
+ {
+ if (ri->first->GetChainID() == ri->second->GetChainID() and
+ (MResidue::NoChainBreak(ri->first, ri->second) or
+ MResidue::NoChainBreak(ri->first, ri->second)))
+ {
+ ++outNrOfIntraChainSSBridges;
+ }
+ }
+
+ outNrOfHBonds = 0;
+ foreach (const MChain* chain, mChains)
+ {
+ foreach (const MResidue* r, chain->GetResidues())
+ {
+ const HBond* donor = r->Donor();
+
+ for (uint32 i = 0; i < 2; ++i)
+ {
+ if (donor[i].residue != nullptr and donor[i].energy < kMaxHBondEnergy)
+ {
+ ++outNrOfHBonds;
+ int32 k = donor[i].residue->GetNumber() - r->GetNumber();
+ if (k >= -5 and k <= 5)
+ outNrOfHBondsPerDistance[k + 5] += 1;
+ }
+ }
+ }
+ }
}
void MProtein::GetResiduesPerAlphaHelixHistogram(uint32 outHistogram[30]) const
{
- fill(outHistogram, outHistogram + 30, 0);
-
- foreach (const MChain* chain, mChains)
- {
- uint32 helixLength = 0;
-
- foreach (const MResidue* r, chain->GetResidues())
- {
- if (r->GetSecondaryStructure() == alphahelix)
- ++helixLength;
- else if (helixLength > 0)
- {
- if (helixLength > kHistogramSize)
- helixLength = kHistogramSize;
-
- outHistogram[helixLength - 1] += 1;
- helixLength = 0;
- }
- }
- }
+ std::fill(outHistogram, outHistogram + 30, 0);
+
+ foreach (const MChain* chain, mChains)
+ {
+ uint32 helixLength = 0;
+
+ foreach (const MResidue* r, chain->GetResidues())
+ {
+ if (r->GetSecondaryStructure() == alphahelix)
+ ++helixLength;
+ else if (helixLength > 0)
+ {
+ if (helixLength > kHistogramSize)
+ helixLength = kHistogramSize;
+
+ outHistogram[helixLength - 1] += 1;
+ helixLength = 0;
+ }
+ }
+ }
}
void MProtein::GetParallelBridgesPerLadderHistogram(uint32 outHistogram[30]) const
{
- copy(mParallelBridgesPerLadderHistogram, mParallelBridgesPerLadderHistogram + kHistogramSize, outHistogram);
+ std::copy(mParallelBridgesPerLadderHistogram,
+ mParallelBridgesPerLadderHistogram + kHistogramSize, outHistogram);
}
void MProtein::GetAntiparallelBridgesPerLadderHistogram(uint32 outHistogram[30]) const
{
- copy(mAntiparallelBridgesPerLadderHistogram, mAntiparallelBridgesPerLadderHistogram + kHistogramSize, outHistogram);
+ std::copy(mAntiparallelBridgesPerLadderHistogram,
+ mAntiparallelBridgesPerLadderHistogram + kHistogramSize,
+ outHistogram);
}
void MProtein::GetLaddersPerSheetHistogram(uint32 outHistogram[30]) const
{
- copy(mLaddersPerSheetHistogram, mLaddersPerSheetHistogram + kHistogramSize, outHistogram);
-}
-
-void MProtein::AddResidue(const vector<MAtom>& inAtoms)
-{
- bool hasN = false, hasCA = false, hasC = false, hasO = false;
- foreach (const MAtom& atom, inAtoms)
- {
- if (not hasN and atom.GetName() == "N")
- hasN = true;
- if (not hasCA and atom.GetName() == "CA")
- hasCA = true;
- if (not hasC and atom.GetName() == "C")
- hasC = true;
- if (not hasO and atom.GetName() == "O")
- hasO = true;
- }
-
- if (hasN and hasCA and hasC and hasO)
- {
- MChain& chain = GetChain(inAtoms.front().mChainID);
- vector<MResidue*>& residues(chain.GetResidues());
-
- MResidue* prev = nullptr;
- if (not residues.empty())
- prev = residues.back();
-
- uint32 resNumber = mResidueCount + mChains.size() + mChainBreaks;
- MResidue* r = new MResidue(resNumber, prev, inAtoms);
- if (prev != nullptr and not prev->ValidDistance(*r)) // check for chain breaks
- {
- if (VERBOSE)
- cerr << boost::format("The distance between residue %1% and %2% is larger than the maximum peptide bond length")
- % prev->GetNumber() % resNumber << endl;
-
- ++mChainBreaks;
- r->SetNumber(resNumber + 1);
- }
-
- residues.push_back(r);
- ++mResidueCount;
- }
- else if (string(inAtoms.front().mResName) == "HOH")
- ++mIgnoredWaterMolecules;
- else if (VERBOSE)
- cerr << "ignoring incomplete residue " << inAtoms.front().mResName << " (" << inAtoms.front().mResSeq << ')' << endl;
-}
-
-const MChain& MProtein::GetChain(const string& inChainID) const
-{
- for (uint32 i = 0; i < mChains.size(); ++i)
- if (mChains[i]->GetChainID() == inChainID)
- return *mChains[i];
-
- throw mas_exception("Chain not found");
- return *mChains.front();
-}
-
-MChain& MProtein::GetChain(const string& inChainID)
-{
- for (uint32 i = 0; i < mChains.size(); ++i)
- if (mChains[i]->GetChainID() == inChainID)
- return *mChains[i];
-
- mChains.push_back(new MChain(inChainID));
- return *mChains.back();
-}
-
-void MProtein::GetPoints(vector<MPoint>& outPoints) const
-{
- foreach (const MChain* chain, mChains)
- {
- foreach (const MResidue* r, chain->GetResidues())
- r->GetPoints(outPoints);
- }
+ std::copy(mLaddersPerSheetHistogram,
+ mLaddersPerSheetHistogram + kHistogramSize, outHistogram);
+}
+
+void MProtein::AddResidue(const std::vector<MAtom>& inAtoms)
+{
+ bool hasN = false, hasCA = false, hasC = false, hasO = false;
+ foreach (const MAtom& atom, inAtoms)
+ {
+ if (not hasN and atom.GetName() == "N")
+ hasN = true;
+ if (not hasCA and atom.GetName() == "CA")
+ hasCA = true;
+ if (not hasC and atom.GetName() == "C")
+ hasC = true;
+ if (not hasO and atom.GetName() == "O")
+ hasO = true;
+ }
+
+ if (hasN and hasCA and hasC and hasO)
+ {
+ MChain& chain = GetChain(inAtoms.front().mChainID);
+ std::vector<MResidue*>& residues(chain.GetResidues());
+
+ MResidue* prev = nullptr;
+ if (not residues.empty())
+ prev = residues.back();
+
+ int32 resNumber = mResidueCount + mChains.size() + mChainBreaks;
+ MResidue* r = new MResidue(resNumber, prev, inAtoms);
+ // check for chain breaks
+ if (prev != nullptr and not prev->ValidDistance(*r))
+ {
+ if (VERBOSE)
+ std::cerr << boost::format("The distance between residue %1% and %2% is larger than the maximum peptide bond length")
+ % prev->GetNumber() % resNumber << std::endl;
+
+ ++mChainBreaks;
+ r->SetNumber(resNumber + 1);
+ }
+
+ residues.push_back(r);
+ ++mResidueCount;
+ }
+ else if (std::string(inAtoms.front().mResName) == "HOH")
+ ++mIgnoredWaterMolecules;
+ else if (VERBOSE)
+ std::cerr << "ignoring incomplete residue " << inAtoms.front().mResName
+ << " (" << inAtoms.front().mResSeq << ')' << std::endl;
+}
+
+const MChain& MProtein::GetChain(const std::string& inChainID) const
+{
+ for (uint32 i = 0; i < mChains.size(); ++i)
+ if (mChains[i]->GetChainID() == inChainID)
+ return *mChains[i];
+
+ throw mas_exception("Chain not found");
+ return *mChains.front();
+}
+
+MChain& MProtein::GetChain(const std::string& inChainID)
+{
+ for (uint32 i = 0; i < mChains.size(); ++i)
+ if (mChains[i]->GetChainID() == inChainID)
+ return *mChains[i];
+
+ mChains.push_back(new MChain(inChainID));
+ return *mChains.back();
+}
+
+void MProtein::GetPoints(std::vector<MPoint>& outPoints) const
+{
+ foreach (const MChain* chain, mChains)
+ {
+ foreach (const MResidue* r, chain->GetResidues())
+ r->GetPoints(outPoints);
+ }
}
void MProtein::Translate(const MPoint& inTranslation)
{
- foreach (MChain* chain, mChains)
- chain->Translate(inTranslation);
+ foreach (MChain* chain, mChains)
+ chain->Translate(inTranslation);
}
void MProtein::Rotate(const MQuaternion& inRotation)
{
- foreach (MChain* chain, mChains)
- chain->Rotate(inRotation);
+ foreach (MChain* chain, mChains)
+ chain->Rotate(inRotation);
}
void MProtein::CalculateSecondaryStructure(bool inPreferPiHelices)
{
- vector<MResidue*> residues;
- residues.reserve(mResidueCount);
- foreach (const MChain* chain, mChains)
- residues.insert(residues.end(), chain->GetResidues().begin(), chain->GetResidues().end());
-
- if (VERBOSE)
- cerr << "using " << residues.size() << " residues" << endl;
-
- boost::thread t(boost::bind(&MProtein::CalculateAccessibilities, this, boost::ref(residues)));
-
- CalculateHBondEnergies(residues);
- CalculateBetaSheets(residues);
- CalculateAlphaHelices(residues, inPreferPiHelices);
-
- t.join();
-}
-
-void MProtein::CalculateHBondEnergies(const vector<MResidue*>& inResidues)
-{
- if (VERBOSE)
- cerr << "Calculate H-bond energies" << endl;
-
- // Calculate the HBond energies
- for (uint32 i = 0; i + 1 < inResidues.size(); ++i)
- {
- MResidue* ri = inResidues[i];
-
- for (uint32 j = i + 1; j < inResidues.size(); ++j)
- {
- MResidue* rj = inResidues[j];
-
- if (Distance(ri->GetCAlpha(), rj->GetCAlpha()) < kMinimalCADistance)
- {
- MResidue::CalculateHBondEnergy(*ri, *rj);
- if (j != i + 1)
- MResidue::CalculateHBondEnergy(*rj, *ri);
- }
- }
- }
-}
-
-// TODO: improve alpha helix calculation by better recognizing pi-helices
-void MProtein::CalculateAlphaHelices(const vector<MResidue*>& inResidues, bool inPreferPiHelices)
-{
- if (VERBOSE)
- cerr << "Calculate alhpa helices" << endl;
-
- // Helix and Turn
- foreach (const MChain* chain, mChains)
- {
- for (uint32 stride = 3; stride <= 5; ++stride)
- {
- vector<MResidue*> res(chain->GetResidues());
- if (res.size() < stride)
- continue;
-
- for (uint32 i = 0; i + stride < res.size(); ++i)
- {
- if (MResidue::TestBond(res[i + stride], res[i]) and MResidue::NoChainBreak(res[i], res[i + stride]))
- {
- res[i + stride]->SetHelixFlag(stride, helixEnd);
- for (uint32 j = i + 1; j < i + stride; ++j)
- {
- if (res[j]->GetHelixFlag(stride) == helixNone)
- res[j]->SetHelixFlag(stride, helixMiddle);
- }
-
- if (res[i]->GetHelixFlag(stride) == helixEnd)
- res[i]->SetHelixFlag(stride, helixStartAndEnd);
- else
- res[i]->SetHelixFlag(stride, helixStart);
- }
- }
- }
- }
-
- foreach (MResidue* r, inResidues)
- {
- double kappa = r->Kappa();
- r->SetBend(kappa != 360 and kappa > 70);
- }
-
- for (uint32 i = 1; i + 4 < inResidues.size(); ++i)
- {
- if (inResidues[i]->IsHelixStart(4) and inResidues[i - 1]->IsHelixStart(4))
- {
- for (uint32 j = i; j <= i + 3; ++j)
- inResidues[j]->SetSecondaryStructure(alphahelix);
- }
- }
-
- for (uint32 i = 1; i + 3 < inResidues.size(); ++i)
- {
- if (inResidues[i]->IsHelixStart(3) and inResidues[i - 1]->IsHelixStart(3))
- {
- bool empty = true;
- for (uint32 j = i; empty and j <= i + 2; ++j)
- empty = inResidues[j]->GetSecondaryStructure() == loop or inResidues[j]->GetSecondaryStructure() == helix_3;
- if (empty)
- {
- for (uint32 j = i; j <= i + 2; ++j)
- inResidues[j]->SetSecondaryStructure(helix_3);
- }
- }
- }
-
- for (uint32 i = 1; i + 5 < inResidues.size(); ++i)
- {
- if (inResidues[i]->IsHelixStart(5) and inResidues[i - 1]->IsHelixStart(5))
- {
- bool empty = true;
- for (uint32 j = i; empty and j <= i + 4; ++j)
- empty = inResidues[j]->GetSecondaryStructure() == loop or inResidues[j]->GetSecondaryStructure() == helix_5 or
- (inPreferPiHelices and inResidues[j]->GetSecondaryStructure() == alphahelix);
- if (empty)
- {
- for (uint32 j = i; j <= i + 4; ++j)
- inResidues[j]->SetSecondaryStructure(helix_5);
- }
- }
- }
-
- for (uint32 i = 1; i + 1 < inResidues.size(); ++i)
- {
- if (inResidues[i]->GetSecondaryStructure() == loop)
- {
- bool isTurn = false;
- for (uint32 stride = 3; stride <= 5 and not isTurn; ++stride)
- {
- for (uint32 k = 1; k < stride and not isTurn; ++k)
- isTurn = (i >= k) and inResidues[i - k]->IsHelixStart(stride);
- }
-
- if (isTurn)
- inResidues[i]->SetSecondaryStructure(turn);
- else if (inResidues[i]->IsBend())
- inResidues[i]->SetSecondaryStructure(bend);
- }
- }
-}
-
-void MProtein::CalculateBetaSheets(const vector<MResidue*>& inResidues)
-{
- if (VERBOSE)
- cerr << "Calculate beta sheets" << endl;
-
- // Calculate Bridges
- vector<MBridge> bridges;
- if (inResidues.size() > 4)
- {
- for (uint32 i = 1; i + 4 < inResidues.size(); ++i)
- {
- MResidue* ri = inResidues[i];
-
- for (uint32 j = i + 3; j + 1 < inResidues.size(); ++j)
- {
- MResidue* rj = inResidues[j];
-
- MBridgeType type = ri->TestBridge(rj);
- if (type == btNoBridge)
- continue;
-
- bool found = false;
- foreach (MBridge& bridge, bridges)
- {
- if (type != bridge.type or i != bridge.i.back() + 1)
- continue;
-
- if (type == btParallel and bridge.j.back() + 1 == j)
- {
- bridge.i.push_back(i);
- bridge.j.push_back(j);
- found = true;
- break;
- }
-
- if (type == btAntiParallel and bridge.j.front() - 1 == j)
- {
- bridge.i.push_back(i);
- bridge.j.push_front(j);
- found = true;
- break;
- }
- }
-
- if (not found)
- {
- MBridge bridge = {};
-
- bridge.type = type;
- bridge.i.push_back(i);
- bridge.chainI = ri->GetChainID();
- bridge.j.push_back(j);
- bridge.chainJ = rj->GetChainID();
-
- bridges.push_back(bridge);
- }
- }
- }
- }
-
- // extend ladders
- sort(bridges.begin(), bridges.end());
-
- for (uint32 i = 0; i < bridges.size(); ++i)
- {
- for (uint32 j = i + 1; j < bridges.size(); ++j)
- {
- uint32 ibi = bridges[i].i.front();
- uint32 iei = bridges[i].i.back();
- uint32 jbi = bridges[i].j.front();
- uint32 jei = bridges[i].j.back();
- uint32 ibj = bridges[j].i.front();
- uint32 iej = bridges[j].i.back();
- uint32 jbj = bridges[j].j.front();
- uint32 jej = bridges[j].j.back();
-
- if (bridges[i].type != bridges[j].type or
- MResidue::NoChainBreak(inResidues[min(ibi, ibj)], inResidues[max(iei, iej)]) == false or
- MResidue::NoChainBreak(inResidues[min(jbi, jbj)], inResidues[max(jei, jej)]) == false or
- ibj - iei >= 6 or
- (iei >= ibj and ibi <= iej))
- {
- continue;
- }
-
- bool bulge;
- if (bridges[i].type == btParallel)
- bulge = ((jbj - jei < 6 and ibj - iei < 3) or (jbj - jei < 3));
- else
- bulge = ((jbi - jej < 6 and ibj - iei < 3) or (jbi - jej < 3));
-
- if (bulge)
- {
- bridges[i].i.insert(bridges[i].i.end(), bridges[j].i.begin(), bridges[j].i.end());
- if (bridges[i].type == btParallel)
- bridges[i].j.insert(bridges[i].j.end(), bridges[j].j.begin(), bridges[j].j.end());
- else
- bridges[i].j.insert(bridges[i].j.begin(), bridges[j].j.begin(), bridges[j].j.end());
- bridges.erase(bridges.begin() + j);
- --j;
- }
- }
- }
-
- // Sheet
- set<MBridge*> ladderset;
- foreach (MBridge& bridge, bridges)
- {
- ladderset.insert(&bridge);
-
- uint32 n = bridge.i.size();
- if (n > kHistogramSize)
- n = kHistogramSize;
-
- if (bridge.type == btParallel)
- mParallelBridgesPerLadderHistogram[n - 1] += 1;
- else
- mAntiparallelBridgesPerLadderHistogram[n - 1] += 1;
- }
-
- uint32 sheet = 1, ladder = 0;
- while (not ladderset.empty())
- {
- set<MBridge*> sheetset;
- sheetset.insert(*ladderset.begin());
- ladderset.erase(ladderset.begin());
-
- bool done = false;
- while (not done)
- {
- done = true;
- foreach (MBridge* a, sheetset)
- {
- foreach (MBridge* b, ladderset)
- {
- if (Linked(*a, *b))
- {
- sheetset.insert(b);
- ladderset.erase(b);
- done = false;
- break;
- }
- }
- if (not done)
- break;
- }
- }
-
- foreach (MBridge* bridge, sheetset)
- {
- bridge->ladder = ladder;
- bridge->sheet = sheet;
- bridge->link = sheetset;
-
- ++ladder;
- }
-
- uint32 nrOfLaddersPerSheet = sheetset.size();
- if (nrOfLaddersPerSheet > kHistogramSize)
- nrOfLaddersPerSheet = kHistogramSize;
- if (nrOfLaddersPerSheet == 1 and (*sheetset.begin())->i.size() > 1)
- mLaddersPerSheetHistogram[0] += 1;
- else if (nrOfLaddersPerSheet > 1)
- mLaddersPerSheetHistogram[nrOfLaddersPerSheet - 1] += 1;
-
- ++sheet;
- }
-
- foreach (MBridge& bridge, bridges)
- {
- // find out if any of the i and j set members already have
- // a bridge assigned, if so, we're assigning bridge 2
-
- uint32 betai = 0, betaj = 0;
-
- foreach (uint32 l, bridge.i)
- {
- if (inResidues[l]->GetBetaPartner(0).residue != nullptr)
- {
- betai = 1;
- break;
- }
- }
-
- foreach (uint32 l, bridge.j)
- {
- if (inResidues[l]->GetBetaPartner(0).residue != nullptr)
- {
- betaj = 1;
- break;
- }
- }
-
- MSecondaryStructure ss = betabridge;
- if (bridge.i.size() > 1)
- ss = strand;
-
- if (bridge.type == btParallel)
- {
- mNrOfHBondsInParallelBridges += bridge.i.back() - bridge.i.front() + 2;
-
- deque<uint32>::iterator j = bridge.j.begin();
- foreach (uint32 i, bridge.i)
- inResidues[i]->SetBetaPartner(betai, inResidues[*j++], bridge.ladder, true);
-
- j = bridge.i.begin();
- foreach (uint32 i, bridge.j)
- inResidues[i]->SetBetaPartner(betaj, inResidues[*j++], bridge.ladder, true);
- }
- else
- {
- mNrOfHBondsInAntiparallelBridges += bridge.i.back() - bridge.i.front() + 2;
-
- deque<uint32>::reverse_iterator j = bridge.j.rbegin();
- foreach (uint32 i, bridge.i)
- inResidues[i]->SetBetaPartner(betai, inResidues[*j++], bridge.ladder, false);
-
- j = bridge.i.rbegin();
- foreach (uint32 i, bridge.j)
- inResidues[i]->SetBetaPartner(betaj, inResidues[*j++], bridge.ladder, false);
- }
-
- for (uint32 i = bridge.i.front(); i <= bridge.i.back(); ++i)
- {
- if (inResidues[i]->GetSecondaryStructure() != strand)
- inResidues[i]->SetSecondaryStructure(ss);
- inResidues[i]->SetSheet(bridge.sheet);
- }
-
- for (uint32 i = bridge.j.front(); i <= bridge.j.back(); ++i)
- {
- if (inResidues[i]->GetSecondaryStructure() != strand)
- inResidues[i]->SetSecondaryStructure(ss);
- inResidues[i]->SetSheet(bridge.sheet);
- }
- }
-}
-
-void MProtein::CalculateAccessibilities(const vector<MResidue*>& inResidues)
-{
- if (VERBOSE)
- cerr << "Calculate accessibilities" << endl;
-
- uint32 nr_of_threads = boost::thread::hardware_concurrency();
- if (nr_of_threads <= 1)
- {
- foreach (MResidue* residue, inResidues)
- residue->CalculateSurface(inResidues);
- }
- else
- {
- MResidueQueue queue;
-
- boost::thread_group t;
-
- for (uint32 ti = 0; ti < nr_of_threads; ++ti)
- t.create_thread(boost::bind(&MProtein::CalculateAccessibility, this,
- boost::ref(queue), boost::ref(inResidues)));
-
- foreach (MResidue* residue, inResidues)
- queue.put(residue);
-
- queue.put(nullptr);
-
- t.join_all();
- }
+ std::vector<MResidue*> residues;
+ residues.reserve(mResidueCount);
+ foreach (const MChain* chain, mChains)
+ residues.insert(residues.end(), chain->GetResidues().begin(),
+ chain->GetResidues().end());
+
+ if (VERBOSE)
+ std::cerr << "using " << residues.size() << " residues" << std::endl;
+
+ boost::thread t(boost::bind(&MProtein::CalculateAccessibilities, this,
+ boost::ref(residues)));
+
+ CalculateHBondEnergies(residues);
+ CalculateBetaSheets(residues);
+ CalculateAlphaHelices(residues, inPreferPiHelices);
+
+ t.join();
+}
+
+// Evaluates hydrogen-bond energies for every pair of residues whose C-alpha
+// atoms are closer than kMinimalCADistance. Each qualifying pair is evaluated
+// in the (earlier, later) direction; the reverse direction is evaluated too,
+// except for directly adjacent entries of inResidues.
+void MProtein::CalculateHBondEnergies(const std::vector<MResidue*>& inResidues)
+{
+  if (VERBOSE)
+    std::cerr << "Calculate H-bond energies" << std::endl;
+
+  // Visit each unordered pair (first, second) exactly once, first < second.
+  for (uint32 first = 0; first + 1 < inResidues.size(); ++first)
+  {
+    MResidue* earlier = inResidues[first];
+
+    for (uint32 second = first + 1; second < inResidues.size(); ++second)
+    {
+      MResidue* later = inResidues[second];
+
+      // Skip pairs that are too far apart to be worth evaluating.
+      if (not (Distance(earlier->GetCAlpha(), later->GetCAlpha()) < kMinimalCADistance))
+        continue;
+
+      MResidue::CalculateHBondEnergy(*earlier, *later);
+
+      // Neighbouring entries are only evaluated in one direction.
+      const bool adjacent = (second == first + 1);
+      if (not adjacent)
+        MResidue::CalculateHBondEnergy(*later, *earlier);
+    }
+  }
+}
+
+// TODO: improve alpha helix calculation by better recognizing pi-helices
+//
+// Assigns helix-related secondary structure in several passes:
+//   1. per chain, set helix start/middle/end flags for strides 3, 4 and 5
+//      wherever TestBond() reports a bond from residue i + stride to i and
+//      no chain break lies in between;
+//   2. mark bends from the kappa angle;
+//   3. turn consecutive helix starts into alphahelix (stride 4), helix_3
+//      (stride 3) and helix_5 (stride 5) runs;
+//   4. label the remaining loop residues as turn or bend.
+//
+// inResidues         all residues of the protein, chains concatenated.
+// inPreferPiHelices  when true, a stride-5 helix may overwrite residues that
+//                    were already assigned alphahelix.
+void MProtein::CalculateAlphaHelices(const std::vector<MResidue*>& inResidues,
+                                     bool inPreferPiHelices)
+{
+  if (VERBOSE)
+    std::cerr << "Calculate alpha helices" << std::endl;
+
+  // Helix and Turn
+  foreach (const MChain* chain, mChains)
+  {
+    // The residue list is only read here, so bind it once per chain instead
+    // of copying the whole vector for every stride.
+    const std::vector<MResidue*>& res = chain->GetResidues();
+
+    for (uint32 stride = 3; stride <= 5; ++stride)
+    {
+      if (res.size() < stride)
+        continue;
+
+      for (uint32 i = 0; i + stride < res.size(); ++i)
+      {
+        if (MResidue::TestBond(res[i + stride], res[i]) and
+            MResidue::NoChainBreak(res[i], res[i + stride]))
+        {
+          res[i + stride]->SetHelixFlag(stride, helixEnd);
+
+          // Residues strictly between the two ends become "middle" unless
+          // they already carry a flag for this stride.
+          for (uint32 j = i + 1; j < i + stride; ++j)
+          {
+            if (res[j]->GetHelixFlag(stride) == helixNone)
+              res[j]->SetHelixFlag(stride, helixMiddle);
+          }
+
+          // A residue that already ends one turn and now starts another is
+          // flagged as both.
+          if (res[i]->GetHelixFlag(stride) == helixEnd)
+            res[i]->SetHelixFlag(stride, helixStartAndEnd);
+          else
+            res[i]->SetHelixFlag(stride, helixStart);
+        }
+      }
+    }
+  }
+
+  foreach (MResidue* r, inResidues)
+  {
+    // NOTE(review): 360 looks like Kappa()'s "not defined" value - confirm.
+    double kappa = r->Kappa();
+    r->SetBend(kappa != 360 and kappa > 70);
+  }
+
+  // Two consecutive stride-4 starts make residues i .. i + 3 an alpha helix.
+  for (uint32 i = 1; i + 4 < inResidues.size(); ++i)
+  {
+    if (inResidues[i]->IsHelixStart(4) and inResidues[i - 1]->IsHelixStart(4))
+    {
+      for (uint32 j = i; j <= i + 3; ++j)
+        inResidues[j]->SetSecondaryStructure(alphahelix);
+    }
+  }
+
+  // Stride 3: only assigned when all three residues are still loop or
+  // already helix_3.
+  for (uint32 i = 1; i + 3 < inResidues.size(); ++i)
+  {
+    if (inResidues[i]->IsHelixStart(3) and inResidues[i - 1]->IsHelixStart(3))
+    {
+      bool empty = true;
+      for (uint32 j = i; empty and j <= i + 2; ++j)
+        empty = inResidues[j]->GetSecondaryStructure() == loop or
+                inResidues[j]->GetSecondaryStructure() == helix_3;
+      if (empty)
+      {
+        for (uint32 j = i; j <= i + 2; ++j)
+          inResidues[j]->SetSecondaryStructure(helix_3);
+      }
+    }
+  }
+
+  // Stride 5: like stride 3, but with inPreferPiHelices set an existing
+  // alphahelix assignment may be overwritten as well.
+  for (uint32 i = 1; i + 5 < inResidues.size(); ++i)
+  {
+    if (inResidues[i]->IsHelixStart(5) and inResidues[i - 1]->IsHelixStart(5))
+    {
+      bool empty = true;
+      for (uint32 j = i; empty and j <= i + 4; ++j)
+        empty = inResidues[j]->GetSecondaryStructure() == loop or
+                inResidues[j]->GetSecondaryStructure() == helix_5 or
+                (inPreferPiHelices and
+                 inResidues[j]->GetSecondaryStructure() == alphahelix);
+      if (empty)
+      {
+        for (uint32 j = i; j <= i + 4; ++j)
+          inResidues[j]->SetSecondaryStructure(helix_5);
+      }
+    }
+  }
+
+  // Whatever is still a loop becomes a turn when one of the preceding
+  // stride - 1 residues starts a helix of that stride, otherwise a bend when
+  // the bend flag is set.
+  for (uint32 i = 1; i + 1 < inResidues.size(); ++i)
+  {
+    if (inResidues[i]->GetSecondaryStructure() == loop)
+    {
+      bool isTurn = false;
+      for (uint32 stride = 3; stride <= 5 and not isTurn; ++stride)
+      {
+        // (i >= k) keeps i - k from wrapping below zero.
+        for (uint32 k = 1; k < stride and not isTurn; ++k)
+          isTurn = (i >= k) and inResidues[i - k]->IsHelixStart(stride);
+      }
+
+      if (isTurn)
+        inResidues[i]->SetSecondaryStructure(turn);
+      else if (inResidues[i]->IsBend())
+        inResidues[i]->SetSecondaryStructure(bend);
+    }
+  }
+}
+
+// Detects beta bridges between residues, merges them into ladders, groups
+// linked ladders into sheets, updates the per-protein bridge/ladder/sheet
+// statistics and finally writes beta partners, sheet numbers and the
+// betabridge/strand secondary structure onto the residues.
+//
+// inResidues  all residues of the protein, chains concatenated; the indices
+//             stored in MBridge::i and MBridge::j refer to this vector.
+void MProtein::CalculateBetaSheets(const std::vector<MResidue*>& inResidues)
+{
+  if (VERBOSE)
+    std::cerr << "Calculate beta sheets" << std::endl;
+
+  // Calculate Bridges
+  // Every pair (i, j) with j >= i + 3 is tested; the first and the last
+  // residue are never used as i or j.
+  std::vector<MBridge> bridges;
+  if (inResidues.size() > 4)
+  {
+    for (uint32 i = 1; i + 4 < inResidues.size(); ++i)
+    {
+      MResidue* ri = inResidues[i];
+
+      for (uint32 j = i + 3; j + 1 < inResidues.size(); ++j)
+      {
+        MResidue* rj = inResidues[j];
+
+        MBridgeType type = ri->TestBridge(rj);
+        if (type == btNoBridge)
+          continue;
+
+        // Try to append this pair to an existing bridge of the same type
+        // whose i side ends directly before i.
+        bool found = false;
+        foreach (MBridge& bridge, bridges)
+        {
+          if (type != bridge.type or i != bridge.i.back() + 1)
+            continue;
+
+          // Parallel: the j side grows upwards together with i.
+          if (type == btParallel and bridge.j.back() + 1 == j)
+          {
+            bridge.i.push_back(i);
+            bridge.j.push_back(j);
+            found = true;
+            break;
+          }
+
+          // Antiparallel: the j side grows downwards while i grows upwards.
+          if (type == btAntiParallel and bridge.j.front() - 1 == j)
+          {
+            bridge.i.push_back(i);
+            bridge.j.push_front(j);
+            found = true;
+            break;
+          }
+        }
+
+        if (not found)
+        {
+          MBridge bridge = {};
+
+          bridge.type = type;
+          bridge.i.push_back(i);
+          bridge.chainI = ri->GetChainID();
+          bridge.j.push_back(j);
+          bridge.chainJ = rj->GetChainID();
+
+          bridges.push_back(bridge);
+        }
+      }
+    }
+  }
+
+  // extend ladders
+  // Qualified call: an unqualified sort() is only found through
+  // argument-dependent lookup, which fails when the vector iterator is a
+  // plain pointer.
+  std::sort(bridges.begin(), bridges.end());
+
+  for (uint32 i = 0; i < bridges.size(); ++i)
+  {
+    for (uint32 j = i + 1; j < bridges.size(); ++j)
+    {
+      // Naming: <i|j side><b(egin)|e(nd)><bridge i|bridge j>.
+      uint32 ibi = bridges[i].i.front();
+      uint32 iei = bridges[i].i.back();
+      uint32 jbi = bridges[i].j.front();
+      uint32 jei = bridges[i].j.back();
+      uint32 ibj = bridges[j].i.front();
+      uint32 iej = bridges[j].i.back();
+      uint32 jbj = bridges[j].j.front();
+      uint32 jej = bridges[j].j.back();
+
+      // Not mergeable when the types differ, a chain break lies inside
+      // either combined range, the i-side gap is 6 or more, or the i ranges
+      // overlap. The differences are unsigned, so a negative gap wraps to a
+      // large value and also fails the "< 6" / "< 3" tests.
+      if (bridges[i].type != bridges[j].type or
+          MResidue::NoChainBreak(inResidues[std::min(ibi, ibj)],
+                                 inResidues[std::max(iei, iej)]) == false or
+          MResidue::NoChainBreak(inResidues[std::min(jbi, jbj)],
+                                 inResidues[std::max(jei, jej)]) == false or
+          ibj - iei >= 6 or
+          (iei >= ibj and ibi <= iej))
+      {
+        continue;
+      }
+
+      bool bulge;
+      if (bridges[i].type == btParallel)
+        bulge = ((jbj - jei < 6 and ibj - iei < 3) or (jbj - jei < 3));
+      else
+        bulge = ((jbi - jej < 6 and ibj - iei < 3) or (jbi - jej < 3));
+
+      if (bulge)
+      {
+        // Fold bridge j into bridge i; for antiparallel bridges the j side
+        // of the later bridge goes in front.
+        bridges[i].i.insert(bridges[i].i.end(), bridges[j].i.begin(),
+                            bridges[j].i.end());
+        if (bridges[i].type == btParallel)
+          bridges[i].j.insert(bridges[i].j.end(), bridges[j].j.begin(),
+                              bridges[j].j.end());
+        else
+          bridges[i].j.insert(bridges[i].j.begin(), bridges[j].j.begin(),
+                              bridges[j].j.end());
+        bridges.erase(bridges.begin() + j);
+        // The next bridge moved into slot j; look at that slot again.
+        --j;
+      }
+    }
+  }
+
+  // Sheet
+  // bridges is not resized from here on, so pointers to its elements stay
+  // valid.
+  std::set<MBridge*> ladderset;
+  foreach (MBridge& bridge, bridges)
+  {
+    ladderset.insert(&bridge);
+
+    // Ladder lengths above kHistogramSize share the last histogram bin.
+    uint32 n = bridge.i.size();
+    if (n > kHistogramSize)
+      n = kHistogramSize;
+
+    if (bridge.type == btParallel)
+      mParallelBridgesPerLadderHistogram[n - 1] += 1;
+    else
+      mAntiparallelBridgesPerLadderHistogram[n - 1] += 1;
+  }
+
+  // Sheet numbers start at 1, ladder numbers at 0.
+  uint32 sheet = 1, ladder = 0;
+  while (not ladderset.empty())
+  {
+    // Seed a new sheet with any remaining ladder ...
+    std::set<MBridge*> sheetset;
+    sheetset.insert(*ladderset.begin());
+    ladderset.erase(ladderset.begin());
+
+    // ... and keep pulling in ladders linked to a member until none is left.
+    // Both sets are modified on a hit, so the scan restarts from scratch.
+    bool done = false;
+    while (not done)
+    {
+      done = true;
+      foreach (MBridge* a, sheetset)
+      {
+        foreach (MBridge* b, ladderset)
+        {
+          if (Linked(*a, *b))
+          {
+            sheetset.insert(b);
+            ladderset.erase(b);
+            done = false;
+            break;
+          }
+        }
+        if (not done)
+          break;
+      }
+    }
+
+    foreach (MBridge* bridge, sheetset)
+    {
+      bridge->ladder = ladder;
+      bridge->sheet = sheet;
+      bridge->link = sheetset;
+
+      ++ladder;
+    }
+
+    // A sheet made of one ladder is only counted when that ladder has more
+    // than one bridge pair; larger counts are clamped to the last bin.
+    uint32 nrOfLaddersPerSheet = sheetset.size();
+    if (nrOfLaddersPerSheet > kHistogramSize)
+      nrOfLaddersPerSheet = kHistogramSize;
+    if (nrOfLaddersPerSheet == 1 and (*sheetset.begin())->i.size() > 1)
+      mLaddersPerSheetHistogram[0] += 1;
+    else if (nrOfLaddersPerSheet > 1)
+      mLaddersPerSheetHistogram[nrOfLaddersPerSheet - 1] += 1;
+
+    ++sheet;
+  }
+
+  foreach (MBridge& bridge, bridges)
+  {
+    // find out if any of the i and j set members already have
+    // a bridge assigned, if so, we're assigning bridge 2
+
+    uint32 betai = 0, betaj = 0;
+
+    foreach (uint32 l, bridge.i)
+    {
+      if (inResidues[l]->GetBetaPartner(0).residue != nullptr)
+      {
+        betai = 1;
+        break;
+      }
+    }
+
+    foreach (uint32 l, bridge.j)
+    {
+      if (inResidues[l]->GetBetaPartner(0).residue != nullptr)
+      {
+        betaj = 1;
+        break;
+      }
+    }
+
+    // A single pair is an isolated bridge, anything longer is a strand.
+    MSecondaryStructure ss = betabridge;
+    if (bridge.i.size() > 1)
+      ss = strand;
+
+    if (bridge.type == btParallel)
+    {
+      mNrOfHBondsInParallelBridges += bridge.i.back() - bridge.i.front() + 2;
+
+      // Pair the i side with the j side in the same order, then vice versa.
+      std::deque<uint32>::iterator j = bridge.j.begin();
+      foreach (uint32 i, bridge.i)
+        inResidues[i]->SetBetaPartner(betai, inResidues[*j++], bridge.ladder,
+                                      true);
+
+      j = bridge.i.begin();
+      foreach (uint32 i, bridge.j)
+        inResidues[i]->SetBetaPartner(betaj, inResidues[*j++], bridge.ladder,
+                                      true);
+    }
+    else
+    {
+      mNrOfHBondsInAntiparallelBridges += bridge.i.back() - bridge.i.front() + 2;
+
+      // Antiparallel: walk the partner side backwards.
+      std::deque<uint32>::reverse_iterator j = bridge.j.rbegin();
+      foreach (uint32 i, bridge.i)
+        inResidues[i]->SetBetaPartner(betai, inResidues[*j++], bridge.ladder,
+                                      false);
+
+      j = bridge.i.rbegin();
+      foreach (uint32 i, bridge.j)
+        inResidues[i]->SetBetaPartner(betaj, inResidues[*j++], bridge.ladder,
+                                      false);
+    }
+
+    // Label the whole index range on both sides, including residues inside
+    // a bulge; an existing strand assignment is never downgraded.
+    for (uint32 i = bridge.i.front(); i <= bridge.i.back(); ++i)
+    {
+      if (inResidues[i]->GetSecondaryStructure() != strand)
+        inResidues[i]->SetSecondaryStructure(ss);
+      inResidues[i]->SetSheet(bridge.sheet);
+    }
+
+    for (uint32 i = bridge.j.front(); i <= bridge.j.back(); ++i)
+    {
+      if (inResidues[i]->GetSecondaryStructure() != strand)
+        inResidues[i]->SetSecondaryStructure(ss);
+      inResidues[i]->SetSheet(bridge.sheet);
+    }
+  }
+}
+
+// Runs CalculateSurface() for every residue. With more than one hardware
+// thread reported, the residues are fed through a queue to one worker thread
+// per hardware thread; otherwise they are processed inline.
+void MProtein::CalculateAccessibilities(
+    const std::vector<MResidue*>& inResidues)
+{
+  if (VERBOSE)
+    std::cerr << "Calculate accessibilities" << std::endl;
+
+  const uint32 workerCount = boost::thread::hardware_concurrency();
+  if (workerCount > 1)
+  {
+    MResidueQueue workQueue;
+
+    boost::thread_group workers;
+
+    // Start the consumers first so the queue is drained while it is filled.
+    uint32 started = 0;
+    while (started < workerCount)
+    {
+      workers.create_thread(boost::bind(&MProtein::CalculateAccessibility, this,
+                                        boost::ref(workQueue),
+                                        boost::ref(inResidues)));
+      ++started;
+    }
+
+    foreach (MResidue* residue, inResidues)
+      workQueue.put(residue);
+
+    // nullptr marks the end of the work; each worker puts it back when it
+    // stops (see CalculateAccessibility), so the next worker sees it too.
+    workQueue.put(nullptr);
+
+    workers.join_all();
+    return;
+  }
+
+  // Zero or one hardware thread reported: do the work on this thread.
+  foreach (MResidue* residue, inResidues)
+    residue->CalculateSurface(inResidues);
}
void MProtein::CalculateAccessibility(MResidueQueue& inQueue,
- const vector<MResidue*>& inResidues)
-{
- // make sure the MSurfaceDots is constructed once
- (void)MSurfaceDots::Instance();
-
- for (;;)
- {
- MResidue* residue = inQueue.get();
- if (residue == nullptr)
- break;
-
- residue->CalculateSurface(inResidues);
- }
-
- inQueue.put(nullptr);
+ const std::vector<MResidue*>& inResidues)
+{
+ // make sure the MSurfaceDots is constructed once
+ (void)MSurfaceDots::Instance();
+
+ for (;;)
+ {
+ MResidue* residue = inQueue.get();
+ if (residue == nullptr)
+ break;
+
+ residue->CalculateSurface(inResidues);
+ }
+
+ inQueue.put(nullptr);
}
void MProtein::Center()
{
- vector<MPoint> p;
- GetPoints(p);
-
- MPoint t = CenterPoints(p);
-
- Translate(MPoint(-t.mX, -t.mY, -t.mZ));
+ std::vector<MPoint> p;
+ GetPoints(p);
+
+ MPoint t = CenterPoints(p);
+
+ Translate(MPoint(-t.mX, -t.mY, -t.mZ));
}
-void MProtein::SetChain(const string& inChainID, const MChain& inChain)
+void MProtein::SetChain(const std::string& inChainID, const MChain& inChain)
{
- MChain& chain(GetChain(inChainID));
- chain = inChain;
- chain.SetChainID(inChainID);
+ MChain& chain(GetChain(inChainID));
+ chain = inChain;
+ chain.SetChainID(inChainID);
}
-MResidue* MProtein::GetResidue(const string& inChainID, uint16 inSeqNumber, const string& inInsertionCode)
+// Non-const overload, implemented in terms of the const overload
+MResidue* MProtein::GetResidue(const std::string& inChainID,
+ uint16 inSeqNumber,
+ const std::string& inInsertionCode)
{
- MChain& chain = GetChain(inChainID);
- if (chain.GetResidues().empty())
- throw mas_exception(boost::format("Invalid chain id '%s'") % inChainID);
- return chain.GetResidueBySeqNumber(inSeqNumber, inInsertionCode);
+ return const_cast<MResidue *>( static_cast<const MProtein &>( *this ).GetResidue(
+ inChainID,
+ inSeqNumber,
+ inInsertionCode
+ ) );
}
-void MProtein::GetCAlphaLocations(const string& inChainID, vector<MPoint>& outPoints) const
+// Const overload
+const MResidue* MProtein::GetResidue(const std::string& inChainID,
+ uint16 inSeqNumber,
+ const std::string& inInsertionCode) const
{
- string chainID = inChainID;
- if (chainID.empty())
- chainID = mChains.front()->GetChainID();
-
- foreach (const MResidue* r, GetChain(chainID).GetResidues())
- outPoints.push_back(r->GetCAlpha());
+ const MChain& chain = GetChain(inChainID);
+ if (chain.GetResidues().empty())
+ throw mas_exception(boost::format("Invalid chain id '%s'") % inChainID);
+ return chain.GetResidueBySeqNumber(inSeqNumber, inInsertionCode);
}
-MPoint MProtein::GetCAlphaPosition(const string& inChainID, int16 inPDBResSeq) const
+void MProtein::GetCAlphaLocations(const std::string& inChainID,
+ std::vector<MPoint>& outPoints) const
{
- string chainID = inChainID;
- if (chainID.empty())
- chainID = mChains.front()->GetChainID();
-
- MPoint result;
- foreach (const MResidue* r, GetChain(chainID).GetResidues())
- {
- if (r->GetSeqNumber() != inPDBResSeq)
- continue;
-
- result = r->GetCAlpha();
- }
-
- return result;
+ std::string chainID = inChainID;
+ if (chainID.empty())
+ chainID = mChains.front()->GetChainID();
+
+ foreach (const MResidue* r, GetChain(chainID).GetResidues())
+ outPoints.push_back(r->GetCAlpha());
}
-void MProtein::GetSequence(const string& inChainID, entry& outEntry) const
+MPoint MProtein::GetCAlphaPosition(const std::string& inChainID,
+ int16 inPDBResSeq) const
{
- string chainID = inChainID;
- if (chainID.empty())
- chainID = mChains.front()->GetChainID();
-
- string seq;
- foreach (const MResidue* r, GetChain(chainID).GetResidues())
- {
- seq += kResidueInfo[r->GetType()].code;
- outEntry.m_positions.push_back(r->GetSeqNumber());
- outEntry.m_ss += r->GetSecondaryStructure();
- }
-
- outEntry.m_seq = encode(seq);
+ std::string chainID = inChainID;
+ if (chainID.empty())
+ chainID = mChains.front()->GetChainID();
+
+ MPoint result;
+ foreach (const MResidue* r, GetChain(chainID).GetResidues())
+ {
+ if (r->GetSeqNumber() != inPDBResSeq)
+ continue;
+
+ result = r->GetCAlpha();
+ }
+
+ return result;
}
-void MProtein::GetSequence(const string& inChainID, sequence& outSequence) const
+void MProtein::GetSequence(const std::string& inChainID, entry& outEntry) const
{
- string chainID = inChainID;
- if (chainID.empty())
- chainID = mChains.front()->GetChainID();
-
- string seq;
- foreach (const MResidue* r, GetChain(chainID).GetResidues())
- seq += kResidueInfo[r->GetType()].code;
-
- outSequence = encode(seq);
+ std::string chainID = inChainID;
+ if (chainID.empty())
+ chainID = mChains.front()->GetChainID();
+
+ std::string seq;
+ foreach (const MResidue* r, GetChain(chainID).GetResidues())
+ {
+ seq += kResidueInfo[r->GetType()].code;
+ outEntry.m_positions.push_back(r->GetSeqNumber());
+ outEntry.m_ss += r->GetSecondaryStructure();
+ }
+
+ outEntry.m_seq = encode(seq);
+}
+
+void MProtein::GetSequence(const std::string& inChainID,
+ sequence& outSequence) const
+{
+ std::string chainID = inChainID;
+ if (chainID.empty())
+ chainID = mChains.front()->GetChainID();
+
+ std::string seq;
+ foreach (const MResidue* r, GetChain(chainID).GetResidues())
+ seq += kResidueInfo[r->GetType()].code;
+
+ outSequence = encode(seq);
}
-void MProtein::WritePDB(ostream& os)
+void MProtein::WritePDB(std::ostream& os)
{
- foreach (MChain* chain, mChains)
- chain->WritePDB(os);
+ foreach (MChain* chain, mChains)
+ chain->WritePDB(os);
}
diff --git a/src/structure.h b/src/structure.h
old mode 100644
new mode 100755
index 75bc889..3df6958
--- a/src/structure.h
+++ b/src/structure.h
@@ -1,12 +1,14 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
#pragma once
-#include "primitives-3d.h"
#include "align-2d.h"
+#include "mas.h"
+#include "primitives-3d.h"
struct MAtom;
class MResidue;
@@ -15,31 +17,32 @@ class MProtein;
// forward declaration of buffer
template<typename T, uint32 N> class buffer;
-typedef buffer<MResidue*,100> MResidueQueue;
+typedef buffer<MResidue*,100> MResidueQueue;
const uint32 kHistogramSize = 30;
-// a limited set of known atoms. This is an obvious candidate for improvement of DSSP.
+// a limited set of known atoms. This is an obvious candidate for improvement
+// of DSSP.
enum MAtomType
{
- kUnknownAtom,
- kHydrogen,
- // ...
- kCarbon,
- kNitrogen,
- kOxygen,
- kFluorine,
- // ...
- kPhosphorus,
- kSulfur,
- kChlorine,
- kMagnesium,
- kPotassium,
- kCalcium,
- kZinc,
- kSelenium,
-
- kAtomTypeCount
+ kUnknownAtom,
+ kHydrogen,
+ // ...
+ kCarbon,
+ kNitrogen,
+ kOxygen,
+ kFluorine,
+ // ...
+ kPhosphorus,
+ kSulfur,
+ kChlorine,
+ kMagnesium,
+ kPotassium,
+ kCalcium,
+ kZinc,
+ kSelenium,
+
+ kAtomTypeCount
};
MAtomType MapElement(std::string inElement);
@@ -47,64 +50,64 @@ MAtomType MapElement(std::string inElement);
// for now, MAtom contains exactly what the ATOM line contains in a PDB file
struct MAtom
{
- uint32 mSerial;
- std::string mName;
- char mAltLoc;
- std::string mResName;
- std::string mChainID;
- int16 mResSeq;
- std::string mICode;
- MAtomType mType;
- MPoint mLoc;
- double mOccupancy;
- double mTempFactor;
- std::string mElement;
- int mCharge;
-
- void SetChainID(const std::string& inChainID){ mChainID = inChainID;}
- std::string GetName() const { return mName; }
- void Translate(const MPoint& inTranslation) { mLoc += inTranslation; }
- void Rotate(const MQuaternion& inRotation) { mLoc.Rotate(inRotation); }
- void WritePDB(std::ostream& os) const;
-
- operator const MPoint&() const { return mLoc; }
- operator MPoint&() { return mLoc; }
+ uint32 mSerial;
+ std::string mName;
+ char mAltLoc;
+ std::string mResName;
+ std::string mChainID;
+ int32 mResSeq;
+ std::string mICode;
+ MAtomType mType;
+ MPoint mLoc;
+ double mOccupancy;
+ double mTempFactor;
+ std::string mElement;
+ int mCharge;
+
+ void SetChainID(const std::string& inChainID){ mChainID = inChainID;}
+ std::string GetName() const { return mName; }
+ void Translate(const MPoint& inTranslation) { mLoc += inTranslation; }
+ void Rotate(const MQuaternion& inRotation) { mLoc.Rotate(inRotation); }
+ void WritePDB(std::ostream& os) const;
+
+ operator const MPoint&() const { return mLoc; }
+ operator MPoint&() { return mLoc; }
};
enum MResidueType
{
- kUnknownResidue,
-
- //
- kAlanine, // A ala
- kArginine, // R arg
- kAsparagine, // N asn
- kAsparticAcid, // D asp
- kCysteine, // C cys
- kGlutamicAcid, // E glu
- kGlutamine, // Q gln
- kGlycine, // G gly
- kHistidine, // H his
- kIsoleucine, // I ile
- kLeucine, // L leu
- kLysine, // K lys
- kMethionine, // M met
- kPhenylalanine, // F phe
- kProline, // P pro
- kSerine, // S ser
- kThreonine, // T thr
- kTryptophan, // W trp
- kTyrosine, // Y tyr
- kValine, // V val
-
- kResidueTypeCount
+ kUnknownResidue,
+
+ //
+ kAlanine, // A ala
+ kArginine, // R arg
+ kAsparagine, // N asn
+ kAsparticAcid, // D asp
+ kCysteine, // C cys
+ kGlutamicAcid, // E glu
+ kGlutamine, // Q gln
+ kGlycine, // G gly
+ kHistidine, // H his
+ kIsoleucine, // I ile
+ kLeucine, // L leu
+ kLysine, // K lys
+ kMethionine, // M met
+ kPhenylalanine, // F phe
+ kProline, // P pro
+ kSerine, // S ser
+ kThreonine, // T thr
+ kTryptophan, // W trp
+ kTyrosine, // Y tyr
+ kValine, // V val
+
+ kResidueTypeCount
};
struct MResidueInfo
{
- MResidueType type;
- char code;
- char name[4];
+ MResidueType type;
+ char code;
+ char name[4];
};
// a residue number to info mapping
@@ -114,314 +117,334 @@ MResidueType MapResidue(std::string inName);
struct HBond
{
- MResidue* residue;
- double energy;
+ MResidue* residue;
+ double energy;
};
enum MBridgeType
{
- btNoBridge, btParallel, btAntiParallel
+ btNoBridge, btParallel, btAntiParallel
};
struct MBridgeParner
{
- MResidue* residue;
- uint32 ladder;
- bool parallel;
+ MResidue* residue;
+ uint32 ladder;
+ bool parallel;
};
enum MHelixFlag
{
- helixNone, helixStart, helixEnd, helixStartAndEnd, helixMiddle
+ helixNone, helixStart, helixEnd, helixStartAndEnd, helixMiddle
};
enum MSecondaryStructure
{
- loop, //' '
- alphahelix, // H
- betabridge, // B
- strand, // E
- helix_3, // G
- helix_5, // I
- turn, // T
- bend // S
+ loop, //' '
+ alphahelix, // H
+ betabridge, // B
+ strand, // E
+ helix_3, // G
+ helix_5, // I
+ turn, // T
+ bend // S
};
class MResidue
{
public:
- MResidue(const MResidue& residue);
- MResidue(uint32 inNumber, char inTypeCode, MResidue* inPrevious);
- MResidue(uint32 inNumber,
- MResidue* inPrevious, const std::vector<MAtom>& inAtoms);
-
- void SetChainID(const std::string& inChainID);
- std::string GetChainID() const { return mChainID; }
-
- MResidueType GetType() const { return mType; }
-
- const MAtom& GetCAlpha() const { return mCA; }
- const MAtom& GetC() const { return mC; }
- const MAtom& GetN() const { return mN; }
- const MAtom& GetO() const { return mO; }
- const MAtom& GetH() const { return mH; }
-
- double Phi() const;
- double Psi() const;
- std::tr1::tuple<double,char>
- Alpha() const;
- double Kappa() const;
- double TCO() const;
-
- double Accessibility() const { return mAccessibility; }
-
- void SetSecondaryStructure(MSecondaryStructure inSS)
- { mSecondaryStructure = inSS; }
- MSecondaryStructure GetSecondaryStructure() const { return mSecondaryStructure; }
-
- const MResidue* Next() const { return mNext; }
- const MResidue* Prev() const { return mPrev; }
-
- void SetPrev(MResidue* inResidue);
-
- void SetBetaPartner(uint32 n, MResidue* inResidue, uint32 inLadder,
- bool inParallel);
- MBridgeParner GetBetaPartner(uint32 n) const;
-
- void SetSheet(uint32 inSheet) { mSheet = inSheet; }
- uint32 GetSheet() const { return mSheet; }
-
- bool IsBend() const { return mBend; }
- void SetBend(bool inBend) { mBend = inBend; }
-
- MHelixFlag GetHelixFlag(uint32 inHelixStride) const;
- bool IsHelixStart(uint32 inHelixStride) const;
- void SetHelixFlag(uint32 inHelixStride, MHelixFlag inHelixFlag);
-
- void SetSSBridgeNr(uint8 inBridgeNr);
- uint8 GetSSBridgeNr() const;
-
- void AddAtom(MAtom& inAtom);
-
- HBond* Donor() { return mHBondDonor; }
- HBond* Acceptor() { return mHBondAcceptor; }
-
- const HBond* Donor() const { return mHBondDonor; }
- const HBond* Acceptor() const { return mHBondAcceptor; }
-
- bool ValidDistance(const MResidue& inNext) const;
-
- static bool TestBond(const MResidue* a, const MResidue* b)
- {
- return a->TestBond(b);
- }
-
- // bridge functions
- MBridgeType TestBridge(MResidue* inResidue) const;
-
- uint16 GetSeqNumber() const { return mSeqNumber; }
- std::string GetInsertionCode() const { return mInsertionCode; }
-
- void SetNumber(uint16 inNumber) { mNumber = inNumber; }
- uint16 GetNumber() const { return mNumber; }
-
- void Translate(const MPoint& inTranslation);
- void Rotate(const MQuaternion& inRotation);
-
- void WritePDB(std::ostream& os);
-
- static double CalculateHBondEnergy(MResidue& inDonor, MResidue& inAcceptor);
-
- std::vector<MAtom>& GetSideChain() { return mSideChain; }
- const std::vector<MAtom>&
- GetSideChain() const { return mSideChain; }
-
- void GetPoints(std::vector<MPoint>& outPoints) const;
-
- void CalculateSurface(const std::vector<MResidue*>& inResidues);
-
- void GetCenterAndRadius(MPoint& outCenter, double& outRadius) const
- { outCenter = mCenter; outRadius = mRadius; }
-
- static bool NoChainBreak(const MResidue* from, const MResidue* to);
+ MResidue(const MResidue& residue);
+ MResidue(int32 inNumber, char inTypeCode, MResidue* inPrevious);
+ MResidue(int32 inNumber,
+ MResidue* inPrevious, const std::vector<MAtom>& inAtoms);
+
+ void SetChainID(const std::string& inChainID);
+ std::string GetChainID() const { return mChainID; }
+
+ MResidueType GetType() const { return mType; }
+
+ const MAtom& GetCAlpha() const { return mCA; }
+ const MAtom& GetC() const { return mC; }
+ const MAtom& GetN() const { return mN; }
+ const MAtom& GetO() const { return mO; }
+ const MAtom& GetH() const { return mH; }
+
+ double Phi() const;
+ double Psi() const;
+ std::tr1::tuple<double,char>
+ Alpha() const;
+ double Kappa() const;
+ double TCO() const;
+
+ double Accessibility() const { return mAccessibility; }
+
+ void SetSecondaryStructure(MSecondaryStructure inSS)
+ { mSecondaryStructure = inSS; }
+ MSecondaryStructure GetSecondaryStructure() const
+ {
+ return mSecondaryStructure;
+ }
+
+ const MResidue* Next() const { return mNext; }
+ const MResidue* Prev() const { return mPrev; }
+
+ void SetPrev(MResidue* inResidue);
+
+ void SetBetaPartner(uint32 n, MResidue* inResidue, uint32 inLadder,
+ bool inParallel);
+ MBridgeParner GetBetaPartner(uint32 n) const;
+
+ void SetSheet(uint32 inSheet) { mSheet = inSheet; }
+ uint32 GetSheet() const { return mSheet; }
+
+ bool IsBend() const { return mBend; }
+ void SetBend(bool inBend) { mBend = inBend; }
+
+ MHelixFlag GetHelixFlag(uint32 inHelixStride) const;
+ bool IsHelixStart(uint32 inHelixStride) const;
+ void SetHelixFlag(uint32 inHelixStride, MHelixFlag inHelixFlag);
+
+ void SetSSBridgeNr(uint8 inBridgeNr);
+ uint8 GetSSBridgeNr() const;
+
+ void AddAtom(MAtom& inAtom);
+
+ HBond* Donor() { return mHBondDonor; }
+ HBond* Acceptor() { return mHBondAcceptor; }
+
+ const HBond* Donor() const { return mHBondDonor; }
+ const HBond* Acceptor() const { return mHBondAcceptor; }
+
+ bool ValidDistance(const MResidue& inNext) const;
+
+ static bool TestBond(const MResidue* a, const MResidue* b)
+ {
+ return a->TestBond(b);
+ }
+
+ // bridge functions
+ MBridgeType TestBridge(MResidue* inResidue) const;
+
+ int16 GetSeqNumber() const { return mSeqNumber; }
+ std::string GetInsertionCode() const { return mInsertionCode; }
+
+ void SetNumber(uint16 inNumber) { mNumber = inNumber; }
+ uint16 GetNumber() const { return mNumber; }
+
+ void Translate(const MPoint& inTranslation);
+ void Rotate(const MQuaternion& inRotation);
+
+ void WritePDB(std::ostream& os);
+
+ static double CalculateHBondEnergy(MResidue& inDonor, MResidue& inAcceptor);
+
+ std::vector<MAtom>& GetSideChain() { return mSideChain; }
+ const std::vector<MAtom>&
+ GetSideChain() const { return mSideChain; }
+
+ void GetPoints(std::vector<MPoint>& outPoints) const;
+
+ void CalculateSurface(const std::vector<MResidue*>& inResidues);
+
+ void GetCenterAndRadius(MPoint& outCenter, double& outRadius) const
+ { outCenter = mCenter; outRadius = mRadius; }
+
+ static bool NoChainBreak(const MResidue* from, const MResidue* to);
protected:
- double CalculateSurface(
- const MAtom& inAtom, double inRadius,
- const std::vector<MResidue*>& inResidues);
-
- bool TestBond(const MResidue* other) const;
-
- void ExtendBox(const MAtom& atom, double inRadius);
- bool AtomIntersectsBox(const MAtom& atom, double inRadius) const;
-
- std::string mChainID;
- MResidue* mPrev;
- MResidue* mNext;
- int32 mSeqNumber, mNumber;
- std::string mInsertionCode;
- MResidueType mType;
- uint8 mSSBridgeNr;
- double mAccessibility;
- MSecondaryStructure mSecondaryStructure;
- MAtom mC, mN, mCA, mO, mH;
- HBond mHBondDonor[2], mHBondAcceptor[2];
- std::vector<MAtom> mSideChain;
- MBridgeParner mBetaPartner[2];
- uint32 mSheet;
- MHelixFlag mHelixFlags[3]; //
- bool mBend;
- MPoint mBox[2]; // The 3D box containing all atoms
- MPoint mCenter; // and the 3d Sphere containing all atoms
- double mRadius;
+ double CalculateSurface(
+ const MAtom& inAtom, double inRadius,
+ const std::vector<MResidue*>& inResidues);
+
+ bool TestBond(const MResidue* other) const;
+
+ void ExtendBox(const MAtom& atom, double inRadius);
+ bool AtomIntersectsBox(const MAtom& atom, double inRadius) const;
+
+ std::string mChainID;
+ MResidue* mPrev;
+ MResidue* mNext;
+ int32 mSeqNumber, mNumber;
+ std::string mInsertionCode;
+ MResidueType mType;
+ uint8 mSSBridgeNr;
+ double mAccessibility;
+ MSecondaryStructure mSecondaryStructure;
+ MAtom mC, mN, mCA, mO, mH;
+ HBond mHBondDonor[2], mHBondAcceptor[2];
+ std::vector<MAtom> mSideChain;
+ MBridgeParner mBetaPartner[2];
+ uint32 mSheet;
+ MHelixFlag mHelixFlags[3]; //
+ bool mBend;
+ MPoint mBox[2]; // The 3D box containing all atoms
+ MPoint mCenter; // and the 3d Sphere containing all atoms
+ double mRadius;
private:
- MResidue& operator=(const MResidue& residue);
+ MResidue& operator=(const MResidue& residue);
};
class MChain
{
public:
- MChain(const MChain& chain);
- MChain(const std::string& inChainID) : mChainID(inChainID) {}
- ~MChain();
+ MChain(const MChain& chain);
+ MChain(const std::string& inChainID) : mChainID(inChainID) {}
+ ~MChain();
+
+ MChain& operator=(const MChain& chain);
- MChain& operator=(const MChain& chain);
+ std::string GetChainID() const { return mChainID; }
+ void SetChainID(const std::string& inChainID);
- std::string GetChainID() const { return mChainID; }
- void SetChainID(const std::string& inChainID);
+ const MResidue* GetResidueBySeqNumber(uint16 inSeqNumber,
+ const std::string& inInsertionCode) const;
- MResidue* GetResidueBySeqNumber(uint16 inSeqNumber, const std::string& inInsertionCode);
-
- void GetSequence(std::string& outSequence) const;
+ void GetSequence(std::string& outSequence) const;
- void Translate(const MPoint& inTranslation);
- void Rotate(const MQuaternion& inRotation);
+ void Translate(const MPoint& inTranslation);
+ void Rotate(const MQuaternion& inRotation);
- void WritePDB(std::ostream& os);
-
- std::vector<MResidue*>&
- GetResidues() { return mResidues; }
- const std::vector<MResidue*>&
- GetResidues() const { return mResidues; }
+ void WritePDB(std::ostream& os);
- bool Empty() const { return mResidues.empty(); }
+ std::vector<MResidue*>&
+ GetResidues() { return mResidues; }
+ const std::vector<MResidue*>&
+ GetResidues() const { return mResidues; }
+
+ bool Empty() const { return mResidues.empty(); }
private:
- std::string mChainID;
- std::vector<MResidue*>
- mResidues;
+ std::string mChainID;
+ std::vector<MResidue*>
+ mResidues;
};
class MProtein
{
public:
- MProtein();
- MProtein(const std::string& inID, MChain* inChain);
- ~MProtein();
-
-// MProtein(std::istream& is, bool inCAlphaOnly = false);
-
- void ReadPDB(std::istream& is, bool inCAlphaOnly = false);
- void ReadmmCIF(std::istream& is, bool inCAlphaOnly = false);
-
- const std::string& GetID() const { return mID; }
- const std::string& GetHeader() const { return mHeader; }
- std::string GetCompound() const;
- std::string GetSource() const;
- std::string GetAuthor() const;
- const std::vector<std::string>&
- GetDbRef() const { return mDbRef; }
-
- void CalculateSecondaryStructure(bool inPreferPiHelices = true);
-
- void GetStatistics(uint32& outNrOfResidues, uint32& outNrOfChains,
- uint32& outNrOfSSBridges, uint32& outNrOfIntraChainSSBridges,
- uint32& outNrOfHBonds, uint32 outNrOfHBondsPerDistance[11]) const;
-
- void GetCAlphaLocations(const std::string& inChainID, std::vector<MPoint>& outPoints) const;
- MPoint GetCAlphaPosition(const std::string& inChainID, int16 inPDBResSeq) const;
-
- void GetSequence(const std::string& inChainID, entry& outEntry) const;
- void GetSequence(const std::string& inChainID, sequence& outSequence) const;
-
- void Center();
- void Translate(const MPoint& inTranslation);
- void Rotate(const MQuaternion& inRotation);
-
- void WritePDB(std::ostream& os);
-
- void GetPoints(std::vector<MPoint>& outPoints) const;
-
- std::string GetFirstChainID() const { return mChains.front()->GetChainID(); }
-
- void SetChain(const std::string& inChainID, const MChain& inChain);
-
- MChain& GetChain(const std::string& inChainID);
- const MChain& GetChain(const std::string& inChainID) const;
-
- const std::vector<MChain*>&
- GetChains() const { return mChains; }
-
- template<class OutputIterator>
- void GetSequences(OutputIterator outSequences) const;
-
- MResidue* GetResidue(const std::string& inChainID, uint16 inSeqNumber, const std::string& inInsertionCode);
-
- // statistics
- uint32 GetNrOfHBondsInParallelBridges() const { return mNrOfHBondsInParallelBridges; }
- uint32 GetNrOfHBondsInAntiparallelBridges() const { return mNrOfHBondsInAntiparallelBridges; }
-
- void GetResiduesPerAlphaHelixHistogram(uint32 outHistogram[30]) const;
- void GetParallelBridgesPerLadderHistogram(uint32 outHistogram[30]) const;
- void GetAntiparallelBridgesPerLadderHistogram(uint32 outHistogram[30]) const;
- void GetLaddersPerSheetHistogram(uint32 outHistogram[30]) const;
-
+ MProtein();
+ MProtein(const std::string& inID, MChain* inChain);
+ ~MProtein();
+
+// MProtein(std::istream& is, bool inCAlphaOnly = false);
+
+ void ReadPDB(std::istream& is, bool inCAlphaOnly = false);
+ void ReadmmCIF(std::istream& is, bool inCAlphaOnly = false);
+
+ const std::string& GetID() const { return mID; }
+ const std::string& GetHeader() const { return mHeader; }
+ std::string GetCompound() const;
+ std::string GetSource() const;
+ std::string GetAuthor() const;
+ const std::vector<std::string>& GetDbRef() const { return mDbRef; }
+
+ void CalculateSecondaryStructure(bool inPreferPiHelices = true);
+
+ void GetStatistics(uint32& outNrOfResidues, uint32& outNrOfChains,
+ uint32& outNrOfSSBridges, uint32& outNrOfIntraChainSSBridges,
+ uint32& outNrOfHBonds, uint32 outNrOfHBondsPerDistance[11]) const;
+
+ void GetCAlphaLocations(const std::string& inChainID,
+ std::vector<MPoint>& outPoints) const;
+ MPoint GetCAlphaPosition(const std::string& inChainID,
+ int16 inPDBResSeq) const;
+
+ void GetSequence(const std::string& inChainID,
+ entry& outEntry) const;
+ void GetSequence(const std::string& inChainID,
+ sequence& outSequence) const;
+
+ void Center();
+ void Translate(const MPoint& inTranslation);
+ void Rotate(const MQuaternion& inRotation);
+
+ void WritePDB(std::ostream& os);
+
+ void GetPoints(std::vector<MPoint>& outPoints) const;
+
+ std::string GetFirstChainID() const
+ {
+ return mChains.front()->GetChainID();
+ }
+
+ void SetChain(const std::string& inChainID, const MChain& inChain);
+
+ MChain& GetChain(const std::string& inChainID);
+ const MChain& GetChain(const std::string& inChainID) const;
+
+ const std::vector<MChain*>& GetChains() const { return mChains; }
+
+ template<class OutputIterator>
+ void GetSequences(OutputIterator outSequences) const;
+
+ MResidue* GetResidue(const std::string& inChainID, uint16 inSeqNumber,
+ const std::string& inInsertionCode);
+
+ const MResidue* GetResidue(const std::string& inChainID, uint16 inSeqNumber,
+ const std::string& inInsertionCode) const;
+
+ // statistics
+ uint32 GetNrOfHBondsInParallelBridges() const
+ {
+ return mNrOfHBondsInParallelBridges;
+ }
+
+ uint32 GetNrOfHBondsInAntiparallelBridges() const
+ {
+ return mNrOfHBondsInAntiparallelBridges;
+ }
+
+ void GetResiduesPerAlphaHelixHistogram(uint32 outHistogram[30]) const;
+ void GetParallelBridgesPerLadderHistogram(uint32 outHistogram[30]) const;
+ void GetAntiparallelBridgesPerLadderHistogram(uint32 outHistogram[30]) const;
+ void GetLaddersPerSheetHistogram(uint32 outHistogram[30]) const;
+
private:
- void AddResidue(const std::vector<MAtom>& inAtoms);
-
- void CalculateHBondEnergies(const std::vector<MResidue*>& inResidues);
- void CalculateAlphaHelices(const std::vector<MResidue*>& inResidues, bool inPreferPiHelices);
- void CalculateBetaSheets(const std::vector<MResidue*>& inResidues);
- void CalculateAccessibilities(const std::vector<MResidue*>& inResidues);
-
- // a thread entry point
- void CalculateAccessibility(MResidueQueue& inQueue,
- const std::vector<MResidue*>& inResidues);
-
- std::string mID, mHeader;
-
- std::vector<std::string>
- mDbRef;
- std::string mCompound, mSource, mAuthor;
- std::vector<MChain*>mChains;
- uint32 mResidueCount, mChainBreaks;
-
- std::vector<std::pair<MResidue*,MResidue*> >
- mSSBonds;
- uint32 mIgnoredWaterMolecules;
-
- // statistics
- uint32 mNrOfHBondsInParallelBridges, mNrOfHBondsInAntiparallelBridges;
- uint32 mParallelBridgesPerLadderHistogram[kHistogramSize];
- uint32 mAntiparallelBridgesPerLadderHistogram[kHistogramSize];
- uint32 mLaddersPerSheetHistogram[kHistogramSize];
+ void AddResidue(const std::vector<MAtom>& inAtoms);
+
+ void CalculateHBondEnergies(const std::vector<MResidue*>& inResidues);
+ void CalculateAlphaHelices(const std::vector<MResidue*>& inResidues,
+ bool inPreferPiHelices);
+ void CalculateBetaSheets(const std::vector<MResidue*>& inResidues);
+ void CalculateAccessibilities(const std::vector<MResidue*>& inResidues);
+
+ // a thread entry point
+ void CalculateAccessibility(MResidueQueue& inQueue,
+ const std::vector<MResidue*>& inResidues);
+
+ std::string mID, mHeader;
+
+ std::vector<std::string> mDbRef;
+ std::string mCompound, mSource, mAuthor;
+ std::vector<MChain*> mChains;
+ uint32 mResidueCount, mChainBreaks;
+
+ std::vector<std::pair<MResidue*,MResidue*> > mSSBonds;
+ uint32 mIgnoredWaterMolecules;
+
+ // statistics
+ uint32 mNrOfHBondsInParallelBridges, mNrOfHBondsInAntiparallelBridges;
+ uint32 mParallelBridgesPerLadderHistogram[kHistogramSize];
+ uint32 mAntiparallelBridgesPerLadderHistogram[kHistogramSize];
+ uint32 mLaddersPerSheetHistogram[kHistogramSize];
};
// inlines
-// GetSequences can be used to quickly get all sequences in a vector<string> e.g.
+// GetSequences can be used to quickly get all sequences in a vector<string>
template<class OutputIterator>
void MProtein::GetSequences(OutputIterator outSequences) const
{
- for (std::vector<MChain*>::const_iterator chain = mChains.begin(); chain != mChains.end(); ++chain)
- {
- std::string seq;
- (*chain)->GetSequence(seq);
- *outSequences++ = seq;
- }
+ std::vector<MChain*>::const_iterator chain;
+ for (chain = mChains.begin(); chain != mChains.end(); ++chain)
+ {
+ std::string seq;
+ (*chain)->GetSequence(seq);
+ *outSequences++ = seq;
+ }
}
diff --git a/src/utils.cpp b/src/utils.cpp
old mode 100644
new mode 100755
index 409ab5a..6168362
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,76 +1,74 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
-#include "mas.h"
+#include "utils.h"
-#include <iostream>
-#include <cstdio>
+#include "align-2d.h"
#include <boost/bind.hpp>
-#include <boost/thread.hpp>
#include <boost/foreach.hpp>
-#define foreach BOOST_FOREACH
-//#include <boost/timer/timer.hpp>
+#include <boost/thread.hpp>
-#include "align-2d.h"
-#include "utils.h"
+#include <cstdio>
+#include <iostream>
-using namespace std;
namespace fs = boost::filesystem;
+#define foreach BOOST_FOREACH
// --------------------------------------------------------------------
arg_vector::operator char* const*()
{
- m_argv.clear();
- foreach (string& s, m_args)
- {
- m_argv.push_back(s.c_str());
- if (VERBOSE > 1)
- cerr << m_argv.back() << ' ';
- }
- if (VERBOSE > 1)
- cerr << endl;
-
- m_argv.push_back(nullptr);
- return const_cast<char*const*>(&m_argv[0]);
+ m_argv.clear();
+ foreach (std::string& s, m_args)
+ {
+ m_argv.push_back(s.c_str());
+ if (VERBOSE > 1)
+ std::cerr << m_argv.back() << ' ';
+ }
+ if (VERBOSE > 1)
+ std::cerr << std::endl;
+
+ m_argv.push_back(nullptr);
+ return const_cast<char*const*>(&m_argv[0]);
}
-ostream& operator<<(ostream& os, const arg_vector& argv)
+std::ostream& operator<<(std::ostream& os, const arg_vector& argv)
{
- os << "About to execute: " << endl;
- foreach (const string& a, argv.m_args)
- os << a << ' ';
- os << endl;
+ os << "About to execute: " << std::endl;
+ foreach (const std::string& a, argv.m_args)
+ os << a << ' ';
+ os << std::endl;
- return os;
+ return os;
}
// --------------------------------------------------------------------
-mas_exception::mas_exception(const string& msg)
+mas_exception::mas_exception(const std::string& msg)
{
- snprintf(m_msg, sizeof(m_msg), "%s", msg.c_str());
+ snprintf(m_msg, sizeof(m_msg), "%s", msg.c_str());
}
mas_exception::mas_exception(const boost::format& msg)
{
- snprintf(m_msg, sizeof(m_msg), "%s", msg.str().c_str());
+ snprintf(m_msg, sizeof(m_msg), "%s", msg.str().c_str());
}
//// --------------------------------------------------------------------
//
-//string decode(const sequence& s)
+//std::string decode(const sequence& s)
//{
-// string result;
-// result.reserve(s.length());
-//
-// foreach (aa a, s)
-// result.push_back(kAA[a]);
+// std::string result;
+// result.reserve(s.length());
+//
+// foreach (aa a, s)
+// result.push_back(kAA[a]);
//
-// return result;
+// return result;
//}
//
//namespace {
@@ -80,55 +78,55 @@ mas_exception::mas_exception(const boost::format& msg)
//
//inline void init_reverse()
//{
-// if (not sInited)
-// {
-// // init global reverse mapping
-// for (uint32 a = 0; a < 256; ++a)
-// kAA_Reverse[a] = 255;
-// for (uint8 a = 0; a < sizeof(kAA); ++a)
-// {
-// kAA_Reverse[toupper(kAA[a])] = a;
-// kAA_Reverse[tolower(kAA[a])] = a;
-// }
-// }
+// if (not sInited)
+// {
+// // init global reverse mapping
+// for (uint32 a = 0; a < 256; ++a)
+// kAA_Reverse[a] = 255;
+// for (uint8 a = 0; a < sizeof(kAA); ++a)
+// {
+// kAA_Reverse[toupper(kAA[a])] = a;
+// kAA_Reverse[tolower(kAA[a])] = a;
+// }
+// }
//}
//
//}
//
//aa encode(char r)
//{
-// init_reverse();
-//
-// if (r == '.' or r == '*' or r == '~' or r == '_')
-// r = '-';
-//
-// aa result = kAA_Reverse[static_cast<uint8>(r)];
-// if (result >= sizeof(kAA))
-// throw mas_exception(boost::format("invalid residue %1%") % r);
-//
-// return result;
+// init_reverse();
+//
+// if (r == '.' or r == '*' or r == '~' or r == '_')
+// r = '-';
+//
+// aa result = kAA_Reverse[static_cast<uint8>(r)];
+// if (result >= sizeof(kAA))
+// throw mas_exception(boost::format("invalid residue %1%") % r);
+//
+// return result;
//}
//
-//sequence encode(const string& s)
+//sequence encode(const std::string& s)
//{
-// init_reverse();
-//
-// sequence result;
-// result.reserve(s.length());
-//
-// foreach (char r, s)
-// {
-// if (r == '.' or r == '*' or r == '~' or r == '_')
-// r = '-';
-//
-// aa rc = kAA_Reverse[static_cast<uint8>(r)];
-// if (rc >= sizeof(kAA))
-// throw mas_exception(boost::format("invalid residue in sequence %1%") % r);
-//
-// result.push_back(rc);
-// }
-//
-// return result;
+// init_reverse();
+//
+// sequence result;
+// result.reserve(s.length());
+//
+// foreach (char r, s)
+// {
+// if (r == '.' or r == '*' or r == '~' or r == '_')
+// r = '-';
+//
+// aa rc = kAA_Reverse[static_cast<uint8>(r)];
+// if (rc >= sizeof(kAA))
+// throw mas_exception(boost::format("invalid residue in sequence %1%") % r);
+//
+// result.push_back(rc);
+// }
+//
+// return result;
//}
// --------------------------------------------------------------------
@@ -136,42 +134,43 @@ mas_exception::mas_exception(const boost::format& msg)
#ifndef NDEBUG
stats::~stats()
{
- if (VERBOSE)
- cerr << endl << "max: " << m_max << " count: " << m_count << " average: " << (m_cumm / m_count) << endl;
+ if (VERBOSE)
+ std::cerr << std::endl << "max: " << m_max << " count: " << m_count
+ << " average: " << (m_cumm / m_count) << std::endl;
}
#endif
// --------------------------------------------------------------------
#if P_UNIX
-void WriteToFD(int inFD, const std::string& inText)
+void WriteToFD(int inFD, const std::string& inText)
{
- const char kEOLN[] = "\n";
- const char* s = inText.c_str();
- uint32 l = inText.length();
-
- while (l > 0)
- {
- int r = write(inFD, s, l);
-
- if (r >= 0)
- {
- l -= r;
- if (l == 0 and s != kEOLN)
- {
- s = kEOLN;
- l = 1;
- }
- continue;
- }
-
- if (r == -1 and errno == EAGAIN)
- continue;
-
- throw mas_exception("Failed to write to file descriptor");
-
- break;
- }
+ const char kEOLN[] = "\n";
+ const char* s = inText.c_str();
+ uint32 l = inText.length();
+
+ while (l > 0)
+ {
+ int r = write(inFD, s, l);
+
+ if (r >= 0)
+ {
+ l -= r;
+ if (l == 0 and s != kEOLN)
+ {
+ s = kEOLN;
+ l = 1;
+ }
+ continue;
+ }
+
+ if (r == -1 and errno == EAGAIN)
+ continue;
+
+ throw mas_exception("Failed to write to file descriptor");
+
+ break;
+ }
}
#endif
@@ -181,22 +180,22 @@ void WriteToFD(int inFD, const std::string& inText)
fs::path get_home()
{
- const char* home = getenv("HOME");
- if (home == nullptr)
- home = getenv("HOMEPATH");
- if (home == nullptr)
- throw mas_exception("No home defined");
- return fs::path(home);
+ const char* home = getenv("HOME");
+ if (home == nullptr)
+ home = getenv("HOMEPATH");
+ if (home == nullptr)
+ throw mas_exception("No home defined");
+ return fs::path(home);
}
#else
fs::path get_home()
{
- const char* home = getenv("HOME");
- if (home == nullptr)
- throw mas_exception("No home defined");
- return fs::path(home);
+ const char* home = getenv("HOME");
+ if (home == nullptr)
+ throw mas_exception("No home defined");
+ return fs::path(home);
}
#endif
diff --git a/src/utils.h b/src/utils.h
old mode 100644
new mode 100755
index cbe471f..4c5d1a5
--- a/src/utils.h
+++ b/src/utils.h
@@ -1,19 +1,25 @@
// Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
+// Copyright Coos Baakman, Jon Black, Wouter G. Touw & Gert Vriend, Radboud university medical center 2015.
// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef XSSP_UTILS_H
+#define XSSP_UTILS_H
#pragma once
+#include "mas.h"
+
+#include <boost/filesystem.hpp>
+#include <boost/format.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/thread.hpp>
+
#ifndef NDEBUG
#include <iostream>
#endif
-
#include <time.h>
-#include <boost/thread.hpp>
-#include <boost/lexical_cast.hpp>
-#include <boost/format.hpp>
-#include <boost/filesystem.hpp>
// --------------------------------------------------------------------
@@ -21,42 +27,42 @@ class arg_vector
{
public:
- arg_vector(const std::string& program)
- {
- m_args.push_back(program);
- }
+ arg_vector(const std::string& program)
+ {
+ m_args.push_back(program);
+ }
- void push(const std::string& option)
- {
- m_args.push_back(option);
- }
+ void push(const std::string& option)
+ {
+ m_args.push_back(option);
+ }
- template<class T>
- void push(const std::string& option, const T& value);
+ template<class T>
+ void push(const std::string& option, const T& value);
- operator char* const*();
+ operator char* const*();
private:
- friend std::ostream& operator<<(std::ostream& os, const arg_vector& argv);
+ friend std::ostream& operator<<(std::ostream& os, const arg_vector& argv);
- std::vector<std::string> m_args;
- std::vector<const char*> m_argv;
+ std::vector<std::string> m_args;
+ std::vector<const char*> m_argv;
};
template<class T>
inline
void arg_vector::push(const std::string& option, const T& value)
{
- m_args.push_back(option);
- m_args.push_back(boost::lexical_cast<std::string>(value));
+ m_args.push_back(option);
+ m_args.push_back(boost::lexical_cast<std::string>(value));
}
template<>
inline
void arg_vector::push(const std::string& option, const std::string& value)
{
- m_args.push_back(option);
- m_args.push_back(value);
+ m_args.push_back(option);
+ m_args.push_back(value);
}
std::ostream& operator<<(std::ostream& os, const arg_vector& argv);
@@ -66,14 +72,14 @@ std::ostream& operator<<(std::ostream& os, const arg_vector& argv);
class mas_exception : public std::exception
{
public:
- mas_exception(const std::string& msg);
- mas_exception(const boost::format& msg);
+ mas_exception(const std::string& msg);
+ mas_exception(const boost::format& msg);
- virtual const char*
- what() const throw() { return m_msg; }
+ virtual const char*
+ what() const throw() { return m_msg; }
private:
- char m_msg[1024];
+ char m_msg[1024];
};
// --------------------------------------------------------------------
@@ -81,18 +87,18 @@ class mas_exception : public std::exception
#ifndef NDEBUG
struct stats
{
- stats() : m_max(0), m_count(0), m_cumm(0) {}
- ~stats();
-
- void operator()(uint32 i)
- {
- if (m_max < i)
- m_max = i;
- ++m_count;
- m_cumm += i;
- }
-
- uint32 m_max, m_count, m_cumm;
+ stats() : m_max(0), m_count(0), m_cumm(0) {}
+ ~stats();
+
+ void operator()(uint32 i)
+ {
+ if (m_max < i)
+ m_max = i;
+ ++m_count;
+ m_cumm += i;
+ }
+
+ uint32 m_max, m_count, m_cumm;
};
#endif
@@ -100,3 +106,5 @@ struct stats
void WriteToFD(int inFD, const std::string& inText);
boost::filesystem::path get_home();
+
+#endif
diff --git a/src/version.h b/src/version.h
new file mode 100755
index 0000000..730b318
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,6 @@
+#ifndef version_h
+#define version_h
+
+#define XSSP_VERSION "3.0.0"
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/dssp.git
More information about the debian-med-commit
mailing list