[med-svn] r12392 - in trunk/packages/conservation-code/trunk/debian: . patches
Laszlo Kajan
lkajan-guest at alioth.debian.org
Thu Oct 11 20:50:46 UTC 2012
Author: lkajan-guest
Date: 2012-10-11 20:50:46 +0000 (Thu, 11 Oct 2012)
New Revision: 12392
Added:
trunk/packages/conservation-code/trunk/debian/patches/stockholm_format
Modified:
trunk/packages/conservation-code/trunk/debian/changelog
trunk/packages/conservation-code/trunk/debian/patches/series
trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod
Log:
allow parsing of Stockholm format as well
Modified: trunk/packages/conservation-code/trunk/debian/changelog
===================================================================
--- trunk/packages/conservation-code/trunk/debian/changelog 2012-10-11 19:59:08 UTC (rev 12391)
+++ trunk/packages/conservation-code/trunk/debian/changelog 2012-10-11 20:50:46 UTC (rev 12392)
@@ -1,3 +1,9 @@
+conservation-code (20110309.0-2) UNRELEASED; urgency=low
+
+ * Allow parsing of Stockholm format as well.
+
+ -- Laszlo Kajan <lkajan at rostlab.org> Thu, 11 Oct 2012 22:50:04 +0200
+
conservation-code (20110309.0-1) unstable; urgency=low
* Initial release. (Closes: #690058)
Modified: trunk/packages/conservation-code/trunk/debian/patches/series
===================================================================
--- trunk/packages/conservation-code/trunk/debian/patches/series 2012-10-11 19:59:08 UTC (rev 12391)
+++ trunk/packages/conservation-code/trunk/debian/patches/series 2012-10-11 20:50:46 UTC (rev 12392)
@@ -3,3 +3,4 @@
numpy.numarray
default_matrix_path
optimize_loop
+stockholm_format
Added: trunk/packages/conservation-code/trunk/debian/patches/stockholm_format
===================================================================
--- trunk/packages/conservation-code/trunk/debian/patches/stockholm_format (rev 0)
+++ trunk/packages/conservation-code/trunk/debian/patches/stockholm_format 2012-10-11 20:50:46 UTC (rev 12392)
@@ -0,0 +1,53 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: allow parsing of Stockholm format as well
+Forwarded: no
+--- a/score_conservation.py
++++ b/score_conservation.py
+@@ -84,6 +84,7 @@
+ ################################################################################
+
+ import math, sys, getopt
++import re
+ # numarray imported below
+
+ PSEUDOCOUNT = .0000001
+@@ -679,12 +680,14 @@
+ return names, alignment
+
+ def read_clustal_alignment(filename):
+- """ Read in the alignment stored in the CLUSTAL file, filename. Return
++ """ Read in the alignment stored in the CLUSTAL or Stockholm file, filename. Return
+ two lists: the names and sequences. """
+
+ names = []
+ alignment = []
+
++ re_stock_markup = re.compile('^#=')
++
+ f = open(filename)
+
+ for line in f:
+@@ -692,16 +695,20 @@
+ if len(line) == 0: continue
+ if '*' in line: continue
+
+- if 'CLUSTAL' in line: continue
++ if line[0:7] == 'CLUSTAL': continue
++ if line[0:11] == '# STOCKHOLM': continue
++ if line[0:2] == '//': continue
++
++ if re_stock_markup.match(line): continue
+
+ t = line.split()
+
+ if len(t) == 2 and t[1][0] in iupac_alphabet:
+ if t[0] not in names:
+ names.append(t[0])
+- alignment.append(t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X', '-').replace('\r', ''))
++ alignment.append(t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X', '-').replace('\r', '').replace('.', '-'))
+ else:
+- alignment[names.index(t[0])] += t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X','-').replace('\r', '')
++ alignment[names.index(t[0])] += t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X','-').replace('\r', '').replace('.', '-')
+
+ return names, alignment
+
Modified: trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod
===================================================================
--- trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod 2012-10-11 19:59:08 UTC (rev 12391)
+++ trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod 2012-10-11 20:50:46 UTC (rev 12392)
@@ -10,7 +10,7 @@
=head1 DESCRIPTION
-Score protein sequence conservation in B<ALIGNFILE>. B<ALIGNFILE> must be in FASTA or CLUSTAL format.
+Score protein sequence conservation in B<ALIGNFILE>. B<ALIGNFILE> must be in FASTA, CLUSTAL or Stockholm format.
The following conservation scoring methods are implemented:
* sum of pairs
@@ -32,7 +32,7 @@
Jensen-Shannon divergence and a window B<-w> of I<3>.
The sequence-specific output can be used as the conservation input for
-concavity.
+concavity(1).
Conservation is highly predictive in identifying catalytic sites and
residues near bound ligands.
More information about the debian-med-commit
mailing list