[med-svn] r11712 - in trunk/packages/predictprotein/trunk/debian: . predictprotein predictprotein/DEBIAN predictprotein/usr predictprotein/usr/bin predictprotein/usr/share predictprotein/usr/share/doc predictprotein/usr/share/doc/predictprotein predictprotein/usr/share/lintian predictprotein/usr/share/lintian/overrides predictprotein/usr/share/man predictprotein/usr/share/man/man1 predictprotein/usr/share/man/man5 predictprotein/usr/share/predictprotein predictprotein/usr/share/predictprotein/example predictprotein/usr/share/predictprotein/helper_apps predictprotein/usr/share/predictprotein/resources

Simon Domke sdomke-guest at alioth.debian.org
Tue Jul 10 15:44:07 UTC 2012


Author: sdomke-guest
Date: 2012-07-10 15:44:06 +0000 (Tue, 10 Jul 2012)
New Revision: 11712

Added:
   trunk/packages/predictprotein/trunk/debian/files
   trunk/packages/predictprotein/trunk/debian/predictprotein.debhelper.log
   trunk/packages/predictprotein/trunk/debian/predictprotein.postrm.debhelper
   trunk/packages/predictprotein/trunk/debian/predictprotein.substvars
   trunk/packages/predictprotein/trunk/debian/predictprotein/
   trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/
   trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/control
   trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/md5sums
   trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/postrm
   trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/preinst
   trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/templates
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/bin/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/bin/predictprotein
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/changelog.Debian.gz
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/changelog.gz
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/copyright
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/lintian/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/lintian/overrides/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/lintian/overrides/predictprotein
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man1/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man1/predictprotein.1.gz
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man5/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man5/prosite_scan.5.gz
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/MakefilePP.mk
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/cad23.fasta
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/p53.fasta
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tquick.fasta
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tsnap_query.fasta
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/blast2html.pl
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/filter_blastp_big.pl
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/nors.pl
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/prosite_scan.pl
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/predictproteinrc.default
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlHr.html
   trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlQuote.html
Log:


Added: trunk/packages/predictprotein/trunk/debian/files
===================================================================
--- trunk/packages/predictprotein/trunk/debian/files	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/files	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1 @@
+predictprotein_1.0.81-1_all.deb science optional

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/control
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/control	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/control	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,33 @@
+Package: predictprotein
+Version: 1.0.81-1
+Architecture: all
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+Installed-Size: 203
+Pre-Depends: debconf (>= 0.5)
+Depends: blast2, bioperl, ncoils (>= 2002-3), disulfinder, hmmer (>= 3.0), hmmer2, librg-liu-bundle-perl, librg-pp-bundle-perl (>= 1.0.21), librg-utils-perl (>= 1.0.24), lowcompseg, make, norsnet, norsp, perl, predictnls, profbval (>= 1.0.15), profisis, profphd, proftmb (>= 1.1.9-1)
+Recommends: pp-popularity-contest
+Suggests: loctree (>= 1.0.20), metadisorder, pp-cache-mgr (>= 1.0.3), profasp, rost-runpsic (>= 1.0.5), tmhmm (>= 2.0)
+Conflicts: coiledcoils
+Section: science
+Priority: optional
+Homepage: http://www.predictprotein.org
+Description: Package providing tools for sequence analysis
+ PredictProtein is a sequence analysis suite providing prediction of protein
+ structure and function. It takes protein sequences or alignments as input
+ and provides the following per-residue or whole protein annotations: multiple
+ sequence alignments, PROSITE sequence motifs, low-complexity regions, nuclear
+ localisation signals, regions lacking regular structure (NORS), secondary
+ structure, solvent accessibility, globular regions, transmembrane helices,
+ coiled-coil regions, structural switch regions, disulfide-bonds, sub-cellular
+ localization, disordered regions, B-value flexibility, protein-protein
+ interaction sites and protein-DNA interaction sites. Upon request fold
+ recognition by prediction-based threading, predictions of transmembrane
+ strands and inter-residue contacts are also available.
+ .
+ tmhmm (http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?tmhmm) is non-
+ redistributable.  Please obtain your own version and configure
+ predictprotein accordingly.
+ .
+ loctree depends on SignalP (http://www.cbs.dtu.dk/services/SignalP/) that
+ is non-redistributable.  Please obtain your own version and configure
+ predictprotein accordingly.

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/md5sums
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/md5sums	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/md5sums	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,19 @@
+75c43464f64353e6be536006ffccd185  usr/bin/predictprotein
+f8fcc9f18faa206d88f1ad92b71e16b4  usr/share/doc/predictprotein/changelog.Debian.gz
+c630da8e45dea79a701b38c3bb8f8ec9  usr/share/doc/predictprotein/changelog.gz
+33e6b78c742230ad8a5302dd82d17d37  usr/share/doc/predictprotein/copyright
+d41d8cd98f00b204e9800998ecf8427e  usr/share/lintian/overrides/predictprotein
+3cd986aee22c1f3677e76fed9d1bad9c  usr/share/man/man1/predictprotein.1.gz
+20c02655e41953c250fbce2797db01a1  usr/share/man/man5/prosite_scan.5.gz
+fcbb41e16e1d86e493c6dbee3cb56c90  usr/share/predictprotein/MakefilePP.mk
+cdd34cf6b5bda869f740378a05eeee5a  usr/share/predictprotein/example/cad23.fasta
+c1e876c08339242c013d7a156a84da89  usr/share/predictprotein/example/p53.fasta
+bf59da0fa77b7f46253547e1d01a2f4a  usr/share/predictprotein/example/tquick.fasta
+1ea45726c1abda63c03b98b87b24fe56  usr/share/predictprotein/example/tsnap_query.fasta
+3de14161a22241311af8a55790c819ac  usr/share/predictprotein/helper_apps/blast2html.pl
+26023df608b157e0ebb3982eaf449fc3  usr/share/predictprotein/helper_apps/filter_blastp_big.pl
+563a6a6381adeec2c7504f25f2573f4a  usr/share/predictprotein/helper_apps/nors.pl
+c99b270ce004971502bc43dcd302bdac  usr/share/predictprotein/helper_apps/prosite_scan.pl
+a7e94dd8d71cd31d804bc066648e15d2  usr/share/predictprotein/predictproteinrc.default
+90d34079a1b7159d0dd3fddd5bea532e  usr/share/predictprotein/resources/HtmlHr.html
+a9523be3eed2b86dfe27b2af6c10f7d9  usr/share/predictprotein/resources/HtmlQuote.html

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/postrm
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/postrm	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/postrm	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,8 @@
+#!/bin/sh
+set -e  # abort the maintainer script on the first failing command
+# Automatically added by dh_installdebconf
+if [ "$1" = purge ] && [ -e /usr/share/debconf/confmodule ]; then  # only on purge, and only while debconf is still installed
+	. /usr/share/debconf/confmodule
+	db_purge  # drop this package's questions from the debconf database
+fi
+# End automatically added section


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/postrm
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/preinst
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/preinst	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/preinst	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,90 @@
+#!/bin/sh
+# preinst script for predictprotein
+#
+# see: dh_installdeb(1)
+
+set -e
+
+# do we have debconf?
+if [ -f /usr/share/debconf/confmodule ]; then
+    . /usr/share/debconf/confmodule
+    DEBCONF=true  # non-empty: validate_academic asks its question through debconf
+else
+    DEBCONF=  # empty: validate_academic falls back to a plain prompt
+fi
+
+
+# summary of how this script can be called:
+#        * <new-preinst> `install'
+#        * <new-preinst> `install' <old-version>
+#        * <new-preinst> `upgrade' <old-version>
+#        * <old-preinst> `abort-upgrade' <new-version>
+# for details, see http://www.debian.org/doc/debian-policy/ or
+# the debian-policy package
+
+validate_academic() {  # confirm non-profit/academic use; exits 1 (aborting the installation) otherwise
+    if [ -n "$DEBCONF" ]; then
+	# Ask through debconf; a question that is not displayed must not abort the script.
+	db_input high shared/predictprotein/academic || true
+	db_go || true
+	# Throw error if not academic or equivalent:
+	db_get shared/predictprotein/academic
+	if [ "$RET" != "true" ]; then
+		# The user is for profit - $$$ - but perhaps this system should forget about it so that the question is asked again.
+		#db_unregister shared/predictprotein/academic
+		#db_fset shared/predictprotein/contact_biosof seen false
+		db_input high shared/predictprotein/contact_biosof || true
+		db_go || true
+		db_purge || true
+		echo "aborting - ineligible organization" >&2
+		exit 1
+	fi
+    else
+	# no debconf, fall back to a plain prompt
+	cat << EOF
+This package is provided for non-profit educational, academic, and/or research
+institutions only.
+Organizations not covered under the license and users representing them please
+contact info at bio-sof.com to obtain a commercial license.
+EOF
+	# printf rather than 'echo -n', whose behaviour POSIX leaves implementation-defined
+	printf '%s' "Do you represent a non-profit educational, academic, and/or research institution? (y/n)" >&2
+	# Under 'set -e' a failed read (EOF, no terminal) would end the script silently; treat it as "no" so the explanation below is printed.
+	read -r line || line=
+	case ${line} in
+	    y|yes|Y|Yes|YES)
+		;;
+	    *)
+		echo "Please contact info at bio-sof.com to obtain a commercial license." >&2
+		echo "This package is provided for non-profit educational, academic, and/or research institutions only.  This installation is now going to abort." >&2
+		exit 1
+		;;
+	esac
+    fi
+}
+
+case "$1" in
+    install)  # fresh installation: the only case in which eligibility is checked
+    	validate_academic
+    ;;
+
+    upgrade)  # nothing to do; validate_academic is not called on upgrades
+    ;;
+
+    abort-upgrade)  # nothing to do
+    ;;
+
+    *)
+        echo "preinst called with unknown argument \`$1'" >&2
+        exit 1
+    ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+
+
+exit 0
+
+# vim:ai:


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/preinst
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/templates
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/templates	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/DEBIAN/templates	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1 @@
+

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/bin/predictprotein
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/bin/predictprotein	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/bin/predictprotein	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,971 @@
+#!/usr/bin/perl -w
+use warnings;
+use strict;
+use Carp qw(cluck :DEFAULT);
+use Cwd qw||;
+use Data::Dumper;
+use File::Copy;
+use File::Temp;
+use File::Basename;
+use Getopt::Long;
+use Pod::Usage;
+use Config::IniFiles;
+use Bio::Seq;
+use Bio::SeqIO;
+use RG::PP::Acl;
+
+$Carp::Verbose = 1; 
+
+our %meth_ext_hash = ( # result file extension => method the file is attributed to (printed by --print-ext-method-map, used when storing results into the cache)
+  asp => 'profasp',
+  blastPsiAli => 'blastpgp',
+  blastPsiMat => 'blastpgp',
+  blastPsiOutTmp => 'blastpgp',
+  blastPsiRdb => 'blastpgp',
+  blastPsi80Rdb => 'blastpgp',
+  blastpSwissM8 => 'blastpswiss',
+  chk => 'blastpgp',
+  clustalngz => 'psic',
+  coils => 'coiledcoils',
+  coils_raw => 'coiledcoils',
+  disis => 'profdisis',
+  disulfinder => 'disulfinder',
+  fasta => 'input',
+  globe => 'profglobe',
+  hmm2pfam => 'hmmer',
+  hmm3pfam => 'hmmer',
+  hmm3pfamTbl => 'hmmer',
+  hmm3pfamDomTbl => 'hmmer',
+  hssp => 'blastpgp',
+  hssp80 => 'blastpgp',
+  hsspPsiFil => 'hssp_filter',
+  in => 'input',
+  isis => 'profisis',
+  loctreeAnimal => 'loctree',
+  loctreeAnimalTxt => 'loctree',
+  loctreePlant => 'loctree',
+  loctreePlantTxt => 'loctree',
+  loctreeProka => 'loctree',
+  loctreeProkaTxt => 'loctree',
+  mdisorder => 'metadisorder',
+  nls => 'predictnls',
+  nlsDat => 'predictnls',
+  nlsSum => 'predictnls',
+  norsnet => 'norsnet',
+  nors => 'nors',
+  phdNotHtm => 'phd',
+  phdPred => 'phd',
+  phdRdb => 'phd',
+  proDom => 'prodom', # obsolete
+  profAscii => 'prof',
+  profbval => 'profbval',
+  profb4snap => 'profbval',
+  prof1Rdb => 'prof',
+  profRdb => 'prof',
+  proftmb => 'proftmb',
+  proftmbdat => 'proftmb',
+  prosite => 'prosite',
+  psic => 'psic',
+  safBlastPsi => 'blastpgp',
+  safBlastPsi80 => 'blastpgp',
+  segNormGCG => 'ncbi-seg',
+  segNorm => 'ncbi-seg',
+  seqGCG => 'input',
+  sumNors => 'nors',
+  tmhmm => 'tmhmm'
+);
+
+# SYSTEM CONFIGURATION RETRIEVAL: packaged defaults, overridden by /etc/predictproteinrc, overridden by $PREDICTPROTEINCONF or ~/.predictproteinrc
+our $config;
+BEGIN {
+    our $VERSION = "1.0.81";
+    delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; $ENV{PATH} = "/bin:/usr/bin"; # fixed, minimal environment for the external commands run below
+    my ( $defaultconfig, $etcconfig );
+    if( -e "/usr/share/predictprotein/predictproteinrc.default" ) { $defaultconfig = Config::IniFiles->new( -file => "/usr/share/predictprotein/predictproteinrc.default" ); }
+    if( -e "/etc/predictproteinrc" ) { $etcconfig = Config::IniFiles->new( -file => "/etc/predictproteinrc", -import => $defaultconfig ); } else { $etcconfig = $defaultconfig; }
+    my( $userrc ) = grep { defined( $_ ) && length( $_ ) && -e $_ } ( $ENV{PREDICTPROTEINCONF}, ( defined( $ENV{HOME} ) ? "$ENV{HOME}/.predictproteinrc" : undef ) ); # first per-user file that exists; HOME may be unset, and a missing $PREDICTPROTEINCONF must not shadow ~/.predictproteinrc
+    ( $config = ( defined( $userrc ) ? Config::IniFiles->new( -file => $userrc, -import => $etcconfig ) : $etcconfig ) ) || die( "Error: no usable predictprotein configuration found (tried /usr/share/predictprotein/predictproteinrc.default, /etc/predictproteinrc and the per-user configuration file)\n" ); # fail here with a clear message instead of a later method call on undef
+}
+
+# popularity contest
+if( system('pp_popcon_cnt', '-p', 'predictprotein') == -1 ){ warn("The Rost Lab recommends you install the pp-popularity-contest package that provides pp_popcon_cnt:\n\nsudo apt-get install pp-popularity-contest\n"); }
+
+# NOTE: all configuration can be overridden by command line options
+my $pp_rootdir  = ( glob( $config->val('predictprotein', 'predictprotein_utildir') || '' ) || '/usr/share/predictprotein' ).'/';
+# Paths are passed through glob(); the default is applied before '/' is appended, because "PATH.'/' || DEFAULT" is always true and never reaches DEFAULT
+my $bigblastdb = glob( $config->val('predictprotein', 'bigblastdb' ) );
+my $big80blastdb = glob( $config->val('predictprotein', 'big80blastdb' ) );
+my @default_targets = ( $config->val('predictprotein', 'default_targets') ? split(/ /o, $config->val('predictprotein', 'default_targets') ) : () );
+my $librg_utils_perl = ( glob( $config->val('predictprotein', 'librg_utils_perl') || '' ) || '/usr/share/librg-utils-perl' ).'/';
+my $light_cache = $config->val('predictprotein', 'light_cache') || 0;
+my $numresmax = $config->val('predictprotein', 'numresmax') || 6000; # Marco's suggestion, we know it now blows up for over 19000 (convert_seq I think)
+my $pfam2db = glob($config->val('predictprotein', 'pfam2db'));
+my $pfam3db = glob($config->val('predictprotein', 'pfam3db'));
+my $prodomblastdb; # obsolete, kept here to not break interfaces that still use this cmd line option
+my $prof_app_root = glob($config->val('predictprotein', 'prof'));
+my $profnumresmin = $config->val('predictprotein', 'profnumresmin') || 17;
+my $prositedat = glob( $config->val('predictprotein', 'prositedat' ) );
+my $prositeconvdat = glob( $config->val('predictprotein', 'prositeconvdat' ) );
+my $psicexe = glob( $config->val('predictprotein', 'psicexe' ) );
+my $spkeyidx = glob( $config->val('predictprotein', 'spkeyidx' ) );
+my $swissblastdb = glob( $config->val('predictprotein', 'swissblastdb' ) );
+my $use_cache = $config->val('predictprotein', 'use_cache');
+# root_dir is where MakefilePP.mk is looked up unless --make-file is given
+my $root_dir = $pp_rootdir || "/usr/share/predictprotein";
+my $make_file = $root_dir."/MakefilePP.mk";
+
+#my $output_format = "text";
+my $work_dir;
+
+# COMMAND LINE OPTIONS
+my( $help, $man, $dbg, $dry_run, $prot_name, $makedebug, @method, $output_dir, $print_ext_method_map, $seqfile, $sequence_in, $setacl, @target, $version);
+#my $input_file;
+
+my $cache_merge_cl;
+my $force_cache_store = 0;
+my $num_cpus = 1;
+my $blast_processors = 1;
+
+if (@ARGV < 1){ die "Usage: $0 [OPTIONS]\n";}
+
+my $result = GetOptions (
+          'acl|setacl=s' => \$setacl,
+          'bigblastdb=s' => \$bigblastdb,
+          'big80blastdb=s' => \$big80blastdb,
+			    "blast-processors=i" => \$blast_processors,
+			    "c|num-cpus=i" =>\$num_cpus,
+          "cache-merge!" => \$cache_merge_cl,
+			    "d|debug!" => \$dbg,
+#			    "f|output-format=s" => \$output_format,
+          "force-cache-store!" => \$force_cache_store,
+			    "help|?" => \$help,
+#          "i|input-file=s" => \$input_file,
+			    "m|make-file=s" =>\$make_file,
+          "makedebug=s" => \$makedebug,
+			    "man" => \$man,
+          'method=s@' => \@method,
+          "numresmax=i" => \$numresmax,
+			    "n|dryrun" => \$dry_run,
+			    "o|output-dir=s" => \$output_dir,
+			    "p|prot-name=s" =>\$prot_name,
+          "pfam2db=s" => \$pfam2db,
+          "pfam3db=s" => \$pfam3db,
+          "print-ext-method-map!" => \$print_ext_method_map,
+          "prodomblastdb=s" => \$prodomblastdb, # kept for compatibility
+          "profnumresmin=i" => \$profnumresmin,
+          "prositedat=s" => \$prositedat,
+          "prositeconvdat=s" => \$prositeconvdat,
+          "psicexe=s" => \$psicexe,
+          "spkeyidx=s" => \$spkeyidx,
+			    "s|seq|sequence=s" =>\$sequence_in,
+          'seqfile=s' => \$seqfile,
+          'swissblastdb=s' => \$swissblastdb,
+          'target=s@' => \@target,
+          'use-cache!' => \$use_cache,
+			    "v|version" => \$version,
+			    "w|work-dir=s" => \$work_dir
+) or pod2usage(2);
+
+pod2usage(0) if $help;
+pod2usage(-verbose => 2) if $man;
+if ($version){
+    print STDERR qq|This is PredictProtein version 1.0.81
+
+Copyright 1992-2010, Rostlab
+
+Please see COPYING file for license information
+
+Complete documentation for PredictProtein should be found on
+this system using "man predictprotein" or at http://www.predictprotein.org/.
+
+|;
+    exit (0);
+}
+
+if( $print_ext_method_map ) { foreach my $ext ( sort{$a cmp $b}(keys(%meth_ext_hash)) ){ print $ext, "\t", $meth_ext_hash{$ext}, "\n"; }; exit(0); }
+
+if( $seqfile )
+{
+  my $in = Bio::SeqIO->new( ( $seqfile eq '-' ? ( -fh => \*STDIN ) : ( -file => scalar( glob( $seqfile ) ) ) ), -format => 'Fasta' );
+  my $seq = $in->next_seq(); # only the first record is used; undef when the input holds no sequence
+  $sequence_in = ( $seq ? $seq->seq() : undef ); # leave unset so that the check below reports the problem instead of a method call on undef
+}
+# Get sequence
+$sequence_in || die("Error: no input sequence".( $dbg ? '' : "\n" ));
+
+my( $makemethod, $maketarget, $hash_meth ) = _get_makemethod( \@method );
+my $hash_meth_as_options = _get_method_options( $hash_meth );
+
+my $cache_merge = 0;
+{
+  # lkajan: warning: meddling with default value ahead:
+  # if --use-cache and --noforce-cache-store and --target is used and the cache is not empty, make $cache_merge ON by default, OFF otherwise
+  $cache_merge = ( ( $use_cache && !$force_cache_store && @target ) ? 1 : 0 );
+  if( defined( $cache_merge_cl ) ){ $cache_merge = $cache_merge_cl; }
+
+  # lkajan: if there is nothing in the cache, silently ignore a request (on the cmd line or otherwise) for cache merging:
+  if( $cache_merge && !_in_cache( $sequence_in, $hash_meth_as_options ) ){ $cache_merge = 0; }
+}
+
+if($dbg){ warn("cache merging is ".( $cache_merge ? 'on' : 'off' )); }
+
+# force_cache_store, cache_merge_cl imply use-cache
+if( $force_cache_store || $cache_merge_cl ){ $use_cache = 1; }
+
+# force_cache_store can not be used together with cache_merge (the option is spelled --cache-merge on the command line)
+if( $force_cache_store && $cache_merge ){ die("Error: --force-cache-store is incompatible with --cache-merge".( $dbg ? '' : "\n" )); }
+
+if( !$output_dir && !$use_cache && !$work_dir ){ die("Error: no output directory or work directory given and no cache is to be used. Results would be lost unless you use --output-dir, --work-dir or --use-cache.".( $dbg ? '' : "\n" ) ); }
+if( $output_dir )
+{
+  $output_dir = Cwd::realpath(glob($output_dir));
+  system( 'mkdir', '-p', $output_dir ) && die( "could not mkdir '$output_dir': $?" );
+}
+
+if(! $prot_name){ $prot_name="query"; }
+if( $prot_name =~ /[^[:alnum:]._-]/o ){ die("Error: invalid protein name, please use only [[:alnum:]._-]".( $dbg ? '' : "\n" )); }
+
+# check acl syntax if given
+if( $setacl ) { RG::PP::Acl::acl2hash( $setacl ); }
+
+if( !@target ){ if( @default_targets ){ @target = @default_targets; } else { @target = ( 'all' ); } } # lkajan: attention: `all' does not mean all methods, there also are the `optional' ones
+push @target, @$maketarget;
+
+# there's no need to give a target more than once - collapse duplicates
+{
+  my %tgt_hash = map { ( $_ => 1 ); } @target;
+  @target = keys %tgt_hash;
+}
+
+# get number of processors on this host
+#my @num_cpus=();
+#if (!($num_cpus) && -e '/proc/cpuinfo'){
+#     @num_cpus=`grep processor /proc/cpuinfo`;
+#     $num_cpus = scalar(@num_cpus);
+#}
+
+my $cache_dir;
+my $hashlockhdl;
+my $hashlockpid; END { if( $hashlockpid ){ local $?; kill 15, $hashlockpid; undef($hashlockpid); if( !close( $hashlockhdl ) ){ warn("failed to close ppc_lock pipe: ".( $! ? "$!" : " command exited with $?" )); } while(wait != -1){}; if($dbg){ warn("no children left"); } } }
+
+if( $use_cache && !$force_cache_store )
+{
+  # Do we have it in the cache? Runs ppc_fetch; when merging, ppc_lock is started first and the directory it prints becomes $cache_dir.
+  my @ppf_cmd = _get_ppf_cmd_base( $sequence_in, $hash_meth_as_options );
+
+  if( !$cache_merge )
+  {
+    # use cache, no force, no merge: fetch results to the output dir if not cache merge
+    if( $output_dir ){ push @ppf_cmd, ( '--output-dir', $output_dir ); }
+    if( $prot_name ){ push @ppf_cmd, ( '--prot-name', $prot_name ); }
+  }
+  else
+  {
+    # Obtain lock, keep ppc_fetch from locking
+    my @ppl_cmd = ( 'ppc_lock', '--seq', $sequence_in, '--parent', $$, '--allow-cache-write', @$hash_meth_as_options );
+    if( $dbg ){ cluck( "@ppl_cmd" ); }
+    $hashlockpid = open( $hashlockhdl, '-|' ); if( !defined( $hashlockpid ) ){ die("failed to execute '|@ppl_cmd': $!"); } # a failed fork reports its reason in $!, not $?
+    if( !$hashlockpid ){ if( !exec { $ppl_cmd[0] } @ppl_cmd ){ confess( "failed to call @ppl_cmd: $!" ); } }
+    $cache_dir = <$hashlockhdl>; if( defined( $cache_dir ) ){ chomp( $cache_dir ); } # undef when ppc_lock exits without printing anything
+    # lkajan 20110514: with acls -w does not report correctly
+    if( !defined( $cache_dir ) ){ confess( "error: ppc_lock did not report a cache directory" ); }
+    if( !-e $cache_dir ){ confess( "error: $cache_dir does not exist" ); }
+    push @ppf_cmd, '--skip-hash-lock';
+  }
+
+  if( $dbg ){ cluck( "@ppf_cmd" ); }
+
+  my( @cachefiles, $closeret );
+  {
+    my $pid = open( my $pipe, '-|' ); if( !defined( $pid ) ){ die("failed to execute '|@ppf_cmd': $!"); } # fork failure: reason is in $!
+    if( !$pid ){ if( !exec { $ppf_cmd[0] } @ppf_cmd ){ confess( "failed to call @ppf_cmd: $!" ); } }
+    @cachefiles = <$pipe>;
+    $closeret = close( $pipe ); # false when ppc_fetch exited non-zero
+    if( !$closeret && $! ){ confess("failed to call @ppf_cmd: $!"); }
+  }
+  #
+  if( !$cache_merge && $closeret )
+  {
+    # in cache, so we do not run. The ppc_fetch call fetched results to output-dir if we had that set, nothing left to do.
+    exit(0);
+  }
+}
+
+# not in cache or merge or forced recalc
+if( length($sequence_in) < $profnumresmin )
+{
+  $! = 254; die("ERROR: sequence is too short, shorter than minimum length required by prof ($profnumresmin)".( $dbg ? '' : "\n" )); # die() exits with a non-zero $! as status, so this exits with 254
+}
+if( length($sequence_in) > $numresmax )
+{
+  $! = 253; die("ERROR: sequence is too long, longer than maximum length ($numresmax)".( $dbg ? '' : "\n" )); # same idiom: exit status 253
+}
+
+# If not cache_merge, run in given or tmp work dir. If cache_merge then run directly on cache directory.
+if( !$cache_merge )
+{
+  if( ! $work_dir ){ $work_dir = File::Temp::tempdir( CLEANUP => !$dbg ); }
+  else { $work_dir = Cwd::realpath(glob($work_dir)); system ( "mkdir", '-p', $work_dir ) && die( "could not mkdir '$work_dir': $?" ); }
+}
+else
+{
+  $work_dir = $cache_dir;
+}
+
+# PRINT OUT JOB ENV
+if ($dbg){ cluck "work_dir=$work_dir"; }
+
+# work_seq_file holds sequence information and resides in intermediate work directory
+my $work_seq_file = "$work_dir/$prot_name.in";
+
+if( ! -e $work_seq_file )
+{
+  my $seqobj = Bio::Seq->new( -display_id => $prot_name, -seq => uc($sequence_in) );
+  my $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => ">$work_seq_file" );
+  $seqout->write_seq($seqobj);
+}
+
+
+# RUN MAKEFILE
+my @cmd = ();
+push (@cmd,"make");
+push (@cmd,"INFILE=$prot_name.in");
+push (@cmd,"-C", $work_dir);
+push (@cmd,"JOBID=".$prot_name);
+push (@cmd, "-n") if ($dry_run);
+push (@cmd, "-j", $num_cpus);
+if( defined($makedebug) ){ push (@cmd, "--debug=$makedebug" ); }
+push (@cmd, "BLASTCORES=".$blast_processors);
+push (@cmd, "LIBRGUTILS=".$librg_utils_perl);
+push (@cmd, "PPROOT=".$pp_rootdir);
+push (@cmd, "PROFNUMRESMIN=$profnumresmin");
+push (@cmd, "PROFROOT=".$prof_app_root);
+# resource locations e.g. BIGBLASTDB, BIG80BLASTDB, PROSITECONVDAT, SWISSBLASTDB
+if( $bigblastdb ){ push @cmd, "BIGBLASTDB=$bigblastdb"; }
+if( $big80blastdb ){ push @cmd, "BIG80BLASTDB=$big80blastdb"; }
+if( $pfam2db ){ push @cmd, "PFAM2DB=$pfam2db"; }
+if( $pfam3db ){ push @cmd, "PFAM3DB=$pfam3db"; }
+if( $prositedat ){ push @cmd, "PROSITEDAT=$prositedat"; }
+if( $prositeconvdat ){ push @cmd, "PROSITECONVDAT=$prositeconvdat"; }
+if( $psicexe ){ push @cmd, "PSICEXE=$psicexe"; }
+if( $spkeyidx ){ push @cmd, "SPKEYIDX=$spkeyidx"; }
+if( $swissblastdb ){ push @cmd, "SWISSBLASTDB=$swissblastdb"; }
+# method control params
+push (@cmd, @$makemethod );
+if( !$dbg ){ push @cmd, '--quiet'; } else { push (@cmd, "DEBUG=".$dbg); }
+push( @cmd, $make_file ? ( "-f", Cwd::realpath(scalar(glob($make_file))) ) : () );
+
+# list targets
+my @cmd_all = ( @cmd, @target );
+
+my $oldout;
+if($dbg) { cluck("@cmd_all"); }
+else
+{
+  # so the methods speak on standard out ... let's just silence them all
+  open( $oldout, '>&', \*STDOUT ) || die( $! );
+  open( STDOUT, '>', '/dev/null' ) || die( $! );
+}
+  #
+    ##
+      system(@cmd_all) && die( "@cmd_all failed: $?" );
+    ##
+  #
+if( !$dbg ) { open( STDOUT, '>&', $oldout ) || die( $! ); }
+
+# copy to output dir?
+# We have to copy results to the output directory /before/ storing because in case light_cache is on and the cache is being directly accessed, some result files are deleted from $work_dir during storing.
+if( $output_dir )
+{
+  my @cmd_install = ( @cmd, "DESTDIR=$output_dir", 'install' ); # same make invocation as above, with the `install' target instead of @target
+
+  if ($dbg) { cluck("@cmd_install"); }
+  system( @cmd_install ) && die( "@cmd_install failed: $?" ); # @cmd_install already ends in "DESTDIR=... install"; do not repeat it in the message
+
+  if( $dbg ){ warn( "results were copied into $output_dir" ); }
+}
+
+# cache store?
+if( $use_cache )
+{
+  # We have to combine method ctrl params (%$hash_meth) with result files here and that is not trivial. We are going to have logic here to establish which result file belongs to which method.
+  # Associate extensions with method names: %meth_ext_hash
+  # lkajan 20110514: there is no need for a precache_dir and an install into it since PP now leaves the working directory in pristine condition DOESN'T IT?!
+  my $precache_dir = $work_dir;
+  if( $dbg ){ warn("precache dir = $precache_dir"); }
+   
+  my $result_files = _ls_dir( $precache_dir );
+  foreach my $res_file (@$result_files)
+  {
+    if( substr( $res_file, 0, 1 ) eq '.' ){ next; }
+
+    # lkajan 20100415: certain result files we do not want to store into the cache in order to save space
+    # lkajan: these are result files that are big, derived from other files kept and not needed for the web interface (or anything of importance) so regeneration time is not crucial
+    # lkajan: at present these are: safBlastPsi safBlastPsi80 hssp
+    # lkajan: When cache_merge is in use, these files need to be actively removed from the precache_dir since in this case it is /the/ cache dir
+    if( $light_cache && $res_file =~ /\.(safBlastPsi|safBlastPsi80|hssp)$/o )
+    # lkajan 20111018: pending new policy for files stored in cache:
+    # - we keep files directly shown on the web interface
+    # - we keep the immediate input files of (top level) predictors so that
+    #   they can be re-run, files such as .hsspPsiFil and .blastpSwissM8
+    # - we do not keep ancestors of these files, such as .blastPsiOutTmp and
+    #   .hssp80
+    #if( $light_cache && $res_file =~ /\.(blastPsiAli|blastPsiOutTmp|hssp|hssp80|safBlastPsi|safBlastPsi80)$/o )
+    {
+      if( $cache_merge ){ if( $dbg ){ warn("light-cache removing '$precache_dir/$res_file'"); } unlink( "$precache_dir/$res_file" ) || confess("failed to remove '$precache_dir/$res_file': $!"); }
+      next;
+    }
+
+    # query.blastpSwissM8 => $method->{blastpswiss}->{res}->{blastpSwissM8} = "$precache_dir/$res_file";
+    my( $filename, $directories, $suffix ) = fileparse( $res_file, qr/\.[^.]*$/o );
+    my $extkey = substr( $suffix, 1 );
+
+    my $method = $meth_ext_hash{$extkey};
+    if( !$method ){ warn("Warning: extension '$extkey' does not have associated method"); $method = $extkey; }
+
+    $hash_meth->{$method}->{res}->{$extkey} = "$precache_dir/$res_file";
+  }
+  #if( $dbg ){ cluck( Dumper( $hash_meth ) ); }
+
+  # Now call ppc_store
+  my @method_args = map {
+    my $method = $_;
+    my $ctrl = join(',', map { my $cp = $_; "$cp=$hash_meth->{$method}->{ctrl}->{$cp}"; } keys(%{$hash_meth->{$method}->{ctrl}}) );
+    my $res = join(',', map { my $res = $_; "res_$res=$hash_meth->{$method}->{res}->{$res}"; } keys(%{$hash_meth->{$method}->{res}}) );
+    "--method=$method".( $ctrl ? ",$ctrl" : '' ).($res ? ",$res" : '' );
+  } keys(%$hash_meth);
+
+  my @cmd = (
+    'ppc_store',
+    ( $setacl ? ( '--setacl', $setacl ) : () ),
+    ( $cache_merge ? ( '--merge-res', '--skip-hash-lock' ) :  () ),
+    @method_args,
+    '--seq', $sequence_in
+  );
+
+  if( $dbg ){ cluck( "@cmd" ); }
+  system( @cmd ) && die( "@cmd failed: $?" );
+}
+
+exit(0);
+
+
+
+# lkajan: method - translate and pass --method=asp,ws=3,... --method=... method control parameters to make file
+# lkajan: idea: translate control params simply like this: `--method=MNAME,CTRLP1=CTRLVAL1,CTRLP2=CTRLVAL2' -> `MNAMECTRL=--CTRLP1=CTRLVAL1 --CTRLP2=CTRLVAL2'
+# lkajan: Also return all methods as targets.
+#
+# Parameters:
+#   $__method - array ref of method strings of the form `NAME[,PARAM=VALUE,...]'
+# Returns a list of three references:
+#   $makemethod - array ref with one `<UPPERCASE NAME>CTRL="--P1=V1 --P2=V2"' string per method
+#                 (the quoted part is empty when the method has no control parameters)
+#   $maketarget - array ref of the method names, in input order
+#   $hash_meth  - hash ref: method name => { ctrl => { PARAM => VALUE, ... } }
+# Dies when control parameters are given for prof, profbval or norsnet.
+# Confesses when a control parameter starts with `-'.
+sub               _get_makemethod
+{
+  my( $__method ) = @_;
+  my $makemethod = [];
+  my $maketarget = [];
+  my $hash_meth = {};
+
+  foreach my $method_cm ( @$__method )
+  {
+    # split each string in two parts: the method name and the rest (its parameters)
+    my( $meth1, $rest ) = split /,/o, $method_cm, 2;
+
+    push @$maketarget, $meth1;
+
+    # split the rest at commas into the individual parameters
+    my @tmp_hash; if( $rest ){ @tmp_hash = split /,/o, $rest; }
+
+    # separate the control parameters from the `res_' (result file) parameters
+    my @tmp_ctrl = grep( !/^res_/o, @tmp_hash );
+    my @tmp_res = grep( /^res_/o, @tmp_hash );
+
+    if( @tmp_res ){ warn( "Warning: `res_'-type parameters were given ('@tmp_res') - these are ignored by this program\n" ); }
+
+    if( @tmp_ctrl )
+    {
+      # Certain methods like profbval do not accept Getopt::Long-style cmd line parameters. That's bad. We can not pass parameters to such methods because we do not want to program special cases for them.
+      # The long term solution is to make these methods accept Getopt::Long params.
+      if(
+        $meth1 eq 'prof' ||
+        $meth1 eq 'profbval' ||
+        $meth1 eq 'norsnet'
+      ){ die("Error: this interface does not support passing control parameters to $meth1 because of ${meth1}'s primitive command line interface.".( $dbg ? '' : "\n" )); }
+    }
+
+    # translate the control parameters into one make variable assignment: every `P=V' becomes `--P=V'
+    push @$makemethod, uc( $meth1 ).'CTRL="'.join(' ', map {
+      my $ctrlpair = $_;
+      if( $ctrlpair =~ /^-/o ){ confess( "Error: control parameter with leading `-': '$ctrlpair'" ); } "--$_";
+    } @tmp_ctrl ).'"';
+
+    # Build the parameter => value hash one pair at a time. A parameter given without `=value' is stored
+    # with an empty value so that it can not shift the key/value pairing of the parameters after it.
+    # Empty parameters (e.g. from `a,,b=1') contribute nothing.
+    my $ctrl = { map {
+      my( $name, $value ) = split( /=/o, $_, 2 );
+      defined( $name ) ? ( $name => ( defined( $value ) ? $value : '' ) ) : ();
+    } @tmp_ctrl };
+    $hash_meth->{$meth1}{'ctrl'} = $ctrl;
+  }
+
+  return( $makemethod, $maketarget, $hash_meth );
+}
+
+
+# Return an array ref holding the names of all entries of directory $__dir, exactly as readdir() reports them.
+# Confesses when the directory can not be opened.
+sub               _ls_dir
+{
+  my( $__dir ) = @_;
+
+  my $dh;
+  if( !opendir( $dh, $__dir ) ){ confess( "failed to open $__dir: $!" ); }
+
+  my @entries = readdir( $dh );
+  closedir( $dh );
+
+  return \@entries;
+}
+
+
+# Turn the method hash into command line options.
+#
+# Parameters:
+#   $__hash_meth - hash ref: method name => { ctrl => { PARAM => VALUE, ... } }
+# Returns an array ref of the form [ '--method', 'NAME,P1=V1,P2=V2', '--method', 'NAME2', ... ];
+# a method without control parameters is emitted as its bare name. Order follows keys().
+sub               _get_method_options
+{
+  my( $__hash_meth ) = @_;
+
+  my @options;
+  foreach my $meth_name ( keys( %$__hash_meth ) )
+  {
+    # collect the `PARAM=VALUE' pairs of this method
+    my @pairs;
+    foreach my $param ( keys( %{$__hash_meth->{$meth_name}->{ctrl}} ) )
+    {
+      push @pairs, "$param=$__hash_meth->{$meth_name}->{ctrl}->{$param}";
+    }
+    my $ctrl_str = join( ',', @pairs );
+
+    # append the pairs to the method name only when there are any
+    my $spec = $ctrl_str ? "$meth_name,$ctrl_str" : $meth_name;
+    push @options, '--method', $spec;
+  }
+
+  return \@options;
+}
+
+
+# Run `ppc_fetch ... --print-dir' for the given sequence and methods and report whether it succeeded.
+#
+# Parameters:
+#   $__seq                  - sequence, passed on to _get_ppf_cmd_base()
+#   $__hash_meth_as_options - array ref of method options, passed on to _get_ppf_cmd_base()
+# Returns the result of close() on the pipe to the command: true when the command exited with
+# status 0, false when it exited with a non-zero status.
+# Dies when the fork fails; confesses when the command can not be executed or the pipe can not be closed.
+sub               _in_cache
+{
+  # lkajan: this is a very quick check, it should not do anything else but tell if the cache slot is empty or not
+  # lkajan: warning: race condition here: whatever this call finds may not be true by the time ppc_lock or another fetch is executed
+  my( $__seq, $__hash_meth_as_options ) = @_;
+
+  my @ppf_cmd = _get_ppf_cmd_base( $__seq, $__hash_meth_as_options ); # ppc_fetch --seq ... --method ... --method ...
+  push @ppf_cmd, "--print-dir";
+
+  if( $dbg ){ cluck( "@ppf_cmd" ); }
+
+  my $closeret;
+  {
+    # Forking open: the parent reads the child's standard output from $pipe.
+    # When the fork fails the reason is in $!, not in $?.
+    my $pid = open( my $pipe, '-|' ); if( !defined( $pid ) ){ die("failed to execute '|@ppf_cmd': $!"); }
+    # Child: replace this process with the command; the indirect object form never goes through a shell.
+    if( !$pid ){ if( !exec { $ppf_cmd[0] } @ppf_cmd ){ confess( "failed to call @ppf_cmd: $!" ); } }
+    # Parent: read all output of the command; the lines themselves are not used.
+    my @cachefiles = <$pipe>;
+    # close() waits for the child; it returns false with $! == 0 when the child merely exited non-zero.
+    $closeret = close( $pipe );
+    if( !$closeret && $! ){ confess("failed to call @ppf_cmd: $!"); }
+  }
+
+  return $closeret;
+}
+
+
+# Build the base of a ppc_fetch command line.
+#
+# Parameters:
+#   $__seq                  - sequence to pass with `--seq'
+#   $__hash_meth_as_options - array ref of further options, appended as they are
+# Returns the list ( 'ppc_fetch', '--seq', $__seq, <options...> ); in scalar context the number of elements.
+sub               _get_ppf_cmd_base
+{
+  my( $__seq, $__hash_meth_as_options ) = @_;
+
+  # Use the sequence handed in by the caller, not the file-level $sequence_in.
+  my @ret = ( 'ppc_fetch', '--seq', $__seq, @$__hash_meth_as_options );
+  return @ret;
+}
+
+
+__END__
+
+=head1 NAME
+
+predictprotein - analyse protein sequence
+
+=head1 SYNOPSIS
+
+predictprotein [options]
+
+=head1 DESCRIPTION
+
+predictprotein runs a set of protein sequence analysis methods:
+
+=head2 Standard methods
+
+These methods are run by the default target 'all':
+
+ Feature                Target            Extension               Man page
+ -------                ------            ---------               --------
+ atom mobility          profbval          profbval, profb4snap    profbval(1)
+ bacterial transmem-    proftmb           proftmb, proftmbdat     proftmb(1)
+  brane beta barrels
+ coiled-coils           coiledcoils       coils, coils_raw        coils-wrap(1)
+                                                                  ncoils(1)
+ conformational         profasp           asp                     profasp(1)
+  switches
+ disulfide bridges      disulfinder       disulfinder             disulfinder(1)
+ local alignment        blast             blastPsiOutTmp, chk,    blastpgp(1)
+                                          blastPsiMat,
+                                          blastPsiAli,
+                                          blastpSwissM8           blastall(1)
+ local complexity       ncbi-seg          segNorm, segNormGCG     ncbi-seg(1)
+ non-regular secondary  norsp             nors, sumNors           norsp(1)
+  structure
+ nuclear localization   predictnls        nls, nlsDat, nlsSum     predictnls(1)
+ Pfam scan hmmer v2     hmm2pfam          hmm2pfam                hmm2pfam(1)
+ Pfam scan hmmer v3     hmm3pfam          hmm3pfam, hmm3pfamTbl,  hmmscan(1)
+                                          hmm3pfamDomTbl
+ PROSITE scan           prosite           prosite                 prosite_scan(1)
+ protein-protein        profisis          isis                    profisis(1)
+  interaction sites
+ secondary structure,   prof              profRdb                 prof(1)
+  accessibility from
+  sequence profile
+ secondary structure,   prof              prof1Rdb                prof(1)
+  accessibility from
+  single sequence
+ transmembrane          phd               phdPred, phdRdb         prof(1)
+  helices
+ unstructured loops     norsnet           norsnet                 norsnet(1)
+
+=head2 Optional methods
+
+These methods are non-redistributable or depend on non-redistributable software (indicated by '*').  You have to acquire the non-redistributable components yourself before you can use these methods.
+
+These methods are run by the target 'optional'.
+
+ Feature                Target            Extension               Man page
+ -------                ------            ---------               --------
+ disordered regions     metadisorder      mdisorder               metadisorder(1)
+ subcellular            loctree           loctree{Animal,Plant,   loctree(1)
+  localization           |                 Proka}{,Txt}
+                         tmhmm*           tmhmm
+ protein-DNA            profdisis         disis                   profdisis(1)
+  interaction sites
+ position-specific      psic*             psic, clustalngz        psic(1),
+  independent counts                                              runNewPSIC(1),
+  and its base multi-                                             clustalw(1)
+  ple alignment
+ transmembrane helices  tmhmm             tmhmm                   n.a.
+
+=head2 Resources
+
+ Database                             Cmd line argument
+ --------                             -----------------
+ big (Uniprot+PDB) blast database     --bigblastdb
+ big_80 (big @ 80% sequence identity  --big80blastdb
+   redundancy level) blast database
+ swiss blast database                 --swissblastdb
+ pfam v2 database                     --pfam2db
+ pfam v3 database                     --pfam3db
+ prosite_convert.dat                  --prositeconvdat
+
+=head3 Resources for optional targets
+
+ Database                             Cmd line argument
+ --------                             -----------------
+ big (Uniprot+PDB) blast database     --bigblastdb
+ prosite.dat                          --prositedat
+ Swiss-Prot keyword-to-accession      --spkeyidx
+  'index' for loctree
+
+=head2 Output format
+
+Method outputs are deposited into B<--output-dir>.  Each method has one or more file name extensions associated with it, see the table above.  Refer to the man page of the individual methods for further details.  Extensions ending with `gz' are compressed with gzip(1).
+
+=head1 REFERENCES
+
+=over
+
+=item Rost, B., Yachdav, G., and Liu, J. (2004). The PredictProtein server. Nucleic Acids Res, 32(Web Server issue), W321-6.
+
+=back
+
+In case you find predictprotein and the tools within useful please cite:
+
+* the references for PredictProtein, see above
+
+* the references for the tools you used, see REFERENCES on the man page of the tool
+
+=head1 OPTIONS
+
+=over
+
+=item B<--blast-processors>
+
+Number of processors to use, default = 1
+
+=item B<-c>, B<--num-cpus>
+
+Number of make jobs, default = 1
+
+=item B<-d>, B<--debug>
+
+=item B<--help>
+
+Print a brief help message and exit.
+
+=item B<-m>, B<--make-file>
+
+make file to use, default = /usr/share/predictprotein/MakefilePP.mk
+
+=item B<--makedebug>
+
+debug argument for make, see make(1)
+
+=item B<--man>
+
+This documentation page
+
+=item B<--method>
+
+Describes method control parameters and requests methods to run when B<--target> is not I<all>. Format example:
+ 
+ --method=profasp,ws=3
+ 
+* begin with the method name, e.g. `profasp'
+
+* list method control parameters, e.g. ws=3
+
+Not all methods support passing control parameters in this way due to their primitive command line interfaces.
+
+=item B<-n>, B<--dryrun>
+
+Do not execute, just show what is about to be run
+
+=item B<--numresmax>
+
+Maximum sequence length, default: I<6000>. Sequences longer than this will make predictprotein fail with the respective error code, see L<ERRORS>.
+
+=item B<-o>, B<--output-dir>
+
+Final location of output files, required unless caching is used.
+
+=item B<--print-ext-method-map>
+
+Print extension-to-method map.  Useful as input file for consistency checkers.  Format: <extension><tab><method>.
+
+=item B<--profnumresmin>
+
+Minimum sequence length required by prof, default: I<17>. Sequences shorter than this will make predictprotein fail with the respective error code, see L<ERRORS>.
+
+=item B<--psicexe>
+
+psic wrapper executable, default: /usr/share/rost-runpsic/runNewPSIC.pl
+
+=item B<-p>, B<--prot-name>
+
+Base name of result files and protein name in - for example - FASTA files. Default = `query'.
+
+Valid names are of the character set C<[[:alnum:]._-]>.
+
+=item B<-s>, B<--seq>, B<--sequence>
+
+one letter amino acid sequence input
+
+=item B<--seqfile>
+
+FASTA amino acid sequence file; if `-', standard input is read
+
+=item B<--spkeyidx>
+
+Swiss-Prot keyword-to-identifier 'index' file for loctree
+
+=item B<--target>=I<string>
+
+Method groups to run.  Give this argument for each target you need.  Default: the value of `default_targets' in the configuration file; `all' if that is not given.
+
+Some targets of interest:
+
+=over
+
+=item I<all>
+
+methods that are GPL or redistributable to non-commercial entities
+
+=item I<optional>
+
+methods that do not fit into I<all>
+
+=back
+
+Look at /usr/share/predictprotein/MakefilePP.mk for a list of targets ("Use the source Luke").
+
+=item B<-v>, B<--version>
+
+Print package version
+
+=item B<-w>, B<--work-dir>
+
+Working directory, optional
+
+=back
+
+=head2 Database options
+
+=over
+
+=item B<--bigblastdb>
+
+Path to comprehensive blast database
+
+=item B<--big80blastdb>
+
+Path to comprehensive blast database at 80% sequence identity redundancy level
+
+=item B<--pfam2db>
+
+Pfam v2 database, e.g. F<Pfam_ls>
+
+=item B<--pfam3db>
+
+Pfam v3 database, e.g. F<Pfam-A.hmm>
+
+=item B<--prodomblastdb>
+
+Obsolete.  This argument is kept only to maintain compatibility with older versions.
+
+=item B<--prositedat>
+
+Path to `prosite.dat' file, see L<https://rostlab.org/owiki/index.php/Packages#Resource_definitions>
+
+=item B<--prositeconvdat>
+
+Path to `prosite_convert.dat' file, see L<https://rostlab.org/owiki/index.php/Packages#Resource_definitions>
+
+=item B<--swissblastdb>
+
+Path to SwissProt blast database
+
+=back
+
+=head2 Cache related options
+
+=over
+
+=item B<--acl>, B<--setacl>
+
+Set access control lists.  Access control lists are set I<only> in case results are stored in the cache.  This option is ineffective otherwise.
+All previous ACLs are lost - no merging.  The read bit controls browsability of results. Other bits are not used. E.g.
+
+ u:lkajan:4,u:gyachdav:4,g:lkajan:4,o::0
+
+=item B<--cache-merge>
+
+=item B<--nocache-merge>
+
+Merge/do not merge results into cache.  B<--cache-merge> reuses results already in cache; this turns B<--use-cache> on automatically.  B<--cache-merge> is incompatible with B<--force-cache-store>.
+
+B<--nocache-merge> is the default UNLESS
+
+=over
+
+=item * B<--use-cache> is on and
+
+=item * B<--noforce-cache-store> is in effect and
+
+=item * B<--target> is used and
+
+=item * the cache is not empty
+
+=back
+
+B<--cache-merge> is silently ignored in case the cache is empty.
+
+=item B<--force-cache-store>
+
+=item B<--noforce-cache-store>
+
+Enable/disable forcing storage of results into cache.  Implies B<--use-cache>.  Default: B<--noforce-cache-store>
+
+With B<--noforce-cache-store> when predictprotein finds cached results it simply fetches them from the cache and does no processing (even if the results are incomplete).  With B<--force-cache-store> predictprotein does not fetch anything from the cache but does store the results, completely replacing what was cached.
+
+B<--force-cache-store> is incompatible with B<--cache-merge>.
+
+=item B<--use-cache>
+
+=item B<--nouse-cache>
+
+Use/do not use cache for predictprotein results.  Default: B<--nouse-cache>.
+
+Option `use_cache' may be given in configuration files to override default.
+
+=back
+
+=head1 ERRORS
+
+=over
+
+=item 253
+
+Sequence is too long, see B<--numresmax>
+
+=item 254
+
+Sequence is too short, shorter than minimum length required by prof. See B<--profnumresmin>.
+
+=back
+
+=head1 EXAMPLES
+
+ predictprotein --seqfile /usr/share/predictprotein/example/tquick.fasta --output-dir /tmp/pp 
+
+ predictprotein --seqfile /usr/share/predictprotein/example/tquick.fasta --method=profasp,ws=3 --output-dir /tmp/pp 
+
+=head2 Cache examples
+
+=over
+
+=item Store results in cache, do not care about storing files in B<--output-dir>:
+
+ predictprotein --seqfile /usr/share/predictprotein/example/tquick.fasta --method=profasp,ws=3 --use-cache --setacl g:rostlab:7
+
+=item If not in cache store, otherwise fetch results from cache into B<--output-dir>:
+
+ predictprotein --seqfile /usr/share/predictprotein/example/tquick.fasta --method=profasp,ws=3 --use-cache --setacl g:rostlab:7 --output-dir /tmp/pp
+
+=back
+
+=head1 ENVIRONMENT
+
+=over
+
+=item PREDICTPROTEINCONF
+
+Location of predictproteinrc configuration file to use, overriding other configuration files
+
+=back
+
+=head1 FILES
+
+=over
+
+=item F</usr/share/predictprotein/predictproteinrc.default>
+
+Default configuration file. See this file for a description of the parameters.
+
+=item F</etc/predictproteinrc>
+
+System configuration file overriding values in F</usr/share/predictprotein/predictproteinrc.default>
+
+=item F<~/.predictproteinrc>
+
+User configuration file overriding values in F</etc/predictproteinrc>
+
+=back
+
+=head1 NOTES
+
+=head2 Popularity contest
+
+The pp-popularity-contest package included with this image sets up a cron job
+that will periodically anonymously submit to the Rost Lab developers
+statistics about the most used Rost Lab packages on this system.
+
+This information helps us make decisions such as which packages
+should receive high priority when fixing bugs.
+It also helps us decide which packages should receive funding for further
+development and support.
+This information is also very important when the Rost Lab applies for funding.
+
+Without the funding we receive based on the usage statistics you volunteer
+none of the packages on this image could be made available to you at no cost.
+
+In case you do not wish to participate in the popularity contest please
+remove the pp-popularity-contest package.
+
+=head1 AUTHOR
+
+Burkhard Rost, Antoine de Daruvar, Jinfeng Liu, Guy Yachdav, Laszlo Kajan
+
+=head1 SEE ALSO
+
+ppc_store(1), ppc_fetch(1), ppqsub(1)
+
+=cut
+
+# vim:et:ts=2:ai:


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/bin/predictprotein
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/changelog.Debian.gz
===================================================================
(Binary files differ)


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/changelog.Debian.gz
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/changelog.gz
===================================================================
(Binary files differ)


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/changelog.gz
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/copyright
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/copyright	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/doc/predictprotein/copyright	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,28 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: predictprotein
+Source: http://www.predictprotein.org/
+
+Files: *
+Copyright: 2010 ROSTLAB
+License: GPL-3.0+
+
+Files: debian/*
+Copyright: 2012 Simon Domke <domkes at in.tum.de>
+License: GPL-3.0+
+
+License: GPL-3.0+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    .
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    .
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    .
+    On Debian systems, the complete text of the GNU General Public
+    License version 3 can be found in "/usr/share/common-licenses/GPL-3".

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/lintian/overrides/predictprotein
===================================================================
Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man1/predictprotein.1.gz
===================================================================
(Binary files differ)


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man1/predictprotein.1.gz
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man5/prosite_scan.5.gz
===================================================================
(Binary files differ)


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/man/man5/prosite_scan.5.gz
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/MakefilePP.mk
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/MakefilePP.mk	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/MakefilePP.mk	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,467 @@
+#####################################
+#	PREDICTPROTEIN PIPELINE
+#
+#	(c) 2010 Guy Yachdav rostlab
+#	(c) 2010 Laszlo Kajan rostlab
+#####################################
+
+JOBID:=$(basename $(notdir $(INFILE)))
+
+DEBUG:=
+export DESTDIR:=.
+
+# These temporary directories are automatically removed after use
+DISULFINDDIR:=$(shell mktemp -d)
+TMHMMDIR:=$(shell mktemp -d)
+
+BLASTCORES := 1
+PROFNUMRESMIN := 17
+
+# FOLDER LOCATION (CONFIGURABLE)
+PPROOT:=/usr/share/predictprotein
+HELPERAPPSDIR:=$(PPROOT)/helper_apps/
+LIBRGUTILS:=/usr/share/librg-utils-perl/
+PROFROOT:=/usr/share/profphd/prof/
+
+# DATA (CONFIGURABLE)
+BIGBLASTDB:=/mnt/project/rost_db/data/blast/big
+BIG80BLASTDB:=/mnt/project/rost_db/data/blast/big_80
+DBSWISS:=/mnt/project/rost_db/data/swissprot/current/
+PFAM2DB:=/mnt/project/rost_db/data/pfam/Pfam_ls
+PFAM3DB:=/mnt/project/rost_db/data/pfam/Pfam-A.hmm
+PROSITEDAT:=/mnt/project/rost_db/data/prosite/prosite.dat
+PROSITECONVDAT:=/mnt/project/rost_db/data/prosite/prosite_convert.dat
+PSICMAT:=/usr/share/psic/blosum62_psic.txt
+SPKEYIDX:=/mnt/project/rost_db/data/swissprot/keyindex_loctree.txt
+SWISSBLASTDB:=/mnt/project/rost_db/data/blast/swiss
+
+# TOOLS (CONFIGURABLE)
+HMM2PFAMEXE:=hmm2pfam
+HMM3SCANEXE:=hmmscan
+NORSPEXE:=norsp
+PSICEXE:=/usr/share/rost-runpsic/runNewPSIC.pl
+
+# RESULTS FILES
+HSSPFILE:=$(INFILE:%.in=%.hssp)
+HSSP80FILE:=$(INFILE:%.in=%.hssp80)
+HSSPFILTERFILE:=$(INFILE:%.in=%.hsspPsiFil)
+COILSFILE:=$(INFILE:%.in=%.coils)
+COILSRAWFILE:=$(INFILE:%.in=%.coils_raw)
+NLSFILE:=$(INFILE:%.in=%.nls)
+NLSDATFILE:=$(INFILE:%.in=%.nlsDat)
+NLSSUMFILE:=$(INFILE:%.in=%.nlsSum)
+PHDFILE:=$(INFILE:%.in=%.phdPred)
+PHDRDBFILE:=$(INFILE:%.in=%.phdRdb)
+# lkajan: never make PHDNOTHTMFILE a target - its creation depends on whether phd found an HTM region or not: it isn't always created
+PHDNOTHTMFILE:=$(INFILE:%.in=%.phdNotHtm)
+PROFFILE:=$(INFILE:%.in=%.profRdb)
+# prof output generated explicitly from one sequence - NO alignment - Chris Schaefer initiated this - B approved
+PROF1FILE:=$(INFILE:%.in=%.prof1Rdb)
+ASPFILE:=$(INFILE:%.in=%.asp)
+NORSFILE:=$(INFILE:%.in=%.nors)
+NORSSUMFILE:=$(INFILE:%.in=%.sumNors)
+NORSNETFILE:=$(INFILE:%.in=%.norsnet)
+PROSITEFILE:=$(INFILE:%.in=%.prosite)
+SEGFILE:=$(INFILE:%.in=%.segNorm)
+SEGGCGFILE:=$(INFILE:%.in=%.segNormGCG)
+# this file is output from the first blastpgp call with 3 iterations
+BLASTFILE:=$(INFILE:%.in=%.blastPsiOutTmp)
+BLASTCHECKFILE:=$(INFILE:%.in=%.chk)
+# as of 20110415 Guy says the following IS needed by the web interface
+BLASTFILERDB:=$(INFILE:%.in=%.blastPsiRdb)
+# as of 20110415 Guy says the following is NOT needed by the web interface
+BLAST80FILERDB:=$(INFILE:%.in=%.blastPsi80Rdb)
+BLASTPSWISSM8:=$(INFILE:%.in=%.blastpSwissM8)
+BLASTMATFILE:=$(INFILE:%.in=%.blastPsiMat)
+BLASTALIFILE:=$(INFILE:%.in=%.blastPsiAli)
+DISULFINDERFILE:=$(INFILE:%.in=%.disulfinder)
+HMM2PFAM:=$(INFILE:%.in=%.hmm2pfam)
+HMM3PFAM:=$(INFILE:%.in=%.hmm3pfam)
+HMM3PFAMTBL:=$(INFILE:%.in=%.hmm3pfamTbl)
+HMM3PFAMDOMTBL:=$(INFILE:%.in=%.hmm3pfamDomTbl)
+SAFFILE:=$(INFILE:%.in=%.safBlastPsi)
+SAF80FILE:=$(INFILE:%.in=%.safBlastPsi80)
+FASTAFILE:=$(INFILE:%.in=%.fasta)
+GCGFILE:=$(INFILE:%.in=%.seqGCG)
+PROFBVALFILE:=$(INFILE:%.in=%.profbval)
+PROFB4SNAPFILE:=$(INFILE:%.in=%.profb4snap)
+METADISORDERFILE:=$(INFILE:%.in=%.mdisorder)
+PROFTEXTFILE:=$(INFILE:%.in=%.profAscii)
+# profcon is very slow and it is said not to have much effect on md results - so we do not run it
+PROFCONFILE:=$(INFILE:%.in=%.profcon)
+PROFTMBFILE:=$(INFILE:%.in=%.proftmb)
+PROFTMBDATFILE:=$(INFILE:%.in=%.proftmbdat)
+PCCFILE:=$(INFILE:%.in=%.pcc)
+LOCTREEANIMALFILE:=$(INFILE:%.in=%.loctreeAnimal)
+LOCTREEANIMALTXTFILE:=$(INFILE:%.in=%.loctreeAnimalTxt)
+LOCTREEPLANTFILE:=$(INFILE:%.in=%.loctreePlant)
+LOCTREEPLANTTXTFILE:=$(INFILE:%.in=%.loctreePlantTxt)
+LOCTREEPROKAFILE:=$(INFILE:%.in=%.loctreeProka)
+LOCTREEPROKATXTFILE:=$(INFILE:%.in=%.loctreeProkaTxt)
+PSICFILE:=$(INFILE:%.in=%.psic)
+CLUSTALNGZ:=$(INFILE:%.in=%.clustalngz)
+ISISFILE:=$(INFILE:%.in=%.isis)
+DISISFILE:=$(INFILE:%.in=%.disis)
+PPFILE:=$(INFILE:%.in=%.predictprotein)
+TMHMMFILE:=$(INFILE:%.in=%.tmhmm)
+
+DISULFINDERCTRL :=
+NCBISEGCTRL := "NOT APPLICABLE"
+METADISORDERCTRL :=
+NORSNETCTRL := "NOT APPLICABLE"
+NORSPCTRL := --win=70 --secCut=12 --accLen=10
+PREDICTNLSCTRL :=
+PROFCTRL := "NOT APPLICABLE"
+PROFASPCTRL := --ws=5 --z=-1.75 --min=9
+PROFBVALCTRL := "NOT APPLICABLE"
+PROFDISISCTRL :=
+PROFISISCTRL :=
+PROFTMBCTRL :=
+
+# lkajan: This target 'all' does NOT invoke all the methods! It only invokes the 'standard' methods: those that are available through hard Debian dependencies.
+# lkajan: So 'optional' targets are NOT included since these are not guaranteed to work.
+.PHONY: all
+all:  $(FASTAFILE) $(GCGFILE) $(SEGGCGFILE) blast disorder function html hssp interaction ncbi-seg pfam saf sec-struct subcell-loc
+
+.PHONY: blast
+blast: $(BLASTALIFILE) $(BLASTCHECKFILE) $(BLASTFILE) $(BLASTMATFILE) $(BLASTPSWISSM8)
+
+.PHONY: disorder
+disorder: norsnet profbval norsp
+
+.PHONY: function
+function: disulfinder predictnls prosite
+
+.PHONY: hssp
+hssp: $(HSSPFILE) $(HSSP80FILE) $(HSSPFILTERFILE)
+
+# lkajan: this target is for files that are solely needed by the web interface
+.PHONY: html
+html: $(BLASTFILERDB)
+
+.PHONY: interaction
+interaction: profisis
+
+.PHONY: pfam
+pfam: hmm2pfam hmm3pfam
+
+.PHONY: saf
+saf: $(SAFFILE) $(SAF80FILE)
+
+.PHONY: sec-struct
+sec-struct: $(PROFTEXTFILE) coiledcoils phd prof proftmb
+
+.PHONY: subcell-loc
+subcell-loc:
+
+# optional: these targets may not work in case the packages that provide them are missing - these packages are not hard requirements of PP
+#           These packages are usually non-redistributable or have some other problem with them.
+#           loctree depends on SignalP
+#           profdisis depends on svm-light5
+#
+#           psic is non-redistributable
+#           SignalP is non-redistributable
+#           svm-light5 is non-redistributable but there is an exception for the Rost Lab
+#           tmhmm is non-redistributable
+#
+# Optional targets should never appear in other aggregate targets (such as 'interaction').
+.PHONY: optional
+optional: loctree metadisorder profasp psic tmhmm
+
+.PHONY: coiledcoils
+coiledcoils: $(COILSFILE)
+
+.PHONY: loctree
+loctree: $(LOCTREEANIMALFILE) $(LOCTREEANIMALTXTFILE) $(LOCTREEPLANTFILE) $(LOCTREEPLANTTXTFILE) $(LOCTREEPROKAFILE) $(LOCTREEPROKATXTFILE)
+
+.PHONY: psic
+psic: $(PSICFILE) $(CLUSTALNGZ)
+
+# lkajan: rules that make multiple targets HAVE TO be expressed with %
+.SECONDARY: $(LOCTREEANIMALFILE) $(LOCTREEANIMALTXTFILE)
+%.loctreeAnimal %.loctreeAnimalTxt : $(FASTAFILE) $(BLASTPSWISSM8) $(HMM2PFAM) $(HSSPFILTERFILE) $(PROFFILE)
+	loctree --fasta $(FASTAFILE) --loctreeres $(LOCTREEANIMALFILE) --loctreetxt $(LOCTREEANIMALTXTFILE) \
+	  --use-blastall $(BLASTPSWISSM8) --use-blastall-names $(JOBID) --use-pfamres $(HMM2PFAM) --use-pfamres-names $(JOBID) --use-hssp-coll $(HSSPFILTERFILE) --use-rdbprof-coll $(PROFFILE) \
+	  --prosite-dat $(PROSITEDAT) --swissprot-docs-keyindex $(SPKEYIDX) \
+	  --org animal \
+	  $(if $(DEBUG), --debug, )
+
+.SECONDARY: $(LOCTREEPLANTFILE) $(LOCTREEPLANTTXTFILE)
+%.loctreePlant %.loctreePlantTxt : $(FASTAFILE) $(BLASTPSWISSM8) $(HMM2PFAM) $(HSSPFILTERFILE) $(PROFFILE)
+	loctree --fasta $(FASTAFILE) --loctreeres $(LOCTREEPLANTFILE) --loctreetxt $(LOCTREEPLANTTXTFILE) \
+	  --use-blastall $(BLASTPSWISSM8) --use-blastall-names $(JOBID) --use-pfamres $(HMM2PFAM) --use-pfamres-names $(JOBID) --use-hssp-coll $(HSSPFILTERFILE) --use-rdbprof-coll $(PROFFILE) \
+	  --prosite-dat $(PROSITEDAT) --swissprot-docs-keyindex $(SPKEYIDX) \
+	  --org plant \
+	  $(if $(DEBUG), --debug, )
+
+.SECONDARY: $(LOCTREEPROKAFILE) $(LOCTREEPROKATXTFILE)
+%.loctreeProka %.loctreeProkaTxt : $(FASTAFILE) $(BLASTPSWISSM8) $(HMM2PFAM) $(HSSPFILTERFILE) $(PROFFILE)
+	loctree --fasta $(FASTAFILE) --loctreeres $(LOCTREEPROKAFILE) --loctreetxt $(LOCTREEPROKATXTFILE) \
+	  --use-blastall $(BLASTPSWISSM8) --use-blastall-names $(JOBID) --use-pfamres $(HMM2PFAM) --use-pfamres-names $(JOBID) --use-hssp-coll $(HSSPFILTERFILE) --use-rdbprof-coll $(PROFFILE) \
+	  --prosite-dat $(PROSITEDAT) --swissprot-docs-keyindex $(SPKEYIDX) \
+	  --org proka \
+	  $(if $(DEBUG), --debug, )
+
+.SECONDARY: $(PSICFILE) $(CLUSTALNGZ)
+%.psic %.clustalngz : $(FASTAFILE) $(BLASTFILE)
+	# lkajan: Yana's $(PSICEXE) fails when there are no blast hits - catch those conditions. Even in those conditions we have to make the targets, preferably in the expected format (compressed for clustalngz).
+	# lkajan: there's a blast in here - remove its empty error.log
+	trap "rm -f error.log" EXIT; \
+	$(PSICEXE) --use-blastfile $(BLASTFILE) --infile $< $(if $(DEBUG), --debug, ) --quiet --min-seqlen $(PROFNUMRESMIN) --blastdata_uniref $(BIGBLASTDB) --blastpgp_seg_filter F --blastpgp_processors $(BLASTCORES) --psic_matrix $(PSICMAT) --psicfile $(PSICFILE) --save-clustaln '$(CLUSTALNGZ)' --gzip-clustaln; \
+	RETVAL=$$?; \
+	if [ -s error.log ]; then cat error.log >&2; fi; \
+	case "$$RETVAL" in \
+	  253) MSG="blastpgp: No hits found"; echo $$MSG > $(PSICFILE); echo $$MSG | gzip -c > $(CLUSTALNGZ) ;; \
+	  254) MSG="sequence too short"; echo $$MSG > $(PSICFILE); echo $$MSG | gzip -c > $(CLUSTALNGZ) ;; \
+	  *) exit $$RETVAL; ;; \
+	esac
+
+.PHONY: hmm2pfam
+hmm2pfam: $(HMM2PFAM)
+
+$(HMM2PFAM): $(FASTAFILE)
+	$(HMM2PFAMEXE) --cpu $(BLASTCORES) --acc --cut_ga $(PFAM2DB) $< > $@
+
+.SECONDARY: $(HMM3PFAM) $(HMM3PFAMTBL) $(HMM3PFAMDOMTBL)
+%.hmm3pfam %.hmm3pfamTbl %.hmm3pfamDomTbl : $(FASTAFILE)
+	$(HMM3SCANEXE) --cpu $(BLASTCORES) --acc --cut_ga --notextw --tblout $(HMM3PFAMTBL) --domtblout $(HMM3PFAMDOMTBL) -o $(HMM3PFAM) $(PFAM3DB) $<
+
+.PHONY: hmm3pfam
+hmm3pfam: $(HMM3PFAM) $(HMM3PFAMTBL) $(HMM3PFAMDOMTBL)
+
+.PHONY: tmhmm
+tmhmm: $(TMHMMFILE)
+
+$(TMHMMFILE): $(FASTAFILE)
+	trap "rm -rf '$(TMHMMDIR)'" EXIT; tmhmm --workdir=$(TMHMMDIR) --nohtml --noplot $< > $@
+
+.PHONY: proftmb
+proftmb: $(PROFTMBFILE) $(PROFTMBDATFILE)
+
+.SECONDARY: $(PROFTMBFILE) $(PROFTMBDATFILE)
+%.proftmb %.proftmbdat: $(BLASTMATFILE)
+	proftmb -d /usr/share/proftmb -a StateRedux4 -b ReduxDecode4 -m StrandStates -z Swiss_Zcurve -x ZCalibration -n bact.comp -c 4 -s Swiss8.arch -t Swiss8.params -w '$(JOBID)' $(PROFTMBCTRL) -q '$<' --outfile-pretty '$(PROFTMBFILE)' --outfile-tab '' --outfile-dat '$(PROFTMBDATFILE)' --quiet
+
+$(ISISFILE): $(FASTAFILE) $(PROFFILE) $(HSSPFILTERFILE)
+	profisis $(PROFISISCTRL) --fastafile $(FASTAFILE)  --rdbproffile $(PROFFILE) --hsspfile $(HSSPFILTERFILE)  --outfile $@
+
+.PHONY: profisis
+profisis: $(ISISFILE)
+
+$(DISISFILE): $(PROFFILE) $(HSSPFILTERFILE)
+	profdisis $(PROFDISISCTRL) --hsspfile $(HSSPFILTERFILE)  --rdbproffile $(PROFFILE)  --outfile $@
+
+.PHONY: profdisis
+profdisis: $(DISISFILE)
+
+.SECONDARY: $(PROFBVALFILE) $(PROFB4SNAPFILE)
+%.profbval %.profb4snap : $(FASTAFILE) $(PROFFILE) $(HSSPFILTERFILE)
+	profbval $(FASTAFILE) $(PROFFILE) $(HSSPFILTERFILE) $(PROFBVALFILE),$(PROFB4SNAPFILE) 1 5,snap $(DEBUG)
+
+.PHONY: profbval
+profbval: $(PROFBVALFILE) $(PROFB4SNAPFILE)
+
+$(NORSNETFILE): $(FASTAFILE) $(PROFFILE) $(HSSPFILTERFILE) $(PROFBVALFILE)
+	norsnet $(FASTAFILE) $(PROFFILE) $(HSSPFILTERFILE) $@ $(JOBID) $(PROFBVALFILE)
+
+.PHONY: norsnet
+norsnet: $(NORSNETFILE)
+
+$(METADISORDERFILE): $(FASTAFILE) $(PROFFILE) $(PROFBVALFILE) $(NORSNETFILE) $(HSSPFILTERFILE) $(BLASTCHECKFILE)
+	# This line reuses blast files used by other methods as well. On a handful of test cases this and Avner's version (below) gave only a tiny - seemingly insignificant - difference in results.
+	metadisorder $(METADISORDERCTRL) fasta=$(FASTAFILE) prof=$(PROFFILE) profbval_raw=$(PROFBVALFILE) norsnet=$(NORSNETFILE) hssp=$(HSSPFILTERFILE) chk=$(BLASTCHECKFILE) out=$@ out_mode=1
+
+.PHONY: metadisorder
+metadisorder: $(METADISORDERFILE)
+
+$(HSSPFILE): $(SAFFILE)
+	$(LIBRGUTILS)/copf.pl $< formatIn=saf formatOut=hssp fileOut=$@ exeConvertSeq=convert_seq
+
+$(HSSP80FILE): $(SAF80FILE)
+	$(LIBRGUTILS)/copf.pl $< formatIn=saf formatOut=hssp fileOut=$@ exeConvertSeq=convert_seq
+
+$(HSSPFILTERFILE): $(HSSP80FILE)
+	$(LIBRGUTILS)/hssp_filter.pl  red=80 $< fileOut=$@
+
+.PHONY: blastpSwissM8
+blastpSwissM8: $(BLASTPSWISSM8)
+
+# Runs blastall (blastp, tabular -m 8 output) of the query against $(SWISSBLASTDB).
+# The exit status is captured in a '||' branch: after 'if ! cmd; then' the shell
+# status holds the negated result (always 0), which hid blastall failures from make.
+$(BLASTPSWISSM8): $(FASTAFILE)
+	# lkajan: we have to switch off filtering (default for blastpgp) or sequences like ASDSADADASDASDASDSADASA fail with
+	# 'WARNING: query: Could not calculate ungapped Karlin-Altschul parameters due to an invalid query sequence or its translation. Please verify the query sequence(s) and/or filtering options'
+	# Does switching off filtering hurt us? Loctree uses the results of this for extracting keywords from swissprot, so I am not worried.
+	# This blast call also often writes 'Selenocysteine (U) at position 59 replaced by X' - we are not really interested. Silence this in non-debug mode.
+	trap "rm -f error.log" EXIT; \
+	( blastall -F F -a $(BLASTCORES) -p blastp -d $(SWISSBLASTDB) -b 1000 -e 100 -m 8 -i $< -o $@ $(if $(DEBUG), , >>error.log 2>&1) ) || { \
+		EXIT=$$?; if [ -s error.log ]; then cat error.log >&2; fi; exit $$EXIT; \
+	}
+
+.SECONDARY: $(COILSFILE) $(COILSRAWFILE)
+%.coils %.coils_raw : $(FASTAFILE)
+	coils-wrap -m MTIDK -i $< -o $(COILSFILE) -r $(COILSRAWFILE)
+
+# Runs disulfinder into the scratch directory $(DISULFINDDIR) and copies its result to the target.
+# The failure status is read in the 'else' branch, where the shell still holds
+# disulfinder's own exit code; after 'if ! cmd; then' it would be the negated
+# value (0) and make would treat a failed run as success.
+$(DISULFINDERFILE): $(BLASTMATFILE)
+	# lkajan: disulfinder now is talkative on STDERR showing progress - silence it when not DEBUG
+	trap "rm -rf '$(DISULFINDDIR)' error.log" EXIT; \
+	if ( disulfinder $(DISULFINDERCTRL) -a 1 -p $<  -o $(DISULFINDDIR) -r $(DISULFINDDIR) -F html $(if $(DEBUG), , >>error.log 2>&1) ); then \
+		\cp -a $(DISULFINDDIR)/$(notdir $<) $@; \
+	else \
+		EXIT=$$?; if [ -s error.log ]; then cat error.log >&2; fi; exit $$EXIT; \
+	fi
+
+.PHONY: disulfinder
+disulfinder: $(DISULFINDERFILE)
+
+.SECONDARY: $(NLSFILE) $(NLSDATFILE) $(NLSSUMFILE)
+%.nls %.nlsDat %.nlsSum : %.fasta
+	predictnls $(PREDICTNLSCTRL) fileIn=$< fileOut=$(NLSFILE) fileSummary=$(NLSSUMFILE) html=1 nlsdat=$(NLSDATFILE)
+
+.PHONY: predictnls
+predictnls: $(NLSFILE) $(NLSDATFILE) $(NLSSUMFILE)
+
+# Runs phd.pl in 'htm' mode on the filtered HSSP file; writes $(PHDFILE) and
+# $(PHDRDBFILE) and passes $(PHDNOTHTMFILE) as fileNotHtm.
+# $(PROFROOT) is concatenated directly with 'embl/' everywhere in this recipe,
+# so exePhd2dssp no longer inserts an extra '/' after it.
+.SECONDARY: $(PHDFILE) $(PHDRDBFILE)
+%.phdPred %.phdRdb : $(HSSPFILTERFILE)
+	$(PROFROOT)embl/phd.pl $(HSSPFILTERFILE) htm exePhd=phd1994 filterHsspMetric=$(PROFROOT)embl/mat/Maxhom_Blosum.metric  exeHtmfil=$(PROFROOT)embl/scr/phd_htmfil.pl \
+	exeHtmtop=$(PROFROOT)embl/scr/phd_htmtop.pl paraSec=$(PROFROOT)embl/para/Para-sec317-may94.com paraAcc=$(PROFROOT)embl/para/Para-exp152x-mar94.com \
+	paraHtm=$(PROFROOT)embl/para/Para-htm69-aug94.com user=phd noPhdHeader \
+	fileOutPhd=$(PHDFILE)  fileOutRdb=$(PHDRDBFILE)  fileNotHtm=$(PHDNOTHTMFILE)  \
+	optDoHtmref=1  optDoHtmtop=1 optHtmisitMin=0.2 exeCopf=$(LIBRGUTILS)/copf.pl \
+	nresPerLineAli=60 exePhd2msf=$(PROFROOT)embl/scr/conv_phd2msf.pl exePhd2dssp=$(PROFROOT)embl/scr/conv_phd2dssp.pl  exeConvertSeq=convert_seq \
+	exeHsspFilter=filter_hssp doCleanTrace=1
+
+.PHONY: phd
+phd: $(PHDFILE) $(PHDRDBFILE)
+
+$(PROFFILE): $(HSSPFILTERFILE)
+	prof $< both fileRdb=$@ $(if $(DEBUG), 'dbg', ) numresMin=$(PROFNUMRESMIN) nresPerLineAli=60 riSubSec=4 riSubAcc=3 riSubSym=.
+
+$(PROF1FILE): $(FASTAFILE)
+	prof $< both fileRdb=$@ $(if $(DEBUG), 'dbg', ) numresMin=$(PROFNUMRESMIN) nresPerLineAli=60 riSubSec=4 riSubAcc=3 riSubSym=.
+
+.PHONY: prof
+prof: $(PROFFILE) $(PROF1FILE)
+
+$(PROFTEXTFILE): $(PROFFILE)
+	# conv_prof creates query.profAscii.tmp in case query.profAscii already exists - make sure it does not
+	rm -f $(PROFTEXTFILE); $(PROFROOT)scr/conv_prof.pl $< fileOut=$@ ascii nohtml nodet nograph
+
+$(ASPFILE): $(PROFFILE)
+	profasp $(PROFASPCTRL) -in $< -out $@
+
+.PHONY: profasp
+profasp: $(ASPFILE)
+
+# NORSp
+.SECONDARY: $(NORSFILE) $(NORSSUMFILE)
+%.nors %.sumNors : $(FASTAFILE) $(HSSPFILTERFILE) $(PROFFILE) $(PHDRDBFILE) $(COILSFILE)
+	# this call may throw warnings on STDERR (like 'wrong parsing coil file?? ctCoils=0') - silence it when we are not in debug mode
+	$(NORSPEXE) $(NORSPCTRL) -fileSeq $(FASTAFILE) -fileHssp $(HSSPFILTERFILE) \
+	-filePhd $(PROFFILE) -filePhdHtm $(PHDRDBFILE) -fileCoils $(COILSFILE) -o $(NORSFILE) -fileSum $(NORSSUMFILE) -html
+
+.PHONY: norsp
+norsp: $(NORSFILE) $(NORSSUMFILE)
+
+# Scans the GCG-format query with prosite_scan.pl.
+# The result is written with '>' so that a re-run replaces a stale target
+# instead of appending a second copy of the results to it.
+$(PROSITEFILE): $(GCGFILE)
+	$(HELPERAPPSDIR)prosite_scan.pl -h $(PROSITECONVDAT) $< > $@
+
+.PHONY: prosite
+prosite: $(PROSITEFILE)
+
+$(SEGFILE): $(FASTAFILE)
+	ncbi-seg $< -x > $@
+
+# lkajan: legacy name for ncbi-seg
+.PHONY: lowcompseg
+lowcompseg: ncbi-seg
+
+.PHONY: ncbi-seg
+ncbi-seg: $(SEGFILE)
+
+$(SEGGCGFILE): $(SEGFILE)
+	$(LIBRGUTILS)/copf.pl $< formatOut=gcg fileOut=$@
+
+# Runs blastpgp (3 iterations) against $(BIG80BLASTDB); writes the blast report,
+# the checkpoint file (-C) and the ASCII matrix (-Q).
+# The exit status is captured in a '||' branch: after 'if ! cmd; then' the shell
+# status holds the negated result (always 0), which hid blastpgp failures from make.
+.SECONDARY: $(BLASTFILE) $(BLASTCHECKFILE) $(BLASTMATFILE)
+%.blastPsiOutTmp %.chk %.blastPsiMat : $(FASTAFILE)
+	# blast call may throw warnings on STDERR - silence it when we are not in debug mode; blastpgp and blastall create a normally 0-sized 'error.log' - remove it
+	trap "rm -f error.log" EXIT; \
+	( blastpgp -F F -a $(BLASTCORES) -j 3 -b 3000 -e 1 -h 1e-3 -d $(BIG80BLASTDB) -i $< -o $(BLASTFILE) -C $(BLASTCHECKFILE) -Q $(BLASTMATFILE) $(if $(DEBUG), , >>error.log 2>&1) ) || { \
+		EXIT=$$?; if [ -s error.log ]; then cat error.log >&2; fi; exit $$EXIT; \
+	}
+
+# Restarts blastpgp from the checkpoint file (-R) against $(BIGBLASTDB).
+# The exit status is captured in a '||' branch: after 'if ! cmd; then' the shell
+# status holds the negated result (always 0), which hid blastpgp failures from make.
+$(BLASTALIFILE): $(BLASTCHECKFILE) $(FASTAFILE)
+	# blast call may throw warnings on STDERR - silence it when we are not in debug mode
+	trap "rm -f error.log" EXIT; \
+	( blastpgp -F F -a $(BLASTCORES) -b 1000 -e 1 -d $(BIGBLASTDB) -i $(FASTAFILE) -o $@ -R $(BLASTCHECKFILE) $(if $(DEBUG), , >>error.log 2>&1) ) || { \
+		EXIT=$$?; if [ -s error.log ]; then cat error.log >&2; fi; exit $$EXIT; \
+	}
+
+.SECONDARY: $(SAFFILE) $(BLASTFILERDB)
+%.safBlastPsi %.blastPsiRdb : $(BLASTALIFILE)  $(FASTAFILE)
+	$(LIBRGUTILS)/blastpgp_to_saf.pl fileInBlast=$< fileInQuery=$(FASTAFILE)  fileOutRdb=$(BLASTFILERDB) fileOutSaf=$(SAFFILE) red=100 maxAli=3000 tile=0
+
+.SECONDARY: $(SAF80FILE) $(BLAST80FILERDB)
+%.safBlastPsi80 %.blastPsi80Rdb : $(BLASTFILE)  $(FASTAFILE)
+	$(LIBRGUTILS)/blastpgp_to_saf.pl fileInBlast=$< fileInQuery=$(FASTAFILE)  fileOutRdb=$(BLAST80FILERDB) fileOutSaf=$(SAF80FILE) red=100 maxAli=3000 tile=0
+
+$(FASTAFILE): $(INFILE)
+	$(LIBRGUTILS)/copf.pl $< formatIn=fasta formatOut=fasta fileOut=$@ exeConvertSeq=convert_seq
+
+$(GCGFILE): $(INFILE)
+	$(LIBRGUTILS)/copf.pl $< formatIn=fasta formatOut=gcg fileOut=$@ exeConvertSeq=convert_seq
+
+$(DISULFINDDIR):
+	mkdir -p $@
+
+.PHONY: install
+install:
+	for f in \
+		$(ASPFILE) \
+		$(BLASTFILE) $(BLASTALIFILE) $(BLASTMATFILE) $(BLASTFILERDB) $(BLAST80FILERDB) $(BLASTCHECKFILE) \
+		$(BLASTPSWISSM8) \
+		$(COILSFILE) $(COILSRAWFILE) \
+		$(DISISFILE) \
+		$(DISULFINDERFILE) \
+		$(FASTAFILE) \
+		$(HMM2PFAM) $(HMM3PFAM) $(HMM3PFAMTBL) $(HMM3PFAMDOMTBL) \
+		$(HSSPFILE) $(HSSP80FILE) $(HSSPFILTERFILE) \
+		$(INFILE) \
+		$(ISISFILE) \
+		$(LOCTREEANIMALFILE) $(LOCTREEANIMALTXTFILE) $(LOCTREEPLANTFILE) $(LOCTREEPLANTTXTFILE) $(LOCTREEPROKAFILE) $(LOCTREEPROKATXTFILE) \
+		$(METADISORDERFILE) \
+		$(NLSFILE) $(NLSSUMFILE) \
+		$(NORSFILE) $(NORSSUMFILE) \
+		$(NORSNETFILE) \
+		$(PHDFILE) $(PHDNOTHTMFILE) $(PHDRDBFILE) \
+		$(PROFTEXTFILE) $(PROFFILE) $(PROF1FILE) \
+		$(PROFBVALFILE) \
+		$(PROFTMBFILE) $(PROFTMBDATFILE) \
+		$(PROSITEFILE) \
+		$(PSICFILE) $(CLUSTALNGZ) \
+		$(SAFFILE) $(SAF80FILE) \
+		$(SEGFILE) $(SEGGCGFILE) \
+		$(GCGFILE) \
+		$(TMHMMFILE) \
+	; do if [ -e $$f ]; then cp -a $$f "$$DESTDIR/" ; fi; done
+
+.PHONY: help
+help:
+	@echo "Targets:"
+	@echo "all - default"
+	@echo "install - copy results to DESTDIR"
+	@echo
+	@echo "disorder - run disorder predictors"
+	@echo "function - function prediction"
+	@echo "interaction - run binding site predictors"
+	@echo "pfam, hmm2pfam, hmm3pfam"
+	@echo "profbval"
+	@echo "psic"
+	@echo "sec-struct - secondary structure prediction"
+	@echo
+	@echo "optional - optional targets available when respective packages are"
+	@echo "  installed"
+	@echo
+	@echo "help - this message"
+	@echo
+	@echo "Variables:"
+	@echo "BLASTCORES - default: 1"
+	@echo "DESTDIR - result files are copied here on 'install'"
+
+# vim:ai:

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/cad23.fasta
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/cad23.fasta	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/cad23.fasta	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,69 @@
+>cad23
+MGRHVATSCHVAWLLVLISGCWGQVNRLPFFTNHFFDTYLLISEDTPVGS
+SVTQLLAQDMDNDPLVFGVSGEEASRFFAVEPDTGVVWLRQPLDRETKSE
+FTVEFSVSDHQGVITRKVNIQVGDVNDNAPTFHNQPYSVRIPENTPVGTP
+IFIVNATDPDLGAGGSVLYSFQPPSQFFAIDSARGIVTVIRELDYETTQA
+YQLTVNATDQDKTRPLSTLANLAIIITDVQDMDPIFINLPYSTNIYEHSP
+PGTTVRIITAIDQDKGRPRGIGYTIVSGNTNSIFALDYISGVLTLNGLLD
+RENPLYSHGFILTVKGTELNDDRTPSDATVTTTFNILVIDINDNAPEFNS
+SEYSVAITELAQVGFALPLFIQVVDKDENLGLNSMFEVYLVGNNSHHFII
+SPTSVQGKADIRIRVAIPLDYETVDRYDFDLFANESVPDHVGYAKVKITL
+INENDNRPIFSQPLYNISLYENVTVGTSVLTVLATDNDAGTFGEVSYFFS
+DDPDRFSLDKDTGLIMLIARLDYELIQRFTLTIIARDGGGEETTGRVRIN
+VLDVNDNVPTFQKDAYVGALRENEPSVTQLVRLRATDEDSPPNNQITYSI
+VSASAFGSYFDISLYEGYGVISVSRPLDYEQISNGLIYLTVMAMDAGNPP
+LNSTVPVTIEVFDENDNPPTFSKPAYFVSVVENIMAGATVLFLNATDLDR
+SREYGQESIIYSLEGSTQFRINARSGEITTTSLLDRETKSEYILIVRAVD
+GGVGHNQKTGIATVNITLLDINDNHPTWKDAPYYINLVEMTPPDSDVTTV
+VAVDPDLGENGTLVYSIQPPNKFYSLNSTTGKIRTTHAMLDRENPDPHEA
+ELMRKIVVSVTDCGRPPLKATSSATVFVNLLDLNDNDPTFQNLPFVAEVL
+EGIPAGVSIYQVVAIDLDEGLNGLVSYRMPVGMPRMDFLINSSSGVVVTT
+TELDRERIAEYQLRVVASDAGTPTKSSTSTLTIHVLDVNDETPTFFPAVY
+NVSVSEDVPREFRVVWLNCTDNDVGLNAELSYFITGGNVDGKFSVGYRDA
+VVRTVVGLDRETTAAYMLILEAIDNGPVGKRHTGTATVFVTVLDVNDNRP
+IFLQSSYEASVPEDIPEGHSILQLKATDADEGEFGRVWYRILHGNHGNNF
+RIHVSNGLLMRGPRPLDRERNSSHVLIVEAYNHDLGPMRSSVRVIVYVED
+INDEAPVFTQQQYSRLGLRETAGIGTSVIVVQATDRDSGDGGLVNYRILS
+GAEGKFEIDESTGLIITVNYLDYETKTSYMMNVSATDQAPPFNQGFCSVY
+ITLLNELDEAVQFSNASYEAAILENLALGTEIVRVQAYSIDNLNQITYRF
+DAYTSTQAKALFKIDAITGVITVQGLVDREKGDFYTLTVVADDGGPKVDS
+TVKVYITVLDENDNSPRFDFTSDSAVSIPEDCPVGQRVATVKAWDPDAGS
+NGQVVFSLASGNIAGAFEIVTTNDSIGEVFVARPLDREELDHYILQVVAS
+DRGTPPRKKDHILQVTILDINDNPPVIESPFGYNVSVNENVGGGTAVVQV
+RATDRDIGINSVLSYYITEGNKDMTFRMDRISGEIATRPAPPDRERQSFY
+HLVATVEDEGTPTLSATTHVYVTIVDENDNAPMFQQPHYEVLLDEGPDTL
+NTSLITIQALDLDEGPNGTVTYAIVAGNIVNTFRIDRHMGVITAAKELDY
+EISHGRYTLIVTATDQCPILSHRLTSTTTVLVNVNDINDNVPTFPRDYEG
+PFEVTEGQPGPRVWTFLAHDRDSGPNGQVEYSIMDGDPLGEFVISPVEGV
+LRVRKDVELDRETIAFYNLTICARDRGMPPLSSTMLVGIRVLDINDNDPV
+LLNLPMNITISENSPVSSFVAHVLASDADSGCNARLTFNITAGNRERAFF
+INATTGIVTVNRPLDRERIPEYKLTISVKDNPENPRIARRDYDLLLIFLS
+DENDNHPLFTKSTYQAEVMENSPAGTPLTVLNGPILALDADQDIYAVVTY
+QLLGAQSGLFDINSSTGVVTVRSGVIIDREAFSPPILELLLLAEDIGLLN
+STAHLLITILDDNDNRPTFSPATLTVHLLENCPPGFSVLQVTATDEDSGL
+NGELVYRIEAGAQDRFLIHLVTGVIRVGNATIDREEQESYRLTVVATDRG
+TVPLSGTAIVTILIDDINDSRPEFLNPIQTVSVLESAEPGTVIANITAID
+HDLNPKLEYHIVGIVAKDDTDRLVPNQEDAFAVNINTGSVMVKSPMNREL
+VATYEVTLSVIDNASDLPERSVSVPNAKLTVNVLDVNDNTPQFKPFGITY
+YMERILEGATPGTTLIAVAAVDPDKGLNGLVTYTLLDLVPPGYVQLEDSS
+AGKVIANRTVDYEEVHWLNFTVRASDNGSPPRAAEIPVYLEIVDINDNNP
+IFDQPSYQEAVFEDVPVGTIILTVTATDADSGNFALIEYSLGDGESKFAI
+NPTTGDIYVLSSLDREKKDHYILTALAKDNPGDVASNRRENSVQVVIQVL
+DVNDCRPQFSKPQFSTSVYENEPAGTSVITMMATDQDEGPNGELTYSLEG
+PGVEAFHVDMDSGLVTTQRPLQSYEKFSLTVVATDGGEPPLWGTTMLLVE
+VIDVNDNRPVFVRPPNGTILHIREEIPLRSNVYEVYATDKDEGLNGAVRY
+SFLKTAGNRDWEFFIIDPISGLIQTAQRLDRESQAVYSLILVASDLGQPV
+PYETMQPLQVALEDIDDNEPLFVRPPKGSPQYQLLTVPEHSPRGTLVGNV
+TGAVDADEGPNAIVYYFIAAGNEEKNFHLQPDGCLLVLRDLDREREAIFS
+FIVKASSNRSWTPPRGPSPTLDLVADLTLQEVRVVLEDINDQPPRFTKAE
+YTAGVATDAKVGSELIQVLALDADIGNNSLVFYSILAIHYFRALANDSED
+VGQVFTMGSMDGILRTFDLFMAYSPGYFVVDIVARDLAGHNDTAIIGIYI
+LRDDQRVKIVINEIPDRVRGFEEEFIHLLSNITGAIVNTDNVQFHVDKKG
+RVNFAQTELLIHVVNRDTNRILDVDRVIQMIDENKEQLRNLFRNYNVLDV
+QPAISVRLPDDMSALQMAIIVLAILLFLAAMLFVLMNWYYRTVHKRKLKA
+IVAGSAGNRGFIDIMDMPNTNKYSFDGANPVWLDPFCRNLELAAQAEHED
+DLPENLSEIADLWNSPTRTHGTFGREPAAVKPDDDRYLRAAIQEYDNIAK
+LGQIIREGPIKGSLLKVVLEDYLRLKKLFAQRMVQKASSCHSSISELIQT
+ELDEEPGDHSPGQGSLRFRHKPPVELKGPDGIHVVHGSTGTLLATDLNSL
+PEEDQKGLGRSLETLTAAEATAFERNARTESAKSTPLHKLRDVIMETPLE
+ITEL

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/p53.fasta
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/p53.fasta	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/p53.fasta	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,9 @@
+>p53
+MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDI
+EQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQ
+KTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDST
+PPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGN
+LRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRP
+ILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP
+PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALEL
+KDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tquick.fasta
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tquick.fasta	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tquick.fasta	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,2 @@
+>tquick fast test sequence (P08305)
+MSAQISDSIEEKRGFFTRWFMSTNHKDIGVLYLF

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tsnap_query.fasta
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tsnap_query.fasta	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/example/tsnap_query.fasta	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,7 @@
+>tsnap
+MVNSTHRGMHTSLHLWNRSSYRLHSNASESLGKGYSDGGCYEQLFVSPEVFVTLGVISLL
+ENILVIVAIAKNKNLHSPMYFFICSLAVADMLVSVSNGSETIVITLLNSTDTDAQSFTVN
+IDNVIDSVICSSLLASICSLLSIAVDRYFTIFYALQYHNIMTVKRVGIIISCIWAACTVS
+GILFIIYSDSSAVIICLITMFFTMLALMASLYVHMFLMARLHIKRIAVLPGTGAIRQGAN
+MKGAITLTILIGVFVVCWAPFFLHLIFYISCPQNPYCVCFMSHFNLYLILIMCNSIIDPL
+IYALRSQELRKTFKEIICCYPLGGLCDLSSRY

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/blast2html.pl
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/blast2html.pl	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/blast2html.pl	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,275 @@
+#!/usr/bin/perl
+
+#----------------------------------------------------------------------
+# blastPsi_rdb4pp
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# usage: 	blastPsi_rdb4pp.pl file_rdb
+#
+# task:	        convert PsiBlast-rdb file to ascii and html format for PP
+# 		
+# subroutines   rd_rdb
+#
+#----------------------------------------------------------------------#
+#	Burkhard Rost		       November,        1995           #
+#			changed:       .	,    	1995           #
+#	EMBL				Version 0.1                    #
+#	Meyerhofstrasse 1                                              #
+#	D-69117 Heidelberg		(rost at EMBL-Heidelberg.DE)      #
+#----------------------------------------------------------------------#
+
+local($file_in,$file_out,$fhout,$sep,
+      @des,%ptr_form,%rd,$Lok,$des,$tmp_form,$tmp_sep,$tmp,$ct,
+      $tmp_id, $remain_len, $tmp_len, $field_width);
+      
+$[ =1 ;				# sets array count to start at 1, not at 0
+
+if ($#ARGV<1){			# error if insufficient input argument
+    print "Usage: $0 file_blast_rdb file_out html path_to_split_swiss\n"; 
+#	"*** input ERROR, call with 'blastPsi_rdb4pp.pl file_rdb' (2nd arg = output) or\n",
+#	"'blastPsi_rdb4pp.pl file_rdb html url_srs'\n";
+    exit;}
+				# input
+$file_in=$ARGV[1];
+if ($#ARGV <2){
+    $file_out=$file_in;$file_out=~s/^.*\///g;$file_out.="_header";}
+else {
+    $file_out=$ARGV[2];}
+
+$isHtml = 0;
+if ( $#ARGV > 2 ) {
+    if ( $ARGV[3] eq 'html' ) {
+	$isHtml = 1;
+	$urlPDB = "http://srs6.ebi.ac.uk/srs6bin/cgi-bin/wgetz"; # HARD-coded
+	$urlUNIPROT = "http://www.uniprot.org/uniprot"; # HARD-coded
+    }
+}
+
+
+				# defaults
+				# directory with split SWISS-PROT entries: 4th argument
+				# (array base is 1 in this script) or the built-in path
+$dirSwiss = $ARGV[4] || '/mnt/project/rost_db/data/swissprot/current/';
+
+$fhout="FHOUT_BLASTPSI_RDB";	# name of the output file handle
+$sep= " ";			# separator for output (between columns)
+				# desired column names, in output order
+@des=("ID","LSEQ2","IDE","SIM","LALI","LGAP","BSCORE","BEXPECT","OMIM","PROTEIN");
+				# perl printf formats, one per column name
+$ptr_form{"ID"}="%-27s";$ptr_form{"LSEQ2"}="%5d";$ptr_form{"IDE"}="%4d";$ptr_form{"SIM"}="%4d";
+$ptr_form{"LALI"}="%4d";$ptr_form{"LGAP"}="%4d";$ptr_form{"BSCORE"}="%6d";
+$ptr_form{"BEXPECT"}="%7s";$ptr_form{"OMIM"}= "%-7s";$ptr_form{"PROTEIN"}= "%-25s";
+				# --------------------------------------------------
+				# call reader
+				# --------------------------------------------------
+%rd=&rd_rdb($file_in);
+				# --------------------------------------------------
+				# write output file
+				# --------------------------------------------------
+$Lok=1;
+open ("$fhout", ">$file_out") || 
+    (do {warn "*** blastPsi_rdb4pp: Can't create new file: $file_out\n"; 
+	 $Lok=0;
+     });
+if (!$Lok){
+    exit;}
+print $fhout "<pre>";
+                               # write notation into header
+print $fhout 
+    "--- ------------------------------------------------------------\n",
+    "--- PSI-BLAST multiple sequence alignment\n",
+    "--- ------------------------------------------------------------\n",
+    "--- \n",
+    "--- PSI-BLAST ALIGNMENT HEADER: ABBREVIATIONS FOR SUMMARY\n",
+    "--- SEQLENGTH    : $rd{'len1'}\n",
+    "--- ID           : identifier of aligned (homologous) protein\n",
+    "--- LSEQ2        : length of aligned sequence\n",
+    "--- IDE          : percentage of pairwise sequence identity\n",
+    "--- SIM          : percentage of similarity\n",
+    "--- LALI         : number of residues aligned\n",
+    "--- LGAP         : number of residues in all indels\n",
+    "--- BSCORE       : blast score (bits)\n",
+    "--- BEXPECT      : blast expectation value\n",
+    "--- OMIM         : OMIM (Online Mendelian Inheritance in Man) ID\n",
+    "--- PROTEIN      : one-line description of aligned protein\n",
+    "--- '!'          : indicates lower scoring alignment that is combined\n",
+    "---                with the higher scoring adjacent one\n",
+    "--- \n",
+    "--- PSI-BLAST ALIGNMENT HEADER: SUMMARY\n\n";
+
+
+foreach $des (@des){		# print $fhout descriptors
+    next if ( $des eq "OMIM" and ! $hasOmim);
+    $tmp_form=$ptr_form{"$des"};
+    $tmp_form=~s/d|\.\d+f/s/g;
+    if ($des eq $des[$#des]) {
+	$tmp_sep="\n";}
+    else {
+	$tmp_sep="$sep";}
+    printf $fhout "$tmp_form$tmp_sep",$des; }
+
+# Print one output line per alignment row. In HTML mode the ID and OMIM columns
+# are turned into links and padded by hand (the markup would otherwise count
+# towards the printf field width); all other columns go through printf.
+foreach $ct (1..$rd{"NROWS"}){	# print data
+    foreach $des (@des){
+	next if ( $des eq "OMIM" and ! $hasOmim);	# OMIM column only if some row has an OMIM id
+	if (! defined $rd{"$ct","$des"}) {
+	    next;}
+	$tmp_form=$ptr_form{"$des"};
+	if ($des eq "PROTEIN"){		# last column: truncate description and end the line
+	    $tmp=substr($rd{"$ct","$des"},1,25);	# offset 1 is the first character, since $[ is 1
+	    $tmp_sep="\n"; }
+	else {
+	    if ($des =~ /IDE|SIM/){
+		$tmp=int($rd{"$ct","$des"}); }
+	    else {
+		$tmp=$rd{"$ct","$des"}; }
+	    $tmp_sep=$sep;}
+	if ( $isHtml and $des =~ /^(ID|OMIM)$/ ) {
+	    $tmp_id = $rd{"$ct","$des"};
+	    $tmp_len = length ( $tmp_id );
+	    $short_id = $tmp_id;
+	    $short_id =~ s/^.*\|//g;
+	    $short_id =~ s/\!//g;
+	    $tmp = "";			# opening anchor tag; stays empty if no link target is known
+	    if ( $des eq "ID" ) {
+		if ( $tmp_id =~ /^(tr|pdb|swiss)\|(\w+)\|(\w+)/ ) {
+		    $pdb_id=$2; $up_id=$3;
+
+		    $database = uc($1);
+		    $database =~ s/\!//g;
+		    if ($database  =~ "SWISS" || $database =~ "TR"){
+			$database="SWISSPROT";
+			$tmp = "<A HREF='$urlUNIPROT/$up_id'";
+		    }
+		    elsif ($database eq "PDB"){
+			$short_id =~ s/\_\w+$//;
+			$tmp = "<a href='http://www.pdb.org/pdb/explore/explore.do?structureId=$pdb_id'";
+		    }
+		} else {
+		    $database = "SWISSPROT";
+		}
+
+	    } elsif ( $des eq "OMIM" ) {
+		$tmp .= "<a href='http://srs.ebi.ac.uk/srs6bin/cgi-bin/wgetz?-e+[omim-ID:$short_id]'";
+	    }
+
+	    if ( $tmp ne "" ) {
+		$tmp .= " TARGET=\"_blank\">$tmp_id"."</A>";
+	    } else {
+				# identifier in an unrecognised format: print it as plain
+				# text rather than emitting attributes and a closing tag
+				# without an opening anchor
+		$tmp = $tmp_id;
+	    }
+	    if ( $tmp_form =~ /\D+(\d+)\D+/ ) {
+		$field_width = $1;
+	    }
+				# pad to the column width plus one separator blank,
+				# counting only the visible identifier text
+	    $remain_len = $field_width - $tmp_len + 1;
+	    if ( $tmp_id ) {
+		$tmp .= " "x$remain_len;
+	    } else {
+		$tmp = " "x$remain_len;	# empty cell: blanks only, no link
+	    }
+	    print $fhout $tmp;
+	} else {
+	    printf $fhout "$tmp_form$tmp_sep",$tmp;
+	}
+
+    }
+#    printf $fhout "</BR>";
+}
+print $fhout
+    "---\n",
+    "--- PSI-BLAST ALIGNMENT \n\n";
+print $fhout "</pre>";
+close($fhout); 
+exit;
+
+#==========================================================================================
+sub rd_rdb {
+    local ($file_rdb) = @_ ;
+    local (@des1,$fhin,
+	   %rd,@tmp,$tmp,$ct,$id,$idOmim,$ctDes);
+    $[ =1 ;
+#--------------------------------------------------------------------------------
+#    rd_rdb             reads the PSI-BLAST RDB file 
+#       in:             $file_rdb   path of the RDB file
+#       out:            hash with $rd{"len1"} (value of the #SEQLENGTH line),
+#                       $rd{"NROWS"} (number of data rows) and one entry
+#                       $rd{$row,$column} per cell, plus $rd{$row,"OMIM"}
+#                       returns 0 if the file does not exist
+#       GLOBAL out:     $hasOmim    1 if an OMIM id was found for any row
+#--------------------------------------------------------------------------------
+				# defaults
+    $fhin="FHIN_RDB";
+    return(0)                   if ( ! -e $file_rdb);	# check existence
+
+				# read file
+    open ("$fhin", "$file_rdb") || 
+	(do {warn "*** rd_rdb ($0): Cannot read file: $file_rdb\n"; });
+
+				# header: pick up the sequence length, stop at the
+				# '##ID' line that names the columns
+    while ( <$fhin> ) {		# length
+	if ($_=~/^\#\#ID/) {
+	    s/\#//g;
+	    s/^\s+|\s+$//g;
+	    s/\%//g;
+	    @des1 = split/\t/;
+	    $ctDes = scalar(@des1);
+	    last;
+	}
+	if ($_=~/^\#SEQLENGTH/) {
+	    $_=~s/\n|\s|SEQLENGTH|\#//g;
+	    $rd{"len1"}=$_; } 
+    }
+
+    $hasOmim = 0;		# GLOBAL variable!
+    $ct=0;
+				# body: one tab-separated data row per line
+    while ( <$fhin> ) { 
+	next if ( $_ !~ /\w+/ ); # skip empty lines if any
+	s/^\s+|\s+$//g;
+	s/\%//g;
+	@tmp=split /\t/; 
+	#return (0) if ( scalar(@tmp) != $ctDes );
+	$ct++;
+
+	for $i (1..$ctDes) {
+	    if ( ! defined $tmp[$i] ) {
+		$tmp[$i] = " ";	# missing trailing columns become a single blank
+	    }
+	    $rd{"$ct","$des1[$i]"}=$tmp[$i];
+	}
+
+	$id = $rd{"$ct","ID"}; 
+	if ( $id =~ /^swiss/ ) { # only get OMIM from SWISSPROT
+	    $idOmim = &getOmim($id);
+	} else {
+	    $idOmim = "";
+	}
+	$rd{"$ct","OMIM"}= $idOmim;
+	$hasOmim = 1 if ( $idOmim );  
+    }
+    close($fhin);
+    $rd{"NROWS"}=$ct;
+    return(%rd);
+}				# end of rd_rdb
+
+
+#--------------------------------------------------------------------------------
+#    getOmim            looks up the OMIM id of a SWISS-PROT entry
+#       in:             $id         identifier; everything up to the last '|'
+#                                   is stripped and the rest is lower-cased
+#       GLOBAL in:      $dirSwiss   directory holding the split SWISS-PROT files
+#       out:            number from the first 'DR   MIM; <number>' line found
+#                       before the 'SQ' line, or "" if the entry file is
+#                       missing, unreadable or has no such line
+#--------------------------------------------------------------------------------
+sub getOmim {
+    local ( $id ) = @_;
+    local ( $subDir, $fileSwiss, $idOmim );
+    local $_;			# the read loop below must not clobber the caller's $_
+
+    $id =~ s/^.*\|//g;
+    $id =lc($id);
+				# entries are looked up in a sub-directory named after
+				# the first character following the underscore
+    if ( $id =~ /[1-9a-zA-Z]+_(\w)/ ) {
+	$subDir = $1.'/';
+    } else {
+	$subDir = '';
+    }
+    $fileSwiss = $dirSwiss.$subDir.$id;
+    return "" if ( ! -e $fileSwiss );
+    open ( SWISS, $fileSwiss ) or return "";
+    $idOmim = "";
+    while ( <SWISS> ) {
+	if ( /^DR\s+MIM\;\s*(\d+)\D+/ ) {
+	    $idOmim = $1;
+	    last;
+	}
+	last if ( /^SQ/ );	# cross-references end where the sequence starts
+    }
+    close ( SWISS );		# release the handle; it used to stay open after every lookup
+    return $idOmim;
+}
+
+
+
+
+
+
+
+
+
+


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/blast2html.pl
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/filter_blastp_big.pl
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/filter_blastp_big.pl	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/filter_blastp_big.pl	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,266 @@
+#!/usr/bin/perl -w
+
+$scrName=$0;$scrName=~s/^.*\/|\.pl//g;
+$scrGoal="extracts list of proteins from BLAST output";
+
+$[ =1 ;				# start counting at one
+
+				# ------------------------------
+				# defaults
+$p_value_out=       10;
+$dbDef=             "swiss"; 
+$dbDef=             "big";   
+$dirOut=0;
+$extOut=".blast_list";		# note: for Maxhom must end with list ..
+
+#my $ppData;
+
+				# ------------------------------
+				# help
+if ($#ARGV < 1 || $ARGV[1]=~/^(help|\-h|\-m)$/){
+    print "goal: $scrGoal\n";
+    print "use:  $scrName file <options>\n";
+    print "opt:  p=Pvalue_cutoff  (def=$p_value_out)\n";
+    print "      db=<swiss|big>   (def=$dbDef)\n";
+#    print "      ppData=/path\n";
+    print "      dir=directory    directory with db split into many FASTA|SWISS-PROT\n";
+    print "      fileOut=name_of_output_file (def=$fileIn - extension + .blastLis\n";
+    print "      dirOut           directory for output file\n";
+    print "      extOut           extension for output file  (def=$extOut)\n";
+    print "      dbg              write messages, keep files\n";
+    print "      ----           --------\n";
+    print "      NOTE: print to standard out if no fileOut=x defined!!\n";
+    print "      ----           --------\n";
+    print "      \n";
+#    print "      \n";
+    exit;}
+    
+
+$dir_db=$dbWant=$fileOut=$Ldebug=$Lverb=0;
+				# ------------------------------
+				# read command line
+$fileIn=$ARGV[1];
+foreach $arg (@ARGV){
+    next if ($arg eq $ARGV[1]);
+    if    ($arg=~/^fileOut=(.*)$/)        { $fileOut=        $1;}
+    elsif ($arg=~/^dirOut=(.*)$/)         { $dirOut=         $1; 
+					    $dirOut.="/"     if ($dirOut !~/\/$/);}
+    elsif ($arg=~/^de?bu?g$/)             { $Ldebug=         1;}
+    elsif ($arg=~/^verb(ose)?$|^\-v$/)    { $Lverb=          1;}
+#    elsif ($arg=~/^ppData=(.*)$/o)        { $ppData =        $1;}
+    elsif ($arg=~/^silent$|^\-s$/)        { $Lverb=          0;}
+    elsif ($arg=~/^p=(.*)$/)              { $p_value_out=    $1;}
+    elsif ($arg=~/^db=(.*)$/)             { $dbWant=             $1; $dbWant = lc( $dbWant ); }
+    elsif ($arg=~/^dir=(.*)$/)            { $dir_db=         $1;}
+#    elsif ($arg=~/^=(.*)$/){ $=$1;}
+	else
+	{
+		print "*** $0: wrong command line arg=$arg!\n";
+		exit(1);
+	}
+}
+
+#if( !$ppData ){ die("no ppData"); }
+#$dir_db_swiss=      "$ppData/swissprot/current/";
+#$dir_db_big=        "$ppData/derived/big/";
+#$dir_db_big_pdb=    $dir_db_big."splitPdb/";
+#$dir_db_big_swiss=  $dir_db_big."splitSwiss/";
+#$dir_db_big_trembl= $dir_db_big."splitTrembl/";
+
+				# ------------------------------
+				# resolve database name and directory
+				# NOTE(review): $dir_db_swiss and $dir_db_big are only
+				# assigned in the commented-out block above, so the
+				# default directory is undefined unless dir= is passed
+				# - confirm whether dir= is meant to be mandatory
+$dir_dbDef=$dir_db_swiss;
+$dir_dbDef=$dir_db_big             if ($dbDef eq "big");
+
+
+if    (! $dbWant && ! $dir_db) {	# both NOT passed
+    $dir_db=$dir_dbDef;
+    $dir_db=$dir_db_big         if ($dbWant eq "big");
+    $dir_db=$dir_db_big         if ($dbWant eq "pdb");
+    $dir_db=$dir_db_big         if ($dbWant eq "trembl");
+    $dbWant=    $dbDef;
+    $dbWant=$dbDef;}
+elsif (! $dbWant) {			# db NOT passed
+    $dbWant=$dbDef;
+} elsif (! $dir_db) {		# dir NOT passed, but db
+				# no default directory is available for a named db:
+				# fail with a message that says what is missing (this
+				# used to be a bare die that printed only the db name)
+    die "*** $0: db=$dbWant given without dir=<directory>; no default database directory is configured\n";
+}
+
+$dir_db.="/"                    if ($dir_db !~/\/$/); # add slash
+
+				# automaticly name output file
+$fileOut=$dirOut.$fileOut       if ($dirOut && $fileOut); 
+
+				# ------------------------------
+				# check input
+die "*** unrecognised db=$dbWant (must be <big|swiss>)\n" if ($dbWant !~/^(big|swiss|pdb|trembl)/);
+die "*** non-existing dir=$dir_db!\n"                 if (! -d $dir_db);
+die "*** missing input file:$fileIn!\n"               if (! -e $fileIn);
+
+				# ------------------------------
+				# now read
+				# ------------------------------
+open(FHIN,$fileIn) || die "*** $0: failed opening fileIn=$fileIn!\n";
+
+				# skip before section with summary
+$LoldVersion=1;
+
+while (<FHIN>){
+    $LoldVersion=0              if ($LoldVersion && $_=~/BLASTP [2-9]/);
+    last if ($_=~/^Sequences producing .*:/);
+}
+$ctFound=0;
+
+undef %already;
+$#found=0;
+
+while (<FHIN>){
+    $line=$_; $line=~s/\n//g;
+    last if ( $line=~ /^Parameters\:/ );
+
+#    next if ($line=~/^\S/);
+    if ($line=~/^\s*>/) {
+	last if ($LoldVersion);
+	next;}
+
+				# <*** <*** <*** <*** <*** <*** <*** <*** <*** 
+				# none (beg)
+    if ($line =~ /\*+\s+NONE\s+\*+/) {
+	print "none\n";
+	close($fhout)           if ($fhout ne "STDOUT");
+#	exit(0,"none"); 
+	exit(0);
+    }
+				# none (end)
+				# <*** <*** <*** <*** <*** <*** <*** <*** <*** 
+    next if ($line=~/^\s*$/);
+
+    if (! $LoldVersion && $line=~/^Sequences producing .*:/) {
+	$#found=0;
+	undef %already;}
+	
+    next if (! $LoldVersion && 
+	     ($line=~/^Query/     ||
+	      $line=~/^Sbjct/     ||
+	      $line=~/^Searching/ ||
+	      $line=~/^Results/   ||
+	      $line=~/^Sequences /) );
+
+
+# swiss|Q8HXX3|MC4R_MACFA Melanocortin receptor 4 OS=Macaca fascic...   625   e-179
+# swiss|Q0H8Y4|COX1_USTMA Cytochrome c oxidase subunit 1 OS=Ustila...    29   9.2
+# pdb|1crn PLANTSEEDPROTEIN CRAMBIN source=ABYSSINIAN CABB...   262  1.8e-32   1
+# 1ppt.pdb PANCREATICHORMONE AVIAN PANCREATIC POLYPEPTI               82  4e-16
+    
+    #                 1      2       3          4
+    if ($line=~/^\s*(\S+)\s+(.*)\s+(\d+)\s+([\-x\d\.e]+)\s*.*$/) {
+	
+				# finish reading if Pvalue too high
+        # lkajan: perl does not recognize e-179 as a number. It does recognize 1e-179 though.
+        my $e_val = $4; if( $e_val =~ /^e/o ){ $e_val = "1$e_val"; }
+	if ($e_val > $p_value_out){ last; }
+
+	#$db_info=$1;
+				# (1) big redundant db 
+	if    ($line=~/pdb\|/) {
+	    $dbHere="pdb";
+	    #$dir_dbSplit=$dir_db_big_pdb;
+	}
+	elsif ($line=~/trembl\|/) {
+	    $dbHere="trembl";
+	    #$dir_dbSplit=$dir_db_big_trembl;
+        }
+	else {
+	    $dbHere="swiss";
+	    #$dir_dbSplit=$dir_db_big_swiss;
+	}
+				# skip wrong db for BIG
+
+	if( !defined($dbWant) || ( $dbWant ne "big" && $dbWant ne $dbHere ) ){ next; }
+
+				# (2) ordinary swiss-prot
+	if ($dbWant eq "swiss") {
+	    $tmp=$line;  
+	    $tmp=~s/^(.*\|)[^\|]+.*$/$1/g;
+#	    print STDERR "tmp=$tmp\n";	    
+	    $id=$line;   
+	    $id=~s/^.*\|([^\|\s]+).*$/$1/g;
+#	    $id = $1;
+	    #print STDERR "Line 181: id=$id\n";
+	    $id=~tr/[A-Z]/[a-z]/;
+	    $sub_dir=substr($id,index($id,'_')+1,1);
+
+	    $file=$dir_db.$sub_dir."/".$id;
+	    #print STDERR $file,"\n";
+	    next if (! -e $file);
+	    push(@found,$file);
+	    #print $fhout $dir_db.$sub_dir."/".$id."\n";
+	    #print STDERR $file,"\n";
+
+	    ++$ctFound;
+	    next; }
+
+	$line=~s/^\s*\S+\|//g;
+
+	$id=$line;   $id=~s/^(\S+).*$/$1/g;
+
+	$id=~tr/[A-Z]/[a-z]/ if ($dbHere ne "pdb");
+				# split into /data/derived/big/splitSwiss/o/prot_organism
+	
+	if    ($dbHere =~/^swiss/) {
+	    $tmp=$id; $tmp=~s/^[^_]*_(\w).*$/$1/;
+	    $dir_dbSplit.=$tmp."/";}
+				# split into /data/derived/big/splitTrembl/a/af001_1;
+	elsif ($dbHere =~/^trembl/) {
+	    $tmp=substr($id,1,1);
+	    $dir_dbSplit.=$tmp."/";}
+#	print STDERR $id  ;	# 
+	
+			# note: split into PDB done already
+
+	$file=$dir_dbSplit.$id.".f";
+
+
+
+	next if (! -e $file);	# skip if missing
+	next if (defined $already{$id});
+	$already{$id}=1;
+	++$ctFound;
+
+	push(@found,$file);
+#	print  $file,"\n";
+    }
+}
+close(FHIN);
+
+
+				# ------------------------------
+				# open output handle
+my $fhout;			# default
+if ($fileOut) {
+    open($fhout,">".$fileOut) || die "*** $0: failed opening out=$fileOut\n";
+}
+else
+{
+	$fhout = \*STDOUT;
+}
+
+foreach $found (@found){
+    print $fhout $found,"\n";
+}
+
+if (! $ctFound) {
+#    print $fhout "none\n";
+    close($fhout)           if ($fhout ne "STDOUT");
+    #exit(0,"none");
+    exit(0);
+}
+
+print "$0 output in $fileOut\n"       if ($fileOut && -e $fileOut);
+print "ERROR $0 no output=$fileOut\n" if ($fileOut && ! -e $fileOut);
+
+exit;
+
+# vim:ai:et:


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/filter_blastp_big.pl
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/nors.pl
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/nors.pl	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/nors.pl	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,947 @@
+#!/usr/bin/perl -w
+
+#============================================================
+# template for scripts using Getopt.pm
+#===============================================================
+
+use Getopt::Long;
+
+
+# ---- main --------------------------------------------------------
+# 1. init() parses the command line into the option globals
+# 2. isNors() runs the detection and returns the per-residue arrays
+# 3. the report (header + body) goes to $fileOut, a 1/0 flag to $fileSum
+&init();
+
+($Lok,$norsRegion,$msg,$sec,$coils,$htm,$acc) =
+    &isNors($window,$secCut,$accLen,$collagenCutoff,
+            $fileHssp,$filePhd,$filePhdHtm,$fileCoils);
+
+unless ( $Lok ) {
+    print STDERR "sub isNors returns error: $msg\n";
+    exit(1);
+}
+
+&writeHeader($fileOut,$sec);
+
+				# body writer depends on the --html switch
+my $writeBody = $opt_html ? \&writeNorsHtml : \&writeNorsTxt;
+$writeBody->($fileSeq,$fileOut,$norsRegion,$sec,$acc,$htm,$coils);
+
+				# summary file: "1" only when isNors reported status 1
+open (SUM,">$fileSum") or die "cannot write to $fileSum:$!";
+print SUM ( $Lok == 1 ? "1\n" : "0\n" );
+close SUM;
+
+exit;
+
+
+
+sub init {
+    # Set the option defaults, parse the command line with Getopt::Long and
+    # check that every required option was given.
+    # All results are stored in package globals ($window, $secCut, $accLen,
+    # $collagenCutoff, $minAcc, $fileSeq, $fileHssp, $filePhd, $filePhdHtm,
+    # $fileCoils, $fileOut, $fileSum, $opt_html, $opt_debug, $style_td1..3,
+    # $nameScr) which the rest of the script reads.
+    # Exits with status 1 on invalid options, after printing the help text,
+    # and when a required option or input file is missing.
+
+				# default options
+    $opt_help = '';
+    $opt_debug = 0;
+    $opt_html = 0;
+
+    $window = 70;			# the window of the non-struc. length
+    $secCut = 12;			# the cutoff the SS percentage
+    $accLen = 10;			# at least have continuous 10 exposed residue
+    $collagenCutoff = 0.3;		# sequence identity to indicate a collagen
+
+    $minAcc = 16;			# acc threshold for exposed residue
+
+				# cell styles of the HTML report
+    $style_td1 = 'background:#bcd2ee;font-family:Times,Serif';
+    $style_td2 = 'background:#ebebeb;font-family:Times,Serif';
+    $style_td3 = 'background:#e0eee0;font-family:Times,Serif';
+
+				# 'help|h': without the explicit alias "-h" is an
+				# ambiguous abbreviation of 'help' and 'html' and
+				# is rejected, although the message below
+				# advertises it
+    $Lok = GetOptions ('debug!' => \$opt_debug,
+                       'win=i' => \$window,
+                       'secCut=i' => \$secCut,
+                       'accLen=i' => \$accLen,
+                       'fileSeq=s' => \$fileSeq,
+                       'fileHssp=s' => \$fileHssp,
+                       'filePhd=s' => \$filePhd,
+                       'filePhdHtm=s' => \$filePhdHtm,
+                       'fileCoils=s' => \$fileCoils,
+                       'fileSum=s' => \$fileSum,
+                       'o=s' => \$fileOut,
+                       'html!' => \$opt_html,
+                       'help|h'  => \$opt_help,
+                       );
+
+    if ( ! $Lok ) {
+        print STDERR "Invalid arguments found, -h or --help for help\n";
+        exit(1);
+    }
+
+				# script name without directory, for messages
+    $nameScr = $0;
+    $nameScr =~ s/.*\///g;
+
+    if ( $opt_help ) {
+        print STDERR
+            "$nameScr: find NORS in sequence \n",
+            "Usage: $nameScr [options]  -fileSeq seq_file -fileHssp hssp_file  \n",
+            "         -filePhd phd_file -filePhdHtm phdHtm_file -fileCoils coils_file\n",
+            "         -o output_file\n",
+            "  Opt:  -help         print this help\n",
+            "        -html         print HTML output(default=0)\n",
+            "        -win          window size(default=$window)\n",
+            "        -secCut       max structure content(default=$secCut)\n",
+            "        -accLen       minimum consecutive exposed residues(default=$accLen)\n",
+            "        --(no)debug   print debug info(default=nodebug)\n";
+        exit(1);
+    }
+
+				# required options, checked in this order:
+				# [ name used in messages, value, must be an existing file ]
+				# filePhdHtm only has to be named: norsCheckHtm
+				# treats a missing file as "no HTM"
+    my @required = (
+        [ 'fileSeq',    $fileSeq,    1 ],
+        [ 'fileHssp',   $fileHssp,   1 ],
+        [ 'filePhd',    $filePhd,    1 ],
+        [ 'filePhdHtm', $filePhdHtm, 0 ],
+        [ 'fileCoils',  $fileCoils,  1 ],
+        [ 'fileOut',    $fileOut,    0 ],
+    );
+    foreach my $req ( @required ) {
+        my ($name,$value,$mustExist) = @$req;
+        &exit_error_undefine($name) if ( ! $value );
+        &exit_error_missing($name)  if ( $mustExist and ! -f $value );
+    }
+
+				# summary file defaults to "<output>_sum"
+    if ( ! $fileSum ) {
+        $fileSum = $fileOut.'_sum';
+    }
+				# the window is split in halves, keep it even
+    $window++ if ( $window % 2 != 0 );
+
+				# end of option/sanity check
+}
+
+
+
+
+sub isNors {
+    # Run the NORS detection.
+    # in:  $window          window width (residues) for the structure content
+    #      $secCut          maximal structure content of a window, in percent
+    #      $accLen          minimal run of exposed residues inside a region
+    #      $collagenCutoff  identity/similarity cutoff handed to isCollagen
+    #      $fileHssp, $filePhd, $filePhdHtm, $fileCoils   input files
+    # out: ($status,$regions,$msg,$sec,$coils,$htm,$acc)
+    #      $status   1 = NORS found, 2 = no NORS (reason in $msg)
+    #      $regions  "beg-end,beg-end,..." or "NULL"
+    #      $sec,$acc array refs from norsCheckPhd
+    #      $coils    result of norsCheckCoils (undef on the early returns)
+    #      $htm      result of norsCheckHtm   (undef on the early returns)
+    # note: $sec, $coils, $htm, $allSec, $fileCoilsRaw and $fileNotHtm are
+    #       not declared with 'my' here and therefore stay package globals.
+    my ($window,$secCut,$accLen,$collagenCutoff,
+        $fileHssp,$filePhd,$filePhdHtm,$fileCoils) = @_;
+    my ($beg,$end,$acc,$seqLen,$isCollagen,$msgCollagen);
+    my ($contentPhd,$ctPhdNsr,$nsrBeg,$nsrEnd,$ctNsr,$i,$nsrReg);
+
+				# centre of the first full window
+    $beg = $window/2;
+
+    ($sec,$acc) = &norsCheckPhd($filePhd);
+    $seqLen = scalar @$sec - 1;	# arrays are 1-based, index 0 is unused
+    if ( $seqLen < $window ) {
+        return (2,"NULL","seqLen=$seqLen, smaller than window size $window",$sec,undef,undef,$acc);
+    }
+
+    ( $isCollagen,$msgCollagen ) =  &isCollagen($fileHssp,$collagenCutoff);
+    if ( $isCollagen ) {
+        return (2,"NULL", "is collagen homolog.",$sec,undef,undef,$acc);
+    }
+
+    $fileCoilsRaw = $fileCoils."_raw";
+    $coils = &norsCheckCoils($fileCoils,$fileCoilsRaw);
+
+				# marker file "<name>.phdNotHtm" next to the HTM file.
+				# Only the file name part is rewritten: the former
+				# s/\..*/.../ started at the first dot of the whole
+				# path and turned e.g. "./x.phdHtm" into ".phdNotHtm".
+    $fileNotHtm = $filePhdHtm;
+    $fileNotHtm =~ s{\.[^/]*$}{.phdNotHtm};
+    $htm = &norsCheckHtm($filePhdHtm,$fileNotHtm);
+
+				# 1/0 per residue: helix, strand, coil or HTM
+    $allSec = &getAllSec($seqLen,$sec,$coils,$htm);
+    if ( $opt_debug ) {
+        print STDERR "sec=\n";
+        for $i (1..$seqLen) {
+            print STDERR $allSec->[$i];
+        }
+        print STDERR "\n";
+    }
+
+				# centre of the last full window
+    $end = $seqLen - $window/2;
+    $contentPhd = &calcSecContent($allSec,$window,$beg,$end);
+
+				# step 1: windows with low structure content
+    ($ctPhdNsr,$nsrBeg,$nsrEnd) = &hasNsrPhd($window,$secCut,$contentPhd,$beg,$end);
+    if ( ! $ctPhdNsr ) {
+        return(2,"NULL","no NORS after PHD",$sec,$coils,$htm,$acc);
+    }
+
+				# step 2: keep regions with a long exposed stretch
+    ($ctNsr,$nsrBeg,$nsrEnd) = &hasNsrAcc($accLen,$ctPhdNsr,$acc,$nsrBeg,$nsrEnd);
+    if ( ! $ctNsr ) {
+        return(2,"NULL","no NORS after ACC",$sec,$coils,$htm,$acc);
+    }
+
+    $nsrReg = join(',', map { $nsrBeg->[$_].'-'.$nsrEnd->[$_] } 1..$ctNsr);
+    return (1,$nsrReg,"ok",$sec,$coils,$htm,$acc);
+}
+
+
+sub calcSecContent {
+    # Structure content of a sliding window.
+    # in:  $sec     ref to a 1-based array of flags, true = residue counts
+    #               as structured
+    #      $window  window width in residues
+    #      $beg     first window centre; the first window always covers
+    #               residues 1..$window, i.e. $beg is expected to be
+    #               $window/2 (this is what isNors passes)
+    #      $end     last window centre
+    # out: ref to an array whose element $i ($beg <= $i <= $end) is the
+    #      fraction of flagged residues in $i-$window/2+1 .. $i+$window/2;
+    #      elements below $beg are undefined
+    my ($sec,$window,$beg,$end) = @_;
+    my ($ctStrut,$i,@contentPhd,$winEnd,$preWin);
+
+				# first window: residues 1..$window
+    $ctStrut = 0;
+    for $i (1..$window) {
+        $ctStrut++ if ( $sec->[$i] );
+    }
+    $contentPhd[$beg] = $ctStrut/$window;
+
+				# Slide one residue at a time.  The count is kept
+				# as an integer and divided once per position:
+				# adding and subtracting 1/$window repeatedly
+				# accumulates rounding errors (0.1+0.1+0.1 != 0.3)
+				# that can flip the comparison with the cutoff.
+    for $i ( $beg+1..$end ) {
+        $winEnd = $i + $window/2;	# residue entering the window
+        $preWin = $i - $window/2;	# residue leaving the window
+        $ctStrut++ if ( $sec->[$winEnd] );
+        $ctStrut-- if ( $sec->[$preWin] );
+        $contentPhd[$i] = $ctStrut/$window;
+    }
+    return [@contentPhd];
+}
+
+sub exit_error_undefine {
+    # Tell the user that the required option named $optName was not
+    # given, print the short usage on STDERR and exit with status 1.
+    my $optName = shift;
+    my @text = (
+        "*** '$optName' not defined\n",
+        "Usage: $nameScr [options]  -fileSeq seq_file -fileHssp hssp_file  \n",
+        "         -filePhd phd_file -filePhdHtm phdHtm_file -fileCoils coils_file\n",
+        "         -o output_file\n",
+        "Try $nameScr --help for more information\n",
+    );
+    print STDERR @text;
+    exit(1);
+}
+
+sub exit_error_missing {
+    # Report on STDERR that the input named by $what does not exist and
+    # exit with status 1.  Callers pass the option name, not the path.
+    my $what = shift;
+    print STDERR "Input file '$what' not found, exiting..\n";
+    exit(1);
+}
+
+sub getAllSec {
+    # Merge the three predictions into one flag per residue.
+    # in:  $seqLen  number of residues (positions 1..$seqLen are filled)
+    #      $sec     ref to per-residue state; 'h'/'e' in any case counts
+    #      $coils   ref to per-residue coiled-coil flags (may be undef)
+    #      $htm     ref to per-residue membrane-helix flags (may be undef)
+    # out: ref to a 1-based array of 1/0; undef when $seqLen < 1
+    my ($seqLen,$sec,$coils,$htm) = @_;
+    my $allSec;
+
+    foreach my $pos ( 1..$seqLen ) {
+        my $structured = ( $sec->[$pos] =~ /[he]/i
+                           or $coils->[$pos]
+                           or $htm->[$pos] );
+        $allSec->[$pos] = $structured ? 1 : 0;
+    }
+    return $allSec;
+}
+    
+
+sub norsCheckCoils {
+    # Flag coiled-coil residues from the raw COILS table.
+    # in:  $coilFile     COILS result file; only its size is looked at, a
+    #                    file below 50 bytes is taken as "no coils"
+    #      $coilFileRaw  per-residue table, e.g.
+    #                      1 M a   0.598   0.000 (  0.000   2.965)
+    #                    (columns 0..7 after splitting on white space)
+    # out: ref to an array holding 1 at every position whose column 4 is
+    #      >= 0.9; undef if $coilFile is too small or no residue qualifies
+    # dies if $coilFileRaw cannot be opened
+    my ($coilFile,$coilFileRaw) = @_;
+    my ($fhCoils,$ctCoil,@tmpCoil,$pos,$score,$coilsRef);
+
+				# -s yields undef for a missing or empty file
+    if ( ( (-s $coilFile) || 0 ) < 50 ) {	# no coils
+        return undef;
+    }
+
+    open ( $fhCoils, '<', $coilFileRaw ) or die "cannot open $coilFileRaw:$!";
+    $ctCoil = 0;
+    while ( <$fhCoils> ) {
+        s/^\s+//g;
+        s/\s+$//g;
+        next if ( $_ !~ /^\d+/ );	# data rows start with the position
+
+        @tmpCoil = split /\s+/;
+        $pos = $tmpCoil[0];
+        $score = $tmpCoil[4];
+        next if ( ! defined $score );	# truncated row
+        if ( $score >= 0.9 ) {
+            $coilsRef->[$pos] = 1;
+            $ctCoil++;
+        }
+    }
+    close $fhCoils;
+
+				# fewer than 14 flagged residues is only reported
+				# in debug mode, the result is returned regardless
+    if ( $ctCoil < 14 ) {
+        if( $opt_debug ){ warn( "wrong parsing coil file?? ctCoils=$ctCoil < 14" ); }
+    }
+
+    return $coilsRef;
+}
+
+
+
+sub norsCheckHtm {
+    # Flag the residues of predicted transmembrane helices.
+    # in:  $htmFile     PHDhtm result; the regions are taken from its
+    #                   leading '#' lines that start with "# MODEL_DAT",
+    #                   the last comma separated field being "beg-end"
+    #      $fileNotHtm  marker file; if it exists no HTM is reported
+    # out: ref to an array holding 1 at every residue of a region; undef if
+    #      $htmFile is missing or below 50 bytes, the marker file exists,
+    #      or no region was found
+    # dies if $htmFile cannot be opened or a region is not "digits-digits"
+    my ($htmFile,$fileNotHtm) = @_;
+    my ($fhHtm,$ctHtm,@htmBeg,@htmEnd);
+    my (@tmpHtm,$htmReg,$i,$h);
+				# lexical, so a second call cannot return the
+				# flags of an earlier one
+    my $htmRef;
+
+    if ( ! -f $htmFile or (-s $htmFile) < 50 or -f $fileNotHtm ) {	# no HTM
+        return undef;
+    }
+				# phdHtm
+    open ( $fhHtm, '<', $htmFile ) or die "cannot open $htmFile:$!";
+    $ctHtm = 0;
+    while ( <$fhHtm> ) {
+        last if ( $_ !~ /^\#/ );		# the header block is over
+        next if ( $_ !~ /^\# MODEL_DAT/ );
+
+        @tmpHtm = split /,/;
+        $htmReg = $tmpHtm[-1];
+        $htmReg =~ s/\s+//g;
+        if ( $htmReg !~ /^\d+-\d+$/ ) {
+            die "wrong htm format??\n$_\n";
+        }
+
+        $ctHtm++;
+        ($htmBeg[$ctHtm], $htmEnd[$ctHtm] ) = split /-/, $htmReg;
+    }
+    close $fhHtm;
+    if ( $ctHtm == 0 ) {
+        if( $opt_debug ){ warn "wrong parsin htm file??\nctHtm=$ctHtm "; }
+    }
+
+    for ( $i = 1; $i <= $ctHtm; $i++ ) {
+        for ( $h = $htmBeg[$i]; $h <= $htmEnd[$i]; $h++ ) {
+            $htmRef->[$h] = 1;
+        }
+    }
+
+    return $htmRef;
+}
+
+
+sub norsCheckPhd {
+    # Read the secondary structure and accessibility columns of a PHD file.
+    # in:  $filePhd  table with '#' comment lines, a header line starting
+    #                with "No" that names the columns (PHEL = secondary
+    #                structure, PREL = accessibility) and one row per
+    #                residue starting with the position
+    # out: (\@sec,\@acc), both indexed by position:
+    #      $sec->[$pos]  lower-cased PHEL value
+    #      $acc->[$pos]  1 if PREL > $minAcc (package global), else 0
+    # dies if the file cannot be opened, if the header lacks PHEL or PREL,
+    # or if a residue row precedes the header
+    my ($filePhd) = @_;
+    my ($fhPhd,@cols,$colPhdNo,$i,$colAccNo,@tmp,$strut,$acc,$pos,@sec,@acc);
+
+    open ( $fhPhd, '<', $filePhd ) or die "cannot open $filePhd:$!";
+    while ( <$fhPhd> ) {
+        next if ( /^\#/ ) ;
+        s/^\s+|\s+$//g;
+        if ( /^No/ ) {
+				# NOTE(review): the header is split on tabs, the
+				# rows below on any white space - presumably the
+				# file is strictly tab separated; confirm
+            @cols= split /\t/;
+            for $i ( 0..$#cols ) {
+                if ( $cols[$i] eq "PHEL" ) {
+                    $colPhdNo = $i;
+                } elsif ( $cols[$i] eq "PREL" ) { # accessibility
+                    $colAccNo = $i;
+                }
+            }
+            if (! defined $colPhdNo or ! defined $colAccNo ) {
+                die "column name not found in $_\n";
+            }
+        }
+
+        next if ( $_ !~ /^\d+\s+/ );	# only residue rows from here on
+
+				# without the header the columns are unknown
+        if (! defined $colPhdNo or ! defined $colAccNo ) {
+            die "column name not found in $_\n";
+        }
+        @tmp = split /\s+/;
+        $pos = $tmp[0];
+        $strut = lc($tmp[$colPhdNo]);
+        $acc = $tmp[$colAccNo];
+        $sec[$pos] = $strut;
+        $acc[$pos] = ( $acc > $minAcc ) ? 1 : 0;
+    }
+    close $fhPhd;
+    return ([@sec],[@acc]);
+}
+
+
+
+sub hasNsrAcc {
+    # Keep the candidate regions that contain a long exposed stretch.
+    # in:  $accLen     minimal number of consecutive exposed residues
+    #      $ctPhdNsr   number of candidate regions
+    #      $acc        ref to per-residue exposure flags (true = exposed)
+    #      $phdNsrBeg  ref to 1-based array of region starts
+    #      $phdNsrEnd  ref to 1-based array of region ends
+    # out: ($count,\@beg,\@end) of the surviving regions (1-based arrays),
+    #      or 0 if none survives
+    my ($accLen,$ctPhdNsr,$acc,$phdNsrBeg,$phdNsrEnd) = @_;
+    my ($kept,$keptBeg,$keptEnd) = (0);
+
+    foreach my $reg ( 1..$ctPhdNsr ) {
+        my ($from,$to) = ($phdNsrBeg->[$reg],$phdNsrEnd->[$reg]);
+
+				# longest run of exposed residues in the region
+        my ($run,$longest) = (0,0);
+        foreach my $pos ( $from..$to ) {
+            $run = $acc->[$pos] ? $run + 1 : 0;
+            $longest = $run if ( $run > $longest );
+        }
+
+        next if ( $longest < $accLen );
+        $kept++;
+        $keptBeg->[$kept] = $from;
+        $keptEnd->[$kept] = $to;
+    }
+
+    return 0 if ( ! $kept );
+    return ($kept,$keptBeg,$keptEnd);
+}
+
+
+	
+
+sub hasNsrPhd {
+    # Collect the regions covered by windows of low structure content.
+    # in:  $window      window width in residues
+    #      $secCut      cutoff in percent; a window qualifies if its content
+    #                   is <= $secCut/100
+    #      $contentPhd  ref to array of window contents, indexed by centre
+    #      $beg,$end    first and last window centre to look at
+    # out: ($count,\@beg,\@end) with 1-based arrays of region starts/ends,
+    #      or 0 if no window qualifies
+    # A run of qualifying windows starting at centre $i opens a region at
+    # $i-$window/2+1; every qualifying window extends the current region
+    # to $i+$window/2.  A new run that starts no later than $window/2
+    # behind the end of the previous region is merged into that region.
+    my ($window,$secCut,$contentPhd,$beg,$end) = @_;
+    my ($isNsrPhd,$ctPhdNsr,$i,@phdNsrEnd,@phdNsrBeg);
+
+    $isNsrPhd = 0;		# 1 while inside a run of qualifying windows
+    $ctPhdNsr = 0;
+    for $i ( $beg..$end ) {
+        if ( $contentPhd->[$i] <= $secCut/100 ) {
+            if ( ! $isNsrPhd ) {
+                $isNsrPhd = 1;
+				# open a new region unless this run touches
+				# the previous one
+                if ( $ctPhdNsr == 0
+                     or $phdNsrEnd[$ctPhdNsr] < ($i - $window/2) ) {
+                    $ctPhdNsr++;
+                    $phdNsrBeg[$ctPhdNsr] = $i - $window/2 + 1;
+                }
+            }
+            $phdNsrEnd[$ctPhdNsr] = $i + $window/2;
+        } else {
+            $isNsrPhd = 0;
+        }
+    }
+    if ( $ctPhdNsr ) {
+        return ($ctPhdNsr,[@phdNsrBeg],[@phdNsrEnd]);
+    } else {
+        return 0;
+    }
+}
+
+
+sub isCollagen {
+    # Look for a collagen homologue in the hit list of an HSSP file.
+    # in:  $fileHssp        HSSP file; the hit list follows "## PROTEINS" and
+    #                       ends at "## ALIGNMENTS"
+    #      $collagenCutoff  hits are scanned until the first one whose
+    #                       identity and similarity are both below this
+    # out: (1,$message) if a scanned hit line mentions "collagen" but not
+    #      "collagenase" (case-insensitive), otherwise (0,"")
+    # The identity and similarity are cut out of each hit line at the
+    # offsets of '%IDE' and '%WSIM' (or '%SIM') in the column header and
+    # must be numbers not larger than 1.
+    # dies if the file cannot be opened or a hit line does not parse;
+    # a missing file only warns and yields (0,"")
+    my ($fileHssp,$collagenCutoff) = @_;
+    my ($msgCollagen,$posIde,$posSim,$ide,$sim);
+    my ($hsspHeader,$fhHssp,$tmpLine);
+
+    if ( ! -e $fileHssp ) {
+        warn "$fileHssp not found.\n";
+        return (0,"");
+    }
+
+    open ($fhHssp, '<', $fileHssp) or die "cannot open $fileHssp:$!";
+
+				# column header = line after "## PROTEINS"
+    while ( defined($tmpLine = <$fhHssp>) ) {
+        next if ( $tmpLine !~ /^\#\# PROTEINS/ );
+        $hsspHeader = <$fhHssp>;
+        $hsspHeader = "" if ( ! defined $hsspHeader );
+        $posIde = index($hsspHeader,'%IDE');
+        $posSim = index($hsspHeader,
+                        ( $hsspHeader =~ /\%WSIM/ ) ? '%WSIM' : '%SIM');
+        last;
+    }
+
+    $msgCollagen = "";
+    while ( defined($tmpLine = <$fhHssp>) ) {
+        last if ( $tmpLine =~ /^\#\# ALIGNMENTS/ );
+        next if ( $tmpLine !~ /^\s*\d+\s*:/ );	# not a hit line
+
+        $ide = substr($tmpLine,$posIde,5);
+        $sim = substr($tmpLine,$posSim,5);
+        $ide = "" if ( ! defined $ide );	# line shorter than the column
+        $sim = "" if ( ! defined $sim );
+        $ide =~ s/\s+//g;
+        $sim =~ s/\s+//g;
+
+				# the messages used to interpolate $_, which is
+				# never set in this sub; the offending line is
+				# in $tmpLine
+        if ( $ide !~ /\d/ or $sim !~ /\d/ ) {
+            print STDERR
+                "ide=$ide,sim=$sim\n",
+                "$fileHssp: wrong parsing hssp?\n$tmpLine\n";
+            die;
+        }
+
+        if ( $ide > 1 or $sim > 1 ) {
+            die "$fileHssp: wrong parsing hssp?\n$tmpLine\nide=$ide,sim=$sim\n";
+        }
+
+				# stop at the first hit below the cutoff
+        if ( $ide < $collagenCutoff and $sim < $collagenCutoff ) {
+            last;
+        }
+
+        if ( $tmpLine =~ /collagen/i and $tmpLine !~ /collagenase/i ) {
+            $msgCollagen .= "$fileHssp -- from hssp:\n$tmpLine";
+            last;
+        }
+    }
+    close $fhHssp;
+
+    if ( $msgCollagen ) {
+        return (1,$msgCollagen);
+    } else {
+        return (0,"");
+    }
+}
+
+
+sub readSeq {
+    # Return the sequence stored in $fileIn as one string.
+    # Lines whose first non-blank character is '>' are skipped; from the
+    # remaining text every non-word character and every digit is removed.
+    # dies if the file cannot be opened
+    my ( $fileIn ) = @_;
+
+    open ( my $fhSeq, $fileIn ) or die "cannot open $fileIn:$!";
+    my @residueLines = grep { $_ !~ /^\s*\>/ } <$fhSeq>;
+    close $fhSeq;
+
+    chomp @residueLines;
+    my $seq = join('',@residueLines);
+    $seq =~ s/[\W\d]+//g;
+
+    return $seq;
+}
+
+
+sub writeHeader {
+    # Start the report file: sequence length, secondary structure
+    # composition, the parameters used and a legend of the track symbols.
+    # in:  $fileNors  report file; an existing file is removed first
+    #      $secRef    ref to the 1-based array of per-residue states
+    # Reads the package globals $opt_html (HTML or plain text), $window,
+    # $secCut, $accLen and, for HTML, $style_td1 and $style_td2.
+    # dies if the file cannot be opened for writing
+    my ( $fileNors,$secRef) = @_;
+    my ( @secType,$seqLen,$sec,%ct,$s,%percSec,$fhOut );
+    @secType = qw(H E L);
+    # index 0 of the array is not a residue
+    $seqLen = scalar (@$secRef) -1 ;
+    # count the upper-cased states; $i is not declared here (package global)
+    for $i ( 1..$seqLen ) {
+	$sec = $secRef->[$i];
+	$sec = uc $sec;
+	$ct{$sec}++;
+    }
+    # percentage of helix (H), strand (E) and loop (L)
+    foreach $s ( @secType ) {
+	$ct{$s} = 0 if ( ! defined $ct{$s} );
+	$percSec{$s} = $ct{$s}/$seqLen*100;
+	#print "xx ",$percSec{$s},"\n";
+    }
+
+    unlink $fileNors if ( -f $fileNors ); # remove old files
+    open ($fhOut,">$fileNors") or die "cannot write to $fileNors:$!";
+    # summary and parameters; the HTML table opened here is closed by the
+    # legend below
+    if ( $opt_html ) {
+	print $fhOut
+	    "<P>\n",
+	    "<TABLE CELLPADDING=2>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Sequence length</TD>\n ",
+	    "<TD style='$style_td2' > $seqLen </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Secondary structure</TD>\n ";
+	printf $fhOut 
+	    "<TD style='$style_td2' > Helix=%.1f%%, Strand=%.1f%%, Loop=%.1f%% </TD></TR>\n",
+	     $percSec{'H'},$percSec{'E'},$percSec{'L'};
+	print $fhOut
+	    "<TR VALIGN=TOP> <TD BGCOLOR='#ffffff'></TD>\n ",
+	    "<TD BGCOLOR='#ffffff' >&nbsp</TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>window size</TD>\n",
+	    "<TD style='$style_td2' > $window </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Structure content cutoff</TD>\n",
+	    "<TD style='$style_td2' > $secCut% </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Minimum consecutive exposed residues</TD>\n",
+	    "<TD style='$style_td2' > $accLen </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD BGCOLOR='#ffffff'></TD>\n ",
+	    "<TD BGCOLOR='#ffffff' >&nbsp</TD></TR>\n";
+    } else {
+	print $fhOut
+	    "\n\n",
+	    "Sequence length     : $seqLen\n";
+	printf $fhOut
+	    "Secondary structure : Helix=%.1f%%, Strand=%.1f%%, Loop=%.1f%%\n\n",
+	    $percSec{'H'},$percSec{'E'},$percSec{'L'};
+	print $fhOut
+	    "window size         : $window\n",
+	    "Structure content cutoff: $secCut%\n",
+	    "Minimum consecutive exposed residues: $accLen\n\n";
+    }
+    				# legend
+    if ( $opt_html ) {
+	print $fhOut
+	    			# colors: #cccccc, #ebebeb #e0eee0
+	    "<P>\n",
+	 #   "<TABLE CELLPADDING=2>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>NORS</TD>\n ",
+	    "<TD style='$style_td2' > N=NORS region </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Secondary structure</TD>\n ",
+	    "<TD style='$style_td2' > H=helix, E=strand, ' '=loop </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Coiled-coil region</TD>\n ",
+	    "<TD style='$style_td2'> c=coils </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Transmembrane helix</TD>\n ",
+	    "<TD style='$style_td2'> m=transmembrane helix </TD></TR>\n",
+	    "<TR VALIGN=TOP> <TD style='$style_td1'>Solvent accessibility</TD>\n ",
+	    "<TD style='$style_td2'> e=exposed, ' '=buried </TD></TR>\n",
+	    "</TABLE>\n";
+    } else {
+	print $fhOut 
+	    "NORS                 : n=NORS region\n",
+	    "Secondary structure  : h=helix, e=strand, l=loop\n",
+	    "Transmembrane helix  : m=transmembrane helix\n",
+	    "Solvent accessibility: e=exposed, b=buried\n";
+	    
+    }
+
+    
+    close $fhOut;
+    return;
+}
+
+
+sub writeNorsHtml {
+    # Append the HTML body of the report to $fileNors: the predicted
+    # regions and, if there are any, the sequence with one track each for
+    # NORS, secondary structure, coils, membrane helices and accessibility
+    # in blocks of 50 residues.
+    # in:  $fileSeq   sequence file, read with readSeq
+    #      $fileNors  report file, opened for appending
+    #      $norsReg   "beg-end,beg-end,..." or "NULL" (printed as "None")
+    #      $sec       ref to per-residue states ('h', 'e', anything else
+    #                 is shown as blank)
+    #      $acc,$htm,$coils  refs to per-residue flags (may be undef)
+    # Reads the package globals $style_td1 and $style_td3.
+    # dies if $fileNors cannot be opened
+    my ( $fileSeq,$fileNors,$norsReg,$sec,$acc,$htm,$coils ) = @_;
+    my ($fhNors,$seqPerLine,$lineTitleLen,$formatLineTitle,$seq,$seqLen);
+    my ($i,@norsReg,$reg,$beg,$end,$ctLine,$color,$state);
+    my ($lineEnd,$lineBeg,$c,$track);
+				# one array per printed track, all 1-based;
+				# lexical so nothing leaks into package globals
+    my (@seq,@norsStr,@secStr,@accStr,@htmStr,@coilsStr,@tracks);
+
+    $norsReg = 'None' if ( $norsReg eq 'NULL' );
+    open ($fhNors,">>",$fileNors) or die "cannot write to $fileNors:$!";
+    print $fhNors
+        "<P>\n",
+        "<TABLE CELLPADDING=2>\n",
+        "<TR VALIGN=TOP> <TD style='$style_td1'>NORS region predicted: </TD>\n ",
+        "<TD style='$style_td3' > $norsReg </TD></TR>\n",
+        "</TABLE>\n";
+
+    if ( $norsReg eq 'None' ) {
+        print $fhNors "<P>\n";
+        close $fhNors;
+        return;
+    }
+
+    $seqPerLine = 50;
+    $lineTitleLen = 7;
+    $formatLineTitle = '%-'.$lineTitleLen."s";
+    $seq = &readSeq($fileSeq);
+    $seqLen = length $seq;
+    $color = "blue";
+
+				# NORS track: blank, 'N' inside the regions
+    for $i ( 1..$seqLen ) {
+        $norsStr[$i] = " ";
+    }
+    @norsReg = split /,/,$norsReg;
+    foreach $reg ( @norsReg ) {
+        ($beg,$end) = split /-/,$reg;
+        for $i ( $beg..$end ) {
+            $norsStr[$i] = "<font color=$color>".'N'."</font>";
+        }
+    }
+
+    for $i ( 1..$seqLen ) {
+        $seq[$i] = substr($seq,$i-1,1);
+
+				# the prediction may be shorter than the sequence
+        $state = defined $sec->[$i] ? $sec->[$i] : '';
+        if ( $state eq 'h' ) {
+            $secStr[$i] = "<font color=red>".'H'."</font>";
+        } elsif ( $state eq 'e' ) {
+            $secStr[$i] = "<font color=green>".'E'."</font>";
+        } else {
+            $secStr[$i] = " ";
+        }
+
+        $accStr[$i]   = $acc->[$i]   ? "<font color=green>".'e'."</font>" : ' ';
+        $htmStr[$i]   = $htm->[$i]   ? "<font color=green>".'M'."</font>" : ' ';
+        $coilsStr[$i] = $coils->[$i] ? "<font color=green>".'c'."</font>" : ' ';
+    }
+
+				# number of blocks, rounded up (was tested against
+				# a literal 50 and left fractional)
+    $ctLine = int( ($seqLen + $seqPerLine - 1) / $seqPerLine );
+
+				# tracks in print order: [ title, cells ]
+    @tracks = ( [ "SEQ",   \@seq ],
+                [ "NORS",  \@norsStr ],
+                [ "SEC",   \@secStr ],
+                [ "COILS", \@coilsStr ],
+                [ "HTM",   \@htmStr ],
+                [ "ACC",   \@accStr ] );
+
+    print $fhNors "<PRE>\n";
+    foreach $i ( 1..$ctLine ) {
+        $lineEnd = $seqPerLine * $i;
+        $lineBeg = $lineEnd - $seqPerLine + 1;
+
+				# ruler: ':' every 10, '.' every 5, and the
+				# block end divided by 10 in the last column
+        printf $fhNors $formatLineTitle,"";
+        foreach $c ( $lineBeg..$lineEnd ) {
+            if ( $c == $lineEnd ) {
+                print $fhNors $lineEnd/10,"\n";
+                last;
+            }
+            if ( $c % 10 == 0 ) {
+                print $fhNors ":";
+            } elsif ( $c % 5 == 0 ) {
+                print $fhNors ".";
+            } else {
+                print $fhNors " ";
+            }
+        }
+
+        foreach $track ( @tracks ) {
+            printf $fhNors $formatLineTitle,$track->[0];
+            foreach $c ( $lineBeg..$lineEnd ) {
+                last if ( $c > $seqLen );
+                print $fhNors $track->[1][$c];
+            }
+            print $fhNors "\n";
+        }
+    }
+    print $fhNors
+        "//\n",
+        "</PRE>\n";
+    close $fhNors;
+    return;
+}
+
+
+sub writeNorsTxt {
+    # Append the plain text body of the report to $fileNors: the predicted
+    # regions and, if there are any, the sequence with one track each for
+    # NORS, secondary structure, coils, membrane helices and accessibility
+    # in blocks of 50 residues.
+    # in:  $fileSeq   sequence file, read with readSeq
+    #      $fileNors  report file, opened for appending
+    #      $norsReg   "beg-end,beg-end,..." or "NULL" (printed as "None")
+    #      $sec       ref to per-residue states, printed as they are
+    #      $acc,$htm,$coils  refs to per-residue flags (may be undef)
+    # dies if $fileNors cannot be opened
+    my ( $fileSeq,$fileNors,$norsReg,$sec,$acc,$htm,$coils ) = @_;
+    my ($fhNors,$seqPerLine,$lineTitleLen,$formatLineTitle,$seq,$seqLen);
+    my ($i,@nors,@norsReg,$reg,$beg,$end,$nors,$ctLine);
+    my ($lineEnd,$lineBeg,$c,$track,@tracks);
+				# track strings, one character per residue;
+				# lexical so a second call starts empty
+    my ($secStr,$accStr,$htmStr,$coilsStr) = ("","","","");
+
+    $norsReg = 'None' if ( $norsReg eq 'NULL' );
+    open ($fhNors,">>",$fileNors) or die "cannot write to $fileNors:$!";
+    print $fhNors "\n\nNORS region          : $norsReg\n\n";
+    if ( $norsReg eq 'None' ) {
+        print $fhNors "//\n";
+        close $fhNors;
+        return;
+    }
+
+    $seqPerLine = 50;
+    $lineTitleLen = 7;
+    $formatLineTitle = '%-'.$lineTitleLen."s";
+    $seq = &readSeq($fileSeq);
+    $seqLen = length $seq;
+
+				# NORS track: '.', 'n' inside the regions
+    for $i ( 1..$seqLen ) {
+        $nors[$i] = '.';
+    }
+    @norsReg = split /,/,$norsReg;
+    foreach $reg ( @norsReg ) {
+        ($beg,$end) = split /-/,$reg;
+        for $i ( $beg..$end ) {
+            $nors[$i] = 'n';
+        }
+    }
+
+    $nors = "";
+    for $i ( 1..$seqLen ) {
+        $nors .= $nors[$i];
+				# keep one character per residue: a missing state
+				# would shorten the string and shift the SEC
+				# track against the sequence
+        $secStr   .= ( defined $sec->[$i] and length $sec->[$i] ) ? $sec->[$i] : '.';
+        $accStr   .= $acc->[$i]   ? 'e' : 'b';
+        $htmStr   .= $htm->[$i]   ? 'm' : '.';
+        $coilsStr .= $coils->[$i] ? 'c' : '.';
+    }
+
+				# number of blocks, rounded up (was tested against
+				# a literal 50 and left fractional)
+    $ctLine = int( ($seqLen + $seqPerLine - 1) / $seqPerLine );
+
+				# tracks in print order: [ title, string ]
+    @tracks = ( [ "seq",   $seq ],
+                [ "NORS",  $nors ],
+                [ "SEC",   $secStr ],
+                [ "COILS", $coilsStr ],
+                [ "HTM",   $htmStr ],
+                [ "ACC",   $accStr ] );
+
+    foreach $i ( 1..$ctLine ) {
+        $lineEnd = $seqPerLine * $i;
+        $lineBeg = $lineEnd - $seqPerLine + 1;
+
+				# ruler: ':' every 10, '.' every 5, and the
+				# block end divided by 10 in the last column
+        printf $fhNors $formatLineTitle,"";
+        foreach $c ( $lineBeg..$lineEnd ) {
+            if ( $c == $lineEnd ) {
+                print $fhNors $lineEnd/10,"\n";
+                last;
+            }
+            if ( $c % 10 == 0 ) {
+                print $fhNors ":";
+            } elsif ( $c % 5 == 0 ) {
+                print $fhNors ".";
+            } else {
+                print $fhNors " ";
+            }
+        }
+
+        foreach $track ( @tracks ) {
+            printf $fhNors $formatLineTitle,$track->[0];
+            print $fhNors substr($track->[1],$lineBeg-1,$seqPerLine),"\n";
+        }
+    }
+    print $fhNors "//\n";
+    close $fhNors;
+    return;
+}
+
+
+


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/nors.pl
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/prosite_scan.pl
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/prosite_scan.pl	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/prosite_scan.pl	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,285 @@
+#!/usr/bin/perl
+#
+# Author: Kay Hofmann (khofmann at isrec-sun1.unil.ch) November 1994
+#
+# This program scans a protein sequence or database with a collection
+# of regular expression patterns (like the one generated by prosite_convert).
+# supported sequence formats are: 
+# EMBL : single or multiple sequences 
+# FASTA: single or multiple sequences
+# GCG  : single sequences
+#
+# The call syntax is:
+#        prosite_scan [-h -s] patternfile sequencefile
+# 
+#        -h : HTML output will be created
+#        -s : abundant patterns will be skipped
+#
+
+# link prefixes for HTML mode; getpattern appends the accession number and
+# the closing quote
+$http_dat = 'HREF="http://www.expasy.ch/cgi-bin/get-prosite-entry?';
+$http_doc = 'HREF="http://www.expasy.ch/cgi-bin/get-prodoc-entry?';
+$htmlmode=0;
+$skipmode=0;
+
+# --------- Program starts here -----------------------------------
+
+###################################################################
+# test command line arguments and open files:
+###################################################################
+  
+# consume every leading argument that starts with '-'; only an argument
+# beginning with "-h" or "-s" changes a mode, any other is dropped silently
+while ($ARGV[0]=~/^-/) {
+    $agm = shift(@ARGV);
+    $htmlmode=1 if ($agm=~/^\-?h/);
+    $skipmode=1 if ($agm=~/^\-?s/); }
+
+# exactly two arguments must remain, both readable text files
+die "SYNTAX: prosite_scan [-h -s] patternfile sequencefile\n"   if ($#ARGV!=1);
+die "$ARGV[0] no valid input file\n"  unless (-T $ARGV[0]);
+die "$ARGV[1] no valid input file\n"  unless (-T $ARGV[1]);
+open(PATFILE,"$ARGV[0]")   || die "can't open $ARGV[0]: $!\n"; 
+open(SEQFILE,"$ARGV[1]")   || die "can't open $ARGV[1]: $!\n";
+				# 
+print "<pre>\n"                 if ($htmlmode);
+
+###################################################################
+# determine sequence file format
+###################################################################
+
+# the first line decides: '>' = FASTA, 'ID ' = EMBL; otherwise the file is
+# taken as GCG if any line contains '..'
+$line=<SEQFILE>;
+if    ($line =~ /^\>/)        {$format="FASTA"}
+elsif ($line =~ /^ID\s/)      {$format="EMBL"}
+else  {
+    while (! ($line =~ /\.\./) && ($line=<SEQFILE>)) {}
+    if ($line=~/\.\./)        {
+	$format="GCG"}
+    else {
+	die "Sequence format not recognized\n"}
+}
+close(SEQFILE);
+
+###################################################################
+# read all patterns from file
+###################################################################
+
+# one pattern per line, six white space separated fields: regular
+# expression, ID, AC, DO, skip flag and description; patterns are stored
+# from index 1 on
+$ct=0;
+while (<PATFILE>) {
+    $ct=$ct+1;
+    ($pattern[$ct], $patID[$ct], $patAC[$ct], $patDO[$ct], 
+     $patSK[$ct],   $patDE[$ct]) = split(' ',$_,6);
+				# 
+    $patDE[$ct]=$1              if ($patDE[$ct] =~ /^\"(.*)\"$/);
+				# SKIP this pattern!
+				# (the slot is reused by the next pattern)
+    $ct=$ct-1                   if ($skipmode && ($patSK[$ct]==1));
+}
+$maxpat=$ct;
+				#print "$maxpat patterns read\n";  
+close(PATFILE);
+
+###################################################################
+# scan all patterns for each sequence found in file
+###################################################################
+
+open(SEQFILE,$ARGV[1])   || die "cannot reopen $ARGV[1]: $!\n";
+
+				# for br: label
+# $#matchesGlobal=0;
+# $#seq=0;
+				# store sequence
+
+# getseq leaves the next sequence in $Seq, $SeqID and $SeqDE; an empty
+# $Seq ends the loop
+do {
+    &getseq();
+				# for br: label
+#    @seq=split(//,$Seq)         if ($#seq<1);
+    if ($Seq) {
+
+	if ($htmlmode) { 
+	    print "<B>-------------------------------------------------------------</B>\n";
+	    if (length($SeqID)>0) {
+		print "<B>$SeqID  $SeqDE</B>\n";
+		print "<B>-------------------------------------------------------------</B>\n\n";}}
+	else {
+	    print "--------------------------------------------------------\n";
+				# NOTE(review): text mode requires more than one
+				# character, HTML mode at least one
+	    if (length($SeqID)>1) {
+		print "$SeqID $SeqDE\n";
+		print "--------------------------------------------------------\n\n";}}
+				# ------------------------------
+				# extract pattern (and write)
+	&getpattern();
+
+	print "\n";
+    }
+} until ($Seq eq "");
+close(SEQFILE);
+
+
+				# ------------------------------
+				# for HTML mode: links
+
+				# for br: label
+# if ($htmlmode) {
+#     foreach $match (@matchesGlobal) {
+# 	next if ($match !~ /\d+\_\d+/);
+# 	($beg,$end)=split(/_/,$match);
+# 	$beg=~s/\D//g;$end=~s/\D//g;
+# #	next if ($beg < 1 || $end > $len);
+# 	$seq[$beg]="<A NAME=\"".$beg."_".$end."\">".$seq[$beg];
+# 	$seq[$end]=$seq[$end]."</A>";
+#     }
+#     print "patterns in sequence:\n";
+#     print "\n";
+#     print join('',@seq),"\n";
+#     print "\n";
+# }
+
+print "</pre>\n" if ($htmlmode);
+
+exit;
+
+##################################################################
+# extract the pattern
+##################################################################
+sub getpattern {
+    # --------------------------------------------------
+    # Print every hit of every pattern in the current sequence.
+    # GLOBAL in: $maxpat=      number of prosite patterns
+    #            $pattern[$i]= ith pattern (regular expression)
+    #            $patID/$patAC/$patDO/$patDE[$i]= its annotation
+    #            $Seq=         sequence from getseq
+    #            $htmlmode=    1: ID in bold, AC and DO as links
+    # GLOBAL out: $i, @matches, $beg, $end, $match, $outAC, $outDO and
+    #            $outID are set as a side effect
+    # --------------------------------------------------
+    $i = 1;
+    while ($i <= $maxpat) {
+        @matches = ();
+        $end = $beg = 1;
+        while ($Seq =~ /$pattern[$i]/g) {
+            my $hit = $&;		# the matched text
+				# $-[0] is the offset of the match, i.e. the
+				# length of the part before it
+            $beg = $-[0] + 1;
+            $end = $beg + length($hit);
+				# 3 blanks + position, padded to 11 columns
+            $beg = sprintf("%-11s","   $beg");
+            $match = "$beg $hit";
+            push(@matches,$match);
+        }
+				# pattern found
+        if (@matches) {
+            ($outAC,$outDO,$outID) = $htmlmode
+                ? ( "<A $http_dat$patAC[$i]\">$patAC[$i]</A>",
+                    "<A $http_doc$patDO[$i]\">$patDO[$i]</A>",
+                    "<B>$patID[$i]</B>" )
+                : ( $patAC[$i], $patDO[$i], $patID[$i] );
+            print "Pattern-ID: $outID $outAC $outDO\n",
+                  "Pattern-DE: $patDE[$i]\n",
+                  "Pattern:    $pattern[$i]\n";
+            print "$_\n" foreach (@matches);
+            print "\n";
+        }
+        $i++;
+    }
+}
+
+##################################################################
+# get sequence in any format and put it into Seq, SeqID and SeqDE
+##################################################################
+
+sub getseq {
+    # Read the next sequence with the reader that belongs to $format and
+    # copy its result into $Seq, $SeqID and $SeqDE.  Every format other
+    # than "FASTA" and "EMBL" is read as GCG.
+    if ($format eq "FASTA") {
+        &getfasta();
+        ($Seq,$SeqID,$SeqDE) = ($fastabuffer,$FastaID,$FastaDE);
+    }
+    elsif ($format eq "EMBL") {
+        &getembl();
+        ($Seq,$SeqID,$SeqDE) = ($emblbuffer,$EmblID,$EmblDE);
+    }
+    else {
+        &getgcg();
+        ($Seq,$SeqID,$SeqDE) = ($gcgbuffer,$gcgid,$gcgde);
+    }
+}
+  
+###################################################################
+# read Pearson/FASTA format sequence (not to be called externally) 
+###################################################################
+  
+sub getfasta {
+    # Read the next FASTA entry from SEQFILE into $fastabuffer (residues),
+    # $FastaID and $FastaDE.  $fastaline is not reset here: it carries the
+    # header line that the previous call read ahead.
+    $fastabuffer="";
+    $FastaID="";
+    $FastaDE="";
+    $line="";
+    # advance to a header line unless one is already held in $fastaline
+    until (($fastaline =~ /^\>/) || 
+	   eof(SEQFILE)) {
+	$fastaline=<SEQFILE>};
+    # header: '>', id, one white space character, description
+    if ($fastaline=~/^\>(\S+)\s(.*)$/) {
+	$FastaID=$1;
+	$FastaDE=$2;
+    }
+    # collect sequence lines up to the next header or the end of the file
+    until (($line =~ /^\>/) || 
+	   eof(SEQFILE)) {
+	$line=<SEQFILE>;
+	if (!($line =~ /^\>/)) {$fastabuffer .= $line}
+    }
+    # keep the header that was read ahead for the next call
+    if ($line =~ /^\>/) {$fastaline=$line}
+    else {$fastaline=""};
+    # upper case, letters only
+    $fastabuffer =~ tr/a-z/A-Z/;
+    $fastabuffer =~ s/[^A-Z]//g;
+  }
+
+###################################################################
+# read EMBL/Swissprot format sequence (not to be called externally) 
+###################################################################
+    
+sub getembl {
+    # Read the next EMBL/Swissprot entry from SEQFILE into $emblbuffer
+    # (residues), $EmblID and $EmblDE.
+    $emblbuffer="";
+    $EmblID="";
+    $EmblDE="";
+    $line="";
+    # the entry starts at the next "ID" line
+    until (($line =~ /^ID\s/) || eof(SEQFILE)) {$line=<SEQFILE>};
+    if ($line=~/^ID\s+(\w+).*$/)          {$EmblID=$1;}
+    # up to the "SQ" line: join all "DE" lines with single blanks
+    until (($line =~ /^SQ\s/) || eof(SEQFILE)) {
+      $line=<SEQFILE>;
+      if ($line =~ /^DE\s+(.*)/) {
+        if($EmblDE) {$EmblDE.=" "};
+	$EmblDE .= $1
+      }
+    }
+    # sequence lines run from "SQ" to the terminating "//"
+    if ($line =~ /^SQ\s/) {
+      until (($line =~ /^\/\//) || eof(SEQFILE)) {
+        $line=<SEQFILE>;
+        if   (!($line =~ /^\/\//)) {$emblbuffer .= $line}
+      }
+    }
+    # upper case, letters only
+    $emblbuffer =~ tr/a-z/A-Z/;
+    $emblbuffer =~ s/[^A-Z]//g;
+  }
+    
+###################################################################
+# read GCG format sequence (not to be called externally) 
+###################################################################
+  
+sub getgcg {
+    # Read a GCG entry from SEQFILE into $gcgbuffer (residues) and $gcgid;
+    # $gcgde is always empty.  Everything after the first line containing
+    # '..' up to the end of the file is taken as sequence.
+    ($gcgbuffer,$gcgid,$gcgde,$line) = ("","","","");
+
+				# skip to the line that holds the '..' divider
+    while ($line !~ /\.\./ && !eof(SEQFILE)) {
+        $line = <SEQFILE>;
+    }
+    $gcgid = $1 if ($line =~ /^(\w*).*\.\./);
+
+				# the rest of the file is sequence
+    while (!eof(SEQFILE)) {
+        $line = <SEQFILE>;
+        $gcgbuffer .= $line;
+    }
+				# upper case, letters only
+    $gcgbuffer =~ tr/a-z/A-Z/;
+    $gcgbuffer =~ s/[^A-Z]//g;
+}
+    


Property changes on: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/helper_apps/prosite_scan.pl
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/predictproteinrc.default
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/predictproteinrc.default	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/predictproteinrc.default	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,53 @@
+[predictprotein]
+# predictprotein_utildir=path - path to package utilities, default: /usr/share/predictprotein
+predictprotein_utildir=/usr/share/predictprotein
+
+# librg_utils_perl=path - path to librg-utils-perl utilities, default: /usr/share/librg-utils-perl
+librg_utils_perl=/usr/share/librg-utils-perl
+
+# use/do not use result cache
+use_cache=0
+
+# If `1', do not cache certain derived files that take up a lot of space and are not required after all prediction methods are run. Default: 0. [0|1]
+# Files not cached when `1': .safBlastPsi .safBlastPsi80 .hssp .hssp80
+light_cache=0
+
+# bigblastdb: Path to comprehensive blast database
+bigblastdb=/mnt/project/rost_db/data/blast/big
+
+# big80blastdb: Path to comprehensive blast database at 80% sequence identity redundancy level
+big80blastdb=/mnt/project/rost_db/data/blast/big_80
+
+# path to profphd 
+prof=/usr/share/profphd/prof/
+
+# prof numresMin, default: 17
+profnumresmin=17
+
+# maximum length of input AA sequence, default: 6000
+numresmax=6000
+
+# prositedat: Path to `prosite.dat' file, see L<https://rostlab.org/owiki/index.php/Packages#Resource_definitions>
+prositedat=/mnt/project/rost_db/data/prosite/prosite.dat
+
+# prositeconvdat: Path to `prosite_convert.dat' file, see L<https://rostlab.org/owiki/index.php/Packages#Resource_definitions>
+prositeconvdat=/mnt/project/rost_db/data/prosite/prosite_convert.dat
+
+# swissblastdb: Path to SwissProt blast database
+swissblastdb=/mnt/project/rost_db/data/blast/swiss
+
+# psicexe: psic wrapper executable
+psicexe=/usr/share/rost-runpsic/runNewPSIC.pl
+
+# pfam2 db
+pfam2db=/mnt/project/rost_db/data/pfam/Pfam_ls
+
+# pfam3 db
+pfam3db=/mnt/project/rost_db/data/pfam/Pfam-A.hmm
+
+# Swiss-Prot key 'index' (e.g.) for loctree
+spkeyidx=/mnt/project/rost_db/data/swissprot/keyindex_loctree.txt
+
+# default_targets: targets to make if none is given on the command line (separate targets with space)
+default_targets=all
+

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlHr.html
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlHr.html	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlHr.html	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1 @@
+<hr>

Added: trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlQuote.html
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlQuote.html	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein/usr/share/predictprotein/resources/HtmlQuote.html	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,316 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Draft//EN">
+<div class="nice">
+<h2>References</h2>
+<ol>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PredictProtein</font>: </a> <cite>B Rost,G Yachdav and J Liu (2004) The PredictProtein Server. Nucleic Acids Research 32(Web Server issue):W321-W326.
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:		<a href="http://www.predictprotein.org">http://www.predictprotein.org</a>
+<li> Version:	10.20.04
+<li> Description:	PredictProtein is the acronym for all prediction programs run.
+</ul>
+
+
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROSITE</font>: </a> <cite>A Bairoch, P Bucher ∧ K Hofmann (1997) Nucleic Acids Research, 25:217-221
+</cite></strong>
+<ul>
+<li> Author:	A Bairoch, bairoch at cmu.unige.ch P Bucher ∧ K Hofmann
+<li> Contact:	bairoch at cmu.unige.ch
+<li> URL:		<a href="http://www.expasy.ch/prosite">http://www.expasy.ch/prosite</a>
+
+<li> Version:	99.07
+<li> Description:	PROSITE is a database of functional motifs. ScanProsite, finds all functional motifs in your sequence that are annotated in the ProSite db.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">SEG</font>: </a> <cite>J C Wootton ∧ S Federhen (1996) Methods in Enzymology, 266:554-571
+</cite></strong>
+<ul>
+<li> Author:	J C Wootton ∧ S Federhen, wootton at ncbi.nlm.nih.gov
+<li> Contact:	help at predictprotein.org 
+<li> URL:		<a href="wootton at ncbi.nlm.nih.gov">wootton at ncbi.nlm.nih.gov</a>
+
+<li> Version:	1994
+<li> Description:	SEG divides sequences into regions of low-, and high-complexity. Low-complexity regions typically correspond to 'simple sequences' or 'compositionally-biased' regions.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">ProDom</font>: </a> <cite>ELL Sonnhammer ∧ D Kahn (1994) Protein Science, 3:482-492 
+</cite></strong>
+<ul>
+<li> Author:	LL Sonnhammer; J Gouzy, F Corpet, F Servant, D Kahn, dkahn at zyx.toulouse.inra.fr
+<li> Contact:	dkahn at zyx.toulouse.inra.fr
+<li> URL:		<a href="http://protein.toulouse.inra.fr/prodom.html">http://protein.toulouse.inra.fr/prodom.html</a>
+
+<li> Version:	2000.1
+<li> Description:	ProDom is a database of putative protein domains. The database is searched with BLAST for domains corresponding to your protein.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PHD</font>: </a> <cite>B Rost (1996) Methods in Enzymology, 266:525-539
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:		<a href="http://cubic.bioc.columbia.edu">http://cubic.bioc.columbia.edu</a>
+
+<li> Version:	1.96
+<li> Description:	PHD is a suite of programs predicting 1D structure (secondary structure, solvent accessibility) from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PHDsec</font>: </a> <cite>B Rost ∧ C Sander (1993) J. of Molecular Biology, 232:584-599
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:		<a href="http://cubic.bioc.columbia.edu">http://cubic.bioc.columbia.edu</a>
+
+<li> Version:	1.96
+<li> Description:	PHDsec predicts secondary structure from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PHDacc</font>: </a> <cite>B Rost ∧ C Sander (1994) Proteins, 20:216-226
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+
+
+<li> Version:	1.96
+<li> Description:	PHDacc predicts per residue solvent accessibility from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PHDhtm</font>: </a> <cite>B Rost, P Fariselli ∧  R Casadio (1996) Protein Science, 7:1704-1718 
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:		<a href="http://cubic.bioc.columbia.edu">http://cubic.bioc.columbia.edu</a>
+
+<li> Version:	1.96
+<li> Description:	PHDhtm predicts the location and topology of transmembrane helices from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROF</font>: </a> <cite>B Rost (2004) Meth. Mol. Biol., submitted.
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+
+
+<li> Version:	2000_04
+<li> Description:	PROF is a suite of programs predicting 1D structure (secondary structure, solvent accessibility) from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROFsec</font>: </a> <cite>B Rost (2004) Meth. Mol. Biol., submitted.
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:		<a href="http://cubic.bioc.columbia.edu">http://cubic.bioc.columbia.edu</a>
+
+<li> Version:	2000_04
+<li> Description:	PROFsec predicts secondary structure from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROFACC</font>: </a> <cite>B Rost (2004) Meth. Mol. Biol., submitted.
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:		<a href="http://cubic.bioc.columbia.edu">http://cubic.bioc.columbia.edu</a>
+
+<li> Version:	2000_04
+<li> Description:	PROFacc predicts per residue solvent accessibility from multiple sequence alignments.
+
+</ul>
+<li><a NAME="quote_pp"><strong><font SIZE="+1">GLOBE</font>: </a> <cite>B Rost (1998) unpublished
+</cite></strong>
+<ul>
+<li> Author:	B Rost
+<li> Contact:	help at predictprotein.org
+<li> URL:       <a href="http://cubic.bioc.columbia.edu/papers/1999_globe/paper.html">http://cubic.bioc.columbia.edu/papers/1999_globe/paper.html</a>
+
+<li> Version:	1.98.05
+<li> Description:	GLOBE predicts the globularity of a protein
+
+</ul>
+<!--
+<li><a NAME="quote_pp"><strong><font SIZE="+1">CYSPRED</font>: </a> <cite>Fariselli P, Riccobelli P ∧ Casadio R (1999) PROTEINS 36:340-346
+</cite></strong>
+<ul>
+<li> Author:	Fariselli P, Riccobelli P, Casadio R
+<li> Contact:	piero at lipid.biocomp.unibo.it
+<li> URL:		<a href="http://prion.biocomp.unibo.it/cyspred.html">http://prion.biocomp.unibo.it/cyspred.html</a>
+
+<li> Version:	0
+<li> Description:	CYSPRED finds whether the cys residue in your protein forms disulfide bridge.
+
+</ul>
+-->
+<li><a NAME="quote_pp"><strong><font SIZE="+1">DISULFIND</font>: </a> <cite>A.Ceroni, P.Frasconi, A.Passerini and A.Vullo (2004) Bioinformatics, 20, 653-659, 2004
+</cite></strong>
+<ul>
+<li> Author:	A.Ceroni, P.Frasconi, A.Passerini and A.Vullo
+<li> Contact:	cystein at dsi.unifi.it
+<li> URL:	<a href="http://cassandra.dsi.unifi.it/cysteines/index.html">http://cassandra.dsi.unifi.it/cysteines/index.html</a>
+
+<li> Version:	2.0
+<li> Description: DISULFIND is a disulphide bridges predictor based on a two steps process.
+
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">A conformational switch prediction program</font>: </a> <cite>Young et al. Protein Science(1999) 8:1752-64.
+</cite></strong>
+<ul>
+<li> Author:	Young M, Kirshenbaum K, Dill KA and Highsmith S.
+<li> Contact:	mmyoung at sandia.gov, kent at cheme.caltech.edu, shighsmith at sf.uop.edu
+
+
+<li> Version:	1.0
+<li> Description:	ASP finds regions that are most likely to behave as switches in proteins known to exhibit this behavior
+
+</ul>
+<li> <a NAME="quote_pp"><strong><font SIZE="+1">HMMPFAM:</font></a> <cite>Bateman et al. Nucleic Acids Research 2004 32:D138-D141.</cite></strong>
+<ul>
+<li> Author:	Bateman A, Coin L, Durbin R, Finn RD, Hollich V, Griffiths-Jones S, Khanna A, Marshall M, Moxon S, Sonnhammer EL, Studholme DJ, Yeats C, Eddy SR. 
+<li> Contact:   agb at sanger.ac.uk
+
+<li> Version:	2.2g
+<li> Description:	Search one or more sequences against HMM database
+</ul>
+
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">DISULFIND</font>: </a> <cite>A.Ceroni, P.Frasconi, A.Passerini and A.Vullo (2004) Bioinformatics, 20, 653-659, 2004
+</cite></strong>
+<ul>
+<li> Author:	A.Ceroni, P.Frasconi, A.Passerini and A.Vullo
+<li> Contact:	cystein at dsi.unifi.it
+<li> URL:	<a href="http://cassandra.dsi.unifi.it/cysteines/index.html">http://cassandra.dsi.unifi.it/cysteines/index.html</a>
+
+<li> Version:	2.0
+<li> Description: DISULFIND is a disulphide bridges predictor based on a two steps process.
+
+</ul>
+
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">NORS</font>: </a> <cite>Liu J, Rost B (2003) NORSp: predictions of long regions without regular secondary structure. Nucleic Acids Research 31(13):3833-3835
+</cite></strong>
+<ul>
+<li> Author:	J. Liu
+<li> Contact:	Jinfeng Liu <jinfeng.liu at gmail.com>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/NORSp/">http://cubic.bioc.columbia.edu/services/NORSp/</a>
+
+<li> Version:	1.0
+<li> Description: NORSp is a predictor of NOn-Regular Secondary Structure.
+
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">CHOP</font>: </a> <cite>Liu J ∧ Rost B (2004) CHOP proteins into structural domain-like fragments. Proteins, 55(3):678-688
+</cite></strong>
+<ul>
+<li> Author:	J. Liu
+<li> Contact:	Jinfeng Liu <jinfeng.liu at gmail.com>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/chop/">http://cubic.bioc.columbia.edu/services/chop/</a>
+
+<li> Version:	1.0
+<li> Description: CHOP is a method of dissecting proteins into domain-like fragments based on sequence homology.
+
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">ISIS</font>: </a> <cite>Yanay Ofran and Burkhard Rost (2007). ISIS: Interaction Sites Identified from Sequence. Bioinformatics. 23  (2), e13-e16
+</cite></strong>
+<ul>
+<li> Author:	Y. Ofran
+<li> Contact:	Yanay Ofran <yanay at ofranlab.org>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/isis/index.php">http://cubic.bioc.columbia.edu/services/isis/</a>
+
+<li> Version:	1.0
+<li> Description: Prediction of residues involved in external protein-protein interactions.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">DISIS</font>: </a> <cite>Yanay Ofran and Venkatesh Mysore and Burkhard Rost (2007). Prediction of DNA-binding residues from sequence. Bioinformatics. 23  (13), i347-i353
+</cite></strong>
+<ul>
+<li> Author:	Y. Ofran
+<li> Contact:	Yanay Ofran <yanay at ofranlab.org>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/disis/">http://cubic.bioc.columbia.edu/services/disis/</a>
+
+<li> Version:	1.0
+<li> Description: Prediction of residues involved in DNA binding.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">NORSnet</font>: </a> <cite>Avner Schlessinger and Jinfeng Liu and Burkhard Rost (2007). Natively unstructured loops differ from other loops. PLoS Computational Biology. 3  (7), e140.
+</cite></strong>
+<ul>
+<li> Author:	A. Schlessinger
+<li> Contact:	Avner Schlessinger <avnersch at gmail.com>
+<li> URL:	<a href="http://predictprotein.org/">http://predictprotein.org/</a>
+
+<li> Version:	1.0.4
+<li> Description: Identifies unstructured loops from sequence.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROFbval</font>: </a> <cite>Avner Schlessinger and Guy Yachdav and Burkhard Rost (2006). PROFbval: predict flexible and rigid residues in proteins. Bioinformatics. 22  891-893.</cite></strong>
+<ul>
+<li> Author:	A. Schlessinger
+<li> Contact:	Avner Schlessinger <avnersch at gmail.com>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/newwebsite/services/Profbval">http://cubic.bioc.columbia.edu/newwebsite/services/Profbval</a>
+
+<li> Version:	1.0.4
+<li> Description: Prediction of protein flexibility and rigidity prediction from sequence.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">METADISORDER</font>: </a> <cite>A. Schlessinger and M. Punta and G. Yachdav and L. Kajan and B. Rost (2009). Improved disorder prediction by combination of orthogonal approaches. PLoS One. 4  (2), e4433.</cite></strong>
+<ul>
+<li> Author:	A. Schlessinger
+<li> Contact:	Avner Schlessinger <avnersch at gmail.com>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/newwebsite/services/md/">http://cubic.bioc.columbia.edu/newwebsite/services/md/index.php</a>
+
+<li> Version:	1.0.3
+<li> Description: Protein disorder prediction based on orthogonal sources of information.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROFcon</font>: </a> <cite>Marco Punta and Burkhard Rost (2005). PROFcon: novel prediction of long-range contacts. Bioinformatics. 21  (13), 2960-2968</cite></strong>
+<ul>
+<li> Author:	M. Punta
+<li> Contact:	Marco Punta <punta at rostlab.org>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/profcon/">http://cubic.bioc.columbia.edu/services/profcon/</a>
+
+<li> Version:	1.0
+<li> Description: contact prediction method.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">PROFtmb</font>: </a> <cite>Henry Bigelow and Burkhard Rost (2006). PROFtmb: a web server for predicting bacterial transmembrane beta barrel proteins. Nucleic Acids Research. 34  (Web Server issue), W186-188.</cite></strong>
+<ul>
+<li> Author:	H. Bigelow
+<li> Contact:	Henry Bigelow <hrbigelow at gmail.com>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/proftmb/">http://cubic.bioc.columbia.edu/services/proftmb/</a>
+
+<li> Version:	1.1.1
+<li> Description: per-residue prediction of bacterial transmembrane beta barrels.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">SNAP</font>: </a> <cite>Yana Bromberg and Guy Yachdav and Burkhard Rost (2008). SNAP predicts effect of mutations on protein function. Bioinformatics. in press.</cite></strong>
+<ul>
+<li> Author:	Y. Bromberg
+<li> Contact:	Yana Bromberg <bromberg at rostlab.org>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/snap/">http://cubic.bioc.columbia.edu/services/snap/</a>
+
+<li> Version:	1.0.8
+<li> Description: a method for evaluating effects of single amino acid substitutions on protein function.
+</ul>
+
+<li><a NAME="quote_pp"><strong><font SIZE="+1">LOCtree</font>: </a> <cite>Rajesh Nair and Burkhard Rost (2005). Mimicking cellular sorting improves prediction of subcellular localization. Journal of Molecular Biology. 348  (1), 85-100</cite></strong>
+<ul>
+<li> Author:	R. Nair 
+<li> Contact:	Rajesh Nair <rajnair5 at gmail.com>
+<li> URL:	<a href="http://cubic.bioc.columbia.edu/services/loctree/">http://cubic.bioc.columbia.edu/services/loctree/</a>
+
+<li> Version:	 1.0.3
+<li> Description: predict the subcellular localization of proteins.
+</ul>
+
+</ol>
+</div>

Added: trunk/packages/predictprotein/trunk/debian/predictprotein.debhelper.log
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein.debhelper.log	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein.debhelper.log	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,44 @@
+dh_auto_configure
+dh_auto_build
+dh_auto_test
+dh_prep
+dh_installdirs
+dh_auto_install
+dh_install
+dh_installdocs
+dh_installchangelogs
+dh_installexamples
+dh_installman
+dh_installcatalogs
+dh_installcron
+dh_installdebconf
+dh_installemacsen
+dh_installifupdown
+dh_installinfo
+dh_installinit
+dh_installmenu
+dh_installmime
+dh_installmodules
+dh_installlogcheck
+dh_installlogrotate
+dh_installpam
+dh_installppp
+dh_installudev
+dh_installwm
+dh_installxfonts
+dh_installgsettings
+dh_bugfiles
+dh_ucf
+dh_lintian
+dh_gconf
+dh_icons
+dh_perl
+dh_usrlocal
+dh_link
+dh_compress
+dh_fixperms
+dh_installdeb
+dh_gencontrol
+dh_md5sums
+dh_builddeb
+dh_builddeb

Added: trunk/packages/predictprotein/trunk/debian/predictprotein.postrm.debhelper
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein.postrm.debhelper	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein.postrm.debhelper	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,6 @@
+# Automatically added by dh_installdebconf
+if [ "$1" = purge ] && [ -e /usr/share/debconf/confmodule ]; then   # act only when called with "purge" and the confmodule file exists
+	. /usr/share/debconf/confmodule   # source the confmodule file into this shell
+	db_purge   # presumably removes this package's debconf questions; confirm against debconf-devel(7)
+fi
+# End automatically added section

Added: trunk/packages/predictprotein/trunk/debian/predictprotein.substvars
===================================================================
--- trunk/packages/predictprotein/trunk/debian/predictprotein.substvars	                        (rev 0)
+++ trunk/packages/predictprotein/trunk/debian/predictprotein.substvars	2012-07-10 15:44:06 UTC (rev 11712)
@@ -0,0 +1,2 @@
+misc:Depends=debconf (>= 0.5) | debconf-2.0
+perl:Depends=perl




More information about the debian-med-commit mailing list