[med-svn] r2204 - in trunk/community/infrastructure/getData: . debian getData.d

smoe-guest at alioth.debian.org smoe-guest at alioth.debian.org
Tue Jul 8 16:05:43 UTC 2008


Author: smoe-guest
Date: 2008-07-08 16:05:42 +0000 (Tue, 08 Jul 2008)
New Revision: 2204

Added:
   trunk/community/infrastructure/getData/getData.d/
   trunk/community/infrastructure/getData/getData.d/pdb.getData
Modified:
   trunk/community/infrastructure/getData/TODO
   trunk/community/infrastructure/getData/debian/control
   trunk/community/infrastructure/getData/getData
Log:
* getData can now read its configuration from separate files.



Modified: trunk/community/infrastructure/getData/TODO
===================================================================
--- trunk/community/infrastructure/getData/TODO	2008-07-08 12:23:00 UTC (rev 2203)
+++ trunk/community/infrastructure/getData/TODO	2008-07-08 16:05:42 UTC (rev 2204)
@@ -6,11 +6,6 @@
   Some configuration file should be read it at the time the
   application is started.
 
-* allow for Debian maintainers to install additional
-  database descriptions
-
-  This could be achieved by reading in all files in some /etc/getData.d directory or so.
-
 * come up with a set of standard use cases as a start
 
 * define Debian packages and/or applications that must

Modified: trunk/community/infrastructure/getData/debian/control
===================================================================
--- trunk/community/infrastructure/getData/debian/control	2008-07-08 12:23:00 UTC (rev 2203)
+++ trunk/community/infrastructure/getData/debian/control	2008-07-08 16:05:42 UTC (rev 2204)
@@ -3,7 +3,7 @@
 Priority: optional
 Maintainer: Steffen Moeller <moeller at debian.org>
 Build-Depends: cdbs, debhelper (>= 5)
-Standards-Version: 3.7.2
+Standards-Version: 3.8.0
 Homepage: http://debian-med.alioth.debian.org
 
 Package: getdata

Modified: trunk/community/infrastructure/getData/getData
===================================================================
--- trunk/community/infrastructure/getData/getData	2008-07-08 12:23:00 UTC (rev 2203)
+++ trunk/community/infrastructure/getData/getData	2008-07-08 16:05:42 UTC (rev 2204)
@@ -1,6 +1,7 @@
 #!/usr/bin/perl -w
 
 use strict;
+use vars qw(%toBeMirrored);
 
 =head1 NAME
 
@@ -31,24 +32,30 @@
 # Or else go to GNU Web pages http://www.gnu.org and follow the white rabbit.
 #
 
-my $mirrordir;
+my ($mirrordir,$confd);
 
 my $configfile="/etc/getData.conf";
+my $error=0;
 if ( -r $configfile ) {
    open(FH,"<$configfile") or die "Could not open config data at '$configfile'.\n";
    while(<FH>) {
       next if /^\s*#/;
       next unless /\s*(\S.*\S)\s*=\s*"([^"]*)"\s*/;
       #print STDERR "Read: $1\n";
-      if ($1 eq "mirrordir") {
+      if ("mirrordir" eq "$1") {
       	$mirrordir=$2;
-	print STDERR "Set mirrordir to '$mirrordir'.\n";
+	print STDERR "Seting mirrordir to '$mirrordir'.\n";
       }
+      elsif ("confd" eq "$1") {
+        $confd=$2;
+	print STDERR "Configuration directory not existing: '$confd'\n" unless ! -d "$confd";
+	$error++;
+      }
    }
    close FH;
 }
 
-$mirrordir = "/var/lib/mirrored" unless defined($mirrordir);
+exit -1 if $error;
 
 =head1 SYNOPSIS
 
@@ -116,6 +123,10 @@
 
     Perform only the unpacking/indexing, but do not retrieve/update the databases. This option may be beneficial when the site administator is aware of current analyses that should not be disturbed by the indexing process but the downloading from the net can already be started.
 
+=item --confd <directory>
+
+    Allows for the specification of a directory in which multiple files can be stored that will be read by getData upon its invocation. These may add values to the global variable %toBeMirrored that specifies the databases and their download scripts.
+
 =item --config <system>
 
     Preparation of the configuration file that would be reuired for a particular system that deals with the database. The configuration is printed to stdout and is expected to be copied manually to the proper file or folder. One could imagine this process to be automated, though this is not yet implemented. Currently implemented systems are: emboss and dre. "dre" stands for "dynamic runtime environment", which is a concept of the ARC grid middleware of which more can be learned on http://www.nordugrid.org.
@@ -150,7 +161,7 @@
 use Getopt::Long;
 use Cwd;
 
-my %toBeMirrored = (
+%toBeMirrored = (
 
 #
 #                     A  S  T  R  O  N  O  M  Y
@@ -197,6 +208,22 @@
 		"post-download" => "unzip `find ftp.ebi.ac.uk -name '*.zip'`"
 	},
 
+	"jaspar.core" => {
+		name => "Jaspar Core Database of transcription factor binding sites",
+		source => "wget --mirror http://jaspar.genereg.net/html/DOWNLOAD/MatrixDir/JASPAR_CORE_2008/"
+		#recommends => ["TFBS"]
+	},
+
+#	"jaspar.sites" => {
+#		name => "Sites subfolder of JASPAR database",
+#		source => "wget --mirror http://jaspar.genereg.net/html/DOWNLOAD/SITES/*/*.sites"
+#	},
+#
+#	"jaspar.MatrixDir" => {
+#		name => "MatrixDir subfolder of JASPAR database",
+#		source => "wget --mirror http://jaspar.genereg.net/html/DOWNLOAD/MatrixDir/*/*.sites"
+#	}
+
 	"kegg.brite" => {
 		name => "Kegg Brite Database",
 		source => "wget --mirror ftp://ftp.genome.jp/pub/kegg/release/current/brite.tar.gz"
@@ -243,7 +270,10 @@
 		"post-download" => "d=uncompressed; if [ ! -d \$d ]; then mkdir \$d; fi; "
 				  ."rm -rf \$d/trembl.dat; "
 				  ."(find ftp.ebi.ac.uk -name '*.dat.gz' | xargs -r zcat ) > \$d/trembl.dat; "
-				  ."[ -x /usr/bin/dbxflat ] && cd \$d && dbxflat -dbresource embl -dbname trembllocal -idformat swiss -filenames=trembl.dat -fields id,acc -auto",
+				  ."[ -x /usr/bin/dbxflat ] "
+				      . "&& cd \$d && "
+				      . "dbxflat -dbresource embl -dbname trembllocal -idformat swiss -filenames=trembl.dat -fields id,acc -auto",
+		"recommends" => ["emboss"],
 		"test" => "seqret trembllocal:Q9YZN7"
 	},
 
@@ -253,8 +283,11 @@
 		"post-download" => "d=uncompressed; if [ ! -d \$d ]; then mkdir \$d; fi; "
 				  ."rm -rf \$d/swissprot.dat; "
 				  ."(find ftp.ebi.ac.uk -name '*.dat.gz' | xargs -r zcat ) > \$d/swissprot.dat; "
-				  ."[ -x /usr/bin/dbiflat ] && cd \$d && dbiflat -dbname swisslocal -fields acc,des -idformat swiss -auto",
+				  ."[ -x /usr/bin/dbiflat ] && "
+				      . "cd \$d && "
+				      . "dbiflat -dbname swisslocal -fields acc,des -idformat swiss -auto",
 	#	source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_sprot.dat.gz"
+		"recommends" => ["emboss"],
 		"test" => "seqret -feature swisslocal:p12345 -osf swiss -stdout -auto"
 	},
 
@@ -268,17 +301,11 @@
 		source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz"
 	},
 
-#	"jaspar.sites" => {
-#		name => "Sites subfolder of JASPAR database",
-#		source => "wget --mirror http://jaspar.genereg.net/html/DOWNLOAD/SITES/*/*.sites"
-#	},
-#
-#	"jaspar.MatrixDir" => {
-#		name => "MatrixDir subfolder of JASPAR database",
-#		source => "wget --mirror http://jaspar.genereg.net/html/DOWNLOAD/MatrixDir/*/*.sites"
-#	}
 );
 
+$mirrordir = "/var/lib/mirrored" unless defined($mirrordir);
+$confd = "/etc/getData.d" unless defined($confd);
+
 my ($post,$source,$config,$list,$help,$man,$verbose)=(undef,undef,undef,0,0,0,0);
 
 my %options=("mirrordir:s" => \$mirrordir,
@@ -286,6 +313,7 @@
              "help"=>\$help, "man"=>\$man,
 	     "verbose"=>\$verbose,
 	     "post"=>\$post,
+	     "confd"=>\$confd,
 	     "config:s@"=>\$config,
 	     "source"=>\$source);
 
@@ -336,9 +364,24 @@
 	}
 }
 
+if ( -d "$confd" ) {
+	foreach my $f (glob "$confd/*") {
+		eval require $f or die "Could not read '$f': $@\n";
+		print STDERR "Read '$f'\n";
+	}
+}
+
+############## A C T I O N ##################
+
 if ($list) {
+
+	my $maxlength=0;
+	foreach my $db (keys %toBeMirrored) {
+		$maxlength=length($db) if length($db)>$maxlength;
+	}
+
 	foreach my $db (sort keys %toBeMirrored) {
-		printf "%-10s",$db;
+		printf "%-${maxlength}s",$db;
 		# more information like the expected size should be printed here.
 		print "\t".$toBeMirrored{$db}{"name"};
 		print "\n";
@@ -348,6 +391,7 @@
 
 die "Cannot write to root destination directory at '$mirrordir'.\n" unless ( -w "$mirrordir" or $do_config);
 
+
 #print join(", ",@ARGV)."\n";
 
 my $d=getcwd();

Added: trunk/community/infrastructure/getData/getData.d/pdb.getData
===================================================================
--- trunk/community/infrastructure/getData/getData.d/pdb.getData	                        (rev 0)
+++ trunk/community/infrastructure/getData/getData.d/pdb.getData	2008-07-08 16:05:42 UTC (rev 2204)
@@ -0,0 +1,9 @@
+
+print STDERR "Reading PDB configuration file\n";
+
+$toBeMirrored{"pdb"}={
+  "name" => "PDB - protein structure database",
+  "source" => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/msd/pdb_uncompressed"
+};
+
+1;




More information about the debian-med-commit mailing list