[med-svn] r2129 - trunk/community/infrastructure/getData
smoe-guest at alioth.debian.org
smoe-guest at alioth.debian.org
Thu Jul 3 12:34:41 UTC 2008
Author: smoe-guest
Date: 2008-07-03 12:34:40 +0000 (Thu, 03 Jul 2008)
New Revision: 2129
Modified:
trunk/community/infrastructure/getData/getData
Log:
Working towards an automated indexing for Emboss.
Modified: trunk/community/infrastructure/getData/getData
===================================================================
--- trunk/community/infrastructure/getData/getData 2008-07-02 18:51:51 UTC (rev 2128)
+++ trunk/community/infrastructure/getData/getData 2008-07-03 12:34:40 UTC (rev 2129)
@@ -1,5 +1,7 @@
#!/usr/bin/perl -w
+use strict;
+
=head1 NAME
getData - retrieves databases from the Internet
@@ -106,6 +108,14 @@
Only those databases that are explicitly requested to be downloaded will be downloaded. Such databases may require considerable bandwidth, so please make sure you know you are doing the right thing.
+=item --post
+
+ Perform only the unpacking/indexing, but do not retrieve/update the databases. This option is considered useful when adding a new database management system to the system, e.g. after installing EMBOSS.
+
+=item --source
+
+ Perform only the retrieval/update of the databases, but do not unpack/index them. This option may be beneficial when the site administrator is aware of current analyses that should not be disturbed by the indexing process but the downloading from the net can already be started.
+
=back
=head1 EXAMPLES
@@ -208,16 +218,22 @@
source => "wget --mirror http://www.reactome.org/download/interactions.README.txt http://www.reactome.org/download/current/homo_sapiens.interactions.txt.gz"
},
-
"trembl.dat" => {
name => "UniProt - TrEMBL in EMBL format",
- source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_trembl.dat.gz"
- # source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_sprot.dat.gz"
+ source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_trembl.dat.gz",
+ "post-download" => "d=uncompressed; if [ ! -d \$d ]; then mkdir \$d; fi; " # shell snippet: make sure the output directory exists
+ ."rm -rf \$d/trembl.dat; " # drop the previous concatenated flat file
+ ."(find ftp.ebi.ac.uk -name '*.dat.gz' | xargs -r zcat ) > \$d/trembl.dat; " # rebuild the same file that was just removed
+ ."if [ -x /usr/bin/dbxflat ]; then dbxflat -dbname swisslocal -fields acc,des; fi" # "if" rather than "&&": a missing indexer must not yield a non-zero exit status. NOTE(review): dbname "swisslocal" is also used by swiss.dat - confirm
},
"swiss.dat" => {
name => "UniProt - SwissProt in EMBL format",
- source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_sprot.dat.gz ftp://ftp.ebi.ac.uk/pub/databases/swissprot/updates_compressed/*.dat.gz"
+ source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_sprot.dat.gz ftp://ftp.ebi.ac.uk/pub/databases/swissprot/updates_compressed/*.dat.gz",
+ "post-download" => "d=uncompressed; if [ ! -d \$d ]; then mkdir \$d; fi; " # shell snippet: make sure the output directory exists
+ ."rm -rf \$d/swissprot.dat; " # drop the previous concatenated flat file
+ ."(find ftp.ebi.ac.uk -name '*.dat.gz' | xargs -r zcat ) > \$d/swissprot.dat; " # concatenate release and update files
+ ."if [ -x /usr/bin/dbiflat ]; then dbiflat -dbname swisslocal -fields acc,des; fi" # index only when the EMBOSS indexer is installed, without failing otherwise
# source => "wget --mirror ftp://ftp.ebi.ac.uk/pub/databases/swissprot/release_compressed/uniprot_sprot.dat.gz"
},
@@ -242,11 +258,19 @@
# }
);
-my ($list,$help,$man,$verbose)=(0,0,0,0);
+my ($post,$source,$list,$help,$man,$verbose)=(0,0,0,0,0,0); # all flags start at 0 (not undef) so they can be printed under -w
-my %options=("mirrordir:s" => \$mirrordir, "list" => \$list,
- "help"=>\$help, "man"=>\$man, "verbose"=>\$verbose);
+my %options=("mirrordir:s" => \$mirrordir,
+ "list" => \$list,
+ "help"=>\$help, "man"=>\$man,
+ "verbose"=>\$verbose,
+ "post"=>\$post, # --post: only run the post-download (unpack/index) step
+ "source"=>\$source); # --source: only run the download step
+
+my ($do_source,$do_post)=(1,1); # both phases are enabled by default
+
+
sub myhelp () {
require "Pod/Usage.pm";
import Pod::Usage;
@@ -255,9 +279,33 @@
}
-myhelp() if !GetOptions(%options) or $help or $man;
+GetOptions(%options) or die "Could not parse arguments.\n";
+die "Cannot set both --post and --source options.\n" if $post and $source; # the two modes exclude each other
+if ($help or $man) {
+ myhelp()
+}
+
+print "post: ".($post?1:0).", source: ".($source?1:0)."\n" if $verbose; # normalised to 0/1 since the flags may be undef
+
+if ($post) { # --post: skip the download, only unpack/index
+ print "disabling retrieval of database updates.\n";
+ $do_source=0;
+ $do_post=1;
+}
+if ($source) { # --source: only download, skip unpacking/indexing
+ print "disabling unpacking and indexing.\n";
+ $do_post=0;
+ $do_source=1;
+}
+
+if ($verbose) { # dump the value behind every option reference handed to GetOptions
+ foreach my $o (keys %options) {
+ print "$o => " . (defined ${$options{$o}} ? ${$options{$o}} : "(undef)")."\n";
+ }
+}
+
if ($list) {
foreach my $db (sort keys %toBeMirrored) {
printf "%-10s",$db;
@@ -284,26 +332,36 @@
print "Mirroring ".$toBeMirrored{$db}{"name"}." ($db)\n";
unless ( -d "$mirrordir/$db" ) {
- print " creating directory $mirrordir/$db\n";
- mkdir("$mirrordir/$db") or die "Could not create directory \"$mirrordir/$db\"\n";
+ if ($do_source) {
+ print " creating directory $mirrordir/$db\n";
+ mkdir("$mirrordir/$db") or die "Could not create directory \"$mirrordir/$db\"\n";
+ }
+ else {
+ die "Directory '$mirrordir/$db' is not existing, no data to treat post-download,"
+ ." the download itself was disabled via the command line.\n";
+ }
}
chdir("$mirrordir/$db") or die "Could not change directory to \"$mirrordir/$db\"\n";
- my $cmd = $toBeMirrored{$db}{"source"};
- if (!defined ($cmd) or "" eq $cmd) {
- print STDERR "$db: download instructions not specified - skipping.\n";
- next;
- }
- print STDERR "$cmd\n";
- system($cmd) and die "Experienced problem.";
-
- $cmd = $toBeMirrored{$db}{"post-download"};
- if ( defined($cmd) and "" ne $cmd) {
+ if ($do_source) {
+ my $cmd = $toBeMirrored{$db}{"source"};
+ if (!defined ($cmd) or "" eq $cmd) {
+ print STDERR "$db: download instructions not specified - skipping.\n";
+ next;
+ }
print STDERR "$cmd\n";
system($cmd) and die "Experienced problem.";
}
- else {
- print STDERR "$db: No post-download command defined.\n" if $verbose;
+
+ if ($do_post) {
+ my $cmd = $toBeMirrored{$db}{"post-download"};
+ if ( defined($cmd) and "" ne $cmd) {
+ print STDERR "$cmd\n";
+ system($cmd) and die "Experienced problem.";
+ }
+ else {
+ print STDERR "$db: No post-download command defined.\n" if $verbose;
+ }
}
chdir($d) or die "Could not change back to dir '$d'.\n";
}
More information about the debian-med-commit
mailing list