[med-svn] r23294 - in trunk/packages/bioperl-run/trunk/debian: . patches
Andreas Tille
tille at moszumanska.debian.org
Fri Dec 16 12:48:07 UTC 2016
Author: tille
Date: 2016-12-16 12:48:06 +0000 (Fri, 16 Dec 2016)
New Revision: 23294
Added:
trunk/packages/bioperl-run/trunk/debian/patches/move-StandAloneBlast-and-WrapperBase-from-root-Bio.-.patch
Modified:
trunk/packages/bioperl-run/trunk/debian/changelog
trunk/packages/bioperl-run/trunk/debian/patches/series
Log:
Module Bio::Tools::Run::WrapperBase was taken over from BioPerl into bioperl-run in commit acd57a7d14112c5fd2cd979005b072efdaf57679 which is taken over in quilt patch
Modified: trunk/packages/bioperl-run/trunk/debian/changelog
===================================================================
--- trunk/packages/bioperl-run/trunk/debian/changelog 2016-12-16 09:58:28 UTC (rev 23293)
+++ trunk/packages/bioperl-run/trunk/debian/changelog 2016-12-16 12:48:06 UTC (rev 23294)
@@ -3,8 +3,9 @@
* Testsuite: autopkgtest-pkg-perl
* cme fix dpkg-control
* Enable lots of tests
- TODO: not sure whether this should remain for the package release but
- for the moment we need to verify functionality with latest bioperl
+ * Module Bio::Tools::Run::WrapperBase was taken over from BioPerl into
+ bioperl-run in commit acd57a7d14112c5fd2cd979005b072efdaf57679 which is
+ taken over in quilt patch
* Fix homepage
-- Andreas Tille <tille at debian.org> Fri, 16 Dec 2016 09:17:37 +0100
Added: trunk/packages/bioperl-run/trunk/debian/patches/move-StandAloneBlast-and-WrapperBase-from-root-Bio.-.patch
===================================================================
--- trunk/packages/bioperl-run/trunk/debian/patches/move-StandAloneBlast-and-WrapperBase-from-root-Bio.-.patch (rev 0)
+++ trunk/packages/bioperl-run/trunk/debian/patches/move-StandAloneBlast-and-WrapperBase-from-root-Bio.-.patch 2016-12-16 12:48:06 UTC (rev 23294)
@@ -0,0 +1,3430 @@
+From: "Mark A. Jensen" <maj at fortinbras.us>
+Date: Sat, 23 Aug 2014 15:56:12 -0400
+Subject: [PATCH 02/28] move StandAloneBlast* and WrapperBase* from root/Bio...
+ to root/lib/Bio...
+
+---
+ Bio/Tools/Run/StandAloneBlast.pm | 634 ------------
+ Bio/Tools/Run/StandAloneNCBIBlast.pm | 538 ----------
+ Bio/Tools/Run/StandAloneWUBlast.pm | 299 ------
+ Bio/Tools/Run/WrapperBase.pm | 511 ----------
+ Bio/Tools/Run/WrapperBase/CommandExts.pm | 1405 --------------------------
+ lib/Bio/Tools/Run/StandAloneBlast.pm | 634 ++++++++++++
+ lib/Bio/Tools/Run/StandAloneNCBIBlast.pm | 538 ++++++++++
+ lib/Bio/Tools/Run/StandAloneWUBlast.pm | 299 ++++++
+ lib/Bio/Tools/Run/WrapperBase.pm | 511 ++++++++++
+ lib/Bio/Tools/Run/WrapperBase/CommandExts.pm | 1405 ++++++++++++++++++++++++++
+ 10 files changed, 3387 insertions(+), 3387 deletions(-)
+ delete mode 100644 Bio/Tools/Run/StandAloneBlast.pm
+ delete mode 100644 Bio/Tools/Run/StandAloneNCBIBlast.pm
+ delete mode 100644 Bio/Tools/Run/StandAloneWUBlast.pm
+ delete mode 100644 Bio/Tools/Run/WrapperBase.pm
+ delete mode 100644 Bio/Tools/Run/WrapperBase/CommandExts.pm
+ create mode 100644 lib/Bio/Tools/Run/StandAloneBlast.pm
+ create mode 100644 lib/Bio/Tools/Run/StandAloneNCBIBlast.pm
+ create mode 100644 lib/Bio/Tools/Run/StandAloneWUBlast.pm
+ create mode 100644 lib/Bio/Tools/Run/WrapperBase.pm
+ create mode 100644 lib/Bio/Tools/Run/WrapperBase/CommandExts.pm
+
+--- /dev/null
++++ b/lib/Bio/Tools/Run/StandAloneBlast.pm
+@@ -0,0 +1,634 @@
++#
++# BioPerl module for Bio::Tools::Run::StandAloneBlast
++#
++# Copyright Peter Schattner
++#
++# You may distribute this module under the same terms as perl itself
++
++# POD documentation - main docs before the code
++
++=head1 NAME
++
++Bio::Tools::Run::StandAloneBlast - Object for the local execution
++of the NCBI BLAST program suite (blastall, blastpgp, bl2seq).
++There is experimental support for WU-Blast and NCBI rpsblast.
++
++=head1 SYNOPSIS
++
++ # Local-blast "factory object" creation and blast-parameter
++ # initialization:
++ @params = (-database => 'swissprot', -outfile => 'blast1.out');
++ $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
++
++ # Blast a sequence against a database:
++ $str = Bio::SeqIO->new(-file=>'t/amino.fa', -format => 'Fasta');
++ $input = $str->next_seq();
++ $input2 = $str->next_seq();
++ $blast_report = $factory->blastall($input);
++
++ # Run an iterated Blast (psiblast) of a sequence against a database:
++ $factory->j(3); # 'j' is blast parameter for # of iterations
++ $factory->outfile('psiblast1.out');
++ $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
++ $blast_report = $factory->blastpgp($input);
++
++ # Use blast to align 2 sequences against each other:
++ $factory = Bio::Tools::Run::StandAloneBlast->new(-outfile => 'bl2seq.out');
++ $factory->bl2seq($input, $input2);
++
++ # Experimental support for WU-Blast 2.0
++ my $factory = Bio::Tools::Run::StandAloneBlast->new(-program =>"wublastp",
++ -database =>"swissprot",
++ -e => 1e-20);
++ my $blast_report = $factory->wublast($seq);
++
++ # Experimental support for NCBI rpsblast
++ my $factory = Bio::Tools::Run::StandAloneBlast->new(-db => 'CDD/Cog',
++ -expect => 0.001);
++ $factory->F('T'); # turn on SEG filtering of query sequence
++ my $blast_report = $factory->rpsblast($seq);
++
++ # Use the experimental fast Blast parser, 'blast_pull'
++ my $factory = Bio::Tools::Run::StandAloneBlast->new(-_READMETHOD =>'blast_pull',
++ @other_params);
++
++ # Various additional options and input formats are available,
++ # see the DESCRIPTION section for details.
++
++=head1 DESCRIPTION
++
++This DESCRIPTION only documents Bio::Tools::Run::StandAloneBlast, a
++Bioperl object for running the NCBI standAlone BLAST package. Blast
++itself is a large & complex program - for more information regarding
++BLAST, please see the BLAST documentation which accompanies the BLAST
++distribution. BLAST is available from ftp://ncbi.nlm.nih.gov/blast/.
++
++A source of confusion in documenting a BLAST interface is that the
++term "program" is used in - at least - three different ways in the
++BLAST documentation. In this DESCRIPTION, "program" will refer to the
++BLAST routine set by the BLAST C<-p> parameter that can be set to blastn,
++blastp, tblastx etc. We will use the term Blast "executable" to refer
++to the various different executable files that may be called - ie.
++blastall, blastpgp or bl2seq. In addition, there are several BLAST
++capabilities, which are also referred to as "programs", and are
++implemented by using specific combinations of BLAST executables,
++programs and parameters. They will be referred by their specific
++names - eg PSIBLAST and PHIBLAST.
++
++Before running StandAloneBlast it is necessary: to install BLAST
++on your system, to edit set the environmental variable $BLASTDIR
++or your $PATH variable to point to the BLAST directory, and to
++ensure that users have execute privileges for the BLAST program.
++
++If the databases which will be searched by BLAST are located in the
++data subdirectory of the blast program directory (the default
++installation location), StandAloneBlast will find them; however,
++if the database files are located in any other location, environmental
++variable $BLASTDATADIR will need to be set to point to that directory.
++
++The use of the StandAloneBlast module is as follows: Initially, a
++local blast "factory object" is created. The constructor may be passed
++an optional array of (non-default) parameters to be used by the
++factory, eg:
++
++ @params = (-program => 'blastn', -database => 'ecoli.nt');
++ $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
++
++Any parameters not explicitly set will remain as the defaults of the
++BLAST executable. Note each BLAST executable has somewhat different
++parameters and options. See the BLAST Documentation for a description
++or run the BLAST executable from the command line followed solely with
++a "-" to see a list of options and default values for that executable;
++eg E<gt>blastall -.
++
++BLAST parameters can be changed and/or examined at any time after the
++factory has been created. The program checks that any
++parameter/switch being set/read is valid. Except where specifically
++noted, StandAloneBlast uses the same single-letter, case-sensitive
++parameter names as the actual blast program. Currently no checks are
++included to verify that parameters are of the proper type (e.g. string
++or numeric) or that their values are within the proper range.
++
++As an example, to change the value of the Blast parameter 'e' ('e' is
++the parameter for expectation-value cutoff)
++
++ $expectvalue = 0.01;
++ $factory->e($expectvalue);
++
++Note that for improved script readibility one can modify the name of
++the (ncbi) BLAST parameters as desired as long as the initial letter (and
++case) of the parameter are preserved, e.g.:
++
++ $factory->expectvalue($expectvalue);
++
++Unfortunately, some of the BLAST parameters are not the single
++letter one might expect (eg "iteration round" in blastpgp is 'j').
++Again one can check by using, for example:
++
++ > blastpgp -
++
++Wublast parameters need to be complete (ie. don't truncate them to their
++first letter), but are case-insensitive.
++
++Once the factory has been created and the appropriate parameters set,
++one can call one of the supported blast executables. The input
++sequence(s) to these executables may be fasta file(s) as described in
++the BLAST documentation.
++
++ $inputfilename = 't/testquery.fa';
++ $blast_report = $factory->blastall($inputfilename);
++
++In addition, sequence input may be in the form of either a Bio::Seq
++object or (a reference to) an array of Bio::Seq objects, e.g.:
++
++ $input = Bio::Seq->new(-id => "test query",
++ -seq => "ACTACCCTTTAAATCAGTGGGGG");
++ $blast_report = $factory->blastall($input);
++
++NOTE: Use of the BPlite method has been deprecated and is no longer supported.
++
++For blastall and non-psiblast blastpgp runs, report object is a L<Bio::SearchIO>
++object, selected by the user with the parameter _READMETHOD. The leading
++underscore is needed to distinguish this option from options which are passed to
++the BLAST executable. The default parser is Bio::SearchIO::blast. In any case,
++the "raw" blast report is also available. The filename is set by the 'outfile'
++parameter and has the default value of "blastreport.out".
++
++For psiblast execution in the BLAST "jumpstart" mode, the program must
++be passed (in addition to the query sequence itself) an alignment
++containing the query sequence (in the form of a SimpleAlign object) as
++well as a "mask" specifying at what residues position-specific scoring
++matrices (PSSMs) are to used and at what residues default scoring
++matrices (eg BLOSUM) are to be used. See psiblast documentation for
++more details. The mask itself is a string of 0's and 1's which is the
++same length as each sequence in the alignment and has a "1" at
++locations where (PSSMs) are to be used and a "0" at all other
++locations. So for example:
++
++ $str = Bio::AlignIO->new(-file => "cysprot.msf",
++ -format => 'msf');
++ $aln = $str->next_aln();
++ $len = $aln->length_aln();
++ $mask = '1' x $len;
++ # simple case where PSSM's to be used at all residues
++ $report = $factory->blastpgp("cysprot1.fa", $aln, $mask);
++
++For bl2seq execution, StandAloneBlast.pm can be combined with
++AlignIO.pm to directly produce a SimpleAlign object from the alignment
++of the two sequences produced by bl2seq as in:
++
++ # Get 2 sequences
++ $str = Bio::SeqIO->new(-file=>'t/amino.fa' , -format => 'Fasta');
++ my $seq3 = $str->next_seq();
++ my $seq4 = $str->next_seq();
++
++ # Run bl2seq on them
++ $factory = Bio::Tools::Run::StandAloneBlast->new(-program => 'blastp',
++ -outfile => 'bl2seq.out');
++ my $bl2seq_report = $factory->bl2seq($seq3, $seq4);
++
++ # Use AlignIO.pm to create a SimpleAlign object from the bl2seq report
++ $str = Bio::AlignIO->new(-file=> 'bl2seq.out',-format => 'bl2seq');
++ $aln = $str->next_aln();
++
++For more examples of syntax and use of StandAloneBlast.pm, the user is
++encouraged to run the scripts standaloneblast.pl in the bioperl
++examples/tools directory and StandAloneBlast.t in the bioperl t/
++directory.
++
++=head1 FEEDBACK
++
++=head2 Mailing Lists
++
++User feedback is an integral part of the evolution of this and other
++Bioperl modules. Send your comments and suggestions preferably to one
++of the Bioperl mailing lists. Your participation is much appreciated.
++
++ bioperl-l at bioperl.org - General discussion
++ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
++
++=head2 Support
++
++Please direct usage questions or support issues to the mailing list:
++
++I<bioperl-l at bioperl.org>
++
++rather than to the module maintainer directly. Many experienced and
++reponsive experts will be able look at the problem and quickly
++address it. Please include a thorough description of the problem
++with code and data examples if at all possible.
++
++=head2 Reporting Bugs
++
++Report bugs to the Bioperl bug tracking system to help us keep track
++the bugs and their resolution. Bug reports can be submitted via
++the web:
++
++ https://github.com/bioperl/bioperl-live/issues
++
++=head1 AUTHOR - Peter Schattner
++
++Email schattner at alum.mit.edu
++
++=head1 MAINTAINER - Torsten Seemann
++
++Email torsten at infotech.monash.edu.au
++
++=head1 CONTRIBUTORS
++
++Sendu Bala bix at sendu.me.uk (reimplementation)
++
++=head1 APPENDIX
++
++The rest of the documentation details each of the object
++methods. Internal methods are usually preceded with a _
++
++=cut
++
++package Bio::Tools::Run::StandAloneBlast;
++
++use strict;
++use warnings;
++
++use Bio::Root::IO;
++use Bio::Seq;
++use Bio::SeqIO;
++use Bio::SearchIO;
++use File::Spec;
++
++use base qw(Bio::Tools::Run::WrapperBase Bio::Factory::ApplicationFactoryI);
++
++our $AUTOLOAD;
++our $DEFAULTBLASTTYPE = 'NCBI';
++our $DEFAULTREADMETHOD = 'BLAST';
++
++# If local BLAST databases are not stored in the standard
++# /data directory, the variable BLASTDATADIR will need to be
++# set explicitly
++our $DATADIR = $ENV{'BLASTDATADIR'} || $ENV{'BLASTDB'};
++if (! defined $DATADIR && defined $ENV{'BLASTDIR'}) {
++ my $dir = Bio::Root::IO->catfile($ENV{'BLASTDIR'}, 'data');
++ if (-d $dir) {
++ $DATADIR = $dir;
++ }
++ elsif ($ENV{'BLASTDIR'} =~ /bin/) {
++ $dir = $ENV{'BLASTDIR'};
++ $dir =~ s/bin/data/;
++ $DATADIR = $dir if -d $dir;
++ }
++}
++
++=head2 new
++
++ Title : new
++ Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new();
++ Function: Builds a newBio::Tools::Run::StandAloneBlast object
++ Returns : Bio::Tools::Run::StandAloneNCBIBlast or StandAloneWUBlast
++ Args : -quiet => boolean # make program execution quiet
++ -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull'
++ # the parsing method, case insensitive
++
++Essentially all BLAST parameters can be set via StandAloneBlast.pm.
++Some of the most commonly used parameters are listed below. All
++parameters have defaults and are optional except for -p in those programs that
++have it. For a complete listing of settable parameters, run the relevant
++executable BLAST program with the option "-" as in blastall -
++Note that the input parameters (-i, -j, -input) should not be set directly by
++you: this module sets them when you call one of the executable methods.
++
++Blastall
++
++ -p Program Name [String]
++ Input should be one of "blastp", "blastn", "blastx",
++ "tblastn", or "tblastx".
++ -d Database [String] default = nr
++ The database specified must first be formatted with formatdb.
++ Multiple database names (bracketed by quotations) will be accepted.
++ An example would be -d "nr est"
++ -e Expectation value (E) [Real] default = 10.0
++ -o BLAST report Output File [File Out] Optional,
++ default = ./blastreport.out ; set by StandAloneBlast.pm
++ -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer]
++ default = 3
++
++Blastpgp (including Psiblast)
++
++ -j is the maximum number of rounds (default 1; i.e., regular BLAST)
++ -h is the e-value threshold for including sequences in the
++ score matrix model (default 0.001)
++ -c is the "constant" used in the pseudocount formula specified in the paper (default 10)
++ -B Multiple alignment file for PSI-BLAST "jump start mode" Optional
++ -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional
++
++rpsblast
++
++ -d Database [String] default = (none - you must specify a database)
++ The database specified must first be formatted with formatdb.
++ Multiple database names (bracketed by quotations) will be accepted.
++ An example would be -d "Cog Smart"
++ -e Expectation value (E) [Real] default = 10.0
++ -o BLAST report Output File [File Out] Optional,
++ default = ./blastreport.out ; set by StandAloneBlast.pm
++
++Bl2seq
++
++ -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String]
++ default = blastp
++ -o alignment output file [File Out] default = stdout
++ -e Expectation value (E) [Real] default = 10.0
++ -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer]
++ default = 3
++
++WU-Blast
++
++ -p Program Name [String]
++ Input should be one of "wublastp", "wublastn", "wublastx",
++ "wutblastn", or "wutblastx".
++ -d Database [String] default = nr
++ The database specified must first be formatted with xdformat.
++ -E Expectation value (E) [Real] default = 10.0
++ -o BLAST report Output File [File Out] Optional,
++ default = ./blastreport.out ; set by StandAloneBlast.pm
++
++=cut
++
++sub new {
++ my ($caller, @args) = @_;
++ my $class = ref($caller) || $caller;
++
++ # Because of case-sensitivity issues, ncbi and wublast methods are
++ # mutually exclusive. We can't load ncbi methods if we start with wublast
++ # (and vice versa) since wublast e() and E() should be the same thing,
++ # whilst they must be different things in ncbi blast.
++ #
++ # Solution: split StandAloneBlast out into two more modules for NCBI and WU
++
++ if ($class =~ /NCBI|WU/) {
++ return $class->SUPER::new(@args);
++ }
++
++ my %args = @args;
++ my $blasttype = $DEFAULTBLASTTYPE;
++ while (my ($attr, $value) = each %args) {
++ if ($attr =~/^-?\s*program\s*$|^-?p$/) {
++ if ($value =~ /^wu*/) {
++ $blasttype = 'WU';
++ }
++ }
++ }
++
++ my $module = "Bio::Tools::Run::StandAlone${blasttype}Blast";
++ Bio::Root::Root->_load_module($module);
++ return $module->new(@args);
++}
++
++=head2 executable
++
++ Title : executable
++ Usage : my $exe = $blastfactory->executable('blastall');
++ Function: Finds the full path to the executable
++ Returns : string representing the full path to the exe
++ Args : [optional] name of executable to set path to
++ [optional] boolean flag whether or not warn when exe is not found
++
++=cut
++
++sub executable {
++ my ($self, $exename, $exe, $warn) = @_;
++ $exename = 'blastall' unless (defined $exename || $self =~ /WUBlast/);
++ $self->program_name($exename);
++
++ if( defined $exe && -x $exe ) {
++ $self->{'_pathtoexe'}->{$exename} = $exe;
++ }
++ unless( defined $self->{'_pathtoexe'}->{$exename} ) {
++ my $f = $self->program_path($exename);
++ $exe = $self->{'_pathtoexe'}->{$exename} = $f if(-e $f && -x $f );
++
++ # This is how I meant to split up these conditionals --jason
++ # if exe is null we will execute this (handle the case where
++ # PROGRAMDIR pointed to something invalid)
++ unless( $exe ) { # we didn't find it in that last conditional
++ if( ($exe = $self->io->exists_exe($exename)) && -x $exe ) {
++ $self->{'_pathtoexe'}->{$exename} = $exe;
++ }
++ else {
++ $self->warn("Cannot find executable for $exename") if $warn;
++ $self->{'_pathtoexe'}->{$exename} = undef;
++ }
++ }
++ }
++ return $self->{'_pathtoexe'}->{$exename};
++}
++
++=head2 program_dir
++
++ Title : program_dir
++ Usage : my $dir = $factory->program_dir();
++ Function: Abstract get method for dir of program.
++ Returns : string representing program directory
++ Args : none
++
++=cut
++
++sub program_dir {
++ my $self = shift;
++ $self =~ /NCBIBlast/? $ENV{'BLASTDIR'}: $ENV{'WUBLASTDIR'};
++}
++
++sub program_name {
++ my $self = shift;
++ if (@_) { $self->{program_name} = shift }
++ return $self->{program_name} || '';
++}
++
++sub program {
++ my $self = shift;
++ if( wantarray ) {
++ return ($self->executable, $self->p());
++ } else {
++ return $self->executable(@_);
++ }
++}
++
++=head2 _setinput
++
++ Title : _setinput
++ Usage : Internal function, not to be called directly
++ Function: Create input file(s) for Blast executable
++ Example :
++ Returns : name of file containing Blast data input
++ Args : Seq object reference or input file name
++
++=cut
++
++sub _setinput {
++ my ($self, $executable, $input1, $input2) = @_;
++ my ($seq, $temp, $infilename1, $infilename2,$fh ) ;
++ # If $input1 is not a reference it better be the name of a file with
++ # the sequence/ alignment data...
++ $self->io->_io_cleanup();
++
++ SWITCH: {
++ unless (ref $input1) {
++ $infilename1 = (-e $input1) ? $input1 : 0 ;
++ last SWITCH;
++ }
++
++ # $input may be an array of BioSeq objects...
++ if (ref($input1) =~ /ARRAY/i ) {
++ ($fh,$infilename1) = $self->io->tempfile();
++ $temp = Bio::SeqIO->new(-fh=> $fh, -format => 'fasta');
++ foreach $seq (@$input1) {
++ unless ($seq->isa("Bio::PrimarySeqI")) {return 0;}
++ $seq->display_id($seq->display_id);
++ $temp->write_seq($seq);
++ }
++ close $fh;
++ $fh = undef;
++ last SWITCH;
++ }
++
++ # $input may be a single BioSeq object...
++ elsif ($input1->isa("Bio::PrimarySeqI")) {
++ ($fh,$infilename1) = $self->io->tempfile();
++
++ # just in case $input1 is taken from an alignment and has spaces (ie
++ # deletions) indicated within it, we have to remove them - otherwise
++ # the BLAST programs will be unhappy
++ my $seq_string = $input1->seq();
++ $seq_string =~ s/\W+//g; # get rid of spaces in sequence
++ $input1->seq($seq_string);
++ $temp = Bio::SeqIO->new(-fh=> $fh, '-format' => 'fasta');
++ $temp->write_seq($input1);
++ close $fh;
++ undef $fh;
++ last SWITCH;
++ }
++
++ $infilename1 = 0; # Set error flag if you get here
++ }
++
++ unless ($input2) { return $infilename1; }
++
++ SWITCH2: {
++ unless (ref $input2) {
++ $infilename2 = (-e $input2) ? $input2 : 0 ;
++ last SWITCH2;
++ }
++ if ($input2->isa("Bio::PrimarySeqI") && $executable eq 'bl2seq' ) {
++ ($fh,$infilename2) = $self->io->tempfile();
++
++ $temp = Bio::SeqIO->new(-fh=> $fh, '-format' => 'Fasta');
++ $temp->write_seq($input2);
++ close $fh;
++ undef $fh;
++ last SWITCH2;
++ }
++
++ # Option for using psiblast's pre-alignment "jumpstart" feature
++ elsif ($input2->isa("Bio::SimpleAlign") && $executable eq 'blastpgp' ) {
++ # a bit of a lie since it won't be a fasta file
++ ($fh,$infilename2) = $self->io->tempfile();
++
++ # first we retrieve the "mask" that determines which residues should
++ # by scored according to their position and which should be scored
++ # using the non-position-specific matrices
++ my @mask = split("", shift ); # get mask
++
++ # then we have to convert all the residues in every sequence to upper
++ # case at the positions that we want psiblast to use position specific
++ # scoring
++ foreach $seq ( $input2->each_seq() ) {
++ my @seqstringlist = split("",$seq->seq());
++ for (my $i = 0; $i < scalar(@mask); $i++) {
++ unless ( $seqstringlist[$i] =~ /[a-zA-Z]/ ) {next}
++ $seqstringlist[$i] = $mask[$i] ? uc $seqstringlist[$i]: lc $seqstringlist[$i] ;
++ }
++ my $newseqstring = join("", @seqstringlist);
++ $seq->seq($newseqstring);
++ }
++
++ # Now we need to write out the alignment to a file
++ # in the "psi format" which psiblast is expecting
++ $input2->map_chars('\.','-');
++ $temp = Bio::AlignIO->new(-fh=> $fh, '-format' => 'psi');
++ $temp->write_aln($input2);
++ close $fh;
++ undef $fh;
++ last SWITCH2;
++ }
++
++ $infilename2 = 0; # Set error flag if you get here
++ }
++
++ return ($infilename1, $infilename2);
++}
++
++=head1 Bio::Tools::Run::WrapperBase methods
++
++=cut
++
++=head2 no_param_checks
++
++ Title : no_param_checks
++ Usage : $obj->no_param_checks($newval)
++ Function: Boolean flag as to whether or not we should
++ trust the sanity checks for parameter values
++ Returns : value of no_param_checks
++ Args : newvalue (optional)
++
++=cut
++
++=head2 save_tempfiles
++
++ Title : save_tempfiles
++ Usage : $obj->save_tempfiles($newval)
++ Function:
++ Returns : value of save_tempfiles
++ Args : newvalue (optional)
++
++=cut
++
++=head2 outfile_name
++
++ Title : outfile_name
++ Usage : my $outfile = $tcoffee->outfile_name();
++ Function: Get/Set the name of the output file for this run
++ (if you wanted to do something special)
++ Returns : string
++ Args : [optional] string to set value to
++
++=cut
++
++=head2 tempdir
++
++ Title : tempdir
++ Usage : my $tmpdir = $self->tempdir();
++ Function: Retrieve a temporary directory name (which is created)
++ Returns : string which is the name of the temporary directory
++ Args : none
++
++=cut
++
++=head2 cleanup
++
++ Title : cleanup
++ Usage : $tcoffee->cleanup();
++ Function: Will cleanup the tempdir directory after a PAML run
++ Returns : none
++ Args : none
++
++=cut
++
++=head2 io
++
++ Title : io
++ Usage : $obj->io($newval)
++ Function: Gets a Bio::Root::IO object
++ Returns : Bio::Root::IO
++ Args : none
++
++=cut
++
++1;
+--- /dev/null
++++ b/lib/Bio/Tools/Run/StandAloneNCBIBlast.pm
+@@ -0,0 +1,538 @@
++#
++# BioPerl module for Bio::Tools::Run::StandAloneBlast
++#
++# Copyright Peter Schattner
++#
++# You may distribute this module under the same terms as perl itself
++
++# POD documentation - main docs before the code
++
++=head1 NAME
++
++Bio::Tools::Run::StandAloneNCBIBlast - Object for the local execution
++of the NCBI BLAST program suite (blastall, blastpgp, bl2seq). With
++experimental support for NCBI rpsblast.
++
++=head1 SYNOPSIS
++
++ # Do not use directly; see Bio::Tools::Run::StandAloneBlast
++
++=head1 DESCRIPTION
++
++See Bio::Tools::Run::StandAloneBlast
++
++=head1 FEEDBACK
++
++=head2 Mailing Lists
++
++User feedback is an integral part of the evolution of this and other
++Bioperl modules. Send your comments and suggestions preferably to one
++of the Bioperl mailing lists. Your participation is much appreciated.
++
++ bioperl-l at bioperl.org - General discussion
++ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
++
++=head2 Support
++
++Please direct usage questions or support issues to the mailing list:
++
++I<bioperl-l at bioperl.org>
++
++rather than to the module maintainer directly. Many experienced and
++reponsive experts will be able look at the problem and quickly
++address it. Please include a thorough description of the problem
++with code and data examples if at all possible.
++
++=head2 Reporting Bugs
++
++Report bugs to the Bioperl bug tracking system to help us keep track
++the bugs and their resolution. Bug reports can be submitted via
++the web:
++
++ https://github.com/bioperl/bioperl-live/issues
++
++=head1 AUTHOR - Peter Schattner
++
++Email schattner at alum.mit.edu
++
++=head1 MAINTAINER - Torsten Seemann
++
++Email torsten at infotech.monash.edu.au
++
++=head1 CONTRIBUTORS
++
++Sendu Bala bix at sendu.me.uk (reimplementation)
++
++=head1 APPENDIX
++
++The rest of the documentation details each of the object
++methods. Internal methods are usually preceded with a _
++
++=cut
++
++package Bio::Tools::Run::StandAloneNCBIBlast;
++
++use strict;
++use warnings;
++
++use base qw(Bio::Tools::Run::StandAloneBlast);
++
++our $AUTOLOAD;
++our $DEFAULTREADMETHOD = 'BLAST';
++
++# If local BLAST databases are not stored in the standard
++# /data directory, the variable BLASTDATADIR will need to be
++# set explicitly
++our $DATADIR = $Bio::Tools::Run::StandAloneBlast::DATADIR;
++
++our %GENERAL_PARAMS = (i => 'input',
++ o => 'outfile',
++ p => 'program',
++ d => 'database');
++our @BLASTALL_PARAMS = qw(A B C D E F G K L M O P Q R S W X Y Z a b e f l m q r t v w y z n);
++our @BLASTALL_SWITCH = qw(I g J T U n V s);
++our @BLASTPGP_PARAMS = qw(A B C E F G H I J K L M N O P Q R S T U W X Y Z a b c e f h j k l m q s t u v y z);
++our @RPSBLAST_PARAMS = qw(F I J L N O P T U V X Y Z a b e l m v y z);
++our @BL2SEQ_PARAMS = qw(A D E F G I J M S T U V W X Y a e g j m q r t);
++
++our @OTHER_PARAMS = qw(_READMETHOD);
++
++
++=head2 new
++
++ Title : new
++ Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new();
++ Function: Builds a newBio::Tools::Run::StandAloneBlast object
++ Returns : Bio::Tools::Run::StandAloneBlast
++ Args : -quiet => boolean # make program execution quiet
++ -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull'
++ # the parsing method, case insensitive
++
++Essentially all BLAST parameters can be set via StandAloneBlast.pm.
++Some of the most commonly used parameters are listed below. All
++parameters have defaults and are optional except for -p in those programs that
++have it. For a complete listing of settable parameters, run the relevant
++executable BLAST program with the option "-" as in blastall -
++Note that the input parameters (-i, -j, -input) should not be set directly by
++you: this module sets them when you call one of the executable methods.
++
++Blastall
++
++ -p Program Name [String]
++ Input should be one of "blastp", "blastn", "blastx",
++ "tblastn", or "tblastx".
++ -d Database [String] default = nr
++ The database specified must first be formatted with formatdb.
++ Multiple database names (bracketed by quotations) will be accepted.
++ An example would be -d "nr est"
++ -e Expectation value (E) [Real] default = 10.0
++ -o BLAST report Output File [File Out] Optional,
++ default = ./blastreport.out ; set by StandAloneBlast.pm
++ -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer]
++ default = 3
++
++Blastpgp (including Psiblast)
++
++ -j is the maximum number of rounds (default 1; i.e., regular BLAST)
++ -h is the e-value threshold for including sequences in the
++ score matrix model (default 0.001)
++ -c is the "constant" used in the pseudocount formula specified in the paper (default 10)
++ -B Multiple alignment file for PSI-BLAST "jump start mode" Optional
++ -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional
++
++rpsblast
++
++ -d Database [String] default = (none - you must specify a database)
++ The database specified must first be formatted with formatdb.
++ Multiple database names (bracketed by quotations) will be accepted.
++ An example would be -d "Cog Smart"
++ -e Expectation value (E) [Real] default = 10.0
++ -o BLAST report Output File [File Out] Optional,
++ default = ./blastreport.out ; set by StandAloneBlast.pm
++
++Bl2seq
++
++ -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String]
++ default = blastp
++ -o alignment output file [File Out] default = stdout
++ -e Expectation value (E) [Real] default = 10.0
++ -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer]
++ default = 3
++
++=cut
++
++sub new {
++ my ($caller, @args) = @_;
++ my $self = $caller->SUPER::new(@args);
++
++ # StandAloneBlast is special in that "one can modify the name of
++ # the (ncbi) BLAST parameters as desired as long as the initial letter (and
++ # case) of the parameter are preserved". We handle this by truncating input
++ # args to their first char
++ my %args = @args;
++ @args = ();
++ while (my ($attr, $value) = each %args) {
++ $attr =~ s/^-//;
++ $attr = substr($attr, 0, 1) unless $attr =~ /^_/;
++ push(@args, $attr, $value);
++ }
++
++ $self->_set_from_args(\@args, -methods => {(map { $_ => $GENERAL_PARAMS{$_} } keys %GENERAL_PARAMS),
++ (map { $_ => $_ } (@OTHER_PARAMS,
++ @BLASTALL_PARAMS,
++ @BLASTALL_SWITCH,
++ @BLASTPGP_PARAMS,
++ @RPSBLAST_PARAMS,
++ @BL2SEQ_PARAMS))},
++ -code => { map { $_ => 'my $self = shift;
++ if (@_) {
++ my $value = shift;
++ if ($value && $value ne \'F\') {
++ $value = \'T\';
++ }
++ else {
++ $value = \'F\';
++ }
++ $self->{\'_\'.$method} = $value;
++ }
++ return $self->{\'_\'.$method} || return;' } @BLASTALL_SWITCH }, # these methods can take boolean or 'T' and 'F'
++ -create => 1,
++ -force => 1,
++ -case_sensitive => 1);
++
++ my ($tfh, $tempfile) = $self->io->tempfile();
++ my $outfile = $self->o || $self->outfile || $tempfile;
++ $self->o($outfile);
++ close($tfh);
++
++ $self->_READMETHOD($DEFAULTREADMETHOD) unless $self->_READMETHOD;
++
++ return $self;
++}
++
++# StandAloneBlast is special in that "one can modify the name of
++# the (ncbi) BLAST parameters as desired as long as the initial letter (and
++# case) of the parameter are preserved". We handle this with AUTOLOAD
++# redirecting to the automatically created methods from _set_from_args() !
++sub AUTOLOAD {
++ my $self = shift;
++ my $attr = $AUTOLOAD;
++ $attr =~ s/.*:://;
++
++ my $orig = $attr;
++
++ $attr = substr($attr, 0, 1);
++
++ $self->can($attr) || $self->throw("Unallowed parameter: $orig !");
++
++ return $self->$attr(@_);
++}
++
++=head2 blastall
++
++ Title : blastall
++ Usage : $blast_report = $factory->blastall('t/testquery.fa');
++ or
++ $input = Bio::Seq->new(-id=>"test query",
++ -seq=>"ACTACCCTTTAAATCAGTGGGGG");
++ $blast_report = $factory->blastall($input);
++ or
++ $seq_array_ref = \@seq_array;
++ # where @seq_array is an array of Bio::Seq objects
++ $blast_report = $factory->blastall($seq_array_ref);
++ Returns : Reference to a Blast object containing the blast report.
++ Args : Name of a file or Bio::Seq object or an array of
++ Bio::Seq object containing the query sequence(s).
++ Throws an exception if argument is not either a string
++ (eg a filename) or a reference to a Bio::Seq object
++ (or to an array of Seq objects). If argument is string,
++ throws exception if file corresponding to string name can
++ not be found.
++
++=cut
++
++sub blastall {
++ my ($self, $input1) = @_;
++ $self->io->_io_cleanup();
++ my $executable = 'blastall';
++
++ # Create input file pointer
++ my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");
++ $self->i($infilename1);
++
++ my $blast_report = $self->_generic_local_blast($executable);
++}
++
++=head2 blastpgp
++
++ Title : blastpgp
++ Usage : $blast_report = $factory-> blastpgp('t/testquery.fa');
++ or
++ $input = Bio::Seq->new(-id=>"test query",
++ -seq=>"ACTADDEEQQPPTCADEEQQQVVGG");
++ $blast_report = $factory->blastpgp ($input);
++ or
++ $seq_array_ref = \@seq_array;
++ # where @seq_array is an array of Bio::Seq objects
++ $blast_report = $factory-> blastpgp(\@seq_array);
++ Returns : Reference to a Bio::SearchIO object containing the blast report
++ Args : Name of a file or Bio::Seq object. In psiblast jumpstart
++ mode two additional arguments are required: a SimpleAlign
++ object one of whose elements is the query and a "mask" to
++ determine how BLAST should select scoring matrices see
++ DESCRIPTION above for more details.
++
++ Throws an exception if argument is not either a string
++ (eg a filename) or a reference to a Bio::Seq object
++ (or to an array of Seq objects). If argument is string,
++ throws exception if file corresponding to string name can
++ not be found.
++ Returns : Reference to Bio::SearchIO object containing the blast report.
++
++=cut
++
++sub blastpgp {
++ my $self = shift;
++ my $executable = 'blastpgp';
++ my $input1 = shift;
++ my $input2 = shift;
++ # used by blastpgp's -B option to specify which
++ # residues are position aligned
++ my $mask = shift;
++
++ my ($infilename1, $infilename2 ) = $self->_setinput($executable,
++ $input1, $input2,
++ $mask);
++ if (!$infilename1) {$self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");}
++ $self->i($infilename1); # set file name of sequence to be blasted to inputfilename1 (-i param of blastpgp)
++ if ($input2) {
++ unless ($infilename2) {$self->throw("$input2 not SimpleAlign Object in pre-aligned psiblast\n");}
++ $self->B($infilename2); # set file name of partial alignment to inputfilename2 (-B param of blastpgp)
++ }
++
++ my $blast_report = $self->_generic_local_blast($executable);
++}
++
++=head2 rpsblast
++
++ Title : rpsblast
++ Usage : $blast_report = $factory->rpsblast('t/testquery.fa');
++ or
++ $input = Bio::Seq->new(-id=>"test query",
++ -seq=>"MVVLCRADDEEQQPPTCADEEQQQVVGG");
++ $blast_report = $factory->rpsblast($input);
++ or
++ $seq_array_ref = \@seq_array;
++ # where @seq_array is an array of Bio::Seq objects
++ $blast_report = $factory->rpsblast(\@seq_array);
++ Args : Name of a file or Bio::Seq object or an array of
++ Bio::Seq object containing the query sequence(s).
++ Throws an exception if argument is not either a string
++ (eg a filename) or a reference to a Bio::Seq object
++ (or to an array of Seq objects). If argument is string,
++ throws exception if file corresponding to string name can
++ not be found.
++ Returns : Reference to a Bio::SearchIO object containing the blast report
++
++=cut
++
++sub rpsblast {
++ my ($self, $input1) = @_;
++ $self->io->_io_cleanup();
++ my $executable = 'rpsblast';
++
++ # Create input file pointer
++ my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");
++ $self->i($infilename1);
++
++ my $blast_report = $self->_generic_local_blast($executable);
++}
++
++=head2 bl2seq
++
++ Title : bl2seq
++ Usage : $factory-> bl2seq('t/seq1.fa', 't/seq2.fa');
++ or
++ $input1 = Bio::Seq->new(-id=>"test query1",
++ -seq=>"ACTADDEEQQPPTCADEEQQQVVGG");
++ $input2 = Bio::Seq->new(-id=>"test query2",
++ -seq=>"ACTADDEMMMMMMMDEEQQQVVGG");
++ $blast_report = $factory->bl2seq ($input1, $input2);
++ Returns : Reference to a BPbl2seq object containing the blast report.
++ Args : Names of 2 files or 2 Bio::Seq objects containing the
++ sequences to be aligned by bl2seq.
++
++ Throws an exception if argument is not either a pair of
++ strings (eg filenames) or references to Bio::Seq objects.
++ If arguments are strings, throws exception if files
++ corresponding to string names can not be found.
++
++=cut
++
++sub bl2seq {
++ my $self = shift;
++ my $executable = 'bl2seq';
++ my $input1 = shift;
++ my $input2 = shift;
++
++ # Create input file pointer
++ my ($infilename1, $infilename2 ) = $self->_setinput($executable,
++ $input1, $input2);
++ if (!$infilename1){$self->throw(" $input1 not Seq Object or file name!");}
++ if (!$infilename2){$self->throw("$input2 not Seq Object or file name!");}
++
++ $self->i($infilename1); # set file name of first sequence to
++ # be aligned to inputfilename1
++ # (-i param of bl2seq)
++ $self->j($infilename2); # set file name of first sequence to
++ # be aligned to inputfilename2
++ # (-j param of bl2seq)
++
++ my $blast_report = $self->_generic_local_blast($executable);
++}
++
++=head2 _generic_local_blast
++
++ Title : _generic_local_blast
++ Usage : internal function not called directly
++ Returns : Bio::SearchIO
++ Args : Reference to calling object and name of BLAST executable
++
++=cut
++
++sub _generic_local_blast {
++ my $self = shift;
++ my $executable = shift;
++
++ # Create parameter string to pass to Blast program
++ my $param_string = $self->_setparams($executable);
++
++ # run Blast
++ my $blast_report = $self->_runblast($executable, $param_string);
++}
++
++=head2 _runblast
++
++ Title : _runblast
++ Usage : Internal function, not to be called directly
++ Function: makes actual system call to Blast program
++ Example :
++ Returns : Report Bio::SearchIO object in the appropriate format
++ Args : Reference to calling object, name of BLAST executable,
++ and parameter string for executable
++
++=cut
++
++sub _runblast {
++ my ($self, $executable, $param_string) = @_;
++ my ($blast_obj, $exe);
++ if (! ($exe = $self->executable($executable)) ) {
++ $self->warn("cannot find path to $executable");
++ return;
++ }
++
++ # Use double quotes if executable path have empty spaces
++ if ($exe =~ m/ /) {
++ $exe = "\"$exe\"";
++ }
++ my $commandstring = $exe.$param_string;
++
++ $self->debug("$commandstring\n");
++ system($commandstring) && $self->throw("$executable call crashed: $? | $! | $commandstring\n");
++
++ # set significance cutoff to set expectation value or default value
++ # (may want to make this value vary for different executables)
++ my $signif = $self->e() || 1e-5;
++
++ # get outputfilename
++ my $outfile = $self->o();
++
++ # this should allow any blast SearchIO parser (not just 'blast_pull' or 'blast',
++ # but 'blastxml' and 'blasttable'). Fall back to 'blast' if not stipulated.
++ my $method = $self->_READMETHOD;
++ if ($method =~ /^(?:blast|SearchIO)/i ) {
++ $method = 'blast' if $method =~ m{SearchIO}i;
++ $blast_obj = Bio::SearchIO->new(-file => $outfile,
++ -format => $method);
++ }
++ # should these be here? They have been deprecated...
++ elsif ($method =~ /BPlite/i ) {
++ if ($executable =~ /bl2seq/i) {
++ # Added program info so BPbl2seq can compute strand info
++ $self->throw("Use of Bio::Tools::BPbl2seq is deprecated; use Bio::SearchIO modules instead");
++ }
++ elsif ($executable =~ /blastpgp/i && defined $self->j() && $self->j() > 1) {
++ $self->throw("Use of Bio::Tools::BPpsilite is deprecated; use Bio::SearchIO modules instead");
++ }
++ elsif ($executable =~ /blastall|rpsblast/i) {
++ $self->throw("Use of Bio::Tools::BPlite is deprecated; use Bio::SearchIO modules instead");
++ }
++ else {
++ $self->warn("Unrecognized executable $executable");
++ }
++ }
++ else {
++ $self->warn("Unrecognized readmethod $method");
++ }
++
++ return $blast_obj;
++}
++
++=head2 _setparams
++
++ Title : _setparams
++ Usage : Internal function, not to be called directly
++ Function: Create parameter inputs for Blast program
++ Example :
++ Returns : parameter string to be passed to Blast
++ Args : Reference to calling object and name of BLAST executable
++
++=cut
++
++sub _setparams {
++ my ($self, $executable) = @_;
++ my ($attr, $value, @execparams);
++
++ if ($executable eq 'blastall') { @execparams = (@BLASTALL_PARAMS,
++ @BLASTALL_SWITCH); }
++ elsif ($executable eq 'blastpgp') { @execparams = @BLASTPGP_PARAMS; }
++ elsif ($executable eq 'rpsblast') { @execparams = @RPSBLAST_PARAMS; }
++ elsif ($executable eq 'bl2seq' ) { @execparams = @BL2SEQ_PARAMS; }
++
++ # we also have all the general params
++ push(@execparams, keys %GENERAL_PARAMS);
++
++ my $database = $self->d;
++ if ($database && $executable ne 'bl2seq') {
++ # Need to prepend datadirectory to database name
++ my @dbs = split(/ /, $database);
++ for my $i (0..$#dbs) {
++ # (works with multiple databases)
++ if (! (-e $dbs[$i].".nin" || -e $dbs[$i].".pin") &&
++ ! (-e $dbs[$i].".nal" || -e $dbs[$i].".pal") ) {
++ $dbs[$i] = File::Spec->catdir($DATADIR, $dbs[$i]);
++ }
++ }
++ $self->d('"'.join(" ", @dbs).'"');
++ }
++
++ # workaround for problems with shell metacharacters [bug 2707]
++ # simply quoting does not always work!
++ my $tmp = $self->o;
++ $self->o(quotemeta($tmp)) if ($tmp && $^O !~ /^MSWin/);
++
++ my $param_string = $self->SUPER::_setparams(-params => [@execparams],
++ -dash => 1);
++
++ $self->o($tmp) if ($tmp && $^O !~ /^MSWin/);
++
++ $self->d($database) if $database;
++
++ if ($self->quiet()) {
++ $param_string .= ' 2> '.File::Spec->devnull;
++ }
++
++ return $param_string;
++}
++
++1;
+--- /dev/null
++++ b/lib/Bio/Tools/Run/StandAloneWUBlast.pm
+@@ -0,0 +1,299 @@
++#
++# BioPerl module for Bio::Tools::Run::StandAloneBlast
++#
++# Copyright Peter Schattner
++#
++# You may distribute this module under the same terms as perl itself
++
++# POD documentation - main docs before the code
++
++=head1 NAME
++
++Bio::Tools::Run::StandAloneWUBlast - Object for the local execution
++of WU-Blast.
++
++=head1 SYNOPSIS
++
++ # Do not use directly; use Bio::Tools::Run::StandAloneBlast
++
++=head1 DESCRIPTION
++
++See Bio::Tools::Run::StandAloneBlast
++
++=head1 FEEDBACK
++
++=head2 Mailing Lists
++
++User feedback is an integral part of the evolution of this and other
++Bioperl modules. Send your comments and suggestions preferably to one
++of the Bioperl mailing lists. Your participation is much appreciated.
++
++ bioperl-l at bioperl.org - General discussion
++ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
++
++=head2 Support
++
++Please direct usage questions or support issues to the mailing list:
++
++I<bioperl-l at bioperl.org>
++
++rather than to the module maintainer directly. Many experienced and
++reponsive experts will be able look at the problem and quickly
++address it. Please include a thorough description of the problem
++with code and data examples if at all possible.
++
++=head2 Reporting Bugs
++
++Report bugs to the Bioperl bug tracking system to help us keep track
++the bugs and their resolution. Bug reports can be submitted via
++the web:
++
++ https://github.com/bioperl/bioperl-live/issues
++
++=head1 AUTHOR - Peter Schattner
++
++Email schattner at alum.mit.edu
++
++=head1 MAINTAINER - Torsten Seemann
++
++Email torsten at infotech.monash.edu.au
++
++=head1 CONTRIBUTORS
++
++Sendu Bala bix at sendu.me.uk (reimplementation)
++
++=head1 APPENDIX
++
++The rest of the documentation details each of the object
++methods. Internal methods are usually preceded with a _
++
++=cut
++
++package Bio::Tools::Run::StandAloneWUBlast;
++
++use strict;
++
++use base qw(Bio::Tools::Run::StandAloneBlast);
++
++our $AUTOLOAD;
++our $DEFAULTREADMETHOD = 'BLAST';
++
++# If local BLAST databases are not stored in the standard
++# /data directory, the variable BLASTDATADIR will need to be
++# set explicitly
++our $DATADIR = $Bio::Tools::Run::StandAloneBlast::DATADIR;
++
++our %GENERAL_PARAMS = (i => 'input',
++ o => 'outfile',
++ p => 'program',
++ d => 'database');
++our @WUBLAST_PARAMS = qw(e s e2 s2 w t x m y z l k h v b q r
++ matrix filter wordmask filter maskextra hitdist wink ctxfactor gape
++ gaps gape2 gaps2 gapw gapx olf golf olmax golmax gapdecayrate
++ topcombon topcomboe sumstatsmethod hspsepqmax hspsepsmax gapsepqmax
++ gapsepsmax altscore hspmax gspmax qoffset nwstart nwlen qrecmin qrecmax
++ dbrecmin dbrecmax vdbdescmax dbchunks sort_by_pvalue cpus putenv
++ getenv progress);
++our @WUBLAST_SWITCH = qw(kap sump poissonp lcfilter lcmask echofilter
++ stats nogap gapall pingpong nosegs postsw span2 span1 span prune
++ consistency links ucdb gi noseqs qtype qres sort_by_pvalue
++ sort_by_count sort_by_highscore sort_by_totalscore
++ sort_by_subjectlength mmio nonnegok novalidctxok shortqueryok notes
++ warnings errors endputenv getenv endgetenv abortonerror abortonfatal);
++
++our @OTHER_PARAMS = qw(_READMETHOD);
++
++
++=head2 new
++
++ Title : new
++ Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new();
++ Function: Builds a newBio::Tools::Run::StandAloneBlast object
++ Returns : Bio::Tools::Run::StandAloneBlast
++ Args : -quiet => boolean # make program execution quiet
++ -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull'
++ # the parsing method, case insensitive
++
++Essentially all BLAST parameters can be set via StandAloneBlast.pm.
++Some of the most commonly used parameters are listed below. All
++parameters have defaults and are optional except for -p.
++
++ -p Program Name [String]
++ Input should be one of "wublastp", "wublastn", "wublastx",
++ "wutblastn", or "wutblastx".
++ -d Database [String] default = nr
++ The database specified must first be formatted with xdformat.
++ -E Expectation value (E) [Real] default = 10.0
++ -o BLAST report Output File [File Out] Optional,
++ default = ./blastreport.out ; set by StandAloneBlast.pm
++
++=cut
++
++sub new {
++ my ($caller, @args) = @_;
++ my $self = $caller->SUPER::new(@args);
++
++ $self->_set_from_args(\@args, -methods => {(map { $_ => $GENERAL_PARAMS{$_} } keys %GENERAL_PARAMS),
++ (map { $_ => $_ } (@OTHER_PARAMS,
++ @WUBLAST_PARAMS,
++ @WUBLAST_SWITCH))},
++ -create => 1,
++ -force => 1);
++
++ my ($tfh, $tempfile) = $self->io->tempfile();
++ my $outfile = $self->o || $self->outfile || $tempfile;
++ $self->o($outfile);
++ close($tfh);
++
++ $self->_READMETHOD($DEFAULTREADMETHOD) unless $self->_READMETHOD;
++
++ return $self;
++}
++
++# We let get/setter method names be case-insensitve
++sub AUTOLOAD {
++ my $self = shift;
++ my $attr = $AUTOLOAD;
++ $attr =~ s/.*:://;
++
++ my $orig = $attr;
++
++ $attr = lc($attr);
++
++ $self->can($attr) || $self->throw("Unallowed parameter: $orig !");
++
++ return $self->$attr(@_);
++}
++
++=head2 wublast
++
++ Title : wublast
++ Usage : $blast_report = $factory->wublast('t/testquery.fa');
++ or
++ $input = Bio::Seq->new(-id=>"test query",
++ -seq=>"ACTACCCTTTAAATCAGTGGGGG");
++ $blast_report = $factory->wublast($input);
++ or
++ $seq_array_ref = \@seq_array; # where @seq_array is an array of Bio::Seq objects
++ $blast_report = $factory->wublast(\@seq_array);
++ Returns : Reference to a Blast object
++ Args : Name of a file or Bio::Seq object or an array of
++ Bio::Seq object containing the query sequence(s).
++ Throws an exception if argument is not either a string
++ (eg a filename) or a reference to a Bio::Seq object
++ (or to an array of Seq objects). If argument is string,
++ throws exception if file corresponding to string name can
++ not be found.
++
++=cut
++
++sub wublast {
++ my ($self, $input1) = @_;
++ $self->io->_io_cleanup();
++ my $executable = 'wublast';
++
++ # Create input file pointer
++ my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");
++ $self->i($infilename1);
++
++ my $blast_report = $self->_generic_local_wublast($executable);
++}
++
++=head2 _generic_local_wublast
++
++ Title : _generic_local_wublast
++ Usage : internal function not called directly
++ Returns : Blast object
++ Args : Reference to calling object and name of BLAST executable
++
++=cut
++
++sub _generic_local_wublast {
++ my $self = shift;
++ my $executable = shift;
++
++ # Create parameter string to pass to Blast program
++ my $param_string = $self->_setparams($executable);
++ $param_string = " ".$self->database." ".$self->input." ".$param_string;
++
++ # run Blast
++ my $blast_report = $self->_runwublast($executable, $param_string);
++}
++
++=head2 _runwublast
++
++ Title : _runwublast
++ Usage : Internal function, not to be called directly
++ Function: makes actual system call to WU-Blast program
++ Example :
++ Returns : Report Blast object
++ Args : Reference to calling object, name of BLAST executable,
++ and parameter string for executable
++
++=cut
++
++sub _runwublast {
++ my ($self, $executable, $param_string) = @_;
++ my ($blast_obj, $exe);
++ if (! ($exe = $self->executable($self->p))){
++ $self->warn("cannot find path to $executable");
++ return;
++ }
++
++ # Use double quotes if executable path have empty spaces
++ if ($exe =~ m/ /) {
++ $exe = "\"$exe\"";
++ }
++ my $commandstring = $exe.$param_string;
++
++ $self->debug("$commandstring\n");
++ system($commandstring) && $self->throw("$executable call crashed: $? | $! | $commandstring\n");
++
++ # get outputfilename
++ my $outfile = $self->o();
++ $blast_obj = Bio::SearchIO->new(-file => $outfile, -format => 'blast');
++
++ return $blast_obj;
++}
++
++=head2 _setparams
++
++ Title : _setparams
++ Usage : Internal function, not to be called directly
++ Function: Create parameter inputs for Blast program
++ Example :
++ Returns : parameter string to be passed to Blast
++ Args : Reference to calling object and name of BLAST executable
++
++=cut
++
++sub _setparams {
++ my ($self, $executable) = @_;
++ my ($attr, $value, @execparams);
++
++ @execparams = @WUBLAST_PARAMS;
++
++ # of the general params, wublast only takes outfile at
++ # this stage (we add in program, input and database manually elsewhere)
++ push(@execparams, 'o');
++
++ # workaround for problems with shell metacharacters [bug 2707]
++ # simply quoting does not always work!
++ # Fixed so Windows files are not quotemeta'd
++ my $tmp = $self->o;
++ $self->o(quotemeta($tmp)) if ($tmp && $^O !~ /^MSWin/);
++
++ my $param_string = $self->SUPER::_setparams(-params => [@execparams],
++ -switches => \@WUBLAST_SWITCH,
++ -dash => 1);
++
++ $self->o($tmp) if ($tmp && $^O !~ /^MSWin/);
++
++ if ($self->quiet()) {
++ $param_string .= ' 2> '.File::Spec->devnull;
++ }
++
++ return $param_string;
++}
++
++1;
+--- /dev/null
++++ b/lib/Bio/Tools/Run/WrapperBase.pm
+@@ -0,0 +1,511 @@
++#
++# BioPerl module for Bio::Tools::Run::WrapperBase
++#
++# Please direct questions and support issues to <bioperl-l at bioperl.org>
++#
++# Cared for by Jason Stajich <jason at bioperl.org>
++#
++# Copyright Jason Stajich
++#
++# You may distribute this module under the same terms as perl itself
++
++# POD documentation - main docs before the code
++
++=head1 NAME
++
++Bio::Tools::Run::WrapperBase - A Base object for wrappers around executables
++
++=head1 SYNOPSIS
++
++ # do not use this object directly, it provides the following methods
++ # for its subclasses
++
++ my $errstr = $obj->error_string();
++ my $exe = $obj->executable();
++ $obj->save_tempfiles($booleanflag)
++ my $outfile= $obj->outfile_name();
++ my $tempdir= $obj->tempdir(); # get a temporary dir for executing
++ my $io = $obj->io; # Bio::Root::IO object
++ my $cleanup= $obj->cleanup(); # remove tempfiles
++
++ $obj->run({-arg1 => $value});
++
++=head1 DESCRIPTION
++
++This is a basic module from which to build executable wrapper modules.
++It has some basic methods to help when implementing new modules.
++
++=head1 FEEDBACK
++
++=head2 Mailing Lists
++
++User feedback is an integral part of the evolution of this and other
++Bioperl modules. Send your comments and suggestions preferably to
++the Bioperl mailing list. Your participation is much appreciated.
++
++ bioperl-l at bioperl.org - General discussion
++ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
++
++=head2 Support
++
++Please direct usage questions or support issues to the mailing list:
++
++I<bioperl-l at bioperl.org>
++
++rather than to the module maintainer directly. Many experienced and
++reponsive experts will be able look at the problem and quickly
++address it. Please include a thorough description of the problem
++with code and data examples if at all possible.
++
++=head2 Reporting Bugs
++
++Report bugs to the Bioperl bug tracking system to help us keep track of
++the bugs and their resolution. Bug reports can be submitted via the
++web:
++
++ https://github.com/bioperl/bioperl-live/issues
++
++=head1 AUTHOR - Jason Stajich
++
++Email jason-at-bioperl.org
++
++=head1 CONTRIBUTORS
++
++Sendu Bala, bix at sendu.me.uk
++
++=head1 APPENDIX
++
++The rest of the documentation details each of the object methods.
++Internal methods are usually preceded with a _
++
++=cut
++
++
++# Let the code begin...
++
++
++package Bio::Tools::Run::WrapperBase;
++use strict;
++
++# Object preamble - inherits from Bio::Root::Root
++
++use base qw(Bio::Root::Root);
++
++use File::Spec;
++use File::Path qw(); # don't import anything
++
++=head2 run
++
++ Title : run
++ Usage : $wrapper->run({ARGS HERE});
++ Function: Support generic running with args passed in
++ as a hashref
++ Returns : Depends on the implementation, status OR data
++ Args : hashref of named arguments
++
++
++=cut
++
++sub run {
++ my ($self, at args) = @_;
++ $self->throw_not_implemented();
++}
++
++
++=head2 error_string
++
++ Title : error_string
++ Usage : $obj->error_string($newval)
++ Function: Where the output from the last analysis run is stored.
++ Returns : value of error_string
++ Args : newvalue (optional)
++
++
++=cut
++
++sub error_string{
++ my ($self,$value) = @_;
++ if( defined $value) {
++ $self->{'_error_string'} = $value;
++ }
++ return $self->{'_error_string'} || '';
++}
++
++=head2 arguments
++
++ Title : arguments
++ Usage : $obj->arguments($newval)
++ Function: Commandline parameters
++ Returns : value of arguments
++ Args : newvalue (optional)
++
++
++=cut
++
++sub arguments {
++ my ($self,$value) = @_;
++ if(defined $value) {
++ $self->{'_arguments'} = $value;
++ }
++ return $self->{'_arguments'} || '';
++}
++
++
++=head2 no_param_checks
++
++ Title : no_param_checks
++ Usage : $obj->no_param_checks($newval)
++ Function: Boolean flag as to whether or not we should
++ trust the sanity checks for parameter values
++ Returns : value of no_param_checks
++ Args : newvalue (optional)
++
++
++=cut
++
++sub no_param_checks{
++ my ($self,$value) = @_;
++ if( defined $value || ! defined $self->{'no_param_checks'} ) {
++ $value = 0 unless defined $value;
++ $self->{'no_param_checks'} = $value;
++ }
++ return $self->{'no_param_checks'};
++}
++
++=head2 save_tempfiles
++
++ Title : save_tempfiles
++ Usage : $obj->save_tempfiles($newval)
++ Function: Get/set the choice of if tempfiles in the temp dir (see tempdir())
++ are kept or cleaned up. Default is '0', ie. delete temp files.
++ NB: This must be set to the desired value PRIOR to first creating
++ a temp dir with tempdir(). Any attempt to set this after tempdir creation will get a warning.
++ Returns : boolean
++ Args : none to get, boolean to set
++
++=cut
++
++sub save_tempfiles{
++ my $self = shift;
++ my @args = @_;
++ if (($args[0]) && (exists ($self->{'_tmpdir'}))) {
++ $self->warn ("Tempdir already created; setting save_tempfiles will not affect cleanup behavior.");
++ }
++ return $self->io->save_tempfiles(@_);
++}
++
++=head2 outfile_name
++
++ Title : outfile_name
++ Usage : my $outfile = $wrapper->outfile_name();
++ Function: Get/Set the name of the output file for this run
++ (if you wanted to do something special)
++ Returns : string
++ Args : [optional] string to set value to
++
++
++=cut
++
++sub outfile_name{
++ my ($self,$nm) = @_;
++ if( defined $nm || ! defined $self->{'_outfilename'} ) {
++ $nm = 'mlc' unless defined $nm;
++ $self->{'_outfilename'} = $nm;
++ }
++ return $self->{'_outfilename'};
++}
++
++
++=head2 tempdir
++
++ Title : tempdir
++ Usage : my $tmpdir = $self->tempdir();
++ Function: Retrieve a temporary directory name (which is created)
++ Returns : string which is the name of the temporary directory
++ Args : none
++
++
++=cut
++
++sub tempdir{
++ my ($self) = shift;
++
++ $self->{'_tmpdir'} = shift if @_;
++ unless( $self->{'_tmpdir'} ) {
++ $self->{'_tmpdir'} = $self->io->tempdir(CLEANUP => ! $self->save_tempfiles );
++ }
++ unless( -d $self->{'_tmpdir'} ) {
++ mkdir($self->{'_tmpdir'},0777);
++ }
++ return $self->{'_tmpdir'};
++}
++
++=head2 cleanup
++
++ Title : cleanup
++ Usage : $wrapper->cleanup();
++ Function: Will cleanup the tempdir directory
++ Returns : none
++ Args : none
++
++
++=cut
++
++sub cleanup{
++ my ($self) = @_;
++ $self->io->_io_cleanup();
++ if( defined $self->{'_tmpdir'} && -d $self->{'_tmpdir'} ) {
++ my $verbose = ($self->verbose >= 1) ? 1 : 0;
++ File::Path::rmtree( $self->{'_tmpdir'}, $verbose);
++ }
++}
++
++=head2 io
++
++ Title : io
++ Usage : $obj->io($newval)
++ Function: Gets a Bio::Root::IO object
++ Returns : Bio::Root::IO object
++ Args : none
++
++
++=cut
++
++sub io{
++ my ($self) = @_;
++ unless( defined $self->{'io'} ) {
++ $self->{'io'} = Bio::Root::IO->new(-verbose => $self->verbose);
++ }
++ return $self->{'io'};
++}
++
++=head2 version
++
++ Title : version
++ Usage : $version = $wrapper->version()
++ Function: Returns the program version (if available)
++ Returns : string representing version of the program
++ Args : [Optional] value to (re)set version string
++
++
++=cut
++
++sub version{
++ my ($self, at args) = @_;
++ return;
++}
++
++=head2 executable
++
++ Title : executable
++ Usage : my $exe = $factory->executable();
++ Function: Finds the full path to the executable
++ Returns : string representing the full path to the exe
++ Args : [optional] name of executable to set path to
++ [optional] boolean flag whether or not warn when exe is not found
++
++=cut
++
++sub executable {
++ my ($self, $exe, $warn) = @_;
++
++ if (defined $exe) {
++ $self->{'_pathtoexe'} = $exe;
++ }
++
++ unless( defined $self->{'_pathtoexe'} ) {
++ my $prog_path = $self->program_path;
++
++ if ($prog_path) {
++ if (-f $prog_path && -x $prog_path) {
++ $self->{'_pathtoexe'} = $prog_path;
++ }
++ elsif ($self->program_dir) {
++ $self->warn("executable not found in $prog_path, trying system path...") if $warn;
++ }
++ }
++ unless ($self->{'_pathtoexe'}) {
++ my $exe;
++ if ( $exe = $self->io->exists_exe($self->program_name) ) {
++ $self->{'_pathtoexe'} = $exe;
++ }
++ else {
++ $self->warn("Cannot find executable for ".$self->program_name) if $warn;
++ $self->{'_pathtoexe'} = undef;
++ }
++ }
++ }
++
++ # bail if we never found the executable
++ unless ( defined $self->{'_pathtoexe'}) {
++ $self->throw("Cannot find executable for ".$self->program_name .
++ ". path=\"".$self->program_path."\"");
++ }
++ return $self->{'_pathtoexe'};
++}
++
++=head2 program_path
++
++ Title : program_path
++ Usage : my $path = $factory->program_path();
++ Function: Builds path for executable
++ Returns : string representing the full path to the exe
++ Args : none
++
++=cut
++
++sub program_path {
++ my ($self) = @_;
++ my @path;
++ push @path, $self->program_dir if $self->program_dir;
++ push @path, $self->program_name.($^O =~ /mswin/i ? '.exe' : '') if $self->program_name;
++ return File::Spec->catfile(@path);
++}
++
++=head2 program_dir
++
++ Title : program_dir
++ Usage : my $dir = $factory->program_dir();
++ Function: Abstract get method for dir of program. To be implemented
++ by wrapper.
++ Returns : string representing program directory
++ Args : none
++
++=cut
++
++sub program_dir {
++ my ($self) = @_;
++ $self->throw_not_implemented();
++}
++
++=head2 program_name
++
++ Title : program_name
++ Usage : my $name = $factory->program_name();
++ Function: Abstract get method for name of program. To be implemented
++ by wrapper.
++ Returns : string representing program name
++ Args : none
++
++=cut
++
++sub program_name {
++ my ($self) = @_;
++ $self->throw_not_implemented();
++}
++
++=head2 quiet
++
++ Title : quiet
++ Usage : $factory->quiet(1);
++ if ($factory->quiet()) { ... }
++ Function: Get/set the quiet state. Can be used by wrappers to control if
++ program output is printed to the console or not.
++ Returns : boolean
++ Args : none to get, boolean to set
++
++=cut
++
++sub quiet {
++ my $self = shift;
++ if (@_) { $self->{quiet} = shift }
++ return $self->{quiet} || 0;
++}
++
++=head2 _setparams()
++
++ Title : _setparams
++ Usage : $params = $self->_setparams(-params => [qw(window evalue_cutoff)])
++ Function: For internal use by wrapper modules to build parameter strings
++ suitable for sending to the program being wrapped. For each method
++ name supplied, calls the method and adds the method name (as modified
++ by optional things) along with its value (unless a switch) to the
++ parameter string
++ Example : $params = $self->_setparams(-params => [qw(window evalue_cutoff)],
++ -switches => [qw(simple large all)],
++ -double_dash => 1,
++ -underscore_to_dash => 1);
++ If window() and simple() had not been previously called, but
++ evalue_cutoff(0.5), large(1) and all(0) had been called, $params
++ would be ' --evalue-cutoff 0.5 --large'
++ Returns : parameter string
++ Args : -params => [] or {} # array ref of method names to call,
++ or hash ref where keys are method names and
++ values are how those names should be output
++ in the params string
++ -switches => [] or {}# as for -params, but no value is printed for
++ these methods
++ -join => string # define how parameters and their values are
++ joined, default ' '. (eg. could be '=' for
++ param=value)
++ -lc => boolean # lc() method names prior to output in string
++ -dash => boolean # prefix all method names with a single dash
++ -double_dash => bool # prefix all method names with a double dash
++ -mixed_dash => bool # prefix single-character method names with a
++ # single dash, and multi-character method names
++ # with a double-dash
++ -underscore_to_dash => boolean # convert all underscores in method
++ names to dashes
++
++=cut
++
++sub _setparams {
++ my ($self, @args) = @_;
++
++ my ($params, $switches, $join, $lc, $d, $dd, $md, $utd) =
++ $self->_rearrange([qw(PARAMS
++ SWITCHES
++ JOIN
++ LC
++ DASH
++ DOUBLE_DASH
++ MIXED_DASH
++ UNDERSCORE_TO_DASH)], @args);
++ $self->throw('at least one of -params or -switches is required') unless ($params || $switches);
++ $self->throw("-dash, -double_dash and -mixed_dash are mutually exclusive") if (defined($d) + defined($dd) + defined($md) > 1);
++ $join ||= ' ';
++
++ my %params = ref($params) eq 'HASH' ? %{$params} : map { $_ => $_ } @{$params};
++ my %switches = ref($switches) eq 'HASH' ? %{$switches} : map { $_ => $_ } @{$switches};
++
++ my $param_string = '';
++ for my $hash_ref (\%params, \%switches) {
++ while (my ($method, $method_out) = each %{$hash_ref}) {
++ my $value = $self->$method();
++ next unless (defined $value);
++ next if (exists $switches{$method} && ! $value);
++
++ $method_out = lc($method_out) if $lc;
++ my $method_length = length($method_out) if $md;
++ $method_out = '-'.$method_out if ($d || ($md && ($method_length == 1)));
++ $method_out = '--'.$method_out if ($dd || ($md && ($method_length > 1)));
++ $method_out =~ s/_/-/g if $utd;
++
++ if ( exists $params{$method} ) {
++ # if value are quoted with " or ', re-quote it
++ if ( $value =~ m{^[\'\"]+(.+)[\'\"]+$} ) {
++ $value = '"'. $1 . '"';
++ }
++ # quote values that contain spaces
++ elsif ( $value =~ m{\s+} ) {
++ $value = '"'. $value . '"';
++ }
++ }
++
++ $param_string .= ' '.$method_out.(exists $switches{$method} ? '' : $join.$value);
++ }
++ }
++
++ return $param_string;
++}
++
++sub DESTROY {
++ my $self= shift;
++ unless ( $self->save_tempfiles ) {
++ $self->cleanup();
++ }
++ $self->SUPER::DESTROY();
++}
++
++
++1;
+--- /dev/null
++++ b/lib/Bio/Tools/Run/WrapperBase/CommandExts.pm
+@@ -0,0 +1,1405 @@
++#
++# BioPerl module for Bio::Tools::Run::WrapperBase::CommandExts
++#
++# Please direct questions and support issues to <bioperl-l at bioperl.org>
++#
++# Cared for by Mark A. Jensen <maj -at- fortinbras -dot- us>
++#
++# Copyright Mark A. Jensen
++#
++# You may distribute this module under the same terms as perl itself
++
++# POD documentation - main docs before the code
++
++=head1 NAME
++
++Bio::Tools::Run::WrapperBase::CommandExts - Extensions to WrapperBase for handling programs with commands *ALPHA*
++
++=head1 SYNOPSIS
++
++Devs, see L</DEVELOPER INTERFACE>.
++Users, see L</USER INTERFACE>.
++
++=head1 DESCRIPTION
++
++This is a developer-focused experimental module. The main idea is to
++extend L<Bio::Tools::Run::WrapperBase> to make it relatively easy to
++create run wrappers around I<suites> of related programs, like
++C<samtools> or C<blast+>.
++
++Some definitions:
++
++=over
++
++=item * program
++
++The program is the command-line frontend application. C<samtools>, for example, is run from the command line as follows:
++
++ $ samtools view -bS in.bam > out.sam
++ $ samtools faidx
++
++=item * command
++
++The command is the specific component of a suite run by executing the
++program. In the example above, C<view> and C<faidx> are commands.
++
++=item * command prefix
++
++The command prefix is an abbreviation of the command name used
++internally by C<CommandExts> method, and sometimes by the user of the
++factory for specifying command line parameters to subcommands of
++composite commands.
++
++=item * composite command
++
++A composite command is a pipeline or script representing a series of
++separate executions of different commands. Composite commands can be
++specified by configuring C<CommandExts> appropriately; the composite
++command can be run by the user from a factory in the same way as
++ordinary commands.
++
++=item * options, parameters, switches and filespecs
++
++An option is any command-line option; i.e., a specification set off by
++a command-line by a specifier (like C<-v> or C<--outfile>). Parameters
++are command-line options that accept a value (C<-title mydb>);
++switches are boolean flags (C<--no-filter>). Filespecs are barewords
++at the end of the command line that usually indicate input or output
++files. In this module, this includes files that capture STDIN, STDOUT,
++or STDERR via redirection.
++
++=item * pseudo-program
++
++A "pseudo-program" is a way to refer to a collection of related
++applications that are run independently from the command line, rather
++than via a frontend program. The C<blast+> suite of programs is an
++example: C<blastn>, C<makeblastdb>, etc. C<CommandExts> can be
++configured to create a single factory for a suite of related,
++independent programs that treats each independent program as a
++"pseudo-program" command.
++
++=back
++
++This module essentially adds the non-assembler-specific wrapper
++machinery of fangly's L<Bio::Tools::Run::AssemblerBase> to the
++L<Bio::Tools::Run::WrapperBase> namespace, adding the general
++command-handling capability of L<Bio::Tools::Run::BWA>. It creates run
++factories that are automatically Bio::ParameterBaseI compliant,
++meaning that C<available_parameters()>, C<set_parameters()>,
++C<get_parameters>, C<reset_parameters()>, and C<parameters_changed()>
++are available.
++
++=head1 DEVELOPER INTERFACE
++
++C<CommandExts> is currently set up to read particular package globals
++which define the program, the commands available, command-line options
++for those commands, and human-readable aliases for those options.
++
++The easiest way to use C<CommandExts> is probably to create two modules:
++
++ Bio::Tools::Run::YourRunPkg
++ Bio::Tools::Run::YourRunPkg::Config
++
++The package globals should be defined in the C<Config> module, and the
++run package itself should begin with the following mantra:
++
++ use YourRunPkg::Config;
++ use Bio::Tools::Run::WrapperBase;
++ use Bio::Tools::Run::WrapperBase::CommandExts;
++ sub new {
++ my $class = shift;
++ my @args = @_;
++ my $self = $class->SUPER::new(@args);
++ ...
++ return $self;
++ }
++
++The following globals can/should be defined in the C<Config> module:
++
++ $program_name
++ $program_dir
++ $use_dash
++ $join
++ @program_commands
++ %command_prefixes
++ @program_params
++ @program_switches
++ %param_translation
++ %composite_commands
++ %command_files
++
++See L</Config Globals> for detailed descriptions.
++
++The work of creating a run wrapper with C<CommandExts> lies mainly in
++setting up the globals. The key methods for the developer interface are:
++
++=over
++
++=item * program_dir($path_to_programs)
++
++Set this to point the factory to the executables.
++
++=item * _run(@file_args)
++
++Runs an instantiated factory with the given file args. Use in the
++ C<run()> method override.
++
++=item * _create_factory_set()
++
++Returns a hash of instantiated factories for each true command from a
++composite command factory. The hash keys are the true command names, so
++you could do
++
++ $cmds = $composite_fac->_create_factory_set;
++ for (@true_commands) {
++ $cmds->{$_}->_run(@file_args);
++ }
++
++=item * executables($cmd,[$fullpath])
++
++For pseudo-programs, this gets/sets the full path to the executable of
++the true program corresponding to the command C<$cmd>.
++
++=back
++
++=head2 Implementing Composite Commands
++
++=head2 Implementing Pseudo-programs
++
++To indicate that a package wraps disparate programs under a single pseudo program, use an asterisk before the program name:
++
++ package Bio::Tools::Run::YourPkg::Config;
++ ...
++ our $program_name = '*blast+';
++
++and C<_run> will know what to do. Specify the rest of the globals as
++if the desired programs were commands. Use the basename of the
++programs for the command names.
++
++If all the programs can be found in a single directory, just specify
++that directory in C<program_dir()>. If not, use C<executables()> to set the paths to each program explicitly:
++
++ foreach (keys %cmdpaths) {
++ $self->executables($_, $cmdpaths{$_});
++ }
++
++=head2 Config Globals
++
++Here is an example config file. Further details in prose are below.
++
++ package Dummy::Config;
++ use strict;
++ use warnings;
++ no warnings qw(qw);
++ use Exporter;
++ our (@ISA, @EXPORT, @EXPORT_OK);
++ push @ISA, 'Exporter';
++ @EXPORT = qw(
++ $program_name
++ $program_dir
++ $use_dash
++ $join
++ @program_commands
++ %command_prefixes
++ @program_params
++ @program_switches
++ %param_translation
++ %command_files
++ %composite_commands
++ );
++
++ our $program_name = '*flurb';
++ our $program_dir = 'C:\cygwin\usr\local\bin';
++ our $use_dash = 'mixed';
++ our $join = ' ';
++
++ our @program_commands = qw(
++ rpsblast
++ find
++ goob
++ blorb
++ multiglob
++ );
++
++ our %command_prefixes = (
++ blastp => 'blp',
++ tblastn => 'tbn',
++ goob => 'g',
++ blorb => 'b',
++ multiglob => 'm'
++ );
++
++ our @program_params = qw(
++ command
++ g|narf
++ g|schlurb
++ b|scroob
++ b|frelb
++ m|trud
++ );
++
++ our @program_switches = qw(
++ g|freen
++ b|klep
++ );
++
++ our %param_translation = (
++ 'g|narf' => 'n',
++ 'g|schlurb' => 'schlurb',
++ 'g|freen' => 'f',
++ 'b|scroob' => 's',
++ 'b|frelb' => 'frelb'
++ );
++
++ our %command_files = (
++ 'goob' => [qw( fas faq )],
++ );
++
++ our %composite_commands = (
++ 'multiglob' => [qw( blorb goob )]
++ );
++ 1;
++
++C<$use_dash> can be one of C<single>, C<double>, or C<mixed>. See L<Bio::Tools::Run::WrapperBase>.
++
++There is a syntax for the C<%command_files> specification. The token
++matching C<[a-zA-Z0-9_]+> in each element of each arrayref becomes the
++named filespec parameter for the C<_run()> method in the wrapper
++class. Additional symbols surrounding this token indicate how this
++argument should be handled. Some examples:
++
++ >out : stdout is redirected into the file
++ specified by (..., -out => $file,... )
++ <in : stdin is accepted from the file
++ specified by (..., -in => $file,... )
++ 2>log : stderr is redirected into the file
++ specified by (..., -log => $file,... )
++ #opt : this filespec argument is optional
++ (no throw if -opt => $option is missing)
++ 2>#log: if -log is not specified in the arguments, the stderr()
++ method will capture stderr
++ *lst : this filespec can take multiple arguments,
++ specify using an arrayref (..., -lst => [$file1, $file2], ...)
++ *#lst : an optional list
++
++The tokens above are examples; they can be anything matching the above regexp.
++
++=head1 USER INTERFACE
++
++Using a wrapper created with C<Bio::Tools::Run::WrapperBase::CommandExts>:
++
++=over
++
++=item * Getting a list of available commands, parameters, and filespecs:
++
++To get a list of commands, simply:
++
++ @commands = Bio::Tools::Run::ThePkg->available_commands;
++
++The wrapper will generally have human-readable aliases for each of the
++command-line options for the wrapped program and commands. To obtain a
++list of the parameters and switches available for a particular
++command, do
++
++ $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb' );
++ @params = $factory->available_parameters('params');
++ @switches = $factory->available_parameters('switches');
++ @filespec = $factory->available_parameters('filespec');
++ @filespec = $factory->filespec; # alias
++
++=item * Create factories
++
++The factory is a handle on the program and command you wish to
++run. Create a factory using C<new> to set command-line parameters:
++
++ $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb',
++ -freen => 1,
++ -furschlugginer => 'vreeble' );
++
++A shorthand for this is:
++
++ $factory = Bio::Tools::Run::ThePkg->new_glurb(
++ -freen => 1,
++ -furschlugginer => 'vreeble' );
++
++=item * Running programs
++
++To run the program, use the C<run> method, providing filespecs as arguments
++
++ $factory = Bio::Tools::Run::ThePkg->new_assemble( -min_qual => 63 );
++ $factory->run( -faq1 => 'read1.fq', -faq2 => 'read2.fq',
++ -ref => 'refseq.fas', -out => 'new.sam' );
++ # do another
++ $factory->run( -faq1 => 'read-old1.fq', -faq2 => 'read-old2.fq',
++ -ref => 'refseq.fas', -out => 'old.sam' );
++
++Messages on STDOUT and STDERR are dumped into their respective attributes:
++
++ $stdout = $factory->stdout;
++ $stderr = $factory->stderr;
++
++unless STDOUT and/or STDERR are part of the named files in the filespec.
++
++=item * Setting/getting/resetting/polling parameters.
++
++A C<CommandExts>-based factory is always L<Bio::ParameterBaseI>
++compliant. That means that you may set, get, and reset parameters
++using C<set_parameters()>, C<get_parameters()>, and
++C<reset_parameters>. You can ask whether parameters have changed since
++they were last accessed by using the predicate
++C<parameters_changed>. See L<Bio::ParameterBaseI> for more details.
++
++Once set, parameters become attributes of the factory. Thus, you can get their values as follows:
++
++ if ($factory->freen) {
++ $furs = $factory->furshlugginer;
++ #...
++ }
++
++=back
++
++=head1 FEEDBACK
++
++=head2 Mailing Lists
++
++User feedback is an integral part of the evolution of this and other
++Bioperl modules. Send your comments and suggestions preferably to
++the Bioperl mailing list. Your participation is much appreciated.
++
++ bioperl-l at bioperl.org - General discussion
++http://bioperl.org/wiki/Mailing_lists - About the mailing lists
++
++=head2 Support
++
++Please direct usage questions or support issues to the mailing list:
++
++L<bioperl-l at bioperl.org>
++
++rather than to the module maintainer directly. Many experienced and
++reponsive experts will be able look at the problem and quickly
++address it. Please include a thorough description of the problem
++with code and data examples if at all possible.
++
++=head2 Reporting Bugs
++
++Report bugs to the Bioperl bug tracking system to help us keep track
++of the bugs and their resolution. Bug reports can be submitted via
++the web:
++
++ https://github.com/bioperl/bioperl-live/issues
++
++=head1 AUTHOR - Mark A. Jensen
++
++Email maj -at- fortinbras -dot- us
++
++Describe contact details here
++
++=head1 CONTRIBUTORS
++
++Dan Kortschak ( dan -dot- kortschak -at- adelaide -dot- edu -dot- au )
++
++=head1 APPENDIX
++
++The rest of the documentation details each of the object methods.
++Internal methods are usually preceded with a _
++
++=cut
++
++# Let the code begin...
++
++package Bio::Tools::Run::WrapperBase; # need these methods in WrapperBase/maj
++use strict;
++use warnings;
++no warnings qw(redefine);
++
++use Bio::Root::Root;
++use File::Spec;
++use IPC::Run;
++use base qw(Bio::Root::Root Bio::ParameterBaseI);
++
++our $AUTOLOAD;
++
++=head2 new()
++
++ Title : new
++ Usage :
++ Function: constructor for WrapperBase::CommandExts ;
++ correctly binds configuration variables
++ to the WrapperBase object
++ Returns : Bio::Tools::Run::WrapperBase object with command extensions
++ Args :
++ Note : this method subsumes the old _register_program_commands and
++ _set_program_options, leaving out the assembler-specific
++ parms ($qual_param and out_type())
++
++=cut
++
++sub new {
++ my ($class, @args) = @_;
++ my $self = bless ({}, $class);
++ # pull in *copies* of the Config variables from the caller namespace:
++ my ($pkg, @goob) = caller();
++ my ($commands,
++ $prefixes,
++ $params,
++ $switches,
++ $translation,
++ $use_dash,
++ $join,
++ $name,
++ $dir,
++ $composite_commands,
++ $files);
++ for (qw( @program_commands
++ %command_prefixes
++ @program_params
++ @program_switches
++ %param_translation
++ $use_dash
++ $join
++ $program_name
++ $program_dir
++ %composite_commands
++ %command_files ) ) {
++ my ($sigil, $var) = m/(.)(.*)/;
++ my $qualvar = "${sigil}${pkg}::${var}";
++ for ($sigil) {
++ /\@/ && do { $qualvar = "\[$qualvar\]" };
++ /\%/ && do { $qualvar = "\{$qualvar\}" };
++ }
++ my $locvar = "\$${var}";
++ $locvar =~ s/program_|command_|param_//g;
++ eval "$locvar = $qualvar";
++ }
++ # set up the info registry hash
++ my %registry;
++ if ($composite_commands) {
++ $self->_register_composite_commands($composite_commands,
++ $params,
++ $switches,
++ $prefixes);
++ }
++ @registry{qw( _commands _prefixes _files
++ _params _switches _translation
++ _composite_commands )} =
++ ($commands, $prefixes, $files,
++ $params, $switches, $translation,
++ $composite_commands);
++ $self->{_options} = \%registry;
++ if (not defined $use_dash) {
++ $self->{'_options'}->{'_dash'} = 1;
++ } else {
++ $self->{'_options'}->{'_dash'} = $use_dash;
++ }
++ if (not defined $join) {
++ $self->{'_options'}->{'_join'} = ' ';
++ } else {
++ $self->{'_options'}->{'_join'} = $join;
++ }
++ if ($name =~ /^\*/) {
++ $self->is_pseudo(1);
++ $name =~ s/^\*//;
++ }
++ $self->program_name($name) if not defined $self->program_name();
++ $self->program_dir($dir) if not defined $self->program_dir();
++ $self->set_parameters(@args);
++ $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI
++ return $self;
++}
++
++=head2 program_name
++
++ Title : program_name
++ Usage : $factory->program_name($name)
++ Function: get/set the executable name
++ Returns: string
++ Args : string
++
++=cut
++
++sub program_name {
++ my ($self, $val) = @_;
++ $self->{'_program_name'} = $val if $val;
++ return $self->{'_program_name'};
++}
++
++=head2 program_dir
++
++ Title : program_dir
++ Usage : $factory->program_dir($dir)
++ Function: get/set the program dir
++ Returns: string
++ Args : string
++
++=cut
++
++sub program_dir {
++ my ($self, $val) = @_;
++ $self->{'_program_dir'} = $val if $val;
++ return $self->{'_program_dir'};
++}
++
++=head2 _register_program_commands()
++
++ Title : _register_program_commands
++ Usage : $factory->_register_program_commands( \@commands, \%prefixes )
++ Function: Register the commands a program accepts (for programs that act
++ as frontends for a set of commands, each command having its own
++ set of params/switches)
++ Returns : true on success
++ Args : arrayref to a list of commands (scalar strings),
++ hashref to a translation table of the form
++ { $prefix1 => $command1, ... } [optional]
++ Note : To implement a program with this kind of calling structure,
++ include a parameter called 'command' in the
++ @program_params global
++ Note : The translation table is used to associate parameters and
++ switches specified in _set_program_options with the correct
++ program command. In the globals @program_params and
++ @program_switches, specify elements as 'prefix1|param' and
++ 'prefix1|switch', etc.
++
++=cut
++
++=head2 _set_program_options
++
++ Title : _set_program_options
++ Usage : $factory->_set_program_options( \@ args );
++ Function: Register the parameters and flags that an assembler takes.
++ Returns : 1 for success
++ Args : - arguments passed by the user
++ - parameters that the program accepts, optional (default: none)
++ - switches that the program accepts, optional (default: none)
++ - parameter translation, optional (default: no translation occurs)
++ - dash option for the program parameters, [1|single|double|mixed],
++ optional (default: yes, use single dashes only)
++ - join, optional (default: ' ')
++
++=cut
++
++=head2 _translate_params
++
++ Title : _translate_params
++ Usage : @options = @{$assembler->_translate_params( )};
++ Function: Translate the Bioperl arguments into the arguments to pass to the
++ program on the command line
++ Returns : Arrayref of arguments
++ Args : none
++
++=cut
++
++sub _translate_params {
++ my ($self) = @_;
++ # Get option string
++ my ($params, $switches, $join, $dash, $translat) =
++ @{$self->{_options}}{qw(_params _switches _join _dash _translation)};
++
++ # access the multiple dash choices of _setparams...
++ my @dash_args;
++ $dash ||= 1; # default as advertised
++ for ($dash) {
++ $_ eq '1' && do {
++ @dash_args = ( -dash => 1 );
++ last;
++ };
++ /^s/ && do { #single dash only
++ @dash_args = ( -dash => 1);
++ last;
++ };
++ /^d/ && do { # double dash only
++ @dash_args = ( -double_dash => 1);
++ last;
++ };
++ /^m/ && do { # mixed dash: one-letter opts get -,
++ # long opts get --
++ @dash_args = ( -mixed_dash => 1);
++ last;
++ };
++ do {
++ $self->warn( "Dash spec '$dash' not recognized; using 'single'" );
++ @dash_args = ( -dash => 1 );
++ };
++ }
++ my $options = $self->_setparams(
++ -params => $params,
++ -switches => $switches,
++ -join => $join,
++ @dash_args
++ );
++
++ # Translate options
++ # parse more carefully - bioperl-run issue #12
++ $options =~ s/^\s+//;
++ $options =~ s/\s+$//;
++ my @options;
++ my $in_quotes;
++ for (split(/(\s|$join)/, $options)) {
++ if (/^-/) {
++ push @options, $_;
++ }
++ elsif (s/^"//) {
++ $in_quotes=1 unless (s/["']$//);
++ push @options, $_;
++ }
++ elsif (s/"$//) {
++ $options[-1] .= $_;
++ $in_quotes=0;
++ }
++ else {
++ $in_quotes ? $options[-1] .= $_ :
++ push(@options, $_);
++ }
++ }
++ $self->throw("Unmatched quote in option value") if $in_quotes;
++ for (my $i = 0; $i < scalar @options; $i++) {
++ my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ );
++ if (defined $name) {
++ if ($name =~ /command/i) {
++ $name = $options[$i+2]; # get the command
++ splice @options, $i, 4;
++ $i--;
++ # don't add the command if this is a pseudo-program
++ unshift @options, $name unless ($self->is_pseudo); # put command first
++ }
++ elsif (defined $$translat{$name}) {
++ $options[$i] = $prefix.$$translat{$name};
++ }
++ }
++ else {
++ splice @options, $i, 1;
++ $i--;
++ }
++ }
++
++ @options = grep (!/^\s*$/, at options);
++ # this is a kludge for mixed options: the reason mixed doesn't
++ # work right on the pass through _setparams is that the
++ # *aliases* and not the actual params are passed to it.
++ # here we just rejigger the dashes
++ if ($dash =~ /^m/) {
++ s/--([a-z0-9](?:\s|$))/-$1/gi for @options;
++ }
++ # Now arrayify the options
++
++ return \@options;
++}
++
++=head2 executable()
++
++ Title : executable
++ Usage :
++ Function: find the full path to the main executable,
++ or to the command executable for pseudo-programs
++ Returns : full path, if found
++ Args : [optional] explicit path to the executable
++ (will set the appropriate command exec if
++ applicable)
++ [optional] boolean flag whether or not to warn when exe no found
++ Note : overrides WrapperBase.pm
++
++=cut
++
++sub executable {
++ my $self = shift;
++ my ($exe, $warn) = @_;
++ if ($self->is_pseudo) {
++ return $self->{_pathtoexe} = $self->executables($self->command,$exe);
++ }
++
++ # otherwise
++ # setter
++ if (defined $exe) {
++ $self->throw("binary '$exe' does not exist") unless -e $exe;
++ $self->throw("'$exe' is not executable") unless -x $exe;
++ return $self->{_pathtoexe} = $exe;
++ }
++
++ # getter
++ return $self->{_pathtoexe} if defined $self->{_pathstoexe};
++
++ # finder
++ return $self->{_pathtoexe} = $self->_find_executable($exe, $warn);
++}
++
++=head2 executables()
++
++ Title : executables
++ Usage :
++ Function: find the full path to a command's executable
++ Returns : full path (scalar string)
++ Args : command (scalar string),
++ [optional] explicit path to this command exe
++ [optional] boolean flag whether or not to warn when exe no found
++
++=cut
++
++sub executables {
++ my $self = shift;
++ my ($cmd, $exe, $warn) = @_;
++ # for now, barf if this is not a pseudo program
++ $self->throw("This wrapper represents a single program with commands, not multiple programs; can't use executables()") unless $self->is_pseudo;
++ $self->throw("Command name required at arg 1") unless defined $cmd;
++ $self->throw("The desired executable '$cmd' is not registered as a command") unless grep /^$cmd$/, @{$self->{_options}->{_commands}};
++
++ # setter
++ if (defined $exe) {
++ $self->throw("binary '$exe' does not exist") unless -e $exe;
++ $self->throw("'$exe' is not executable") unless -x $exe;
++ $self->{_pathstoexe} = {} unless defined $self->{_pathstoexe};
++ return $self->{_pathstoexe}->{$cmd} = $exe;
++ }
++
++ # getter
++ return $self->{_pathstoexe}->{$cmd} if defined $self->{_pathstoexe}->{$cmd};
++
++ $exe ||= $cmd;
++ # finder
++ return $self->{_pathstoexe}->{$cmd} = $self->_find_executable($exe, $warn);
++}
++
++=head2 _find_executable()
++
++ Title : _find_executable
++ Usage : my $exe_path = $fac->_find_executable($exe, $warn);
++ Function: find the full path to a named executable,
++ Returns : full path, if found
++ Args : name of executable to find
++ [optional] boolean flag whether or not to warn when exe no found
++ Note : differs from executable and executables in not
++ setting any object attributes
++
++=cut
++
++sub _find_executable {
++ my $self = shift;
++ my ($exe, $warn) = @_;
++
++ if ($self->is_pseudo && !$exe) {
++ if (!$self->command) {
++ # this throw probably appropriate
++ # the rest are now warns if $warn.../maj
++ $self->throw(
++ "The ".__PACKAGE__." wrapper represents several different programs;".
++ "arg1 to _find_executable must be specified explicitly,".
++ "or the command() attribute set");
++ }
++ else {
++ $exe = $self->command;
++ }
++ }
++ $exe ||= $self->program_path;
++
++ my $path;
++ if ($self->program_dir) {
++ $path = File::Spec->catfile($self->program_dir, $exe);
++ } else {
++ $path = $exe;
++ $self->warn('Program directory not specified; use program_dir($path).') if $warn;
++ }
++
++ # use provided info - we are allowed to follow symlinks, but refuse directories
++ map { return $path.$_ if ( -x $path.$_ && !(-d $path.$_) ) } ('', '.exe') if defined $path;
++
++ # couldn't get path to executable from provided info, so use system path
++ $path = $path ? " in $path" : undef;
++ $self->warn("Executable $exe not found$path, trying system path...") if $warn;
++ if ($path = $self->io->exists_exe($exe)) {
++ return $path;
++ } else {
++ $self->warn("Cannot find executable for program '".($self->is_pseudo ? $self->command : $self->program_name)."'") if $warn;
++ return;
++ }
++}
++
++=head2 _register_composite_commands()
++
++ Title : _register_composite_commands
++ Usage :
++ Function: adds subcomand params and switches for composite commands
++ Returns : true on success
++ Args : \%composite_commands,
++ \@program_params,
++ \@program_switches
++
++=cut
++
++sub _register_composite_commands {
++ my $self = shift;
++ my ($composite_commands, $program_params,
++ $program_switches, $command_prefixes) = @_;
++ my @sub_params;
++ my @sub_switches;
++ foreach my $cmd (keys %$composite_commands) {
++ my $pfx = $command_prefixes->{$cmd} || $cmd;
++ foreach my $subcmd ( @{$$composite_commands{$cmd}} ) {
++ my $spfx = $command_prefixes->{$subcmd} || $subcmd;
++ my @sub_program_params = grep /^$spfx\|/, @$program_params;
++ my @sub_program_switches = grep /^$spfx\|/, @$program_switches;
++ for (@sub_program_params) {
++ m/^$spfx\|(.*)/;
++ push @sub_params, "$pfx\|${spfx}_".$1;
++ }
++ for (@sub_program_switches) {
++ m/^$spfx\|(.*)/;
++ push @sub_switches, "$pfx\|${spfx}_".$1;
++ }
++ }
++ }
++ push @$program_params, @sub_params;
++ push @$program_switches, @sub_switches;
++ # translations for subcmd params/switches not necessary
++ return 1;
++}
++
++=head2 _create_factory_set()
++
++ Title : _create_factory_set
++ Usage : @facs = $self->_create_factory_set
++ Function: instantiate a set of individual command factories for
++ a given composite command
++ Factories will have the correct parameter fields set for
++ their own subcommand
++ Returns : hash of factories: ( $subcmd_prefix => $subcmd_factory, ... )
++ Args : none
++
++=cut
++
++sub _create_factory_set {
++ my $self = shift;
++ $self->throw('command not set') unless $self->command;
++ my $cmd = $self->command;
++ $self->throw('_create_factory_set only works on composite commands')
++ unless grep /^$cmd$/, keys %{$self->{_options}->{_composite_commands}};
++ my %ret;
++ my $class = ref $self;
++ my $subargs_hash = $self->_collate_subcmd_args($cmd);
++ for (keys %$subargs_hash) {
++ $ret{$_} = $class->new( -command => $_, @{$$subargs_hash{$_}} );
++ }
++ return %ret;
++}
++
++=head2 _collate_subcmd_args()
++
++ Title : _collate_subcmd_args
++ Usage : $args_hash = $self->_collate_subcmd_args
++ Function: collate parameters and switches into command-specific
++ arg lists for passing to new()
++ Returns : hash of named argument lists
++ Args : [optional] composite cmd prefix (scalar string)
++ [default is 'run']
++
++=cut
++
++sub _collate_subcmd_args {
++ my $self = shift;
++ my $cmd = shift;
++ my %ret;
++ # default command is 'run'
++ $cmd ||= 'run';
++ return unless $self->{'_options'}->{'_composite_commands'};
++ return unless $self->{'_options'}->{'_composite_commands'}->{$cmd};
++ my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}};
++
++ my $cur_options = $self->{'_options'};
++ # collate
++ foreach my $subcmd (@subcmds) {
++ # find the composite cmd form of the argument in
++ # the current params and switches
++ # e.g., map_max_mismatches
++ my $pfx = $self->{_options}->{_prefixes}->{$subcmd} || $subcmd;
++ my @params = grep /^${pfx}_/, @{$$cur_options{'_params'}};
++ my @switches = grep /^${pfx}_/, @{$$cur_options{'_switches'}};
++ $ret{$subcmd} = [];
++ # create an argument list suitable for passing to new() of
++ # the subcommand factory...
++ foreach my $opt (@params, @switches) {
++ my $subopt = $opt;
++ $subopt =~ s/^${pfx}_//;
++ push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt;
++ }
++ }
++ return \%ret;
++}
++
++=head2 _run
++
++ Title : _run
++ Usage : $fac->_run( @file_args )
++ Function: Run a command as specified during object contruction
++ Returns : true on success
++ Args : a specification of the files to operate on according
++ to the filespec
++
++=cut
++
++sub _run {
++ my ($self, @args) = @_;
++ # _translate_params will provide an array of command/parameters/switches
++ # -- these are set at object construction
++ # to set up the run, need to add the files to the call
++ # -- provide these as arguments to this function
++ my $cmd = $self->command if $self->can('command');
++ my $opts = $self->{_options};
++ my %args;
++ $self->throw("No command specified for the object") unless $cmd;
++ # setup files necessary for this command
++ my $filespec = $opts->{'_files'}->{$cmd};
++ my @switches;
++ my ($in, $out, $err);
++ # some applications rely completely on switches
++ if (defined $filespec && @$filespec) {
++ # parse args based on filespec
++ # require named args
++ $self->throw("Named args are required") unless !(@args % 2);
++ s/^-// for @args;
++ %args = @args;
++ # validate
++ my @req = map {
++ my $s = $_;
++ $s =~ s/^-.*\|//;
++ $s =~ s/^[012]?[<>]//;
++ $s =~ s/[^a-zA-Z0-9_]//g;
++ $s
++ } grep !/[#]/, @$filespec;
++ !defined($args{$_}) && $self->throw("Required filearg '$_' not specified") for @req;
++ # set up redirects and file switches
++ for (@$filespec) {
++ m/^1?>#?(.*)/ && do {
++ defined($args{$1}) && ( open $out, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") );
++ next;
++ };
++ m/^2>#?(.*)/ && do {
++ defined($args{$1}) && ( open $err, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") );
++ next;
++ };
++ m/^<#?(.*)/ && do {
++ defined($args{$1}) && ( open $in, '<', $args{$1} or $self->throw("Could not read file '$args{$1}': $!") );
++ next;
++ };
++ if (m/^-(.*)\|/) {
++ push @switches, $self->_dash_switch($1);
++ } else {
++ push @switches, undef;
++ }
++ }
++ }
++ my $dum;
++ $in || ($in = \$dum);
++ $out || ($out = \$self->{'stdout'});
++ $err || ($err = \$self->{'stderr'});
++
++ # Get program executable
++ my $exe = $self->executable;
++ $self->throw("Can't find executable for '".($self->is_pseudo ? $self->command : $self->program_name)."'; can't continue") unless $exe;
++
++ # Get command-line options
++ my $options = $self->_translate_params();
++ # Get file specs sans redirects in correct order
++ my @specs = map {
++ my $s = $_;
++ $s =~ s/^-.*\|//;
++ $s =~ s/[^a-zA-Z0-9_]//g;
++ $s
++ } grep !/[<>]/, @$filespec;
++ my @files = @args{@specs};
++ # expand arrayrefs
++ my $l = $#files;
++
++ # Note: below code block may be brittle, see link on this:
++ # http://lists.open-bio.org/pipermail/bioperl-l/2010-June/033439.html
++
++ for (0..$l) {
++ if (ref($files[$_]) eq 'ARRAY') {
++ splice(@switches, $_, 1, ($switches[$_]) x @{$files[$_]});
++ splice(@files, $_, 1, @{$files[$_]});
++ }
++ }
++
++
++ @files = map {
++ my $s = shift @switches;
++ defined $_ ? ($s, $_): ()
++ } @files;
++ @files = map { defined $_ ? $_ : () } @files; # squish undefs
++ my @ipc_args = ( $exe, @$options, @files );
++ $self->{_last_execution} = join( $self->{'_options'}->{'_join'}, @ipc_args );
++ eval {
++ IPC::Run::run(\@ipc_args, $in, $out, $err) or
++ die ("There was a problem running $exe : ".$$err);
++ };
++
++ if ($@) {
++ $self->throw("$exe call crashed: $@") unless $self->no_throw_on_crash;
++ return 0;
++ }
++
++ return 1;
++}
++
++
++
++=head2 no_throw_on_crash()
++
++ Title : no_throw_on_crash
++ Usage :
++ Function: prevent throw on execution error
++ Returns :
++ Args : [optional] boolean
++
++=cut
++
++sub no_throw_on_crash {
++ my $self = shift;
++ return $self->{'_no_throw'} = shift if @_;
++ return $self->{'_no_throw'};
++}
++
++=head2 last_execution()
++
++ Title : last_execution
++ Usage :
++ Function: return the last executed command with options
++ Returns : string of command line sent to IPC::Run
++ Args :
++
++=cut
++
++sub last_execution {
++ my $self = shift;
++ return $self->{'_last_execution'};
++}
++
++=head2 _dash_switch()
++
++ Title : _dash_switch
++ Usage : $version = $fac->_dash_switch( $switch )
++ Function: Returns an appropriately dashed switch for the executable
++ Args : A string containing a switch without dashes
++ Returns : string containing an appropriately dashed switch for the current executable
++
++=cut
++
++sub _dash_switch {
++ my ($self, $switch) = @_;
++
++ my $dash = $self->{'_options'}->{'_dash'};
++ for ($dash) {
++ $_ eq '1' && do {
++ $switch = '-'.$switch;
++ last;
++ };
++ /^s/ && do { #single dash only
++ $switch = '-'.$switch;
++ last;
++ };
++ /^d/ && do { # double dash only
++ $switch = '--'.$switch;
++ last;
++ };
++ /^m/ && do { # mixed dash: one-letter opts get -,
++ $switch = '-'.$switch;
++ $switch =~ s/^(-[a-z0-9](?:\w+))$/-$1/i;
++ last;
++ };
++ do {
++ $self->warn( "Dash spec '$dash' not recognized; using 'single'" );
++ $switch = '-'.$switch;
++ };
++ }
++
++ return $switch;
++}
++
++=head2 stdout()
++
++ Title : stdout
++ Usage : $fac->stdout()
++ Function: store the output from STDOUT for the run,
++ if no file specified in _run arguments
++ Example :
++ Returns : scalar string
++ Args : on set, new value (a scalar or undef, optional)
++
++=cut
++
++sub stdout {
++ my $self = shift;
++ return $self->{'stdout'} = shift if @_;
++ return $self->{'stdout'};
++}
++
++=head2 stderr()
++
++ Title : stderr
++ Usage : $fac->stderr()
++ Function: store the output from STDERR for the run,
++ if no file is specified in _run arguments
++ Example :
++ Returns : scalar string
++ Args : on set, new value (a scalar or undef, optional)
++
++=cut
++
++sub stderr {
++ my $self = shift;
++ return $self->{'stderr'} = shift if @_;
++ return $self->{'stderr'};
++}
++
++=head2 is_pseudo()
++
++ Title : is_pseudo
++ Usage : $obj->is_pseudo($newval)
++ Function: returns true if this factory represents
++ a pseudo-program
++ Example :
++ Returns : value of is_pseudo (boolean)
++ Args : on set, new value (a scalar or undef, optional)
++
++=cut
++
++sub is_pseudo {
++ my $self = shift;
++
++ return $self->{'is_pseudo'} = shift if @_;
++ return $self->{'is_pseudo'};
++}
++
++=head2 AUTOLOAD
++
++AUTOLOAD permits
++
++ $class->new_yourcommand(@args);
++
++as an alias for
++
++ $class->new( -command => 'yourcommand', @args );
++
++=cut
++
++sub AUTOLOAD {
++ my $class = shift;
++ my $tok = $AUTOLOAD;
++ my @args = @_;
++ $tok =~ s/.*:://;
++ unless ($tok =~ /^new_/) {
++ $class->throw("Can't locate object method '$tok' via package '".ref($class)?ref($class):$class);
++ }
++ my ($cmd) = $tok =~ m/new_(.*)/;
++ return $class->new( -command => $cmd, @args );
++}
++
++=head1 Bio:ParameterBaseI compliance
++
++=head2 set_parameters()
++
++ Title : set_parameters
++ Usage : $pobj->set_parameters(%params);
++ Function: sets the parameters listed in the hash or array
++ Returns : true on success
++ Args : [optional] hash or array of parameter/values.
++
++=cut
++
++sub set_parameters {
++ my ($self, @args) = @_;
++
++ # currently stored stuff
++ my $opts = $self->{'_options'};
++ my $params = $opts->{'_params'};
++ my $switches = $opts->{'_switches'};
++ my $translation = $opts->{'_translation'};
++ my $use_dash = $opts->{'_dash'};
++ my $join = $opts->{'_join'};
++ unless (($self->can('command') && $self->command)
++ || (grep /command/, @args)) {
++ push @args, '-command', 'run';
++ }
++ my %args = @args;
++ my $cmd = $args{'-command'} || $args{'command'} || ($self->can('command') && $self->command);
++ if ($cmd) {
++ my (@p, at s, %x);
++ $self->warn('Command present, but no commands registered') unless $self->{'_options'}->{'_commands'};
++ $self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}};
++ $cmd = $self->{_options}->{_prefixes}->{$cmd} || $cmd;
++
++ @p = (grep(!/^.*?\|/, @$params), grep(/^${cmd}\|/, @$params));
++ @s = (grep(!/^.*?\|/, @$switches), grep(/^${cmd}\|/, @$switches));
++ s/.*?\|// for @p;
++ s/.*?\|// for @s;
++ @x{@p, @s} = @{$translation}{
++ grep( !/^.*?\|/, @$params, @$switches),
++ grep(/^${cmd}\|/, @$params, @$switches) };
++ $opts->{_translation} = $translation = \%x;
++ $opts->{_params} = $params = \@p;
++ $opts->{_switches} = $switches = \@s;
++ }
++ $self->_set_from_args(
++ \@args,
++ -methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ],
++ -create => 1,
++ # when our parms are accessed, signal parameters are unchanged for
++ # future reads (until set_parameters is called)
++ -code =>
++ ' my $self = shift;
++ $self->parameters_changed(0);
++ return $self->{\'_\'.$method} = shift if @_;
++ return $self->{\'_\'.$method};'
++ );
++ # the question is, are previously-set parameters left alone when
++ # not specified in @args?
++ $self->parameters_changed(1);
++ return 1;
++}
++
++=head2 reset_parameters()
++
++ Title : reset_parameters
++ Usage : resets values
++ Function: resets parameters to either undef or value in passed hash
++ Returns : none
++ Args : [optional] hash of parameter-value pairs
++
++=cut
++
++sub reset_parameters {
++ my ($self, @args) = @_;
++
++ my @reset_args;
++ # currently stored stuff
++ my $opts = $self->{'_options'};
++ my $params = $opts->{'_params'};
++ my $switches = $opts->{'_switches'};
++ my $translation = $opts->{'_translation'};
++ my $qual_param = $opts->{'_qual_param'};
++ my $use_dash = $opts->{'_dash'};
++ my $join = $opts->{'_join'};
++
++ # handle command name
++ my %args = @args;
++ my $cmd = $args{'-command'} || $args{'command'} || $self->command;
++ $args{'command'} = $cmd;
++ delete $args{'-command'};
++ @args = %args;
++ # don't like this, b/c _set_program_args will create a bunch of
++ # accessors with undef values, but oh well for now /maj
++
++ for my $p (@$params) {
++ push(@reset_args, $p => undef) unless grep /^[-]?$p$/, @args;
++ }
++ for my $s (@$switches) {
++ push(@reset_args, $s => undef) unless grep /^[-]?$s$/, @args;
++ }
++ push @args, @reset_args;
++ $self->set_parameters(@args);
++ $self->parameters_changed(1);
++}
++
++=head2 parameters_changed()
++
++ Title : parameters_changed
++ Usage : if ($pobj->parameters_changed) {...}
++ Function: Returns boolean true (1) if parameters have changed
++ Returns : Boolean (0 or 1)
++ Args : [optional] Boolean
++
++=cut
++
++sub parameters_changed {
++ my $self = shift;
++ return $self->{'_parameters_changed'} = shift if @_;
++ return $self->{'_parameters_changed'};
++}
++
++=head2 available_parameters()
++
++ Title : available_parameters
++ Usage : @params = $pobj->available_parameters()
++ Function: Returns a list of the available parameters
++ Returns : Array of parameters
++ Args : 'params' for settable program parameters
++ 'switches' for boolean program switches
++ default: all
++
++=cut
++
++sub available_parameters {
++ my $self = shift;
++ my $subset = shift;
++ my $opts = $self->{'_options'};
++ my @ret;
++ for ($subset) {
++ (!defined || /^a/) && do {
++ @ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}});
++ last;
++ };
++ m/^p/i && do {
++ @ret = @{$opts->{'_params'}};
++ last;
++ };
++ m/^s/i && do {
++ @ret = @{$opts->{'_switches'}};
++ last;
++ };
++ m/^c/i && do {
++ @ret = @{$opts->{'_commands'}};
++ last;
++ };
++ m/^f/i && do { # get file spec
++ return @{$opts->{'_files'}->{$self->command}};
++ };
++ do { #fail
++ $self->throw("available_parameters: unrecognized subset");
++ };
++ }
++ return @ret;
++}
++
++sub available_commands { shift->available_parameters('commands') }
++sub filespec { shift->available_parameters('filespec') }
++
++=head2 get_parameters()
++
++ Title : get_parameters
++ Usage : %params = $pobj->get_parameters;
++ Function: Returns list of key-value pairs of parameter => value
++ Returns : List of key-value pairs
++ Args : [optional] A string is allowed if subsets are wanted or (if a
++ parameter subset is default) 'all' to return all parameters
++
++=cut
++
++sub get_parameters {
++ my $self = shift;
++ my $subset = shift;
++ $subset ||= 'all';
++ my @ret;
++ my $opts = $self->{'_options'};
++ for ($subset) {
++ m/^p/i && do { #params only
++ for (@{$opts->{'_params'}}) {
++ push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
++ }
++ last;
++ };
++ m/^s/i && do { #switches only
++ for (@{$opts->{'_switches'}}) {
++ push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
++ }
++ last;
++ };
++ m/^a/i && do { # all
++ for ((@{$opts->{'_params'}},@{$opts->{'_switches'}})) {
++ push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
++ }
++ last;
++ };
++ do {
++ $self->throw("get_parameters: unrecognized subset");
++ };
++ }
++ return @ret;
++}
++
++1;
Modified: trunk/packages/bioperl-run/trunk/debian/patches/series
===================================================================
--- trunk/packages/bioperl-run/trunk/debian/patches/series 2016-12-16 09:58:28 UTC (rev 23293)
+++ trunk/packages/bioperl-run/trunk/debian/patches/series 2016-12-16 12:48:06 UTC (rev 23294)
@@ -1,3 +1,4 @@
install-scripts.patch
Use-system-s-Perl.patch
Some-spellchecking.patch
+move-StandAloneBlast-and-WrapperBase-from-root-Bio.-.patch
More information about the debian-med-commit
mailing list