[med-svn] [transdecoder] 01/01: New upstream version 3.0.1+dfsg

Thu Nov 10 11:22:02 UTC 2016

This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to annotated tag upstream/3.0.1+dfsg
in repository transdecoder.

commit 62d4d5ae335e1c84f93fc5b0723830d4ea672e67
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date:   Thu Nov 10 03:14:53 2016 -0800

    New upstream version 3.0.1+dfsg
---
 TransDecoder.Predict          |  64 +++++++++++++++++++-----
 util/gff3_file_to_proteins.pl | 112 ++++++++++++++++++++----------------------
 2 files changed, 105 insertions(+), 71 deletions(-)

diff --git a/TransDecoder.Predict b/TransDecoder.Predict
index 838aea4..db9e6a5 100755
--- a/TransDecoder.Predict
+++ b/TransDecoder.Predict
@@ -26,7 +26,9 @@ Common options:
  --single_best_orf                      Retain only the single best ORF per transcript.
                                         (Best is defined as having (optionally pfam and/or blast support) and longest orf)
 
- --cpu <int>                            Use multipe cores for cd-hit-est. (default=1)
+ --cpu <int>                            Use multiple cores for cd-hit-est. (default=1)
+
+ -G <string>                            genetic code (default: universal; see PerlDoc; options: Euplotes, Tetrahymena, Candida, Acetabularia, ...)
 
 
 Advanced options
@@ -40,6 +42,32 @@ Advanced options
 
 =cut
 
+=head1 Genetic Codes
+
+See L<http://golgi.harvard.edu/biolinks/gencode.html>. These are currently supported:
+
+ universal (default)
+ Euplotes
+ Tetrahymena
+ Candida
+ Acetabularia
+ Mitochondrial-Canonical
+ Mitochondrial-Vertebrates
+ Mitochondrial-Arthropods
+ Mitochondrial-Echinoderms
+ Mitochondrial-Molluscs
+ Mitochondrial-Ascidians
+ Mitochondrial-Nematodes
+ Mitochondrial-Platyhelminths
+ Mitochondrial-Yeasts
+ Mitochondrial-Euascomycetes
+ Mitochondrial-Protozoans
+
+
+=cut
+    
+
+
 
 use strict;
 use warnings;
@@ -83,6 +111,7 @@ my $retain_blastp_hits_file;
 my $cpu = 1;
 my $MPI_DEBUG = 1;
 my $single_best_orf_flag = 0;
+my $genetic_code = "";
 
 &GetOptions( 't=s' => \$transcripts_file,
              'train:s' => \$train_file,
@@ -104,6 +133,9 @@ my $single_best_orf_flag = 0;
              'cpu=i' => \$cpu,
 
              'single_best_orf' => \$single_best_orf_flag,
+
+             'G=s' => \$genetic_code,
+
              
              );
 
@@ -120,6 +152,11 @@ our $SEE = $verbose;
 
 pod2usage(-verbose => 2, -output => \*STDERR, -message => "No transcript file (-t)\n") unless ($transcripts_file && -s $transcripts_file);
 
+
+if ($genetic_code) {
+    $genetic_code = " --genetic_code $genetic_code";
+}
+
 main: {
     my $workdir = basename($transcripts_file) . ".transdecoder_dir"; 
     
@@ -310,7 +347,7 @@ main: {
         # make a peptide file:
         my $best_pep_file = $gff3_file;
         $best_pep_file =~ s/\.gff3$/\.pep/;
-        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
+        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl --gff3 $gff3_file --fasta $transcripts_file $genetic_code > $best_pep_file";
         &process_cmd($cmd);
         
         
@@ -318,7 +355,7 @@ main: {
         # make a CDS file:
         my $best_cds_file = $best_pep_file;
         $best_cds_file =~ s/\.pep$/\.cds/;
-        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
+        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl --gff3 $gff3_file --fasta $transcripts_file --seqType CDS $genetic_code > $best_cds_file";
         &process_cmd($cmd);
         
     }
@@ -401,14 +438,15 @@ sub parse_blastp_hits_file {
 
 
 sub check_program() {
- my @paths;
- foreach my $prog (@_) {
-  my $path = `which $prog`;
-  die "Error, path to a required program ($prog) cannot be found\n\n"
-    unless $path =~ /^\//;
-  chomp($path);
-  $path = readlink($path) if -l $path;
-  push( @paths, $path );
- }
- return @paths;
+    my @paths;
+    foreach my $prog (@_) {
+        my $path = `which $prog`;
+        unless ($path =~ /\w/) {
+            die "Error, path to a required program ($prog) cannot be found\n\n"
+        }
+        chomp($path);
+        $path = readlink($path) if -l $path;
+        push( @paths, $path );
+    }
+    return @paths;
 }
diff --git a/util/gff3_file_to_proteins.pl b/util/gff3_file_to_proteins.pl
index cf83bdf..94806f1 100755
--- a/util/gff3_file_to_proteins.pl
+++ b/util/gff3_file_to_proteins.pl
@@ -9,35 +9,63 @@ use Fasta_reader;
 use GFF3_utils;
 use Carp;
 use Nuc_translator;
-
-my $usage = "\n\nusage: $0 gff3_file genome_db [prot|CDS|cDNA|gene,default=prot] [flank=0]\n\n";
-
-my $gff3_file = $ARGV[0] or die $usage;
-my $fasta_db = $ARGV[1] or die $usage;
-my $seq_type = $ARGV[2] || "prot";
-my $flank = $ARGV[3] || 0;
-
-my ($upstream_flank, $downstream_flank) = (0,0);
-
-if ($flank) {
-	if ($flank =~ /:/) {
-		($upstream_flank, $downstream_flank) = split (/:/, $flank);
-	}
-	else {
-		($upstream_flank, $downstream_flank) = ($flank, $flank);
-	}
+use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
+
+
+my $usage = <<__EOUSAGE__;
+
+####################################################
+#
+# Required:
+#
+#  --gff3 <string>          gff3 file
+#
+#  --fasta <string>         fasta file corresponding to gff3 file
+#
+##
+#  Optional:
+#
+#  --seqType <string>        prot|CDS|cDNA|gene,  default=prot
+#
+#  --genetic_code  <string>   universal (default)
+#                             Euplotes, Tetrahymena, Candida
+#                             Acetabularia, Mitochondrial-Canonical
+#                             Mitochondrial-Vertebrates, Mitochondrial-Arthropods
+#                             Mitochondrial-Echinoderms, Mitochondrial-Molluscs
+#                             Mitochondrial-Ascidians, Mitochondrial-Nematodes
+#                             Mitochondrial-Platyhelminths, Mitochondrial-Yeasts
+#                             Mitochondrial-Euascomycetes, Mitochondrial-Protozoans
+#
+###################################################
+
+
+__EOUSAGE__
+
+    ;
+
+
+my $gff3_file;
+my $fasta_db;
+my $seq_type = 'prot';
+my $genetic_code = '';
+
+&GetOptions ( 'gff3=s' => \$gff3_file,
+              'fasta=s' => \$fasta_db,
+              'seqType=s' => \$seq_type,
+              'genetic_code=s' => \$genetic_code,
+    );
+
+unless ($gff3_file && $fasta_db) {
+    die $usage;
 }
 
-if ($upstream_flank < 0 || $downstream_flank < 0) {
-	die $usage;
-}
-
-
-
 unless ($seq_type =~ /^(prot|CDS|cDNA|gene)$/) {
     die "Error, don't understand sequence type [$seq_type]\n\n$usage";
 }
 
+if ($genetic_code) {
+    &Nuc_translator::use_specified_genetic_code($genetic_code);
+}
 
 ## read genome
 my $fasta_reader = new Fasta_reader($fasta_db);
@@ -83,22 +111,13 @@ foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
             }
             elsif ($seq_type eq "CDS") {
                 $seq = $isoform->get_CDS_sequence();
-				if ($upstream_flank || $downstream_flank) {
-					$seq = &add_flank($seq, $upstream_flank, $downstream_flank, $model_lend, $model_rend, $orientation, \$genome_seq);
-				}
-			}
+            }
             elsif ($seq_type eq "cDNA") {
                 $seq = $isoform->get_cDNA_sequence();
-				if ($upstream_flank || $downstream_flank) {
-					$seq = &add_flank($seq, $upstream_flank, $downstream_flank, $gene_lend, $gene_rend, $orientation, \$genome_seq);
-				}
-			}
+            }
             elsif ($seq_type eq "gene" && $counter == 1) {
                 $seq = $isoform->get_gene_sequence();
-				if ($upstream_flank || $downstream_flank) {
-					$seq = &add_flank($seq, $upstream_flank, $downstream_flank, $gene_lend, $gene_rend, $orientation, \$genome_seq);
-				}
-			}
+            }
             
             unless ($seq) {
                 print STDERR "-warning, no $seq_type sequence for $isoform_id\n";
@@ -139,26 +158,3 @@ foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
 exit(0);
 
 
-####
-sub add_flank {
-	my ($seq, $upstream_flank, $downstream_flank, $lend, $rend, $orientation, $genome_seq_ref) = @_;
-	
-	my $far_left = ($orientation eq '+') ? $lend - $upstream_flank : $lend - $downstream_flank;
-	
-	if ($far_left < 1) { $far_left = 1; }
-	
-	my $flank_right = ($orientation eq '+') ? $downstream_flank : $upstream_flank;
-
-	my $left_seq = substr($$genome_seq_ref, $far_left - 1, $lend - $far_left);
-
-	my $right_seq = substr($$genome_seq_ref, $rend, $flank_right);
-	
-	if ($orientation eq '+') {
-		return (lc($left_seq) . uc($seq) . lc($right_seq));
-	}
-	else {
-		return (lc(&reverse_complement($right_seq)) . uc($seq) . lc(&reverse_complement($left_seq)));
-	}
-}
-
-

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/transdecoder.git