[med-svn] [transdecoder] 01/01: New upstream version 3.0.1+dfsg
Michael Crusoe
misterc-guest at moszumanska.debian.org
Thu Nov 10 11:22:02 UTC 2016
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to annotated tag upstream/3.0.1+dfsg
in repository transdecoder.
commit 62d4d5ae335e1c84f93fc5b0723830d4ea672e67
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date: Thu Nov 10 03:14:53 2016 -0800
New upstream version 3.0.1+dfsg
---
TransDecoder.Predict | 64 +++++++++++++++++++-----
util/gff3_file_to_proteins.pl | 112 ++++++++++++++++++++----------------------
2 files changed, 105 insertions(+), 71 deletions(-)
diff --git a/TransDecoder.Predict b/TransDecoder.Predict
index 838aea4..db9e6a5 100755
--- a/TransDecoder.Predict
+++ b/TransDecoder.Predict
@@ -26,7 +26,9 @@ Common options:
--single_best_orf Retain only the single best ORF per transcript.
(Best is defined as having (optionally pfam and/or blast support) and longest orf)
- --cpu <int> Use multipe cores for cd-hit-est. (default=1)
+ --cpu <int> Use multiple cores for cd-hit-est. (default=1)
+
+ -G <string> genetic code (default: universal; see PerlDoc; options: Euplotes, Tetrahymena, Candida, Acetabularia, ...)
Advanced options
@@ -40,6 +42,32 @@ Advanced options
=cut
+=head1 Genetic Codes
+
+See L<http://golgi.harvard.edu/biolinks/gencode.html>. These are currently supported:
+
+ universal (default)
+ Euplotes
+ Tetrahymena
+ Candida
+ Acetabularia
+ Mitochondrial-Canonical
+ Mitochondrial-Vertebrates
+ Mitochondrial-Arthropods
+ Mitochondrial-Echinoderms
+ Mitochondrial-Molluscs
+ Mitochondrial-Ascidians
+ Mitochondrial-Nematodes
+ Mitochondrial-Platyhelminths
+ Mitochondrial-Yeasts
+ Mitochondrial-Euascomycetes
+ Mitochondrial-Protozoans
+
+
+=cut
+
+
+
use strict;
use warnings;
@@ -83,6 +111,7 @@ my $retain_blastp_hits_file;
my $cpu = 1;
my $MPI_DEBUG = 1;
my $single_best_orf_flag = 0;
+my $genetic_code = "";
&GetOptions( 't=s' => \$transcripts_file,
'train:s' => \$train_file,
@@ -104,6 +133,9 @@ my $single_best_orf_flag = 0;
'cpu=i' => \$cpu,
'single_best_orf' => \$single_best_orf_flag,
+
+ 'G=s' => \$genetic_code,
+
);
@@ -120,6 +152,11 @@ our $SEE = $verbose;
pod2usage(-verbose => 2, -output => \*STDERR, -message => "No transcript file (-t)\n") unless ($transcripts_file && -s $transcripts_file);
+
+if ($genetic_code) {
+ $genetic_code = " --genetic_code $genetic_code";
+}
+
main: {
my $workdir = basename($transcripts_file) . ".transdecoder_dir";
@@ -310,7 +347,7 @@ main: {
# make a peptide file:
my $best_pep_file = $gff3_file;
$best_pep_file =~ s/\.gff3$/\.pep/;
- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
+ $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl --gff3 $gff3_file --fasta $transcripts_file $genetic_code > $best_pep_file";
&process_cmd($cmd);
@@ -318,7 +355,7 @@ main: {
# make a CDS file:
my $best_cds_file = $best_pep_file;
$best_cds_file =~ s/\.pep$/\.cds/;
- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
+ $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl --gff3 $gff3_file --fasta $transcripts_file --seqType CDS $genetic_code > $best_cds_file";
&process_cmd($cmd);
}
@@ -401,14 +438,15 @@ sub parse_blastp_hits_file {
sub check_program() {
- my @paths;
- foreach my $prog (@_) {
- my $path = `which $prog`;
- die "Error, path to a required program ($prog) cannot be found\n\n"
- unless $path =~ /^\//;
- chomp($path);
- $path = readlink($path) if -l $path;
- push( @paths, $path );
- }
- return @paths;
+ my @paths;
+ foreach my $prog (@_) {
+ my $path = `which $prog`;
+ unless ($path =~ /\w/) {
+ die "Error, path to a required program ($prog) cannot be found\n\n"
+ }
+ chomp($path);
+ $path = readlink($path) if -l $path;
+ push( @paths, $path );
+ }
+ return @paths;
}
diff --git a/util/gff3_file_to_proteins.pl b/util/gff3_file_to_proteins.pl
index cf83bdf..94806f1 100755
--- a/util/gff3_file_to_proteins.pl
+++ b/util/gff3_file_to_proteins.pl
@@ -9,35 +9,63 @@ use Fasta_reader;
use GFF3_utils;
use Carp;
use Nuc_translator;
-
-my $usage = "\n\nusage: $0 gff3_file genome_db [prot|CDS|cDNA|gene,default=prot] [flank=0]\n\n";
-
-my $gff3_file = $ARGV[0] or die $usage;
-my $fasta_db = $ARGV[1] or die $usage;
-my $seq_type = $ARGV[2] || "prot";
-my $flank = $ARGV[3] || 0;
-
-my ($upstream_flank, $downstream_flank) = (0,0);
-
-if ($flank) {
- if ($flank =~ /:/) {
- ($upstream_flank, $downstream_flank) = split (/:/, $flank);
- }
- else {
- ($upstream_flank, $downstream_flank) = ($flank, $flank);
- }
+use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
+
+
+my $usage = <<__EOUSAGE__;
+
+####################################################
+#
+# Required:
+#
+# --gff3 <string> gff3 file
+#
+# --fasta <string> fasta file corresponding to gff3 file
+#
+##
+# Optional:
+#
+# --seqType <string> prot|CDS|cDNA|gene, default=prot
+#
+# --genetic_code <string> universal (default)
+# Euplotes, Tetrahymena, Candida
+# Acetabularia, Mitochondrial-Canonical
+# Mitochondrial-Vertebrates, Mitochondrial-Arthropods
+# Mitochondrial-Echinoderms, Mitochondrial-Molluscs
+# Mitochondrial-Ascidians, Mitochondrial-Nematodes
+# Mitochondrial-Platyhelminths,Mitochondrial-Yeasts
+# Mitochondrial-Euascomycetes, Mitochondrial-Protozoans
+#
+###################################################
+
+
+__EOUSAGE__
+
+ ;
+
+
+my $gff3_file;
+my $fasta_db;
+my $seq_type = 'prot';
+my $genetic_code = '';
+
+&GetOptions ( 'gff3=s' => \$gff3_file,
+ 'fasta=s' => \$fasta_db,
+ 'seqType=s' => \$seq_type,
+ 'genetic_code=s' => \$genetic_code,
+ );
+
+unless ($gff3_file && $fasta_db) {
+ die $usage;
}
-if ($upstream_flank < 0 || $downstream_flank < 0) {
- die $usage;
-}
-
-
-
unless ($seq_type =~ /^(prot|CDS|cDNA|gene)$/) {
die "Error, don't understand sequence type [$seq_type]\n\n$usage";
}
+if ($genetic_code) {
+ &Nuc_translator::use_specified_genetic_code($genetic_code);
+}
## read genome
my $fasta_reader = new Fasta_reader($fasta_db);
@@ -83,22 +111,13 @@ foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
}
elsif ($seq_type eq "CDS") {
$seq = $isoform->get_CDS_sequence();
- if ($upstream_flank || $downstream_flank) {
- $seq = &add_flank($seq, $upstream_flank, $downstream_flank, $model_lend, $model_rend, $orientation, \$genome_seq);
- }
- }
+ }
elsif ($seq_type eq "cDNA") {
$seq = $isoform->get_cDNA_sequence();
- if ($upstream_flank || $downstream_flank) {
- $seq = &add_flank($seq, $upstream_flank, $downstream_flank, $gene_lend, $gene_rend, $orientation, \$genome_seq);
- }
- }
+ }
elsif ($seq_type eq "gene" && $counter == 1) {
$seq = $isoform->get_gene_sequence();
- if ($upstream_flank || $downstream_flank) {
- $seq = &add_flank($seq, $upstream_flank, $downstream_flank, $gene_lend, $gene_rend, $orientation, \$genome_seq);
- }
- }
+ }
unless ($seq) {
print STDERR "-warning, no $seq_type sequence for $isoform_id\n";
@@ -139,26 +158,3 @@ foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
exit(0);
-####
-sub add_flank {
- my ($seq, $upstream_flank, $downstream_flank, $lend, $rend, $orientation, $genome_seq_ref) = @_;
-
- my $far_left = ($orientation eq '+') ? $lend - $upstream_flank : $lend - $downstream_flank;
-
- if ($far_left < 1) { $far_left = 1; }
-
- my $flank_right = ($orientation eq '+') ? $downstream_flank : $upstream_flank;
-
- my $left_seq = substr($$genome_seq_ref, $far_left - 1, $lend - $far_left);
-
- my $right_seq = substr($$genome_seq_ref, $rend, $flank_right);
-
- if ($orientation eq '+') {
- return (lc($left_seq) . uc($seq) . lc($right_seq));
- }
- else {
- return (lc(&reverse_complement($right_seq)) . uc($seq) . lc(&reverse_complement($left_seq)));
- }
-}
-
-
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/transdecoder.git
More information about the debian-med-commit mailing list