[med-svn] [Git][med-team/transdecoder][upstream] New upstream version 6.0.0

Michael R. Crusoe (@crusoe) gitlab at salsa.debian.org
Fri May 1 17:51:15 BST 2026



Michael R. Crusoe pushed to branch upstream at Debian Med / transdecoder


Commits:
ba94bab7 by Michael R. Crusoe at 2026-05-01T13:11:52+02:00
New upstream version 6.0.0
- - - - -


25 changed files:

- Changelog.txt
- Docker/Dockerfile
- Docker/VERSION.txt
- Docker/make_simg.sh
- PerlLib/DelimParser.pm
- PerlLib/GFF3_utils2.pm
- PerlLib/GTF.pm
- PerlLib/GTF_utils2.pm
- PerlLib/PWM.pm
- PerlLib/Pipeliner.pm
- README.md
- + TransDecoder
- sample_data/cufflinks_example/runMe.sh
- sample_data/pasa_example/runMe.sh
- sample_data/simple_transcriptome_target/runMe.Docker.sh
- sample_data/simple_transcriptome_target/runMe.sh
- sample_data/stringtie_example/runMe.sh
- sample_data/supertranscripts_example/runMe.sh
- TransDecoder.LongOrfs → util/TransDecoder.LongOrfs
- TransDecoder.Predict → util/TransDecoder.Predict
- util/gff3_file_to_bed.pl
- util/gtf_genome_to_cdna_fasta.pl
- util/misc/get_FP_FN_scores.py
- util/misc/plot_indiv_seq_likelihood_profile.py
- util/misc/select_TD_orfs.py


Changes:

=====================================
Changelog.txt
=====================================
@@ -1,4 +1,16 @@
-## v5.7.0
+## v6.0.0 Mar 20, 2026
+- added `TransDecoder` full-pipeline wrapper as the primary entrypoint for transcript FASTA and genome+GTF workflows
+- phase-specific executables are now provided under `util/TransDecoder.LongOrfs` and `util/TransDecoder.Predict`
+- wrapper now supports automated protein homology searches using NCBI BLAST or DIAMOND via `--blast_search_pep`
+- wrapper now supports integrated Pfam searching via `--pfam-search-db`
+
+
+    
+## v5.7.1 July 16, 2023
+- Minor updates to better support Docker-based execution.
+- Output directory (via --output_dir | -O) can be set by user, but the working directory name will be fixed within the output directory and based on the name of the target transcriptome.
+
+## v5.7.0 Jan 27, 2023
 - compatible with hmmsearch or hmmscan output
 - cleaner organization of outputs and checkpoints
 - TransDecoder.LongOrfs includes option for --complete_orfs_only (as requested)
@@ -127,4 +139,3 @@ More useful logging information is provided to it's clearer as to how many orfs
 
 -added 'make simple' to build just the essential components involving parafly and cdhit
 -removed the 'cds.' prefix from the pep and cds sequence accessions.
-


=====================================
Docker/Dockerfile
=====================================
@@ -61,7 +61,6 @@ RUN apt-get -qq update && apt-get -qq -y install \
 
 ## Perl stuff
 RUN curl -L https://cpanmin.us | perl - App::cpanminus
-RUN cpanm install DB_File
 RUN cpanm install URI::Escape
 
 #~~~~~~~~~~~
@@ -94,16 +93,13 @@ RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLASTPLUS_VERSION
 # Hmmer
 
 WORKDIR $SRC
-RUN wget http://eddylab.org/software/hmmer/hmmer.tar.gz && \
-    tar xvf hmmer.tar.gz && \
+RUN wget -q http://eddylab.org/software/hmmer/hmmer-3.3.2.tar.gz && \
+    tar xvf hmmer-3.3.2.tar.gz && \
     cd hmmer-3.3.2 && \
     ./configure && \
     make && make install
 
 
-
-
-
 # Trinotate
 WORKDIR $SRC
 ENV TRANSDECODER_CO=817d772325c8a354aec6b94ac58993b994ac7392
@@ -114,4 +110,3 @@ RUN git clone https://github.com/TransDecoder/TransDecoder.git && \
    mv * $BIN
 
 
-


=====================================
Docker/VERSION.txt
=====================================
@@ -1 +1 @@
-5.7.1
+6.0.0


=====================================
Docker/make_simg.sh
=====================================
@@ -4,7 +4,6 @@ VERSION=`cat VERSION.txt`
 
 singularity build transdecoder.v${VERSION}.simg docker://trinityrnaseq/transdecoder:$VERSION
 
-singularity exec -e transdecoder.v${VERSION}.simg TransDecoder.LongOrfs
+singularity exec -e transdecoder.v${VERSION}.simg util/TransDecoder.LongOrfs
 
 ln -sf  transdecoder.v${VERSION}.simg  transdecoder.simg
-


=====================================
PerlLib/DelimParser.pm
=====================================


=====================================
PerlLib/GFF3_utils2.pm
=====================================
@@ -241,7 +241,7 @@ sub index_GFF3_gene_objs {
             push (@$gene_list_aref, $gene_id);
         }
     }
-    print STDERR "\n";
+    #print STDERR "\n";
     return (\%asmbl_id_to_gene_id_list);
 }
 


=====================================
PerlLib/GTF.pm
=====================================


=====================================
PerlLib/GTF_utils2.pm
=====================================


=====================================
PerlLib/PWM.pm
=====================================


=====================================
PerlLib/Pipeliner.pm
=====================================


=====================================
README.md
=====================================
@@ -2,4 +2,6 @@
 
 Visit the project [wiki](https://github.com/TransDecoder/TransDecoder/wiki) for all TransDecoder documentation.
 
+Primary entrypoint: `./TransDecoder`
 
+Phase-specific utilities are available under `./util/TransDecoder.LongOrfs` and `./util/TransDecoder.Predict`.


=====================================
TransDecoder
=====================================
@@ -0,0 +1,310 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use FindBin;
+use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
+use File::Basename;
+use Cwd;
+
+use lib ("$FindBin::RealBin/PerlLib");
+use Pipeliner;
+
+my $VERSION = "6.0.0";
+
+my $UTIL_DIR  = "$FindBin::RealBin/util";
+my $BIN_DIR   = "$FindBin::RealBin";
+$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+
+my $usage = <<__EOUSAGE__;
+
+##############################################################################################
+#
+#  TransDecoder - full pipeline wrapper
+#
+#  Runs: LongOrfs -> [optional homology search] -> Predict -> [optional genome propagation]
+#
+#  Input (choose one):
+#
+#    -t|--transcripts <string>          Transcripts FASTA file
+#
+#    --genome <string>                  Genome FASTA file  \\  use together to extract
+#    --gtf    <string>                  Annotation GTF file /  cDNA sequences via
+#                                       gtf_genome_to_cdna_fasta.pl, then propagate
+#                                       final ORFs back to genome coordinates.
+#
+#  LongOrfs options:
+#
+#    -m <int>                           Minimum protein length (default: 100)
+#    -S                                 Strand-specific (top strand only)
+#    -G|--genetic_code <string>         Genetic code (default: universal)
+#    --gene_trans_map <string>          Gene-to-transcript map (tab-delimited)
+#    --complete_orfs_only               Only report complete ORFs
+#
+#  Homology search options:
+#
+#    --blast_search_pep <string>        Protein FASTA to search against; triggers
+#                                       homology search (DB built automatically)
+#    --blast_tool <string>              'diamond' or 'blastp' (default: diamond)
+#    --blast_evalue <float>             E-value cutoff (default: 1e-5)
+#    --blast_threads <int>              Threads for homology search (default: 1)
+#
+#  Predict options:
+#
+#    -T <int>                           Top ORFs for Markov model training (default: 500)
+#    --retain_long_orfs_mode <string>   'dynamic' or 'strict' (default: dynamic)
+#    --retain_long_orfs_length <int>    Min length to auto-retain under strict mode
+#    --pfam-search-db <string>          Pfam HMM database to search with hmmsearch;
+#                                       hmmpress is run automatically if needed
+#    --single_best_only                 Retain only single best ORF per transcript
+#    --no_refine_starts                 Skip start codon refinement
+#
+#  Other:
+#
+#    -O|--output_dir <string>           Output directory (default: current directory)
+#    -v|--verbose                       Verbose output
+#    --version                          Show version and exit
+#
+##############################################################################################
+
+__EOUSAGE__
+    ;
+
+
+# ── option variables ──────────────────────────────────────────────────────────
+
+my $transcripts_file;
+my $genome_file;
+my $gtf_file;
+
+# longorfs
+my $min_prot_length    = 100;
+my $strand_specific    = 0;
+my $genetic_code       = 'universal';
+my $gene_trans_map;
+my $complete_orfs_only = 0;
+
+# blast
+my $blast_search_pep;
+my $blast_tool         = 'diamond';
+my $blast_evalue       = 1e-5;
+my $blast_threads      = 1;
+
+# predict
+my $top_orfs_train           = 500;
+my $retain_long_orfs_mode    = 'dynamic';
+my $retain_long_orfs_length  = 1000000;
+my $pfam_search_db;
+my $single_best_only         = 0;
+my $no_refine_starts         = 0;
+
+# general
+my $output_dir = &Pipeliner::ensure_full_path(cwd());
+my $verbose    = 0;
+my $help       = 0;
+my $show_version;
+
+# ── parse options ─────────────────────────────────────────────────────────────
+
+&GetOptions(
+    't|transcripts=s'            => \$transcripts_file,
+    'genome=s'                   => \$genome_file,
+    'gtf=s'                      => \$gtf_file,
+
+    'm=i'                        => \$min_prot_length,
+    'S'                          => \$strand_specific,
+    'G|genetic_code=s'           => \$genetic_code,
+    'gene_trans_map=s'           => \$gene_trans_map,
+    'complete_orfs_only'         => \$complete_orfs_only,
+
+    'blast_search_pep=s'         => \$blast_search_pep,
+    'blast_tool=s'               => \$blast_tool,
+    'blast_evalue=f'             => \$blast_evalue,
+    'blast_threads=i'            => \$blast_threads,
+
+    'T=i'                        => \$top_orfs_train,
+    'retain_long_orfs_mode=s'    => \$retain_long_orfs_mode,
+    'retain_long_orfs_length=i'  => \$retain_long_orfs_length,
+    'pfam_search_db|pfam-search-db=s' => \$pfam_search_db,
+    'single_best_only'           => \$single_best_only,
+    'no_refine_starts'           => \$no_refine_starts,
+
+    'O|output_dir=s'             => \$output_dir,
+    'v|verbose'                  => \$verbose,
+    'h|help'                     => \$help,
+    'version'                    => \$show_version,
+) or die $usage;
+
+if ($help)         { print $usage; exit 0; }
+if ($show_version) { print "TransDecoder $VERSION\n"; exit 0; }
+
+# ── validate blast_tool ───────────────────────────────────────────────────────
+
+unless ($blast_tool =~ /^(diamond|blastp)$/) {
+    die "Error: --blast_tool must be 'diamond' or 'blastp' (got: $blast_tool)\n";
+}
+
+# ── validate / resolve genome mode ───────────────────────────────────────────
+
+my $genome_mode = ($genome_file || $gtf_file) ? 1 : 0;
+
+if ($genome_mode) {
+    unless ($genome_file && $gtf_file) {
+        die "Error: --genome and --gtf must be provided together.\n";
+    }
+    unless (-s $genome_file) { die "Error: genome file not found: $genome_file\n"; }
+    unless (-s $gtf_file)    { die "Error: GTF file not found: $gtf_file\n"; }
+    unless ($transcripts_file) {
+        # derive cDNA FASTA name from GTF stem in output_dir
+        my $gtf_base = basename($gtf_file);
+        $gtf_base =~ s/\.gtf$//i;
+        $transcripts_file = "$output_dir/${gtf_base}.cDNA.fasta";
+    }
+} else {
+    unless ($transcripts_file && -s $transcripts_file) {
+        die "Error: provide -t/--transcripts or both --genome and --gtf.\n$usage";
+    }
+}
+
+if ($blast_search_pep && ! -s $blast_search_pep) {
+    die "Error: --blast_search_pep file not found: $blast_search_pep\n";
+}
+if ($pfam_search_db && ! -s $pfam_search_db) {
+    die "Error: --pfam-search-db file not found: $pfam_search_db\n";
+}
+
+unless (-d $output_dir) {
+    &process_cmd("mkdir -p $output_dir");
+}
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+sub process_cmd {
+    my ($cmd) = @_;
+    print STDERR "CMD: $cmd\n";
+    my $ret = system($cmd);
+    if ($ret) { die "Error, cmd died with ret $ret:\n  $cmd\n"; }
+}
+
+sub hmmpress_outputs_exist {
+    my ($pfam_db) = @_;
+    foreach my $ext (qw(.h3f .h3i .h3m .h3p)) {
+        return 0 unless -s "${pfam_db}${ext}";
+    }
+    return 1;
+}
+
+# ── PHASE 0: extract cDNA from genome + GTF ──────────────────────────────────
+
+my $alignment_gff3;   # set here; reused in phase 3
+
+if ($genome_mode) {
+
+    # alignment GFF3 (transcript coords -> genome coords)
+    my $gtf_base = basename($gtf_file);
+    $gtf_base =~ s/\.gtf$//i;
+    $alignment_gff3 = "$output_dir/${gtf_base}.gff3";
+
+    print STDERR "\n-- Converting GTF to alignment GFF3 --\n";
+    &process_cmd("$UTIL_DIR/gtf_to_alignment_gff3.pl $gtf_file > $alignment_gff3");
+
+    # cDNA FASTA
+    print STDERR "\n-- Extracting cDNA sequences --\n";
+    &process_cmd("$UTIL_DIR/gtf_genome_to_cdna_fasta.pl $gtf_file $genome_file > $transcripts_file");
+}
+
+# ── PHASE 1: LongOrfs ────────────────────────────────────────────────────────
+
+print STDERR "\n-- Running TransDecoder.LongOrfs --\n";
+
+my $longorfs_cmd = "$UTIL_DIR/TransDecoder.LongOrfs -t $transcripts_file"
+    . " -m $min_prot_length"
+    . " -G $genetic_code"
+    . " -O $output_dir";
+$longorfs_cmd .= " -S"                          if $strand_specific;
+$longorfs_cmd .= " --gene_trans_map $gene_trans_map" if $gene_trans_map;
+$longorfs_cmd .= " --complete_orfs_only"        if $complete_orfs_only;
+
+&process_cmd($longorfs_cmd);
+
+# ── PHASE 1.5: homology search ───────────────────────────────────────────────
+
+my $retain_blastp_hits_file;
+my $retain_pfam_hits_file;
+
+if ($blast_search_pep) {
+
+    my $workdir  = "$output_dir/" . basename($transcripts_file) . ".transdecoder_dir";
+    my $pep_file = "$workdir/longest_orfs.pep";
+    my $blast_out = "$workdir/blastp.outfmt6";
+    my $db_path   = "$workdir/blast_db";
+
+    if ($blast_tool eq 'diamond') {
+        print STDERR "\n-- Building Diamond database --\n";
+        &process_cmd("diamond makedb --in $blast_search_pep -d $db_path -p $blast_threads");
+
+        print STDERR "\n-- Running Diamond blastp --\n";
+        &process_cmd("diamond blastp -q $pep_file -d $db_path -k 1 -f 6 -e $blast_evalue -p $blast_threads -o $blast_out");
+
+    } else {
+        print STDERR "\n-- Building BLAST database --\n";
+        &process_cmd("makeblastdb -in $blast_search_pep -dbtype prot -out $db_path");
+
+        print STDERR "\n-- Running blastp --\n";
+        &process_cmd("blastp -query $pep_file -db $db_path -max_target_seqs 1 -outfmt 6 -evalue $blast_evalue -num_threads $blast_threads -out $blast_out");
+    }
+
+    $retain_blastp_hits_file = $blast_out;
+}
+
+if ($pfam_search_db) {
+
+    my $workdir = "$output_dir/" . basename($transcripts_file) . ".transdecoder_dir";
+    my $pep_file = "$workdir/longest_orfs.pep";
+    my $pfam_out = "$workdir/pfam.domtblout";
+
+    unless (hmmpress_outputs_exist($pfam_search_db)) {
+        print STDERR "\n-- Preparing Pfam database with hmmpress --\n";
+        &process_cmd("hmmpress -f $pfam_search_db");
+    }
+
+    print STDERR "\n-- Running Pfam hmmsearch --\n";
+    &process_cmd("hmmsearch --domtblout $pfam_out $pfam_search_db $pep_file");
+
+    $retain_pfam_hits_file = $pfam_out;
+}
+
+# ── PHASE 2: Predict ─────────────────────────────────────────────────────────
+
+print STDERR "\n-- Running TransDecoder.Predict --\n";
+
+my $predict_cmd = "$UTIL_DIR/TransDecoder.Predict -t $transcripts_file"
+    . " -T $top_orfs_train"
+    . " --retain_long_orfs_mode $retain_long_orfs_mode"
+    . " --retain_long_orfs_length $retain_long_orfs_length"
+    . " -O $output_dir";
+# Only pass -G when non-default; Predict's default 'Universal' works with all downstream tools
+$predict_cmd .= " -G $genetic_code" if lc($genetic_code) ne 'universal';
+$predict_cmd .= " --retain_blastp_hits $retain_blastp_hits_file" if $retain_blastp_hits_file;
+$predict_cmd .= " --retain_pfam_hits $retain_pfam_hits_file"     if $retain_pfam_hits_file;
+$predict_cmd .= " --single_best_only"                            if $single_best_only;
+$predict_cmd .= " --no_refine_starts"                            if $no_refine_starts;
+$predict_cmd .= " -v"                                            if $verbose;
+
+&process_cmd($predict_cmd);
+
+# ── PHASE 3: propagate ORFs to genome coordinates ────────────────────────────
+
+if ($genome_mode) {
+
+    my $td_gff3      = "$output_dir/" . basename($transcripts_file) . ".transdecoder.gff3";
+    my $genome_gff3  = "$output_dir/" . basename($transcripts_file) . ".transdecoder.genome.gff3";
+
+    print STDERR "\n-- Propagating ORFs to genome coordinates --\n";
+    &process_cmd("$UTIL_DIR/cdna_alignment_orf_to_genome_orf.pl $td_gff3 $alignment_gff3 $transcripts_file > $genome_gff3");
+
+    print STDERR "\nGenome-coordinate ORF annotations written to: $genome_gff3\n";
+}
+
+print STDERR "\nTransDecoder finished.\n\n";
+exit 0;


=====================================
sample_data/cufflinks_example/runMe.sh
=====================================
@@ -21,53 +21,18 @@ if [ ! -e mini_sprot.db.pep ]; then
     gunzip -c mini_sprot.db.pep.gz > mini_sprot.db.pep
 fi
 
-
-## generate alignment gff3 formatted output
-../../util/gtf_to_alignment_gff3.pl transcripts.gtf > transcripts.gff3
-
-## generate transcripts fasta file
-../../util/gtf_genome_to_cdna_fasta.pl transcripts.gtf test.genome.fasta > transcripts.fasta 
-
-## Extract the long ORFs
-../../TransDecoder.LongOrfs -t transcripts.fasta
-
 cmd=""
 ## Predict likely ORFs
 if [ "$1" == "" ]; then   # always doing this now.
-    # just coding metrics
-    cmd="../../TransDecoder.Predict -t transcripts.fasta"
+    cmd="../../TransDecoder -t transcripts.fasta --genome test.genome.fasta --gtf transcripts.gtf"
 
 else
-    
-    # this is how I would have run blast and pfam but I'm using precomputed results for ease of demonstration.
-    #BLASTDB=/seq/RNASEQ/DBs/TRINOTATE_RESOURCES/TRINOTATE_V3/uniprot_sprot.pep
-    #PFAMDB=/seq/RNASEQ/DBs/TRINOTATE_RESOURCES/TRINOTATE_V3/Pfam-A.hmm
-    #
-    ## run blast
-    #blastp -query transcripts.fasta.transdecoder_dir/longest_orfs.pep -db $BLASTDB -max_target_seqs 1 -outfmt 6 -evalue 1e-5 > blastp.outfmt6
-
-    makeblastdb -in mini_sprot.db.pep -dbtype prot
-    blastp -query transcripts.fasta.transdecoder_dir/longest_orfs.pep -db mini_sprot.db.pep -max_target_seqs 1 -outfmt 6 -evalue 1e-5 > blastp.outfmt6
-
-    #
-    ## run pfam
-    #hmmsearch --domtblout pfam.domtblout $PFAMDB transcripts.fasta.transdecoder_dir/longest_orfs.pep > pfam.log
-
-    hmmpress -f mini_Pfam-A.hmm
-    hmmsearch --domtblout pfam.domtblout mini_Pfam-A.hmm transcripts.fasta.transdecoder_dir/longest_orfs.pep
-        
-    ## use pfam and blast results:
-    cmd="../../TransDecoder.Predict  -t transcripts.fasta --retain_pfam_hits pfam.domtblout --retain_blastp_hits blastp.outfmt6   -v"
-    
+    cmd="../../TransDecoder -t transcripts.fasta --genome test.genome.fasta --gtf transcripts.gtf --blast_search_pep mini_sprot.db.pep --pfam-search-db mini_Pfam-A.hmm -v"
 fi
 
 eval $cmd
 
 
-## convert to genome coordinates
-../../util/cdna_alignment_orf_to_genome_orf.pl transcripts.fasta.transdecoder.gff3 transcripts.gff3 transcripts.fasta > transcripts.fasta.transdecoder.genome.gff3
-
-
 ## make bed files for viewing with GenomeView
 
 # covert cufflinks gtf to bed


=====================================
sample_data/pasa_example/runMe.sh
=====================================
@@ -20,16 +20,12 @@ fi
 # get the gene-to-transcript relationships
 cut -f2,3 pasa_assemblies_described.txt > pasa.gene_trans_map.txt
 
-../../TransDecoder.LongOrfs -t pasa_assemblies.fasta --gene_trans_map pasa.gene_trans_map.txt -O pasa.transdecoder_workdir
+../../TransDecoder -t pasa_assemblies.fasta --gene_trans_map pasa.gene_trans_map.txt -O pasa.transdecoder_workdir $ARGS
 
+../../util/cdna_alignment_orf_to_genome_orf.pl  pasa.transdecoder_workdir/pasa_assemblies.fasta.transdecoder.gff3 pasa_assemblies.gff3 pasa_assemblies.fasta  >  pasa_assemblies.fasta.transdecoder.genome.gff3
 
 
-../../TransDecoder.Predict -t pasa_assemblies.fasta $ARGS -O pasa.transdecoder_workdir
-
-../../util/cdna_alignment_orf_to_genome_orf.pl  pasa_assemblies.fasta.transdecoder.gff3 pasa_assemblies.gff3 pasa_assemblies.fasta  >  pasa_assemblies.fasta.transdecoder.genome.gff3
-
-
-../../util/fasta_prot_checker.pl pasa_assemblies.fasta.transdecoder.pep
+../../util/fasta_prot_checker.pl pasa.transdecoder_workdir/pasa_assemblies.fasta.transdecoder.pep
 
 
 echo "Done.  See pasa_assemblies.fasta.transdecoder.\*"


=====================================
sample_data/simple_transcriptome_target/runMe.Docker.sh
=====================================
@@ -5,9 +5,9 @@ if [ ! -e Trinity.fasta ]; then
     gunzip -c genome_alignments.gmap.gff3.gz > genome_alignments.gmap.gff3
 fi
 
-docker run --rm  -v `pwd`:/data trinityrnaseq/transdecoder:latest TransDecoder.LongOrfs -t /data/Trinity.fasta -O /data
+docker run --rm  -v `pwd`:/data trinityrnaseq/transdecoder:latest util/TransDecoder.LongOrfs -t /data/Trinity.fasta -O /data
 
-docker run --rm  -v `pwd`:/data trinityrnaseq/transdecoder:latest TransDecoder.Predict -t /data/Trinity.fasta -O /data 
+docker run --rm  -v `pwd`:/data trinityrnaseq/transdecoder:latest util/TransDecoder.Predict -t /data/Trinity.fasta -O /data 
 
 # gmap was used to align the Trinity.fasta transcripts to the genome,
 # using the gmap '-f 3' output formatting parameter, generating file 'genome_alignments.gmap.gff3'


=====================================
sample_data/simple_transcriptome_target/runMe.sh
=====================================
@@ -5,9 +5,7 @@ if [ ! -e Trinity.fasta ]; then
     gunzip -c genome_alignments.gmap.gff3.gz > genome_alignments.gmap.gff3
 fi
 
-../../TransDecoder.LongOrfs -t Trinity.fasta $*
-
-../../TransDecoder.Predict -t Trinity.fasta 
+../../TransDecoder -t Trinity.fasta $*
 
 # gmap was used to align the Trinity.fasta transcripts to the genome,
 # using the gmap '-f 3' output formatting parameter, generating file 'genome_alignments.gmap.gff3'


=====================================
sample_data/stringtie_example/runMe.sh
=====================================
@@ -12,13 +12,7 @@ export PERL_HASH_SEED=0
 # not including the genome here... too big, but here's how you'd do it.
 #../../util/gtf_genome_to_cdna_fasta.pl stringtie_merged.gtf  genome.fasta > stringtie_merged.transcripts.fasta
 
-## Extract the long ORFs
-../../TransDecoder.LongOrfs -t stringtie_merged.transcripts.fasta -S
-
-
-## Predict likely ORFs
-
-../../TransDecoder.Predict -t stringtie_merged.transcripts.fasta $ARGS
+../../TransDecoder -t stringtie_merged.transcripts.fasta -S $ARGS
 
 
 ## convert to genome coordinates


=====================================
sample_data/supertranscripts_example/runMe.sh
=====================================
@@ -9,9 +9,7 @@ set -ex
 ../../util/gtf_genome_to_cdna_fasta.pl supertranscripts.gtf supertranscripts.fasta > transcripts.fasta
 
 # run TransDecoder
-../../TransDecoder.LongOrfs -t transcripts.fasta
-
-cmd="../../TransDecoder.Predict -t transcripts.fasta"
+cmd="../../TransDecoder -t transcripts.fasta"
 if [ $1 ]; then
     cmd="$cmd --no_refine_starts"
 fi


=====================================
TransDecoder.LongOrfs → util/TransDecoder.LongOrfs
=====================================
@@ -9,8 +9,14 @@ use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
 use Data::Dumper;
 use List::Util qw (min max);
 use File::Basename;
+use File::Spec;
 
-use lib ("$FindBin::RealBin/PerlLib");
+our $ROOT_DIR;
+BEGIN {
+    $ROOT_DIR = File::Spec->rel2abs(File::Spec->catdir($FindBin::RealBin, File::Spec->updir()));
+}
+
+use lib ("$ROOT_DIR/PerlLib");
 
 use POSIX qw(ceil);
 use Gene_obj;
@@ -21,10 +27,7 @@ use Pipeliner;
 use Cwd;
 
 
-#my $VERSION = "__BLEEDING_EDGE__";
-my $VERSION = "5.7.1";
-
-my $UTIL_DIR = "$FindBin::RealBin/util";
+my $UTIL_DIR = "$ROOT_DIR/util";
 $ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
 
 
@@ -64,8 +67,6 @@ my $usage = <<__EOUSAGE__;
 #
 #   --output_dir | -O  <string>            path to intended output directory
 #
-#   --version                              show version tag ($VERSION)
-#
 #   --genetic_code | -G <string>                            genetic code (default: universal; see PerlDoc; options: Euplotes, Tetrahymena, Candida, Acetabularia)
 #                                              Genetic Codes (derived from: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)#  
 $genetic_code_options 
@@ -90,7 +91,6 @@ my $gene_trans_map_file;
 
 my $MPI_DEBUG = 1;
 
-my $show_version_flag;
 my $output_dir =  &Pipeliner::ensure_full_path(cwd());
 my $COMPLETE_ORFS_ONLY = 0;
 
@@ -101,7 +101,6 @@ my $COMPLETE_ORFS_ONLY = 0;
              'v' => \$verbose,
              'S' => \$TOP_STRAND_ONLY, 
              'gene_trans_map=s' => \$gene_trans_map_file,
-             'version' => \$show_version_flag,
              'output_dir|O=s' => \$output_dir,
              'complete_orfs_only' => \$COMPLETE_ORFS_ONLY,
     );
@@ -111,12 +110,6 @@ if ($help) {
     die $usage;
 }
 
-if ($show_version_flag) {
-    print "TransDecoder.LongOrfs $VERSION\n";
-    exit(0);
-}
-
-
 if (@ARGV) {
     die "Error, don't understand options: @ARGV";
 }


=====================================
TransDecoder.Predict → util/TransDecoder.Predict
=====================================
@@ -8,10 +8,16 @@ use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
 use Data::Dumper;
 use List::Util qw (min max);
 use File::Basename;
+use File::Spec;
 use Carp;
 use Digest::MD5;
 
-use lib ("$FindBin::RealBin/PerlLib");
+our $ROOT_DIR;
+BEGIN {
+    $ROOT_DIR = File::Spec->rel2abs(File::Spec->catdir($FindBin::RealBin, File::Spec->updir()));
+}
+
+use lib ("$ROOT_DIR/PerlLib");
 
 use POSIX qw(ceil);
 use Gene_obj;
@@ -22,9 +28,6 @@ use Pipeliner;
 use DelimParser;
 use Cwd;
 
-#my $VERSION = "__BLEEDING_EDGE__";
-my $VERSION = "5.7.1";
-
 my $RETAIN_LONG_ORFS_MIN_LENGTH = 1000000; # so essentially, off by default
 
 srand(1234);
@@ -76,8 +79,6 @@ my $usage = <<__EOUSAGE__;
 #    -T <int>                            Top longest ORFs to train Markov Model (hexamer stats) (default: 500)
 #                                        Note, 10x this value are first selected for removing redundancies,
 #                                        and then this -T value of longest ORFs are selected from the non-redundant set.
-#  --version                           show version ($VERSION)
-#
 #   --genetic_code | -G <string>                            genetic code (default: universal; see PerlDoc; options: Euplotes, Tetrahymena, Candida, Acetabularia, ...)
 #                                              Genetic Codes (derived from: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
 $genetic_code_options
@@ -91,7 +92,7 @@ __EOUSAGE__
 
 
 
-my $UTIL_DIR = "$FindBin::RealBin/util";
+my $UTIL_DIR = "$ROOT_DIR/util";
 $ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
 
 
@@ -153,9 +154,6 @@ my $NO_REFINE_START_CODONS_FLAG = 0;
 
 my $output_dir = &Pipeliner::ensure_full_path(cwd()); # current working directory by default.
 
-my $show_version_flag;
-
-
 &GetOptions( 't=s' => \$transcripts_file,
 
              'h' => \$help,
@@ -181,8 +179,6 @@ my $show_version_flag;
 
              'no_refine_starts' => \$NO_REFINE_START_CODONS_FLAG,
 
-             'version' => \$show_version_flag,
-
              'output_dir|O=s' => \$output_dir,
                           
              );
@@ -192,12 +188,6 @@ if ($help) {
     die $usage;
 }
 
-if ($show_version_flag) {
-    print "TransDecoder.Predict $VERSION\n";
-    exit(0);
-}
-
-
 if (@ARGV) {
     die "Error, don't understand options: @ARGV";
 }
@@ -498,4 +488,3 @@ sub get_dynamic_retain_long_orf_length {
     return(1000000); #effectively infinity here.
 
 }
-


=====================================
util/gff3_file_to_bed.pl
=====================================
@@ -10,6 +10,8 @@ use Carp;
 use Nuc_translator;
 use File::Basename;
 
+our $SEE = 0;
+
 my $usage = "\n\nusage: $0 gff3_file\n\n";
 
 my $gff3_file = $ARGV[0] or die $usage;
@@ -19,7 +21,7 @@ my $gene_obj_indexer_href = {};
 ## associate gene identifiers with contig id's.
 my $contig_to_gene_list_href = &GFF3_utils2::index_GFF3_gene_objs($gff3_file, $gene_obj_indexer_href);
 
-print "track name=\'" . basename($gff3_file) . "\'\n";
+#print "track name=\'" . basename($gff3_file) . "\'\n";
 
 foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
     
@@ -29,12 +31,10 @@ foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
         		
 		my $gene_obj_ref = $gene_obj_indexer_href->{$gene_id};
 		
-		foreach my $gene ($gene_obj_ref, $gene_obj_ref->get_additional_isoforms()) {
-
-			my $bed = $gene->to_BED_format();
+        my $bed = $gene_obj_ref->to_BED_format();
 
-			print $bed;
-		}
+        print $bed;
+		
 	}
 }
 


=====================================
util/gtf_genome_to_cdna_fasta.pl
=====================================
@@ -16,7 +16,7 @@ my $genome = $ARGV[1] or die $usage;
 main: {
 
 
-	print STDERR "-parsing cufflinks output: $cufflinks_gtf\n";
+	print STDERR "-parsing GTF: $cufflinks_gtf\n";
 	my %genome_trans_to_coords;
 	
 	open (my $fh, $cufflinks_gtf) or die "Error, cannot open file $cufflinks_gtf";


=====================================
util/misc/get_FP_FN_scores.py
=====================================
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import os, sys, re
 


=====================================
util/misc/plot_indiv_seq_likelihood_profile.py
=====================================
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import os,sys
 import re
@@ -39,9 +39,9 @@ def main():
         score_vec.sort()
 
     if args.cumsum:
-        plt.plot(range(1,len(score_vec)+1), np.cumsum(score_vec), marker ='o')
+        plt.plot(list(range(1,len(score_vec)+1)), np.cumsum(score_vec), marker ='o')
     else:
-        plt.plot(range(1,len(score_vec)+1), score_vec, marker ='+')
+        plt.plot(list(range(1,len(score_vec)+1)), score_vec, marker ='+')
     
     
     plt.show()


=====================================
util/misc/select_TD_orfs.py
=====================================
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys, os, re
 import collections



View it on GitLab: https://salsa.debian.org/med-team/transdecoder/-/commit/ba94bab7b31dbe667d9d0b6400e08475b50f444e

-- 
View it on GitLab: https://salsa.debian.org/med-team/transdecoder/-/commit/ba94bab7b31dbe667d9d0b6400e08475b50f444e
You're receiving this email because of your account on salsa.debian.org. Manage all notifications: https://salsa.debian.org/-/profile/notifications | Help: https://salsa.debian.org/help


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20260501/3ab5d919/attachment-0001.htm>


More information about the debian-med-commit mailing list