[med-svn] [Git][med-team/transdecoder][upstream] New upstream version 6.0.0
Michael R. Crusoe (@crusoe)
gitlab at salsa.debian.org
Fri May 1 17:51:15 BST 2026
Michael R. Crusoe pushed to branch upstream at Debian Med / transdecoder
Commits:
ba94bab7 by Michael R. Crusoe at 2026-05-01T13:11:52+02:00
New upstream version 6.0.0
- - - - -
25 changed files:
- Changelog.txt
- Docker/Dockerfile
- Docker/VERSION.txt
- Docker/make_simg.sh
- PerlLib/DelimParser.pm
- PerlLib/GFF3_utils2.pm
- PerlLib/GTF.pm
- PerlLib/GTF_utils2.pm
- PerlLib/PWM.pm
- PerlLib/Pipeliner.pm
- README.md
- + TransDecoder
- sample_data/cufflinks_example/runMe.sh
- sample_data/pasa_example/runMe.sh
- sample_data/simple_transcriptome_target/runMe.Docker.sh
- sample_data/simple_transcriptome_target/runMe.sh
- sample_data/stringtie_example/runMe.sh
- sample_data/supertranscripts_example/runMe.sh
- TransDecoder.LongOrfs → util/TransDecoder.LongOrfs
- TransDecoder.Predict → util/TransDecoder.Predict
- util/gff3_file_to_bed.pl
- util/gtf_genome_to_cdna_fasta.pl
- util/misc/get_FP_FN_scores.py
- util/misc/plot_indiv_seq_likelihood_profile.py
- util/misc/select_TD_orfs.py
Changes:
=====================================
Changelog.txt
=====================================
@@ -1,4 +1,16 @@
-## v5.7.0
+## v6.0.0 Mar 20, 2026
+- added `TransDecoder` full-pipeline wrapper as the primary entrypoint for transcript FASTA and genome+GTF workflows
+- phase-specific executables are now provided under `util/TransDecoder.LongOrfs` and `util/TransDecoder.Predict`
+- wrapper now supports automated protein homology searches using NCBI BLAST or DIAMOND via `--blast_search_pep`
+- wrapper now supports integrated Pfam searching via `--pfam-search-db`
+
+
+
+## v5.7.1 July 16, 2023
+- Minor updates to better support Docker-based execution.
+- Output directory (via --output_dir | -O) can be set by user, but the working directory name will be fixed within the output directory and based on the name of the target transcriptome.
+
+## v5.7.0 Jan 27, 2023
- compatible with hmmsearch or hmmscan output
- cleaner organization of outputs and checkpoints
- TransDecoder.LongOrfs includes option for --complete_orfs_only (as requested)
@@ -127,4 +139,3 @@ More useful logging information is provided so it's clearer as to how many orfs
-added 'make simple' to build just the essential components involving parafly and cdhit
-removed the 'cds.' prefix from the pep and cds sequence accessions.
-
=====================================
Docker/Dockerfile
=====================================
@@ -61,7 +61,6 @@ RUN apt-get -qq update && apt-get -qq -y install \
## Perl stuff
RUN curl -L https://cpanmin.us | perl - App::cpanminus
-RUN cpanm install DB_File
RUN cpanm install URI::Escape
#~~~~~~~~~~~
@@ -94,16 +93,13 @@ RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLASTPLUS_VERSION
# Hmmer
WORKDIR $SRC
-RUN wget http://eddylab.org/software/hmmer/hmmer.tar.gz && \
- tar xvf hmmer.tar.gz && \
+RUN wget -q http://eddylab.org/software/hmmer/hmmer-3.3.2.tar.gz && \
+ tar xvf hmmer-3.3.2.tar.gz && \
cd hmmer-3.3.2 && \
./configure && \
make && make install
-
-
-
# Trinotate
WORKDIR $SRC
ENV TRANSDECODER_CO=817d772325c8a354aec6b94ac58993b994ac7392
@@ -114,4 +110,3 @@ RUN git clone https://github.com/TransDecoder/TransDecoder.git && \
mv * $BIN
-
=====================================
Docker/VERSION.txt
=====================================
@@ -1 +1 @@
-5.7.1
+6.0.0
=====================================
Docker/make_simg.sh
=====================================
@@ -4,7 +4,6 @@ VERSION=`cat VERSION.txt`
singularity build transdecoder.v${VERSION}.simg docker://trinityrnaseq/transdecoder:$VERSION
-singularity exec -e transdecoder.v${VERSION}.simg TransDecoder.LongOrfs
+singularity exec -e transdecoder.v${VERSION}.simg util/TransDecoder.LongOrfs
ln -sf transdecoder.v${VERSION}.simg transdecoder.simg
-
=====================================
PerlLib/DelimParser.pm
=====================================
=====================================
PerlLib/GFF3_utils2.pm
=====================================
@@ -241,7 +241,7 @@ sub index_GFF3_gene_objs {
push (@$gene_list_aref, $gene_id);
}
}
- print STDERR "\n";
+ #print STDERR "\n";
return (\%asmbl_id_to_gene_id_list);
}
=====================================
PerlLib/GTF.pm
=====================================
=====================================
PerlLib/GTF_utils2.pm
=====================================
=====================================
PerlLib/PWM.pm
=====================================
=====================================
PerlLib/Pipeliner.pm
=====================================
=====================================
README.md
=====================================
@@ -2,4 +2,6 @@
Visit the project [wiki](https://github.com/TransDecoder/TransDecoder/wiki) for all TransDecoder documentation.
+Primary entrypoint: `./TransDecoder`
+Phase-specific utilities are available under `./util/TransDecoder.LongOrfs` and `./util/TransDecoder.Predict`.
=====================================
TransDecoder
=====================================
@@ -0,0 +1,310 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use FindBin;
+use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
+use File::Basename;
+use Cwd;
+
+use lib ("$FindBin::RealBin/PerlLib");
+use Pipeliner;
+
+my $VERSION = "6.0.0";
+
+my $UTIL_DIR = "$FindBin::RealBin/util";
+my $BIN_DIR = "$FindBin::RealBin";
+$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
+
+my $usage = <<__EOUSAGE__;
+
+##############################################################################################
+#
+# TransDecoder - full pipeline wrapper
+#
+# Runs: LongOrfs -> [optional homology search] -> Predict -> [optional genome propagation]
+#
+# Input (choose one):
+#
+# -t|--transcripts <string> Transcripts FASTA file
+#
+# --genome <string> Genome FASTA file \\ use together to extract
+# --gtf <string> Annotation GTF file / cDNA sequences via
+# gtf_genome_to_cdna_fasta.pl, then propagate
+# final ORFs back to genome coordinates.
+#
+# LongOrfs options:
+#
+# -m <int> Minimum protein length (default: 100)
+# -S Strand-specific (top strand only)
+# -G|--genetic_code <string> Genetic code (default: universal)
+# --gene_trans_map <string> Gene-to-transcript map (tab-delimited)
+# --complete_orfs_only Only report complete ORFs
+#
+# Homology search options:
+#
+# --blast_search_pep <string> Protein FASTA to search against; triggers
+# homology search (DB built automatically)
+# --blast_tool <string> 'diamond' or 'blastp' (default: diamond)
+# --blast_evalue <float> E-value cutoff (default: 1e-5)
+# --blast_threads <int> Threads for homology search (default: 1)
+#
+# Predict options:
+#
+# -T <int> Top ORFs for Markov model training (default: 500)
+# --retain_long_orfs_mode <string> 'dynamic' or 'strict' (default: dynamic)
+# --retain_long_orfs_length <int> Min length to auto-retain under strict mode
+# --pfam-search-db <string> Pfam HMM database to search with hmmsearch;
+# hmmpress is run automatically if needed
+# --single_best_only Retain only single best ORF per transcript
+# --no_refine_starts Skip start codon refinement
+#
+# Other:
+#
+# -O|--output_dir <string> Output directory (default: current directory)
+# -v|--verbose Verbose output
+# --version Show version and exit
+#
+##############################################################################################
+
+__EOUSAGE__
+ ;
+
+
+# ── option variables ──────────────────────────────────────────────────────────
+
+my $transcripts_file;
+my $genome_file;
+my $gtf_file;
+
+# longorfs
+my $min_prot_length = 100;
+my $strand_specific = 0;
+my $genetic_code = 'universal';
+my $gene_trans_map;
+my $complete_orfs_only = 0;
+
+# blast
+my $blast_search_pep;
+my $blast_tool = 'diamond';
+my $blast_evalue = 1e-5;
+my $blast_threads = 1;
+
+# predict
+my $top_orfs_train = 500;
+my $retain_long_orfs_mode = 'dynamic';
+my $retain_long_orfs_length = 1000000;
+my $pfam_search_db;
+my $single_best_only = 0;
+my $no_refine_starts = 0;
+
+# general
+my $output_dir = &Pipeliner::ensure_full_path(cwd());
+my $verbose = 0;
+my $help = 0;
+my $show_version;
+
+# ── parse options ─────────────────────────────────────────────────────────────
+
+&GetOptions(
+ 't|transcripts=s' => \$transcripts_file,
+ 'genome=s' => \$genome_file,
+ 'gtf=s' => \$gtf_file,
+
+ 'm=i' => \$min_prot_length,
+ 'S' => \$strand_specific,
+ 'G|genetic_code=s' => \$genetic_code,
+ 'gene_trans_map=s' => \$gene_trans_map,
+ 'complete_orfs_only' => \$complete_orfs_only,
+
+ 'blast_search_pep=s' => \$blast_search_pep,
+ 'blast_tool=s' => \$blast_tool,
+ 'blast_evalue=f' => \$blast_evalue,
+ 'blast_threads=i' => \$blast_threads,
+
+ 'T=i' => \$top_orfs_train,
+ 'retain_long_orfs_mode=s' => \$retain_long_orfs_mode,
+ 'retain_long_orfs_length=i' => \$retain_long_orfs_length,
+ 'pfam_search_db|pfam-search-db=s' => \$pfam_search_db,
+ 'single_best_only' => \$single_best_only,
+ 'no_refine_starts' => \$no_refine_starts,
+
+ 'O|output_dir=s' => \$output_dir,
+ 'v|verbose' => \$verbose,
+ 'h|help' => \$help,
+ 'version' => \$show_version,
+) or die $usage;
+
+if ($help) { print $usage; exit 0; }
+if ($show_version) { print "TransDecoder $VERSION\n"; exit 0; }
+
+# ── validate blast_tool ───────────────────────────────────────────────────────
+
+unless ($blast_tool =~ /^(diamond|blastp)$/) {
+ die "Error: --blast_tool must be 'diamond' or 'blastp' (got: $blast_tool)\n";
+}
+
+# ── validate / resolve genome mode ───────────────────────────────────────────
+
+my $genome_mode = ($genome_file || $gtf_file) ? 1 : 0;
+
+if ($genome_mode) {
+ unless ($genome_file && $gtf_file) {
+ die "Error: --genome and --gtf must be provided together.\n";
+ }
+ unless (-s $genome_file) { die "Error: genome file not found: $genome_file\n"; }
+ unless (-s $gtf_file) { die "Error: GTF file not found: $gtf_file\n"; }
+ unless ($transcripts_file) {
+ # derive cDNA FASTA name from GTF stem in output_dir
+ my $gtf_base = basename($gtf_file);
+ $gtf_base =~ s/\.gtf$//i;
+ $transcripts_file = "$output_dir/${gtf_base}.cDNA.fasta";
+ }
+} else {
+ unless ($transcripts_file && -s $transcripts_file) {
+ die "Error: provide -t/--transcripts or both --genome and --gtf.\n$usage";
+ }
+}
+
+if ($blast_search_pep && ! -s $blast_search_pep) {
+ die "Error: --blast_search_pep file not found: $blast_search_pep\n";
+}
+if ($pfam_search_db && ! -s $pfam_search_db) {
+ die "Error: --pfam-search-db file not found: $pfam_search_db\n";
+}
+
+unless (-d $output_dir) {
+ &process_cmd("mkdir -p $output_dir");
+}
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+sub process_cmd {
+ my ($cmd) = @_;
+ print STDERR "CMD: $cmd\n";
+ my $ret = system($cmd);
+ if ($ret) { die "Error, cmd died with ret $ret:\n $cmd\n"; }
+}
+
+sub hmmpress_outputs_exist {
+ my ($pfam_db) = @_;
+ foreach my $ext (qw(.h3f .h3i .h3m .h3p)) {
+ return 0 unless -s "${pfam_db}${ext}";
+ }
+ return 1;
+}
+
+# ── PHASE 0: extract cDNA from genome + GTF ──────────────────────────────────
+
+my $alignment_gff3; # set here; reused in phase 3
+
+if ($genome_mode) {
+
+ # alignment GFF3 (transcript coords -> genome coords)
+ my $gtf_base = basename($gtf_file);
+ $gtf_base =~ s/\.gtf$//i;
+ $alignment_gff3 = "$output_dir/${gtf_base}.gff3";
+
+ print STDERR "\n-- Converting GTF to alignment GFF3 --\n";
+ &process_cmd("$UTIL_DIR/gtf_to_alignment_gff3.pl $gtf_file > $alignment_gff3");
+
+ # cDNA FASTA
+ print STDERR "\n-- Extracting cDNA sequences --\n";
+ &process_cmd("$UTIL_DIR/gtf_genome_to_cdna_fasta.pl $gtf_file $genome_file > $transcripts_file");
+}
+
+# ── PHASE 1: LongOrfs ────────────────────────────────────────────────────────
+
+print STDERR "\n-- Running TransDecoder.LongOrfs --\n";
+
+my $longorfs_cmd = "$UTIL_DIR/TransDecoder.LongOrfs -t $transcripts_file"
+ . " -m $min_prot_length"
+ . " -G $genetic_code"
+ . " -O $output_dir";
+$longorfs_cmd .= " -S" if $strand_specific;
+$longorfs_cmd .= " --gene_trans_map $gene_trans_map" if $gene_trans_map;
+$longorfs_cmd .= " --complete_orfs_only" if $complete_orfs_only;
+
+&process_cmd($longorfs_cmd);
+
+# ── PHASE 1.5: homology search ───────────────────────────────────────────────
+
+my $retain_blastp_hits_file;
+my $retain_pfam_hits_file;
+
+if ($blast_search_pep) {
+
+ my $workdir = "$output_dir/" . basename($transcripts_file) . ".transdecoder_dir";
+ my $pep_file = "$workdir/longest_orfs.pep";
+ my $blast_out = "$workdir/blastp.outfmt6";
+ my $db_path = "$workdir/blast_db";
+
+ if ($blast_tool eq 'diamond') {
+ print STDERR "\n-- Building Diamond database --\n";
+ &process_cmd("diamond makedb --in $blast_search_pep -d $db_path -p $blast_threads");
+
+ print STDERR "\n-- Running Diamond blastp --\n";
+ &process_cmd("diamond blastp -q $pep_file -d $db_path -k 1 -f 6 -e $blast_evalue -p $blast_threads -o $blast_out");
+
+ } else {
+ print STDERR "\n-- Building BLAST database --\n";
+ &process_cmd("makeblastdb -in $blast_search_pep -dbtype prot -out $db_path");
+
+ print STDERR "\n-- Running blastp --\n";
+ &process_cmd("blastp -query $pep_file -db $db_path -max_target_seqs 1 -outfmt 6 -evalue $blast_evalue -num_threads $blast_threads -out $blast_out");
+ }
+
+ $retain_blastp_hits_file = $blast_out;
+}
+
+if ($pfam_search_db) {
+
+ my $workdir = "$output_dir/" . basename($transcripts_file) . ".transdecoder_dir";
+ my $pep_file = "$workdir/longest_orfs.pep";
+ my $pfam_out = "$workdir/pfam.domtblout";
+
+ unless (hmmpress_outputs_exist($pfam_search_db)) {
+ print STDERR "\n-- Preparing Pfam database with hmmpress --\n";
+ &process_cmd("hmmpress -f $pfam_search_db");
+ }
+
+ print STDERR "\n-- Running Pfam hmmsearch --\n";
+ &process_cmd("hmmsearch --domtblout $pfam_out $pfam_search_db $pep_file");
+
+ $retain_pfam_hits_file = $pfam_out;
+}
+
+# ── PHASE 2: Predict ─────────────────────────────────────────────────────────
+
+print STDERR "\n-- Running TransDecoder.Predict --\n";
+
+my $predict_cmd = "$UTIL_DIR/TransDecoder.Predict -t $transcripts_file"
+ . " -T $top_orfs_train"
+ . " --retain_long_orfs_mode $retain_long_orfs_mode"
+ . " --retain_long_orfs_length $retain_long_orfs_length"
+ . " -O $output_dir";
+# Only pass -G when non-default; Predict's default 'Universal' works with all downstream tools
+$predict_cmd .= " -G $genetic_code" if lc($genetic_code) ne 'universal';
+$predict_cmd .= " --retain_blastp_hits $retain_blastp_hits_file" if $retain_blastp_hits_file;
+$predict_cmd .= " --retain_pfam_hits $retain_pfam_hits_file" if $retain_pfam_hits_file;
+$predict_cmd .= " --single_best_only" if $single_best_only;
+$predict_cmd .= " --no_refine_starts" if $no_refine_starts;
+$predict_cmd .= " -v" if $verbose;
+
+&process_cmd($predict_cmd);
+
+# ── PHASE 3: propagate ORFs to genome coordinates ────────────────────────────
+
+if ($genome_mode) {
+
+ my $td_gff3 = "$output_dir/" . basename($transcripts_file) . ".transdecoder.gff3";
+ my $genome_gff3 = "$output_dir/" . basename($transcripts_file) . ".transdecoder.genome.gff3";
+
+ print STDERR "\n-- Propagating ORFs to genome coordinates --\n";
+ &process_cmd("$UTIL_DIR/cdna_alignment_orf_to_genome_orf.pl $td_gff3 $alignment_gff3 $transcripts_file > $genome_gff3");
+
+ print STDERR "\nGenome-coordinate ORF annotations written to: $genome_gff3\n";
+}
+
+print STDERR "\nTransDecoder finished.\n\n";
+exit 0;
=====================================
sample_data/cufflinks_example/runMe.sh
=====================================
@@ -21,53 +21,18 @@ if [ ! -e mini_sprot.db.pep ]; then
gunzip -c mini_sprot.db.pep.gz > mini_sprot.db.pep
fi
-
-## generate alignment gff3 formatted output
-../../util/gtf_to_alignment_gff3.pl transcripts.gtf > transcripts.gff3
-
-## generate transcripts fasta file
-../../util/gtf_genome_to_cdna_fasta.pl transcripts.gtf test.genome.fasta > transcripts.fasta
-
-## Extract the long ORFs
-../../TransDecoder.LongOrfs -t transcripts.fasta
-
cmd=""
## Predict likely ORFs
if [ "$1" == "" ]; then # always doing this now.
- # just coding metrics
- cmd="../../TransDecoder.Predict -t transcripts.fasta"
+ cmd="../../TransDecoder -t transcripts.fasta --genome test.genome.fasta --gtf transcripts.gtf"
else
-
- # this is how I would have run blast and pfam but I'm using precomputed results for ease of demonstration.
- #BLASTDB=/seq/RNASEQ/DBs/TRINOTATE_RESOURCES/TRINOTATE_V3/uniprot_sprot.pep
- #PFAMDB=/seq/RNASEQ/DBs/TRINOTATE_RESOURCES/TRINOTATE_V3/Pfam-A.hmm
- #
- ## run blast
- #blastp -query transcripts.fasta.transdecoder_dir/longest_orfs.pep -db $BLASTDB -max_target_seqs 1 -outfmt 6 -evalue 1e-5 > blastp.outfmt6
-
- makeblastdb -in mini_sprot.db.pep -dbtype prot
- blastp -query transcripts.fasta.transdecoder_dir/longest_orfs.pep -db mini_sprot.db.pep -max_target_seqs 1 -outfmt 6 -evalue 1e-5 > blastp.outfmt6
-
- #
- ## run pfam
- #hmmsearch --domtblout pfam.domtblout $PFAMDB transcripts.fasta.transdecoder_dir/longest_orfs.pep > pfam.log
-
- hmmpress -f mini_Pfam-A.hmm
- hmmsearch --domtblout pfam.domtblout mini_Pfam-A.hmm transcripts.fasta.transdecoder_dir/longest_orfs.pep
-
- ## use pfam and blast results:
- cmd="../../TransDecoder.Predict -t transcripts.fasta --retain_pfam_hits pfam.domtblout --retain_blastp_hits blastp.outfmt6 -v"
-
+ cmd="../../TransDecoder -t transcripts.fasta --genome test.genome.fasta --gtf transcripts.gtf --blast_search_pep mini_sprot.db.pep --pfam-search-db mini_Pfam-A.hmm -v"
fi
eval $cmd
-## convert to genome coordinates
-../../util/cdna_alignment_orf_to_genome_orf.pl transcripts.fasta.transdecoder.gff3 transcripts.gff3 transcripts.fasta > transcripts.fasta.transdecoder.genome.gff3
-
-
## make bed files for viewing with GenomeView
# convert cufflinks gtf to bed
=====================================
sample_data/pasa_example/runMe.sh
=====================================
@@ -20,16 +20,12 @@ fi
# get the gene-to-transcript relationships
cut -f2,3 pasa_assemblies_described.txt > pasa.gene_trans_map.txt
-../../TransDecoder.LongOrfs -t pasa_assemblies.fasta --gene_trans_map pasa.gene_trans_map.txt -O pasa.transdecoder_workdir
+../../TransDecoder -t pasa_assemblies.fasta --gene_trans_map pasa.gene_trans_map.txt -O pasa.transdecoder_workdir $ARGS
+../../util/cdna_alignment_orf_to_genome_orf.pl pasa.transdecoder_workdir/pasa_assemblies.fasta.transdecoder.gff3 pasa_assemblies.gff3 pasa_assemblies.fasta > pasa_assemblies.fasta.transdecoder.genome.gff3
-../../TransDecoder.Predict -t pasa_assemblies.fasta $ARGS -O pasa.transdecoder_workdir
-
-../../util/cdna_alignment_orf_to_genome_orf.pl pasa_assemblies.fasta.transdecoder.gff3 pasa_assemblies.gff3 pasa_assemblies.fasta > pasa_assemblies.fasta.transdecoder.genome.gff3
-
-
-../../util/fasta_prot_checker.pl pasa_assemblies.fasta.transdecoder.pep
+../../util/fasta_prot_checker.pl pasa.transdecoder_workdir/pasa_assemblies.fasta.transdecoder.pep
echo "Done. See pasa_assemblies.fasta.transdecoder.\*"
=====================================
sample_data/simple_transcriptome_target/runMe.Docker.sh
=====================================
@@ -5,9 +5,9 @@ if [ ! -e Trinity.fasta ]; then
gunzip -c genome_alignments.gmap.gff3.gz > genome_alignments.gmap.gff3
fi
-docker run --rm -v `pwd`:/data trinityrnaseq/transdecoder:latest TransDecoder.LongOrfs -t /data/Trinity.fasta -O /data
+docker run --rm -v `pwd`:/data trinityrnaseq/transdecoder:latest util/TransDecoder.LongOrfs -t /data/Trinity.fasta -O /data
-docker run --rm -v `pwd`:/data trinityrnaseq/transdecoder:latest TransDecoder.Predict -t /data/Trinity.fasta -O /data
+docker run --rm -v `pwd`:/data trinityrnaseq/transdecoder:latest util/TransDecoder.Predict -t /data/Trinity.fasta -O /data
# gmap was used to align the Trinity.fasta transcripts to the genome,
# using the gmap '-f 3' output formatting parameter, generating file 'genome_alignments.gmap.gff3'
=====================================
sample_data/simple_transcriptome_target/runMe.sh
=====================================
@@ -5,9 +5,7 @@ if [ ! -e Trinity.fasta ]; then
gunzip -c genome_alignments.gmap.gff3.gz > genome_alignments.gmap.gff3
fi
-../../TransDecoder.LongOrfs -t Trinity.fasta $*
-
-../../TransDecoder.Predict -t Trinity.fasta
+../../TransDecoder -t Trinity.fasta $*
# gmap was used to align the Trinity.fasta transcripts to the genome,
# using the gmap '-f 3' output formatting parameter, generating file 'genome_alignments.gmap.gff3'
=====================================
sample_data/stringtie_example/runMe.sh
=====================================
@@ -12,13 +12,7 @@ export PERL_HASH_SEED=0
# not including the genome here... too big, but here's how you'd do it.
#../../util/gtf_genome_to_cdna_fasta.pl stringtie_merged.gtf genome.fasta > stringtie_merged.transcripts.fasta
-## Extract the long ORFs
-../../TransDecoder.LongOrfs -t stringtie_merged.transcripts.fasta -S
-
-
-## Predict likely ORFs
-
-../../TransDecoder.Predict -t stringtie_merged.transcripts.fasta $ARGS
+../../TransDecoder -t stringtie_merged.transcripts.fasta -S $ARGS
## convert to genome coordinates
=====================================
sample_data/supertranscripts_example/runMe.sh
=====================================
@@ -9,9 +9,7 @@ set -ex
../../util/gtf_genome_to_cdna_fasta.pl supertranscripts.gtf supertranscripts.fasta > transcripts.fasta
# run TransDecoder
-../../TransDecoder.LongOrfs -t transcripts.fasta
-
-cmd="../../TransDecoder.Predict -t transcripts.fasta"
+cmd="../../TransDecoder -t transcripts.fasta"
if [ $1 ]; then
cmd="$cmd --no_refine_starts"
fi
=====================================
TransDecoder.LongOrfs → util/TransDecoder.LongOrfs
=====================================
@@ -9,8 +9,14 @@ use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
use Data::Dumper;
use List::Util qw (min max);
use File::Basename;
+use File::Spec;
-use lib ("$FindBin::RealBin/PerlLib");
+our $ROOT_DIR;
+BEGIN {
+ $ROOT_DIR = File::Spec->rel2abs(File::Spec->catdir($FindBin::RealBin, File::Spec->updir()));
+}
+
+use lib ("$ROOT_DIR/PerlLib");
use POSIX qw(ceil);
use Gene_obj;
@@ -21,10 +27,7 @@ use Pipeliner;
use Cwd;
-#my $VERSION = "__BLEEDING_EDGE__";
-my $VERSION = "5.7.1";
-
-my $UTIL_DIR = "$FindBin::RealBin/util";
+my $UTIL_DIR = "$ROOT_DIR/util";
$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
@@ -64,8 +67,6 @@ my $usage = <<__EOUSAGE__;
#
# --output_dir | -O <string> path to intended output directory
#
-# --version show version tag ($VERSION)
-#
# --genetic_code | -G <string> genetic code (default: universal; see PerlDoc; options: Euplotes, Tetrahymena, Candida, Acetabularia)
# Genetic Codes (derived from: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)#
$genetic_code_options
@@ -90,7 +91,6 @@ my $gene_trans_map_file;
my $MPI_DEBUG = 1;
-my $show_version_flag;
my $output_dir = &Pipeliner::ensure_full_path(cwd());
my $COMPLETE_ORFS_ONLY = 0;
@@ -101,7 +101,6 @@ my $COMPLETE_ORFS_ONLY = 0;
'v' => \$verbose,
'S' => \$TOP_STRAND_ONLY,
'gene_trans_map=s' => \$gene_trans_map_file,
- 'version' => \$show_version_flag,
'output_dir|O=s' => \$output_dir,
'complete_orfs_only' => \$COMPLETE_ORFS_ONLY,
);
@@ -111,12 +110,6 @@ if ($help) {
die $usage;
}
-if ($show_version_flag) {
- print "TransDecoder.LongOrfs $VERSION\n";
- exit(0);
-}
-
-
if (@ARGV) {
die "Error, don't understand options: @ARGV";
}
=====================================
TransDecoder.Predict → util/TransDecoder.Predict
=====================================
@@ -8,10 +8,16 @@ use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
use Data::Dumper;
use List::Util qw (min max);
use File::Basename;
+use File::Spec;
use Carp;
use Digest::MD5;
-use lib ("$FindBin::RealBin/PerlLib");
+our $ROOT_DIR;
+BEGIN {
+ $ROOT_DIR = File::Spec->rel2abs(File::Spec->catdir($FindBin::RealBin, File::Spec->updir()));
+}
+
+use lib ("$ROOT_DIR/PerlLib");
use POSIX qw(ceil);
use Gene_obj;
@@ -22,9 +28,6 @@ use Pipeliner;
use DelimParser;
use Cwd;
-#my $VERSION = "__BLEEDING_EDGE__";
-my $VERSION = "5.7.1";
-
my $RETAIN_LONG_ORFS_MIN_LENGTH = 1000000; # so essentially, off by default
srand(1234);
@@ -76,8 +79,6 @@ my $usage = <<__EOUSAGE__;
# -T <int> Top longest ORFs to train Markov Model (hexamer stats) (default: 500)
# Note, 10x this value are first selected for removing redundancies,
# and then this -T value of longest ORFs are selected from the non-redundant set.
-# --version show version ($VERSION)
-#
# --genetic_code | -G <string> genetic code (default: universal; see PerlDoc; options: Euplotes, Tetrahymena, Candida, Acetabularia, ...)
# Genetic Codes (derived from: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
$genetic_code_options
@@ -91,7 +92,7 @@ __EOUSAGE__
-my $UTIL_DIR = "$FindBin::RealBin/util";
+my $UTIL_DIR = "$ROOT_DIR/util";
$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}";
@@ -153,9 +154,6 @@ my $NO_REFINE_START_CODONS_FLAG = 0;
my $output_dir = &Pipeliner::ensure_full_path(cwd()); # current working directory by default.
-my $show_version_flag;
-
-
&GetOptions( 't=s' => \$transcripts_file,
'h' => \$help,
@@ -181,8 +179,6 @@ my $show_version_flag;
'no_refine_starts' => \$NO_REFINE_START_CODONS_FLAG,
- 'version' => \$show_version_flag,
-
'output_dir|O=s' => \$output_dir,
);
@@ -192,12 +188,6 @@ if ($help) {
die $usage;
}
-if ($show_version_flag) {
- print "TransDecoder.Predict $VERSION\n";
- exit(0);
-}
-
-
if (@ARGV) {
die "Error, don't understand options: @ARGV";
}
@@ -498,4 +488,3 @@ sub get_dynamic_retain_long_orf_length {
return(1000000); #effectively infinity here.
}
-
=====================================
util/gff3_file_to_bed.pl
=====================================
@@ -10,6 +10,8 @@ use Carp;
use Nuc_translator;
use File::Basename;
+our $SEE = 0;
+
my $usage = "\n\nusage: $0 gff3_file\n\n";
my $gff3_file = $ARGV[0] or die $usage;
@@ -19,7 +21,7 @@ my $gene_obj_indexer_href = {};
## associate gene identifiers with contig id's.
my $contig_to_gene_list_href = &GFF3_utils2::index_GFF3_gene_objs($gff3_file, $gene_obj_indexer_href);
-print "track name=\'" . basename($gff3_file) . "\'\n";
+#print "track name=\'" . basename($gff3_file) . "\'\n";
foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
@@ -29,12 +31,10 @@ foreach my $asmbl_id (sort keys %$contig_to_gene_list_href) {
my $gene_obj_ref = $gene_obj_indexer_href->{$gene_id};
- foreach my $gene ($gene_obj_ref, $gene_obj_ref->get_additional_isoforms()) {
-
- my $bed = $gene->to_BED_format();
+ my $bed = $gene_obj_ref->to_BED_format();
- print $bed;
- }
+ print $bed;
+
}
}
=====================================
util/gtf_genome_to_cdna_fasta.pl
=====================================
@@ -16,7 +16,7 @@ my $genome = $ARGV[1] or die $usage;
main: {
- print STDERR "-parsing cufflinks output: $cufflinks_gtf\n";
+ print STDERR "-parsing GTF: $cufflinks_gtf\n";
my %genome_trans_to_coords;
open (my $fh, $cufflinks_gtf) or die "Error, cannot open file $cufflinks_gtf";
=====================================
util/misc/get_FP_FN_scores.py
=====================================
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import os, sys, re
=====================================
util/misc/plot_indiv_seq_likelihood_profile.py
=====================================
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import os,sys
import re
@@ -39,9 +39,9 @@ def main():
score_vec.sort()
if args.cumsum:
- plt.plot(range(1,len(score_vec)+1), np.cumsum(score_vec), marker ='o')
+ plt.plot(list(range(1,len(score_vec)+1)), np.cumsum(score_vec), marker ='o')
else:
- plt.plot(range(1,len(score_vec)+1), score_vec, marker ='+')
+ plt.plot(list(range(1,len(score_vec)+1)), score_vec, marker ='+')
plt.show()
=====================================
util/misc/select_TD_orfs.py
=====================================
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import sys, os, re
import collections
View it on GitLab: https://salsa.debian.org/med-team/transdecoder/-/commit/ba94bab7b31dbe667d9d0b6400e08475b50f444e
--
View it on GitLab: https://salsa.debian.org/med-team/transdecoder/-/commit/ba94bab7b31dbe667d9d0b6400e08475b50f444e
You're receiving this email because of your account on salsa.debian.org. Manage all notifications: https://salsa.debian.org/-/profile/notifications | Help: https://salsa.debian.org/help
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20260501/3ab5d919/attachment-0001.htm>
More information about the debian-med-commit
mailing list