[med-svn] [Git][med-team/pyensembl][master] 4 commits: New upstream version 2.6.9
Karsten Schöke (@karso)
gitlab at salsa.debian.org
Thu Apr 30 14:34:07 BST 2026
Karsten Schöke pushed to branch master at Debian Med / pyensembl
Commits:
88d09667 by Karsten Schöke at 2026-04-30T15:11:50+02:00
New upstream version 2.6.9
- - - - -
391c6adf by Karsten Schöke at 2026-04-30T15:11:50+02:00
New upstream version
- - - - -
423192ae by Karsten Schöke at 2026-04-30T15:11:51+02:00
Update upstream source from tag 'upstream/2.6.9'
Update to upstream version '2.6.9'
with Debian dir 467e40ae3b2864030c0c213cc505ce62f26d2220
- - - - -
c4d2b101 by Karsten Schöke at 2026-04-30T15:23:47+02:00
Update changelog for 2.6.9-1 release
- - - - -
8 changed files:
- debian/changelog
- pyensembl/genome.py
- pyensembl/transcript.py
- pyensembl/version.py
- + tests/data/arabidopsis.tair10.partial.cdna.fa
- + tests/data/arabidopsis.tair10.partial.gtf
- + tests/test_tair10_complete.py
- tests/test_transcript_support_level.py
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+pyensembl (2.6.9-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+
+ -- Karsten Schöke <karsten.schoeke at geobasis-bb.de> Thu, 30 Apr 2026 15:23:11 +0200
+
pyensembl (2.6.7-1) unstable; urgency=medium
* Team upload.
=====================================
pyensembl/genome.py
=====================================
@@ -29,6 +29,25 @@ from .sequence_data import SequenceData
from .transcript import Transcript
+def _parse_transcript_support_level(value):
+ """
+ Coerce a raw ``transcript_support_level`` attribute into an int or None.
+
+ Recent Ensembl releases append text such as
+ ``"1 (assigned to previous version 5)"`` after the numeric TSL, and
+ older or missing entries can be ``None`` or the literal string ``"NA"``.
+ Keep only the leading whitespace-separated token and return it as an
+ int when it is a digit, otherwise None.
+ """
+ if not value:
+ return None
+ tokens = value.split()
+ leading = tokens[0] if tokens else None
+ if leading and leading.isdigit():
+ return int(leading)
+ return None
+
+
class Genome(Serializable):
"""
Bundles together the genomic annotation and sequence data associated with
@@ -896,11 +915,9 @@ class Genome(Serializable):
extra_data = dict(zip(extra_field_names, result[5:]))
transcript_name = extra_data.get("transcript_name")
transcript_biotype = extra_data.get("transcript_biotype")
- tsl = extra_data.get("transcript_support_level")
- if not tsl or tsl == "NA":
- tsl = None
- else:
- tsl = int(tsl)
+ tsl = _parse_transcript_support_level(
+ extra_data.get("transcript_support_level")
+ )
self._transcripts[transcript_id] = Transcript(
transcript_id=transcript_id,
=====================================
pyensembl/transcript.py
=====================================
@@ -489,6 +489,13 @@ class Transcript(LocusWithGenome):
if self.sequence is None:
return None
+ # Some GTF annotations (e.g. fragments in Ensembl Plants) leave a
+ # protein-coding transcript without a start_codon or stop_codon
+ # feature. Return None rather than crashing when either endpoint of
+ # the CDS cannot be located.
+ if not self.contains_start_codon or not self.contains_stop_codon:
+ return None
+
start = self.first_start_codon_spliced_offset
end = self.last_stop_codon_spliced_offset
@@ -508,6 +515,8 @@ class Transcript(LocusWithGenome):
cDNA sequence of 5' UTR
(untranslated region at the beginning of the transcript)
"""
+ if self.sequence is None or not self.contains_start_codon:
+ return None
# pylint: disable=invalid-slice-index
# TODO(tavi) Figure out pylint is not happy with this slice
return self.sequence[: self.first_start_codon_spliced_offset]
@@ -518,6 +527,8 @@ class Transcript(LocusWithGenome):
cDNA sequence of 3' UTR
(untranslated region at the end of the transcript)
"""
+ if self.sequence is None or not self.contains_stop_codon:
+ return None
return self.sequence[self.last_stop_codon_spliced_offset + 1 :]
@memoized_property
=====================================
pyensembl/version.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = "2.6.7"
+__version__ = "2.6.9"
def print_version():
print(f"v{__version__}")
=====================================
tests/data/arabidopsis.tair10.partial.cdna.fa
=====================================
@@ -0,0 +1,46 @@
+>AT1G01010.1
+AAATTATTAGATATACCAAACCAGAGAAAACAAATACATAATCGGAGAAATACAGATTAC
+AGAGAGCGAGAGAGATCGACGGCGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGT
+TTAGTGAAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTT
+GGTCACTATCTCCGTAACAAAATCGAAGGAAACACTAGCCGCGACGTTGAAGTAGCCATC
+AGCGAGGTCAACATCTGTAGCTACGATCCTTGGAACTTGCGCTTCCAGTCAAAGTACAAA
+TCGAGAGATGCTATGTGGTACTTCTTCTCTCGTAGAGAAAACAACAAAGGGAATCGACAG
+AGCAGGACAACGGTTTCTGGTAAATGGAAGCTTACCGGAGAATCTGTTGAGGTCAAGGAC
+CAGTGGGGATTTTGTAGTGAGGGCTTTCGTGGTAAGATTGGTCATAAAAGGGTTTTGGTG
+TTCCTCGATGGAAGATACCCTGACAAAACCAAATCTGATTGGGTTATCCACGAGTTCCAC
+TACGACCTCTTACCAGAACATCAGAGGACATATGTCATCTGCAGACTTGAGTACAAGGGT
+GATGATGCGGACATTCTATCTGCTTATGCAATAGATCCCACTCCCGCTTTTGTCCCCAAT
+ATGACTAGTAGTGCAGGTTCTGTGGTCAACCAATCACGTCAACGAAATTCAGGATCTTAC
+AACACTTACTCTGAGTATGATTCAGCAAATCATGGCCAGCAGTTTAATGAAAACTCTAAC
+ATTATGCAGCAGCAACCACTTCAAGGATCATTCAACCCTCTCCTTGAGTATGATTTTGCA
+AATCACGGCGGTCAGTGGCTGAGTGACTATATCGACCTGCAACAGCAAGTTCCTTACTTG
+GCACCTTATGAAAATGAGTCGGAGATGATTTGGAAGCATGTGATTGAAGAAAATTTTGAG
+TTTTTGGTAGATGAAAGGACATCTATGCAACAGCATTACAGTGATCACCGGCCCAAAAAA
+CCTGTGTCTGGGGTTTTGCCTGATGATAGCAGTGATACTGAAACTGGATCAATGATTTTC
+GAAGACACTTCGAGCTCCACTGATAGTGTTGGTAGTTCAGATGAACCGGGCCATACTCGT
+ATAGATGATATTCCATCATTGAACATTATTGAGCCTTTGCACAATTATAAGGCACAAGAG
+CAACCAAAGCAGCAGAGCAAAGAAAAGGTGATAAGTTCGCAGAAAAGCGAATGCGAGTGG
+AAAATGGCTGAAGACTCGATCAAGATACCTCCATCCACCAACACGGTGAAGCAGAGCTGG
+ATTGTTTTGGAGAATGCACAGTGGAACTATCTCAAGAACATGATCATTGGTGTCTTGTTG
+TTCATCTCCGTCATTAGTTGGATCATTCTTGTTGGTTAAGAGGTCAAATCGGATTCTTGC
+TCAAAATTTGTATTTCTTAGAATGTGTGTTTTTTTTTGTTTTTTTTTCTTTGCTCTGTTT
+TCTCGCTCCGGAAAAGTTTGAAGTTATATTTTATTAGTATGTAAAGAAGAGAAAAAGGGG
+GAAAGAAGAGAGAAGAAAAATGCAGAAAATCATATATATGAATTGGAAAAAAGTATATGT
+AATAATAATTAGTGCATCGTTTTGTGGTGTAGTTTATATAAATAAAGTGATATATAGTCT
+TGTATAAG
+>AT1G03325.1
+TCATCTGTGCATCATAAAGGCAAAAACTTTAAGATTTGTGAAGATAAAAGTAAGAAATCT
+TCAAATAGGTTAAACAACTCACCTTCGTTTTTCTTCGATTTCTTCTTCTTCGTTGCTTTA
+ACCTGA
+>AT1G24475.1
+GCCCAATGGGCCATATATTTATCCACGAAAGGTGGAGGAGCAAATACAAACTTGAAAATA
+TG
+>AT1G42615.1
+TTTGATAAAGAGCCAACAAATCCTGTGGATTTTGGAGCAGAGACTTTGAGTTATGAGGAT
+TATTACGATGAAACAAGAGATAGATATGATAAAGCTTTTCTGATGATGATTACTTATCAG
+TGTGATGCTTTGGTTGACAAGTTTAATGTCACTCCTTTGATTATTGGTGAAGTAAAAGAT
+ACTAAGAGGCCTAAAACACACAAAGCTGAGCCGTGTAACTTAGATGGTAAAAGAGCAGTG
+ACGATATGGTTTAGTATGCTTAAAATGACTATGCCTTTATCGAGTTCTTTGGTGATATCT
+TTCCTTGCTTGTCACCAAGCGGGAGCACCGGTACTTCATCCATCAGTCGGAACTTCATCT
+ATCTCTACTGTACATGGAATAGAGCAGGAGGGGAACATACATATTCAGGATGACCTTCCT
+AAACCAGAG
=====================================
tests/data/arabidopsis.tair10.partial.gtf
=====================================
@@ -0,0 +1,44 @@
+#!genome-build TAIR10
+#!genome-version TAIR10
+#!genome-date 2008-04
+#!genome-build-accession GCA_000001735.1
+#!genebuild-last-updated 2010-09
+1 araport11 gene 817053 817178 . - . gene_id "AT1G03325"; gene_name "AT1G03325"; gene_source "araport11"; gene_biotype "protein_coding";
+1 araport11 transcript 817053 817178 . - . gene_id "AT1G03325"; transcript_id "AT1G03325.1"; gene_name "AT1G03325"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G03325-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 exon 817053 817178 . - . gene_id "AT1G03325"; transcript_id "AT1G03325.1"; exon_number "1"; gene_name "AT1G03325"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G03325-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G03325.1.exon1"; tag "Ensembl_canonical";
+1 araport11 CDS 817056 817178 . - 0 gene_id "AT1G03325"; transcript_id "AT1G03325.1"; exon_number "1"; gene_name "AT1G03325"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G03325-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G03325.1"; tag "Ensembl_canonical";
+1 araport11 stop_codon 817053 817055 . - 0 gene_id "AT1G03325"; transcript_id "AT1G03325.1"; exon_number "1"; gene_name "AT1G03325"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G03325-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 gene 3631 5899 . + . gene_id "AT1G01010"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding";
+1 araport11 transcript 3631 5899 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 exon 3631 3913 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "1"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G01010.1.exon1"; tag "Ensembl_canonical";
+1 araport11 CDS 3760 3913 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "1"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G01010.1"; tag "Ensembl_canonical";
+1 araport11 start_codon 3760 3762 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "1"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 exon 3996 4276 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "2"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G01010.1.exon2"; tag "Ensembl_canonical";
+1 araport11 CDS 3996 4276 . + 2 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "2"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G01010.1"; tag "Ensembl_canonical";
+1 araport11 exon 4486 4605 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "3"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G01010.1.exon3"; tag "Ensembl_canonical";
+1 araport11 CDS 4486 4605 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "3"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G01010.1"; tag "Ensembl_canonical";
+1 araport11 exon 4706 5095 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "4"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G01010.1.exon4"; tag "Ensembl_canonical";
+1 araport11 CDS 4706 5095 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "4"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G01010.1"; tag "Ensembl_canonical";
+1 araport11 exon 5174 5326 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "5"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G01010.1.exon5"; tag "Ensembl_canonical";
+1 araport11 CDS 5174 5326 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "5"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G01010.1"; tag "Ensembl_canonical";
+1 araport11 exon 5439 5899 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "6"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G01010.1.exon6"; tag "Ensembl_canonical";
+1 araport11 CDS 5439 5627 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "6"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G01010.1"; tag "Ensembl_canonical";
+1 araport11 stop_codon 5628 5630 . + 0 gene_id "AT1G01010"; transcript_id "AT1G01010.1"; exon_number "6"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 five_prime_utr 3631 3759 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 three_prime_utr 5631 5899 . + . gene_id "AT1G01010"; transcript_id "AT1G01010.1"; gene_name "NAC001"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "NAC001-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 gene 8676282 8676343 . - . gene_id "AT1G24475"; gene_name "AT1G24475"; gene_source "araport11"; gene_biotype "protein_coding";
+1 araport11 transcript 8676282 8676343 . - . gene_id "AT1G24475"; transcript_id "AT1G24475.1"; gene_name "AT1G24475"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G24475-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 exon 8676282 8676343 . - . gene_id "AT1G24475"; transcript_id "AT1G24475.1"; exon_number "1"; gene_name "AT1G24475"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G24475-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G24475.1.exon1"; tag "Ensembl_canonical";
+1 araport11 CDS 8676282 8676338 . - 0 gene_id "AT1G24475"; transcript_id "AT1G24475.1"; exon_number "1"; gene_name "AT1G24475"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G24475-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G24475.1"; tag "Ensembl_canonical";
+1 araport11 start_codon 8676336 8676338 . - 0 gene_id "AT1G24475"; transcript_id "AT1G24475.1"; exon_number "1"; gene_name "AT1G24475"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G24475-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 five_prime_utr 8676339 8676343 . - . gene_id "AT1G24475"; transcript_id "AT1G24475.1"; gene_name "AT1G24475"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G24475-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 gene 16037606 16038455 . + . gene_id "AT1G42615"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding";
+1 araport11 transcript 16037606 16038455 . + . gene_id "AT1G42615"; transcript_id "AT1G42615.1"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; tag "Ensembl_canonical";
+1 araport11 exon 16037606 16037771 . + . gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "1"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G42615.1.exon1"; tag "Ensembl_canonical";
+1 araport11 CDS 16037606 16037771 . + 0 gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "1"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G42615.1"; tag "Ensembl_canonical";
+1 araport11 exon 16037964 16038127 . + . gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "2"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G42615.1.exon2"; tag "Ensembl_canonical";
+1 araport11 CDS 16037964 16038127 . + 2 gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "2"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G42615.1"; tag "Ensembl_canonical";
+1 araport11 exon 16038329 16038385 . + . gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "3"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G42615.1.exon3"; tag "Ensembl_canonical";
+1 araport11 CDS 16038329 16038385 . + 0 gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "3"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G42615.1"; tag "Ensembl_canonical";
+1 araport11 exon 16038414 16038455 . + . gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "4"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; exon_id "AT1G42615.1.exon4"; tag "Ensembl_canonical";
+1 araport11 CDS 16038414 16038455 . + 0 gene_id "AT1G42615"; transcript_id "AT1G42615.1"; exon_number "4"; gene_name "AT1G42615"; gene_source "araport11"; gene_biotype "protein_coding"; transcript_name "AT1G42615-201"; transcript_source "araport11"; transcript_biotype "protein_coding"; protein_id "AT1G42615.1"; tag "Ensembl_canonical";
=====================================
tests/test_tair10_complete.py
=====================================
@@ -0,0 +1,105 @@
+"""
+Regression tests for Transcript.complete and Transcript.coding_sequence on
+Ensembl Plants / TAIR10-style data.
+
+Covers issue #252 (Transcript.complete returning the wrong result for TAIR
+transcripts — fixed by #268 which stopped stripping the `.N` isoform suffix
+from non-ENSEMBL FASTA headers) and a related defect where coding_sequence
+and the 5'/3' UTR accessors raised ValueError for transcripts missing a
+start_codon or stop_codon feature.
+
+GTF and cDNA FASTA fragments were taken from Ensembl Plants release 57:
+ http://ftp.ensemblgenomes.org/pub/plants/release-57/gtf/arabidopsis_thaliana/
+ Arabidopsis_thaliana.TAIR10.57.gtf.gz
+ http://ftp.ensemblgenomes.org/pub/plants/release-57/fasta/arabidopsis_thaliana/cdna/
+ Arabidopsis_thaliana.TAIR10.cdna.all.fa.gz
+
+Four transcripts were selected:
+ * AT1G01010.1 (NAC001) — fully annotated: 6 exons, start_codon + stop_codon.
+ * AT1G03325.1 — protein-coding fragment: no start_codon feature.
+ * AT1G24475.1 — protein-coding fragment: no stop_codon feature.
+ * AT1G42615.1 — protein-coding fragment: neither start_codon nor stop_codon.
+"""
+from __future__ import absolute_import
+
+from pyensembl import Genome
+
+from .common import eq_, ok_
+from .data import data_path
+
+
+TAIR10_GTF_PATH = data_path("arabidopsis.tair10.partial.gtf")
+TAIR10_CDNA_FASTA_PATH = data_path("arabidopsis.tair10.partial.cdna.fa")
+
+
+custom_tair10_genome_subset = Genome(
+ reference_name="TAIR10",
+ annotation_name="_test_arabidopsis_tair10_subset",
+ gtf_path_or_url=TAIR10_GTF_PATH,
+ transcript_fasta_paths_or_urls=[TAIR10_CDNA_FASTA_PATH],
+)
+
+
+def setup_module(module):
+ custom_tair10_genome_subset.clear_cache()
+ custom_tair10_genome_subset.index()
+
+
+def test_complete_transcript_with_start_and_stop():
+ # AT1G01010.1 has both start_codon and stop_codon features and a cDNA
+ # in the FASTA — .complete must be True and the coding sequence must
+ # start with ATG and end with a stop codon.
+ transcript = custom_tair10_genome_subset.transcript_by_id("AT1G01010.1")
+ ok_(transcript.contains_start_codon)
+ ok_(transcript.contains_stop_codon)
+ ok_(transcript.sequence is not None)
+ ok_(transcript.complete)
+ cds = transcript.coding_sequence
+ ok_(cds is not None)
+ eq_(len(cds) % 3, 0)
+ eq_(cds[:3], "ATG")
+ ok_(cds[-3:] in ("TAA", "TAG", "TGA"))
+ # UTRs flank the CDS and concatenate back to the full cDNA.
+ five_prime = transcript.five_prime_utr_sequence
+ three_prime = transcript.three_prime_utr_sequence
+ ok_(five_prime is not None)
+ ok_(three_prime is not None)
+ eq_(five_prime + cds + three_prime, transcript.sequence)
+
+
+def test_transcript_missing_start_codon_is_not_complete():
+ # AT1G03325.1 has stop_codon but no start_codon. .complete must be
+ # False and .coding_sequence / .five_prime_utr_sequence must return
+ # None rather than raise. The 3' UTR is still resolvable.
+ transcript = custom_tair10_genome_subset.transcript_by_id("AT1G03325.1")
+ eq_(transcript.contains_start_codon, False)
+ eq_(transcript.contains_stop_codon, True)
+ eq_(transcript.complete, False)
+ eq_(transcript.coding_sequence, None)
+ eq_(transcript.five_prime_utr_sequence, None)
+ ok_(transcript.three_prime_utr_sequence is not None)
+
+
+def test_transcript_missing_stop_codon_is_not_complete():
+ # AT1G24475.1 has start_codon but no stop_codon. .complete must be
+ # False and .coding_sequence / .three_prime_utr_sequence must return
+ # None rather than raise. The 5' UTR is still resolvable.
+ transcript = custom_tair10_genome_subset.transcript_by_id("AT1G24475.1")
+ eq_(transcript.contains_start_codon, True)
+ eq_(transcript.contains_stop_codon, False)
+ eq_(transcript.complete, False)
+ eq_(transcript.coding_sequence, None)
+ ok_(transcript.five_prime_utr_sequence is not None)
+ eq_(transcript.three_prime_utr_sequence, None)
+
+
+def test_transcript_missing_both_codons_is_not_complete():
+ # AT1G42615.1 has neither start_codon nor stop_codon. All CDS/UTR
+ # accessors must return None rather than raise.
+ transcript = custom_tair10_genome_subset.transcript_by_id("AT1G42615.1")
+ eq_(transcript.contains_start_codon, False)
+ eq_(transcript.contains_stop_codon, False)
+ eq_(transcript.complete, False)
+ eq_(transcript.coding_sequence, None)
+ eq_(transcript.five_prime_utr_sequence, None)
+ eq_(transcript.three_prime_utr_sequence, None)
=====================================
tests/test_transcript_support_level.py
=====================================
@@ -7,6 +7,23 @@ from __future__ import absolute_import
from .common import eq_
from pyensembl import cached_release
+from pyensembl.genome import _parse_transcript_support_level
+
+
+def test_parse_transcript_support_level_values():
+ # Recent Ensembl releases append parenthetical text to the TSL; we should
+ # still recover the leading integer. See openvax/pyensembl#297.
+ eq_(_parse_transcript_support_level("1 (assigned to previous version 5)"), 1)
+ eq_(_parse_transcript_support_level("3 (assigned to previous version 12)"), 3)
+ # Plain numeric strings still work.
+ eq_(_parse_transcript_support_level("1"), 1)
+ eq_(_parse_transcript_support_level("5"), 5)
+ # Missing / NA / junk values collapse to None.
+ eq_(_parse_transcript_support_level(None), None)
+ eq_(_parse_transcript_support_level(""), None)
+ eq_(_parse_transcript_support_level("NA"), None)
+ eq_(_parse_transcript_support_level("NA (something)"), None)
+ eq_(_parse_transcript_support_level(" "), None)
def test_transcript_support_level():
View it on GitLab: https://salsa.debian.org/med-team/pyensembl/-/compare/90e19fd1bdc11892038a17ae7dc2609564858572...c4d2b101144c3065a0e9ea7ecb6f9e047d65250b
--
View it on GitLab: https://salsa.debian.org/med-team/pyensembl/-/compare/90e19fd1bdc11892038a17ae7dc2609564858572...c4d2b101144c3065a0e9ea7ecb6f9e047d65250b
You're receiving this email because of your account on salsa.debian.org. Manage all notifications: https://salsa.debian.org/-/profile/notifications | Help: https://salsa.debian.org/help
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20260430/18b6bcce/attachment-0001.htm>
More information about the debian-med-commit mailing list