[med-svn] [Git][med-team/pyensembl][upstream] New upstream version 2.3.12
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Wed Apr 24 12:38:40 BST 2024
Étienne Mollier pushed to branch upstream at Debian Med / pyensembl
Commits:
95672b5d by Étienne Mollier at 2024-04-24T13:35:00+02:00
New upstream version 2.3.12
- - - - -
5 changed files:
- pyensembl/ensembl_release.py
- pyensembl/ensembl_url_templates.py
- pyensembl/ensembl_versions.py
- pyensembl/species.py
- pyensembl/version.py
Changes:
=====================================
pyensembl/ensembl_release.py
=====================================
@@ -77,12 +77,14 @@ class EnsemblRelease(Genome):
species=self.species.latin_name,
sequence_type="cdna",
server=server,
+ is_plant = self.species.is_plant,
),
make_fasta_url(
ensembl_release=self.release,
species=self.species.latin_name,
sequence_type="ncrna",
server=server,
+ is_plant = self.species.is_plant,
),
]
@@ -92,6 +94,7 @@ class EnsemblRelease(Genome):
species=self.species.latin_name,
sequence_type="pep",
server=self.server,
+ is_plant = self.species.is_plant,
)
]
=====================================
pyensembl/ensembl_url_templates.py
=====================================
@@ -24,13 +24,19 @@ from .species import Species, find_species_by_name
from .ensembl_versions import check_release_number
ENSEMBL_FTP_SERVER = "https://ftp.ensembl.org"
+ENSEMBL_PLANTS_FTP_SERVER = "https://ftp.ensemblgenomes.ebi.ac.uk/"
# Example directories
# FASTA files: /pub/release-78/fasta/homo_sapiens/
# GTF annotation files: /pub/release-78/gtf/homo_sapiens/
FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/"
+PLANTS_FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/plants/fasta/%(species)s/%(type)s/"
GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/gtf/%(species)s/"
+PLANTS_GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/plants/gtf/%(species)s/"
+#List plants
+#Lest do a vector with all the plants species that we added to make the custom url
+lPlants = ("arabidopsis_thaliana","arabidopsis")
def normalize_release_properties(ensembl_release, species):
"""
@@ -63,12 +69,18 @@ def make_gtf_filename(ensembl_release, species):
}
-def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER):
+def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER, gtf_subdir=GTF_SUBDIR_TEMPLATE):
"""
Returns a URL and a filename, which can be joined together.
"""
+ if species.is_plant:
+ server = ENSEMBL_PLANTS_FTP_SERVER
+ gtf_subdir = PLANTS_GTF_SUBDIR_TEMPLATE
+ #else:
+ #print(f"[+] {species.latin_name} it is not a plant", flush=True)
+
ensembl_release, species, _ = normalize_release_properties(ensembl_release, species)
- subdir = GTF_SUBDIR_TEMPLATE % {"release": ensembl_release, "species": species}
+ subdir = gtf_subdir % {"release": ensembl_release, "species": species}
filename = make_gtf_filename(ensembl_release=ensembl_release, species=species)
return server + subdir + filename
@@ -93,11 +105,11 @@ NEW_FASTA_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(sequence_type)s.all.f
NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz"
-def make_fasta_filename(ensembl_release, species, sequence_type):
+def make_fasta_filename(ensembl_release, species, sequence_type, is_plant):
ensembl_release, species, reference_name = normalize_release_properties(
ensembl_release, species
)
- if ensembl_release <= 75:
+ if ensembl_release <= 75 and not is_plant:
if sequence_type == "ncrna":
return OLD_FASTA_FILENAME_TEMPLATE_NCRNA % {
"Species": species.capitalize(),
@@ -125,7 +137,7 @@ def make_fasta_filename(ensembl_release, species, sequence_type):
}
-def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_SERVER):
+def make_fasta_url(ensembl_release, species, sequence_type, is_plant, server=ENSEMBL_FTP_SERVER, fasta_subdir=FASTA_SUBDIR_TEMPLATE):
"""Construct URL to FASTA file with cDNA transcript or protein sequences
Parameter examples:
@@ -136,12 +148,17 @@ def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_S
ensembl_release, species, reference_name = normalize_release_properties(
ensembl_release, species
)
- subdir = FASTA_SUBDIR_TEMPLATE % {
+
+ if is_plant:
+ server = ENSEMBL_PLANTS_FTP_SERVER
+ fasta_subdir = PLANTS_FASTA_SUBDIR_TEMPLATE
+
+ subdir = fasta_subdir % {
"release": ensembl_release,
"species": species,
"type": sequence_type,
}
filename = make_fasta_filename(
- ensembl_release=ensembl_release, species=species, sequence_type=sequence_type
+ ensembl_release=ensembl_release, species=species, sequence_type=sequence_type, is_plant = is_plant
)
return server + subdir + filename
=====================================
pyensembl/ensembl_versions.py
=====================================
@@ -12,7 +12,7 @@
MIN_ENSEMBL_RELEASE = 47
MAX_ENSEMBL_RELEASE = 111
-
+MAX_PLANTS_ENSEMBL_RELEASE = 58
def check_release_number(release):
"""
=====================================
pyensembl/species.py
=====================================
@@ -12,7 +12,7 @@
from serializable import Serializable
-from .ensembl_versions import MAX_ENSEMBL_RELEASE
+from .ensembl_versions import MAX_ENSEMBL_RELEASE, MAX_PLANTS_ENSEMBL_RELEASE
# TODO: replace Serializable with data class
@@ -30,7 +30,7 @@ class Species(Serializable):
_reference_names_to_species = {}
@classmethod
- def register(cls, latin_name, synonyms, reference_assemblies):
+ def register(cls, latin_name, synonyms, reference_assemblies, is_plant=False):
"""
Create a Species object from the given arguments and enter into
all the dicts used to look the species up by its fields.
@@ -39,6 +39,7 @@ class Species(Serializable):
latin_name=latin_name,
synonyms=synonyms,
reference_assemblies=reference_assemblies,
+ is_plant=is_plant,
)
cls._latin_names_to_species[species.latin_name] = species
for synonym in synonyms:
@@ -80,7 +81,7 @@ class Species(Serializable):
for release in range(release_range[0], release_range[1] + 1):
yield species_name, release
- def __init__(self, latin_name, synonyms=[], reference_assemblies={}):
+ def __init__(self, latin_name, synonyms=[], reference_assemblies={}, is_plant=False):
"""
Parameters
----------
@@ -96,6 +97,7 @@ class Species(Serializable):
self.synonyms = synonyms
self.reference_assemblies = reference_assemblies
self._release_to_genome = {}
+ self.is_plant = is_plant
for genome_name, (start, end) in self.reference_assemblies.items():
for i in range(start, end + 1):
if i in self._release_to_genome:
@@ -350,3 +352,21 @@ yeast = Species.register(
"R64-1-1": (76, MAX_ENSEMBL_RELEASE),
},
)
+
+arabidopsis_thaliana = Species.register(
+ latin_name="arabidopsis_thaliana",
+ synonyms=["arabidopsis"],
+ reference_assemblies={
+ "TAIR10": (40, MAX_PLANTS_ENSEMBL_RELEASE),
+ },
+ is_plant=True
+)
+
+rice = Species.register(
+ latin_name="oryza_sativa",
+ synonyms=["rice"],
+ reference_assemblies={
+ "IRGSP-1.0": (40, MAX_PLANTS_ENSEMBL_RELEASE),
+ },
+ is_plant=True
+)
\ No newline at end of file
=====================================
pyensembl/version.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = "2.3.11"
+__version__ = "2.3.12"
def print_version():
print(f"v{__version__}")
View it on GitLab: https://salsa.debian.org/med-team/pyensembl/-/commit/95672b5d5f5b68c4616b94c9abd262a4eaa226ac
--
View it on GitLab: https://salsa.debian.org/med-team/pyensembl/-/commit/95672b5d5f5b68c4616b94c9abd262a4eaa226ac
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240424/606c5c1b/attachment-0001.htm>
More information about the debian-med-commit mailing list