[med-svn] [Git][med-team/pyensembl][upstream] New upstream version 2.3.12

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Wed Apr 24 12:38:40 BST 2024



Étienne Mollier pushed to branch upstream at Debian Med / pyensembl


Commits:
95672b5d by Étienne Mollier at 2024-04-24T13:35:00+02:00
New upstream version 2.3.12
- - - - -


5 changed files:

- pyensembl/ensembl_release.py
- pyensembl/ensembl_url_templates.py
- pyensembl/ensembl_versions.py
- pyensembl/species.py
- pyensembl/version.py


Changes:

=====================================
pyensembl/ensembl_release.py
=====================================
@@ -77,12 +77,14 @@ class EnsemblRelease(Genome):
                 species=self.species.latin_name,
                 sequence_type="cdna",
                 server=server,
+                is_plant = self.species.is_plant,
             ),
             make_fasta_url(
                 ensembl_release=self.release,
                 species=self.species.latin_name,
                 sequence_type="ncrna",
                 server=server,
+                is_plant = self.species.is_plant,
             ),
         ]
 
@@ -92,6 +94,7 @@ class EnsemblRelease(Genome):
                 species=self.species.latin_name,
                 sequence_type="pep",
                 server=self.server,
+                is_plant = self.species.is_plant,
             )
         ]
 


=====================================
pyensembl/ensembl_url_templates.py
=====================================
@@ -24,13 +24,19 @@ from .species import Species, find_species_by_name
 from .ensembl_versions import check_release_number
 
 ENSEMBL_FTP_SERVER = "https://ftp.ensembl.org"
+ENSEMBL_PLANTS_FTP_SERVER = "https://ftp.ensemblgenomes.ebi.ac.uk/"
 
 # Example directories
 # FASTA files: /pub/release-78/fasta/homo_sapiens/
 # GTF annotation files: /pub/release-78/gtf/homo_sapiens/
 FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/"
+PLANTS_FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/plants/fasta/%(species)s/%(type)s/"
 GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/gtf/%(species)s/"
+PLANTS_GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/plants/gtf/%(species)s/"
 
+# List of plants
+# Let's make a tuple of all the plant species that we added, used to build the custom URL
+lPlants = ("arabidopsis_thaliana","arabidopsis")
 
 def normalize_release_properties(ensembl_release, species):
     """
@@ -63,12 +69,18 @@ def make_gtf_filename(ensembl_release, species):
     }
 
 
-def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER):
+def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER, gtf_subdir=GTF_SUBDIR_TEMPLATE):
     """
     Returns a URL and a filename, which can be joined together.
     """
+    if species.is_plant:
+        server = ENSEMBL_PLANTS_FTP_SERVER
+        gtf_subdir = PLANTS_GTF_SUBDIR_TEMPLATE
+    #else:
+        #print(f"[+] {species.latin_name} it is not a plant", flush=True)
+
     ensembl_release, species, _ = normalize_release_properties(ensembl_release, species)
-    subdir = GTF_SUBDIR_TEMPLATE % {"release": ensembl_release, "species": species}
+    subdir = gtf_subdir % {"release": ensembl_release, "species": species}
     filename = make_gtf_filename(ensembl_release=ensembl_release, species=species)
     return server + subdir + filename
 
@@ -93,11 +105,11 @@ NEW_FASTA_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(sequence_type)s.all.f
 NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz"
 
 
-def make_fasta_filename(ensembl_release, species, sequence_type):
+def make_fasta_filename(ensembl_release, species, sequence_type, is_plant):
     ensembl_release, species, reference_name = normalize_release_properties(
         ensembl_release, species
     )
-    if ensembl_release <= 75:
+    if ensembl_release <= 75 and not is_plant:
         if sequence_type == "ncrna":
             return OLD_FASTA_FILENAME_TEMPLATE_NCRNA % {
                 "Species": species.capitalize(),
@@ -125,7 +137,7 @@ def make_fasta_filename(ensembl_release, species, sequence_type):
             }
 
 
-def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_SERVER):
+def make_fasta_url(ensembl_release, species, sequence_type, is_plant, server=ENSEMBL_FTP_SERVER, fasta_subdir=FASTA_SUBDIR_TEMPLATE):
     """Construct URL to FASTA file with cDNA transcript or protein sequences
 
     Parameter examples:
@@ -136,12 +148,17 @@ def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_S
     ensembl_release, species, reference_name = normalize_release_properties(
         ensembl_release, species
     )
-    subdir = FASTA_SUBDIR_TEMPLATE % {
+
+    if is_plant:
+        server = ENSEMBL_PLANTS_FTP_SERVER
+        fasta_subdir = PLANTS_FASTA_SUBDIR_TEMPLATE
+
+    subdir = fasta_subdir % {
         "release": ensembl_release,
         "species": species,
         "type": sequence_type,
     }
     filename = make_fasta_filename(
-        ensembl_release=ensembl_release, species=species, sequence_type=sequence_type
+        ensembl_release=ensembl_release, species=species, sequence_type=sequence_type, is_plant = is_plant
     )
     return server + subdir + filename


=====================================
pyensembl/ensembl_versions.py
=====================================
@@ -12,7 +12,7 @@
 
 MIN_ENSEMBL_RELEASE = 47
 MAX_ENSEMBL_RELEASE = 111
-
+MAX_PLANTS_ENSEMBL_RELEASE = 58
 
 def check_release_number(release):
     """


=====================================
pyensembl/species.py
=====================================
@@ -12,7 +12,7 @@
 
 from serializable import Serializable
 
-from .ensembl_versions import MAX_ENSEMBL_RELEASE
+from .ensembl_versions import MAX_ENSEMBL_RELEASE, MAX_PLANTS_ENSEMBL_RELEASE
 
 # TODO: replace Serializable with data class
 
@@ -30,7 +30,7 @@ class Species(Serializable):
     _reference_names_to_species = {}
 
     @classmethod
-    def register(cls, latin_name, synonyms, reference_assemblies):
+    def register(cls, latin_name, synonyms, reference_assemblies, is_plant=False):
         """
         Create a Species object from the given arguments and enter into
         all the dicts used to look the species up by its fields.
@@ -39,6 +39,7 @@ class Species(Serializable):
             latin_name=latin_name,
             synonyms=synonyms,
             reference_assemblies=reference_assemblies,
+            is_plant=is_plant,
         )
         cls._latin_names_to_species[species.latin_name] = species
         for synonym in synonyms:
@@ -80,7 +81,7 @@ class Species(Serializable):
                 for release in range(release_range[0], release_range[1] + 1):
                     yield species_name, release
 
-    def __init__(self, latin_name, synonyms=[], reference_assemblies={}):
+    def __init__(self, latin_name, synonyms=[], reference_assemblies={}, is_plant=False):
         """
         Parameters
         ----------
@@ -96,6 +97,7 @@ class Species(Serializable):
         self.synonyms = synonyms
         self.reference_assemblies = reference_assemblies
         self._release_to_genome = {}
+        self.is_plant = is_plant
         for genome_name, (start, end) in self.reference_assemblies.items():
             for i in range(start, end + 1):
                 if i in self._release_to_genome:
@@ -350,3 +352,21 @@ yeast = Species.register(
         "R64-1-1": (76, MAX_ENSEMBL_RELEASE),
     },
 )
+
+arabidopsis_thaliana = Species.register(
+    latin_name="arabidopsis_thaliana",
+    synonyms=["arabidopsis"],
+    reference_assemblies={
+        "TAIR10": (40, MAX_PLANTS_ENSEMBL_RELEASE),
+    },
+    is_plant=True
+)
+
+rice = Species.register(
+    latin_name="oryza_sativa",
+    synonyms=["rice"],
+    reference_assemblies={
+        "IRGSP-1.0": (40, MAX_PLANTS_ENSEMBL_RELEASE),
+    },
+    is_plant=True
+)
\ No newline at end of file


=====================================
pyensembl/version.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = "2.3.11"
+__version__ = "2.3.12"
 
 def print_version():
     print(f"v{__version__}")



View it on GitLab: https://salsa.debian.org/med-team/pyensembl/-/commit/95672b5d5f5b68c4616b94c9abd262a4eaa226ac

-- 
View it on GitLab: https://salsa.debian.org/med-team/pyensembl/-/commit/95672b5d5f5b68c4616b94c9abd262a4eaa226ac
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240424/606c5c1b/attachment-0001.htm>


More information about the debian-med-commit mailing list