[med-svn] [Git][med-team/mirtop][upstream] New upstream version 0.4.30
Alexandre Detiste (@detiste-guest)
gitlab at salsa.debian.org
Sun Apr 13 13:21:46 BST 2025
Alexandre Detiste pushed to branch upstream at Debian Med / mirtop
Commits:
ce9d65d9 by Alexandre Detiste at 2025-04-13T14:15:13+02:00
New upstream version 0.4.30
- - - - -
14 changed files:
- HISTORY.md
- + data/examples/annotate/mirtop.db
- environment.yml
- mirtop/command_line.py
- mirtop/exporter/isomirs.py
- mirtop/exporter/vcf.py
- mirtop/gff/__init__.py
- mirtop/gff/convert.py
- mirtop/gff/read.py
- mirtop/importer/prost.py
- mirtop/mirna/mapper.py
- requirements.txt
- setup.py
- test/test_functions.py
Changes:
=====================================
HISTORY.md
=====================================
@@ -1,3 +1,7 @@
+0.4.29
+
+* Update handling of --database argument [#90](https://github.com/miRTop/mirtop/issues/90)
+
0.4.28
* fix random order in Variant field [#84](https://github.com/miRTop/mirtop/issues/83)
=====================================
data/examples/annotate/mirtop.db
=====================================
Binary files /dev/null and b/data/examples/annotate/mirtop.db differ
=====================================
environment.yml
=====================================
@@ -4,4 +4,5 @@ dependencies:
- bioconda::pybedtools
- bioconda::samtools=1.21
- conda-forge::pandas
+ - conda-forge::sqlite
- conda-forge::biopython=1.83
\ No newline at end of file
=====================================
mirtop/command_line.py
=====================================
@@ -14,6 +14,7 @@ from mirtop.gff import validator
from mirtop.libs import spikeins
from mirtop.gff import update
from mirtop.sql import sql
+from mirtop.mirna import mapper
import mirtop.libs.logger as mylog
import time
@@ -25,6 +26,9 @@ def main(**kwargs):
kwargs['args'].print_debug)
logger = mylog.getLogger(__name__)
start = time.time()
+ if not hasattr(kwargs["args"], "database"):
+ if ("sql" not in kwargs and "stats" not in kwargs and "update" not in kwargs and "validate" not in kwargs):
+ kwargs["args"].database = mapper.guess_database(kwargs["args"])
if "gff" in kwargs:
logger.info("Run annotation")
=====================================
mirtop/exporter/isomirs.py
=====================================
@@ -38,7 +38,7 @@ def convert(args):
def _convert_file(gff, args):
sep = "\t"
precursors = fasta.read_precursor(args.hairpin, args.sps)
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
variant_header = sep.join(['mism', 'add', 't5', 't3'])
gff_file = open(gff, 'r')
=====================================
mirtop/exporter/vcf.py
=====================================
@@ -25,7 +25,7 @@ def convert(args):
for fn in args.files:
out_file = op.join(args.out, "%s.vcf" % op.splitext(op.basename(fn))[0])
logger.info("Reading %s" % fn)
- create_vcf(fn, args.hairpin, args.gtf, out_file)
+ create_vcf(fn, args.hairpin, args.gtf, out_file, args.database)
logger.info("VCF generated %s" % out_file)
@@ -121,7 +121,7 @@ def cigar_2_key(cigar, readseq, refseq, pos, var5p, var3p, parent_ini_pos, paren
return(key_pos, key_var, ref, alt)
-def create_vcf(mirgff3, precursor, gtf, vcffile):
+def create_vcf(mirgff3, precursor, gtf, vcffile, database):
"""
Args:
'mirgff3(str)': File with mirGFF3 format that will be converted
@@ -178,7 +178,7 @@ def create_vcf(mirgff3, precursor, gtf, vcffile):
n_noSNP = 0
no_var = 0
hairpins = read_precursor(precursor)
- gff3 = read_gtf_to_precursor(gtf)
+ gff3 = read_gtf_to_precursor(gtf, database)
gtf_dic = read_gtf_to_mirna(gtf)
for line in range(0, len(gff3_data)):
if not gff3_data[line]:
=====================================
mirtop/gff/__init__.py
=====================================
@@ -21,11 +21,14 @@ def reader(args):
read.reader(args)
return None
samples = []
- database = mapper.guess_database(args)
+ if args.database is None:
+ database = mapper.guess_database(args)
+ else:
+ database = args.database
args.database = database
precursors = fasta.read_precursor(args.hairpin, args.sps)
args.precursors = precursors
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, database)
args.matures = matures
# TODO check numbers of miRNA and precursors read
# TODO print message if numbers mismatch
@@ -75,13 +78,14 @@ def reader(args):
def _write(lines, header, fn, args = None):
out_handle = open(fn, 'w')
print(header, file=out_handle)
- mapper = read_gtf_to_mirna(args.gtf)
+ database = mapper.guess_database(args)
+ dbmapper = read_gtf_to_mirna(args.gtf, database)
for m in lines:
for s in sorted(lines[m].keys()):
for hit in lines[m][s]:
# TODO: convert to genomic if args.out_genomic
if args and args.out_genomic:
- lifted = body.lift_to_genome(hit[4], mapper)
+ lifted = body.lift_to_genome(hit[4], dbmapper)
print(lifted, file=out_handle)
else:
print(hit[4], file=out_handle)
=====================================
mirtop/gff/convert.py
=====================================
@@ -30,7 +30,7 @@ def convert_gff_counts(args):
'iso_add3p', 'iso_snp']
if args.add_extra:
precursors = fasta.read_precursor(args.hairpin, args.sps)
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt']
logger.info("INFO Reading GFF file %s", args.gff)
=====================================
mirtop/gff/read.py
=====================================
@@ -20,7 +20,7 @@ def reader(args):
args.database = database
precursors = fasta.read_precursor(args.hairpin, args.sps)
args.precursors = precursors
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
args.matures = matures
# TODO check numbers of miRNA and precursors read
# TODO print message if numbers mismatch
=====================================
mirtop/importer/prost.py
=====================================
@@ -41,7 +41,7 @@ def read_file(fn, hairpins, database, mirna_gtf):
reads = defaultdict(hits)
sample = os.path.splitext(os.path.basename(fn))[0]
genomics = mapper.read_gtf_to_mirna(mirna_gtf)
- matures = mapper.read_gtf_to_precursor(mirna_gtf)
+ matures = mapper.read_gtf_to_precursor(mirna_gtf, database)
non_mirna = 0
non_chromosome_mirna = 0
outside_mirna = 0
=====================================
mirtop/mirna/mapper.py
=====================================
@@ -1,6 +1,7 @@
"""Read database information"""
from collections import defaultdict
+import re
import mirtop.libs.logger as mylog
@@ -20,10 +21,14 @@ def guess_database(args):
TODO: this needs to be generic to other databases.
"""
+ if not hasattr(args, "database"):
+ args.database = None
return _guess_database_file(args.gtf, args.database)
def _guess_database_file(gff, database=None):
+ if database:
+ return database
with open(gff) as in_handle:
for line in in_handle:
if not line.startswith("#"):
@@ -54,7 +59,7 @@ def get_primary_transcript(database):
raise ValueError("Only miRBase is supported for this action.")
-def read_gtf_to_mirna(gtf):
+def read_gtf_to_mirna(gtf, database=None):
"""
Load GTF file with precursor positions on genome.
@@ -68,9 +73,11 @@ def read_gtf_to_mirna(gtf):
"""
if not gtf:
return gtf
- if _guess_database_file(gtf).find("miRBase") > -1:
+ if not database:
+ database = _guess_database_file(gtf)
+ if database.find("miRBase") > -1:
mapped = read_gtf_to_precursor_mirbase(gtf, format="genomic")
- elif _guess_database_file(gtf).find("MirGeneDB") > -1:
+ elif database.find("MirGeneDB") > -1:
mapped = read_gtf_to_precursor_mirgenedb(gtf, format="genomic")
else:
logger.info("Database different than miRBase or MirGeneDB")
@@ -143,7 +150,7 @@ def read_gtf_chr2mirna2(gtf): # to remove
return db_mir
-def read_gtf_to_precursor(gtf):
+def read_gtf_to_precursor(gtf, database=None):
"""
Load GTF file with precursor positions on genome
Return dict with key being precursor name and
@@ -161,15 +168,28 @@ def read_gtf_to_precursor(gtf):
"""
if not gtf:
return gtf
- if _guess_database_file(gtf).find("miRBase") > -1:
+ if not database:
+ database = _guess_database_file(gtf)
+ if database.find("miRBase") > -1:
mapped = read_gtf_to_precursor_mirbase(gtf)
- elif _guess_database_file(gtf).find("MirGeneDB") > -1:
+ elif database.find("MirGeneDB") > -1:
mapped = read_gtf_to_precursor_mirgenedb(gtf)
else:
logger.info("Database different than miRBase or MirGeneDB")
logger.info("If you get an error when loading,")
logger.info("report it to https://github.com/miRTop/mirtop/issues")
- mapped = read_gtf_to_precursor_mirbase(gtf)
+ try:
+ mapped = read_gtf_to_precursor_mirbase(gtf)
+ return mapped
+ except Exception as e:
+ print(f"Failed to parse with Mirbase: {e}")
+ try:
+ mapped = read_gtf_to_precursor_mirgenedb(gtf)
+ return mapped
+ except Exception as e:
+ print(f"Failed to parse with Mirgenedb: {e}")
+ raise ValueError(f"There is no parser available for the database that you used: {database}")
+
return mapped
@@ -271,6 +291,7 @@ def read_gtf_to_precursor_mirgenedb(gtf, format="precursor"):
db = defaultdict(list)
db_mir = defaultdict(list)
id_dict = dict()
+ pattern = r'(_3p\*?|_5p\*?)'
with open(gtf) as in_handle:
for line in in_handle:
if line.startswith("#"):
@@ -286,7 +307,10 @@ def read_gtf_to_precursor_mirgenedb(gtf, format="precursor"):
if cols[2] == "miRNA":
idname_mi = [n.split("=")[1] for n in cols[-1].split(";")
if n.startswith("ID")][0]
- parent = "%s_pre" % idname_mi.split("_")[0]
+ # parent = "%s_pre" % idname_mi.replace("_3p.*", "").replace("_5p.*", "")
+ parent = re.sub(pattern, '', idname_mi)
+ parent = "%s_pre" % parent
+ # import pdb; pdb.set_trace()
db_mir[(parent, name)] = [chrom,
int(start), int(end),
strand, parent]
=====================================
requirements.txt
=====================================
@@ -3,5 +3,5 @@ pybedtools
pandas
biopython
pyyaml
-pybedtools
six
+pytest
=====================================
setup.py
=====================================
@@ -3,7 +3,7 @@
import os
from setuptools import setup, find_packages
-version = '0.4.28'
+version = '0.4.30'
url = 'http://github.com/mirtop/mirtop'
=====================================
test/test_functions.py
=====================================
@@ -45,7 +45,7 @@ def annotate(fn, read_file, load=False, create=True, keep_name=False,
args.keep_name = keep_name
from mirtop.mirna import fasta, mapper
precursors = fasta.read_precursor(args.hairpin, args.sps)
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
args.precursors = precursors
args.matures = matures
args.database = mapper.guess_database(args)
@@ -81,7 +81,7 @@ class FunctionsTest(unittest.TestCase):
from mirtop.libs import logger
logger.initialize_logger("test_read_files", True, True)
map_mir = mapper.read_gtf_to_precursor(
- "data/examples/annotate/hsa.gff3")
+ "data/examples/annotate/hsa.gff3", None)
print(map_mir)
if map_mir["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5:
raise ValueError("GFF is not loaded correctly.")
@@ -102,7 +102,7 @@ class FunctionsTest(unittest.TestCase):
from mirtop.libs import logger
logger.initialize_logger("test_read_files", True, True)
map_mir = mapper.read_gtf_to_precursor(
- "data/db/mirgenedb/hsa.gff")
+ "data/db/mirgenedb/hsa.gff", None)
print(map_mir)
##@attr(read_mir2chr=True)
@@ -259,7 +259,7 @@ class FunctionsTest(unittest.TestCase):
precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
"hsa")
matures = mapper.read_gtf_to_precursor(
- "data/examples/annotate/hsa.gff3")
+ "data/examples/annotate/hsa.gff3", None)
res = get_mature_sequence("GAAAATTTTTTTTTTTAAAAG", [5, 15])
if res != "AAAATTTTTTTTTTTAAAA":
raise ValueError("Results for GAAAATTTTTTTTTTTAAAAG was %s" % res)
@@ -447,6 +447,7 @@ class FunctionsTest(unittest.TestCase):
args.gff = 'data/examples/synthetic/let7a-5p.gff'
args.out = 'data/examples/synthetic'
args.add_extra = True
+ args.database = None
convert_gff_counts(args)
os.remove(os.path.join(args.out, "let7a-5p.tsv"))
View it on GitLab: https://salsa.debian.org/med-team/mirtop/-/commit/ce9d65d9cbc16b7ee0f52502ddac0150c7c61706
--
View it on GitLab: https://salsa.debian.org/med-team/mirtop/-/commit/ce9d65d9cbc16b7ee0f52502ddac0150c7c61706
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250413/6817c2e0/attachment-0001.htm>
More information about the debian-med-commit mailing list