[med-svn] [Git][med-team/mirtop][master] 3 commits: New upstream version 0.4.30
Alexandre Detiste (@detiste-guest)
gitlab at salsa.debian.org
Sun Apr 13 13:21:36 BST 2025
Alexandre Detiste pushed to branch master at Debian Med / mirtop
Commits:
ce9d65d9 by Alexandre Detiste at 2025-04-13T14:15:13+02:00
New upstream version 0.4.30
- - - - -
fc5af9b1 by Alexandre Detiste at 2025-04-13T14:15:15+02:00
Update upstream source from tag 'upstream/0.4.30'
Update to upstream version '0.4.30'
with Debian dir b2c452ab31657bb16930ae6f5bd66a1ac7be834e
- - - - -
0250cfd2 by Alexandre Detiste at 2025-04-13T14:21:22+02:00
release
- - - - -
18 changed files:
- HISTORY.md
- + data/examples/annotate/mirtop.db
- debian/changelog
- debian/control
- + debian/patches/remove_six.patch
- debian/patches/series
- environment.yml
- mirtop/command_line.py
- mirtop/exporter/isomirs.py
- mirtop/exporter/vcf.py
- mirtop/gff/__init__.py
- mirtop/gff/convert.py
- mirtop/gff/read.py
- mirtop/importer/prost.py
- mirtop/mirna/mapper.py
- requirements.txt
- setup.py
- test/test_functions.py
Changes:
=====================================
HISTORY.md
=====================================
@@ -1,3 +1,7 @@
+0.4.29
+
+* Update handling of --database argument [#90](https://github.com/miRTop/mirtop/issues/90)
+
0.4.28
* fix random order in Variant field [#84](https://github.com/miRTop/mirtop/issues/83)
=====================================
data/examples/annotate/mirtop.db
=====================================
Binary files /dev/null and b/data/examples/annotate/mirtop.db differ
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+mirtop (0.4.30-1) unstable; urgency=medium
+
+ * New upstream version 0.4.30
+ * Patch-out usage of python3-six
+
+ -- Alexandre Detiste <tchet at debian.org> Sun, 13 Apr 2025 14:17:43 +0200
+
mirtop (0.4.28-3) unstable; urgency=medium
* Team upload.
=====================================
debian/control
=====================================
@@ -9,7 +9,6 @@ Build-Depends: debhelper-compat (= 13),
dh-sequence-python3,
python3-all,
python3-setuptools,
- python3-six,
python3-sphinx,
python3-recommonmark,
python3-pysam,
@@ -28,7 +27,6 @@ Section: python
Depends: ${python3:Depends},
${misc:Depends},
${sphinxdoc:Depends},
- python3-six,
python3-pysam,
python3-pybedtools,
python3-pandas,
=====================================
debian/patches/remove_six.patch
=====================================
@@ -0,0 +1,71 @@
+--- a/mirtop/exporter/vcf.py
++++ b/mirtop/exporter/vcf.py
+@@ -4,7 +4,6 @@
+ import sys
+ import os.path as op
+
+-import six
+
+ from mirtop.mirna.fasta import read_precursor
+ from mirtop.mirna.mapper import read_gtf_to_precursor, read_gtf_to_mirna
+@@ -133,14 +132,12 @@
+ """
+ #Check if the input files exist:
+ try:
+- gff3_file = open(mirgff3, "r", encoding="utf-8") if six.PY3 else open(mirgff3, "r")
++ gff3_file = open(mirgff3, "r", encoding="utf-8")
+ except IOError:
+ print ("Can't read the file", end=mirgff3)
+ sys.exit()
+ with gff3_file:
+ data = gff3_file.read()
+- if six.PY2:
+- data = data.decode("utf-8-sig").encode("utf-8")
+
+ gff3_data = data.split("\n")
+ vcf_file = open(vcffile, "w")
+--- a/mirtop/libs/do.py
++++ b/mirtop/libs/do.py
+@@ -5,7 +5,6 @@
+ import subprocess
+ import logging
+
+-import six
+
+
+ logger = logging.getLogger("run")
+@@ -16,7 +15,7 @@
+ """Run the provided command, logging details and checking for errors.
+ """
+ try:
+- logger.debug(" ".join(str(x) for x in cmd) if not isinstance(cmd, six.string_types) else cmd)
++ logger.debug(" ".join(str(x) for x in cmd) if not isinstance(cmd, str) else cmd)
+ _do_run(cmd, checks, log_stdout)
+ except:
+ if log_error:
+@@ -44,7 +43,7 @@
+ Piped commands set pipefail and require use of bash to help with debugging
+ intermediate errors.
+ """
+- if isinstance(cmd, six.string_types):
++ if isinstance(cmd, str):
+ # check for standard or anonymous named pipes
+ if cmd.find(" | ") > 0 or cmd.find(">(") or cmd.find("<("):
+ return "set -o pipefail; " + cmd, True, find_bash()
+@@ -74,7 +73,7 @@
+ for line in s.stdout:
+ debug_stdout.append(line)
+ if exitcode is not None and exitcode != 0:
+- error_msg = " ".join(cmd) if not isinstance(cmd, six.string_types) else cmd
++ error_msg = " ".join(cmd) if not isinstance(cmd, str) else cmd
+ error_msg += "\n"
+ error_msg += "".join(debug_stdout)
+ s.communicate()
+--- a/requirements.txt
++++ b/requirements.txt
+@@ -3,5 +3,4 @@
+ pandas
+ biopython
+ pyyaml
+-six
+ pytest
=====================================
debian/patches/series
=====================================
@@ -3,3 +3,4 @@ fix-circular-import.patch
#pytest.patch
python3.12-syntax.patch
remove_undeclared_pkg_resources.patch
+remove_six.patch
=====================================
environment.yml
=====================================
@@ -4,4 +4,5 @@ dependencies:
- bioconda::pybedtools
- bioconda::samtools=1.21
- conda-forge::pandas
+ - conda-forge::sqlite
- conda-forge::biopython=1.83
\ No newline at end of file
=====================================
mirtop/command_line.py
=====================================
@@ -14,6 +14,7 @@ from mirtop.gff import validator
from mirtop.libs import spikeins
from mirtop.gff import update
from mirtop.sql import sql
+from mirtop.mirna import mapper
import mirtop.libs.logger as mylog
import time
@@ -25,6 +26,9 @@ def main(**kwargs):
kwargs['args'].print_debug)
logger = mylog.getLogger(__name__)
start = time.time()
+ if not hasattr(kwargs["args"], "database"):
+ if ("sql" not in kwargs and "stats" not in kwargs and "update" not in kwargs and "validate" not in kwargs):
+ kwargs["args"].database = mapper.guess_database(kwargs["args"])
if "gff" in kwargs:
logger.info("Run annotation")
=====================================
mirtop/exporter/isomirs.py
=====================================
@@ -38,7 +38,7 @@ def convert(args):
def _convert_file(gff, args):
sep = "\t"
precursors = fasta.read_precursor(args.hairpin, args.sps)
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
variant_header = sep.join(['mism', 'add', 't5', 't3'])
gff_file = open(gff, 'r')
=====================================
mirtop/exporter/vcf.py
=====================================
@@ -25,7 +25,7 @@ def convert(args):
for fn in args.files:
out_file = op.join(args.out, "%s.vcf" % op.splitext(op.basename(fn))[0])
logger.info("Reading %s" % fn)
- create_vcf(fn, args.hairpin, args.gtf, out_file)
+ create_vcf(fn, args.hairpin, args.gtf, out_file, args.database)
logger.info("VCF generated %s" % out_file)
@@ -121,7 +121,7 @@ def cigar_2_key(cigar, readseq, refseq, pos, var5p, var3p, parent_ini_pos, paren
return(key_pos, key_var, ref, alt)
-def create_vcf(mirgff3, precursor, gtf, vcffile):
+def create_vcf(mirgff3, precursor, gtf, vcffile, database):
"""
Args:
'mirgff3(str)': File with mirGFF3 format that will be converted
@@ -178,7 +178,7 @@ def create_vcf(mirgff3, precursor, gtf, vcffile):
n_noSNP = 0
no_var = 0
hairpins = read_precursor(precursor)
- gff3 = read_gtf_to_precursor(gtf)
+ gff3 = read_gtf_to_precursor(gtf, database)
gtf_dic = read_gtf_to_mirna(gtf)
for line in range(0, len(gff3_data)):
if not gff3_data[line]:
=====================================
mirtop/gff/__init__.py
=====================================
@@ -21,11 +21,14 @@ def reader(args):
read.reader(args)
return None
samples = []
- database = mapper.guess_database(args)
+ if args.database is None:
+ database = mapper.guess_database(args)
+ else:
+ database = args.database
args.database = database
precursors = fasta.read_precursor(args.hairpin, args.sps)
args.precursors = precursors
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, database)
args.matures = matures
# TODO check numbers of miRNA and precursors read
# TODO print message if numbers mismatch
@@ -75,13 +78,14 @@ def reader(args):
def _write(lines, header, fn, args = None):
out_handle = open(fn, 'w')
print(header, file=out_handle)
- mapper = read_gtf_to_mirna(args.gtf)
+ database = mapper.guess_database(args)
+ dbmapper = read_gtf_to_mirna(args.gtf, database)
for m in lines:
for s in sorted(lines[m].keys()):
for hit in lines[m][s]:
# TODO: convert to genomic if args.out_genomic
if args and args.out_genomic:
- lifted = body.lift_to_genome(hit[4], mapper)
+ lifted = body.lift_to_genome(hit[4], dbmapper)
print(lifted, file=out_handle)
else:
print(hit[4], file=out_handle)
=====================================
mirtop/gff/convert.py
=====================================
@@ -30,7 +30,7 @@ def convert_gff_counts(args):
'iso_add3p', 'iso_snp']
if args.add_extra:
precursors = fasta.read_precursor(args.hairpin, args.sps)
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt']
logger.info("INFO Reading GFF file %s", args.gff)
=====================================
mirtop/gff/read.py
=====================================
@@ -20,7 +20,7 @@ def reader(args):
args.database = database
precursors = fasta.read_precursor(args.hairpin, args.sps)
args.precursors = precursors
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
args.matures = matures
# TODO check numbers of miRNA and precursors read
# TODO print message if numbers mismatch
=====================================
mirtop/importer/prost.py
=====================================
@@ -41,7 +41,7 @@ def read_file(fn, hairpins, database, mirna_gtf):
reads = defaultdict(hits)
sample = os.path.splitext(os.path.basename(fn))[0]
genomics = mapper.read_gtf_to_mirna(mirna_gtf)
- matures = mapper.read_gtf_to_precursor(mirna_gtf)
+ matures = mapper.read_gtf_to_precursor(mirna_gtf, database)
non_mirna = 0
non_chromosome_mirna = 0
outside_mirna = 0
=====================================
mirtop/mirna/mapper.py
=====================================
@@ -1,6 +1,7 @@
"""Read database information"""
from collections import defaultdict
+import re
import mirtop.libs.logger as mylog
@@ -20,10 +21,14 @@ def guess_database(args):
TODO: this needs to be generic to other databases.
"""
+ if not hasattr(args, "database"):
+ args.database = None
return _guess_database_file(args.gtf, args.database)
def _guess_database_file(gff, database=None):
+ if database:
+ return database
with open(gff) as in_handle:
for line in in_handle:
if not line.startswith("#"):
@@ -54,7 +59,7 @@ def get_primary_transcript(database):
raise ValueError("Only miRBase is supported for this action.")
-def read_gtf_to_mirna(gtf):
+def read_gtf_to_mirna(gtf, database=None):
"""
Load GTF file with precursor positions on genome.
@@ -68,9 +73,11 @@ def read_gtf_to_mirna(gtf):
"""
if not gtf:
return gtf
- if _guess_database_file(gtf).find("miRBase") > -1:
+ if not database:
+ database = _guess_database_file(gtf)
+ if database.find("miRBase") > -1:
mapped = read_gtf_to_precursor_mirbase(gtf, format="genomic")
- elif _guess_database_file(gtf).find("MirGeneDB") > -1:
+ elif database.find("MirGeneDB") > -1:
mapped = read_gtf_to_precursor_mirgenedb(gtf, format="genomic")
else:
logger.info("Database different than miRBase or MirGeneDB")
@@ -143,7 +150,7 @@ def read_gtf_chr2mirna2(gtf): # to remove
return db_mir
-def read_gtf_to_precursor(gtf):
+def read_gtf_to_precursor(gtf, database=None):
"""
Load GTF file with precursor positions on genome
Return dict with key being precursor name and
@@ -161,15 +168,28 @@ def read_gtf_to_precursor(gtf):
"""
if not gtf:
return gtf
- if _guess_database_file(gtf).find("miRBase") > -1:
+ if not database:
+ database = _guess_database_file(gtf)
+ if database.find("miRBase") > -1:
mapped = read_gtf_to_precursor_mirbase(gtf)
- elif _guess_database_file(gtf).find("MirGeneDB") > -1:
+ elif database.find("MirGeneDB") > -1:
mapped = read_gtf_to_precursor_mirgenedb(gtf)
else:
logger.info("Database different than miRBase or MirGeneDB")
logger.info("If you get an error when loading,")
logger.info("report it to https://github.com/miRTop/mirtop/issues")
- mapped = read_gtf_to_precursor_mirbase(gtf)
+ try:
+ mapped = read_gtf_to_precursor_mirbase(gtf)
+ return mapped
+ except Exception as e:
+ print(f"Failed to parse with Mirbase: {e}")
+ try:
+ mapped = read_gtf_to_precursor_mirgenedb(gtf)
+ return mapped
+ except Exception as e:
+ print(f"Failed to parse with Mirgenedb: {e}")
+ raise ValueError(f"There is no parser available for the database that you used: {database}")
+
return mapped
@@ -271,6 +291,7 @@ def read_gtf_to_precursor_mirgenedb(gtf, format="precursor"):
db = defaultdict(list)
db_mir = defaultdict(list)
id_dict = dict()
+ pattern = r'(_3p\*?|_5p\*?)'
with open(gtf) as in_handle:
for line in in_handle:
if line.startswith("#"):
@@ -286,7 +307,10 @@ def read_gtf_to_precursor_mirgenedb(gtf, format="precursor"):
if cols[2] == "miRNA":
idname_mi = [n.split("=")[1] for n in cols[-1].split(";")
if n.startswith("ID")][0]
- parent = "%s_pre" % idname_mi.split("_")[0]
+ # parent = "%s_pre" % idname_mi.replace("_3p.*", "").replace("_5p.*", "")
+ parent = re.sub(pattern, '', idname_mi)
+ parent = "%s_pre" % parent
+ # import pdb; pdb.set_trace()
db_mir[(parent, name)] = [chrom,
int(start), int(end),
strand, parent]
=====================================
requirements.txt
=====================================
@@ -3,5 +3,5 @@ pybedtools
pandas
biopython
pyyaml
-pybedtools
six
+pytest
=====================================
setup.py
=====================================
@@ -3,7 +3,7 @@
import os
from setuptools import setup, find_packages
-version = '0.4.28'
+version = '0.4.30'
url = 'http://github.com/mirtop/mirtop'
=====================================
test/test_functions.py
=====================================
@@ -45,7 +45,7 @@ def annotate(fn, read_file, load=False, create=True, keep_name=False,
args.keep_name = keep_name
from mirtop.mirna import fasta, mapper
precursors = fasta.read_precursor(args.hairpin, args.sps)
- matures = mapper.read_gtf_to_precursor(args.gtf)
+ matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
args.precursors = precursors
args.matures = matures
args.database = mapper.guess_database(args)
@@ -81,7 +81,7 @@ class FunctionsTest(unittest.TestCase):
from mirtop.libs import logger
logger.initialize_logger("test_read_files", True, True)
map_mir = mapper.read_gtf_to_precursor(
- "data/examples/annotate/hsa.gff3")
+ "data/examples/annotate/hsa.gff3", None)
print(map_mir)
if map_mir["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5:
raise ValueError("GFF is not loaded correctly.")
@@ -102,7 +102,7 @@ class FunctionsTest(unittest.TestCase):
from mirtop.libs import logger
logger.initialize_logger("test_read_files", True, True)
map_mir = mapper.read_gtf_to_precursor(
- "data/db/mirgenedb/hsa.gff")
+ "data/db/mirgenedb/hsa.gff", None)
print(map_mir)
##@attr(read_mir2chr=True)
@@ -259,7 +259,7 @@ class FunctionsTest(unittest.TestCase):
precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
"hsa")
matures = mapper.read_gtf_to_precursor(
- "data/examples/annotate/hsa.gff3")
+ "data/examples/annotate/hsa.gff3", None)
res = get_mature_sequence("GAAAATTTTTTTTTTTAAAAG", [5, 15])
if res != "AAAATTTTTTTTTTTAAAA":
raise ValueError("Results for GAAAATTTTTTTTTTTAAAAG was %s" % res)
@@ -447,6 +447,7 @@ class FunctionsTest(unittest.TestCase):
args.gff = 'data/examples/synthetic/let7a-5p.gff'
args.out = 'data/examples/synthetic'
args.add_extra = True
+ args.database = None
convert_gff_counts(args)
os.remove(os.path.join(args.out, "let7a-5p.tsv"))
View it on GitLab: https://salsa.debian.org/med-team/mirtop/-/compare/7989144aee785099bb4e8e9df9f9d85396a6c5c8...0250cfd2e922eeebc2f3a42746da981d735c9d90
--
View it on GitLab: https://salsa.debian.org/med-team/mirtop/-/compare/7989144aee785099bb4e8e9df9f9d85396a6c5c8...0250cfd2e922eeebc2f3a42746da981d735c9d90
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250413/a444aac2/attachment-0001.htm>
More information about the debian-med-commit mailing list