[med-svn] [Git][med-team/metaphlan][master] 2 commits: patch added Replace distutils.version with packaging.version for Python 3.12+ compatibility
harish chavre (@Harish1)
gitlab at salsa.debian.org
Sat May 3 10:06:29 BST 2025
harish chavre pushed to branch master at Debian Med / metaphlan
Commits:
03c0cb6f by Harish chavre at 2025-05-03T09:01:51+00:00
patch added Replace distutils.version with packaging.version for Python 3.12+ compatibility
- - - - -
c9b7630a by Harish chavre at 2025-05-03T09:04:36+00:00
Add autopkgtest for metaphlan
- - - - -
23 changed files:
- + .pc/.quilt_patches
- + .pc/.quilt_series
- + .pc/.version
- + .pc/applied-patches
- + .pc/use-packaging-version.patch/.timestamp
- + .pc/use-packaging-version.patch/metaphlan/metaphlan.py
- debian/control
- + debian/patches/series
- + debian/patches/use-packaging-version.patch
- + debian/tests/.pc/.quilt_patches
- + debian/tests/.pc/.quilt_series
- + debian/tests/.pc/.version
- + debian/tests/control
- + debian/tests/data/Test.fastq
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.1.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.2.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.3.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.4.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.pkl
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.1.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.2.bt2l
- + debian/tests/run-unit-test
- metaphlan/metaphlan.py
Changes:
=====================================
.pc/.quilt_patches
=====================================
@@ -0,0 +1 @@
+debian/patches
=====================================
.pc/.quilt_series
=====================================
@@ -0,0 +1 @@
+series
=====================================
.pc/.version
=====================================
@@ -0,0 +1 @@
+2
=====================================
.pc/applied-patches
=====================================
@@ -0,0 +1 @@
+use-packaging-version.patch
=====================================
.pc/use-packaging-version.patch/.timestamp
=====================================
=====================================
.pc/use-packaging-version.patch/metaphlan/metaphlan.py
=====================================
@@ -0,0 +1,1304 @@
+#!/usr/bin/env python
+__author__ = ('Aitor Blanco-Miguez (aitor.blancomiguez at unitn.it), '
+ 'Francesco Beghini (francesco.beghini at unitn.it), '
+ 'Nicola Segata (nicola.segata at unitn.it), '
+ 'Duy Tin Truong, '
+ 'Francesco Asnicar (f.asnicar at unitn.it)')
+__version__ = '4.0.4'
+__date__ = '17 Jan 2023'
+
+import sys
+try:
+ from metaphlan import mybytes, plain_read_and_split, plain_read_and_split_line, read_and_split, read_and_split_line, check_and_install_database, remove_prefix
+except ImportError:
+ sys.exit("CRITICAL ERROR: Unable to find the MetaPhlAn python package. Please check your install.")
+
+if float(sys.version_info[0]) < 3.0:
+ sys.stderr.write("MetaPhlAn requires Python 3, your current Python version is {}.{}.{}\n"
+ .format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))
+ sys.exit(1)
+import os
+import stat
+import re
+import time
+import random
+from collections import defaultdict as defdict
+from distutils.version import LooseVersion
+from glob import glob
+from subprocess import DEVNULL
+import argparse as ap
+import bz2
+import pickle
+import subprocess as subp
+import tempfile as tf
+
+try:
+ import numpy as np
+except ImportError:
+ sys.stderr.write("Error! numpy python library not detected!!\n")
+ sys.exit(1)
+
+#**********************************************************************************************
+# Modification of Code : *
+# Modified the code so instead of using the current clade IDs, which are numbers, we will *
+# use the clade_names *
+# Users reported the biom output is invalid and also the IDs were changing from run to *
+# run. *
+# George Weingart 05/22/2017 george.weingart at mail.com *
+#**********************************************************************************************
+
+#*************************************************************
+#* Imports related to biom file generation *
+#*************************************************************
+try:
+ import biom
+ import biom.table
+except ImportError:
+ sys.stderr.write("Warning! Biom python library not detected!"
+ "\n Exporting to biom format will not work!\n")
+import json
+
+# get the directory that contains this script
+metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
+# get the default database folder
+DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "metaphlan_databases")
+DEFAULT_DB_FOLDER= os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER)
+#Whether to execute an SGB-based analysis
+SGB_ANALYSIS = True
+INDEX = 'latest'
+tax_units = "kpcofgst"
+
+def read_params(args):
+ p = ap.ArgumentParser( description=
+ "DESCRIPTION\n"
+ " MetaPhlAn version "+__version__+" ("+__date__+"): \n"
+ " METAgenomic PHyLogenetic ANalysis for metagenomic taxonomic profiling.\n\n"
+ "AUTHORS: "+__author__+"\n\n"
+ "COMMON COMMANDS\n\n"
+ " We assume here that MetaPhlAn is installed using the several options available (pip, conda, PyPi)\n"
+ " Also BowTie2 should be in the system path with execution and read permissions, and Perl should be installed)\n\n"
+
+ "\n========== MetaPhlAn clade-abundance estimation ================= \n\n"
+ "The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species ) \n"
+ "present in the metagenome obtained from a microbiome sample and their \n"
+ "relative abundance. This correspond to the default analysis type (-t rel_ab).\n\n"
+
+ "* Profiling a metagenome from raw reads:\n"
+ "$ metaphlan metagenome.fastq --input_type fastq -o profiled_metagenome.txt\n\n"
+
+ "* You can take advantage of multiple CPUs and save the intermediate BowTie2 output for re-running\n"
+ " MetaPhlAn extremely quickly:\n"
+ "$ metaphlan metagenome.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq -o profiled_metagenome.txt\n\n"
+
+ "* If you already mapped your metagenome against the marker DB (using a previous MetaPhlAn run), you\n"
+ " can obtain the results in few seconds by using the previously saved --bowtie2out file and \n"
+ " specifying the input (--input_type bowtie2out):\n"
+ "$ metaphlan metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out -o profiled_metagenome.txt\n\n"
+
+ "* bowtie2out files generated with MetaPhlAn versions below 3 are not compatibile.\n"
+ " Starting from MetaPhlAn 3.0, the BowTie2 ouput now includes the size of the profiled metagenome and the average read length.\n"
+ " If you want to re-run MetaPhlAn using these file you should provide the metagenome size via --nreads:\n"
+ "$ metaphlan metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out --nreads 520000 -o profiled_metagenome.txt\n\n"
+
+ "* You can also provide an externally BowTie2-mapped SAM if you specify this format with \n"
+ " --input_type. Two steps: first apply BowTie2 and then feed MetaPhlAn with the obtained sam:\n"
+ "$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x ${mpa_dir}/metaphlan_databases/mpa_v30_CHOCOPhlAn_201901 -U metagenome.fastq\n"
+ "$ metaphlan metagenome.sam --input_type sam -o profiled_metagenome.txt\n\n"
+
+ "* We can also natively handle paired-end metagenomes, and, more generally, metagenomes stored in \n"
+ " multiple files (but you need to specify the --bowtie2out parameter):\n"
+ "$ metaphlan metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq\n\n"
+ "\n------------------------------------------------------------------- \n \n\n"
+
+ "\n========== Marker level analysis ============================ \n\n"
+ "MetaPhlAn introduces the capability of characterizing organisms at the strain level using non\n"
+ "aggregated marker information. Such capability comes with several slightly different flavours and \n"
+ "are a way to perform strain tracking and comparison across multiple samples.\n"
+ "Usually, MetaPhlAn is first ran with the default -t to profile the species present in\n"
+ "the community, and then a strain-level profiling can be performed to zoom-in into specific species\n"
+ "of interest. This operation can be performed quickly as it exploits the --bowtie2out intermediate \n"
+ "file saved during the execution of the default analysis type.\n\n"
+
+ "* The following command will output the abundance of each marker with a RPK (reads per kilo-base) \n"
+ " higher 0.0. (we are assuming that metagenome_outfmt.bz2 has been generated before as \n"
+ " shown above).\n"
+ "$ metaphlan -t marker_ab_table metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n"
+ " The obtained RPK can be optionally normalized by the total number of reads in the metagenome \n"
+ " to guarantee fair comparisons of abundances across samples. The number of reads in the metagenome\n"
+ " needs to be passed with the '--nreads' argument\n\n"
+
+ "* The list of markers present in the sample can be obtained with '-t marker_pres_table'\n"
+ "$ metaphlan -t marker_pres_table metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n"
+ " The --pres_th argument (default 1.0) set the minimum RPK value to consider a marker present\n\n"
+
+ "* The list '-t clade_profiles' analysis type reports the same information of '-t marker_ab_table'\n"
+ " but the markers are reported on a clade-by-clade basis.\n"
+ "$ metaphlan -t clade_profiles metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n\n"
+
+ "* Finally, to obtain all markers present for a specific clade and all its subclades, the \n"
+ " '-t clade_specific_strain_tracker' should be used. For example, the following command\n"
+ " is reporting the presence/absence of the markers for the B. fragilis species and its strains\n"
+ " the optional argument --min_ab specifies the minimum clade abundance for reporting the markers\n\n"
+ "$ metaphlan -t clade_specific_strain_tracker --clade s__Bacteroides_fragilis metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n"
+
+ "\n------------------------------------------------------------------- \n\n"
+ "",
+ formatter_class=ap.RawTextHelpFormatter,
+ add_help=False )
+ arg = p.add_argument
+
+ arg( 'inp', metavar='INPUT_FILE', type=str, nargs='?', default=None, help=
+ "the input file can be:\n"
+ "* a fastq file containing metagenomic reads\n"
+ "OR\n"
+ "* a BowTie2 produced SAM file. \n"
+ "OR\n"
+ "* an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run \n"
+ "If the input file is missing, the script assumes that the input is provided using the standard \n"
+ "input, or named pipes.\n"
+ "IMPORTANT: the type of input needs to be specified with --input_type" )
+
+ arg( 'output', metavar='OUTPUT_FILE', type=str, nargs='?', default=None,
+ help= "the tab-separated output file of the predicted taxon relative abundances \n"
+ "[stdout if not present]")
+
+
+ g = p.add_argument_group('Required arguments')
+ arg = g.add_argument
+ input_type_choices = ['fastq','fasta','bowtie2out','sam']
+ arg( '--input_type', choices=input_type_choices, required = '--install' not in args, help =
+ "set whether the input is the FASTA file of metagenomic reads or \n"
+ "the SAM file of the mapping of the reads against the MetaPhlAn db.\n"
+ )
+
+ g = p.add_argument_group('Mapping arguments')
+ arg = g.add_argument
+ arg('--force', action='store_true', help="Force profiling of the input file by removing the bowtie2out file")
+ arg('--bowtie2db', metavar="METAPHLAN_BOWTIE2_DB", type=str, default=DEFAULT_DB_FOLDER,
+ help=("Folder containing the MetaPhlAn database. You can specify the location by exporting the DEFAULT_DB_FOLDER variable in the shell."
+ "[default "+DEFAULT_DB_FOLDER+"]\n"))
+
+ arg('-x', '--index', type=str, default=INDEX,
+ help=("Specify the id of the database version to use. "
+ "If \"latest\", MetaPhlAn will get the latest version.\n"
+ "If an index name is provided, MetaPhlAn will try to use it, if available, and skip the online check.\n"
+ "If the database files are not found on the local MetaPhlAn installation they\n"
+ "will be automatically downloaded [default "+INDEX+"]\n"))
+
+ bt2ps = ['sensitive', 'very-sensitive', 'sensitive-local', 'very-sensitive-local']
+ arg('--bt2_ps', metavar="BowTie2 presets", default='very-sensitive',
+ choices=bt2ps, help="Presets options for BowTie2 (applied only when a "
+ "FASTA file is provided)\n"
+ "The choices enabled in MetaPhlAn are:\n"
+ " * sensitive\n"
+ " * very-sensitive\n"
+ " * sensitive-local\n"
+ " * very-sensitive-local\n"
+ "[default very-sensitive]\n")
+ arg('--bowtie2_exe', type=str, default=None,
+ help='Full path and name of the BowTie2 executable. This option allows'
+ 'MetaPhlAn to reach the executable even when it is not in the '
+ 'system PATH or the system PATH is unreachable')
+ arg('--bowtie2_build', type=str, default='bowtie2-build',
+ help="Full path to the bowtie2-build command to use, deafult assumes "
+ "that 'bowtie2-build is present in the system path")
+ arg('--bowtie2out', metavar="FILE_NAME", type=str, default=None,
+ help="The file for saving the output of BowTie2")
+ arg('--min_mapq_val', type=int, default=5,
+ help="Minimum mapping quality value (MAPQ) [default 5]")
+ arg('--no_map', action='store_true',
+ help="Avoid storing the --bowtie2out map file")
+ arg('--tmp_dir', metavar="", default=None, type=str,
+ help="The folder used to store temporary files [default is the OS "
+ "dependent tmp dir]")
+
+ g = p.add_argument_group('Post-mapping arguments')
+ arg = g.add_argument
+ stat_choices = ['avg_g','avg_l','tavg_g','tavg_l','wavg_g','wavg_l','med']
+ arg( '--tax_lev', metavar='TAXONOMIC_LEVEL', type=str,
+ choices='a'+tax_units, default='a', help =
+ "The taxonomic level for the relative abundance output:\n"
+ "'a' : all taxonomic levels\n"
+ "'k' : kingdoms\n"
+ "'p' : phyla only\n"
+ "'c' : classes only\n"
+ "'o' : orders only\n"
+ "'f' : families only\n"
+ "'g' : genera only\n"
+ "'s' : species only\n"
+ "'t' : SGBs only\n"
+ "[default 'a']" )
+ arg( '--min_cu_len', metavar="", default="2000", type=int, help =
+ "minimum total nucleotide length for the markers in a clade for\n"
+ "estimating the abundance without considering sub-clade abundances\n"
+ "[default 2000]\n" )
+ arg( '--min_alignment_len', metavar="", default=None, type=int, help =
+ "The sam records for aligned reads with the longest subalignment\n"
+ "length smaller than this threshold will be discarded.\n"
+ "[default None]\n" )
+ arg( '--add_viruses', action='store_true', help=
+ "Together with --mpa3, allow the profiling of viral organisms" )
+ arg( '--ignore_eukaryotes', action='store_true', help=
+ "Do not profile eukaryotic organisms" )
+ arg( '--ignore_bacteria', action='store_true', help=
+ "Do not profile bacterial organisms" )
+ arg( '--ignore_archaea', action='store_true', help=
+ "Do not profile archeal organisms" )
+ arg( '--ignore_ksgbs', action='store_true', help=
+ "Do not profile known SGBs (together with --sgb option)" )
+ arg( '--ignore_usgbs', action='store_true', help=
+ "Do not profile unknown SGBs (together with --sgb option)" )
+ arg( '--stat_q', metavar="", type = float, default=0.2, help =
+ "Quantile value for the robust average\n"
+ "[default 0.2]" )
+ arg( '--perc_nonzero', metavar="", type = float, default=0.33, help =
+ "Percentage of markers with a non zero relative abundance for misidentify a species\n"
+ "[default 0.33]" )
+ arg( '--ignore_markers', type=str, default = None, help =
+ "File containing a list of markers to ignore. \n")
+ arg( '--avoid_disqm', action="store_true", help =
+ "Deactivate the procedure of disambiguating the quasi-markers based on the \n"
+ "marker abundance pattern found in the sample. It is generally recommended \n"
+ "to keep the disambiguation procedure in order to minimize false positives\n")
+ arg( '--stat', metavar="", choices=stat_choices, default="tavg_g", type=str, help =
+ "Statistical approach for converting marker abundances into clade abundances\n"
+ "'avg_g' : clade global (i.e. normalizing all markers together) average\n"
+ "'avg_l' : average of length-normalized marker counts\n"
+ "'tavg_g' : truncated clade global average at --stat_q quantile\n"
+ "'tavg_l' : truncated average of length-normalized marker counts (at --stat_q)\n"
+ "'wavg_g' : winsorized clade global average (at --stat_q)\n"
+ "'wavg_l' : winsorized average of length-normalized marker counts (at --stat_q)\n"
+ "'med' : median of length-normalized marker counts\n"
+ "[default tavg_g]" )
+
+ arg = p.add_argument
+
+ g = p.add_argument_group('Additional analysis types and arguments')
+ arg = g.add_argument
+ analysis_types = ['rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 'marker_pres_table', 'clade_specific_strain_tracker']
+ arg( '-t', metavar='ANALYSIS TYPE', type=str, choices = analysis_types,
+ default='rel_ab', help =
+ "Type of analysis to perform: \n"
+ " * rel_ab: profiling a metagenomes in terms of relative abundances\n"
+ " * rel_ab_w_read_stats: profiling a metagenomes in terms of relative abundances and estimate the number of reads coming from each clade.\n"
+ " * reads_map: mapping from reads to clades (only reads hitting a marker)\n"
+ " * clade_profiles: normalized marker counts for clades with at least a non-null marker\n"
+ " * marker_ab_table: normalized marker counts (only when > 0.0 and normalized by metagenome size if --nreads is specified)\n"
+ " * marker_counts: non-normalized marker counts [use with extreme caution]\n"
+ " * marker_pres_table: list of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th\n"
+ " * clade_specific_strain_tracker: list of markers present for a specific clade, specified with --clade, and all its subclades\n"
+ "[default 'rel_ab']" )
+ arg( '--nreads', metavar="NUMBER_OF_READS", type=int, default = None, help =
+ "The total number of reads in the original metagenome. It is used only when \n"
+ "-t marker_table is specified for normalizing the length-normalized counts \n"
+ "with the metagenome size as well. No normalization applied if --nreads is not \n"
+ "specified" )
+ arg( '--pres_th', metavar="PRESENCE_THRESHOLD", type=int, default = 1.0, help =
+ 'Threshold for calling a marker present by the -t marker_pres_table option' )
+ arg( '--clade', metavar="", default=None, type=str, help =
+ "The clade for clade_specific_strain_tracker analysis\n" )
+ arg( '--min_ab', metavar="", default=0.1, type=float, help =
+ "The minimum percentage abundance for the clade in the clade_specific_strain_tracker analysis\n" )
+
+ g = p.add_argument_group('Output arguments')
+ arg = g.add_argument
+ arg( '-o', '--output_file', metavar="output file", type=str, default=None, help =
+ "The output file (if not specified as positional argument)\n")
+ arg('--sample_id_key', metavar="name", type=str, default="SampleID",
+ help =("Specify the sample ID key for this analysis."
+ " Defaults to 'SampleID'."))
+ arg('--use_group_representative', action='store_true', help =("Use a species as representative for species groups."))
+ arg('--sample_id', metavar="value", type=str,
+ default="Metaphlan_Analysis",
+ help =("Specify the sample ID for this analysis."
+ " Defaults to 'Metaphlan_Analysis'."))
+ arg( '-s', '--samout', metavar="sam_output_file",
+ type=str, default=None, help="The sam output file\n")
+
+ arg( '--legacy-output', action='store_true', help="Old MetaPhlAn2 two columns output\n")
+ arg( '--CAMI_format_output', action='store_true', help="Report the profiling using the CAMI output format\n")
+ arg( '--unclassified_estimation', action='store_true', help="Scale relative abundances to the number of reads mapping to identified clades in order to estimate unclassified taxa\n")
+ arg( '--mpa3', action='store_true', help="Perform the analysis using the MetaPhlAn 3 algorithm\n")
+
+ #*************************************************************
+ #* Parameters related to biom file generation *
+ #*************************************************************
+ arg( '--biom', '--biom_output_file', metavar="biom_output", type=str, default=None, help =
+ "If requesting biom file output: The name of the output file in biom format \n")
+
+ arg( '--mdelim', '--metadata_delimiter_char', metavar="mdelim", type=str, default="|", help =
+ "Delimiter for bug metadata: - defaults to pipe. e.g. the pipe in k__Bacteria|p__Proteobacteria \n")
+ #*************************************************************
+ #* End parameters related to biom file generation *
+ #*************************************************************
+
+ g = p.add_argument_group('Other arguments')
+ arg = g.add_argument
+ arg('--nproc', metavar="N", type=int, default=4,
+ help="The number of CPUs to use for parallelizing the mapping [default 4]")
+ arg('--subsampling', type=int, default=None,
+ help="Specify the number of reads to be considered from the input metagenomes [default None]")
+ arg('--subsampling_seed', type=str, default='1992',
+ help="Random seed to use in the selection of the subsampled reads. Choose \"random\r for a random behaviour")
+ arg('--install', action='store_true',
+ help="Only checks if the MetaPhlAn DB is installed and installs it if not. All other parameters are ignored.")
+ arg('--offline', action='store_true',
+ help="If used, MetaPhlAn will not check for new database updates.")
+ arg('--force_download', action='store_true',
+ help="Force the re-download of the latest MetaPhlAn database.")
+ arg('--read_min_len', type=int, default=70,
+ help="Specify the minimum length of the reads to be considered when parsing the input file with "
+ "'read_fastx.py' script, default value is 70")
+ arg('-v', '--version', action='version',
+ version="MetaPhlAn version {} ({})".format(__version__, __date__),
+ help="Prints the current MetaPhlAn version and exit")
+ arg("-h", "--help", action="help", help="show this help message and exit")
+
+ return vars(p.parse_args())
+
+def set_mapping_arguments(index, bowtie2_db):
+ mpa_pkl = 'mpa_pkl'
+ bowtie2db = 'bowtie2db'
+ bt2_ext = 'bt2l' if SGB_ANALYSIS else 'bt2'
+
+ if os.path.isfile(os.path.join(bowtie2_db, "{}.pkl".format(index))):
+ mpa_pkl = os.path.join(bowtie2_db, "{}.pkl".format(index))
+
+ if glob(os.path.join(bowtie2_db, "{}*.{}".format(index, bt2_ext))):
+ bowtie2db = os.path.join(bowtie2_db, "{}".format(index))
+
+ return (mpa_pkl, bowtie2db)
+
+def run_bowtie2(fna_in, outfmt6_out, bowtie2_db, preset, nproc, min_mapq_val, file_format="fasta",
+ exe=None, samout=None, min_alignment_len=None, read_min_len=0):
+ # checking read_fastx.py
+ read_fastx = "read_fastx.py"
+
+ try:
+ subp.check_call([read_fastx, "-h"], stdout=DEVNULL, stderr=DEVNULL)
+ except Exception as e:
+ try:
+ read_fastx = os.path.join(os.path.join(os.path.dirname(__file__), "utils"), read_fastx)
+ subp.check_call([read_fastx, "-h"], stdout=DEVNULL, stderr=DEVNULL)
+ except Exception as e:
+ sys.stderr.write("OSError: fatal error running '{}'. Is it in the system path?\n".format(read_fastx))
+ sys.exit(1)
+
+ # checking bowtie2
+ try:
+ subp.check_call([exe if exe else 'bowtie2', "-h"], stdout=DEVNULL)
+ except Exception as e:
+ sys.stderr.write('OSError: "{}"\nFatal error running BowTie2. Is BowTie2 in the system path?\n'.format(e))
+ sys.exit(1)
+
+ try:
+ if fna_in:
+ readin = subp.Popen([read_fastx, '-l', str(read_min_len), fna_in], stdout=subp.PIPE, stderr=subp.PIPE)
+
+ else:
+ readin = subp.Popen([read_fastx, '-l', str(read_min_len)], stdin=sys.stdin, stdout=subp.PIPE, stderr=subp.PIPE)
+
+ bowtie2_cmd = [exe if exe else 'bowtie2', "--seed", "1992", "--quiet", "--no-unal", "--{}".format(preset),
+ "-S", "-", "-x", bowtie2_db]
+
+ if int(nproc) > 1:
+ bowtie2_cmd += ["-p", str(nproc)]
+
+ bowtie2_cmd += ["-U", "-"] # if not stat.S_ISFIFO(os.stat(fna_in).st_mode) else []
+
+ if file_format == "fasta":
+ bowtie2_cmd += ["-f"]
+
+ p = subp.Popen(bowtie2_cmd, stdout=subp.PIPE, stdin=readin.stdout)
+ readin.stdout.close()
+ lmybytes, outf = (mybytes, bz2.BZ2File(outfmt6_out, "w")) if outfmt6_out.endswith(".bz2") else (str, open(outfmt6_out, "w"))
+ try:
+ if samout:
+ if samout[-4:] == '.bz2':
+ sam_file = bz2.BZ2File(samout, 'w')
+ else:
+ sam_file = open(samout, 'wb')
+ except IOError as e:
+ sys.stderr.write('IOError: "{}"\nUnable to open sam output file.\n'.format(e))
+ sys.exit(1)
+ for line in p.stdout:
+ if samout:
+ sam_file.write(line)
+
+ o = read_and_split_line(line)
+ if not o[0].startswith('@'):
+ if not o[2].endswith('*'):
+ if (hex(int(o[1]) & 0x100) == '0x0'): #no secondary
+ if mapq_filter(o[2], int(o[4]), min_mapq_val) : # filter low mapq reads
+ if ((min_alignment_len is None) or
+ (max([int(x.strip('M')) for x in re.findall(r'(\d*M)', o[5]) if x]) >= min_alignment_len)):
+ outf.write(lmybytes("\t".join([ o[0], o[2].split('/')[0] ]) + "\n"))
+
+ if samout:
+ sam_file.close()
+
+ p.communicate()
+ read_fastx_stderr = readin.stderr.readlines()
+ nreads = None
+ avg_read_length = None
+ try:
+ nreads, avg_read_length = list(map(float, read_fastx_stderr[0].decode().split()))
+ if not nreads:
+ sys.stderr.write('Fatal error running MetaPhlAn. Total metagenome size was not estimated.\nPlease check your input files.\n')
+ sys.exit(1)
+ if not avg_read_length:
+ sys.stderr.write('Fatal error running MetaPhlAn. The average read length was not estimated.\nPlease check your input files.\n')
+ sys.exit(1)
+ outf.write(lmybytes('#nreads\t{}\n'.format(int(nreads))))
+ outf.write(lmybytes('#avg_read_length\t{}'.format(avg_read_length)))
+ outf.close()
+ except ValueError:
+ sys.stderr.write(b''.join(read_fastx_stderr).decode())
+ outf.close()
+ os.unlink(outfmt6_out)
+ sys.exit(1)
+
+ except OSError as e:
+ sys.stderr.write('OSError: "{}"\nFatal error running BowTie2.\n'.format(e))
+ sys.exit(1)
+ except IOError as e:
+ sys.stderr.write('IOError: "{}"\nFatal error running BowTie2.\n'.format(e))
+ sys.exit(1)
+
+ if p.returncode == 13:
+ sys.stderr.write("Permission Denied Error: fatal error running BowTie2."
+ "Is the BowTie2 file in the path with execution and read permissions?\n")
+ sys.exit(1)
+ elif p.returncode != 0:
+ sys.stderr.write("Error while running bowtie2.\n")
+ sys.exit(1)
+
+class TaxClade:
+ min_cu_len = -1
+ markers2lens = None
+ stat = None
+ perc_nonzero = None
+ quantile = None
+ avoid_disqm = False
+ avg_read_length = 1
+
+ def __init__( self, name, tax_id, uncl = False):
+ self.children, self.markers2nreads = {}, {}
+ self.name, self.father = name, None
+ self.uncl, self.subcl_uncl = uncl, False
+ self.abundance, self.uncl_abundance = None, 0
+ self.nreads, self.uncl_nreads = 0, 0
+ self.tax_id = tax_id
+
+ def add_child( self, name, tax_id ):
+ new_clade = TaxClade( name, tax_id )
+ self.children[name] = new_clade
+ new_clade.father = self
+ return new_clade
+
+
+ def get_terminals( self ):
+ terms = []
+ if not self.children:
+ return [self]
+ for c in self.children.values():
+ terms += c.get_terminals()
+ return terms
+
+ def get_full_taxids( self ):
+ fullname = ['']
+ if self.tax_id:
+ fullname = [self.tax_id]
+ cl = self.father
+ while cl:
+ fullname = [cl.tax_id] + fullname
+ cl = cl.father
+ return "|".join(fullname[1:])
+
+ def get_full_name( self ):
+ fullname = [self.name]
+ cl = self.father
+ while cl:
+ fullname = [cl.name] + fullname
+ cl = cl.father
+ return "|".join(fullname[1:])
+
+ def get_normalized_counts( self ):
+ return [(m,float(n)*1000.0/(np.absolute(self.markers2lens[m] - self.avg_read_length) +1) )
+ for m,n in self.markers2nreads.items()]
+
+ def compute_mapped_reads( self ):
+ tax_level = 't__' if SGB_ANALYSIS else 's__'
+ if self.nreads != 0 or self.name.startswith(tax_level):
+ return self.nreads
+ for c in self.children.values():
+ self.nreads += c.compute_mapped_reads()
+ return self.nreads
+
+ def compute_abundance( self ):
+ if self.abundance is not None: return self.abundance
+
+ sum_ab = sum([c.compute_abundance() for c in self.children.values()])
+
+ # rat_nreads = sorted([(self.markers2lens[marker], n_reads)
+ # for marker,n_reads in self.markers2nreads.items()],
+ # key = lambda x: x[1])
+
+ rat_nreads, removed = [], []
+ for marker, n_reads in sorted(self.markers2nreads.items(),key=lambda x:x[0]):
+ misidentified = False
+
+ if not self.avoid_disqm:
+ for ext in self.markers2exts[marker]:
+ ext_clade = self.taxa2clades[ext]
+ m2nr = ext_clade.markers2nreads
+
+ tocladetmp = ext_clade
+ while len(tocladetmp.children) == 1:
+ tocladetmp = list(tocladetmp.children.values())[0]
+ m2nr = tocladetmp.markers2nreads
+
+ nonzeros = sum([v>0 for v in m2nr.values()])
+ if len(m2nr):
+ if float(nonzeros) / len(m2nr) > self.perc_nonzero:
+ misidentified = True
+ removed.append( (self.markers2lens[marker],n_reads) )
+ break
+ if not misidentified:
+ rat_nreads.append( (self.markers2lens[marker],n_reads) )
+
+ if not self.avoid_disqm and len(removed):
+ n_rat_nreads = float(len(rat_nreads))
+ n_removed = float(len(removed))
+ n_tot = n_rat_nreads + n_removed
+ n_ripr = 10
+
+ if len(self.get_terminals()) < 2:
+ n_ripr = 0
+
+ if "k__Viruses" in self.get_full_name():
+ n_ripr = 0
+
+ if n_rat_nreads < n_ripr and n_tot > n_rat_nreads:
+ rat_nreads += removed[:n_ripr-int(n_rat_nreads)]
+
+
+ rat_nreads = sorted(rat_nreads, key = lambda x: x[1])
+
+ rat_v,nreads_v = zip(*rat_nreads) if rat_nreads else ([],[])
+ rat, nrawreads, loc_ab = float(sum(rat_v)) or -1.0, sum(nreads_v), 0.0
+ quant = int(self.quantile*len(rat_nreads))
+ ql,qr,qn = (quant,-quant,quant) if quant else (None,None,0)
+
+ if not SGB_ANALYSIS and self.name[0] == 't' and (len(self.father.children) > 1 or "_sp" in self.father.name or "k__Viruses" in self.get_full_name()):
+ non_zeros = float(len([n for r,n in rat_nreads if n > 0]))
+ nreads = float(len(rat_nreads))
+ if nreads == 0.0 or non_zeros / nreads < 0.7:
+ self.abundance = 0.0
+ return 0.0
+
+ if rat < 0.0:
+ pass
+ elif self.stat == 'avg_g' or (not qn and self.stat in ['wavg_g','tavg_g']):
+ loc_ab = nrawreads / rat if rat >= 0 else 0.0
+ elif self.stat == 'avg_l' or (not qn and self.stat in ['wavg_l','tavg_l']):
+ loc_ab = np.mean([float(n)/(np.absolute(r - self.avg_read_length) + 1) for r,n in rat_nreads])
+ elif self.stat == 'tavg_g':
+ wnreads = sorted([(float(n)/(np.absolute(r-self.avg_read_length)+1),(np.absolute(r - self.avg_read_length)+1) ,n) for r,n in rat_nreads], key=lambda x:x[0])
+ den,num = zip(*[v[1:] for v in wnreads[ql:qr]])
+ loc_ab = float(sum(num))/float(sum(den)) if any(den) else 0.0
+ elif self.stat == 'tavg_l':
+ loc_ab = np.mean(sorted([float(n)/(np.absolute(r - self.avg_read_length) + 1) for r,n in rat_nreads])[ql:qr])
+ elif self.stat == 'wavg_g':
+ vmin, vmax = nreads_v[ql], nreads_v[qr]
+ wnreads = [vmin]*qn+list(nreads_v[ql:qr])+[vmax]*qn
+ loc_ab = float(sum(wnreads)) / rat
+ elif self.stat == 'wavg_l':
+ wnreads = sorted([float(n)/(np.absolute(r - self.avg_read_length) + 1) for r,n in rat_nreads])
+ vmin, vmax = wnreads[ql], wnreads[qr]
+ wnreads = [vmin]*qn+list(wnreads[ql:qr])+[vmax]*qn
+ loc_ab = np.mean(wnreads)
+ elif self.stat == 'med':
+ loc_ab = np.median(sorted([float(n)/(np.absolute(r - self.avg_read_length) +1) for r,n in rat_nreads])[ql:qr])
+
+ self.abundance = loc_ab
+ if rat < self.min_cu_len and self.children:
+ self.abundance = sum_ab
+ elif loc_ab < sum_ab:
+ self.abundance = sum_ab
+
+ if self.abundance > sum_ab and self.children: # *1.1??
+ self.uncl_abundance = self.abundance - sum_ab
+ self.subcl_uncl = not self.children and self.name[0] not in tax_units[-2:]
+
+ return self.abundance
+
+ def get_all_abundances( self ):
+ ret = [(self.name, self.tax_id, self.abundance)]
+ if self.uncl_abundance > 0.0:
+ lchild = list(self.children.values())[0].name[:3]
+ ret += [(lchild+self.name[3:]+"_unclassified", "", self.uncl_abundance)]
+ if self.subcl_uncl and self.name[0] != tax_units[-2]:
+ cind = tax_units.index( self.name[0] )
+ ret += [( tax_units[cind+1]+self.name[1:]+"_unclassified","",
+ self.abundance)]
+ for c in self.children.values():
+ ret += c.get_all_abundances()
+ return ret
+
class TaxTree:
    """Taxonomic tree of TaxClade nodes built from a MetaPhlAn database pickle.

    The constructor consumes the 'taxonomy' and 'markers' sections of the
    database dict (mpa) and fills the per-marker lookup tables that TaxClade
    reads through its class-level attributes.
    """

    def __init__( self, mpa, markers_to_ignore = None ):
        """Build the tree from the database dict, skipping any marker names
        listed in markers_to_ignore (an iterable/set, or None for none)."""
        self.root = TaxClade( "root", 0)
        self.all_clades, self.markers2lens, self.markers2clades, self.taxa2clades, self.markers2exts = {}, {}, {}, {}, {}
        TaxClade.markers2lens = self.markers2lens
        TaxClade.markers2exts = self.markers2exts
        TaxClade.taxa2clades = self.taxa2clades
        self.avg_read_length = 1
        # Bug fix: with the documented default (markers_to_ignore=None) the
        # membership test in the marker loop below raised
        # "TypeError: argument of type 'NoneType' is not iterable".
        if markers_to_ignore is None:
            markers_to_ignore = set()

        for clade, value in mpa['taxonomy'].items():
            clade = clade.strip().split("|")
            if isinstance(value,tuple):
                taxids, lenc = value
                taxids = taxids.strip().split("|")
            if isinstance(value,int):
                lenc = value
                taxids = None
            # NOTE(review): a value that is neither tuple nor int would leave
            # taxids/lenc carrying the previous iteration's values -- assumed
            # not to occur in well-formed database pickles; verify upstream.

            father = self.root
            for i in range(len(clade)):
                clade_lev = clade[i]
                # Taxids are provided only down to a fixed number of ranks
                # (8 with SGB databases, 7 otherwise).
                if SGB_ANALYSIS:
                    clade_taxid = taxids[i] if i < 8 and taxids is not None else None
                else:
                    clade_taxid = taxids[i] if i < 7 and taxids is not None else None
                if not clade_lev in father.children:
                    father.add_child(clade_lev, tax_id=clade_taxid)
                    self.all_clades[clade_lev] = father.children[clade_lev]
                # The point at which 'father' advances differs between the two
                # modes: in SGB mode the terminal ('t__') entry of taxa2clades
                # maps to the child itself, otherwise to its parent.
                if SGB_ANALYSIS: father = father.children[clade_lev]
                if clade_lev[0] == "t":
                    self.taxa2clades[clade_lev[3:]] = father
                if not SGB_ANALYSIS: father = father.children[clade_lev]
                if clade_lev[0] == "t":
                    father.glen = lenc

        def add_lens( node ):
            # Leaves keep their own genome length; internal nodes get the
            # smaller of mean and median of their children's lengths.
            if not node.children:
                return node.glen
            lens = []
            for c in node.children.values():
                lens.append( add_lens( c ) )
            node.glen = min(np.mean(lens), np.median(lens))
            return node.glen

        add_lens(self.root)

        for k, p in mpa['markers'].items():
            if k in markers_to_ignore:
                continue
            self.markers2lens[k] = p['len']
            self.markers2clades[k] = p['clade']
            self.add_reads(k, 0)
            self.markers2exts[k] = p['ext']

    def set_min_cu_len( self, min_cu_len ):
        """Set the minimum clade genome length threshold shared by all clades."""
        TaxClade.min_cu_len = min_cu_len

    def set_stat( self, stat, quantile, perc_nonzero, avg_read_length, avoid_disqm = False):
        """Configure the abundance-estimation statistic shared by all clades."""
        TaxClade.stat = stat
        TaxClade.perc_nonzero = perc_nonzero
        TaxClade.quantile = quantile
        TaxClade.avoid_disqm = avoid_disqm
        TaxClade.avg_read_length = avg_read_length

    def add_reads( self, marker, n,
                   add_viruses = False,
                   ignore_eukaryotes = False,
                   ignore_bacteria = False, ignore_archaea = False,
                   ignore_ksgbs = False, ignore_usgbs = False ):
        """Record that `marker` received `n` reads.

        Returns (full_name, full_taxids) of the marker's clade, or
        (None, None) when the clade is filtered out by the ignore_* /
        add_viruses options.
        """
        clade = self.markers2clades[marker]
        cl = self.all_clades[clade]
        if ignore_bacteria or ignore_archaea or ignore_eukaryotes:
            cn = cl.get_full_name()
            if ignore_archaea and cn.startswith("k__Archaea"):
                return (None, None)
            if ignore_bacteria and cn.startswith("k__Bacteria"):
                return (None, None)
            if ignore_eukaryotes and cn.startswith("k__Eukaryota"):
                return (None, None)
        if not SGB_ANALYSIS and not add_viruses:
            cn = cl.get_full_name()
            if not add_viruses and cn.startswith("k__Vir"):
                return (None, None)
        if SGB_ANALYSIS and (ignore_ksgbs or ignore_usgbs):
            cn = cl.get_full_name()
            # Known SGBs carry an '_SGB' suffix in the second-to-last level.
            if ignore_ksgbs and not '_SGB' in cn.split('|')[-2]:
                return (None, None)
            if ignore_usgbs and '_SGB' in cn.split('|')[-2]:
                return (None, None)
        cl.markers2nreads[marker] = n
        return (cl.get_full_name(), cl.get_full_taxids(), )

    def markers2counts( self ):
        """Return a flat marker -> read-count dict across all clades."""
        m2c = {}
        for _ ,v in self.all_clades.items():
            for m,c in v.markers2nreads.items():
                m2c[m] = c
        return m2c

    def clade_profiles( self, tax_lev, get_all = False ):
        """Return {full clade name: [(marker, normalized count), ...]} for the
        clades whose short name starts with tax_lev (all clades if tax_lev is
        falsy). Unless get_all, profiles that are empty or all-zero are
        skipped."""
        cl2pr = {}
        for k,v in self.all_clades.items():
            if tax_lev and not k.startswith(tax_lev):
                continue
            prof = v.get_normalized_counts()
            if not get_all and ( len(prof) < 1 or not sum([p[1] for p in prof]) > 0.0 ):
                continue
            cl2pr[v.get_full_name()] = prof
        return cl2pr

    def relative_abundances( self, tax_lev ):
        """Compute relative abundances at the requested taxonomic level.

        Returns (ret_d, ret_r, tot_reads): ret_d maps (full name, taxid) to
        the relative abundance (fractions of tot_ab), ret_r additionally
        carries the estimated read counts, and tot_reads is the total of
        mapped reads over the kingdom-level clades.
        """
        clade2abundance_n = dict([(tax_label, clade) for tax_label, clade in self.all_clades.items()
                    if tax_label.startswith("k__") and not clade.uncl])

        clade2abundance, clade2est_nreads, tot_ab, tot_reads = {}, {}, 0.0, 0

        for tax_label, clade in clade2abundance_n.items():
            tot_ab += clade.compute_abundance()

        for tax_label, clade in clade2abundance_n.items():
            for clade_label, tax_id, abundance in sorted(clade.get_all_abundances(), key=lambda pars:pars[0]):
                if SGB_ANALYSIS or clade_label[:3] != 't__':
                    if not tax_lev:
                        if clade_label not in self.all_clades:
                            # Synthetic "_unclassified" label: rebuild the full
                            # lineage string from its parent clade.
                            to = tax_units.index(clade_label[0])
                            t = tax_units[to-1]
                            clade_label = t + clade_label.split("_unclassified")[0][1:]
                            tax_id = self.all_clades[clade_label].get_full_taxids()
                            clade_label = self.all_clades[clade_label].get_full_name()
                            spl = clade_label.split("|")
                            clade_label = "|".join(spl+[tax_units[to]+spl[-1][1:]+"_unclassified"])
                            glen = self.all_clades[spl[-1]].glen
                        else:
                            glen = self.all_clades[clade_label].glen
                            tax_id = self.all_clades[clade_label].get_full_taxids()
                            tax_level = 't__' if SGB_ANALYSIS else 's__'
                            if tax_level in clade_label and abundance > 0:
                                self.all_clades[clade_label].nreads = int(np.floor(abundance*glen))

                            clade_label = self.all_clades[clade_label].get_full_name()
                    elif not clade_label.startswith(tax_lev):
                        # NOTE(review): glen computed below is never used before
                        # the 'continue' -- kept for parity with upstream.
                        if clade_label in self.all_clades:
                            glen = self.all_clades[clade_label].glen
                        else:
                            glen = 1.0
                        continue
                    clade2abundance[(clade_label, tax_id)] = abundance

        for tax_label, clade in clade2abundance_n.items():
            tot_reads += clade.compute_mapped_reads()

        for clade_label, clade in self.all_clades.items():
            if SGB_ANALYSIS or clade.name[:3] != 't__':
                nreads = clade.nreads
                clade_label = clade.get_full_name()
                tax_id = clade.get_full_taxids()
                clade2est_nreads[(clade_label, tax_id)] = nreads

        ret_d = dict([( tax, float(abundance) / tot_ab if tot_ab else 0.0) for tax, abundance in clade2abundance.items()])

        ret_r = dict([( tax, (abundance, clade2est_nreads[tax] )) for tax, abundance in clade2abundance.items() if tax in clade2est_nreads])

        if tax_lev:
            # The remaining probability mass at this level is unclassified.
            ret_d[("UNCLASSIFIED", '-1')] = 1.0 - sum(ret_d.values())
        return ret_d, ret_r, tot_reads
+
def mapq_filter(marker_name, mapq_value, min_mapq_val):
    """Decide whether an alignment passes the MAPQ filter.

    Hits on 'GeneID:' markers are always kept; any other marker is kept only
    when its MAPQ strictly exceeds min_mapq_val.
    """
    return 'GeneID:' in marker_name or mapq_value > min_mapq_val
+
+
def separate_reads2markers(reads2markers):
    """Split read->marker hits into (SGB/EUK hits, viral hits).

    In non-SGB mode the input dict is returned unchanged with an empty viral
    dict. In SGB mode, markers tagged both ways (or neither way) are dropped
    from both outputs.
    """
    if not SGB_ANALYSIS:
        return reads2markers, {}
    sgb_hits = {}
    viral_hits = {}
    for read, marker in reads2markers.items():
        is_sgb = 'SGB' in marker or 'EUK' in marker
        is_viral = 'VDB' in marker
        if is_sgb and not is_viral:
            sgb_hits[read] = marker
        elif is_viral and not is_sgb:
            viral_hits[read] = marker
    return sgb_hits, viral_hits
+
def map2bbh(mapping_f, min_mapq_val, input_type='bowtie2out', min_alignment_len=None, nreads=None, subsampling=None, subsampling_seed='1992'):
    """Parse a bowtie2out or SAM mapping file into best-hit assignments.

    Returns (markers2reads, n_metagenome_reads, avg_read_length) where
    markers2reads maps a marker name to the set of read names hitting it.
    Optionally subsamples the mapped reads down to `subsampling` metagenome
    reads, reproducibly when subsampling_seed is an integer string.
    """
    if not mapping_f:
        ras, ras_line, inpf = plain_read_and_split, plain_read_and_split_line, sys.stdin
    else:
        if mapping_f.endswith(".bz2"):
            ras, ras_line, inpf = read_and_split, read_and_split_line, bz2.BZ2File(mapping_f, "r")
        else:
            ras, ras_line, inpf = plain_read_and_split, plain_read_and_split_line, open(mapping_f)

    reads2markers = {}
    n_metagenome_reads = None
    avg_read_length = 1  # Set to 1 if it is not calculated from read_fastx

    if input_type == 'bowtie2out':
        for r, c in ras(inpf):
            # Bug fix: the two header checks used to be independent 'if'
            # statements, so a '#nreads' header line also fell through to the
            # final 'else' and was stored in reads2markers as a fake hit.
            if r.startswith('#') and 'nreads' in r:
                n_metagenome_reads = int(c)
            elif r.startswith('#') and 'avg_read_length' in r:
                avg_read_length = float(c)
            else:
                reads2markers[r] = c
    elif input_type == 'sam':
        n_metagenome_reads = nreads
        for line in inpf:
            o = ras_line(line)
            if ((o[0][0] != '@') and #no header
                (o[2][-1] != '*') and # no unmapped reads
                (hex(int(o[1]) & 0x100) == '0x0') and #no secondary
                mapq_filter(o[2], int(o[4]), min_mapq_val) and # filter low mapq reads
                ( (min_alignment_len is None) or ( max(int(x.strip('M')) for x in re.findall(r'(\d*M)', o[5]) if x) >= min_alignment_len ) )
                ):
                reads2markers[o[0]] = o[2].split('/')[0]
    inpf.close()

    # NOTE(review): n_metagenome_reads can still be None here if a bowtie2out
    # file lacks the '#nreads' header; the comparisons below would then raise
    # a TypeError -- confirm that upstream files always carry the header.
    if subsampling != None:
        if subsampling >= n_metagenome_reads:
            sys.stderr.write("WARNING: The specified subsampling ({}) is higher than the original number of reads ({}).".format(subsampling, n_metagenome_reads))
        elif subsampling < 10000:
            # NOTE(review): in this branch only a warning is printed and no
            # subsampling is performed -- presumably intentional; verify.
            sys.stderr.write("WARNING: The specified subsampling ({}) is below the recommended minimum of 10,000 reads.".format(subsampling))
        else:
            # Sort first so that an identical seed yields an identical sample.
            reads2markers = dict(sorted(reads2markers.items()))
            if subsampling_seed.lower() != 'random':
                random.seed(int(subsampling_seed))
            reads2filtmarkers = {}
            sgb_reads2markers, viral_reads2markers = separate_reads2markers(reads2markers)
            # Scale SGB and viral hit counts by the global subsampling ratio.
            n_sgb_mapped_reads = int((len(sgb_reads2markers) * subsampling) / n_metagenome_reads)
            reads2filtmarkers = { r:sgb_reads2markers[r] for r in random.sample(list(sgb_reads2markers.keys()), n_sgb_mapped_reads) }
            if SGB_ANALYSIS:
                n_viral_mapped_reads = int((len(viral_reads2markers) * subsampling) / n_metagenome_reads)
                reads2filtmarkers.update({ r:viral_reads2markers[r] for r in random.sample(list(viral_reads2markers.keys()), n_viral_mapped_reads) })
            reads2markers = reads2filtmarkers
            sgb_reads2markers.clear()
            viral_reads2markers.clear()
            n_metagenome_reads = subsampling
    elif n_metagenome_reads < 10000:
        # Bug fix: this warning used to interpolate 'subsampling' (always None
        # in this branch) instead of the actual number of reads.
        sys.stderr.write("WARNING: The number of reads in the sample ({}) is below the recommended minimum of 10,000 reads.".format(n_metagenome_reads))

    markers2reads = defdict(set)
    for r, m in reads2markers.items():
        markers2reads[m].add(r)

    return (markers2reads, n_metagenome_reads, avg_read_length)
+
def maybe_generate_biom_file(tree, pars, abundance_predictions):
    """Write abundance_predictions to the BIOM file requested via pars['biom'].

    Returns None when no BIOM output was requested, True after writing one
    (an empty table is written when there are no predictions). Only tip-level
    entries (species or '_unclassified' leaves) are exported.
    """
    json_key = "MetaPhlAn"

    if not pars['biom']:
        return None
    if not abundance_predictions:
        biom_table = biom.Table([], [], []) # create empty BIOM table

        with open(pars['biom'], 'w') as outfile:
            biom_table.to_json(json_key, direct_io=outfile)

        return True

    # Multi-character metadata delimiters are not supported; fall back to '|'.
    delimiter = "|" if len(pars['mdelim']) > 1 else pars['mdelim']

    def istip(clade_name):
        # A tip is a species-level entry or an '_unclassified' leaf.
        end_name = clade_name.split(delimiter)[-1]
        return end_name.startswith("s__") or end_name.endswith("_unclassified")

    def findclade(clade_name):
        # Map an output label back to its TaxClade node; '_unclassified'
        # labels resolve to their parent clade.
        if clade_name.endswith('_unclassified'):
            name = clade_name.split(delimiter)[-2]
        else:
            name = clade_name.split(delimiter)[-1]
        return tree.all_clades[name]

    def to_biomformat(clade_name):
        # BIOM observation metadata: the lineage as a list of rank labels.
        return {'taxonomy': clade_name.split(delimiter)}

    clades = iter((abundance, findclade(name))
                  for (name, taxid, abundance) in abundance_predictions if istip(name))
    packed = iter(([abundance], clade.get_full_name(), clade.tax_id)
                  for (abundance, clade) in clades)

    # unpack that tuple here to stay under 80 chars on a line
    data, clade_names, _ = zip(*packed)
    # biom likes column vectors, so we give it an array like this:
    # np.array([a],[b],[c])
    data = np.array(data)
    sample_ids = [pars['sample_id']]
    table_id = 'MetaPhlAn_Analysis'




    #**********************************************************************************************
    # Modification of Code :                                                                      *
    # Modified the code so instead of using the current clade IDs, which are numbers, we will    *
    #     use the clade_names                                                                     *
    #     Users reported the biom output is invalid and also the IDs were changing from run to    *
    #     run.                                                                                    *
    # George Weingart    05/22/2017   george.weingart at mail.com                                   *
    #**********************************************************************************************
    # The biom 1.x and 2.x APIs differ; branch on the installed version.
    if LooseVersion(biom.__version__) < LooseVersion("2.0.0"):
        biom_table = biom.table.table_factory(
            data,
            sample_ids,
            ######## clade_ids,     #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            clade_names,            #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            sample_metadata = None,
            observation_metadata = list(map(to_biomformat, clade_names)),
            table_id = table_id,
            constructor = biom.table.DenseOTUTable
        )
        with open(pars['biom'], 'w') as outfile:
            json.dump( biom_table.getBiomFormatObject(json_key),
                       outfile )
    else:  # Below is the biom2 compatible code
        biom_table = biom.table.Table(
            data,
            #clade_ids,           #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            clade_names,          #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            sample_ids,
            sample_metadata = None,
            observation_metadata = list(map(to_biomformat, clade_names)),
            table_id = table_id,
            input_is_dense = True
        )

        with open(pars['biom'], 'w') as outfile:
            biom_table.to_json( json_key,
                                direct_io = outfile )

    return True
+
def main():
    """Command-line entry point for MetaPhlAn profiling.

    Parses CLI parameters, ensures the marker database is installed, runs (or
    reuses) the Bowtie2 mapping, builds the taxonomic tree from the database
    pickle, and writes the analysis selected by '-t' to the output stream.
    """
    ranks2code = { 'k' : 'superkingdom', 'p' : 'phylum', 'c':'class',
                   'o' : 'order', 'f' : 'family', 'g' : 'genus', 's' : 'species'}
    pars = read_params(sys.argv)

    #Set SGB- / species- analysis
    global SGB_ANALYSIS
    SGB_ANALYSIS = not pars['mpa3']

    ESTIMATE_UNK = pars['unclassified_estimation']

    # The seed must be either an integer or the literal string "random".
    if not (pars['subsampling_seed'].lower() == 'random' or pars['subsampling_seed'].isdigit()):
        sys.stderr.write("Error: The --subsampling_seed parameter is not accepted. It should contain an integer number or \"random\". Exiting...\n\n")
        sys.exit(1)


    # check if the database is installed, if not then install
    pars['index'] = check_and_install_database(pars['index'], pars['bowtie2db'], pars['bowtie2_build'], pars['nproc'], pars['force_download'], pars['offline'])

    if pars['install']:
        sys.stderr.write('The database is installed\n')
        return

    # set correct map_pkl and bowtie2db variables
    pars['mpa_pkl'], pars['bowtie2db'] = set_mapping_arguments(pars['index'], pars['bowtie2db'])

    # Local alignment modes yield short alignments, so a minimum alignment
    # length is enforced unless the user supplied one.
    if (pars['bt2_ps'] in ["sensitive-local", "very-sensitive-local"]) and (pars['min_alignment_len'] is None):
        pars['min_alignment_len'] = 100
        sys.stderr.write('Warning! bt2_ps is set to local mode, and min_alignment_len is None, I automatically '
                         'set min_alignment_len to 100! If you do not like, rerun the command and set '
                         'min_alignment_len to a specific value.\n')

    # check for the mpa_pkl file
    if not os.path.isfile(pars['mpa_pkl']):
        sys.stderr.write("Error: Unable to find the mpa_pkl file at: " + pars['mpa_pkl'] +
                         "Exiting...\n\n")
        sys.exit(1)

    if pars['ignore_markers']:
        with open(pars['ignore_markers']) as ignv:
            ignore_markers = set([l.strip() for l in ignv])
    else:
        ignore_markers = set()

    # --- Raw-read input: resolve the bowtie2out path and run Bowtie2 -------
    no_map = False
    if pars['input_type'] == 'fasta' or pars['input_type'] == 'fastq':
        bow = pars['bowtie2db'] is not None

        if not bow:
            sys.stderr.write( "No MetaPhlAn BowTie2 database provided\n "
                              "[--bowtie2db and --index options]!\n"
                              "Exiting...\n\n" )
            sys.exit(1)

        if pars['no_map']:
            # Use a throwaway temporary file that is removed after mapping.
            pars['bowtie2out'] = tf.NamedTemporaryFile(dir=pars['tmp_dir']).name
            no_map = True
        else:
            if bow and not pars['bowtie2out']:
                if pars['inp'] and "," in pars['inp']:
                    sys.stderr.write("Error! --bowtie2out needs to be specified when multiple "
                                     "FASTQ or FASTA files (comma separated) are provided\n")
                    sys.exit(1)
                fname = pars['inp']
                if fname is None:
                    fname = "stdin_map"
                elif stat.S_ISFIFO(os.stat(fname).st_mode):
                    fname = "fifo_map"
                pars['bowtie2out'] = fname + ".bowtie2out.txt"

            if os.path.exists( pars['bowtie2out'] ) and not pars['force']:
                sys.stderr.write(
                    "BowTie2 output file detected: " + pars['bowtie2out'] + "\n"
                    "Please use it as input or remove it if you want to "
                    "re-perform the BowTie2 run.\n"
                    "Exiting...\n\n" )
                sys.exit(1)
            if pars['force']:
                if os.path.exists(pars['bowtie2out']):
                    os.remove( pars['bowtie2out'] )

        # Sanity-check that all six Bowtie2 index files exist and that the
        # forward/reverse indexes have comparable sizes (partial downloads).
        bt2_ext = 'bt2l' if SGB_ANALYSIS else 'bt2'
        if bow and not all([os.path.exists(".".join([str(pars['bowtie2db']), p]))
                            for p in ["1." + bt2_ext, "2." + bt2_ext, "3." + bt2_ext, "4." + bt2_ext, "rev.1." + bt2_ext, "rev.2." + bt2_ext]]):
            sys.stderr.write("No MetaPhlAn BowTie2 database found (--index "
                             "option)!\nExpecting location {}\nExiting..."
                             .format(pars['bowtie2db']))
            sys.exit(1)
        if bow and not (abs(os.path.getsize(".".join([str(pars['bowtie2db']), "1." + bt2_ext])) - os.path.getsize(".".join([str(pars['bowtie2db']), "rev.1." + bt2_ext]))) <= 1000):
            sys.stderr.write("Partial MetaPhlAn BowTie2 database found at {}. "
                             "Please remove and rebuild the database.\nExiting..."
                             .format(pars['bowtie2db']))
            sys.exit(1)

        if bow:
            run_bowtie2(pars['inp'], pars['bowtie2out'], pars['bowtie2db'],
                        pars['bt2_ps'], pars['nproc'], file_format=pars['input_type'],
                        exe=pars['bowtie2_exe'], samout=pars['samout'],
                        min_alignment_len=pars['min_alignment_len'], read_min_len=pars['read_min_len'], min_mapq_val=pars['min_mapq_val'])
            # From here on, treat the freshly produced mapping as the input.
            pars['input_type'] = 'bowtie2out'
            pars['inp'] = pars['bowtie2out'] # !!!

    # --- Load the database pickle and build the taxonomy tree --------------
    with bz2.BZ2File( pars['mpa_pkl'], 'r' ) as a:
        mpa_pkl = pickle.load( a )

    REPORT_MERGED = mpa_pkl.get('merged_taxon',False)
    tree = TaxTree( mpa_pkl, ignore_markers )
    tree.set_min_cu_len( pars['min_cu_len'] )

    if pars['input_type'] == 'sam' and not pars['nreads']:
        sys.stderr.write(
            "Please provide the size of the metagenome using the "
            "--nreads parameter when running MetaPhlAn using SAM files as input"
            "\nExiting...\n\n" )
        sys.exit(1)

    markers2reads, n_metagenome_reads, avg_read_length = map2bbh(pars['inp'], pars['min_mapq_val'], pars['input_type'], pars['min_alignment_len'], pars['nreads'], pars['subsampling'], pars['subsampling_seed'])

    tree.set_stat( pars['stat'], pars['stat_q'], pars['perc_nonzero'], avg_read_length, pars['avoid_disqm'])

    if no_map:
        os.remove( pars['inp'] )

    # --- Assign reads to clades through their markers -----------------------
    map_out = []
    for marker,reads in sorted(markers2reads.items(), key=lambda pars: pars[0]):
        if marker not in tree.markers2lens:
            continue
        tax_seq, ids_seq = tree.add_reads( marker, len(reads),
                                           add_viruses = pars['add_viruses'],
                                           ignore_eukaryotes = pars['ignore_eukaryotes'],
                                           ignore_bacteria = pars['ignore_bacteria'],
                                           ignore_archaea = pars['ignore_archaea'],
                                           ignore_ksgbs = pars['ignore_ksgbs'],
                                           ignore_usgbs = pars['ignore_usgbs']
                                           )
        if tax_seq:
            map_out +=["\t".join([r,tax_seq, ids_seq]) for r in sorted(reads)]

    if pars['output'] is None and pars['output_file'] is not None:
        pars['output'] = pars['output_file']

    out_stream = open(pars['output'],"w") if pars['output'] else sys.stdout
    MPA2_OUTPUT = pars['legacy_output']
    CAMI_OUTPUT = pars['CAMI_format_output']

    with out_stream as outf:
        if not MPA2_OUTPUT:
            outf.write('#{}\n'.format(pars['index']))
            outf.write('#{}\n'.format(' '.join(sys.argv)))
            outf.write('#{} reads processed\n'.format(n_metagenome_reads))

        if not CAMI_OUTPUT:
            outf.write('#' + '\t'.join((pars["sample_id_key"], pars["sample_id"])) + '\n')

        # Estimate the fraction of reads mapped to known clades, used to
        # rescale relative abundances when reporting the unclassified share.
        if ESTIMATE_UNK:
            mapped_reads = 0
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            cl2ab, _, _ = tree.relative_abundances( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            confident_taxa = [taxstr for (taxstr, _),relab in cl2ab.items() if relab > 0.0]
            for c, m in cl2pr.items():
                if c in confident_taxa:
                    markers_cov = [a / 1000 for _, a in m if a > 0]
                    mapped_reads += np.mean(markers_cov) * tree.all_clades[c.split('|')[-1]].glen
            # If the mapped reads are over-estimated, set the ratio at 1
            fraction_mapped_reads = min(mapped_reads/float(n_metagenome_reads), 1.0)
        else:
            fraction_mapped_reads = 1.0

        # --- '-t' dispatch: each branch emits one report format -------------
        if pars['t'] == 'reads_map':
            if not MPA2_OUTPUT:
                outf.write('#read_id\tNCBI_taxlineage_str\tNCBI_taxlineage_ids\n')
            outf.write( "\n".join( map_out ) + "\n" )

        elif pars['t'] == 'rel_ab':
            if CAMI_OUTPUT:
                # NOTE(review): the '\n at Version' / '\n at Ranks' fragments look
                # like mailing-list mangling of '@Version' / '@Ranks' -- confirm
                # against upstream before relying on this header string.
                outf.write('''@SampleID:{}\n at Version:0.10.0\n at Ranks:superkingdom|phylum|class|order|family|genus|species|strain\n@@TAXID\tRANK\tTAXPATH\tTAXPATHSN\tPERCENTAGE\n'''.format(pars["sample_id"]))
            if not MPA2_OUTPUT and not CAMI_OUTPUT:
                if not pars['use_group_representative']:
                    outf.write('#clade_name\tNCBI_tax_id\trelative_abundance\tadditional_species\n')
                else:
                    outf.write('#clade_name\tNCBI_tax_id\trelative_abundance\n')

            cl2ab, _, tot_nreads = tree.relative_abundances(
                pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )

            outpred = [(taxstr, taxid,round(relab*100.0,5)) for (taxstr, taxid), relab in cl2ab.items() if relab > 0.0]
            has_repr = False

            if outpred:
                if CAMI_OUTPUT:
                    # Sort by abundance, biased so higher taxonomic ranks
                    # (fewer '|' separators) come first.
                    for clade, taxid, relab in sorted(  outpred, reverse=True,
                                        key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))):
                        if taxid and clade.split('|')[-1][0] != 't':
                            rank = ranks2code[clade.split('|')[-1][0]]
                            leaf_taxid = taxid.split('|')[-1]
                            taxpathsh = '|'.join([remove_prefix(name) if '_unclassified' not in name else '' for name in clade.split('|')])
                            outf.write( '\t'.join( [ leaf_taxid, rank, taxid, taxpathsh, str(relab*fraction_mapped_reads) ] ) + '\n' )
                else:
                    if ESTIMATE_UNK:
                        outf.write( "\t".join( [    "UNCLASSIFIED",
                                                    "-1",
                                                    str(round((1-fraction_mapped_reads)*100,5)),""]) + "\n" )

                    for clade, taxid, relab in sorted(  outpred, reverse=True,
                                        key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))):
                        add_repr = ''
                        if REPORT_MERGED and (clade, taxid) in mpa_pkl['merged_taxon']:
                            if pars['use_group_representative'] and not SGB_ANALYSIS:
                                if '_group' in clade:
                                    clade, taxid, _ = sorted(mpa_pkl['merged_taxon'][(clade, taxid)], key=lambda x:x[2], reverse=True)[0]
                            elif not pars['use_group_representative']:
                                add_repr = '{}'.format(','.join( [ n[0] for n in mpa_pkl['merged_taxon'][(clade, taxid)]] ))
                                has_repr = True
                        if not MPA2_OUTPUT:
                            outf.write( "\t".join( [clade,
                                                    taxid,
                                                    str(relab*fraction_mapped_reads),
                                                    add_repr
                                                    ] ) + "\n" )
                        else:
                            outf.write( "\t".join( [clade,
                                                    str(relab*fraction_mapped_reads)] ) + "\n" )
                if REPORT_MERGED and has_repr:
                    sys.stderr.write("WARNING: The metagenome profile contains clades that represent multiple species merged into a single representant.\n"
                                     "An additional column listing the merged species is added to the MetaPhlAn output.\n"
                                     )
            else:
                if not MPA2_OUTPUT:
                    outf.write( "UNCLASSIFIED\t-1\t100.0\t\n" )
                else:
                    outf.write( "UNCLASSIFIED\t100.0\n" )
                sys.stderr.write("WARNING: MetaPhlAn did not detect any microbial taxa in the sample.\n")
            maybe_generate_biom_file(tree, pars, outpred)

        elif pars['t'] == 'rel_ab_w_read_stats':
            cl2ab, rr, tot_nreads = tree.relative_abundances(
                pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )

            unmapped_reads = max(n_metagenome_reads - tot_nreads, 0)

            outpred = [(taxstr, taxid,round(relab*100.0*fraction_mapped_reads,5)) for (taxstr, taxid),relab in cl2ab.items() if relab > 0.0]

            if outpred:
                outf.write( "#estimated_reads_mapped_to_known_clades:{}\n".format(round(tot_nreads)) )
                outf.write( "\t".join( [    "#clade_name",
                                            "clade_taxid",
                                            "relative_abundance",
                                            "coverage",
                                            "estimated_number_of_reads_from_the_clade" ]) +"\n" )
                if ESTIMATE_UNK:
                    outf.write( "\t".join( [    "UNCLASSIFIED",
                                                "-1",
                                                str(round((1-fraction_mapped_reads)*100,5)),
                                                "-",
                                                str(round(unmapped_reads)) ]) + "\n" )

                for taxstr, taxid, relab in sorted(  outpred, reverse=True,
                                    key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))):
                    outf.write( "\t".join( [    taxstr,
                                                taxid,
                                                str(relab),
                                                str(round(rr[(taxstr, taxid)][0],5)) if (taxstr, taxid) in rr else '-', #coverage
                                                str( int( round( rr[(taxstr, taxid)][1], 0) )  if (taxstr, taxid) in rr else '-') #estimated_number_of_reads_from_the_clade
                                                ] ) + "\n" )
            else:
                if not MPA2_OUTPUT:
                    outf.write( "#estimated_reads_mapped_to_known_clades:0\n")
                    outf.write( "\t".join( [    "#clade_name",
                                                "clade_taxid",
                                                "relative_abundance",
                                                "coverage",
                                                "estimated_number_of_reads_from_the_clade" ]) +"\n" )
                    outf.write( "unclassified\t-1\t100.0\t0\t0\n" )
                else:
                    outf.write( "unclassified\t100.0\n" )
            maybe_generate_biom_file(tree, pars, outpred)

        elif pars['t'] == 'clade_profiles':
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            for c,p in cl2pr.items():
                mn,n = zip(*p)
                outf.write( "\t".join( [""]+[str(s) for s in mn] ) + "\n" )
                outf.write( "\t".join( [c]+[str(s) for s in n] ) + "\n" )

        elif pars['t'] == 'marker_ab_table':
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            for v in cl2pr.values():
                outf.write( "\n".join(["\t".join([str(a),str(b/float(pars['nreads'])) if pars['nreads'] else str(b)])
                                       for a,b in v if b > 0.0]) + "\n" )

        elif pars['t'] == 'marker_pres_table':
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            for v in cl2pr.values():
                strout = ["\t".join([str(a),"1"]) for a,b in v if b > pars['pres_th']]
                if strout:
                    outf.write( "\n".join(strout) + "\n" )

        elif pars['t'] == 'marker_counts':
            outf.write( "\n".join( ["\t".join([m,str(c)]) for m,c in tree.markers2counts().items() ]) +"\n" )

        elif pars['t'] == 'clade_specific_strain_tracker':
            cl2pr = tree.clade_profiles( None, get_all = True )
            cl2ab, _, _ = tree.relative_abundances( None )
            strout = []
            for (taxstr, taxid), relab in cl2ab.items():
                clade = taxstr
                # Abort entirely if the target clade is below --min_ab.
                if clade.endswith(pars['clade']) and relab*100.0 < pars['min_ab']:
                    strout = []
                    break
                if pars['clade'] in clade:
                    strout += ["\t".join([str(a),str(int(b > pars['pres_th']))]) for a,b in cl2pr[clade]]
            if strout:
                strout = sorted(strout,key=lambda x:x[0])
                outf.write( "\n".join(strout) + "\n" )
            else:
                sys.stderr.write("Clade "+pars['clade']+" not present at an abundance >"+str(round(pars['min_ab'],2))+"%, "
                                 "so no clade specific markers are reported\n")
+
+
if __name__ == '__main__':
    # Time the full run and report the wall-clock duration on stderr.
    start_time = time.time()
    main()
    elapsed = time.time() - start_time
    sys.stderr.write('Elapsed time to run MetaPhlAn: {} s\n'.format(elapsed))
+
=====================================
debian/control
=====================================
@@ -24,6 +24,7 @@ Depends: ${python3:Depends},
python3-biom-format,
python3-msgpack,
python3-pandas,
+ python3-packaging,
bowtie2
Conflicts: metaphlan2
Provides: metaphlan2
=====================================
debian/patches/series
=====================================
@@ -0,0 +1 @@
+use-packaging-version.patch
=====================================
debian/patches/use-packaging-version.patch
=====================================
@@ -0,0 +1,22 @@
+Description: Replace deprecated distutils.version with packaging.version
+Author: Harish Chavre <harrych_1 at yahoo.com>
+--- a/metaphlan/metaphlan.py
++++ b/metaphlan/metaphlan.py
+@@ -23,7 +23,7 @@
+ import time
+ import random
+ from collections import defaultdict as defdict
+-from distutils.version import LooseVersion
++from packaging.version import parse
+ from glob import glob
+ from subprocess import DEVNULL
+ import argparse as ap
+@@ -947,7 +947,7 @@
+ # run. *
+ # George Weingart 05/22/2017 george.weingart at mail.com *
+ #**********************************************************************************************
+- if LooseVersion(biom.__version__) < LooseVersion("2.0.0"):
++ if parse(biom.__version__) < parse("2.0.0"):
+ biom_table = biom.table.table_factory(
+ data,
+ sample_ids,
=====================================
debian/tests/.pc/.quilt_patches
=====================================
@@ -0,0 +1 @@
+debian/patches
=====================================
debian/tests/.pc/.quilt_series
=====================================
@@ -0,0 +1 @@
+series
=====================================
debian/tests/.pc/.version
=====================================
@@ -0,0 +1 @@
+2
=====================================
debian/tests/control
=====================================
@@ -0,0 +1,4 @@
+Tests: run-unit-test
+Depends: @, bowtie2, metaphlan
+Restrictions: allow-stderr
+
=====================================
debian/tests/data/Test.fastq
=====================================
The diff for this file was not included because it is too large.
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.1.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.1.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.2.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.2.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.3.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.3.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.4.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.4.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.pkl
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.pkl differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.1.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.1.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.2.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.2.bt2l differ
=====================================
debian/tests/run-unit-test
=====================================
@@ -0,0 +1,27 @@
+#!/bin/sh
+set -e
+
+pkg=metaphlan
+
+export LC_ALL=C.UTF-8
+if [ "${AUTOPKGTEST_TMP}" = "" ] ; then
+ AUTOPKGTEST_TMP=$(mktemp -d /tmp/${pkg}-test.XXXXXX)
+ trap "rm -rf ${AUTOPKGTEST_TMP}" 0 INT QUIT ABRT PIPE TERM
+fi
+
+cp -a "$(dirname "$0")/data/"* "${AUTOPKGTEST_TMP}/"
+
+cd "${AUTOPKGTEST_TMP}"
+
+metaphlan Test.fastq \
+ --input_type fastq \
+ --bowtie2db . \
+ --index mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910 \
+ -o result.txt
+
+if [ -s result.txt ]; then
+ echo "MetaPhlAn test passed"
+else
+ echo "MetaPhlAn test failed"
+ exit 1
+fi
=====================================
metaphlan/metaphlan.py
=====================================
@@ -23,7 +23,7 @@ import re
import time
import random
from collections import defaultdict as defdict
-from distutils.version import LooseVersion
+from packaging.version import parse
from glob import glob
from subprocess import DEVNULL
import argparse as ap
@@ -947,7 +947,7 @@ def maybe_generate_biom_file(tree, pars, abundance_predictions):
# run. *
# George Weingart 05/22/2017 george.weingart at mail.com *
#**********************************************************************************************
- if LooseVersion(biom.__version__) < LooseVersion("2.0.0"):
+ if parse(biom.__version__) < parse("2.0.0"):
biom_table = biom.table.table_factory(
data,
sample_ids,
View it on GitLab: https://salsa.debian.org/med-team/metaphlan/-/compare/e386ea765e93f574bf43b45044555b5d9f4a45b2...c9b7630a9521eff9b5ae59fb1e0774ce8b50de0a
--
View it on GitLab: https://salsa.debian.org/med-team/metaphlan/-/compare/e386ea765e93f574bf43b45044555b5d9f4a45b2...c9b7630a9521eff9b5ae59fb1e0774ce8b50de0a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250503/74af299b/attachment-0001.htm>
More information about the debian-med-commit
mailing list