[med-svn] [Git][med-team/metaphlan][master] 2 commits: patch added Replace distutils.version with packaging.version for Python 3.12+ compatibility
harish chavre (@Harish1)
gitlab at salsa.debian.org
Sat May 3 10:06:29 BST 2025
harish chavre pushed to branch master at Debian Med / metaphlan
Commits:
03c0cb6f by Harish chavre at 2025-05-03T09:01:51+00:00
patch added Replace distutils.version with packaging.version for Python 3.12+ compatibility
- - - - -
c9b7630a by Harish chavre at 2025-05-03T09:04:36+00:00
Add autopkgtest for metaphlan
- - - - -
23 changed files:
- + .pc/.quilt_patches
- + .pc/.quilt_series
- + .pc/.version
- + .pc/applied-patches
- + .pc/use-packaging-version.patch/.timestamp
- + .pc/use-packaging-version.patch/metaphlan/metaphlan.py
- debian/control
- + debian/patches/series
- + debian/patches/use-packaging-version.patch
- + debian/tests/.pc/.quilt_patches
- + debian/tests/.pc/.quilt_series
- + debian/tests/.pc/.version
- + debian/tests/control
- + debian/tests/data/Test.fastq
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.1.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.2.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.3.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.4.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.pkl
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.1.bt2l
- + debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.2.bt2l
- + debian/tests/run-unit-test
- metaphlan/metaphlan.py
Changes:
=====================================
.pc/.quilt_patches
=====================================
@@ -0,0 +1 @@
+debian/patches
=====================================
.pc/.quilt_series
=====================================
@@ -0,0 +1 @@
+series
=====================================
.pc/.version
=====================================
@@ -0,0 +1 @@
+2
=====================================
.pc/applied-patches
=====================================
@@ -0,0 +1 @@
+use-packaging-version.patch
=====================================
.pc/use-packaging-version.patch/.timestamp
=====================================
=====================================
.pc/use-packaging-version.patch/metaphlan/metaphlan.py
=====================================
@@ -0,0 +1,1304 @@
+#!/usr/bin/env python
+__author__ = ('Aitor Blanco-Miguez (aitor.blancomiguez at unitn.it), '
+ 'Francesco Beghini (francesco.beghini at unitn.it), '
+ 'Nicola Segata (nicola.segata at unitn.it), '
+ 'Duy Tin Truong, '
+ 'Francesco Asnicar (f.asnicar at unitn.it)')
+__version__ = '4.0.4'
+__date__ = '17 Jan 2023'
+
+import sys
+try:
+ from metaphlan import mybytes, plain_read_and_split, plain_read_and_split_line, read_and_split, read_and_split_line, check_and_install_database, remove_prefix
+except ImportError:
+ sys.exit("CRITICAL ERROR: Unable to find the MetaPhlAn python package. Please check your install.")
+
+if float(sys.version_info[0]) < 3.0:
+ sys.stderr.write("MetaPhlAn requires Python 3, your current Python version is {}.{}.{}\n"
+ .format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))
+ sys.exit(1)
+import os
+import stat
+import re
+import time
+import random
+from collections import defaultdict as defdict
+from distutils.version import LooseVersion
+from glob import glob
+from subprocess import DEVNULL
+import argparse as ap
+import bz2
+import pickle
+import subprocess as subp
+import tempfile as tf
+
+try:
+ import numpy as np
+except ImportError:
+ sys.stderr.write("Error! numpy python library not detected!!\n")
+ sys.exit(1)
+
+#**********************************************************************************************
+# Modification of Code : *
+# Modified the code so instead of using the current clade IDs, which are numbers, we will *
+# use the clade_names *
+# Users reported the biom output is invalid and also the IDs were changing from run to *
+# run. *
+# George Weingart 05/22/2017 george.weingart at mail.com *
+#**********************************************************************************************
+
+#*************************************************************
+#* Imports related to biom file generation *
+#*************************************************************
+try:
+ import biom
+ import biom.table
+except ImportError:
+ sys.stderr.write("Warning! Biom python library not detected!"
+ "\n Exporting to biom format will not work!\n")
+import json
+
+# get the directory that contains this script
+metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
+# get the default database folder
+DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "metaphlan_databases")
+DEFAULT_DB_FOLDER= os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER)
+#Whether to execute an SGB-based analysis
+SGB_ANALYSIS = True
+INDEX = 'latest'
+tax_units = "kpcofgst"
+
+def read_params(args):
+ p = ap.ArgumentParser( description=
+ "DESCRIPTION\n"
+ " MetaPhlAn version "+__version__+" ("+__date__+"): \n"
+ " METAgenomic PHyLogenetic ANalysis for metagenomic taxonomic profiling.\n\n"
+ "AUTHORS: "+__author__+"\n\n"
+ "COMMON COMMANDS\n\n"
+ " We assume here that MetaPhlAn is installed using the several options available (pip, conda, PyPi)\n"
+ " Also BowTie2 should be in the system path with execution and read permissions, and Perl should be installed)\n\n"
+
+ "\n========== MetaPhlAn clade-abundance estimation ================= \n\n"
+ "The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species ) \n"
+ "present in the metagenome obtained from a microbiome sample and their \n"
+ "relative abundance. This correspond to the default analysis type (-t rel_ab).\n\n"
+
+ "* Profiling a metagenome from raw reads:\n"
+ "$ metaphlan metagenome.fastq --input_type fastq -o profiled_metagenome.txt\n\n"
+
+ "* You can take advantage of multiple CPUs and save the intermediate BowTie2 output for re-running\n"
+ " MetaPhlAn extremely quickly:\n"
+ "$ metaphlan metagenome.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq -o profiled_metagenome.txt\n\n"
+
+ "* If you already mapped your metagenome against the marker DB (using a previous MetaPhlAn run), you\n"
+ " can obtain the results in few seconds by using the previously saved --bowtie2out file and \n"
+ " specifying the input (--input_type bowtie2out):\n"
+ "$ metaphlan metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out -o profiled_metagenome.txt\n\n"
+
+ "* bowtie2out files generated with MetaPhlAn versions below 3 are not compatibile.\n"
+ " Starting from MetaPhlAn 3.0, the BowTie2 ouput now includes the size of the profiled metagenome and the average read length.\n"
+ " If you want to re-run MetaPhlAn using these file you should provide the metagenome size via --nreads:\n"
+ "$ metaphlan metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out --nreads 520000 -o profiled_metagenome.txt\n\n"
+
+ "* You can also provide an externally BowTie2-mapped SAM if you specify this format with \n"
+ " --input_type. Two steps: first apply BowTie2 and then feed MetaPhlAn with the obtained sam:\n"
+ "$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x ${mpa_dir}/metaphlan_databases/mpa_v30_CHOCOPhlAn_201901 -U metagenome.fastq\n"
+ "$ metaphlan metagenome.sam --input_type sam -o profiled_metagenome.txt\n\n"
+
+ "* We can also natively handle paired-end metagenomes, and, more generally, metagenomes stored in \n"
+ " multiple files (but you need to specify the --bowtie2out parameter):\n"
+ "$ metaphlan metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq\n\n"
+ "\n------------------------------------------------------------------- \n \n\n"
+
+ "\n========== Marker level analysis ============================ \n\n"
+ "MetaPhlAn introduces the capability of characterizing organisms at the strain level using non\n"
+ "aggregated marker information. Such capability comes with several slightly different flavours and \n"
+ "are a way to perform strain tracking and comparison across multiple samples.\n"
+ "Usually, MetaPhlAn is first ran with the default -t to profile the species present in\n"
+ "the community, and then a strain-level profiling can be performed to zoom-in into specific species\n"
+ "of interest. This operation can be performed quickly as it exploits the --bowtie2out intermediate \n"
+ "file saved during the execution of the default analysis type.\n\n"
+
+ "* The following command will output the abundance of each marker with a RPK (reads per kilo-base) \n"
+ " higher 0.0. (we are assuming that metagenome_outfmt.bz2 has been generated before as \n"
+ " shown above).\n"
+ "$ metaphlan -t marker_ab_table metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n"
+ " The obtained RPK can be optionally normalized by the total number of reads in the metagenome \n"
+ " to guarantee fair comparisons of abundances across samples. The number of reads in the metagenome\n"
+ " needs to be passed with the '--nreads' argument\n\n"
+
+ "* The list of markers present in the sample can be obtained with '-t marker_pres_table'\n"
+ "$ metaphlan -t marker_pres_table metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n"
+ " The --pres_th argument (default 1.0) set the minimum RPK value to consider a marker present\n\n"
+
+ "* The list '-t clade_profiles' analysis type reports the same information of '-t marker_ab_table'\n"
+ " but the markers are reported on a clade-by-clade basis.\n"
+ "$ metaphlan -t clade_profiles metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n\n"
+
+ "* Finally, to obtain all markers present for a specific clade and all its subclades, the \n"
+ " '-t clade_specific_strain_tracker' should be used. For example, the following command\n"
+ " is reporting the presence/absence of the markers for the B. fragilis species and its strains\n"
+ " the optional argument --min_ab specifies the minimum clade abundance for reporting the markers\n\n"
+ "$ metaphlan -t clade_specific_strain_tracker --clade s__Bacteroides_fragilis metagenome_outfmt.bz2 --input_type bowtie2out -o marker_abundance_table.txt\n"
+
+ "\n------------------------------------------------------------------- \n\n"
+ "",
+ formatter_class=ap.RawTextHelpFormatter,
+ add_help=False )
+ arg = p.add_argument
+
+ arg( 'inp', metavar='INPUT_FILE', type=str, nargs='?', default=None, help=
+ "the input file can be:\n"
+ "* a fastq file containing metagenomic reads\n"
+ "OR\n"
+ "* a BowTie2 produced SAM file. \n"
+ "OR\n"
+ "* an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run \n"
+ "If the input file is missing, the script assumes that the input is provided using the standard \n"
+ "input, or named pipes.\n"
+ "IMPORTANT: the type of input needs to be specified with --input_type" )
+
+ arg( 'output', metavar='OUTPUT_FILE', type=str, nargs='?', default=None,
+ help= "the tab-separated output file of the predicted taxon relative abundances \n"
+ "[stdout if not present]")
+
+
+ g = p.add_argument_group('Required arguments')
+ arg = g.add_argument
+ input_type_choices = ['fastq','fasta','bowtie2out','sam']
+ arg( '--input_type', choices=input_type_choices, required = '--install' not in args, help =
+ "set whether the input is the FASTA file of metagenomic reads or \n"
+ "the SAM file of the mapping of the reads against the MetaPhlAn db.\n"
+ )
+
+ g = p.add_argument_group('Mapping arguments')
+ arg = g.add_argument
+ arg('--force', action='store_true', help="Force profiling of the input file by removing the bowtie2out file")
+ arg('--bowtie2db', metavar="METAPHLAN_BOWTIE2_DB", type=str, default=DEFAULT_DB_FOLDER,
+ help=("Folder containing the MetaPhlAn database. You can specify the location by exporting the DEFAULT_DB_FOLDER variable in the shell."
+ "[default "+DEFAULT_DB_FOLDER+"]\n"))
+
+ arg('-x', '--index', type=str, default=INDEX,
+ help=("Specify the id of the database version to use. "
+ "If \"latest\", MetaPhlAn will get the latest version.\n"
+ "If an index name is provided, MetaPhlAn will try to use it, if available, and skip the online check.\n"
+ "If the database files are not found on the local MetaPhlAn installation they\n"
+ "will be automatically downloaded [default "+INDEX+"]\n"))
+
+ bt2ps = ['sensitive', 'very-sensitive', 'sensitive-local', 'very-sensitive-local']
+ arg('--bt2_ps', metavar="BowTie2 presets", default='very-sensitive',
+ choices=bt2ps, help="Presets options for BowTie2 (applied only when a "
+ "FASTA file is provided)\n"
+ "The choices enabled in MetaPhlAn are:\n"
+ " * sensitive\n"
+ " * very-sensitive\n"
+ " * sensitive-local\n"
+ " * very-sensitive-local\n"
+ "[default very-sensitive]\n")
+ arg('--bowtie2_exe', type=str, default=None,
+ help='Full path and name of the BowTie2 executable. This option allows'
+ 'MetaPhlAn to reach the executable even when it is not in the '
+ 'system PATH or the system PATH is unreachable')
+ arg('--bowtie2_build', type=str, default='bowtie2-build',
+ help="Full path to the bowtie2-build command to use, deafult assumes "
+ "that 'bowtie2-build is present in the system path")
+ arg('--bowtie2out', metavar="FILE_NAME", type=str, default=None,
+ help="The file for saving the output of BowTie2")
+ arg('--min_mapq_val', type=int, default=5,
+ help="Minimum mapping quality value (MAPQ) [default 5]")
+ arg('--no_map', action='store_true',
+ help="Avoid storing the --bowtie2out map file")
+ arg('--tmp_dir', metavar="", default=None, type=str,
+ help="The folder used to store temporary files [default is the OS "
+ "dependent tmp dir]")
+
+ g = p.add_argument_group('Post-mapping arguments')
+ arg = g.add_argument
+ stat_choices = ['avg_g','avg_l','tavg_g','tavg_l','wavg_g','wavg_l','med']
+ arg( '--tax_lev', metavar='TAXONOMIC_LEVEL', type=str,
+ choices='a'+tax_units, default='a', help =
+ "The taxonomic level for the relative abundance output:\n"
+ "'a' : all taxonomic levels\n"
+ "'k' : kingdoms\n"
+ "'p' : phyla only\n"
+ "'c' : classes only\n"
+ "'o' : orders only\n"
+ "'f' : families only\n"
+ "'g' : genera only\n"
+ "'s' : species only\n"
+ "'t' : SGBs only\n"
+ "[default 'a']" )
+ arg( '--min_cu_len', metavar="", default="2000", type=int, help =
+ "minimum total nucleotide length for the markers in a clade for\n"
+ "estimating the abundance without considering sub-clade abundances\n"
+ "[default 2000]\n" )
+ arg( '--min_alignment_len', metavar="", default=None, type=int, help =
+ "The sam records for aligned reads with the longest subalignment\n"
+ "length smaller than this threshold will be discarded.\n"
+ "[default None]\n" )
+ arg( '--add_viruses', action='store_true', help=
+ "Together with --mpa3, allow the profiling of viral organisms" )
+ arg( '--ignore_eukaryotes', action='store_true', help=
+ "Do not profile eukaryotic organisms" )
+ arg( '--ignore_bacteria', action='store_true', help=
+ "Do not profile bacterial organisms" )
+ arg( '--ignore_archaea', action='store_true', help=
+ "Do not profile archeal organisms" )
+ arg( '--ignore_ksgbs', action='store_true', help=
+ "Do not profile known SGBs (together with --sgb option)" )
+ arg( '--ignore_usgbs', action='store_true', help=
+ "Do not profile unknown SGBs (together with --sgb option)" )
+ arg( '--stat_q', metavar="", type = float, default=0.2, help =
+ "Quantile value for the robust average\n"
+ "[default 0.2]" )
+ arg( '--perc_nonzero', metavar="", type = float, default=0.33, help =
+ "Percentage of markers with a non zero relative abundance for misidentify a species\n"
+ "[default 0.33]" )
+ arg( '--ignore_markers', type=str, default = None, help =
+ "File containing a list of markers to ignore. \n")
+ arg( '--avoid_disqm', action="store_true", help =
+ "Deactivate the procedure of disambiguating the quasi-markers based on the \n"
+ "marker abundance pattern found in the sample. It is generally recommended \n"
+ "to keep the disambiguation procedure in order to minimize false positives\n")
+ arg( '--stat', metavar="", choices=stat_choices, default="tavg_g", type=str, help =
+ "Statistical approach for converting marker abundances into clade abundances\n"
+ "'avg_g' : clade global (i.e. normalizing all markers together) average\n"
+ "'avg_l' : average of length-normalized marker counts\n"
+ "'tavg_g' : truncated clade global average at --stat_q quantile\n"
+ "'tavg_l' : truncated average of length-normalized marker counts (at --stat_q)\n"
+ "'wavg_g' : winsorized clade global average (at --stat_q)\n"
+ "'wavg_l' : winsorized average of length-normalized marker counts (at --stat_q)\n"
+ "'med' : median of length-normalized marker counts\n"
+ "[default tavg_g]" )
+
+ arg = p.add_argument
+
+ g = p.add_argument_group('Additional analysis types and arguments')
+ arg = g.add_argument
+ analysis_types = ['rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 'marker_pres_table', 'clade_specific_strain_tracker']
+ arg( '-t', metavar='ANALYSIS TYPE', type=str, choices = analysis_types,
+ default='rel_ab', help =
+ "Type of analysis to perform: \n"
+ " * rel_ab: profiling a metagenomes in terms of relative abundances\n"
+ " * rel_ab_w_read_stats: profiling a metagenomes in terms of relative abundances and estimate the number of reads coming from each clade.\n"
+ " * reads_map: mapping from reads to clades (only reads hitting a marker)\n"
+ " * clade_profiles: normalized marker counts for clades with at least a non-null marker\n"
+ " * marker_ab_table: normalized marker counts (only when > 0.0 and normalized by metagenome size if --nreads is specified)\n"
+ " * marker_counts: non-normalized marker counts [use with extreme caution]\n"
+ " * marker_pres_table: list of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th\n"
+ " * clade_specific_strain_tracker: list of markers present for a specific clade, specified with --clade, and all its subclades\n"
+ "[default 'rel_ab']" )
+ arg( '--nreads', metavar="NUMBER_OF_READS", type=int, default = None, help =
+ "The total number of reads in the original metagenome. It is used only when \n"
+ "-t marker_table is specified for normalizing the length-normalized counts \n"
+ "with the metagenome size as well. No normalization applied if --nreads is not \n"
+ "specified" )
+ arg( '--pres_th', metavar="PRESENCE_THRESHOLD", type=int, default = 1.0, help =
+ 'Threshold for calling a marker present by the -t marker_pres_table option' )
+ arg( '--clade', metavar="", default=None, type=str, help =
+ "The clade for clade_specific_strain_tracker analysis\n" )
+ arg( '--min_ab', metavar="", default=0.1, type=float, help =
+ "The minimum percentage abundance for the clade in the clade_specific_strain_tracker analysis\n" )
+
+ g = p.add_argument_group('Output arguments')
+ arg = g.add_argument
+ arg( '-o', '--output_file', metavar="output file", type=str, default=None, help =
+ "The output file (if not specified as positional argument)\n")
+ arg('--sample_id_key', metavar="name", type=str, default="SampleID",
+ help =("Specify the sample ID key for this analysis."
+ " Defaults to 'SampleID'."))
+ arg('--use_group_representative', action='store_true', help =("Use a species as representative for species groups."))
+ arg('--sample_id', metavar="value", type=str,
+ default="Metaphlan_Analysis",
+ help =("Specify the sample ID for this analysis."
+ " Defaults to 'Metaphlan_Analysis'."))
+ arg( '-s', '--samout', metavar="sam_output_file",
+ type=str, default=None, help="The sam output file\n")
+
+ arg( '--legacy-output', action='store_true', help="Old MetaPhlAn2 two columns output\n")
+ arg( '--CAMI_format_output', action='store_true', help="Report the profiling using the CAMI output format\n")
+ arg( '--unclassified_estimation', action='store_true', help="Scale relative abundances to the number of reads mapping to identified clades in order to estimate unclassified taxa\n")
+ arg( '--mpa3', action='store_true', help="Perform the analysis using the MetaPhlAn 3 algorithm\n")
+
+ #*************************************************************
+ #* Parameters related to biom file generation *
+ #*************************************************************
+ arg( '--biom', '--biom_output_file', metavar="biom_output", type=str, default=None, help =
+ "If requesting biom file output: The name of the output file in biom format \n")
+
+ arg( '--mdelim', '--metadata_delimiter_char', metavar="mdelim", type=str, default="|", help =
+ "Delimiter for bug metadata: - defaults to pipe. e.g. the pipe in k__Bacteria|p__Proteobacteria \n")
+ #*************************************************************
+ #* End parameters related to biom file generation *
+ #*************************************************************
+
+ g = p.add_argument_group('Other arguments')
+ arg = g.add_argument
+ arg('--nproc', metavar="N", type=int, default=4,
+ help="The number of CPUs to use for parallelizing the mapping [default 4]")
+ arg('--subsampling', type=int, default=None,
+ help="Specify the number of reads to be considered from the input metagenomes [default None]")
+ arg('--subsampling_seed', type=str, default='1992',
+ help="Random seed to use in the selection of the subsampled reads. Choose \"random\r for a random behaviour")
+ arg('--install', action='store_true',
+ help="Only checks if the MetaPhlAn DB is installed and installs it if not. All other parameters are ignored.")
+ arg('--offline', action='store_true',
+ help="If used, MetaPhlAn will not check for new database updates.")
+ arg('--force_download', action='store_true',
+ help="Force the re-download of the latest MetaPhlAn database.")
+ arg('--read_min_len', type=int, default=70,
+ help="Specify the minimum length of the reads to be considered when parsing the input file with "
+ "'read_fastx.py' script, default value is 70")
+ arg('-v', '--version', action='version',
+ version="MetaPhlAn version {} ({})".format(__version__, __date__),
+ help="Prints the current MetaPhlAn version and exit")
+ arg("-h", "--help", action="help", help="show this help message and exit")
+
+ return vars(p.parse_args())
+
+def set_mapping_arguments(index, bowtie2_db):
+ mpa_pkl = 'mpa_pkl'
+ bowtie2db = 'bowtie2db'
+ bt2_ext = 'bt2l' if SGB_ANALYSIS else 'bt2'
+
+ if os.path.isfile(os.path.join(bowtie2_db, "{}.pkl".format(index))):
+ mpa_pkl = os.path.join(bowtie2_db, "{}.pkl".format(index))
+
+ if glob(os.path.join(bowtie2_db, "{}*.{}".format(index, bt2_ext))):
+ bowtie2db = os.path.join(bowtie2_db, "{}".format(index))
+
+ return (mpa_pkl, bowtie2db)
+
+def run_bowtie2(fna_in, outfmt6_out, bowtie2_db, preset, nproc, min_mapq_val, file_format="fasta",
+ exe=None, samout=None, min_alignment_len=None, read_min_len=0):
+ # checking read_fastx.py
+ read_fastx = "read_fastx.py"
+
+ try:
+ subp.check_call([read_fastx, "-h"], stdout=DEVNULL, stderr=DEVNULL)
+ except Exception as e:
+ try:
+ read_fastx = os.path.join(os.path.join(os.path.dirname(__file__), "utils"), read_fastx)
+ subp.check_call([read_fastx, "-h"], stdout=DEVNULL, stderr=DEVNULL)
+ except Exception as e:
+ sys.stderr.write("OSError: fatal error running '{}'. Is it in the system path?\n".format(read_fastx))
+ sys.exit(1)
+
+ # checking bowtie2
+ try:
+ subp.check_call([exe if exe else 'bowtie2', "-h"], stdout=DEVNULL)
+ except Exception as e:
+ sys.stderr.write('OSError: "{}"\nFatal error running BowTie2. Is BowTie2 in the system path?\n'.format(e))
+ sys.exit(1)
+
+ try:
+ if fna_in:
+ readin = subp.Popen([read_fastx, '-l', str(read_min_len), fna_in], stdout=subp.PIPE, stderr=subp.PIPE)
+
+ else:
+ readin = subp.Popen([read_fastx, '-l', str(read_min_len)], stdin=sys.stdin, stdout=subp.PIPE, stderr=subp.PIPE)
+
+ bowtie2_cmd = [exe if exe else 'bowtie2', "--seed", "1992", "--quiet", "--no-unal", "--{}".format(preset),
+ "-S", "-", "-x", bowtie2_db]
+
+ if int(nproc) > 1:
+ bowtie2_cmd += ["-p", str(nproc)]
+
+ bowtie2_cmd += ["-U", "-"] # if not stat.S_ISFIFO(os.stat(fna_in).st_mode) else []
+
+ if file_format == "fasta":
+ bowtie2_cmd += ["-f"]
+
+ p = subp.Popen(bowtie2_cmd, stdout=subp.PIPE, stdin=readin.stdout)
+ readin.stdout.close()
+ lmybytes, outf = (mybytes, bz2.BZ2File(outfmt6_out, "w")) if outfmt6_out.endswith(".bz2") else (str, open(outfmt6_out, "w"))
+ try:
+ if samout:
+ if samout[-4:] == '.bz2':
+ sam_file = bz2.BZ2File(samout, 'w')
+ else:
+ sam_file = open(samout, 'wb')
+ except IOError as e:
+ sys.stderr.write('IOError: "{}"\nUnable to open sam output file.\n'.format(e))
+ sys.exit(1)
+ for line in p.stdout:
+ if samout:
+ sam_file.write(line)
+
+ o = read_and_split_line(line)
+ if not o[0].startswith('@'):
+ if not o[2].endswith('*'):
+ if (hex(int(o[1]) & 0x100) == '0x0'): #no secondary
+ if mapq_filter(o[2], int(o[4]), min_mapq_val) : # filter low mapq reads
+ if ((min_alignment_len is None) or
+ (max([int(x.strip('M')) for x in re.findall(r'(\d*M)', o[5]) if x]) >= min_alignment_len)):
+ outf.write(lmybytes("\t".join([ o[0], o[2].split('/')[0] ]) + "\n"))
+
+ if samout:
+ sam_file.close()
+
+ p.communicate()
+ read_fastx_stderr = readin.stderr.readlines()
+ nreads = None
+ avg_read_length = None
+ try:
+ nreads, avg_read_length = list(map(float, read_fastx_stderr[0].decode().split()))
+ if not nreads:
+ sys.stderr.write('Fatal error running MetaPhlAn. Total metagenome size was not estimated.\nPlease check your input files.\n')
+ sys.exit(1)
+ if not avg_read_length:
+ sys.stderr.write('Fatal error running MetaPhlAn. The average read length was not estimated.\nPlease check your input files.\n')
+ sys.exit(1)
+ outf.write(lmybytes('#nreads\t{}\n'.format(int(nreads))))
+ outf.write(lmybytes('#avg_read_length\t{}'.format(avg_read_length)))
+ outf.close()
+ except ValueError:
+ sys.stderr.write(b''.join(read_fastx_stderr).decode())
+ outf.close()
+ os.unlink(outfmt6_out)
+ sys.exit(1)
+
+ except OSError as e:
+ sys.stderr.write('OSError: "{}"\nFatal error running BowTie2.\n'.format(e))
+ sys.exit(1)
+ except IOError as e:
+ sys.stderr.write('IOError: "{}"\nFatal error running BowTie2.\n'.format(e))
+ sys.exit(1)
+
+ if p.returncode == 13:
+ sys.stderr.write("Permission Denied Error: fatal error running BowTie2."
+ "Is the BowTie2 file in the path with execution and read permissions?\n")
+ sys.exit(1)
+ elif p.returncode != 0:
+ sys.stderr.write("Error while running bowtie2.\n")
+ sys.exit(1)
+
+class TaxClade:
+ min_cu_len = -1
+ markers2lens = None
+ stat = None
+ perc_nonzero = None
+ quantile = None
+ avoid_disqm = False
+ avg_read_length = 1
+
+ def __init__( self, name, tax_id, uncl = False):
+ self.children, self.markers2nreads = {}, {}
+ self.name, self.father = name, None
+ self.uncl, self.subcl_uncl = uncl, False
+ self.abundance, self.uncl_abundance = None, 0
+ self.nreads, self.uncl_nreads = 0, 0
+ self.tax_id = tax_id
+
+ def add_child( self, name, tax_id ):
+ new_clade = TaxClade( name, tax_id )
+ self.children[name] = new_clade
+ new_clade.father = self
+ return new_clade
+
+
+ def get_terminals( self ):
+ terms = []
+ if not self.children:
+ return [self]
+ for c in self.children.values():
+ terms += c.get_terminals()
+ return terms
+
+ def get_full_taxids( self ):
+ fullname = ['']
+ if self.tax_id:
+ fullname = [self.tax_id]
+ cl = self.father
+ while cl:
+ fullname = [cl.tax_id] + fullname
+ cl = cl.father
+ return "|".join(fullname[1:])
+
+ def get_full_name( self ):
+ fullname = [self.name]
+ cl = self.father
+ while cl:
+ fullname = [cl.name] + fullname
+ cl = cl.father
+ return "|".join(fullname[1:])
+
+ def get_normalized_counts( self ):
+ return [(m,float(n)*1000.0/(np.absolute(self.markers2lens[m] - self.avg_read_length) +1) )
+ for m,n in self.markers2nreads.items()]
+
+ def compute_mapped_reads( self ):
+ tax_level = 't__' if SGB_ANALYSIS else 's__'
+ if self.nreads != 0 or self.name.startswith(tax_level):
+ return self.nreads
+ for c in self.children.values():
+ self.nreads += c.compute_mapped_reads()
+ return self.nreads
+
+ def compute_abundance( self ):
+ if self.abundance is not None: return self.abundance
+
+ sum_ab = sum([c.compute_abundance() for c in self.children.values()])
+
+ # rat_nreads = sorted([(self.markers2lens[marker], n_reads)
+ # for marker,n_reads in self.markers2nreads.items()],
+ # key = lambda x: x[1])
+
+ rat_nreads, removed = [], []
+ for marker, n_reads in sorted(self.markers2nreads.items(),key=lambda x:x[0]):
+ misidentified = False
+
+ if not self.avoid_disqm:
+ for ext in self.markers2exts[marker]:
+ ext_clade = self.taxa2clades[ext]
+ m2nr = ext_clade.markers2nreads
+
+ tocladetmp = ext_clade
+ while len(tocladetmp.children) == 1:
+ tocladetmp = list(tocladetmp.children.values())[0]
+ m2nr = tocladetmp.markers2nreads
+
+ nonzeros = sum([v>0 for v in m2nr.values()])
+ if len(m2nr):
+ if float(nonzeros) / len(m2nr) > self.perc_nonzero:
+ misidentified = True
+ removed.append( (self.markers2lens[marker],n_reads) )
+ break
+ if not misidentified:
+ rat_nreads.append( (self.markers2lens[marker],n_reads) )
+
+ if not self.avoid_disqm and len(removed):
+ n_rat_nreads = float(len(rat_nreads))
+ n_removed = float(len(removed))
+ n_tot = n_rat_nreads + n_removed
+ n_ripr = 10
+
+ if len(self.get_terminals()) < 2:
+ n_ripr = 0
+
+ if "k__Viruses" in self.get_full_name():
+ n_ripr = 0
+
+ if n_rat_nreads < n_ripr and n_tot > n_rat_nreads:
+ rat_nreads += removed[:n_ripr-int(n_rat_nreads)]
+
+
+ rat_nreads = sorted(rat_nreads, key = lambda x: x[1])
+
+ rat_v,nreads_v = zip(*rat_nreads) if rat_nreads else ([],[])
+ rat, nrawreads, loc_ab = float(sum(rat_v)) or -1.0, sum(nreads_v), 0.0
+ quant = int(self.quantile*len(rat_nreads))
+ ql,qr,qn = (quant,-quant,quant) if quant else (None,None,0)
+
+ if not SGB_ANALYSIS and self.name[0] == 't' and (len(self.father.children) > 1 or "_sp" in self.father.name or "k__Viruses" in self.get_full_name()):
+ non_zeros = float(len([n for r,n in rat_nreads if n > 0]))
+ nreads = float(len(rat_nreads))
+ if nreads == 0.0 or non_zeros / nreads < 0.7:
+ self.abundance = 0.0
+ return 0.0
+
+ if rat < 0.0:
+ pass
+ elif self.stat == 'avg_g' or (not qn and self.stat in ['wavg_g','tavg_g']):
+ loc_ab = nrawreads / rat if rat >= 0 else 0.0
+ elif self.stat == 'avg_l' or (not qn and self.stat in ['wavg_l','tavg_l']):
+ loc_ab = np.mean([float(n)/(np.absolute(r - self.avg_read_length) + 1) for r,n in rat_nreads])
+ elif self.stat == 'tavg_g':
+ wnreads = sorted([(float(n)/(np.absolute(r-self.avg_read_length)+1),(np.absolute(r - self.avg_read_length)+1) ,n) for r,n in rat_nreads], key=lambda x:x[0])
+ den,num = zip(*[v[1:] for v in wnreads[ql:qr]])
+ loc_ab = float(sum(num))/float(sum(den)) if any(den) else 0.0
+ elif self.stat == 'tavg_l':
+ loc_ab = np.mean(sorted([float(n)/(np.absolute(r - self.avg_read_length) + 1) for r,n in rat_nreads])[ql:qr])
+ elif self.stat == 'wavg_g':
+ vmin, vmax = nreads_v[ql], nreads_v[qr]
+ wnreads = [vmin]*qn+list(nreads_v[ql:qr])+[vmax]*qn
+ loc_ab = float(sum(wnreads)) / rat
+ elif self.stat == 'wavg_l':
+ wnreads = sorted([float(n)/(np.absolute(r - self.avg_read_length) + 1) for r,n in rat_nreads])
+ vmin, vmax = wnreads[ql], wnreads[qr]
+ wnreads = [vmin]*qn+list(wnreads[ql:qr])+[vmax]*qn
+ loc_ab = np.mean(wnreads)
+ elif self.stat == 'med':
+ loc_ab = np.median(sorted([float(n)/(np.absolute(r - self.avg_read_length) +1) for r,n in rat_nreads])[ql:qr])
+
+ self.abundance = loc_ab
+ if rat < self.min_cu_len and self.children:
+ self.abundance = sum_ab
+ elif loc_ab < sum_ab:
+ self.abundance = sum_ab
+
+ if self.abundance > sum_ab and self.children: # *1.1??
+ self.uncl_abundance = self.abundance - sum_ab
+ self.subcl_uncl = not self.children and self.name[0] not in tax_units[-2:]
+
+ return self.abundance
+
+ def get_all_abundances( self ):
+ ret = [(self.name, self.tax_id, self.abundance)]
+ if self.uncl_abundance > 0.0:
+ lchild = list(self.children.values())[0].name[:3]
+ ret += [(lchild+self.name[3:]+"_unclassified", "", self.uncl_abundance)]
+ if self.subcl_uncl and self.name[0] != tax_units[-2]:
+ cind = tax_units.index( self.name[0] )
+ ret += [( tax_units[cind+1]+self.name[1:]+"_unclassified","",
+ self.abundance)]
+ for c in self.children.values():
+ ret += c.get_all_abundances()
+ return ret
+
class TaxTree:
    """Taxonomic tree of TaxClade nodes built from a MetaPhlAn database pickle.

    The constructor consumes the 'taxonomy' and 'markers' sections of the
    database dict (mpa) and fills the per-marker lookup tables that TaxClade
    reads through its class-level attributes.
    """

    def __init__( self, mpa, markers_to_ignore = None ):
        """Build the tree from the database dict, skipping any marker names
        listed in markers_to_ignore (an iterable/set, or None for none)."""
        self.root = TaxClade( "root", 0)
        self.all_clades, self.markers2lens, self.markers2clades, self.taxa2clades, self.markers2exts = {}, {}, {}, {}, {}
        TaxClade.markers2lens = self.markers2lens
        TaxClade.markers2exts = self.markers2exts
        TaxClade.taxa2clades = self.taxa2clades
        self.avg_read_length = 1
        # Bug fix: with the documented default (markers_to_ignore=None) the
        # membership test in the marker loop below raised
        # "TypeError: argument of type 'NoneType' is not iterable".
        if markers_to_ignore is None:
            markers_to_ignore = set()

        for clade, value in mpa['taxonomy'].items():
            clade = clade.strip().split("|")
            if isinstance(value,tuple):
                taxids, lenc = value
                taxids = taxids.strip().split("|")
            if isinstance(value,int):
                lenc = value
                taxids = None
            # NOTE(review): a value that is neither tuple nor int would leave
            # taxids/lenc carrying the previous iteration's values -- assumed
            # not to occur in well-formed database pickles; verify upstream.

            father = self.root
            for i in range(len(clade)):
                clade_lev = clade[i]
                # Taxids are provided only down to a fixed number of ranks
                # (8 with SGB databases, 7 otherwise).
                if SGB_ANALYSIS:
                    clade_taxid = taxids[i] if i < 8 and taxids is not None else None
                else:
                    clade_taxid = taxids[i] if i < 7 and taxids is not None else None
                if not clade_lev in father.children:
                    father.add_child(clade_lev, tax_id=clade_taxid)
                    self.all_clades[clade_lev] = father.children[clade_lev]
                # The point at which 'father' advances differs between the two
                # modes: in SGB mode the terminal ('t__') entry of taxa2clades
                # maps to the child itself, otherwise to its parent.
                if SGB_ANALYSIS: father = father.children[clade_lev]
                if clade_lev[0] == "t":
                    self.taxa2clades[clade_lev[3:]] = father
                if not SGB_ANALYSIS: father = father.children[clade_lev]
                if clade_lev[0] == "t":
                    father.glen = lenc

        def add_lens( node ):
            # Leaves keep their own genome length; internal nodes get the
            # smaller of mean and median of their children's lengths.
            if not node.children:
                return node.glen
            lens = []
            for c in node.children.values():
                lens.append( add_lens( c ) )
            node.glen = min(np.mean(lens), np.median(lens))
            return node.glen

        add_lens(self.root)

        for k, p in mpa['markers'].items():
            if k in markers_to_ignore:
                continue
            self.markers2lens[k] = p['len']
            self.markers2clades[k] = p['clade']
            self.add_reads(k, 0)
            self.markers2exts[k] = p['ext']

    def set_min_cu_len( self, min_cu_len ):
        """Set the minimum clade genome length threshold shared by all clades."""
        TaxClade.min_cu_len = min_cu_len

    def set_stat( self, stat, quantile, perc_nonzero, avg_read_length, avoid_disqm = False):
        """Configure the abundance-estimation statistic shared by all clades."""
        TaxClade.stat = stat
        TaxClade.perc_nonzero = perc_nonzero
        TaxClade.quantile = quantile
        TaxClade.avoid_disqm = avoid_disqm
        TaxClade.avg_read_length = avg_read_length

    def add_reads( self, marker, n,
                   add_viruses = False,
                   ignore_eukaryotes = False,
                   ignore_bacteria = False, ignore_archaea = False,
                   ignore_ksgbs = False, ignore_usgbs = False ):
        """Record that `marker` received `n` reads.

        Returns (full_name, full_taxids) of the marker's clade, or
        (None, None) when the clade is filtered out by the ignore_* /
        add_viruses options.
        """
        clade = self.markers2clades[marker]
        cl = self.all_clades[clade]
        if ignore_bacteria or ignore_archaea or ignore_eukaryotes:
            cn = cl.get_full_name()
            if ignore_archaea and cn.startswith("k__Archaea"):
                return (None, None)
            if ignore_bacteria and cn.startswith("k__Bacteria"):
                return (None, None)
            if ignore_eukaryotes and cn.startswith("k__Eukaryota"):
                return (None, None)
        if not SGB_ANALYSIS and not add_viruses:
            cn = cl.get_full_name()
            if not add_viruses and cn.startswith("k__Vir"):
                return (None, None)
        if SGB_ANALYSIS and (ignore_ksgbs or ignore_usgbs):
            cn = cl.get_full_name()
            # Known SGBs carry an '_SGB' suffix in the second-to-last level.
            if ignore_ksgbs and not '_SGB' in cn.split('|')[-2]:
                return (None, None)
            if ignore_usgbs and '_SGB' in cn.split('|')[-2]:
                return (None, None)
        cl.markers2nreads[marker] = n
        return (cl.get_full_name(), cl.get_full_taxids(), )

    def markers2counts( self ):
        """Return a flat marker -> read-count dict across all clades."""
        m2c = {}
        for _ ,v in self.all_clades.items():
            for m,c in v.markers2nreads.items():
                m2c[m] = c
        return m2c

    def clade_profiles( self, tax_lev, get_all = False ):
        """Return {full clade name: [(marker, normalized count), ...]} for the
        clades whose short name starts with tax_lev (all clades if tax_lev is
        falsy). Unless get_all, profiles that are empty or all-zero are
        skipped."""
        cl2pr = {}
        for k,v in self.all_clades.items():
            if tax_lev and not k.startswith(tax_lev):
                continue
            prof = v.get_normalized_counts()
            if not get_all and ( len(prof) < 1 or not sum([p[1] for p in prof]) > 0.0 ):
                continue
            cl2pr[v.get_full_name()] = prof
        return cl2pr

    def relative_abundances( self, tax_lev ):
        """Compute relative abundances at the requested taxonomic level.

        Returns (ret_d, ret_r, tot_reads): ret_d maps (full name, taxid) to
        the relative abundance (fractions of tot_ab), ret_r additionally
        carries the estimated read counts, and tot_reads is the total of
        mapped reads over the kingdom-level clades.
        """
        clade2abundance_n = dict([(tax_label, clade) for tax_label, clade in self.all_clades.items()
                    if tax_label.startswith("k__") and not clade.uncl])

        clade2abundance, clade2est_nreads, tot_ab, tot_reads = {}, {}, 0.0, 0

        for tax_label, clade in clade2abundance_n.items():
            tot_ab += clade.compute_abundance()

        for tax_label, clade in clade2abundance_n.items():
            for clade_label, tax_id, abundance in sorted(clade.get_all_abundances(), key=lambda pars:pars[0]):
                if SGB_ANALYSIS or clade_label[:3] != 't__':
                    if not tax_lev:
                        if clade_label not in self.all_clades:
                            # Synthetic "_unclassified" label: rebuild the full
                            # lineage string from its parent clade.
                            to = tax_units.index(clade_label[0])
                            t = tax_units[to-1]
                            clade_label = t + clade_label.split("_unclassified")[0][1:]
                            tax_id = self.all_clades[clade_label].get_full_taxids()
                            clade_label = self.all_clades[clade_label].get_full_name()
                            spl = clade_label.split("|")
                            clade_label = "|".join(spl+[tax_units[to]+spl[-1][1:]+"_unclassified"])
                            glen = self.all_clades[spl[-1]].glen
                        else:
                            glen = self.all_clades[clade_label].glen
                            tax_id = self.all_clades[clade_label].get_full_taxids()
                            tax_level = 't__' if SGB_ANALYSIS else 's__'
                            if tax_level in clade_label and abundance > 0:
                                self.all_clades[clade_label].nreads = int(np.floor(abundance*glen))

                            clade_label = self.all_clades[clade_label].get_full_name()
                    elif not clade_label.startswith(tax_lev):
                        # NOTE(review): glen computed below is never used before
                        # the 'continue' -- kept for parity with upstream.
                        if clade_label in self.all_clades:
                            glen = self.all_clades[clade_label].glen
                        else:
                            glen = 1.0
                        continue
                    clade2abundance[(clade_label, tax_id)] = abundance

        for tax_label, clade in clade2abundance_n.items():
            tot_reads += clade.compute_mapped_reads()

        for clade_label, clade in self.all_clades.items():
            if SGB_ANALYSIS or clade.name[:3] != 't__':
                nreads = clade.nreads
                clade_label = clade.get_full_name()
                tax_id = clade.get_full_taxids()
                clade2est_nreads[(clade_label, tax_id)] = nreads

        ret_d = dict([( tax, float(abundance) / tot_ab if tot_ab else 0.0) for tax, abundance in clade2abundance.items()])

        ret_r = dict([( tax, (abundance, clade2est_nreads[tax] )) for tax, abundance in clade2abundance.items() if tax in clade2est_nreads])

        if tax_lev:
            # The remaining probability mass at this level is unclassified.
            ret_d[("UNCLASSIFIED", '-1')] = 1.0 - sum(ret_d.values())
        return ret_d, ret_r, tot_reads
+
def mapq_filter(marker_name, mapq_value, min_mapq_val):
    """Decide whether an alignment passes the MAPQ filter.

    Hits on 'GeneID:' markers are always kept; any other marker is kept only
    when its MAPQ strictly exceeds min_mapq_val.
    """
    return 'GeneID:' in marker_name or mapq_value > min_mapq_val
+
+
def separate_reads2markers(reads2markers):
    """Split read->marker hits into (SGB/EUK hits, viral hits).

    In non-SGB mode the input dict is returned unchanged with an empty viral
    dict. In SGB mode, markers tagged both ways (or neither way) are dropped
    from both outputs.
    """
    if not SGB_ANALYSIS:
        return reads2markers, {}
    sgb_hits = {}
    viral_hits = {}
    for read, marker in reads2markers.items():
        is_sgb = 'SGB' in marker or 'EUK' in marker
        is_viral = 'VDB' in marker
        if is_sgb and not is_viral:
            sgb_hits[read] = marker
        elif is_viral and not is_sgb:
            viral_hits[read] = marker
    return sgb_hits, viral_hits
+
def map2bbh(mapping_f, min_mapq_val, input_type='bowtie2out', min_alignment_len=None, nreads=None, subsampling=None, subsampling_seed='1992'):
    """Parse a bowtie2out or SAM mapping file into best-hit assignments.

    Returns (markers2reads, n_metagenome_reads, avg_read_length) where
    markers2reads maps a marker name to the set of read names hitting it.
    Optionally subsamples the mapped reads down to `subsampling` metagenome
    reads, reproducibly when subsampling_seed is an integer string.
    """
    if not mapping_f:
        ras, ras_line, inpf = plain_read_and_split, plain_read_and_split_line, sys.stdin
    else:
        if mapping_f.endswith(".bz2"):
            ras, ras_line, inpf = read_and_split, read_and_split_line, bz2.BZ2File(mapping_f, "r")
        else:
            ras, ras_line, inpf = plain_read_and_split, plain_read_and_split_line, open(mapping_f)

    reads2markers = {}
    n_metagenome_reads = None
    avg_read_length = 1  # Set to 1 if it is not calculated from read_fastx

    if input_type == 'bowtie2out':
        for r, c in ras(inpf):
            # Bug fix: the two header checks used to be independent 'if'
            # statements, so a '#nreads' header line also fell through to the
            # final 'else' and was stored in reads2markers as a fake hit.
            if r.startswith('#') and 'nreads' in r:
                n_metagenome_reads = int(c)
            elif r.startswith('#') and 'avg_read_length' in r:
                avg_read_length = float(c)
            else:
                reads2markers[r] = c
    elif input_type == 'sam':
        n_metagenome_reads = nreads
        for line in inpf:
            o = ras_line(line)
            if ((o[0][0] != '@') and #no header
                (o[2][-1] != '*') and # no unmapped reads
                (hex(int(o[1]) & 0x100) == '0x0') and #no secondary
                mapq_filter(o[2], int(o[4]), min_mapq_val) and # filter low mapq reads
                ( (min_alignment_len is None) or ( max(int(x.strip('M')) for x in re.findall(r'(\d*M)', o[5]) if x) >= min_alignment_len ) )
                ):
                reads2markers[o[0]] = o[2].split('/')[0]
    inpf.close()

    # NOTE(review): n_metagenome_reads can still be None here if a bowtie2out
    # file lacks the '#nreads' header; the comparisons below would then raise
    # a TypeError -- confirm that upstream files always carry the header.
    if subsampling != None:
        if subsampling >= n_metagenome_reads:
            sys.stderr.write("WARNING: The specified subsampling ({}) is higher than the original number of reads ({}).".format(subsampling, n_metagenome_reads))
        elif subsampling < 10000:
            # NOTE(review): in this branch only a warning is printed and no
            # subsampling is performed -- presumably intentional; verify.
            sys.stderr.write("WARNING: The specified subsampling ({}) is below the recommended minimum of 10,000 reads.".format(subsampling))
        else:
            # Sort first so that an identical seed yields an identical sample.
            reads2markers = dict(sorted(reads2markers.items()))
            if subsampling_seed.lower() != 'random':
                random.seed(int(subsampling_seed))
            reads2filtmarkers = {}
            sgb_reads2markers, viral_reads2markers = separate_reads2markers(reads2markers)
            # Scale SGB and viral hit counts by the global subsampling ratio.
            n_sgb_mapped_reads = int((len(sgb_reads2markers) * subsampling) / n_metagenome_reads)
            reads2filtmarkers = { r:sgb_reads2markers[r] for r in random.sample(list(sgb_reads2markers.keys()), n_sgb_mapped_reads) }
            if SGB_ANALYSIS:
                n_viral_mapped_reads = int((len(viral_reads2markers) * subsampling) / n_metagenome_reads)
                reads2filtmarkers.update({ r:viral_reads2markers[r] for r in random.sample(list(viral_reads2markers.keys()), n_viral_mapped_reads) })
            reads2markers = reads2filtmarkers
            sgb_reads2markers.clear()
            viral_reads2markers.clear()
            n_metagenome_reads = subsampling
    elif n_metagenome_reads < 10000:
        # Bug fix: this warning used to interpolate 'subsampling' (always None
        # in this branch) instead of the actual number of reads.
        sys.stderr.write("WARNING: The number of reads in the sample ({}) is below the recommended minimum of 10,000 reads.".format(n_metagenome_reads))

    markers2reads = defdict(set)
    for r, m in reads2markers.items():
        markers2reads[m].add(r)

    return (markers2reads, n_metagenome_reads, avg_read_length)
+
def maybe_generate_biom_file(tree, pars, abundance_predictions):
    """Write abundance_predictions to the BIOM file requested via pars['biom'].

    Returns None when no BIOM output was requested, True after writing one
    (an empty table is written when there are no predictions). Only tip-level
    entries (species or '_unclassified' leaves) are exported.
    """
    json_key = "MetaPhlAn"

    if not pars['biom']:
        return None
    if not abundance_predictions:
        biom_table = biom.Table([], [], []) # create empty BIOM table

        with open(pars['biom'], 'w') as outfile:
            biom_table.to_json(json_key, direct_io=outfile)

        return True

    # Multi-character metadata delimiters are not supported; fall back to '|'.
    delimiter = "|" if len(pars['mdelim']) > 1 else pars['mdelim']

    def istip(clade_name):
        # A tip is a species-level entry or an '_unclassified' leaf.
        end_name = clade_name.split(delimiter)[-1]
        return end_name.startswith("s__") or end_name.endswith("_unclassified")

    def findclade(clade_name):
        # Map an output label back to its TaxClade node; '_unclassified'
        # labels resolve to their parent clade.
        if clade_name.endswith('_unclassified'):
            name = clade_name.split(delimiter)[-2]
        else:
            name = clade_name.split(delimiter)[-1]
        return tree.all_clades[name]

    def to_biomformat(clade_name):
        # BIOM observation metadata: the lineage as a list of rank labels.
        return {'taxonomy': clade_name.split(delimiter)}

    clades = iter((abundance, findclade(name))
                  for (name, taxid, abundance) in abundance_predictions if istip(name))
    packed = iter(([abundance], clade.get_full_name(), clade.tax_id)
                  for (abundance, clade) in clades)

    # unpack that tuple here to stay under 80 chars on a line
    data, clade_names, _ = zip(*packed)
    # biom likes column vectors, so we give it an array like this:
    # np.array([a],[b],[c])
    data = np.array(data)
    sample_ids = [pars['sample_id']]
    table_id = 'MetaPhlAn_Analysis'




    #**********************************************************************************************
    # Modification of Code :                                                                      *
    # Modified the code so instead of using the current clade IDs, which are numbers, we will    *
    #     use the clade_names                                                                     *
    #     Users reported the biom output is invalid and also the IDs were changing from run to    *
    #     run.                                                                                    *
    # George Weingart    05/22/2017   george.weingart at mail.com                                   *
    #**********************************************************************************************
    # The biom 1.x and 2.x APIs differ; branch on the installed version.
    if LooseVersion(biom.__version__) < LooseVersion("2.0.0"):
        biom_table = biom.table.table_factory(
            data,
            sample_ids,
            ######## clade_ids,     #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            clade_names,            #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            sample_metadata = None,
            observation_metadata = list(map(to_biomformat, clade_names)),
            table_id = table_id,
            constructor = biom.table.DenseOTUTable
        )
        with open(pars['biom'], 'w') as outfile:
            json.dump( biom_table.getBiomFormatObject(json_key),
                       outfile )
    else:  # Below is the biom2 compatible code
        biom_table = biom.table.Table(
            data,
            #clade_ids,           #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            clade_names,          #Modified by George Weingart 5/22/2017 - We will use instead the clade_names
            sample_ids,
            sample_metadata = None,
            observation_metadata = list(map(to_biomformat, clade_names)),
            table_id = table_id,
            input_is_dense = True
        )

        with open(pars['biom'], 'w') as outfile:
            biom_table.to_json( json_key,
                                direct_io = outfile )

    return True
+
def main():
    """Command-line entry point for MetaPhlAn profiling.

    Parses CLI parameters, ensures the marker database is installed, runs (or
    reuses) the Bowtie2 mapping, builds the taxonomic tree from the database
    pickle, and writes the analysis selected by '-t' to the output stream.
    """
    ranks2code = { 'k' : 'superkingdom', 'p' : 'phylum', 'c':'class',
                   'o' : 'order', 'f' : 'family', 'g' : 'genus', 's' : 'species'}
    pars = read_params(sys.argv)

    #Set SGB- / species- analysis
    global SGB_ANALYSIS
    SGB_ANALYSIS = not pars['mpa3']

    ESTIMATE_UNK = pars['unclassified_estimation']

    # The seed must be either an integer or the literal string "random".
    if not (pars['subsampling_seed'].lower() == 'random' or pars['subsampling_seed'].isdigit()):
        sys.stderr.write("Error: The --subsampling_seed parameter is not accepted. It should contain an integer number or \"random\". Exiting...\n\n")
        sys.exit(1)


    # check if the database is installed, if not then install
    pars['index'] = check_and_install_database(pars['index'], pars['bowtie2db'], pars['bowtie2_build'], pars['nproc'], pars['force_download'], pars['offline'])

    if pars['install']:
        sys.stderr.write('The database is installed\n')
        return

    # set correct map_pkl and bowtie2db variables
    pars['mpa_pkl'], pars['bowtie2db'] = set_mapping_arguments(pars['index'], pars['bowtie2db'])

    # Local alignment modes yield short alignments, so a minimum alignment
    # length is enforced unless the user supplied one.
    if (pars['bt2_ps'] in ["sensitive-local", "very-sensitive-local"]) and (pars['min_alignment_len'] is None):
        pars['min_alignment_len'] = 100
        sys.stderr.write('Warning! bt2_ps is set to local mode, and min_alignment_len is None, I automatically '
                         'set min_alignment_len to 100! If you do not like, rerun the command and set '
                         'min_alignment_len to a specific value.\n')

    # check for the mpa_pkl file
    if not os.path.isfile(pars['mpa_pkl']):
        sys.stderr.write("Error: Unable to find the mpa_pkl file at: " + pars['mpa_pkl'] +
                         "Exiting...\n\n")
        sys.exit(1)

    if pars['ignore_markers']:
        with open(pars['ignore_markers']) as ignv:
            ignore_markers = set([l.strip() for l in ignv])
    else:
        ignore_markers = set()

    # --- Raw-read input: resolve the bowtie2out path and run Bowtie2 -------
    no_map = False
    if pars['input_type'] == 'fasta' or pars['input_type'] == 'fastq':
        bow = pars['bowtie2db'] is not None

        if not bow:
            sys.stderr.write( "No MetaPhlAn BowTie2 database provided\n "
                              "[--bowtie2db and --index options]!\n"
                              "Exiting...\n\n" )
            sys.exit(1)

        if pars['no_map']:
            # Use a throwaway temporary file that is removed after mapping.
            pars['bowtie2out'] = tf.NamedTemporaryFile(dir=pars['tmp_dir']).name
            no_map = True
        else:
            if bow and not pars['bowtie2out']:
                if pars['inp'] and "," in pars['inp']:
                    sys.stderr.write("Error! --bowtie2out needs to be specified when multiple "
                                     "FASTQ or FASTA files (comma separated) are provided\n")
                    sys.exit(1)
                fname = pars['inp']
                if fname is None:
                    fname = "stdin_map"
                elif stat.S_ISFIFO(os.stat(fname).st_mode):
                    fname = "fifo_map"
                pars['bowtie2out'] = fname + ".bowtie2out.txt"

            if os.path.exists( pars['bowtie2out'] ) and not pars['force']:
                sys.stderr.write(
                    "BowTie2 output file detected: " + pars['bowtie2out'] + "\n"
                    "Please use it as input or remove it if you want to "
                    "re-perform the BowTie2 run.\n"
                    "Exiting...\n\n" )
                sys.exit(1)
            if pars['force']:
                if os.path.exists(pars['bowtie2out']):
                    os.remove( pars['bowtie2out'] )

        # Sanity-check that all six Bowtie2 index files exist and that the
        # forward/reverse indexes have comparable sizes (partial downloads).
        bt2_ext = 'bt2l' if SGB_ANALYSIS else 'bt2'
        if bow and not all([os.path.exists(".".join([str(pars['bowtie2db']), p]))
                            for p in ["1." + bt2_ext, "2." + bt2_ext, "3." + bt2_ext, "4." + bt2_ext, "rev.1." + bt2_ext, "rev.2." + bt2_ext]]):
            sys.stderr.write("No MetaPhlAn BowTie2 database found (--index "
                             "option)!\nExpecting location {}\nExiting..."
                             .format(pars['bowtie2db']))
            sys.exit(1)
        if bow and not (abs(os.path.getsize(".".join([str(pars['bowtie2db']), "1." + bt2_ext])) - os.path.getsize(".".join([str(pars['bowtie2db']), "rev.1." + bt2_ext]))) <= 1000):
            sys.stderr.write("Partial MetaPhlAn BowTie2 database found at {}. "
                             "Please remove and rebuild the database.\nExiting..."
                             .format(pars['bowtie2db']))
            sys.exit(1)

        if bow:
            run_bowtie2(pars['inp'], pars['bowtie2out'], pars['bowtie2db'],
                        pars['bt2_ps'], pars['nproc'], file_format=pars['input_type'],
                        exe=pars['bowtie2_exe'], samout=pars['samout'],
                        min_alignment_len=pars['min_alignment_len'], read_min_len=pars['read_min_len'], min_mapq_val=pars['min_mapq_val'])
            # From here on, treat the freshly produced mapping as the input.
            pars['input_type'] = 'bowtie2out'
            pars['inp'] = pars['bowtie2out'] # !!!

    # --- Load the database pickle and build the taxonomy tree --------------
    with bz2.BZ2File( pars['mpa_pkl'], 'r' ) as a:
        mpa_pkl = pickle.load( a )

    REPORT_MERGED = mpa_pkl.get('merged_taxon',False)
    tree = TaxTree( mpa_pkl, ignore_markers )
    tree.set_min_cu_len( pars['min_cu_len'] )

    if pars['input_type'] == 'sam' and not pars['nreads']:
        sys.stderr.write(
            "Please provide the size of the metagenome using the "
            "--nreads parameter when running MetaPhlAn using SAM files as input"
            "\nExiting...\n\n" )
        sys.exit(1)

    markers2reads, n_metagenome_reads, avg_read_length = map2bbh(pars['inp'], pars['min_mapq_val'], pars['input_type'], pars['min_alignment_len'], pars['nreads'], pars['subsampling'], pars['subsampling_seed'])

    tree.set_stat( pars['stat'], pars['stat_q'], pars['perc_nonzero'], avg_read_length, pars['avoid_disqm'])

    if no_map:
        os.remove( pars['inp'] )

    # --- Assign reads to clades through their markers -----------------------
    map_out = []
    for marker,reads in sorted(markers2reads.items(), key=lambda pars: pars[0]):
        if marker not in tree.markers2lens:
            continue
        tax_seq, ids_seq = tree.add_reads( marker, len(reads),
                                           add_viruses = pars['add_viruses'],
                                           ignore_eukaryotes = pars['ignore_eukaryotes'],
                                           ignore_bacteria = pars['ignore_bacteria'],
                                           ignore_archaea = pars['ignore_archaea'],
                                           ignore_ksgbs = pars['ignore_ksgbs'],
                                           ignore_usgbs = pars['ignore_usgbs']
                                           )
        if tax_seq:
            map_out +=["\t".join([r,tax_seq, ids_seq]) for r in sorted(reads)]

    if pars['output'] is None and pars['output_file'] is not None:
        pars['output'] = pars['output_file']

    out_stream = open(pars['output'],"w") if pars['output'] else sys.stdout
    MPA2_OUTPUT = pars['legacy_output']
    CAMI_OUTPUT = pars['CAMI_format_output']

    with out_stream as outf:
        if not MPA2_OUTPUT:
            outf.write('#{}\n'.format(pars['index']))
            outf.write('#{}\n'.format(' '.join(sys.argv)))
            outf.write('#{} reads processed\n'.format(n_metagenome_reads))

        if not CAMI_OUTPUT:
            outf.write('#' + '\t'.join((pars["sample_id_key"], pars["sample_id"])) + '\n')

        # Estimate the fraction of reads mapped to known clades, used to
        # rescale relative abundances when reporting the unclassified share.
        if ESTIMATE_UNK:
            mapped_reads = 0
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            cl2ab, _, _ = tree.relative_abundances( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            confident_taxa = [taxstr for (taxstr, _),relab in cl2ab.items() if relab > 0.0]
            for c, m in cl2pr.items():
                if c in confident_taxa:
                    markers_cov = [a / 1000 for _, a in m if a > 0]
                    mapped_reads += np.mean(markers_cov) * tree.all_clades[c.split('|')[-1]].glen
            # If the mapped reads are over-estimated, set the ratio at 1
            fraction_mapped_reads = min(mapped_reads/float(n_metagenome_reads), 1.0)
        else:
            fraction_mapped_reads = 1.0

        # --- '-t' dispatch: each branch emits one report format -------------
        if pars['t'] == 'reads_map':
            if not MPA2_OUTPUT:
                outf.write('#read_id\tNCBI_taxlineage_str\tNCBI_taxlineage_ids\n')
            outf.write( "\n".join( map_out ) + "\n" )

        elif pars['t'] == 'rel_ab':
            if CAMI_OUTPUT:
                # NOTE(review): the '\n at Version' / '\n at Ranks' fragments look
                # like mailing-list mangling of '@Version' / '@Ranks' -- confirm
                # against upstream before relying on this header string.
                outf.write('''@SampleID:{}\n at Version:0.10.0\n at Ranks:superkingdom|phylum|class|order|family|genus|species|strain\n@@TAXID\tRANK\tTAXPATH\tTAXPATHSN\tPERCENTAGE\n'''.format(pars["sample_id"]))
            if not MPA2_OUTPUT and not CAMI_OUTPUT:
                if not pars['use_group_representative']:
                    outf.write('#clade_name\tNCBI_tax_id\trelative_abundance\tadditional_species\n')
                else:
                    outf.write('#clade_name\tNCBI_tax_id\trelative_abundance\n')

            cl2ab, _, tot_nreads = tree.relative_abundances(
                pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )

            outpred = [(taxstr, taxid,round(relab*100.0,5)) for (taxstr, taxid), relab in cl2ab.items() if relab > 0.0]
            has_repr = False

            if outpred:
                if CAMI_OUTPUT:
                    # Sort by abundance, biased so higher taxonomic ranks
                    # (fewer '|' separators) come first.
                    for clade, taxid, relab in sorted(  outpred, reverse=True,
                                        key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))):
                        if taxid and clade.split('|')[-1][0] != 't':
                            rank = ranks2code[clade.split('|')[-1][0]]
                            leaf_taxid = taxid.split('|')[-1]
                            taxpathsh = '|'.join([remove_prefix(name) if '_unclassified' not in name else '' for name in clade.split('|')])
                            outf.write( '\t'.join( [ leaf_taxid, rank, taxid, taxpathsh, str(relab*fraction_mapped_reads) ] ) + '\n' )
                else:
                    if ESTIMATE_UNK:
                        outf.write( "\t".join( [    "UNCLASSIFIED",
                                                    "-1",
                                                    str(round((1-fraction_mapped_reads)*100,5)),""]) + "\n" )

                    for clade, taxid, relab in sorted(  outpred, reverse=True,
                                        key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))):
                        add_repr = ''
                        if REPORT_MERGED and (clade, taxid) in mpa_pkl['merged_taxon']:
                            if pars['use_group_representative'] and not SGB_ANALYSIS:
                                if '_group' in clade:
                                    clade, taxid, _ = sorted(mpa_pkl['merged_taxon'][(clade, taxid)], key=lambda x:x[2], reverse=True)[0]
                            elif not pars['use_group_representative']:
                                add_repr = '{}'.format(','.join( [ n[0] for n in mpa_pkl['merged_taxon'][(clade, taxid)]] ))
                                has_repr = True
                        if not MPA2_OUTPUT:
                            outf.write( "\t".join( [clade,
                                                    taxid,
                                                    str(relab*fraction_mapped_reads),
                                                    add_repr
                                                    ] ) + "\n" )
                        else:
                            outf.write( "\t".join( [clade,
                                                    str(relab*fraction_mapped_reads)] ) + "\n" )
                if REPORT_MERGED and has_repr:
                    sys.stderr.write("WARNING: The metagenome profile contains clades that represent multiple species merged into a single representant.\n"
                                     "An additional column listing the merged species is added to the MetaPhlAn output.\n"
                                     )
            else:
                if not MPA2_OUTPUT:
                    outf.write( "UNCLASSIFIED\t-1\t100.0\t\n" )
                else:
                    outf.write( "UNCLASSIFIED\t100.0\n" )
                sys.stderr.write("WARNING: MetaPhlAn did not detect any microbial taxa in the sample.\n")
            maybe_generate_biom_file(tree, pars, outpred)

        elif pars['t'] == 'rel_ab_w_read_stats':
            cl2ab, rr, tot_nreads = tree.relative_abundances(
                pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )

            unmapped_reads = max(n_metagenome_reads - tot_nreads, 0)

            outpred = [(taxstr, taxid,round(relab*100.0*fraction_mapped_reads,5)) for (taxstr, taxid),relab in cl2ab.items() if relab > 0.0]

            if outpred:
                outf.write( "#estimated_reads_mapped_to_known_clades:{}\n".format(round(tot_nreads)) )
                outf.write( "\t".join( [    "#clade_name",
                                            "clade_taxid",
                                            "relative_abundance",
                                            "coverage",
                                            "estimated_number_of_reads_from_the_clade" ]) +"\n" )
                if ESTIMATE_UNK:
                    outf.write( "\t".join( [    "UNCLASSIFIED",
                                                "-1",
                                                str(round((1-fraction_mapped_reads)*100,5)),
                                                "-",
                                                str(round(unmapped_reads)) ]) + "\n" )

                for taxstr, taxid, relab in sorted(  outpred, reverse=True,
                                    key=lambda x:x[2]+(100.0*(8-(x[0].count("|"))))):
                    outf.write( "\t".join( [    taxstr,
                                                taxid,
                                                str(relab),
                                                str(round(rr[(taxstr, taxid)][0],5)) if (taxstr, taxid) in rr else '-', #coverage
                                                str( int( round( rr[(taxstr, taxid)][1], 0) )  if (taxstr, taxid) in rr else '-') #estimated_number_of_reads_from_the_clade
                                                ] ) + "\n" )
            else:
                if not MPA2_OUTPUT:
                    outf.write( "#estimated_reads_mapped_to_known_clades:0\n")
                    outf.write( "\t".join( [    "#clade_name",
                                                "clade_taxid",
                                                "relative_abundance",
                                                "coverage",
                                                "estimated_number_of_reads_from_the_clade" ]) +"\n" )
                    outf.write( "unclassified\t-1\t100.0\t0\t0\n" )
                else:
                    outf.write( "unclassified\t100.0\n" )
            maybe_generate_biom_file(tree, pars, outpred)

        elif pars['t'] == 'clade_profiles':
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            for c,p in cl2pr.items():
                mn,n = zip(*p)
                outf.write( "\t".join( [""]+[str(s) for s in mn] ) + "\n" )
                outf.write( "\t".join( [c]+[str(s) for s in n] ) + "\n" )

        elif pars['t'] == 'marker_ab_table':
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            for v in cl2pr.values():
                outf.write( "\n".join(["\t".join([str(a),str(b/float(pars['nreads'])) if pars['nreads'] else str(b)])
                                       for a,b in v if b > 0.0]) + "\n" )

        elif pars['t'] == 'marker_pres_table':
            cl2pr = tree.clade_profiles( pars['tax_lev']+"__" if pars['tax_lev'] != 'a' else None )
            for v in cl2pr.values():
                strout = ["\t".join([str(a),"1"]) for a,b in v if b > pars['pres_th']]
                if strout:
                    outf.write( "\n".join(strout) + "\n" )

        elif pars['t'] == 'marker_counts':
            outf.write( "\n".join( ["\t".join([m,str(c)]) for m,c in tree.markers2counts().items() ]) +"\n" )

        elif pars['t'] == 'clade_specific_strain_tracker':
            cl2pr = tree.clade_profiles( None, get_all = True )
            cl2ab, _, _ = tree.relative_abundances( None )
            strout = []
            for (taxstr, taxid), relab in cl2ab.items():
                clade = taxstr
                # Abort entirely if the target clade is below --min_ab.
                if clade.endswith(pars['clade']) and relab*100.0 < pars['min_ab']:
                    strout = []
                    break
                if pars['clade'] in clade:
                    strout += ["\t".join([str(a),str(int(b > pars['pres_th']))]) for a,b in cl2pr[clade]]
            if strout:
                strout = sorted(strout,key=lambda x:x[0])
                outf.write( "\n".join(strout) + "\n" )
            else:
                sys.stderr.write("Clade "+pars['clade']+" not present at an abundance >"+str(round(pars['min_ab'],2))+"%, "
                                 "so no clade specific markers are reported\n")
+
+
if __name__ == '__main__':
    # Time the full run and report the wall-clock duration on stderr.
    start_time = time.time()
    main()
    elapsed = time.time() - start_time
    sys.stderr.write('Elapsed time to run MetaPhlAn: {} s\n'.format(elapsed))
+
=====================================
debian/control
=====================================
@@ -24,6 +24,7 @@ Depends: ${python3:Depends},
python3-biom-format,
python3-msgpack,
python3-pandas,
+ python3-packaging,
bowtie2
Conflicts: metaphlan2
Provides: metaphlan2
=====================================
debian/patches/series
=====================================
@@ -0,0 +1 @@
+use-packaging-version.patch
=====================================
debian/patches/use-packaging-version.patch
=====================================
@@ -0,0 +1,22 @@
+Description: Replace deprecated distutils.version with packaging.version
+Author: Harish Chavre <harrych_1 at yahoo.com>
+--- a/metaphlan/metaphlan.py
++++ b/metaphlan/metaphlan.py
+@@ -23,7 +23,7 @@
+ import time
+ import random
+ from collections import defaultdict as defdict
+-from distutils.version import LooseVersion
++from packaging.version import parse
+ from glob import glob
+ from subprocess import DEVNULL
+ import argparse as ap
+@@ -947,7 +947,7 @@
+ # run. *
+ # George Weingart 05/22/2017 george.weingart at mail.com *
+ #**********************************************************************************************
+- if LooseVersion(biom.__version__) < LooseVersion("2.0.0"):
++ if parse(biom.__version__) < parse("2.0.0"):
+ biom_table = biom.table.table_factory(
+ data,
+ sample_ids,
=====================================
debian/tests/.pc/.quilt_patches
=====================================
@@ -0,0 +1 @@
+debian/patches
=====================================
debian/tests/.pc/.quilt_series
=====================================
@@ -0,0 +1 @@
+series
=====================================
debian/tests/.pc/.version
=====================================
@@ -0,0 +1 @@
+2
=====================================
debian/tests/control
=====================================
@@ -0,0 +1,4 @@
+Tests: run-unit-test
+Depends: @, bowtie2, metaphlan
+Restrictions: allow-stderr
+
=====================================
debian/tests/data/Test.fastq
=====================================
The diff for this file was not included because it is too large.
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.1.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.1.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.2.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.2.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.3.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.3.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.4.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.4.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.pkl
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.pkl differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.1.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.1.bt2l differ
=====================================
debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.2.bt2l
=====================================
Binary files /dev/null and b/debian/tests/data/mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910.rev.2.bt2l differ
=====================================
debian/tests/run-unit-test
=====================================
@@ -0,0 +1,27 @@
+#!/bin/sh
+set -e
+
+pkg=metaphlan
+
+export LC_ALL=C.UTF-8
+if [ "${AUTOPKGTEST_TMP}" = "" ] ; then
+ AUTOPKGTEST_TMP=$(mktemp -d /tmp/${pkg}-test.XXXXXX)
+ trap "rm -rf ${AUTOPKGTEST_TMP}" 0 INT QUIT ABRT PIPE TERM
+fi
+
+cp -a "$(dirname "$0")/data/"* "${AUTOPKGTEST_TMP}/"
+
+cd "${AUTOPKGTEST_TMP}"
+
+metaphlan Test.fastq \
+ --input_type fastq \
+ --bowtie2db . \
+ --index mpa_vFeb24_CDIFF_CHOCOPhlAnSGB_20240910 \
+ -o result.txt
+
+if [ -s result.txt ]; then
+ echo "MetaPhlAn test passed"
+else
+ echo "MetaPhlAn test failed"
+ exit 1
+fi
=====================================
metaphlan/metaphlan.py
=====================================
@@ -23,7 +23,7 @@ import re
import time
import random
from collections import defaultdict as defdict
-from distutils.version import LooseVersion
+from packaging.version import parse
from glob import glob
from subprocess import DEVNULL
import argparse as ap
@@ -947,7 +947,7 @@ def maybe_generate_biom_file(tree, pars, abundance_predictions):
# run. *
# George Weingart 05/22/2017 george.weingart at mail.com *
#**********************************************************************************************
- if LooseVersion(biom.__version__) < LooseVersion("2.0.0"):
+ if parse(biom.__version__) < parse("2.0.0"):
biom_table = biom.table.table_factory(
data,
sample_ids,
View it on GitLab: https://salsa.debian.org/med-team/metaphlan/-/compare/e386ea765e93f574bf43b45044555b5d9f4a45b2...c9b7630a9521eff9b5ae59fb1e0774ce8b50de0a
--
View it on GitLab: https://salsa.debian.org/med-team/metaphlan/-/compare/e386ea765e93f574bf43b45044555b5d9f4a45b2...c9b7630a9521eff9b5ae59fb1e0774ce8b50de0a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250503/74af299b/attachment-0001.htm>
More information about the debian-med-commit
mailing list