[med-svn] [Git][med-team/metaphlan2][upstream] New upstream version 2.7.6

Mon May 28 07:26:35 BST 2018

Andreas Tille pushed to branch upstream at Debian Med / metaphlan2


Commits:
51196ec9 by Andreas Tille at 2018-05-28T07:27:16+02:00
New upstream version 2.7.6
- - - - -


7 changed files:

- .hg_archival.txt
- .hgtags
- metaphlan2.py
- strainphlan_src/add_metadata_tree.py
- strainphlan_src/compute_distance_all.py
- strainphlan_src/plot_tree_ete2.py
- utils/merge_metaphlan_tables.py


Changes:

=====================================
.hg_archival.txt
=====================================

--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,4 +1,4 @@
 repo: 092c2fe2278cb7f0b18d81faeb4aab98b89dc096
-node: b2f9b3286d4be376805e3b5c26cf141ed375c605
+node: cbd7880df400b453b8beb4e62b39e4a23b5523b6
 branch: default
-tag: 2.7.5
+tag: 2.7.6


=====================================
.hgtags
=====================================
--- a/.hgtags
+++ b/.hgtags
@@ -11,3 +11,6 @@ e424931b4d94d50cf62381c79c335935415b33b9 2.5.0
 8963e486f79043c79a299f7a684e4550b0115c32 2.7.0
 d8ab9ca4244c09a7a4995042a99fbba1e3598ac0 2.7.1
 a1fe0d15320c04f69d56f1b7dd31cff972a7b8df 2.7.2
+b2f9b3286d4be376805e3b5c26cf141ed375c605 2.7.5
+847b250adbe97b9f4adc7e15f0d4bb5a66e782ec 2.7.4
+178d1aaf4ac76e5d5477833e8e614104dcd32088 2.7.3


=====================================
metaphlan2.py
=====================================
--- a/metaphlan2.py
+++ b/metaphlan2.py
@@ -16,8 +16,8 @@ from __future__ import with_statement
 __author__ = ('Nicola Segata (nicola.segata at unitn.it), '
               'Duy Tin Truong, '
               'Francesco Asnicar (f.asnicar at unitn.it)')
-__version__ = '2.7.5'
-__date__ = '6 February 2018'
+__version__ = '2.7.6'
+__date__ = '2 March 2018'
 
 
 import sys
@@ -433,7 +433,7 @@ def read_params(args):
             "\n------------------------------------------------------------------- \n \n\n"
 
 
-            "\n========== MetaPhlAn 2 strain tracking ============================ \n\n"
+            "\n========== Marker level analysis ============================ \n\n"
             "MetaPhlAn 2 introduces the capability of charachterizing organisms at the strain level using non\n"
             "aggregated marker information. Such capability comes with several slightly different flavours and \n"
             "are a way to perform strain tracking and comparison across multiple samples.\n"
@@ -499,18 +499,16 @@ def read_params(args):
     arg('--mpa_pkl', type=str, default=None,
         help="The metadata pickled MetaPhlAn file [deprecated]")
 
-    arg('--bowtie2db', metavar="METAPHLAN_BOWTIE2_DB", type=str, default=None,
+    arg('--bowtie2db', metavar="METAPHLAN_BOWTIE2_DB", type=str, default=DEFAULT_DB_FOLDER,
         help=("The BowTie2 database file of the MetaPhlAn database. Used if "
-              "--input_type is fastq, fasta, multifasta, or multifastq "
-              "[deprecated]"))
+              "--input_type is fastq, fasta, multifasta, or multifastq [default "+DEFAULT_DB_FOLDER+"]\n"))
 
     arg('-x', '--index', type=str, default='v20_m200',
-        help=("Specify the id of the database version to use. If the database "
-              "files are not found on the local MetaPhlAn2 installation they "
-              "will be automatically downloaded"))
+        help=("Specify the id of the database version to use. If the database\n"
+              "files are not found on the local MetaPhlAn2 installation they\n"
+              "will be automatically downloaded\n"))
 
-    bt2ps = ['sensitive', 'very-sensitive', 'sensitive-local',
-             'very-sensitive-local']
+    bt2ps = ['sensitive', 'very-sensitive', 'sensitive-local', 'very-sensitive-local']
     arg('--bt2_ps', metavar="BowTie2 presets", default='very-sensitive',
         choices=bt2ps, help="Presets options for BowTie2 (applied only when a "
                             "multifasta file is provided)\n"
@@ -812,29 +810,32 @@ def download_unpack_tar(url, download_file_name, folder, bowtie2_build, nproc):
             sys.stderr.write("Fatal error running '{}'\nError message: '{}'\n\n".format(' '.join(bt2_cmd), e))
             sys.exit(1)
 
+    sys.stderr.write('Removing uncompress database {}\n'.format(fna_file))
+    os.remove(fna_file)
 
-def check_and_install_database(index, bowtie2_build, nproc):
+
+def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc):
     """ Check if the database is installed, if not download and install """
 
-    if len(glob(os.path.join(DEFAULT_DB_FOLDER, "mpa_{}*".format(index)))) >= 7:
+    if len(glob(os.path.join(bowtie2_db, "mpa_{}*".format(index)))) >= 7:
         return
 
     # download the tar archive and decompress
     sys.stderr.write("\nDownloading MetaPhlAn2 database\nPlease note due to "
                      "the size this might take a few minutes\n")
-    download_unpack_tar(DATABASE_DOWNLOAD, index, DEFAULT_DB_FOLDER, bowtie2_build, nproc)
+    download_unpack_tar(DATABASE_DOWNLOAD, index, bowtie2_db, bowtie2_build, nproc)
     sys.stderr.write("\nDownload complete\n")
 
 
-def set_mapping_arguments(index):
+def set_mapping_arguments(index, bowtie2_db):
     mpa_pkl = 'mpa_pkl'
     bowtie2db = 'bowtie2db'
 
-    if os.path.isfile(os.path.join(DEFAULT_DB_FOLDER, "mpa_{}.pkl".format(index))):
-        mpa_pkl = os.path.join(DEFAULT_DB_FOLDER, "mpa_{}.pkl".format(index))
+    if os.path.isfile(os.path.join(bowtie2_db, "mpa_{}.pkl".format(index))):
+        mpa_pkl = os.path.join(bowtie2_db, "mpa_{}.pkl".format(index))
 
-    if glob(os.path.join(DEFAULT_DB_FOLDER, "mpa_{}*.bt2".format(index))):
-        bowtie2db = os.path.join(DEFAULT_DB_FOLDER, "mpa_{}".format(index))
+    if glob(os.path.join(bowtie2_db, "mpa_{}*.bt2".format(index))):
+        bowtie2db = os.path.join(bowtie2_db, "mpa_{}".format(index))
 
     return (mpa_pkl, bowtie2db)
 
@@ -1354,15 +1355,14 @@ def metaphlan2():
     pars = read_params(sys.argv)
 
     # check if the database is installed, if not then install
-    check_and_install_database(pars['index'], pars['bowtie2_build'],
-                               pars['nproc'])
+    check_and_install_database(pars['index'], pars['bowtie2db'], pars['bowtie2_build'], pars['nproc'])
 
     if pars['install']:
         sys.stderr.write('The database is installed\n')
         return
 
     # set correct map_pkl and bowtie2db variables
-    pars['mpa_pkl'], pars['bowtie2db'] = set_mapping_arguments(pars['index'])
+    pars['mpa_pkl'], pars['bowtie2db'] = set_mapping_arguments(pars['index'], pars['bowtie2db'])
 
     #if pars['inp'] is None and ( pars['input_type'] is None or  pars['input_type'] == 'automatic'):
     #    sys.stderr.write( "The --input_type parameter need top be specified when the "
@@ -1440,7 +1440,7 @@ def metaphlan2():
                             for p in ["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2"]]):
             sys.stderr.write("No MetaPhlAn BowTie2 database found (--index "
                              "option)!\nExpecting location {}\nExiting..."
-                             .format(DEFAULT_DB_FOLDER))
+                             .format(pars['bowtie2db']))
             sys.exit(1)
 
         if bow:


=====================================
strainphlan_src/add_metadata_tree.py
=====================================
--- a/strainphlan_src/add_metadata_tree.py
+++ b/strainphlan_src/add_metadata_tree.py
@@ -11,7 +11,7 @@ import copy
 import ConfigParser
 import dendropy
 import numpy
-import ipdb
+# import ipdb
 
 
 def read_params():


=====================================
strainphlan_src/compute_distance_all.py
=====================================


=====================================
strainphlan_src/plot_tree_ete2.py
=====================================


=====================================
utils/merge_metaphlan_tables.py
=====================================
--- a/utils/merge_metaphlan_tables.py
+++ b/utils/merge_metaphlan_tables.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# ============================================================================== 
+# ==============================================================================
 # Merge script: from MetaPhlAn output on single sample to a joined "clades vs samples" table
 # Authors: Timothy Tickle (ttickle at hsph.harvard.edu) and Curtis Huttenhower (chuttenh at hsph.harvard.edu)
 # ==============================================================================
@@ -12,82 +12,85 @@ import sys
 
 
 def merge( aaastrIn, astrLabels, iCol, ostm ):
-	"""
-	Outputs the table join of the given pre-split string collection.
-	
-	:param	aaastrIn:	One or more split lines from which data are read.
-	:type	aaastrIn:	collection of collections of string collections
-	:param	astrLabels:	File names of input data.
-	:type	astrLabels:	collection of strings
-	:param	iCol:		Data column in which IDs are matched (zero-indexed).
-	:type	iCol:		int
-	:param	ostm:		Output stream to which matched rows are written.
-	:type	ostm:		output stream
-
-	"""
-	
-	setstrIDs = set()
-	"""The final set of all IDs in any table."""
-	ahashIDs = [{} for i in range( len( aaastrIn ) )]
-	"""One hash of IDs to row numbers for each input datum."""
-	aaastrData = [[] for i in range( len( aaastrIn ) )]
-	"""One data table for each input datum."""
-	aastrHeaders = [[] for i in range( len( aaastrIn ) )]
-	"""The list of non-ID headers for each input datum."""
-	strHeader = "ID"
-	"""The ID column header."""
-
-	# For each input datum in each input stream...
-	pos = 0
-
-	for f in aaastrIn :
-		with open(f) as csvfile :
-			iIn = csv.reader(csvfile, csv.excel_tab)
-
-			# Lines from the current file, empty list to hold data, empty hash to hold ids
-			aastrData, hashIDs = (a[pos] for a in (aaastrData, ahashIDs))
-
-			iLine = -1
-			# For a line in the file
-			for astrLine in iIn:
-				iLine += 1
-
-				# ID is from first column, data are everything else
-				strID, astrData = astrLine[iCol], ( astrLine[:iCol] + astrLine[( iCol + 1 ):] )
-
-				hashIDs[strID] = iLine
-				aastrData.append( astrData )
-
-			# Batch merge every new ID key set
-			setstrIDs.update( hashIDs.keys( ) )
-
-		pos += 1
-
-	# Create writer
-	csvw = csv.writer( ostm, csv.excel_tab, lineterminator='\n' )
-
-	# Make the file names the column names
-	csvw.writerow( [strHeader] + [os.path.splitext(f)[0] for f in astrLabels] )
-
-	# Write out data
-	for strID in sorted( setstrIDs ):
-		astrOut = []
-		for iIn in range( len( aaastrIn ) ):
-			aastrData, hashIDs = (a[iIn] for a in (aaastrData, ahashIDs))
-			# Look up the row number of the current ID in the current dataset, if any
-			iID = hashIDs.get( strID )
-			# If not, start with no data; if yes, pull out stored data row
-			astrData = [0.0] if ( iID == None ) else aastrData[iID]
-			# Pad output data as needed
-			astrData += [None] * ( len( aastrHeaders[iIn] ) - len( astrData ) )
-			astrOut += astrData
-		csvw.writerow( [strID] + astrOut )
+    """
+    Outputs the table join of the given pre-split string collection.
+
+    :param  aaastrIn:   One or more split lines from which data are read.
+    :type   aaastrIn:   collection of collections of string collections
+    :param  astrLabels: File names of input data.
+    :type   astrLabels: collection of strings
+    :param  iCol:       Data column in which IDs are matched (zero-indexed).
+    :type   iCol:       int
+    :param  ostm:       Output stream to which matched rows are written.
+    :type   ostm:       output stream
+
+    """
+
+    setstrIDs = set()
+    """The final set of all IDs in any table."""
+    ahashIDs = [{} for i in range( len( aaastrIn ) )]
+    """One hash of IDs to row numbers for each input datum."""
+    aaastrData = [[] for i in range( len( aaastrIn ) )]
+    """One data table for each input datum."""
+    aastrHeaders = [[] for i in range( len( aaastrIn ) )]
+    """The list of non-ID headers for each input datum."""
+    strHeader = "ID"
+    """The ID column header."""
+
+    # For each input datum in each input stream...
+    pos = 0
+
+    for f in aaastrIn :
+        with open(f) as csvfile :
+            iIn = csv.reader(csvfile, csv.excel_tab)
+
+            # Lines from the current file, empty list to hold data, empty hash to hold ids
+            aastrData, hashIDs = (a[pos] for a in (aaastrData, ahashIDs))
+
+            iLine = -1
+            # For a line in the file
+            for astrLine in iIn:
+                if astrLine[0].startswith('#'):
+                    continue
+
+                iLine += 1
+
+                # ID is from first column, data are everything else
+                strID, astrData = astrLine[iCol], ( astrLine[:iCol] + astrLine[( iCol + 1 ):] )
+
+                hashIDs[strID] = iLine
+                aastrData.append( astrData )
+
+            # Batch merge every new ID key set
+            setstrIDs.update( hashIDs.keys( ) )
+
+        pos += 1
+
+    # Create writer
+    csvw = csv.writer( ostm, csv.excel_tab, lineterminator='\n' )
+
+    # Make the file names the column names
+    csvw.writerow( [strHeader] + [os.path.splitext(f)[0] for f in astrLabels] )
+
+    # Write out data
+    for strID in sorted( setstrIDs ):
+        astrOut = []
+        for iIn in range( len( aaastrIn ) ):
+            aastrData, hashIDs = (a[iIn] for a in (aaastrData, ahashIDs))
+            # Look up the row number of the current ID in the current dataset, if any
+            iID = hashIDs.get( strID )
+            # If not, start with no data; if yes, pull out stored data row
+            astrData = [0.0] if ( iID == None ) else aastrData[iID]
+            # Pad output data as needed
+            astrData += [None] * ( len( aastrHeaders[iIn] ) - len( astrData ) )
+            astrOut += astrData
+        csvw.writerow( [strID] + astrOut )
 
 
 argp = argparse.ArgumentParser( prog = "merge_metaphlan_tables.py",
-	description = """Performs a table join on one or more metaphlan output files.""")
-argp.add_argument( "aistms",	metavar = "input.txt", nargs = "+",
-	help = "One or more tab-delimited text tables to join" )
+    description = """Performs a table join on one or more metaphlan output files.""")
+argp.add_argument( "aistms",    metavar = "input.txt", nargs = "+",
+    help = "One or more tab-delimited text tables to join" )
 
 __doc__ = "::\n\n\t" + argp.format_help( ).replace( "\n", "\n\t" )
 
@@ -95,9 +98,9 @@ argp.usage = argp.format_usage()[7:]+"\n\n\tPlease make sure to supply file path
 
 
 def _main( ):
-	args = argp.parse_args( )
-	merge(args.aistms, [os.path.split(os.path.basename(f))[1] for f in args.aistms], 0, sys.stdout)
+    args = argp.parse_args( )
+    merge(args.aistms, [os.path.split(os.path.basename(f))[1] for f in args.aistms], 0, sys.stdout)
 
 
 if __name__ == "__main__":
-	_main( )
+    _main( )



View it on GitLab: https://salsa.debian.org/med-team/metaphlan2/commit/51196ec977e34d401797054d8ecb78987e14dde4

-- 
View it on GitLab: https://salsa.debian.org/med-team/metaphlan2/commit/51196ec977e34d401797054d8ecb78987e14dde4
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180528/094841f6/attachment-0001.html>