[med-svn] [Git][med-team/metaphlan2][upstream] New upstream version 2.7.6
Andreas Tille
gitlab at salsa.debian.org
Mon May 28 07:26:35 BST 2018
Andreas Tille pushed to branch upstream at Debian Med / metaphlan2
Commits:
51196ec9 by Andreas Tille at 2018-05-28T07:27:16+02:00
New upstream version 2.7.6
- - - - -
7 changed files:
- .hg_archival.txt
- .hgtags
- metaphlan2.py
- strainphlan_src/add_metadata_tree.py
- strainphlan_src/compute_distance_all.py
- strainphlan_src/plot_tree_ete2.py
- utils/merge_metaphlan_tables.py
Changes:
=====================================
.hg_archival.txt
=====================================
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,4 +1,4 @@
repo: 092c2fe2278cb7f0b18d81faeb4aab98b89dc096
-node: b2f9b3286d4be376805e3b5c26cf141ed375c605
+node: cbd7880df400b453b8beb4e62b39e4a23b5523b6
branch: default
-tag: 2.7.5
+tag: 2.7.6
=====================================
.hgtags
=====================================
--- a/.hgtags
+++ b/.hgtags
@@ -11,3 +11,6 @@ e424931b4d94d50cf62381c79c335935415b33b9 2.5.0
8963e486f79043c79a299f7a684e4550b0115c32 2.7.0
d8ab9ca4244c09a7a4995042a99fbba1e3598ac0 2.7.1
a1fe0d15320c04f69d56f1b7dd31cff972a7b8df 2.7.2
+b2f9b3286d4be376805e3b5c26cf141ed375c605 2.7.5
+847b250adbe97b9f4adc7e15f0d4bb5a66e782ec 2.7.4
+178d1aaf4ac76e5d5477833e8e614104dcd32088 2.7.3
=====================================
metaphlan2.py
=====================================
--- a/metaphlan2.py
+++ b/metaphlan2.py
@@ -16,8 +16,8 @@ from __future__ import with_statement
__author__ = ('Nicola Segata (nicola.segata at unitn.it), '
'Duy Tin Truong, '
'Francesco Asnicar (f.asnicar at unitn.it)')
-__version__ = '2.7.5'
-__date__ = '6 February 2018'
+__version__ = '2.7.6'
+__date__ = '2 March 2018'
import sys
@@ -433,7 +433,7 @@ def read_params(args):
"\n------------------------------------------------------------------- \n \n\n"
- "\n========== MetaPhlAn 2 strain tracking ============================ \n\n"
+ "\n========== Marker level analysis ============================ \n\n"
"MetaPhlAn 2 introduces the capability of charachterizing organisms at the strain level using non\n"
"aggregated marker information. Such capability comes with several slightly different flavours and \n"
"are a way to perform strain tracking and comparison across multiple samples.\n"
@@ -499,18 +499,16 @@ def read_params(args):
arg('--mpa_pkl', type=str, default=None,
help="The metadata pickled MetaPhlAn file [deprecated]")
- arg('--bowtie2db', metavar="METAPHLAN_BOWTIE2_DB", type=str, default=None,
+ arg('--bowtie2db', metavar="METAPHLAN_BOWTIE2_DB", type=str, default=DEFAULT_DB_FOLDER,
help=("The BowTie2 database file of the MetaPhlAn database. Used if "
- "--input_type is fastq, fasta, multifasta, or multifastq "
- "[deprecated]"))
+ "--input_type is fastq, fasta, multifasta, or multifastq [default "+DEFAULT_DB_FOLDER+"]\n"))
arg('-x', '--index', type=str, default='v20_m200',
- help=("Specify the id of the database version to use. If the database "
- "files are not found on the local MetaPhlAn2 installation they "
- "will be automatically downloaded"))
+ help=("Specify the id of the database version to use. If the database\n"
+ "files are not found on the local MetaPhlAn2 installation they\n"
+ "will be automatically downloaded\n"))
- bt2ps = ['sensitive', 'very-sensitive', 'sensitive-local',
- 'very-sensitive-local']
+ bt2ps = ['sensitive', 'very-sensitive', 'sensitive-local', 'very-sensitive-local']
arg('--bt2_ps', metavar="BowTie2 presets", default='very-sensitive',
choices=bt2ps, help="Presets options for BowTie2 (applied only when a "
"multifasta file is provided)\n"
@@ -812,29 +810,32 @@ def download_unpack_tar(url, download_file_name, folder, bowtie2_build, nproc):
sys.stderr.write("Fatal error running '{}'\nError message: '{}'\n\n".format(' '.join(bt2_cmd), e))
sys.exit(1)
+ sys.stderr.write('Removing uncompress database {}\n'.format(fna_file))
+ os.remove(fna_file)
-def check_and_install_database(index, bowtie2_build, nproc):
+
+def check_and_install_database(index, bowtie2_db, bowtie2_build, nproc):
""" Check if the database is installed, if not download and install """
- if len(glob(os.path.join(DEFAULT_DB_FOLDER, "mpa_{}*".format(index)))) >= 7:
+ if len(glob(os.path.join(bowtie2_db, "mpa_{}*".format(index)))) >= 7:
return
# download the tar archive and decompress
sys.stderr.write("\nDownloading MetaPhlAn2 database\nPlease note due to "
"the size this might take a few minutes\n")
- download_unpack_tar(DATABASE_DOWNLOAD, index, DEFAULT_DB_FOLDER, bowtie2_build, nproc)
+ download_unpack_tar(DATABASE_DOWNLOAD, index, bowtie2_db, bowtie2_build, nproc)
sys.stderr.write("\nDownload complete\n")
-def set_mapping_arguments(index):
+def set_mapping_arguments(index, bowtie2_db):
mpa_pkl = 'mpa_pkl'
bowtie2db = 'bowtie2db'
- if os.path.isfile(os.path.join(DEFAULT_DB_FOLDER, "mpa_{}.pkl".format(index))):
- mpa_pkl = os.path.join(DEFAULT_DB_FOLDER, "mpa_{}.pkl".format(index))
+ if os.path.isfile(os.path.join(bowtie2_db, "mpa_{}.pkl".format(index))):
+ mpa_pkl = os.path.join(bowtie2_db, "mpa_{}.pkl".format(index))
- if glob(os.path.join(DEFAULT_DB_FOLDER, "mpa_{}*.bt2".format(index))):
- bowtie2db = os.path.join(DEFAULT_DB_FOLDER, "mpa_{}".format(index))
+ if glob(os.path.join(bowtie2_db, "mpa_{}*.bt2".format(index))):
+ bowtie2db = os.path.join(bowtie2_db, "mpa_{}".format(index))
return (mpa_pkl, bowtie2db)
@@ -1354,15 +1355,14 @@ def metaphlan2():
pars = read_params(sys.argv)
# check if the database is installed, if not then install
- check_and_install_database(pars['index'], pars['bowtie2_build'],
- pars['nproc'])
+ check_and_install_database(pars['index'], pars['bowtie2db'], pars['bowtie2_build'], pars['nproc'])
if pars['install']:
sys.stderr.write('The database is installed\n')
return
# set correct map_pkl and bowtie2db variables
- pars['mpa_pkl'], pars['bowtie2db'] = set_mapping_arguments(pars['index'])
+ pars['mpa_pkl'], pars['bowtie2db'] = set_mapping_arguments(pars['index'], pars['bowtie2db'])
#if pars['inp'] is None and ( pars['input_type'] is None or pars['input_type'] == 'automatic'):
# sys.stderr.write( "The --input_type parameter need top be specified when the "
@@ -1440,7 +1440,7 @@ def metaphlan2():
for p in ["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2"]]):
sys.stderr.write("No MetaPhlAn BowTie2 database found (--index "
"option)!\nExpecting location {}\nExiting..."
- .format(DEFAULT_DB_FOLDER))
+ .format(pars['bowtie2db']))
sys.exit(1)
if bow:
=====================================
strainphlan_src/add_metadata_tree.py
=====================================
--- a/strainphlan_src/add_metadata_tree.py
+++ b/strainphlan_src/add_metadata_tree.py
@@ -11,7 +11,7 @@ import copy
import ConfigParser
import dendropy
import numpy
-import ipdb
+# import ipdb
def read_params():
=====================================
strainphlan_src/compute_distance_all.py
=====================================
=====================================
strainphlan_src/plot_tree_ete2.py
=====================================
=====================================
utils/merge_metaphlan_tables.py
=====================================
--- a/utils/merge_metaphlan_tables.py
+++ b/utils/merge_metaphlan_tables.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-# ==============================================================================
+# ==============================================================================
# Merge script: from MetaPhlAn output on single sample to a joined "clades vs samples" table
# Authors: Timothy Tickle (ttickle at hsph.harvard.edu) and Curtis Huttenhower (chuttenh at hsph.harvard.edu)
# ==============================================================================
@@ -12,82 +12,85 @@ import sys
def merge( aaastrIn, astrLabels, iCol, ostm ):
- """
- Outputs the table join of the given pre-split string collection.
-
- :param aaastrIn: One or more split lines from which data are read.
- :type aaastrIn: collection of collections of string collections
- :param astrLabels: File names of input data.
- :type astrLabels: collection of strings
- :param iCol: Data column in which IDs are matched (zero-indexed).
- :type iCol: int
- :param ostm: Output stream to which matched rows are written.
- :type ostm: output stream
-
- """
-
- setstrIDs = set()
- """The final set of all IDs in any table."""
- ahashIDs = [{} for i in range( len( aaastrIn ) )]
- """One hash of IDs to row numbers for each input datum."""
- aaastrData = [[] for i in range( len( aaastrIn ) )]
- """One data table for each input datum."""
- aastrHeaders = [[] for i in range( len( aaastrIn ) )]
- """The list of non-ID headers for each input datum."""
- strHeader = "ID"
- """The ID column header."""
-
- # For each input datum in each input stream...
- pos = 0
-
- for f in aaastrIn :
- with open(f) as csvfile :
- iIn = csv.reader(csvfile, csv.excel_tab)
-
- # Lines from the current file, empty list to hold data, empty hash to hold ids
- aastrData, hashIDs = (a[pos] for a in (aaastrData, ahashIDs))
-
- iLine = -1
- # For a line in the file
- for astrLine in iIn:
- iLine += 1
-
- # ID is from first column, data are everything else
- strID, astrData = astrLine[iCol], ( astrLine[:iCol] + astrLine[( iCol + 1 ):] )
-
- hashIDs[strID] = iLine
- aastrData.append( astrData )
-
- # Batch merge every new ID key set
- setstrIDs.update( hashIDs.keys( ) )
-
- pos += 1
-
- # Create writer
- csvw = csv.writer( ostm, csv.excel_tab, lineterminator='\n' )
-
- # Make the file names the column names
- csvw.writerow( [strHeader] + [os.path.splitext(f)[0] for f in astrLabels] )
-
- # Write out data
- for strID in sorted( setstrIDs ):
- astrOut = []
- for iIn in range( len( aaastrIn ) ):
- aastrData, hashIDs = (a[iIn] for a in (aaastrData, ahashIDs))
- # Look up the row number of the current ID in the current dataset, if any
- iID = hashIDs.get( strID )
- # If not, start with no data; if yes, pull out stored data row
- astrData = [0.0] if ( iID == None ) else aastrData[iID]
- # Pad output data as needed
- astrData += [None] * ( len( aastrHeaders[iIn] ) - len( astrData ) )
- astrOut += astrData
- csvw.writerow( [strID] + astrOut )
+ """
+ Outputs the table join of the given pre-split string collection.
+
+ :param aaastrIn: One or more split lines from which data are read.
+ :type aaastrIn: collection of collections of string collections
+ :param astrLabels: File names of input data.
+ :type astrLabels: collection of strings
+ :param iCol: Data column in which IDs are matched (zero-indexed).
+ :type iCol: int
+ :param ostm: Output stream to which matched rows are written.
+ :type ostm: output stream
+
+ """
+
+ setstrIDs = set()
+ """The final set of all IDs in any table."""
+ ahashIDs = [{} for i in range( len( aaastrIn ) )]
+ """One hash of IDs to row numbers for each input datum."""
+ aaastrData = [[] for i in range( len( aaastrIn ) )]
+ """One data table for each input datum."""
+ aastrHeaders = [[] for i in range( len( aaastrIn ) )]
+ """The list of non-ID headers for each input datum."""
+ strHeader = "ID"
+ """The ID column header."""
+
+ # For each input datum in each input stream...
+ pos = 0
+
+ for f in aaastrIn :
+ with open(f) as csvfile :
+ iIn = csv.reader(csvfile, csv.excel_tab)
+
+ # Lines from the current file, empty list to hold data, empty hash to hold ids
+ aastrData, hashIDs = (a[pos] for a in (aaastrData, ahashIDs))
+
+ iLine = -1
+ # For a line in the file
+ for astrLine in iIn:
+ if astrLine[0].startswith('#'):
+ continue
+
+ iLine += 1
+
+ # ID is from first column, data are everything else
+ strID, astrData = astrLine[iCol], ( astrLine[:iCol] + astrLine[( iCol + 1 ):] )
+
+ hashIDs[strID] = iLine
+ aastrData.append( astrData )
+
+ # Batch merge every new ID key set
+ setstrIDs.update( hashIDs.keys( ) )
+
+ pos += 1
+
+ # Create writer
+ csvw = csv.writer( ostm, csv.excel_tab, lineterminator='\n' )
+
+ # Make the file names the column names
+ csvw.writerow( [strHeader] + [os.path.splitext(f)[0] for f in astrLabels] )
+
+ # Write out data
+ for strID in sorted( setstrIDs ):
+ astrOut = []
+ for iIn in range( len( aaastrIn ) ):
+ aastrData, hashIDs = (a[iIn] for a in (aaastrData, ahashIDs))
+ # Look up the row number of the current ID in the current dataset, if any
+ iID = hashIDs.get( strID )
+ # If not, start with no data; if yes, pull out stored data row
+ astrData = [0.0] if ( iID == None ) else aastrData[iID]
+ # Pad output data as needed
+ astrData += [None] * ( len( aastrHeaders[iIn] ) - len( astrData ) )
+ astrOut += astrData
+ csvw.writerow( [strID] + astrOut )
argp = argparse.ArgumentParser( prog = "merge_metaphlan_tables.py",
- description = """Performs a table join on one or more metaphlan output files.""")
-argp.add_argument( "aistms", metavar = "input.txt", nargs = "+",
- help = "One or more tab-delimited text tables to join" )
+ description = """Performs a table join on one or more metaphlan output files.""")
+argp.add_argument( "aistms", metavar = "input.txt", nargs = "+",
+ help = "One or more tab-delimited text tables to join" )
__doc__ = "::\n\n\t" + argp.format_help( ).replace( "\n", "\n\t" )
@@ -95,9 +98,9 @@ argp.usage = argp.format_usage()[7:]+"\n\n\tPlease make sure to supply file path
def _main( ):
- args = argp.parse_args( )
- merge(args.aistms, [os.path.split(os.path.basename(f))[1] for f in args.aistms], 0, sys.stdout)
+ args = argp.parse_args( )
+ merge(args.aistms, [os.path.split(os.path.basename(f))[1] for f in args.aistms], 0, sys.stdout)
if __name__ == "__main__":
- _main( )
+ _main( )
View it on GitLab: https://salsa.debian.org/med-team/metaphlan2/commit/51196ec977e34d401797054d8ecb78987e14dde4
--
View it on GitLab: https://salsa.debian.org/med-team/metaphlan2/commit/51196ec977e34d401797054d8ecb78987e14dde4
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180528/094841f6/attachment-0001.html>
More information about the debian-med-commit mailing list