[med-svn] [Git][med-team/parsnp][master] 8 commits: New upstream version 1.2+dfsg
Andreas Tille
gitlab at salsa.debian.org
Thu Nov 14 10:37:59 GMT 2019
Andreas Tille pushed to branch master at Debian Med / parsnp
Commits:
ed761e73 by Andreas Tille at 2018-08-03T13:01:43Z
New upstream version 1.2+dfsg
- - - - -
d2555b50 by Andreas Tille at 2018-08-03T13:06:38Z
New upstream version 1.2+dfsg
- - - - -
39bab456 by Andreas Tille at 2019-11-14T10:17:05Z
New upstream version 1.2.1+dfsg
- - - - -
a0bcd60a by Andreas Tille at 2019-11-14T10:17:05Z
New upstream version
- - - - -
002dd92e by Andreas Tille at 2019-11-14T10:17:06Z
Update upstream source from tag 'upstream/1.2.1+dfsg'
Update to upstream version '1.2.1+dfsg'
with Debian dir 7ef626f49e7941e722d0ee2024a1ebcefc43d9b2
- - - - -
ded87a13 by Andreas Tille at 2019-11-14T10:17:09Z
Standards-Version: 4.4.1
- - - - -
ebdfd433 by Andreas Tille at 2019-11-14T10:26:32Z
Refresh patches
- - - - -
87af9db0 by Andreas Tille at 2019-11-14T10:29:11Z
Upload to unstable
- - - - -
10 changed files:
- LICENSE
- Parsnp.py
- build_parsnp_linux.sh
- build_parsnp_osx.sh
- debian/changelog
- debian/control
- debian/patches/2to3_new.patch
- debian/patches/proper_calls_to_tools.patch
- install.py
- script/shuffle.py
Changes:
=====================================
LICENSE
=====================================
@@ -1,27 +1,45 @@
-Copyright (c) 2014, MarBL
-All rights reserved.
+PURPOSE
+
+Parsnp is a command-line-tool for efficient microbial core genome alignment
+and SNP detection. Parsnp was designed to work in tandem with Gingr,
+a flexible platform for visualizing genome alignments and phylogenetic trees;
+both Parsnp and Gingr form part of the Harvest suite. Parsnp is implemented in C++ and Python.
+
+COPYRIGHT LICENSE
+
+Copyright © 2014, Battelle National Biodefense Institute (BNBI);
+all rights reserved. Authored by: Brian Ondov, Todd Treangen, and
+Adam Phillippy
+
+This Software was prepared for the Department of Homeland Security
+(DHS) by the Battelle National Biodefense Institute, LLC (BNBI) as
+part of contract HSHQDC-07-C-00020 to manage and operate the National
+Biodefense Analysis and Countermeasures Center (NBACC), a Federally
+Funded Research and Development Center.
Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-* Neither the name of the {organization} nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=====================================
Parsnp.py
=====================================
@@ -93,7 +93,7 @@ else:
def handler(signum, frame):
global SIGINT
SIGINT = True
- print 'Caught request to terminate by user (CTRL+C), exiting now, bye'
+ print('Caught request to terminate by user (CTRL+C), exiting now, bye')
sys.exit(128)
signal.signal(signal.SIGINT, handler)
@@ -231,66 +231,66 @@ if os.path.exists("%s/MUMmer/nucmer_run"%(PARSNP_DIR)):
ff.close()
def version():
- print VERSION
+ print(VERSION)
def usage():
- print "usage: parsnp [options] [-g|-r|-q](see below) -d <genome_dir> -p <threads>"
- print ""
- print "Parsnp quick start for three example scenarios: "
- print "1) With reference & genbank file: "
- print " >parsnp -g <reference_genbank_file1,reference_genbank_file2,..> -d <genome_dir> -p <threads> "
- print ""
- print "2) With reference but without genbank file:"
- print " >parsnp -r <reference_genome> -d <genome_dir> -p <threads> "
- print ""
- print "3) Autorecruit reference to a draft assembly:"
- print " >parsnp -q <draft_assembly> -d <genome_db> -p <threads> "
- print ""
- print "[Input parameters]"
- print "<<input/output>>"
- print " -c = <flag>: (c)urated genome directory, use all genomes in dir and ignore MUMi? (default = NO)"
- print " -d = <path>: (d)irectory containing genomes/contigs/scaffolds"
- print " -r = <path>: (r)eference genome (set to ! to pick random one from genome dir)"
- print " -g = <string>: Gen(b)ank file(s) (gbk), comma separated list (default = None)"
- print " -o = <string>: output directory? default [./P_CURRDATE_CURRTIME]"
- print " -q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)"
- print ""
- print "<<MUMi>>"
- print " -U = <float>: max MUMi distance value for MUMi distribution "
- print " -M = <flag>: calculate MUMi and exit? overrides all other choices! (default: NO)"
+ print("usage: parsnp [options] [-g|-r|-q](see below) -d <genome_dir> -p <threads>")
+ print("")
+ print("Parsnp quick start for three example scenarios: ")
+ print("1) With reference & genbank file: ")
+ print(" >parsnp -g <reference_genbank_file1,reference_genbank_file2,..> -d <genome_dir> -p <threads> ")
+ print("")
+ print("2) With reference but without genbank file:")
+ print(" >parsnp -r <reference_genome> -d <genome_dir> -p <threads> ")
+ print("")
+ print("3) Autorecruit reference to a draft assembly:")
+ print(" >parsnp -q <draft_assembly> -d <genome_db> -p <threads> ")
+ print("")
+ print("[Input parameters]")
+ print("<<input/output>>")
+ print(" -c = <flag>: (c)urated genome directory, use all genomes in dir and ignore MUMi? (default = NO)")
+ print(" -d = <path>: (d)irectory containing genomes/contigs/scaffolds")
+ print(" -r = <path>: (r)eference genome (set to ! to pick random one from genome dir)")
+ print(" -g = <string>: Gen(b)ank file(s) (gbk), comma separated list (default = None)")
+ print(" -o = <string>: output directory? default [./P_CURRDATE_CURRTIME]")
+ print(" -q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)")
+ print("")
+ print("<<MUMi>>")
+ print(" -U = <float>: max MUMi distance value for MUMi distribution ")
+ print(" -M = <flag>: calculate MUMi and exit? overrides all other choices! (default: NO)")
#new, mutually exclusive
- print " -i = <float>: max MUM(i) distance (default: autocutoff based on distribution of MUMi values)"
- print ""
- print "<<MUM search>>"
+ print(" -i = <float>: max MUM(i) distance (default: autocutoff based on distribution of MUMi values)")
+ print("")
+ print("<<MUM search>>")
#new, default to lower, 12-17
- print " -a = <int>: min (a)NCHOR length (default = 1.1*Log(S))"
- print " -C = <int>: maximal cluster D value? (default=100)"
- print " -z = <path>: min LCB si(z)e? (default = 25)"
- print ""
- print "<<LCB alignment>>"
- print " -D = <float>: maximal diagonal difference? Either percentage (e.g. 0.2) or bp (e.g. 100bp) (default = 0.12)"
- print " -e = <flag> greedily extend LCBs? experimental! (default = NO)"
- print " -n = <string>: alignment program (default: libMUSCLE)"
- print " -u = <flag>: output unaligned regions? .unaligned (default: NO)"
- print ""
- print "<<Recombination filtration>>"
+ print(" -a = <int>: min (a)NCHOR length (default = 1.1*Log(S))")
+ print(" -C = <int>: maximal cluster D value? (default=100)")
+ print(" -z = <path>: min LCB si(z)e? (default = 25)")
+ print("")
+ print("<<LCB alignment>>")
+ print(" -D = <float>: maximal diagonal difference? Either percentage (e.g. 0.2) or bp (e.g. 100bp) (default = 0.12)")
+ print(" -e = <flag> greedily extend LCBs? experimental! (default = NO)")
+ print(" -n = <string>: alignment program (default: libMUSCLE)")
+ print(" -u = <flag>: output unaligned regions? .unaligned (default: NO)")
+ print("")
+ print("<<Recombination filtration>>")
#new, default is OFF
- print " -x = <flag>: enable filtering of SNPs located in PhiPack identified regions of recombination? (default: NO)"
- print ""
- print "<<Misc>>"
- print " -h = <flag>: (h)elp: print this message and exit"
- print " -p = <int>: number of threads to use? (default= 1)"
- print " -P = <int>: max partition size? limits memory usage (default= 15000000)"
- print " -v = <flag>: (v)erbose output? (default = NO)"
- print " -V = <flag>: output (V)ersion and exit"
- print ""
+ print(" -x = <flag>: enable filtering of SNPs located in PhiPack identified regions of recombination? (default: NO)")
+ print("")
+ print("<<Misc>>")
+ print(" -h = <flag>: (h)elp: print this message and exit")
+ print(" -p = <int>: number of threads to use? (default= 1)")
+ print(" -P = <int>: max partition size? limits memory usage (default= 15000000)")
+ print(" -v = <flag>: (v)erbose output? (default = NO)")
+ print(" -V = <flag>: output (V)ersion and exit")
+ print("")
#hidden, not yet supported options
-#print "-q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)"
-#print "-s = <flag>: (s)plit genomes by n's (default = NO)"
-#print "-z = <path>: min cluster si(z)e? (default = 10)"
-#print "-F = <flag>: fast MUMi calc? (default=NO)"
-#print "-g = <bool>: auto-launch (g)ingr? (default = NO)"
+#print("-q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)"
+#print("-s = <flag>: (s)plit genomes by n's (default = NO)"
+#print("-z = <path>: min cluster si(z)e? (default = 10)"
+#print("-F = <flag>: fast MUMi calc? (default=NO)"
+#print("-g = <bool>: auto-launch (g)ingr? (default = NO)"
if __name__ == "__main__":
@@ -301,9 +301,9 @@ if __name__ == "__main__":
args = []
try:
opts, args = getopt.getopt(sys.argv[1:], "hxved:C:F:D:i:g:m:MU:o:a:cln:p:P:q:r:Rsz:uV", ["help","xtrafast","verbose","extend","sequencedir","clusterD","DiagonalDiff","iniFile","genbank","mumlength","onlymumi","MUMi","outputDir","anchorlength","curated","layout","aligNmentprog","threads","max-partition-size","query","reference","nofiltreps","split","minclustersiZe","unaligned","version"])
- except getopt.GetoptError, err:
+ except getopt.GetoptError as err:
# print help information and exit:
- print str(err)
+ print(str(err))
usage()
sys.exit(2)
@@ -359,7 +359,7 @@ if __name__ == "__main__":
usage()
sys.exit(0)
elif o in ("-R","--filtreps"):
- print "WARNING: -R option is no longer supported, ignoring. Please see harvest.readthedocs.org for bed filtering w/ harvesttools"
+ print("WARNING: -R option is no longer supported, ignoring. Please see harvest.readthedocs.org for bed filtering w/ harvesttools")
filtreps = False
elif o in ("-r","--reference"):
ref = a
@@ -449,7 +449,7 @@ if __name__ == "__main__":
genbank_ref1.write(">gi|"+giline.split("GI:")[-1])
ntdata = False
data = ""
- for line in rf.xreadlines():
+ for line in rf:
if ntdata:
if "//" in line:
ntdata = False
@@ -562,20 +562,20 @@ if __name__ == "__main__":
ref = query
if 1:
- print (len(outputDir)+17)*"*"
- print BOLDME+"SETTINGS:"+ENDC
+ print((len(outputDir)+17)*"*")
+ print(BOLDME+"SETTINGS:"+ENDC)
if ref != "!":
- print "|-"+BOLDME+"refgenome:\t%s"%(ref)+ENDC
+ print("|-"+BOLDME+"refgenome:\t%s"%(ref)+ENDC)
else:
- print "|-"+BOLDME+"refgenome:\t%s"%("autopick")+ENDC
- print "|-"+BOLDME+"aligner:\tlibMUSCLE"+ENDC
- print "|-"+BOLDME+"seqdir:\t%s"%(seqdir)+ENDC
- print "|-"+BOLDME+"outdir:\t%s"%(outputDir)+ENDC
- print "|-"+BOLDME+"OS:\t\t%s"%(OSTYPE)+ENDC
- print "|-"+BOLDME+"threads:\t%s"%(threads)+ENDC
- print (len(outputDir)+17)*"*"
+ print("|-"+BOLDME+"refgenome:\t%s"%("autopick")+ENDC)
+ print("|-"+BOLDME+"aligner:\tlibMUSCLE"+ENDC)
+ print("|-"+BOLDME+"seqdir:\t%s"%(seqdir)+ENDC)
+ print("|-"+BOLDME+"outdir:\t%s"%(outputDir)+ENDC)
+ print("|-"+BOLDME+"OS:\t\t%s"%(OSTYPE)+ENDC)
+ print("|-"+BOLDME+"threads:\t%s"%(threads)+ENDC)
+ print((len(outputDir)+17)*"*")
- print "\n<<Parsnp started>>\n"
+ print("\n<<Parsnp started>>\n")
#1)read fasta files (contigs/scaffolds/finished/DBs/dirs)
sys.stderr.write( "-->Reading Genome (asm, fasta) files from %s..\n"%(seqdir))
@@ -618,7 +618,7 @@ if __name__ == "__main__":
for char in special_chars:
if char in file:
- print "WARNING: File %s contains a non-supported special character (\'%s\') in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char)
+ print("WARNING: File %s contains a non-supported special character (\'%s\') in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char))
nameok = False
break
if nameok:
@@ -640,12 +640,13 @@ if __name__ == "__main__":
sys.exit(1)
reflen = len(data)
ff.close()
+
for file in files:
nameok = True
for char in special_chars:
if char in file:
- #print "WARNING: File %s contains a non-supported special character (%s) in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char)
+ #print("WARNING: File %s contains a non-supported special character (%s) in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char)
nameok = False
break
if not nameok:
@@ -657,7 +658,7 @@ if __name__ == "__main__":
data = []
totlen = 0
- for line in ff.xreadlines():
+ for line in ff:
if line[0] != ">":
data.append(line.replace("\n",""))
if "-" in line:
@@ -673,21 +674,24 @@ if __name__ == "__main__":
if totlen <= 20:
continue
sizediff = float(reflen)/float(totlen)
- if sizediff <= 0.6 or sizediff >= 1.4:
+ #EDITED THIS TO CHANGE GENOME THRESHOLD, WILL NOW CONSIDER CONCATENATED GENOMES THAT ARE MUCH BIGGER THAN THE REFERENCE
+ #if sizediff <= 0.6 or sizediff >= 1.4:
+ if sizediff >= 1.4:
+ #print file #TEST PRINT FOR CHECKING THRESHOLD
continue
fnafiles.append(file)
fnaf_sizes[file] = totlen#len(data)
ff.close()
-
+ #sys.exit(1) TEST SYS EXIT FOR CHECKING THRESHOLD
if ref in fnafiles:
sys.stderr.write( "ERROR: reference genome %s also in genome directory, restart and select different reference genome\n"%(ref))
sys.exit(1)
if ref == "!":
fnafiles.remove(ref)
-
+
#sort reference by largest replicon to smallest
if sortem and os.path.exists(ref) and not autopick_ref:
ff = open(ref,'r')
@@ -703,7 +707,7 @@ if __name__ == "__main__":
continue
seq_dict[hdr] = nt
seq_len[hdr] = len(nt.replace("\n",""))
- seq_len_sort = sorted(seq_len.iteritems(), key=operator.itemgetter(1))
+ seq_len_sort = sorted(iter(seq_len.items()), key=operator.itemgetter(1))
seq_len_sort.reverse()
ffo = open("%s"%(outputDir+os.sep+ref.split(os.sep)[-1]+".ref"),'w')
for item in seq_len_sort:
@@ -714,7 +718,7 @@ if __name__ == "__main__":
ref = outputDir+os.sep+ref.split(os.sep)[-1]+".ref"
else:
ref = genbank_ref
-
+
#remove any query sequences 30% diff in length
allfiles = [ref.rsplit(os.sep,1)[-1]]
#write INI file
@@ -763,7 +767,8 @@ if __name__ == "__main__":
if not inifile_exists:
if len(fnafiles) < 1 or ref == "":
sys.stderr.write( "Parsnp requires 2 or more genomes to run, exiting\n")
- print fnafiles, ref
+ print(fnafiles, end =' ')
+ print(ref)
sys.exit(0)
file_string = ""
@@ -792,7 +797,7 @@ if __name__ == "__main__":
run_command(command)
try:
mumif = open(outputDir+os.sep+"all.mumi",'r')
- for line in mumif.xreadlines():
+ for line in mumif:
line = line.replace("\n","")
try:
idx,mi = line.split(":")
@@ -804,13 +809,13 @@ if __name__ == "__main__":
i = 0
for file in fnafiles:
mumi_dict[i] = 1
- print " |->["+OK_GREEN+"OK"+ENDC+"]"
+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
finalfiles = []
lowest_mumi = 100
auto_ref = ""
if autopick_ref:
- for idx in mumi_dict.keys():
+ for idx in list(mumi_dict.keys()):
if mumi_dict[idx] < lowest_mumi:
auto_ref = seqdir+os.sep+fnafiles[idx]
ref = auto_ref
@@ -826,9 +831,9 @@ if __name__ == "__main__":
mumi_f = open(outputDir+os.sep+"recruited_genomes.lst",'w')
if VERBOSE:
- print "RECRUITED GENOMES:\n"
+ print("RECRUITED GENOMES:\n")
- sorted_x = sorted(mumi_dict.iteritems(), key=operator.itemgetter(1))
+ sorted_x = sorted(iter(mumi_dict.items()), key=operator.itemgetter(1))
scnt = 0
mumivals = []
for item in sorted_x:
@@ -859,11 +864,11 @@ if __name__ == "__main__":
if mumi_only:
mumi_f.write(os.path.abspath(seqdir+os.sep+fnafiles[idx])+",%f"%(mumi_dict[idx])+"\n")
if VERBOSE:
- print "\t"+fnafiles[idx]
+ print("\t"+fnafiles[idx])
finalfiles.append(fnafiles[idx])
allfiles.append(fnafiles[idx])
if VERBOSE:
- print
+ print("")
if curated:
for file in fnafiles:
@@ -947,7 +952,7 @@ if __name__ == "__main__":
#3)run parsnp (cores, grid?)
- print "-->Running Parsnp multi-MUM search and libMUSCLE aligner.."
+ print("-->Running Parsnp multi-MUM search and libMUSCLE aligner..")
if not os.path.exists(outputDir+os.sep+"blocks"):
os.mkdir(outputDir+os.sep+"blocks")
command = ""
@@ -996,7 +1001,7 @@ if __name__ == "__main__":
totseqs = 0
try:
cf = open("%sparsnpAligner.log"%(outputDir+os.sep))
- for line in cf.xreadlines():
+ for line in cf:
if "Total coverage among all sequences:" in line:
coverage = line.split(":",1)[-1].replace("\n","")
coverage = float(coverage.replace("%",""))/100.0
@@ -1004,7 +1009,7 @@ if __name__ == "__main__":
totlength += int(line.split(":",1)[-1].replace("\n","").split("bps")[0])
totseqs +=1
except IOError:
- print ERROR_RED+"parsnpAligner.log missing, parsnpAligner failed, exiting.."+ENDC
+ print(ERROR_RED+"parsnpAligner.log missing, parsnpAligner failed, exiting.."+ENDC)
sys.exit(1)
#update thresholds
@@ -1015,10 +1020,10 @@ if __name__ == "__main__":
sys.stderr.write( " |->["+WARNING_YELLOW+"WARNING"+ENDC+"]"+": aligned regions cover less than 10% of reference genome! please verify recruited genomes are all strain of interest"+ENDC)
else:
pass
- print " |->["+OK_GREEN+"OK"+ENDC+"]"
+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
t2 = time.time()
elapsed = float(t2)-float(t1)
- #print "-->Getting list of LCBs.."
+ #print("-->Getting list of LCBs.."
allbfiles = glob.glob(outputDir+os.sep+"blocks/b*/*")
blockfiles = []
icnt = 0
@@ -1060,7 +1065,7 @@ if __name__ == "__main__":
recombination_sites = {}
bedfile = ""
bedfile_dict = {}
- print "-->Running PhiPack on LCBs to detect recombination.."
+ print("-->Running PhiPack on LCBs to detect recombination..")
if run_recomb_filter and len(blockfiles) > 0:
bedfile = open("%s/parsnp.rec"%(outputDir),'w')
@@ -1091,7 +1096,7 @@ if __name__ == "__main__":
#run parallelPhiPack
pool = Pool(processes=int(threads))
- result = pool.map_async(parallelPhiWrapper,tasks).get(sys.maxint)
+ result = pool.map_async(parallelPhiWrapper,tasks).get(sys.maxsize)
for i in result:
if (i["status"] == 1):
@@ -1126,7 +1131,7 @@ if __name__ == "__main__":
bedfile_dict[srpos] = "1\t%s\t%s\tREC\t%.3f\t+\n"%(srpos,pos+50+block_spos,eval)
else:
chrnum = 1
- chr_spos = ref_seqs.keys()
+ chr_spos = list(ref_seqs.keys())
for cs in chr_spos:
if block_spos < chr_spos:
chrnum = ref_seqs[cs]
@@ -1142,7 +1147,7 @@ if __name__ == "__main__":
pool.close()
pool.join()
- brkeys = bedfile_dict.keys()
+ brkeys = list(bedfile_dict.keys())
brkeys.sort()
for key in brkeys:
bedfile.write(bedfile_dict[key])
@@ -1173,20 +1178,20 @@ if __name__ == "__main__":
run_command("%s/harvest -q -i %s/parsnp.ggr -S "%(PARSNP_DIR,outputDir)+outputDir+os.sep+"parsnp.snps.mblocks")
command = "%s/ft -nt -quote -gamma -slow -boot 100 "%(PARSNP_DIR)+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
- print "-->Reconstructing core genome phylogeny.."
+ print("-->Reconstructing core genome phylogeny..")
run_command(command)
#7)reroot to midpoint
if os.path.exists("outtree"):
os.system("rm outtree")
if reroot_tree and len(finalfiles) > 1:
- #print "-->Midpoint reroot.."
+ #print("-->Midpoint reroot.."
try:
mtree = open("%sparsnp.tree"%(outputDir+os.sep), 'r')
mtreedata = mtree.read()
- mtreedata.replace("\n","")
+ mtreedata = mtreedata.replace("\n","")
tree = dendropy.Tree.get_from_string(mtreedata,"newick")
- tree.reroot_at_midpoint(update_splits=False)
+ tree.reroot_at_midpoint(update_bipartitions=False)
mftreef = tree.as_string('newick').split(" ",1)[1]
#print mftreef
mtreef = open(outputDir+os.sep+"parsnp.final.tree",'w')
@@ -1195,25 +1200,25 @@ if __name__ == "__main__":
os.system("mv %s %s"%(outputDir+os.sep+"parsnp.final.tree",outputDir+os.sep+"parsnp.tree"))
except IOError:
sys.stderr.write( "ERROR: cannot process fasttree output, skipping midpoint reroot..\n")
- print " |->["+OK_GREEN+"OK"+ENDC+"]"
+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
if 1 or len(use_gingr) > 0:
- print "-->Creating Gingr input file.."
+ print("-->Creating Gingr input file..")
if xtrafast or 1:
#if newick available, add
#new flag to update branch lengths
run_command("%s/harvest --midpoint-reroot -u -q -i "%(PARSNP_DIR)+outputDir+os.sep+"parsnp.ggr -o "+outputDir+os.sep+"parsnp.ggr -n %s"%(outputDir+os.sep+"parsnp.tree "))
- print " |->["+OK_GREEN+"OK"+ENDC+"]"
+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
- print "-->Calculating wall clock time.. "
+ print("-->Calculating wall clock time.. ")
if float(elapsed)/float(60.0) > 60:
- print " |->"+BOLDME+"Aligned %d genomes in %.2f hours"%(totseqs,float(elapsed)/float(3600.0))+ENDC
+ print(" |->"+BOLDME+"Aligned %d genomes in %.2f hours"%(totseqs,float(elapsed)/float(3600.0))+ENDC)
elif float(elapsed) > 60:
- print " |->"+BOLDME+"Aligned %d genomes in %.2f minutes"%(totseqs,float(elapsed)/float(60.0))+ENDC
+ print(" |->"+BOLDME+"Aligned %d genomes in %.2f minutes"%(totseqs,float(elapsed)/float(60.0))+ENDC)
else:
- print " |->"+BOLDME+"Aligned %d genomes in %.2f seconds"%(totseqs,float(elapsed))+ENDC
+ print(" |->"+BOLDME+"Aligned %d genomes in %.2f seconds"%(totseqs,float(elapsed))+ENDC)
#cleanup
rmfiles = glob.glob(outputDir+os.sep+"*.aln")
#rmfiles2 = glob.glob(outputDir+os.sep+"blocks/b*/*")
@@ -1224,33 +1229,33 @@ if __name__ == "__main__":
os.system("rm -rf %s"%(file))
filepres = 0
- print BOLDME+"\n<<Parsnp finished! All output available in %s>>"%(outputDir)+ENDC
- print
- print BOLDME+"Validating output directory contents..."+ENDC
- print BOLDME+"\t1)parsnp.tree:\t\tnewick format tree"+ENDC,
+ print(BOLDME+"\n<<Parsnp finished! All output available in %s>>"%(outputDir)+ENDC)
+ print("")
+ print(BOLDME+"Validating output directory contents..."+ENDC)
+ print(BOLDME+"\t1)parsnp.tree:\t\tnewick format tree"+ENDC, end =' ')
if os.path.exists("%sparsnp.tree"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.tree"%(outputDir+os.sep)) > 0:
- print "\t\t\t["+OK_GREEN+"OK"+ENDC+"]"
+ print("\t\t\t["+OK_GREEN+"OK"+ENDC+"]")
filepres+=1
else:
- print "\t|->"+ERROR_RED+"MISSING"+ENDC
- print BOLDME+"\t2)parsnp.ggr:\t\tharvest input file for gingr (GUI)"+ENDC,
+ print("\t|->"+ERROR_RED+"MISSING"+ENDC)
+ print(BOLDME+"\t2)parsnp.ggr:\t\tharvest input file for gingr (GUI)"+ENDC, end =' ')
if os.path.exists("%sparsnp.ggr"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.ggr"%(outputDir+os.sep)) > 0:
- print "\t["+OK_GREEN+"OK"+ENDC+"]"
+ print("\t["+OK_GREEN+"OK"+ENDC+"]")
filepres+=1
else:
- print "\t|->"+ERROR_RED+"MISSING"+ENDC
- print BOLDME+"\t3)parsnp.xmfa:\t\tXMFA formatted multi-alignment"+ENDC,
+ print("\t|->"+ERROR_RED+"MISSING"+ENDC)
+ print(BOLDME+"\t3)parsnp.xmfa:\t\tXMFA formatted multi-alignment"+ENDC, end = ' ')
if os.path.exists("%sparsnp.xmfa"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.xmfa"%(outputDir+os.sep)) > 0:
- print "\t\t["+OK_GREEN+"OK"+ENDC+"]"
+ print("\t\t["+OK_GREEN+"OK"+ENDC+"]")
filepres+=1
else:
- print "\t|->"+ERROR_RED+"MISSING"+ENDC
+ print("\t|->"+ERROR_RED+"MISSING"+ENDC)
if filepres == 3:
pass
else:
- print "\t\t["+ERROR_RED+"Output files missing, something went wrong. Check logs and relaunch or contact developers for assistance"+ENDC+"]"
- print
+ print("\t\t["+ERROR_RED+"Output files missing, something went wrong. Check logs and relaunch or contact developers for assistance"+ENDC+"]")
+ print("")
if os.path.exists("%sblocks"%(outputDir+os.sep)):
os.rmdir("%sblocks"%(outputDir+os.sep))
if os.path.exists("allmums.out"):
@@ -1294,6 +1299,6 @@ if __name__ == "__main__":
#check if available first
rc = 0
if binary_type == "osx":
- print ">>Launching gingr.."
+ print(">>Launching gingr..")
os.system("open -n %s --args %s/parsnp.ggr"%(use_gingr,outputDir))
=====================================
build_parsnp_linux.sh
=====================================
@@ -5,4 +5,6 @@ make install
cd ..
./autogen.sh
./configure
+echo "Fix MUSCLE-3.7 linker"
+make LDADD=-lMUSCLE-3.7
make install
=====================================
build_parsnp_osx.sh
=====================================
@@ -7,4 +7,6 @@ make install
cd ..
./autogen.sh
./configure
+echo "Fix MUSCLE-3.7 linker"
+make LDADD=-lMUSCLE-3.7
make install
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+parsnp (1.2.1+dfsg-1) unstable; urgency=medium
+
+ * New upstream version
+ * Standards-Version: 4.4.1
+
+ -- Andreas Tille <tille at debian.org> Thu, 14 Nov 2019 11:27:23 +0100
+
parsnp (1.2+dfsg-6) unstable; urgency=medium
* Use 2to3 to port to Python3
=====================================
debian/control
=====================================
@@ -9,7 +9,7 @@ Build-Depends: debhelper-compat (= 12),
python3-setuptools,
cython3,
libmuscle-dev
-Standards-Version: 4.4.0
+Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/parsnp
Vcs-Git: https://salsa.debian.org/med-team/parsnp.git
Homepage: https://harvest.readthedocs.org/en/latest/content/parsnp.html
=====================================
debian/patches/2to3_new.patch
=====================================
@@ -1,7 +1,10 @@
Description: Use 2to3 to port to Python3
Bug-Debian: https://bugs.debian.org/937242
Author: Andreas Tille <tille at debian.org>
-Last-Update: Fri, 13 Sep 2019 12:08:17 +0200
+Last-Update: Thu, 14 Nov 2019 11:25:23 +0100
+Remark: Upstream claimed to have applied the 2to3 patch, but the
+ remaining iteritems usage looks suspicious, so the patch is kept
+ here.
--- a/Parsnp.py
+++ b/Parsnp.py
@@ -11,307 +14,7 @@ Last-Update: Fri, 13 Sep 2019 12:08:17 +0200
# See the LICENSE file included with this software for license information.
import os, sys, string, getopt, random,subprocess, time, glob,operator, math, datetime,numpy #pysam
-@@ -94,7 +94,7 @@ else:
- def handler(signum, frame):
- global SIGINT
- SIGINT = True
-- print 'Caught request to terminate by user (CTRL+C), exiting now, bye'
-+ print('Caught request to terminate by user (CTRL+C), exiting now, bye')
- sys.exit(128)
-
- signal.signal(signal.SIGINT, handler)
-@@ -218,79 +218,79 @@ if os.path.exists("%s/MUMmer/nucmer_run"
- ff.close()
-
- def version():
-- print VERSION
-+ print(VERSION)
-
- def usage():
-- print "usage: parsnp [options] [-g|-r|-q](see below) -d <genome_dir> -p <threads>"
-- print ""
-- print "Parsnp quick start for three example scenarios: "
-- print "1) With reference & genbank file: "
-- print " >parsnp -g <reference_genbank_file1,reference_genbank_file2,..> -d <genome_dir> -p <threads> "
-- print ""
-- print "2) With reference but without genbank file:"
-- print " >parsnp -r <reference_genome> -d <genome_dir> -p <threads> "
-- print ""
-- print "3) Autorecruit reference to a draft assembly:"
-- print " >parsnp -q <draft_assembly> -d <genome_db> -p <threads> "
-- print ""
-- print "[Input parameters]"
-- print "<<input/output>>"
-- print " -c = <flag>: (c)urated genome directory, use all genomes in dir and ignore MUMi? (default = NO)"
-- print " -d = <path>: (d)irectory containing genomes/contigs/scaffolds"
-- print " -r = <path>: (r)eference genome (set to ! to pick random one from genome dir)"
-- print " -g = <string>: Gen(b)ank file(s) (gbk), comma separated list (default = None)"
-- print " -o = <string>: output directory? default [./P_CURRDATE_CURRTIME]"
-- print " -q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)"
-- print ""
-- print "<<MUMi>>"
-- print " -U = <float>: max MUMi distance value for MUMi distribution "
-- print " -M = <flag>: calculate MUMi and exit? overrides all other choices! (default: NO)"
-+ print("usage: parsnp [options] [-g|-r|-q](see below) -d <genome_dir> -p <threads>")
-+ print("")
-+ print("Parsnp quick start for three example scenarios: ")
-+ print("1) With reference & genbank file: ")
-+ print(" >parsnp -g <reference_genbank_file1,reference_genbank_file2,..> -d <genome_dir> -p <threads> ")
-+ print("")
-+ print("2) With reference but without genbank file:")
-+ print(" >parsnp -r <reference_genome> -d <genome_dir> -p <threads> ")
-+ print("")
-+ print("3) Autorecruit reference to a draft assembly:")
-+ print(" >parsnp -q <draft_assembly> -d <genome_db> -p <threads> ")
-+ print("")
-+ print("[Input parameters]")
-+ print("<<input/output>>")
-+ print(" -c = <flag>: (c)urated genome directory, use all genomes in dir and ignore MUMi? (default = NO)")
-+ print(" -d = <path>: (d)irectory containing genomes/contigs/scaffolds")
-+ print(" -r = <path>: (r)eference genome (set to ! to pick random one from genome dir)")
-+ print(" -g = <string>: Gen(b)ank file(s) (gbk), comma separated list (default = None)")
-+ print(" -o = <string>: output directory? default [./P_CURRDATE_CURRTIME]")
-+ print(" -q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)")
-+ print("")
-+ print("<<MUMi>>")
-+ print(" -U = <float>: max MUMi distance value for MUMi distribution ")
-+ print(" -M = <flag>: calculate MUMi and exit? overrides all other choices! (default: NO)")
- #new, mutually exclusive
-- print " -i = <float>: max MUM(i) distance (default: autocutoff based on distribution of MUMi values)"
-- print ""
-- print "<<MUM search>>"
-+ print(" -i = <float>: max MUM(i) distance (default: autocutoff based on distribution of MUMi values)")
-+ print("")
-+ print("<<MUM search>>")
- #new, default to lower, 12-17
-- print " -a = <int>: min (a)NCHOR length (default = 1.1*Log(S))"
-- print " -C = <int>: maximal cluster D value? (default=100)"
-- print " -z = <path>: min LCB si(z)e? (default = 25)"
-- print ""
-- print "<<LCB alignment>>"
-- print " -D = <float>: maximal diagonal difference? Either percentage (e.g. 0.2) or bp (e.g. 100bp) (default = 0.12)"
-- print " -e = <flag> greedily extend LCBs? experimental! (default = NO)"
-- print " -n = <string>: alignment program (default: libMUSCLE)"
-- print " -u = <flag>: output unaligned regions? .unaligned (default: NO)"
-- print ""
-- print "<<Recombination filtration>>"
-+ print(" -a = <int>: min (a)NCHOR length (default = 1.1*Log(S))")
-+ print(" -C = <int>: maximal cluster D value? (default=100)")
-+ print(" -z = <path>: min LCB si(z)e? (default = 25)")
-+ print("")
-+ print("<<LCB alignment>>")
-+ print(" -D = <float>: maximal diagonal difference? Either percentage (e.g. 0.2) or bp (e.g. 100bp) (default = 0.12)")
-+ print(" -e = <flag> greedily extend LCBs? experimental! (default = NO)")
-+ print(" -n = <string>: alignment program (default: libMUSCLE)")
-+ print(" -u = <flag>: output unaligned regions? .unaligned (default: NO)")
-+ print("")
-+ print("<<Recombination filtration>>")
- #new, default is OFF
-- print " -x = <flag>: enable filtering of SNPs located in PhiPack identified regions of recombination? (default: NO)"
-- print ""
-- print "<<Misc>>"
-- print " -h = <flag>: (h)elp: print this message and exit"
-- print " -p = <int>: number of threads to use? (default= 1)"
-- print " -P = <int>: max partition size? limits memory usage (default= 15000000)"
-- print " -v = <flag>: (v)erbose output? (default = NO)"
-- print " -V = <flag>: output (V)ersion and exit"
-- print ""
-+ print(" -x = <flag>: enable filtering of SNPs located in PhiPack identified regions of recombination? (default: NO)")
-+ print("")
-+ print("<<Misc>>")
-+ print(" -h = <flag>: (h)elp: print this message and exit")
-+ print(" -p = <int>: number of threads to use? (default= 1)")
-+ print(" -P = <int>: max partition size? limits memory usage (default= 15000000)")
-+ print(" -v = <flag>: (v)erbose output? (default = NO)")
-+ print(" -V = <flag>: output (V)ersion and exit")
-+ print("")
-
- #hidden, not yet supported options
--#print "-q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)"
--#print "-s = <flag>: (s)plit genomes by n's (default = NO)"
--#print "-z = <path>: min cluster si(z)e? (default = 10)"
--#print "-F = <flag>: fast MUMi calc? (default=NO)"
--#print "-g = <bool>: auto-launch (g)ingr? (default = NO)"
-+#print("-q = <path>: (optional) specify (assembled) query genome to use, in addition to genomes found in genome dir (default = NONE)")
-+#print("-s = <flag>: (s)plit genomes by n's (default = NO)")
-+#print("-z = <path>: min cluster si(z)e? (default = 10)")
-+#print("-F = <flag>: fast MUMi calc? (default=NO)")
-+#print("-g = <bool>: auto-launch (g)ingr? (default = NO)")
-
-
- if __name__ == "__main__":
- parsnp_dir= sys.path[0]
-- #print parsnp_dir
-+ #print(parsnp_dir)
- #PARSNP_DIR = parsnp_dir
- opts = []
- args = []
- try:
- opts, args = getopt.getopt(sys.argv[1:], "hxved:C:F:D:i:g:m:MU:o:a:cln:p:P:q:r:Rsz:uV", ["help","xtrafast","verbose","extend","sequencedir","clusterD","DiagonalDiff","iniFile","genbank","mumlength","onlymumi","MUMi","outputDir","anchorlength","curated","layout","aligNmentprog","threads","max-partition-size","query","reference","nofiltreps","split","minclustersiZe","unaligned","version"])
-- except getopt.GetoptError, err:
-+ except getopt.GetoptError as err:
- # print help information and exit:
-- print str(err)
-+ print(str(err))
- usage()
- sys.exit(2)
-
-@@ -346,7 +346,7 @@ if __name__ == "__main__":
- usage()
- sys.exit(0)
- elif o in ("-R","--filtreps"):
-- print "WARNING: -R option is no longer supported, ignoring. Please see harvest.readthedocs.org for bed filtering w/ harvesttools"
-+ print("WARNING: -R option is no longer supported, ignoring. Please see harvest.readthedocs.org for bed filtering w/ harvesttools")
- filtreps = False
- elif o in ("-r","--reference"):
- ref = a
-@@ -436,7 +436,7 @@ if __name__ == "__main__":
- genbank_ref1.write(">gi|"+giline.split("GI:")[-1])
- ntdata = False
- data = ""
-- for line in rf.xreadlines():
-+ for line in rf:
- if ntdata:
- if "//" in line:
- ntdata = False
-@@ -549,20 +549,20 @@ if __name__ == "__main__":
- ref = query
-
- if 1:
-- print (len(outputDir)+17)*"*"
-- print BOLDME+"SETTINGS:"+ENDC
-+ print((len(outputDir)+17)*"*")
-+ print(BOLDME+"SETTINGS:"+ENDC)
- if ref != "!":
-- print "|-"+BOLDME+"refgenome:\t%s"%(ref)+ENDC
-+ print("|-"+BOLDME+"refgenome:\t%s"%(ref)+ENDC)
- else:
-- print "|-"+BOLDME+"refgenome:\t%s"%("autopick")+ENDC
-- print "|-"+BOLDME+"aligner:\tlibMUSCLE"+ENDC
-- print "|-"+BOLDME+"seqdir:\t%s"%(seqdir)+ENDC
-- print "|-"+BOLDME+"outdir:\t%s"%(outputDir)+ENDC
-- print "|-"+BOLDME+"OS:\t\t%s"%(OSTYPE)+ENDC
-- print "|-"+BOLDME+"threads:\t%s"%(threads)+ENDC
-- print (len(outputDir)+17)*"*"
-+ print("|-"+BOLDME+"refgenome:\t%s"%("autopick")+ENDC)
-+ print("|-"+BOLDME+"aligner:\tlibMUSCLE"+ENDC)
-+ print("|-"+BOLDME+"seqdir:\t%s"%(seqdir)+ENDC)
-+ print("|-"+BOLDME+"outdir:\t%s"%(outputDir)+ENDC)
-+ print("|-"+BOLDME+"OS:\t\t%s"%(OSTYPE)+ENDC)
-+ print("|-"+BOLDME+"threads:\t%s"%(threads)+ENDC)
-+ print((len(outputDir)+17)*"*")
-
-- print "\n<<Parsnp started>>\n"
-+ print("\n<<Parsnp started>>\n")
-
- #1)read fasta files (contigs/scaffolds/finished/DBs/dirs)
- sys.stderr.write( "-->Reading Genome (asm, fasta) files from %s..\n"%(seqdir))
-@@ -605,7 +605,7 @@ if __name__ == "__main__":
- for char in special_chars:
- if char in file:
-
-- print "WARNING: File %s contains a non-supported special character (\'%s\') in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char)
-+ print("WARNING: File %s contains a non-supported special character (\'%s\') in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char))
- nameok = False
- break
- if nameok:
-@@ -632,7 +632,7 @@ if __name__ == "__main__":
-
- for char in special_chars:
- if char in file:
-- #print "WARNING: File %s contains a non-supported special character (%s) in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char)
-+ #print("WARNING: File %s contains a non-supported special character (%s) in file name. Please remove if you'd like to include. For best practices see: http://support.apple.com/en-us/HT202808"%(file,char))
- nameok = False
- break
- if not nameok:
-@@ -644,7 +644,7 @@ if __name__ == "__main__":
-
- data = []
- totlen = 0
-- for line in ff.xreadlines():
-+ for line in ff:
- if line[0] != ">":
- data.append(line.replace("\n",""))
- if "-" in line:
-@@ -690,7 +690,7 @@ if __name__ == "__main__":
- continue
- seq_dict[hdr] = nt
- seq_len[hdr] = len(nt.replace("\n",""))
-- seq_len_sort = sorted(seq_len.iteritems(), key=operator.itemgetter(1))
-+ seq_len_sort = sorted(iter(seq_len.items()), key=operator.itemgetter(1))
- seq_len_sort.reverse()
- ffo = open("%s"%(outputDir+os.sep+ref.split(os.sep)[-1]+".ref"),'w')
- for item in seq_len_sort:
-@@ -750,7 +750,7 @@ if __name__ == "__main__":
- if not inifile_exists:
- if len(fnafiles) < 1 or ref == "":
- sys.stderr.write( "Parsnp requires 2 or more genomes to run, exiting\n")
-- print fnafiles, ref
-+ print(fnafiles, ref)
- sys.exit(0)
-
- file_string = ""
-@@ -779,7 +779,7 @@ if __name__ == "__main__":
- run_command(command)
- try:
- mumif = open(outputDir+os.sep+"all.mumi",'r')
-- for line in mumif.xreadlines():
-+ for line in mumif:
- line = line.replace("\n","")
- try:
- idx,mi = line.split(":")
-@@ -791,13 +791,13 @@ if __name__ == "__main__":
- i = 0
- for file in fnafiles:
- mumi_dict[i] = 1
-- print " |->["+OK_GREEN+"OK"+ENDC+"]"
-+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
- finalfiles = []
- lowest_mumi = 100
- auto_ref = ""
-
- if autopick_ref:
-- for idx in mumi_dict.keys():
-+ for idx in list(mumi_dict.keys()):
- if mumi_dict[idx] < lowest_mumi:
- auto_ref = seqdir+os.sep+fnafiles[idx]
- ref = auto_ref
-@@ -813,9 +813,9 @@ if __name__ == "__main__":
- mumi_f = open(outputDir+os.sep+"recruited_genomes.lst",'w')
-
- if VERBOSE:
-- print "RECRUITED GENOMES:\n"
-+ print("RECRUITED GENOMES:\n")
-
-- sorted_x = sorted(mumi_dict.iteritems(), key=operator.itemgetter(1))
-+ sorted_x = sorted(iter(mumi_dict.items()), key=operator.itemgetter(1))
- scnt = 0
- mumivals = []
- for item in sorted_x:
-@@ -838,7 +838,7 @@ if __name__ == "__main__":
- hpv = minv+(3*stdv)
-
-
-- for idx in mumi_dict.keys():
-+ for idx in list(mumi_dict.keys()):
- if mumi_dict[idx] < (float(mumidistance)) or curated:
- if fastmum and mumi_dict[idx] > hpv:
- continue
-@@ -846,11 +846,11 @@ if __name__ == "__main__":
- if mumi_only:
- mumi_f.write(os.path.abspath(seqdir+os.sep+fnafiles[idx])+",%f"%(mumi_dict[idx])+"\n")
- if VERBOSE:
-- print "\t"+fnafiles[idx]
-+ print("\t"+fnafiles[idx])
- finalfiles.append(fnafiles[idx])
- allfiles.append(fnafiles[idx])
- if VERBOSE:
-- print
-+ print()
-
- if curated:
- for file in fnafiles:
-@@ -878,7 +878,7 @@ if __name__ == "__main__":
+@@ -883,7 +883,7 @@ if __name__ == "__main__":
continue
seq_dict[hdr] = nt
seq_len[hdr] = len(nt.replace("\n",""))
@@ -320,286 +23,3 @@ Last-Update: Fri, 13 Sep 2019 12:08:17 +0200
seq_len_sort.reverse()
ffo = open("%s"%(outputDir+os.sep+auto_ref.split(os.sep)[-1]+".ref"),'w')
for item in seq_len_sort:
-@@ -888,8 +888,8 @@ if __name__ == "__main__":
- ffo.close()
- auto_ref = outputDir+os.sep+auto_ref.split(os.sep)[-1]+".ref"
- ref = auto_ref
-- #print ref
-- #print ref
-+ #print(ref)
-+ #print(ref)
- inifiled_closest = inifiled
- if not inifile_exists:
- if len(finalfiles) < 1 or ref == "":
-@@ -934,7 +934,7 @@ if __name__ == "__main__":
-
-
- #3)run parsnp (cores, grid?)
-- print "-->Running Parsnp multi-MUM search and libMUSCLE aligner.."
-+ print("-->Running Parsnp multi-MUM search and libMUSCLE aligner..")
- if not os.path.exists(outputDir+os.sep+"blocks"):
- os.mkdir(outputDir+os.sep+"blocks")
- command = ""
-@@ -983,7 +983,7 @@ if __name__ == "__main__":
- totseqs = 0
- try:
- cf = open("%sparsnpAligner.log"%(outputDir+os.sep))
-- for line in cf.xreadlines():
-+ for line in cf:
- if "Total coverage among all sequences:" in line:
- coverage = line.split(":",1)[-1].replace("\n","")
- coverage = float(coverage.replace("%",""))/100.0
-@@ -991,7 +991,7 @@ if __name__ == "__main__":
- totlength += int(line.split(":",1)[-1].replace("\n","").split("bps")[0])
- totseqs +=1
- except IOError:
-- print ERROR_RED+"parsnpAligner.log missing, parsnpAligner failed, exiting.."+ENDC
-+ print(ERROR_RED+"parsnpAligner.log missing, parsnpAligner failed, exiting.."+ENDC)
- sys.exit(1)
-
- #update thresholds
-@@ -1002,10 +1002,10 @@ if __name__ == "__main__":
- sys.stderr.write( " |->["+WARNING_YELLOW+"WARNING"+ENDC+"]"+": aligned regions cover less than 10% of reference genome! please verify recruited genomes are all strain of interest"+ENDC)
- else:
- pass
-- print " |->["+OK_GREEN+"OK"+ENDC+"]"
-+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
- t2 = time.time()
- elapsed = float(t2)-float(t1)
-- #print "-->Getting list of LCBs.."
-+ #print("-->Getting list of LCBs..")
- allbfiles = glob.glob(outputDir+os.sep+"blocks/b*/*")
- blockfiles = []
- icnt = 0
-@@ -1047,7 +1047,7 @@ if __name__ == "__main__":
- recombination_sites = {}
- bedfile = ""
- bedfile_dict = {}
-- print "-->Running PhiPack on LCBs to detect recombination.."
-+ print("-->Running PhiPack on LCBs to detect recombination..")
- if run_recomb_filter and len(blockfiles) > 0:
-
- bedfile = open("%s/parsnp.rec"%(outputDir),'w')
-@@ -1078,7 +1078,7 @@ if __name__ == "__main__":
-
- #run parallelPhiPack
- pool = Pool(processes=int(threads))
-- result = pool.map_async(parallelPhiWrapper,tasks).get(sys.maxint)
-+ result = pool.map_async(parallelPhiWrapper,tasks).get(sys.maxsize)
-
- for i in result:
- if (i["status"] == 1):
-@@ -1113,7 +1113,7 @@ if __name__ == "__main__":
- bedfile_dict[srpos] = "1\t%s\t%s\tREC\t%.3f\t+\n"%(srpos,pos+50+block_spos,eval)
- else:
- chrnum = 1
-- chr_spos = ref_seqs.keys()
-+ chr_spos = list(ref_seqs.keys())
- for cs in chr_spos:
- if block_spos < chr_spos:
- chrnum = ref_seqs[cs]
-@@ -1129,7 +1129,7 @@ if __name__ == "__main__":
-
- pool.close()
- pool.join()
-- brkeys = bedfile_dict.keys()
-+ brkeys = list(bedfile_dict.keys())
- brkeys.sort()
- for key in brkeys:
- bedfile.write(bedfile_dict[key])
-@@ -1160,14 +1160,14 @@ if __name__ == "__main__":
- run_command("harvesttools -q -i %s/parsnp.ggr -S "%(outputDir)+outputDir+os.sep+"parsnp.snps.mblocks")
-
- command = "fasttree -nt -quote -gamma -slow -boot 100 "+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
-- print "-->Reconstructing core genome phylogeny.."
-+ print("-->Reconstructing core genome phylogeny..")
- run_command(command)
- #7)reroot to midpoint
- if os.path.exists("outtree"):
- os.system("rm outtree")
-
- if reroot_tree and len(finalfiles) > 1:
-- #print "-->Midpoint reroot.."
-+ #print("-->Midpoint reroot..")
- try:
- mtree = open("%sparsnp.tree"%(outputDir+os.sep), 'r')
- mtreedata = mtree.read()
-@@ -1175,32 +1175,32 @@ if __name__ == "__main__":
- tree = dendropy.Tree.get_from_string(mtreedata,"newick")
- tree.reroot_at_midpoint(update_splits=False)
- mftreef = tree.as_string('newick').split(" ",1)[1]
-- #print mftreef
-+ #print(mftreef)
- mtreef = open(outputDir+os.sep+"parsnp.final.tree",'w')
- mtreef.write(mftreef)
- mtreef.close()
- os.system("mv %s %s"%(outputDir+os.sep+"parsnp.final.tree",outputDir+os.sep+"parsnp.tree"))
- except IOError:
- sys.stderr.write( "ERROR: cannot process fasttree output, skipping midpoint reroot..\n")
-- print " |->["+OK_GREEN+"OK"+ENDC+"]"
-+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
-
-
- if 1 or len(use_gingr) > 0:
-- print "-->Creating Gingr input file.."
-+ print("-->Creating Gingr input file..")
- if xtrafast or 1:
- #if newick available, add
- #new flag to update branch lengths
- run_command("harvesttools --midpoint-reroot -u -q -i "+outputDir+os.sep+"parsnp.ggr -o "+outputDir+os.sep+"parsnp.ggr -n %s"%(outputDir+os.sep+"parsnp.tree "))
-
-- print " |->["+OK_GREEN+"OK"+ENDC+"]"
-+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
-
-- print "-->Calculating wall clock time.. "
-+ print("-->Calculating wall clock time.. ")
- if float(elapsed)/float(60.0) > 60:
-- print " |->"+BOLDME+"Aligned %d genomes in %.2f hours"%(totseqs,float(elapsed)/float(3600.0))+ENDC
-+ print(" |->"+BOLDME+"Aligned %d genomes in %.2f hours"%(totseqs,float(elapsed)/float(3600.0))+ENDC)
- elif float(elapsed) > 60:
-- print " |->"+BOLDME+"Aligned %d genomes in %.2f minutes"%(totseqs,float(elapsed)/float(60.0))+ENDC
-+ print(" |->"+BOLDME+"Aligned %d genomes in %.2f minutes"%(totseqs,float(elapsed)/float(60.0))+ENDC)
- else:
-- print " |->"+BOLDME+"Aligned %d genomes in %.2f seconds"%(totseqs,float(elapsed))+ENDC
-+ print(" |->"+BOLDME+"Aligned %d genomes in %.2f seconds"%(totseqs,float(elapsed))+ENDC)
- #cleanup
- rmfiles = glob.glob(outputDir+os.sep+"*.aln")
- #rmfiles2 = glob.glob(outputDir+os.sep+"blocks/b*/*")
-@@ -1211,33 +1211,33 @@ if __name__ == "__main__":
- os.system("rm -rf %s"%(file))
-
- filepres = 0
-- print BOLDME+"\n<<Parsnp finished! All output available in %s>>"%(outputDir)+ENDC
-- print
-- print BOLDME+"Validating output directory contents..."+ENDC
-- print BOLDME+"\t1)parsnp.tree:\t\tnewick format tree"+ENDC,
-+ print(BOLDME+"\n<<Parsnp finished! All output available in %s>>"%(outputDir)+ENDC)
-+ print()
-+ print(BOLDME+"Validating output directory contents..."+ENDC)
-+ print(BOLDME+"\t1)parsnp.tree:\t\tnewick format tree"+ENDC, end=' ')
- if os.path.exists("%sparsnp.tree"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.tree"%(outputDir+os.sep)) > 0:
-- print "\t\t\t["+OK_GREEN+"OK"+ENDC+"]"
-+ print("\t\t\t["+OK_GREEN+"OK"+ENDC+"]")
- filepres+=1
- else:
-- print "\t|->"+ERROR_RED+"MISSING"+ENDC
-- print BOLDME+"\t2)parsnp.ggr:\t\tharvest input file for gingr (GUI)"+ENDC,
-+ print("\t|->"+ERROR_RED+"MISSING"+ENDC)
-+ print(BOLDME+"\t2)parsnp.ggr:\t\tharvest input file for gingr (GUI)"+ENDC, end=' ')
- if os.path.exists("%sparsnp.ggr"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.ggr"%(outputDir+os.sep)) > 0:
-- print "\t["+OK_GREEN+"OK"+ENDC+"]"
-+ print("\t["+OK_GREEN+"OK"+ENDC+"]")
- filepres+=1
- else:
-- print "\t|->"+ERROR_RED+"MISSING"+ENDC
-- print BOLDME+"\t3)parsnp.xmfa:\t\tXMFA formatted multi-alignment"+ENDC,
-+ print("\t|->"+ERROR_RED+"MISSING"+ENDC)
-+ print(BOLDME+"\t3)parsnp.xmfa:\t\tXMFA formatted multi-alignment"+ENDC, end=' ')
- if os.path.exists("%sparsnp.xmfa"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.xmfa"%(outputDir+os.sep)) > 0:
-- print "\t\t["+OK_GREEN+"OK"+ENDC+"]"
-+ print("\t\t["+OK_GREEN+"OK"+ENDC+"]")
- filepres+=1
- else:
-- print "\t|->"+ERROR_RED+"MISSING"+ENDC
-+ print("\t|->"+ERROR_RED+"MISSING"+ENDC)
- if filepres == 3:
- pass
-
- else:
-- print "\t\t["+ERROR_RED+"Output files missing, something went wrong. Check logs and relaunch or contact developers for assistance"+ENDC+"]"
-- print
-+ print("\t\t["+ERROR_RED+"Output files missing, something went wrong. Check logs and relaunch or contact developers for assistance"+ENDC+"]")
-+ print()
- if os.path.exists("%sblocks"%(outputDir+os.sep)):
- os.rmdir("%sblocks"%(outputDir+os.sep))
- if os.path.exists("allmums.out"):
-@@ -1281,6 +1281,6 @@ if __name__ == "__main__":
- #check if available first
- rc = 0
- if binary_type == "osx":
-- print ">>Launching gingr.."
-+ print(">>Launching gingr..")
- os.system("open -n %s --args %s/parsnp.ggr"%(use_gingr,outputDir))
-
---- a/install.py
-+++ b/install.py
-@@ -1,12 +1,12 @@
- import os,sys,string
- #parsnp basic INSTALL script
- user_home = os.environ["HOME"]
--print "<<Welcome to Parsnp utility script install>>"
-+print("<<Welcome to Parsnp utility script install>>")
-
- #check for python version
- if (sys.version_info[0] < 2) or (sys.version_info[0] == 2 and sys.version_info[1] < 6):
-
-- print "Python version is %s. Parsnp requires at least 2.6"%(sys.version)
-+ print("Python version is %s. Parsnp requires at least 2.6"%(sys.version))
- sys.exit(1)
-
- #complete shebang
---- a/script/shuffle.py
-+++ b/script/shuffle.py
-@@ -51,7 +51,7 @@ if __name__ == "__main__":
- rnumber = 0
- shuffleperseq = 1
- if len(sys.argv) < 6:
-- print "\nUsage: shuffleGenome <FastA input file> <output file> <Rearrangement size> <Number of rearrangements> <Shuffled sequences per seq>"
-+ print("\nUsage: shuffleGenome <FastA input file> <output file> <Rearrangement size> <Number of rearrangements> <Shuffled sequences per seq>")
- sys.exit(1)
- else:
- infileName = sys.argv[1]
-@@ -77,22 +77,22 @@ if __name__ == "__main__":
- header = infile.readline()
- infiled = infile.read()
- filelen = len(infiled)
-- print "\nInput sequence:%s"%seqfile
-- print "Input sequence length: %d\n"%filelen
-+ print("\nInput sequence:%s"%seqfile)
-+ print("Input sequence length: %d\n"%filelen)
-
- #split genome into filelen/rsize parts
- partlist = []
- pos = 0
- if rsize > filelen:
- rsize = filelen
-- for part in xrange(0,filelen/rsize):
-+ for part in range(0,filelen/rsize):
- partlist.append(infiled[pos:(part+1)*rsize])
- pos +=rsize
-
- partlist.append(infiled[pos:])
-
-
-- for shuffleit in xrange(0,shuffleperseq):
-+ for shuffleit in range(0,shuffleperseq):
- count = 0
- seq = ""
- parttemp = ""
-@@ -114,13 +114,13 @@ if __name__ == "__main__":
- parttemp = partlist[part1]
- partlist[part1] = partlist[part2]
- partlist[part2] = parttemp
-- print "Transposition"
-- print " Positions %d and %d swapped"%(part1,part2)
-+ print("Transposition")
-+ print(" Positions %d and %d swapped"%(part1,part2))
- elif operation == 2:
- #inversion
- partlist[part1] = invertSeq(partlist[part1])
-- print "Inversion"
-- print " Position %d"%part1
-+ print("Inversion")
-+ print(" Position %d"%part1)
-
-
-
-@@ -137,7 +137,7 @@ if __name__ == "__main__":
- #write output to file
- fname = seqfile[:-4]
- fname+= "_Shuffled_%d.fna"%(shuffleit+1)
-- print "output: %s"%(fname)
-+ print("output: %s"%(fname))
-
- fout = open(fname,'w')
- fout.write(output)
=====================================
debian/patches/proper_calls_to_tools.patch
=====================================
@@ -49,7 +49,7 @@ Description: Fix path to Debian installed tools
#set MUMmer paths
if os.path.exists("%s/MUMmer/nucmer_run"%(PARSNP_DIR)):
ff = open("%s/MUMmer/nucmer_run"%(PARSNP_DIR))
-@@ -784,12 +770,12 @@ if __name__ == "__main__":
+@@ -789,12 +775,12 @@ if __name__ == "__main__":
if use_parsnp_mumi and not curated:
sys.stderr.write( "-->Calculating MUMi..\n")
if not inifile_exists:
@@ -64,7 +64,7 @@ Description: Fix path to Debian installed tools
run_command(command)
try:
mumif = open(outputDir+os.sep+"all.mumi",'r')
-@@ -962,14 +948,14 @@ if __name__ == "__main__":
+@@ -967,14 +953,14 @@ if __name__ == "__main__":
if command == "" and xtrafast and 0:
command = "%s/parsnpA_fast %sparsnpAligner.ini"%(PARSNP_DIR,outputDir+os.sep)
elif command == "":
@@ -82,7 +82,7 @@ Description: Fix path to Debian installed tools
run_command(command)
-@@ -1159,21 +1145,21 @@ if __name__ == "__main__":
+@@ -1164,21 +1150,21 @@ if __name__ == "__main__":
if xtrafast or 1:
#add genbank here, if present
if len(genbank_ref) != 0:
@@ -107,15 +107,15 @@ Description: Fix path to Debian installed tools
- command = "%s/ft -nt -quote -gamma -slow -boot 100 "%(PARSNP_DIR)+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
+ command = "fasttree -nt -quote -gamma -slow -boot 100 "+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
- print "-->Reconstructing core genome phylogeny.."
+ print("-->Reconstructing core genome phylogeny..")
run_command(command)
#7)reroot to midpoint
-@@ -1204,7 +1190,7 @@ if __name__ == "__main__":
+@@ -1209,7 +1195,7 @@ if __name__ == "__main__":
if xtrafast or 1:
#if newick available, add
#new flag to update branch lengths
- run_command("%s/harvest --midpoint-reroot -u -q -i "%(PARSNP_DIR)+outputDir+os.sep+"parsnp.ggr -o "+outputDir+os.sep+"parsnp.ggr -n %s"%(outputDir+os.sep+"parsnp.tree "))
+ run_command("harvesttools --midpoint-reroot -u -q -i "+outputDir+os.sep+"parsnp.ggr -o "+outputDir+os.sep+"parsnp.ggr -n %s"%(outputDir+os.sep+"parsnp.tree "))
- print " |->["+OK_GREEN+"OK"+ENDC+"]"
+ print(" |->["+OK_GREEN+"OK"+ENDC+"]")
=====================================
install.py
=====================================
@@ -1,12 +1,12 @@
import os,sys,string
#parsnp basic INSTALL script
user_home = os.environ["HOME"]
-print "<<Welcome to Parsnp utility script install>>"
+print("<<Welcome to Parsnp utility script install>>")
#check for python version
-if (sys.version_info[0] < 2) or (sys.version_info[0] == 2 and sys.version_info[1] < 6):
+if (sys.version_info[0] < 3) or (sys.version_info[0] == 3 and sys.version_info[1] < 1):
- print "Python version is %s. Parsnp requires at least 2.6"%(sys.version)
+ print("Python version is %s. Parsnp requires at least 3.1"%(sys.version))
sys.exit(1)
#complete shebang
@@ -16,4 +16,4 @@ scripts = ["parsnp.py"]
#copy to currdir
files = os.listdir(".")
for script in scripts:
- os.system("mv %s %s"%(script,script.replace(".py","")))
\ No newline at end of file
+ os.system("mv %s %s"%(script,script.replace(".py","")))
=====================================
script/shuffle.py
=====================================
@@ -51,7 +51,7 @@ if __name__ == "__main__":
rnumber = 0
shuffleperseq = 1
if len(sys.argv) < 6:
- print "\nUsage: shuffleGenome <FastA input file> <output file> <Rearrangement size> <Number of rearrangements> <Shuffled sequences per seq>"
+ print("\nUsage: shuffleGenome <FastA input file> <output file> <Rearrangement size> <Number of rearrangements> <Shuffled sequences per seq>")
sys.exit(1)
else:
infileName = sys.argv[1]
@@ -77,22 +77,22 @@ if __name__ == "__main__":
header = infile.readline()
infiled = infile.read()
filelen = len(infiled)
- print "\nInput sequence:%s"%seqfile
- print "Input sequence length: %d\n"%filelen
+ print("\nInput sequence:%s"%seqfile)
+ print("Input sequence length: %d\n"%filelen)
#split genome into filelen/rsize parts
partlist = []
pos = 0
if rsize > filelen:
rsize = filelen
- for part in xrange(0,filelen/rsize):
+ for part in range(0,filelen/rsize):
partlist.append(infiled[pos:(part+1)*rsize])
pos +=rsize
partlist.append(infiled[pos:])
- for shuffleit in xrange(0,shuffleperseq):
+ for shuffleit in range(0,shuffleperseq):
count = 0
seq = ""
parttemp = ""
@@ -114,13 +114,13 @@ if __name__ == "__main__":
parttemp = partlist[part1]
partlist[part1] = partlist[part2]
partlist[part2] = parttemp
- print "Transposition"
- print " Positions %d and %d swapped"%(part1,part2)
+ print("Transposition")
+ print(" Positions %d and %d swapped"%(part1,part2))
elif operation == 2:
#inversion
partlist[part1] = invertSeq(partlist[part1])
- print "Inversion"
- print " Position %d"%part1
+ print("Inversion")
+ print(" Position %d"%part1)
@@ -137,7 +137,7 @@ if __name__ == "__main__":
#write output to file
fname = seqfile[:-4]
fname+= "_Shuffled_%d.fna"%(shuffleit+1)
- print "output: %s"%(fname)
+ print("output: %s"%(fname))
fout = open(fname,'w')
fout.write(output)
View it on GitLab: https://salsa.debian.org/med-team/parsnp/compare/e25203ae06920c733af26b91acda6ea6230f402a...87af9db0b3b158ad7e42635f52a1e138cf988260
--
View it on GitLab: https://salsa.debian.org/med-team/parsnp/compare/e25203ae06920c733af26b91acda6ea6230f402a...87af9db0b3b158ad7e42635f52a1e138cf988260
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191114/440b6707/attachment-0001.html>
More information about the debian-med-commit
mailing list