[med-svn] [Git][med-team/parsnp][upstream] New upstream version 1.6.1+dfsg
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Wed Feb 16 17:58:02 GMT 2022
Nilesh Patra pushed to branch upstream at Debian Med / parsnp
Commits:
67776d17 by Nilesh Patra at 2022-02-16T17:39:46+00:00
New upstream version 1.6.1+dfsg
- - - - -
5 changed files:
- − .travis.yml
- README.md
- configure.ac
- parsnp
- src/Makefile.am
Changes:
=====================================
.travis.yml deleted
=====================================
@@ -1,13 +0,0 @@
-language: cpp
-compiler:
-- gcc
-script:
-- if [ $TRAVIS_OS_NAME == linux ]; then ./build_parsnp_linux.sh; fi
-notifications:
- email:
- recipients:
- - treangent at nbacc.net
- on_success: change
- on_failure: change
-os:
-- linux
=====================================
README.md
=====================================
@@ -51,10 +51,10 @@ Recent OSX have a Gatekeeper, that's designed to ensure that only softwre from k
# Running Parsnp
Parsnp can be run multiple ways, but the most common is with a set of genomes and a reference.
```
-parsnp -g <reference_genbank> -d <genomes
+parsnp -g <reference_genbank> -d <genomes>
```
```
-parsnp -r <reference_fasta> -d <genomes
+parsnp -r <reference_fasta> -d <genomes>
```
For example,
```
=====================================
configure.ac
=====================================
@@ -1,5 +1,5 @@
-AC_INIT(parsnp,1.5.3)
-AM_INIT_AUTOMAKE(parsnp,1.5.3)
+AC_INIT(parsnp,1.6.1)
+AM_INIT_AUTOMAKE(parsnp,1.6.1)
AC_PROG_CC(gcc)
AC_ARG_WITH(libmuscle, [ --with-libmuscle=<path/to/libmuscle> libMUSCLE install dir (default: `pwd`/muscle)])
=====================================
parsnp
=====================================
@@ -14,7 +14,7 @@ import signal
import inspect
from multiprocessing import *
-__version__ = "1.5.6"
+__version__ = "1.6.1"
reroot_tree = True #use --midpoint-reroot
try:
@@ -471,6 +471,14 @@ def parse_args():
# help = "Enable filtering of SNPs located in PhiPack identified regions of recombination")
misc_args = parser.add_argument_group("Misc")
+ misc_args.add_argument(
+ "--skip-phylogeny",
+ action="store_true",
+ help="Do not generate phylogeny from core SNPs")
+ misc_args.add_argument(
+ "--validate-input",
+ action="store_true",
+ help="Use Biopython to validate input files")
misc_args.add_argument(
"--use-fasttree",
action = "store_true",
@@ -642,6 +650,17 @@ if __name__ == "__main__":
if len(input_files) < 2:
logger.critical("Less than 2 input sequences provided...")
sys.exit(1)
+ if args.validate_input:
+ from Bio import SeqIO
+ for f in input_files + ([ref] if ref and ref != "!" else []):
+ try:
+ records = SeqIO.parse(f, "fasta")
+ except:
+ logger.error("{} is an invalid sequence file!".format(f))
+ for record in records:
+ if any(c not in "GATCRYWSMKHBVDN" + "GATCRYWSMKHBVDN".lower() for c in record.seq):
+ logger.error("Genome sequence {} has invalid characters {}! Skip!".format(f, set(str(record.seq)) - set("AGCTNagctn")))
+ continue
# Parse reference if necessary
if ref and ref != "!":
@@ -832,7 +851,7 @@ SETTINGS:
elif '-' in seq:
seq = seq.split('\n')
if any('-' in l and ('>' not in l) for l in seq):
- logger.warning("Genome sequence %s seems to be aligned! Skip!"%((input_file)))
+ logger.error("Genome sequence %s seems to be aligned! Skip!"%((input_file)))
continue
elif seqlen <= 20:
logger.error("File %s is less than or equal to 20bp in length. Skip!"%(input_file))
@@ -1325,7 +1344,7 @@ Please verify recruited genomes are all strain of interest""")
chrnum = 1
chr_spos = list(ref_seqs.keys())
for cs in ref_seqs:
- if block_spos < chr_spos:
+ if block_spos < len(chr_spos):
chrnum = ref_seqs[cs]
bedfile_dict[srpos] = "%d\t%s\t%s\tREC\t%.3f\t+\n"%(chrnum,srpos,pos+50+block_spos,eval)
@@ -1366,60 +1385,61 @@ Please verify recruited genomes are all strain of interest""")
if generate_vcf:
run_command("harvesttools -q -i %s/parsnp.ggr -V "%(outputDir)+outputDir+os.sep+"parsnp.vcf")
- logger.info("Reconstructing core genome phylogeny...")
- with open(os.path.join(outputDir, "parsnp.snps.mblocks")) as mblocks:
- for line in mblocks:
- if line[0] != ">" and len(line.rstrip()) < 6:
- logger.warning("Not enough SNPs to use RaxML. Attempting to use FastTree instead...")
- use_fasttree = True
- break
- if not use_fasttree:
- with tempfile.TemporaryDirectory() as raxml_output_dir:
- command = "raxmlHPC-PTHREADS -m GTRCAT -p 12345 -T %d -s %s -w %s -n OUTPUT"%(threads,outputDir+os.sep+"parsnp.snps.mblocks", raxml_output_dir)
- run_command(command)
- os.system("mv {}/RAxML_bestTree.OUTPUT {}".format(raxml_output_dir, outputDir+os.sep+"parsnp.tree"))
+ if not args.skip_phylogeny:
+ logger.info("Reconstructing core genome phylogeny...")
+ with open(os.path.join(outputDir, "parsnp.snps.mblocks")) as mblocks:
+ for line in mblocks:
+ if line[0] != ">" and len(line.rstrip()) < 6:
+ logger.warning("Not enough SNPs to use RaxML. Attempting to use FastTree instead...")
+ use_fasttree = True
+ break
+ if not use_fasttree:
+ with tempfile.TemporaryDirectory() as raxml_output_dir:
+ command = "raxmlHPC-PTHREADS -m GTRCAT -p 12345 -T %d -s %s -w %s -n OUTPUT"%(threads,outputDir+os.sep+"parsnp.snps.mblocks", raxml_output_dir)
+ run_command(command)
+ os.system("mv {}/RAxML_bestTree.OUTPUT {}".format(raxml_output_dir, outputDir+os.sep+"parsnp.tree"))
+
+ mblocks_file = os.path.join(outputDir, "parsnp.snps.mblocks")
+
+ if use_fasttree:
+ if shutil.which("FastTreeMP") is not None:
+ os.environ["OMP_NUM_THREADS"] = str(threads)
+ command = "FastTreeMP -nt -quote -gamma -slow -boot 100 "+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
+ run_command(command)
+ else:
+ logger.info("FastTreeMP failed. Trying fasttree...")
+ command = "fasttree -nt -quote -gamma -slow -boot 100 "+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
+ run_command(command)
+
- mblocks_file = os.path.join(outputDir, "parsnp.snps.mblocks")
- if use_fasttree:
- if shutil.which("FastTreeMP") is not None:
- os.environ["OMP_NUM_THREADS"] = str(threads)
- command = "FastTreeMP -nt -quote -gamma -slow -boot 100 "+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
- run_command(command)
- else:
- logger.info("FastTreeMP failed. Trying fasttree...")
- command = "fasttree -nt -quote -gamma -slow -boot 100 "+outputDir+os.sep+"parsnp.snps.mblocks > "+outputDir+os.sep+"parsnp.tree"
- run_command(command)
-
+ #7)reroot to midpoint
+ if os.path.exists("outtree"):
+ os.remove("outtree")
+ if reroot_tree and len(finalfiles) > 1:
+ try:
+ mtree = open("%sparsnp.tree"%(outputDir+os.sep), 'r')
+ mtreedata = mtree.read()
+ mtreedata = mtreedata.replace("\n","")
+ tree = dendropy.Tree.get_from_string(mtreedata,"newick")
+ tree.reroot_at_midpoint(update_bipartitions=False)
+ mftreef = tree.as_string('newick').split(" ",1)[1]
+ #print mftreef
+ mtreef = open(outputDir+os.sep+"parsnp.final.tree",'w')
+ mtreef.write(mftreef)
+ mtreef.close()
+ os.system("mv %s %s"%(outputDir+os.sep+"parsnp.final.tree",outputDir+os.sep+"parsnp.tree"))
+ except IOError:
+ logger.error("Cannot process {} output, skipping midpoint reroot..\n".format("fasttree" if args.use_fasttree else "RaxML"))
- #7)reroot to midpoint
- if os.path.exists("outtree"):
- os.remove("outtree")
- if reroot_tree and len(finalfiles) > 1:
- try:
- mtree = open("%sparsnp.tree"%(outputDir+os.sep), 'r')
- mtreedata = mtree.read()
- mtreedata = mtreedata.replace("\n","")
- tree = dendropy.Tree.get_from_string(mtreedata,"newick")
- tree.reroot_at_midpoint(update_bipartitions=False)
- mftreef = tree.as_string('newick').split(" ",1)[1]
- #print mftreef
- mtreef = open(outputDir+os.sep+"parsnp.final.tree",'w')
- mtreef.write(mftreef)
- mtreef.close()
- os.system("mv %s %s"%(outputDir+os.sep+"parsnp.final.tree",outputDir+os.sep+"parsnp.tree"))
- except IOError:
- logger.error("Cannot process {} output, skipping midpoint reroot..\n".format("fasttree" if args.use_fasttree else "RaxML"))
-
-
- if len(use_gingr) > 0:
- logger.info("Creating Gingr input file..")
- if xtrafast or 1:
- #if newick available, add
- #new flag to update branch lengths
- run_command("harvesttools --midpoint-reroot -u -q -i "+outputDir+os.sep+"parsnp.ggr -o "+outputDir+os.sep+"parsnp.ggr -n %s"%(outputDir+os.sep+"parsnp.tree "))
+ if len(use_gingr) > 0:
+ logger.info("Creating Gingr input file..")
+ if xtrafast or 1:
+ #if newick available, add
+ #new flag to update branch lengths
+ run_command("harvesttools --midpoint-reroot -u -q -i "+outputDir+os.sep+"parsnp.ggr -o "+outputDir+os.sep+"parsnp.ggr -n %s"%(outputDir+os.sep+"parsnp.tree "))
if float(elapsed)/float(60.0) > 60:
@@ -1441,7 +1461,7 @@ Please verify recruited genomes are all strain of interest""")
filepres = 0
logger.info("Parsnp finished! All output available in %s"%(outputDir))
logger.debug("Validating output directory contents")
- if os.path.exists("%sparsnp.tree"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.tree"%(outputDir+os.sep)) > 0:
+ if args.skip_phylogeny or os.path.exists("%sparsnp.tree"%(outputDir+os.sep)) and os.path.getsize("%sparsnp.tree"%(outputDir+os.sep)) > 0:
filepres+=1
else:
logger.error("parsnp.tree:\t\tnewick format tree is missing!")
@@ -1453,8 +1473,8 @@ Please verify recruited genomes are all strain of interest""")
filepres+=1
else:
logger.error("parsnp.xmfa:\t\tXMFA formatted multi-alignment is missing")
- if filepres != 3:
- logger.critical("Output files missing, something went wrong. Check logs and relaunch or contact developers for assistance")
+ # if filepres != 3:
+ # logger.critical("Output files missing, something went wrong. Check logs and relaunch or contact developers for assistance")
if os.path.exists("%sblocks"%(outputDir+os.sep)):
os.rmdir("%sblocks"%(outputDir+os.sep))
=====================================
src/Makefile.am
=====================================
@@ -1,5 +1,5 @@
parsnp_core_CXXFLAGS = -fopenmp -O2 -m64 -funroll-all-loops -fomit-frame-pointer -ftree-vectorize
-parsnp_core_LDFLAGS = -fopenmp -lstdc++ -lpthread -std=gnu++0x -Wl,-rpath,$(libmuscle)/lib -L$(libmuscle)/lib -lMUSCLE-3.7
+parsnp_core_LDFLAGS = -fopenmp -lstdc++ -lpthread -std=gnu++0x -L${CONDA_PREFIX}/lib -lMUSCLE-3.7
bin_PROGRAMS = parsnp_core
parsnp_core_SOURCES = MuscleInterface.cpp MuscleInterface.h parsnp.cpp parsnp.hh LCB.cpp LCB.hh LCR.cpp LCR.hh TMum.cpp TMum.hh Converter.cpp Converter.hh ./ext/iniFile.cpp ./ext/iniFile.h
bindir = $(top_srcdir)/bin
View it on GitLab: https://salsa.debian.org/med-team/parsnp/-/commit/67776d1736ef2e1557d11042d0106e33d7387103
--
View it on GitLab: https://salsa.debian.org/med-team/parsnp/-/commit/67776d1736ef2e1557d11042d0106e33d7387103
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220216/5469213a/attachment-0001.htm>
More information about the debian-med-commit mailing list