[med-svn] [Git][med-team/gubbins][upstream] New upstream version 3.4.3
Alexandre Detiste (@detiste-guest)
gitlab at salsa.debian.org
Sat Sep 27 14:05:04 BST 2025
Alexandre Detiste pushed to branch upstream at Debian Med / gubbins
Commits:
88ec10f1 by Alexandre Detiste at 2025-09-27T14:26:25+02:00
New upstream version 3.4.3
- - - - -
16 changed files:
- VERSION
- environment.yml
- python/gubbins/PreProcessFasta.py
- python/gubbins/common.py
- python/gubbins/pyjar.py
- python/gubbins/run_gubbins.py
- python/gubbins/tests/test_alignment_python_methods.py
- python/gubbins/tests/test_dependencies.py
- python/gubbins/tests/test_tree_methods.py
- python/gubbins/tests/test_utils.py
- python/gubbins/treebuilders.py
- python/setup.py
- src/gubbins.c
- src/gubbins.h
- src/main.c
- tests/check_gubbins.c
Changes:
=====================================
VERSION
=====================================
@@ -1 +1 @@
-3.4
+3.4.2
=====================================
environment.yml
=====================================
@@ -2,7 +2,6 @@ name: gubbins_env
channels:
- conda-forge
- bioconda
- - defaults
- r
dependencies:
# python
@@ -30,10 +29,11 @@ dependencies:
- numba
# phylogenetics
- raxml=8.2.12
- - iqtree>=2.2
+ - iqtree>=2.2,<3
- rapidnj
- raxml-ng=1.0.1
- fasttree=2.1.10
+ - veryfasttree
# Scripts
- ska2>=0.3.0
# R
=====================================
python/gubbins/PreProcessFasta.py
=====================================
@@ -99,7 +99,7 @@ class PreProcessFasta(object):
if number_of_included_alignments <= 1:
sys.exit("Not enough sequences are left after removing duplicates.Please check you input data.")
- with open(output_filename, "w+") as output_handle:
+ with open(output_filename, "w") as output_handle:
AlignIO.write(MultipleSeqAlignment(output_alignments), output_handle, "fasta")
return taxa_to_remove
@@ -112,3 +112,22 @@ class PreProcessFasta(object):
for record in alignment:
sequence_names.append(record.id)
return sequence_names
+
+ def get_alignment_length(self, format = "fasta"):
+ alignment = AlignIO.read(self.input_filename, format)
+ return alignment.get_alignment_length()
+
+ def get_alignment_information(self, format = "fasta"):
+ sequence_names = []
+ base_frequencies = [0,0,0,0]
+ alignment = AlignIO.read(self.input_filename, format)
+ alignment_length = alignment.get_alignment_length()
+
+ for record in alignment:
+ sequence_names.append(record.id)
+ for index,base in enumerate(['A','C','G','T']):
+ base_frequencies[index] += (record.seq.count(base) + record.seq.count(base.lower()))
+
+ base_frequencies = [b/(alignment_length*len(sequence_names)) for b in base_frequencies]
+
+ return alignment_length, sequence_names, base_frequencies
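
A quick illustration (not part of the diff) of the frequency arithmetic in the new get_alignment_information(): counts are averaged over every cell of the alignment, so gaps and Ns dilute the four frequencies rather than being excluded. Using an invented two-record alignment:

# Illustrative sketch, same arithmetic as PreProcessFasta.get_alignment_information()
from io import StringIO
from Bio import AlignIO

toy = AlignIO.read(StringIO(">s1\nACGT-\n>s2\nAAGTN\n"), "fasta")
length = toy.get_alignment_length()                 # 5
names = [record.id for record in toy]               # ['s1', 's2']
counts = [0, 0, 0, 0]
for record in toy:
    for index, base in enumerate('ACGT'):
        counts[index] += record.seq.count(base) + record.seq.count(base.lower())
frequencies = [c / (length * len(names)) for c in counts]
# counts == [3, 1, 2, 2]; frequencies == [0.3, 0.1, 0.2, 0.2]
# The gap and the N still count towards the denominator, so the values sum to 0.8.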
=====================================
python/gubbins/common.py
=====================================
@@ -40,21 +40,32 @@ from Bio.Seq import Seq
# Gubbins imports
from gubbins.PreProcessFasta import PreProcessFasta
from gubbins.ValidateFastaAlignment import ValidateFastaAlignment
-from gubbins.treebuilders import FastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
+from gubbins.treebuilders import FastTree, VeryFastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
from gubbins.pyjar import jar, get_base_patterns
from gubbins import utils
from gubbins.__init__ import version
from gubbins.pyjar import jar, get_base_patterns, Pyjar
-from gubbins.treebuilders import FastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
+from gubbins.treebuilders import FastTree, VeryFastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
# Phylogenetic models valid for each algorithm
tree_models = {
'star': ['JC','GTRCAT','GTRGAMMA'],
- 'raxml': ['JC','K2P','HKY','GTRCAT','GTRGAMMA'],
+ 'raxml': ['JC','K2P','HKY','GTRGAMMA'],
'raxmlng': ['JC','K2P','HKY','GTR','GTRGAMMA'],
'iqtree': ['JC','K2P','HKY','GTR','GTRGAMMA'],
'fasttree': ['JC','GTRCAT','GTRGAMMA'],
- 'rapidnj': ['JC','K2P']
+ 'rapidnj': ['JC','K2P'],
+ 'veryfasttree': ['JC','GTRCAT','GTRGAMMA']
+}
+
+suffix_dict = {
+ 'star': '.snp_sites.aln',
+ 'raxml': '.snp_sites.aln',
+ 'raxmlng': '.phylip',
+ 'iqtree': '.phylip',
+ 'fasttree': '.snp_sites.aln',
+ 'rapidnj': '.snp_sites.aln',
+ 'veryfasttree': '.snp_sites.aln'
}
def parse_and_run(input_args, program_description=""):
@@ -95,21 +106,12 @@ def parse_and_run(input_args, program_description=""):
# Select the algorithms used for the first iteration
current_tree_builder, current_model_fitter, current_model, current_recon_model, extra_tree_arguments, extra_model_arguments, custom_model, custom_recon_model = return_algorithm_choices(input_args,1)
check_model_validity(current_model,current_tree_builder,input_args.mar,current_recon_model,current_model_fitter,custom_model, custom_recon_model)
- # Initialise tree builder
- tree_builder = return_algorithm(current_tree_builder, current_model, input_args, node_labels = internal_node_label_prefix, extra = extra_tree_arguments)
- alignment_suffix = tree_builder.alignment_suffix
- methods_log = update_methods_log(methods_log, method = tree_builder, step = 'Tree constructor (1st iteration)')
- # Initialise model fitter
- model_fitter = return_algorithm(current_model_fitter, current_recon_model, input_args, node_labels = internal_node_label_prefix, extra = extra_model_arguments)
- methods_log = update_methods_log(methods_log, method = model_fitter, step = 'Model fitter (1st iteration)')
- # Initialise sequence reconstruction if MAR
- if input_args.mar:
- sequence_reconstructor = return_algorithm(input_args.seq_recon, current_recon_model, input_args, node_labels = internal_node_label_prefix, extra = input_args.seq_recon_args)
- methods_log = update_methods_log(methods_log, method = sequence_reconstructor, step = 'Sequence reconstructor (1st iteration)')
# Initialise IQtree
tree_dater = IQTree(threads = input_args.threads,
model = current_model,
+ invariant_proportion = 0,
+ constant_base_counts = [0.0,0.0,0.0,0.0],
verbose = input_args.verbose
)
@@ -126,6 +128,12 @@ def parse_and_run(input_args, program_description=""):
gaps_alignment_filename = base_filename + ".gaps.snp_sites.aln"
gaps_vcf_filename = base_filename + ".gaps.vcf"
joint_sequences_filename = base_filename + ".seq.joint.aln"
+ sequence_names_fn = base_filename + ".gaps.sequence_names.csv"
+ base_patterns_fn = base_filename + ".gaps.base_patterns.csv"
+
+ # Filter the input alignment and save as temporary alignment file
+ # Create temporary directory for storing working copies of input files
+ temp_working_dir = tempfile.mkdtemp(dir=os.getcwd())
# If restarting from a previous run
starting_iteration = 1
@@ -143,55 +151,57 @@ def parse_and_run(input_args, program_description=""):
sys.stderr.write('Resuming Gubbins analysis at iteration ' + str(starting_iteration) + '\n')
input_args.starting_tree = input_args.resume
current_tree_name = input_args.starting_tree
+ os.symlink(os.path.join(current_directory,snp_alignment_filename),os.path.join(temp_working_dir,snp_alignment_filename))
# Check if intermediate files from a previous run exist
intermediate_files = [basename + ".iteration_"]
if not input_args.no_cleanup and input_args.resume is None:
utils.delete_files(".", intermediate_files, "", input_args.verbose)
if utils.do_files_exist(".", intermediate_files, "", input_args.verbose) and input_args.resume is None:
- sys.exit("Intermediate files from a previous run exist. Please rerun without the --no_cleanup option "
- "to automatically delete them or with the --use_time_stamp to add a unique prefix.")
-
- # Filter the input alignment and save as temporary alignment file
- # Create temporary directory for storing working copies of input files
- temp_working_dir = tempfile.mkdtemp(dir=os.getcwd())
+ sys.exit("Intermediate files from a previous run exist. Please rerun without the --no-cleanup option "
+ "to automatically delete them or with the --use-time-stamp to add a unique prefix.")
# Check if the input files exist and have the right format
printer.print("\nChecking input alignment file...")
if not os.path.exists(input_args.alignment_filename):
sys.exit("The input alignment file " + input_args.alignment_filename + " does not exist")
- temp_alignment_filename = temp_working_dir + "/" + base_filename
- shutil.copyfile(input_args.alignment_filename, temp_alignment_filename)
- input_args.alignment_filename = temp_alignment_filename
- if not ValidateFastaAlignment(temp_alignment_filename).is_input_fasta_file_valid():
+ if not ValidateFastaAlignment(input_args.alignment_filename).is_input_fasta_file_valid():
sys.exit("The input alignment file " + input_args.alignment_filename + " is invalid")
+
# Filter the input alignment and save as temporary alignment file
printer.print("\nFiltering input alignment...")
- pre_process_fasta = PreProcessFasta(temp_alignment_filename,
+ temp_alignment_filename = os.path.join(temp_working_dir,base_filename)
+ pre_process_fasta = PreProcessFasta(input_args.alignment_filename,
input_args.verbose,
input_args.filter_percentage)
taxa_removed = pre_process_fasta.remove_duplicate_sequences_and_sequences_missing_too_much_data(
temp_alignment_filename, input_args.remove_identical_sequences)
+ # Get filtered alignment information
+ filtered_alignment = PreProcessFasta(temp_alignment_filename,
+ input_args.verbose,
+ 0.0)
+ overall_alignment_length, sequence_names_in_alignment, base_frequencies = filtered_alignment.get_alignment_information()
+ number_of_sequences_in_alignment = len(sequence_names_in_alignment)
+ current_alignment_length = overall_alignment_length
+ all_snp_alignment_length = overall_alignment_length
+
# Check on number of sequences remaining in alignment after validation and processing
if input_args.pairwise:
- if number_of_sequences_in_alignment(input_args.alignment_filename) != 2:
+ if number_of_sequences_in_alignment != 2:
sys.exit("Pairwise mode should only be used for two sequences.")
- else:
- if number_of_sequences_in_alignment(input_args.alignment_filename) < 3:
+ elif number_of_sequences_in_alignment < 3:
sys.exit("Three or more sequences are required for a meaningful phylogenetic analysis.")
# If outgroup is specified, check it is still in the alignment
if input_args.outgroup is not None:
- if input_args.outgroup in taxa_removed:
- sys.stderr.write('Outgroup removed due to proportion of missing bases\n')
+ if input_args.outgroup not in sequence_names_in_alignment:
+ sys.stderr.write('Outgroup not in filtered alignment - may have been removed due to proportion of missing bases\n')
sys.exit(1)
# Initialise tree dating algorithm if dates supplied
if input_args.date is not None:
if os.path.isfile(input_args.date):
- # Get sequence names from alignment
- sequence_names_in_alignment = pre_process_fasta.get_sequence_names()
# Edit taxon names as in tree
new_date_file = os.path.join(temp_working_dir,basename + '.dates')
with open(input_args.date,'r') as in_dates, open(new_date_file,'w') as out_dates:
@@ -230,7 +240,9 @@ def parse_and_run(input_args, program_description=""):
printer.print(["\nRunning Gubbins to detect SNPs...", gubbins_command])
try:
subprocess.check_call(gubbins_command, shell=True)
- except subprocess.SubprocessError:
+ except subprocess.SubprocessError as e:
+ if e.output is not None:
+ print("Gubbins error: " + e.output)
sys.exit("Gubbins crashed, please ensure you have enough free memory")
printer.print("...done. Run time: {:.2f} s".format(time.time() - start_time))
reconvert_fasta_file(snp_alignment_filename, snp_alignment_filename)
@@ -241,12 +253,58 @@ def parse_and_run(input_args, program_description=""):
printer.print("\n*** Iteration " + str(i) + " ***")
# Define file names
+ alignment_suffix = get_alignment_suffix(current_tree_builder)
if i == 1:
previous_tree_name = input_args.starting_tree
alignment_filename = base_filename + alignment_suffix
+ pre_process_snp_fasta = PreProcessFasta(snp_alignment_filename,
+ input_args.verbose,
+ 0.0)
+ all_snp_alignment_length = pre_process_snp_fasta.get_alignment_length()
+ current_alignment_length = all_snp_alignment_length
+ os.symlink(os.path.join(current_directory,snp_alignment_filename),os.path.join(temp_working_dir,snp_alignment_filename))
else:
previous_tree_name = current_tree_name
alignment_filename = previous_tree_name + alignment_suffix
+ pre_process_snp_fasta = PreProcessFasta(alignment_filename,
+ input_args.verbose,
+ 0.0)
+ if alignment_suffix.endswith('phylip'):
+ current_alignment_length = pre_process_snp_fasta.get_alignment_length('phylip-relaxed')
+ else:
+ current_alignment_length = pre_process_snp_fasta.get_alignment_length()
+
+ # Initialise model fitter
+ model_fitter = return_algorithm(current_model_fitter,
+ current_recon_model,
+ input_args,
+ all_snp_alignment_length,
+ overall_alignment_length,
+ base_frequencies,
+ node_labels = internal_node_label_prefix,
+ extra = extra_model_arguments)
+ methods_log = update_methods_log(methods_log, method = model_fitter, step = 'Model fitter (1st iteration)')
+ # Initialise sequence reconstruction if MAR
+ if input_args.mar:
+ mar_base_frequencies = base_frequencies
+ if input_args.seq_recon == "iqtree":
+ mar_base_frequencies = [0.0,0.0,0.0,0.0]
+ sequence_reconstructor = return_algorithm(input_args.seq_recon,
+ current_recon_model,
+ input_args,
+ all_snp_alignment_length,
+ overall_alignment_length,
+ mar_base_frequencies,
+ node_labels = internal_node_label_prefix,
+ extra = input_args.seq_recon_args)
+ methods_log = update_methods_log(methods_log, method = sequence_reconstructor, step = 'Sequence reconstructor (1st iteration)')
+ # Copy in phylip file used for RAxML MAR
+ if sequence_reconstructor.alignment_suffix.endswith('.phylip'):
+ recon_alignment_file = os.path.join(base_directory,temp_working_dir,os.path.basename(temp_alignment_filename) + '.phylip')
+ if not os.path.isfile(recon_alignment_file):
+ os.symlink(os.path.abspath(base_filename) + '.phylip',
+ recon_alignment_file)
+
# 1.1. Construct the tree-building command depending on the iteration and employed options
if i == 2 or input_args.resume is not None:
@@ -258,46 +316,84 @@ def parse_and_run(input_args, program_description=""):
current_model = select_best_models(alignment_filename,
basename,
current_tree_builder,
+ all_snp_alignment_length,
+ overall_alignment_length,
+ base_frequencies,
input_args)
input_args.model = current_model
if current_tree_builder != 'iqtree':
check_model_validity(current_model,current_tree_builder,input_args.mar,current_recon_model,current_model_fitter,custom_model, custom_recon_model)
printer.print("Phylogeny will be constructed with a " + current_model + " model")
# Initialise tree builder
- tree_builder = return_algorithm(current_tree_builder, current_model, input_args, node_labels = internal_node_label_prefix, extra = extra_tree_arguments)
- alignment_suffix = tree_builder.alignment_suffix
+ tree_builder = return_algorithm(current_tree_builder,
+ current_model,
+ input_args,
+ current_alignment_length,
+ overall_alignment_length,
+ base_frequencies,
+ node_labels = internal_node_label_prefix,
+ extra = extra_tree_arguments)
methods_log = update_methods_log(methods_log, method = tree_builder, step = 'Tree constructor (later iterations)')
# Update date model (should not make a difference)
if input_args.date is not None:
tree_dater.model = current_model
+ else:
+ # Initialise tree builder
+ tree_builder = return_algorithm(current_tree_builder,
+ current_model,
+ input_args,
+ current_alignment_length,
+ overall_alignment_length,
+ base_frequencies,
+ node_labels = internal_node_label_prefix,
+ extra = extra_tree_arguments)
+ alignment_suffix = tree_builder.alignment_suffix
+ methods_log = update_methods_log(methods_log, method = tree_builder, step = 'Tree constructor (1st iteration)')
current_basename = basename + ".iteration_" + str(i)
current_tree_name = current_basename + ".tre"
- if previous_tree_name and input_args.first_tree_builder != "star":
- tree_building_command = tree_builder.tree_building_command(
- os.path.abspath(alignment_filename), os.path.abspath(previous_tree_name), current_basename)
- else:
- tree_building_command = tree_builder.tree_building_command(
- os.path.abspath(alignment_filename), "", current_basename)
built_tree = temp_working_dir + "/" + tree_builder.tree_prefix + current_basename + tree_builder.tree_suffix
+ if i == starting_iteration:
+ for fn in [gaps_alignment_filename, gaps_vcf_filename, joint_sequences_filename, alignment_filename, base_filename + ".start"]:
+ target_file = os.path.join(base_directory,temp_working_dir,os.path.basename(fn))
+ if not os.path.isfile(target_file):
+ os.symlink(os.path.abspath(fn), target_file)
+ else:
+ for fn in [previous_tree_name, alignment_filename]:
+ target_path = os.path.join(base_directory,temp_working_dir,os.path.basename(fn))
+ if os.path.isfile(target_path):
+ os.remove(target_path)
+ os.symlink(os.path.abspath(fn),
+ target_path)
# 1.2. Construct the phylogenetic tree
if input_args.starting_tree is not None and i == 1:
printer.print("\nCopying the starting tree...")
shutil.copyfile(input_args.starting_tree, current_tree_name)
else:
-
- printer.print(["\nConstructing the phylogenetic tree with " + tree_builder.executable + "...",
- tree_building_command])
if current_tree_builder == "star":
+ tree_building_command = tree_builder.tree_building_command(
+ os.path.abspath(alignment_filename), "", current_basename)
# Move star tree into temp dir
shutil.move(tree_builder.tree_prefix + current_basename + tree_builder.tree_suffix,
built_tree)
else:
try:
+ # Prepare tmp directory for tree building and sequence reconstruction
os.chdir(temp_working_dir)
+ printer.print("Entering working directory " + temp_working_dir)
+ if previous_tree_name:
+ tree_building_command = tree_builder.tree_building_command(
+ os.path.abspath(alignment_filename), os.path.abspath(previous_tree_name), current_basename)
+ else:
+ tree_building_command = tree_builder.tree_building_command(
+ os.path.abspath(alignment_filename), "", current_basename)
+ printer.print(["\nConstructing the phylogenetic tree with " + tree_builder.executable + "...",
+ tree_building_command])
subprocess.check_call(tree_building_command, shell=True)
- except subprocess.SubprocessError:
+ except subprocess.SubprocessError as e:
+ if e.output is not None:
+ sys.stderr.write('Error: ' + e.output + '\n')
sys.exit("Failed while building the tree.")
os.chdir(current_directory)
shutil.copyfile(built_tree, current_tree_name)
@@ -313,7 +409,7 @@ def parse_and_run(input_args, program_description=""):
# 3.1. Construct the command for ancestral state reconstruction depending on the iteration and employed options
ancestral_sequence_basename = current_basename + ".internal"
- current_tree_name_with_internal_nodes = current_tree_name + ".internal"
+ current_tree_name_with_internal_nodes = os.path.join(temp_working_dir, current_tree_name + ".internal")
if not input_args.mar:
@@ -329,14 +425,18 @@ def parse_and_run(input_args, program_description=""):
get_base_patterns(base_filename,
input_args.verbose,
threads = input_args.threads)
+ for fn in [sequence_names_fn, base_patterns_fn]:
+ os.symlink(os.path.join(base_directory,fn),
+ os.path.join(base_directory,temp_working_dir,os.path.basename(fn)))
# 3.3b. Record in methods log (just once)
pyjar_method = Pyjar(current_model)
methods_log = update_methods_log(methods_log, method = pyjar_method, step = 'Sequence reconstructor')
# 3.4a. Re-fit full polymorphism alignment to new tree
+ os.chdir(temp_working_dir)
model_fitting_command = model_fitter.model_fitting_command(snp_alignment_filename,
- os.path.abspath(temp_rooted_tree),
- temp_working_dir + '/' + current_basename)
+ os.path.abspath(temp_rooted_tree),
+ temp_working_dir + '/' + current_basename)
printer.print(["\nFitting substitution model to tree...", model_fitting_command])
try:
subprocess.check_call(model_fitting_command, shell = True)
@@ -364,10 +464,10 @@ def parse_and_run(input_args, program_description=""):
# If this fails, continue to generate rest of output
sys.stderr.write("Unable to use time calibrated tree for sequence reconstruction in "
" iteration " + str(i))
- else:
- # Set root of reconstruction tree to match that of the current tree
- # Cannot just midpoint root both, because the branch lengths differ between them
- harmonise_roots(recontree_filename, temp_rooted_tree, algorithm = model_fitter.name)
+
+ # Set root of reconstruction tree to match that of the current tree
+ # Cannot just midpoint root both, because the branch lengths differ between them
+ harmonise_roots(recontree_filename, temp_rooted_tree, algorithm = model_fitter.name)
printer.print(["\nRunning joint ancestral reconstruction with pyjar"])
jar(sequence_names = ordered_sequence_names, # complete polymorphism alignment
@@ -389,33 +489,41 @@ def parse_and_run(input_args, program_description=""):
temp_rooted_tree,
current_tree_name_with_internal_nodes,
"pyjar")
+ os.chdir(current_directory)
printer.print(["\nDone transfer"])
else:
# 3.2b. Marginal ancestral reconstruction with RAxML, RAxML-NG or IQTree
- sequence_reconstruction_command = sequence_reconstructor.internal_sequence_reconstruction_command(
- os.path.abspath(base_filename + alignment_suffix), os.path.abspath(temp_rooted_tree),
- ancestral_sequence_basename)
- raw_internal_sequence_filename \
- = temp_working_dir + "/" + sequence_reconstructor.asr_prefix \
- + ancestral_sequence_basename + sequence_reconstructor.asr_suffix
- processed_internal_sequence_filename = temp_working_dir + "/" + ancestral_sequence_basename + ".aln"
- raw_internal_rooted_tree_filename \
- = temp_working_dir + "/" + sequence_reconstructor.asr_tree_prefix \
- + ancestral_sequence_basename + sequence_reconstructor.asr_tree_suffix
+ os.chdir(temp_working_dir)
+ sequence_reconstruction_command = \
+ sequence_reconstructor.internal_sequence_reconstruction_command(
+ os.path.abspath(base_filename + alignment_suffix),
+ os.path.abspath(temp_rooted_tree),
+ ancestral_sequence_basename)
+ raw_internal_sequence_filename = os.path.join(base_directory,
+ temp_working_dir,
+ os.path.basename(sequence_reconstructor.asr_prefix + ancestral_sequence_basename + sequence_reconstructor.asr_suffix))
+ processed_internal_sequence_filename = os.path.join(base_directory,
+ temp_working_dir,
+ os.path.basename(ancestral_sequence_basename + ".aln"))
+
+ raw_internal_rooted_tree_filename = os.path.join(base_directory,
+ temp_working_dir,
+ os.path.basename(sequence_reconstructor.asr_tree_prefix + ancestral_sequence_basename + sequence_reconstructor.asr_tree_suffix))
# 3.3b. Reconstruct the ancestral sequence
printer.print(["\nReconstructing ancestral sequences with " + sequence_reconstructor.executable + "...",
sequence_reconstruction_command])
- os.chdir(temp_working_dir)
try:
subprocess.check_call(sequence_reconstruction_command, shell=True)
- except subprocess.SubprocessError:
- sys.exit("Failed while reconstructing the ancestral sequences.")
+ except subprocess.SubprocessError as e:
+ if e.output is None:
+ e.output = 'Unspecified error'
+ sys.exit("Failed while reconstructing the ancestral sequences: " + e.output)
os.chdir(current_directory)
# 3.4b. Join ancestral sequences with given sequences
- current_tree_name_with_internal_nodes = current_tree_name + ".internal"
+ current_tree_name_with_internal_nodes = os.path.join(temp_working_dir,current_tree_name + ".internal")
sequence_reconstructor.convert_raw_ancestral_states_to_fasta(raw_internal_sequence_filename,
processed_internal_sequence_filename)
concatenate_fasta_files([snp_alignment_filename, processed_internal_sequence_filename],
@@ -453,11 +561,13 @@ def parse_and_run(input_args, program_description=""):
gubbins_command = create_gubbins_command(
gubbins_exec, gaps_alignment_filename, gaps_vcf_filename, current_tree_name,
input_args.alignment_filename, input_args.min_snps, input_args.min_window_size, input_args.max_window_size,
- input_args.p_value, input_args.trimming_ratio, input_args.extensive_search, input_args.threads)
+ input_args.p_value, input_args.trimming_ratio, input_args.extensive_search, tree_builder.invariant_site_correction, input_args.threads)
printer.print(["\nRunning Gubbins to detect recombinations...", gubbins_command])
try:
subprocess.check_call(gubbins_command, shell=True)
- except subprocess.SubprocessError:
+ except subprocess.SubprocessError as e:
+ if e.output is None:
+ print("Gubbins failed: " + e.output)
sys.exit("Failed while running Gubbins. Please ensure you have enough free memory")
printer.print("...done. Run time: {:.2f} s".format(time.time() - start_time))
shutil.copyfile(current_tree_name, current_tree_name_with_internal_nodes)
@@ -492,8 +602,8 @@ def parse_and_run(input_args, program_description=""):
bootstrap_command = tree_builder.bootstrapping_command(os.path.abspath(final_aln), os.path.abspath(current_tree_name), temp_working_dir + "/" + current_basename)
try:
subprocess.check_call(bootstrap_command, shell=True)
- except subprocess.SubprocessError:
- sys.exit("Failed while running bootstrap analysis.")
+ except subprocess.SubprocessError as e:
+ sys.exit("Failed while running bootstrap analysis: " + e.output)
transfer_bootstraps_to_tree(temp_working_dir + "/" + current_basename + ".tre.bootstrapped",
os.path.abspath(current_tree_name),
current_basename + ".tre.bootstrapped",
@@ -505,7 +615,13 @@ def parse_and_run(input_args, program_description=""):
if current_tree_builder == "raxmlng":
bootstrap_utility = tree_builder
else:
- bootstrap_utility = return_algorithm("raxmlng", current_model, input_args, node_labels = "")
+ bootstrap_utility = return_algorithm("raxmlng",
+ current_model,
+ input_args,
+ current_alignment_length,
+ overall_alignment_length,
+ base_frequencies,
+ node_labels = "")
# Generate alignments for bootstrapping if FastTree being used
if current_tree_builder == "fasttree":
bootstrap_aln = generate_bootstrap_alignments(bootstrap_aln,
@@ -515,8 +631,10 @@ def parse_and_run(input_args, program_description=""):
bootstrap_command = tree_builder.bootstrapping_command(os.path.abspath(bootstrap_aln), os.path.abspath(current_tree_name), current_basename, os.path.abspath(temp_working_dir))
try:
subprocess.check_call(bootstrap_command, shell=True)
- except subprocess.SubprocessError:
- sys.exit("Failed while running bootstrap analysis.")
+ except subprocess.SubprocessError as e:
+ if e.output is None:
+ e.output = 'Unspecified error'
+ sys.exit("Failed while running bootstrap analysis: " + e.output)
# Annotate the final tree using the bootstraps
bootstrapped_trees_file = tree_builder.get_bootstrapped_trees_file(temp_working_dir,current_basename)
annotation_command = bootstrap_utility.annotate_tree_using_bootstraps_command(os.path.abspath(final_aln),
@@ -527,8 +645,10 @@ def parse_and_run(input_args, program_description=""):
transfer = input_args.transfer_bootstrap)
try:
subprocess.check_call(annotation_command, shell=True)
- except subprocess.SubprocessError:
- sys.exit("Failed while annotating final tree with bootstrapping results.")
+ except subprocess.SubprocessError as e:
+ if e.output is None:
+ e.output = 'Unspecified error'
+ sys.exit("Failed while annotating final tree with bootstrapping results: " + e.output)
printer.print("...done. Run time: {:.2f} s".format(time.time() - start_time))
# 7. Run node branch support analysis if requested
@@ -539,8 +659,10 @@ def parse_and_run(input_args, program_description=""):
os.path.abspath(temp_working_dir))
try:
subprocess.check_call(sh_test_command, shell=True)
- except subprocess.SubprocessError:
- sys.exit("Failed while running SH test.")
+ except subprocess.SubprocessError as e:
+ if e.output is None:
+ e.output = 'Unspecified error'
+ sys.exit("Failed while running SH test: " + e.output)
reformat_sh_support(current_tree_name,
os.path.abspath(temp_working_dir),
current_tree_name,
@@ -557,15 +679,18 @@ def parse_and_run(input_args, program_description=""):
outgroup = input_args.outgroup)
try:
subprocess.check_call(dating_command, shell=True)
- except subprocess.SubprocessError:
+ except subprocess.SubprocessError as e:
# If this fails, continue to generate rest of output
- sys.stderr.write("Failed running tree time calibration with LSD.")
+ if e.output is None:
+ e.output = 'Unspecified error'
+ sys.stderr.write("Failed running tree time calibration with LSD: " + e.output)
input_args.date = None
# Create the final output
printer.print("\nCreating the final output...")
if input_args.prefix is None:
input_args.prefix = basename
+ shutil.copyfile(os.path.join(temp_working_dir, current_tree_name + ".internal"), current_tree_name + ".internal")
output_filenames_to_final_filenames = translation_of_filenames_to_final_filenames(
current_tree_name, input_args.prefix)
utils.rename_files(output_filenames_to_final_filenames)
@@ -743,18 +868,63 @@ def return_algorithm_choices(args,i):
# Return choices
return current_tree_builder, current_model_fitter, current_model, current_recon_model, extra_tree_arguments, extra_recon_arguments, custom_model, current_recon_model
-def return_algorithm(algorithm_choice, model, input_args, node_labels = None, extra = None):
+def return_algorithm(algorithm_choice, model, input_args, snp_aln_length, complete_aln_length, base_frequencies, node_labels = None, extra = None):
initialised_algorithm = None
+ constant_sites = (complete_aln_length - snp_aln_length)
+ constant_base_counts = [int(b*constant_sites) for b in base_frequencies]
if algorithm_choice == "fasttree":
- initialised_algorithm = FastTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
+ initialised_algorithm = FastTree(threads = input_args.threads,
+ model = model,
+ seed = input_args.seed,
+ bootstrap = input_args.bootstrap,
+ verbose = input_args.verbose,
+ additional_args = extra)
+ elif algorithm_choice == "veryfasttree":
+ initialised_algorithm = VeryFastTree(threads = input_args.threads,
+ model = model,
+ seed = input_args.seed,
+ bootstrap = input_args.bootstrap,
+ verbose = input_args.verbose,
+ additional_args = extra)
elif algorithm_choice == "raxml":
- initialised_algorithm = RAxML(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
+ initialised_algorithm = RAxML(threads = input_args.threads,
+ model = model,
+ constant_base_counts = constant_base_counts,
+ partition_length = snp_aln_length,
+ seed = input_args.seed,
+ bootstrap = input_args.bootstrap,
+ internal_node_prefix = node_labels,
+ verbose = input_args.verbose,
+ invariant_site_correction = input_args.invariant_site_correction,
+ additional_args = extra)
elif algorithm_choice == "raxmlng":
- initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
+ initialised_algorithm = RAxMLNG(threads = input_args.threads,
+ model = model,
+ seed = input_args.seed,
+ constant_base_counts = constant_base_counts,
+ bootstrap = input_args.bootstrap,
+ internal_node_prefix = node_labels,
+ verbose = input_args.verbose,
+ invariant_site_correction = input_args.invariant_site_correction,
+ additional_args = extra)
elif algorithm_choice == "iqtree":
- initialised_algorithm = IQTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra)
+ initialised_algorithm = IQTree(threads = input_args.threads,
+ model = model,
+ seed = input_args.seed,
+ invariant_proportion = constant_sites/complete_aln_length,
+ constant_base_counts = constant_base_counts,
+ bootstrap = input_args.bootstrap,
+ internal_node_prefix = node_labels,
+ verbose = input_args.verbose,
+ use_best = (model is None and input_args.best_model),
+ invariant_site_correction = input_args.invariant_site_correction,
+ additional_args = extra)
elif algorithm_choice == "rapidnj":
- initialised_algorithm = RapidNJ(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
+ initialised_algorithm = RapidNJ(threads = input_args.threads,
+ model = model,
+ bootstrap = input_args.bootstrap,
+ verbose = input_args.verbose,
+ additional_args = extra)
elif algorithm_choice == "star":
initialised_algorithm = Star()
else:
@@ -762,9 +932,13 @@ def return_algorithm(algorithm_choice, model, input_args, node_labels = None, ex
sys.exit()
return initialised_algorithm
-def select_best_models(snp_alignment_filename,basename,current_tree_builder,input_args):
+def select_best_models(snp_alignment_filename,basename,current_tree_builder,all_snp_alignment_length,complete_aln_length,base_frequencies,input_args):
+ constant_sites = (complete_aln_length - all_snp_alignment_length)
+ constant_base_counts = [int(b*constant_sites) for b in base_frequencies]
model_tester = IQTree(threads = input_args.threads,
model = 'GTR',
+ invariant_proportion = constant_sites/complete_aln_length,
+ constant_base_counts = constant_base_counts,
verbose = input_args.verbose
)
model_test_command = model_tester.run_model_comparison(snp_alignment_filename,basename)
@@ -798,27 +972,17 @@ def select_best_models(snp_alignment_filename,basename,current_tree_builder,inpu
def create_gubbins_command(gubbins_exec, alignment_filename, vcf_filename, current_tree_name,
original_alignment_filename, min_snps, min_window_size, max_window_size,
- p_value, trimming_ratio, extensive_search, num_threads):
+ p_value, trimming_ratio, extensive_search, invariant_site_correction, num_threads):
command = [gubbins_exec, "-r", "-v", vcf_filename, "-a", str(min_window_size),
"-b", str(max_window_size), "-f", original_alignment_filename, "-t", current_tree_name,
"-m", str(min_snps), "-p", str(p_value), "-i", str(trimming_ratio), "-n", str(num_threads)]
if extensive_search:
- command.append("-x")
+ command.append("-x")
+ if not invariant_site_correction:
+ command.append("-s")
command.append(alignment_filename)
return " ".join(command)
-def number_of_sequences_in_alignment(filename):
- return len(get_sequence_names_from_alignment(filename))
-
-
-def get_sequence_names_from_alignment(filename):
- sequence_names = []
- with open(filename, "r") as handle:
- for record in SeqIO.parse(handle, "fasta"):
- sequence_names.append(record.id)
- return sequence_names
-
-
def is_starting_tree_valid(starting_tree):
try:
Phylo.read(starting_tree, 'newick')
@@ -919,7 +1083,6 @@ def reroot_tree_with_outgroup(tree_name, outgroups):
clade_outgroups = get_monophyletic_outgroup(tree_name, outgroups)
tree = dendropy.Tree.get_from_path(tree_name, 'newick', preserve_underscores=True)
outgroup_mrca = tree.mrca(taxon_labels=clade_outgroups)
- print('Edge length is: ' + str(outgroup_mrca.edge.length))
tree.reroot_at_edge(outgroup_mrca.edge,
length1 = outgroup_mrca.edge.length/2,
length2 = outgroup_mrca.edge.length/2,
@@ -1096,6 +1259,7 @@ def transfer_internal_node_labels_to_tree(source_tree_filename, destination_tree
destination_tree,
is_bipartitions_updated=False)
if len(missing_bipartitions) > 0:
+ sys.stderr.write('Problem comparing the tree files ' + source_tree_filename + ' and ' + destination_tree_filename + '\n')
sys.stderr.write('Bipartitions missing when transferring node labels: ' + str([str(x) for x in missing_bipartitions]))
sys.exit(1)
@@ -1395,3 +1559,12 @@ def translation_of_dating_filenames_to_final_filenames(temp_working_dir,
os.path.join(temp_working_dir,basename + '.timetree.nwk'): prefix + '.final_tree.timetree.tre'
}
return dating_files
+
+def get_alignment_suffix(treebuilder_name):
+ suffix = None
+ if (treebuilder_name in suffix_dict):
+ suffix = suffix_dict[treebuilder_name]
+ else:
+ sys.stderr.write('Unknown treebuilding method\n')
+ sys.exit(1)
+ return suffix
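
The thread running through the common.py changes above is the invariant-site bookkeeping: the filtered alignment's length and base frequencies are measured once, the constant-site count is the difference between the full alignment and the SNP alignment, and those sites are split across A/C/G/T before being handed to the tree builders. A worked sketch with invented numbers:

# Invented numbers, mirroring the constant-site arithmetic in return_algorithm()
# and select_best_models() above.
complete_aln_length = 10000        # filtered input alignment
snp_aln_length = 250               # polymorphic sites given to the tree builder
base_frequencies = [0.30, 0.20, 0.20, 0.28]   # A, C, G, T from get_alignment_information()

constant_sites = complete_aln_length - snp_aln_length              # 9750
constant_base_counts = [int(f * constant_sites) for f in base_frequencies]
# -> [2925, 1950, 1950, 2730]
# These counts feed IQ-TREE's -fconst option and the Stamatakis ascertainment
# corrections used by the RAxML/RAxML-NG classes in treebuilders.py.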
=====================================
python/gubbins/pyjar.py
=====================================
@@ -66,7 +66,7 @@ def read_tree(treefile):
#Calculate Pij from Q matrix and branch length
def calculate_pij(branch_length,rate_matrix):
if branch_length==0:
- pij = numpy.full((4,4), numpy.NINF, dtype = numpy.float32)
+ pij = numpy.full((4,4), -numpy.inf, dtype = numpy.float32)
numpy.fill_diagonal(pij, 0.0)
else:
pij = numpy.array(numpy.log(linalg.expm(numpy.multiply(branch_length,rate_matrix))), dtype = numpy.float32) # modified
@@ -152,6 +152,30 @@ def read_info(infofile, type = 'raxml'):
f = [0.25,0.25,0.25,0.25]
r = [1.0,1.0,1.0,1.0,1.0,1.0]
elif type == 'iqtree':
+ # Base frequencies
+ words=line.split()
+ if "pi(A)" in line:
+ f[0] = float(words[2])
+ elif "pi(C)" in line:
+ f[1] = float(words[2])
+ elif "pi(G)" in line:
+ f[2] = float(words[2])
+ elif "pi(T)" in line:
+ f[3] = float(words[2])
+ # Rate estimates
+ if "A-C:" in line:
+ r[0] = float(words[1])
+ elif "A-G:" in line:
+ r[1] = float(words[1])
+ elif "A-T:" in line:
+ r[2] = float(words[1])
+ elif "C-G:" in line:
+ r[3] = float(words[1])
+ elif "C-T:" in line:
+ r[4] = float(words[1])
+ elif "G-T:" in line:
+ r[5] = float(words[1])
+
if line.startswith('Base frequencies:'):
words=line.split()
f=[float(words[3]), float(words[5]), float(words[7]), float(words[9])]
@@ -393,7 +417,7 @@ def iterate_over_base_patterns(column,
Cmat_null = numpy.array([0,1,2,3], dtype = numpy.uint8)
# Reset matrices
- Lmat.fill(numpy.NINF)
+ Lmat.fill(-numpy.inf)
Cmat[:] = Cmat_null
# Count unknown bases
@@ -605,7 +629,7 @@ def reconstruct_alignment_column(column_index,
# Generate data structures for reconstructions
num_nodes = len(tree.nodes())
- Lmat = numpy.full((num_nodes,4), numpy.NINF, dtype = numpy.float32)
+ Lmat = numpy.full((num_nodes,4), -numpy.inf, dtype = numpy.float32)
Cmat = numpy.full((num_nodes,4), [0,1,2,3], dtype = numpy.uint8)
reconstructed_base_indices = numpy.full(num_nodes, 8, dtype = numpy.uint8)
@@ -711,7 +735,7 @@ def jar(sequence_names = None,
child_nodes_array = numpy.empty(num_nodes, dtype=object)
leaf_node_list = []
node_labels = numpy.empty(num_nodes, dtype=object)
- node_pij = numpy.full((num_nodes,16), numpy.NINF, dtype=numpy.float32)
+ node_pij = numpy.full((num_nodes,16), -numpy.inf, dtype=numpy.float32)
postordered_nodes = numpy.arange(num_nodes, dtype=numpy.int32)
seed_node = None
seed_node_edge_truncation = True
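
Two notes on the pyjar.py hunks: numpy.NINF was removed in NumPy 2.0, hence the switch to -numpy.inf; and the new 'iqtree' branch of read_info() recovers base frequencies and exchangeabilities from the .iqtree report purely by whitespace splitting. A minimal sketch of that parsing on two invented report lines:

# Sketch only: the whitespace-split parsing used by the new iqtree branch of
# read_info(); the two strings below are invented examples of report lines.
f = [0.25, 0.25, 0.25, 0.25]
r = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
for line in ["  pi(A) = 0.327", "  A-C: 1.492"]:
    words = line.split()
    if "pi(A)" in line:
        f[0] = float(words[2])     # 'pi(A)' '=' '0.327' -> value at index 2
    if "A-C:" in line:
        r[0] = float(words[1])     # 'A-C:' '1.492' -> value at index 1
print(f[0], r[0])                  # 0.327 1.492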
=====================================
python/gubbins/run_gubbins.py
=====================================
@@ -58,13 +58,13 @@ def parse_input_args():
treeGroup = parser.add_argument_group('Tree building options')
treeGroup.add_argument('--tree-builder', '-t', help='Application to use for tree building',
default='raxml',
- choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'hybrid', 'rapidnj'])
+ choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'hybrid', 'rapidnj','veryfasttree'])
treeGroup.add_argument('--tree-args', help='Quoted string of further arguments passed to tree building algorithm'
' (start string with a space if there is a risk of being interpreted as a flag)',
default = None)
treeGroup.add_argument('--first-tree-builder', help='Application to use for building the first tree',
default=None,
- choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'rapidnj', 'star'])
+ choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'rapidnj', 'star','veryfasttree'])
treeGroup.add_argument('--first-tree-args', help='Further arguments passed to first tree building algorithm',
default = None)
treeGroup.add_argument('--outgroup', '-o', help='Outgroup name for rerooting. A list of comma separated '
@@ -78,6 +78,9 @@ def parse_input_args():
action = 'store_true')
treeGroup.add_argument('--seed', help='Set seed for reproducibility of analysis',
default = None, type = int)
+ treeGroup.add_argument('--invariant-site-correction',
+ help='Correct for invariant sites',
+ default = False, action = 'store_true')
modelGroup = parser.add_argument_group('Nucleotide substitution model options')
modelGroup.add_argument('--model', '-M', help='Nucleotide substitution model (not all available for all '
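
For reference, the new options can be exercised the same way the test suite further down does; a sketch (the alignment file name is a placeholder, and a builder that honours the correction, such as iqtree, is assumed):

# Sketch mirroring the test invocations; 'example.aln' is a placeholder.
from gubbins import common, run_gubbins

parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
                                        "--model", "HKY",
                                        "--invariant-site-correction",
                                        "--iterations", "3",
                                        "--threads", "1",
                                        "example.aln"]))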
=====================================
python/gubbins/tests/test_alignment_python_methods.py
=====================================
@@ -9,6 +9,7 @@ import unittest
import filecmp
import os
from gubbins import common
+from gubbins.PreProcessFasta import PreProcessFasta
modules_dir = os.path.dirname(os.path.abspath(common.__file__))
data_dir = os.path.join(modules_dir, 'tests', 'data')
@@ -16,11 +17,20 @@ data_dir = os.path.join(modules_dir, 'tests', 'data')
class TestAlignmentMethods(unittest.TestCase):
- def test_number_of_sequences_in_alignment(self):
- assert common.number_of_sequences_in_alignment(os.path.join(data_dir, 'small_alignment.aln')) == 5
+ def test_length_of_alignment(self):
+ preprocessfasta = PreProcessFasta(os.path.join(data_dir, 'small_alignment.aln'))
+ overall_alignment_length, sequence_names_in_alignment, base_frequencies = preprocessfasta.get_alignment_information()
+ assert overall_alignment_length == 4
+
+ def test_sequence_composition(self):
+ preprocessfasta = PreProcessFasta(os.path.join(data_dir, 'small_alignment.aln'))
+ overall_alignment_length, sequence_names_in_alignment, base_frequencies = preprocessfasta.get_alignment_information()
+ assert base_frequencies == [0.4,0.2,0.2,0.2]
def test_get_sequence_names_from_alignment(self):
- assert common.get_sequence_names_from_alignment(os.path.join(data_dir, 'small_alignment.aln')) == \
+ preprocessfasta = PreProcessFasta(os.path.join(data_dir, 'small_alignment.aln'))
+ overall_alignment_length, sequence_names_in_alignment, base_frequencies = preprocessfasta.get_alignment_information()
+ assert sequence_names_in_alignment == \
['sequence1', 'sequence2', 'sequence3', 'sequence4', 'sequence5']
def test_reinsert_gaps_into_fasta_file(self):
=====================================
python/gubbins/tests/test_dependencies.py
=====================================
@@ -91,17 +91,40 @@ class TestExternalDependencies(unittest.TestCase):
self.cleanup('multiple_recombinations')
assert exit_code == 0
+ def test_veryfasttree(self):
+ exit_code = 1
+ parser = run_gubbins.parse_input_args()
+ common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
+ "--verbose", "--iterations", "3",
+ "--threads", "1",
+ os.path.join(data_dir, 'multiple_recombinations.aln')]))
+ exit_code = self.check_for_output_files('multiple_recombinations')
+ self.cleanup('multiple_recombinations')
+ assert exit_code == 0
+
+ def test_veryfasttree_seed(self):
+ exit_code = 1
+ parser = run_gubbins.parse_input_args()
+ common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
+ "--verbose", "--iterations", "3",
+ "--seed","42",
+ "--threads", "1",
+ os.path.join(data_dir, 'multiple_recombinations.aln')]))
+ exit_code = self.check_for_output_files('multiple_recombinations')
+ self.cleanup('multiple_recombinations')
+ assert exit_code == 0
+
# Test resuming a default analysis
- def test_fasttree_resume(self):
+ def test_veryfasttree_resume(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
- common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
+ common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
"--verbose", "--iterations", "3",
"--threads", "1",
"--no-cleanup",
"--prefix", "original",
os.path.join(data_dir, 'multiple_recombinations.aln')]))
- common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
+ common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
"--verbose", "--iterations", "3",
"--resume", "multiple_recombinations.iteration_1.tre",
"--threads", "1",
@@ -635,6 +658,7 @@ class TestExternalDependencies(unittest.TestCase):
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
"--verbose", "--iterations", "3",
+ "--model","HKY",
"--bootstrap","10",
"--threads", "1",
os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -647,6 +671,7 @@ class TestExternalDependencies(unittest.TestCase):
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
"--verbose", "--iterations", "3",
+ "--model","HKY",
"--bootstrap","1000",
"--threads", "1",
os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -671,6 +696,7 @@ class TestExternalDependencies(unittest.TestCase):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng",
+ "--model","HKY",
"--verbose", "--iterations", "3",
"--bootstrap","100",
"--threads", "1",
@@ -695,6 +721,7 @@ class TestExternalDependencies(unittest.TestCase):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
+ "--model","JC",
"--model-fitter", "raxml",
"--verbose", "--iterations", "3",
"--bootstrap","10",
@@ -708,6 +735,7 @@ class TestExternalDependencies(unittest.TestCase):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng",
+ "--model","HKY",
"--verbose", "--iterations", "3",
"--bootstrap","100",
"--threads", "1",
@@ -721,6 +749,7 @@ class TestExternalDependencies(unittest.TestCase):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
+ "--model","HKY",
"--verbose", "--iterations", "3",
"--bootstrap","10",
"--threads", "1",
@@ -734,6 +763,7 @@ class TestExternalDependencies(unittest.TestCase):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
+ "--model","HKY",
"--verbose", "--iterations", "3",
"--bootstrap","1000",
"--threads", "1",
@@ -749,6 +779,7 @@ class TestExternalDependencies(unittest.TestCase):
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
"--verbose", "--iterations", "3",
+ "--model","HKY",
"--threads", "1",
"--sh-test",
os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -761,6 +792,7 @@ class TestExternalDependencies(unittest.TestCase):
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
"--verbose", "--iterations", "3",
+ "--model","HKY",
"--threads", "1",
"--sh-test",
os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -845,12 +877,9 @@ class TestExternalDependencies(unittest.TestCase):
regex_to_remove = prefix + ".*"
for file in glob.glob(regex_to_remove):
os.remove(file)
- tmp_to_remove = "./tmp*/*"
- for file in glob.glob(tmp_to_remove):
- os.remove(file)
for dir in glob.glob("./tmp*"):
if os.path.isdir(dir):
- os.rmdir(dir)
+ shutil.rmtree(dir)
if __name__ == "__main__":
unittest.main(buffer=True)
=====================================
python/gubbins/tests/test_tree_methods.py
=====================================
@@ -40,6 +40,10 @@ class TestTreeMethods(unittest.TestCase):
algorithm = treebuilders.RAxMLNG()
assert algorithm.name == 'RAxMLNG'
+ def initialise_VeryFastTree(self):
+ algorithm = treebuilders.VeryFastTree()
+ assert algorithm.name == 'VeryFastTree'
+
def test_robinson_foulds_distance(self):
# two tree with different distances
assert common.robinson_foulds_distance(os.path.join(data_dir, 'robinson_foulds_distance_tree1.tre'),
=====================================
python/gubbins/tests/test_utils.py
=====================================
@@ -20,7 +20,7 @@ data_dir = os.path.join(modules_dir, 'tests', 'data')
class TestUtilities(unittest.TestCase):
def test_gubbins_command(self):
- assert common.create_gubbins_command('AAA', 'BBB', 'CCC', 'DDD', 'EEE', 5, 10, 200, 0.05, 1.0, 0, 1) \
+ assert common.create_gubbins_command('AAA', 'BBB', 'CCC', 'DDD', 'EEE', 5, 10, 200, 0.05, 1.0, 0, 1, 1) \
== 'AAA -r -v CCC -a 10 -b 200 -f EEE -t DDD -m 5 -p 0.05 -i 1.0 -n 1 BBB'
def test_translation_of_filenames_to_final_filenames(self):
=====================================
python/gubbins/treebuilders.py
=====================================
@@ -40,6 +40,7 @@ class Star:
self.model = "-"
self.version = "unspecified"
self.citation = "no citation"
+ self.invariant_site_correction = False
def tree_building_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
# Extract taxon names from alignment
@@ -50,8 +51,8 @@ class Star:
# Write tree
star_tree_string = "("
- star_tree_string = star_tree_string + ':0.9,'.join(taxon_names)
- star_tree_string = star_tree_string + ':1);' # mid point rooting fails with equidistant taxa
+ star_tree_string = star_tree_string + ':0.09,'.join(taxon_names)
+ star_tree_string = star_tree_string + ':0.1);' # mid point rooting fails with equidistant taxa
# Print to file
output_tree = basename + self.tree_suffix
@@ -72,6 +73,7 @@ class RapidNJ:
self.model = model
self.additional_args = additional_args
self.bootstrap = bootstrap
+ self.invariant_site_correction = False
# Construct command
self.executable = "rapidnj"
@@ -141,6 +143,7 @@ class FastTree:
self.bootstrap = bootstrap
self.additional_args = additional_args
self.seed = utils.set_seed(seed)
+ self.invariant_site_correction = False
# Identify executable
self.potential_executables = ["FastTreeMP","fasttreeMP","FastTree", "fasttree"]
@@ -156,6 +159,7 @@ class FastTree:
# Function for returning base command
command = [self.executable]
command.extend(["-nt"])
+ command.extend(["-rawdist"]) # https://morgannprice.github.io/fasttree/
if self.model == 'JC':
command.extend(["-nocat"])
elif self.model == 'GTR':
@@ -259,7 +263,7 @@ class FastTree:
class IQTree:
"""Class for operations with the IQTree executable"""
- def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, use_best=False, additional_args = None):
+ def __init__(self, threads: 1, model: str, bootstrap = 0, invariant_proportion = 0, constant_base_counts = [0.0,0.0,0.0,0.0], seed = None, internal_node_prefix="", verbose=False, use_best=False, invariant_site_correction = True, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
@@ -276,6 +280,7 @@ class IQTree:
self.use_best = use_best
self.seed = utils.set_seed(seed)
self.additional_args = additional_args
+ self.invariant_site_correction = (invariant_site_correction if max(constant_base_counts) > 0 else False)
# Construct base command
self.executable = "iqtree"
@@ -291,23 +296,33 @@ class IQTree:
# Set parallelisation
command.extend(["-T", str(self.threads)])
- # Add flags
+ # Define model
command.extend(["-safe","-redo"])
if self.use_best:
pass
- elif self.model == 'JC':
- command.extend(["-m", "JC"])
- elif self.model == 'K2P':
- command.extend(["-m", "K2P"])
- elif self.model == 'HKY':
- command.extend(["-m", "HKY"])
- elif self.model == 'GTR':
- command.extend(["-m","GTR"])
- elif self.model == 'GTRGAMMA':
- command.extend(["-m","GTR+G4"])
else:
- command.extend(["-m",self.model])
+ invariant_site_string = ""
+ if invariant_site_correction:
+ #invariant_site_string = "+I{" + str(invariant_proportion) + "}"
+ invariant_site_string = "+I"
+ if self.model == 'JC':
+ command.extend(["-m", "JC"+invariant_site_string])
+ elif self.model == 'K2P':
+ command.extend(["-m", "K2P"+invariant_site_string])
+ elif self.model == 'HKY':
+ command.extend(["-m", "HKY"+invariant_site_string])
+ elif self.model == 'GTR':
+ command.extend(["-m","GTR"+invariant_site_string])
+ elif self.model == 'GTRGAMMA':
+ command.extend(["-m","GTR+G4"+invariant_site_string])
+ else:
+ command.extend(["-m",self.model])
command.extend(["-seed",self.seed])
+
+ # Account for invariant sites
+ if invariant_site_correction:
+ command.extend(["-fconst",','.join([str(x) for x in constant_base_counts])])
+
# Additional arguments
if self.additional_args is not None:
command.extend([self.additional_args])
@@ -371,7 +386,7 @@ class IQTree:
def get_info_filename(self, tmp: str, basename: str) -> str:
"""Returns the name of the file containing the fitted model parameters"""
- fn = tmp + '/' + basename + '.log'
+ fn = tmp + '/' + basename + '.iqtree'
return fn
def get_recontree_filename(self, tmp: str, basename: str) -> str:
@@ -437,7 +452,7 @@ class IQTree:
class RAxML:
"""Class for operations with the RAxML executable"""
- def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, additional_args = None):
+ def __init__(self, threads: 1, model='GTRCAT', constant_base_counts = [0,0,0,0], partition_length = 1, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, invariant_site_correction = True, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
@@ -452,7 +467,10 @@ class RAxML:
self.internal_node_prefix = internal_node_prefix
self.bootstrap = bootstrap
self.seed = utils.set_seed(seed)
+ self.constant_base_counts = constant_base_counts
+ self.partition_length = partition_length
self.additional_args = additional_args
+ self.invariant_site_correction = (invariant_site_correction if max(constant_base_counts) > 0 else False)
self.single_threaded_executables = ['raxmlHPC-AVX2', 'raxmlHPC-AVX', 'raxmlHPC-SSE3', 'raxmlHPC']
self.multi_threaded_executables = ['raxmlHPC-PTHREADS-AVX2', 'raxmlHPC-PTHREADS-AVX',
@@ -473,18 +491,25 @@ class RAxML:
# Add flags
command.extend(["-safe"])
+ invariant_site_string = ""
+ model_string = "GTRGAMMA"
+ if self.invariant_site_correction:
+ model_string = "ASC_GTRGAMMA"
+ invariant_site_string = "--asc-corr=stamatakis"
if self.model == 'JC':
- command.extend(["-m", "GTRCAT","--JC69"])
+ command.extend(["-m", model_string,"--JC69",invariant_site_string])
elif self.model == 'K2P':
- command.extend(["-m", "GTRCAT","--K80"])
+ command.extend(["-m", model_string,"--K80",invariant_site_string])
elif self.model == 'HKY':
- command.extend(["-m", "GTRCAT","--HKY85"])
- elif self.model == 'GTRCAT':
- command.extend(["-m","GTRCAT", "-V"])
+ command.extend(["-m", model_string,"--HKY85",invariant_site_string])
elif self.model == 'GTRGAMMA':
- command.extend(["-m","GTRGAMMA"])
+ command.extend(["-m", model_string,invariant_site_string])
else:
- command.extend(["-m", self.model])
+ if self.model.startswith("ASC_"):
+ command.extend(["-m", self.model,"--asc-corr=stamatakis"])
+ else:
+ self.invariant_site_correction = False
+ command.extend(["-m", self.model])
command.extend(["-p",self.seed])
# Additional arguments
if self.additional_args is not None:
@@ -503,10 +528,29 @@ class RAxML:
break
return version
+ def generate_partition_files(self, command: list, basename: str) -> list:
+ """Generate the partition files enumerating invariant site counts"""
+ current_wd = os.getcwd()
+ if self.invariant_site_correction:
+ partitions_fn = 'invariant_sites.' + os.path.basename(basename) + '.partitions'
+ partition_fn = 'invariant_sites.' + os.path.basename(basename) + '.partition'
+ with open(partitions_fn,'w') as partitions_file:
+ partitions_file.write('[asc~' + partition_fn + '], ASC_DNA, p1=1-' + str(self.partition_length) + '\n')
+ partitions_file.flush()
+ with open(partition_fn,'w') as partition_file:
+ partition_file.write(' '.join([str(x) for x in self.constant_base_counts]) + '\n')
+ partition_file.flush()
+ command.extend(["-q", partitions_fn])
+ return command
+
def tree_building_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
"""Constructs the command to call the RAxML executable for tree building"""
command = self.base_command.copy()
- command.extend(["-f", "d", "-p", str(1)])
+ if self.invariant_site_correction:
+ # Write partition input files - https://cme.h-its.org/exelixis/resource/download/NewManual.pdf
+ command = self.generate_partition_files(command,basename)
+ # Complete command string
+ command.extend(["-f", "d", "-p", str(1)])
command.extend(["-s", alignment_filename, "-n", basename])
if input_tree:
command.extend(["-t", input_tree])
@@ -517,6 +561,7 @@ class RAxML:
def internal_sequence_reconstruction_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
"""Constructs the command to call the RAxML executable for ancestral sequence reconstruction"""
command = self.base_command.copy()
+ command = self.generate_partition_files(command,basename)
command.extend(["-f", "A", "-p", str(1)])
command.extend(["-s", alignment_filename, "-n", basename])
command.extend(["-t", input_tree])
@@ -572,6 +617,7 @@ class RAxML:
def model_fitting_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
"""Fits a nucleotide substitution model to a tree and an alignment"""
command = self.base_command.copy()
+ command = self.generate_partition_files(command,basename)
command.extend(["-s", alignment_filename, "-n", os.path.basename(basename) + '_reconstruction', "-t", input_tree])
command.extend(["-f e"])
command.extend(["-w",os.path.dirname(basename)])
@@ -581,6 +627,7 @@ class RAxML:
"""Runs a bootstrapping analysis and annotates the nodes of a summary tree"""
# Run bootstraps
command = self.base_command.copy()
+ command = self.generate_partition_files(command,basename)
command.extend(["-s", alignment_filename, "-n", basename + ".bootstrapped_trees"])
command.extend(["-w",tmp])
command.extend(["-x",self.seed])
@@ -594,6 +641,7 @@ class RAxML:
def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
"""Runs a single branch support test"""
command = self.base_command.copy()
+ command = self.generate_partition_files(command,basename)
command.extend(["-f", "J"])
command.extend(["-s", alignment_filename, "-n", input_tree + ".sh_support"])
command.extend(["-t", input_tree])
@@ -610,7 +658,7 @@ class RAxML:
class RAxMLNG:
"""Class for operations with the RAxML executable"""
- def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, additional_args = None):
+ def __init__(self, threads: 1, model: str, constant_base_counts = [0,0,0,0], bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, invariant_site_correction = True, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
@@ -625,7 +673,9 @@ class RAxMLNG:
self.internal_node_prefix = internal_node_prefix
self.bootstrap = bootstrap
self.seed = utils.set_seed(seed)
+ self.constant_base_counts = constant_base_counts
self.additional_args = additional_args
+ self.invariant_site_correction = (invariant_site_correction if max(constant_base_counts) > 0 else False)
self.single_threaded_executables = ['raxml-ng']
self.multi_threaded_executables = ['raxml-ng']
@@ -644,16 +694,19 @@ class RAxMLNG:
# Add model
command.extend(["--model"])
+ invariant_site_string = ""
+ if invariant_site_correction:
+ invariant_site_string = "+ASC_STAM{" + '/'.join([str(x) for x in constant_base_counts]) + "}"
if self.model == 'JC':
- command.extend(["JC"])
+ command.extend(["JC"+invariant_site_string])
elif self.model == 'K2P':
- command.extend(["K80"])
+ command.extend(["K80"+invariant_site_string])
elif self.model == 'HKY':
- command.extend(["HKY"])
+ command.extend(["HKY"+invariant_site_string])
elif self.model == 'GTR':
- command.extend(["GTR"])
+ command.extend(["GTR"+invariant_site_string])
elif self.model == 'GTRGAMMA':
- command.extend(["GTR+G"])
+ command.extend(["GTR+G"+invariant_site_string])
else:
command.extend([self.model])
command.extend(["--seed",self.seed])
@@ -795,3 +848,130 @@ class RAxMLNG:
"""Return bootstrapped tree files name"""
file_name = tmp + "/" + basename + ".raxml.bootstraps"
return file_name
+
+class VeryFastTree:
+ """Class for operations with the VeryFastTree executable"""
+
+ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', seed = None, verbose = False, additional_args = None):
+ """Initialises the object"""
+ self.verbose = verbose
+ self.threads = threads
+ self.model = model
+ self.tree_prefix = ""
+ self.tree_suffix = ".tre"
+ self.alignment_suffix = ".snp_sites.aln"
+ self.bootstrap = bootstrap
+ self.additional_args = additional_args
+ self.seed = utils.set_seed(seed)
+ self.invariant_site_correction = False
+
+ # Identify executable
+ self.potential_executables = ["VeryFastTree", "veryfasttree"]
+ self.executable = utils.choose_executable(self.potential_executables)
+ if self.executable is None:
+ sys.exit("No usable version of VeryFastTree could be found.")
+
+ # Reproducibility
+ self.name = 'VeryFastTree'
+ self.version = self.get_version(self.executable)
+ self.citation = "https://doi.org/10.1093/gigascience/giae055"
+
+ # Construct the base command shared by all VeryFastTree operations
+ command = [self.executable]
+ command.extend(["-nt"])
+ command.extend(["-rawdist"]) # https://morgannprice.github.io/fasttree/
+ if self.model == 'JC':
+ command.extend(["-nocat"])
+ elif self.model == 'GTR':
+ command.extend(["-gtr","-nocat"])
+ elif self.model == 'GTRGAMMA':
+ command.extend(["-gtr","-gamma"])
+ elif self.model == 'GTRCAT':
+ command.extend(["-gtr"])
+ else:
+ command.extend([self.model])
+ command.extend(["-seed",self.seed])
+ # Set number threads
+ command.extend(["-threads",str(self.threads)])
+ # Additional arguments
+ if self.additional_args is not None:
+ command.extend([self.additional_args])
+ # Define final command
+ self.base_command = command
+
+ def get_version(self,exe) -> str:
+ """Gets the version of the tree building algorithm being used"""
+ version = "Not determined"
+ version_message = subprocess.run([exe], capture_output=True)
+ for line in version_message.stderr.decode().splitlines():
+ if line.startswith('VeryFastTree'):
+ info = line.split()
+ version = info[1]
+ break
+ return version
+
+ def tree_building_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
+ """Constructs the command to call the VeryFastTree executable"""
+ command = self.base_command.copy()
+ # N.B. with version 4.0.4, the -intree option appears to cause a problem, so it is not used in command construction
+# if input_tree:
+# command.extend(["-intree", input_tree])
+ output_tree = basename + self.tree_suffix
+ command.extend(["-nosupport"])
+ command.extend(["-out", output_tree])
+ command.extend(["-log", basename + '.log'])
+ command.append(alignment_filename)
+ if not self.verbose:
+ command.extend([">", "/dev/null", "2>&1"])
+ return " ".join(command)
+
+ def get_info_filename(self, tmp: str, basename: str) -> str:
+ """Returns the name of the file containing the fitted model parameters"""
+ fn = tmp + '/' + basename + '.log'
+ return fn
+
+ def get_recontree_filename(self, tmp: str, basename: str) -> str:
+ """Returns the name of the tree generated by model fitting"""
+ fn = tmp + '/' + basename + '.treefile'
+ return fn
+
+ def model_fitting_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
+ """Fits a nucleotide substitution model to a tree and an alignment"""
+ command = self.base_command.copy()
+ command.extend(["-mllen","-nome"])
+ command.extend(["-nosupport"])
+ command.extend(["-intree",input_tree])
+ command.extend(["-log", basename + ".log"])
+ command.extend(["-out", basename + ".treefile"])
+ command.extend([alignment_filename])
+ return " ".join(command)
+
+ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
+ """Runs a bootstrapping analysis and annotates the nodes of a summary tree"""
+ command = self.base_command.copy()
+ output_tree = basename + self.tree_suffix
+ command.extend(["-nosupport"])
+ command.extend(["-intree1",input_tree]) # http://www.microbesonline.org/fasttree/
+ command.extend(["-out", tmp + "/" + basename + ".bootstrapped_trees"])
+ command.extend(["-log", basename + ".log"])
+ command.extend(["-n", str(self.bootstrap)])
+ command.append(alignment_filename + ".bootstrapping.aln")
+ if not self.verbose:
+ command.extend([">", "/dev/null", "2>&1"])
+ return " ".join(command)
+
+ def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
+ """Runs a single branch support test"""
+ command = self.base_command.copy()
+ command.extend(["-mllen","-nome"])
+ command.extend(["-intree",input_tree])
+ command.extend(["-out",tmp + "/" + input_tree + ".sh_support"])
+ command.extend([alignment_filename])
+ if not self.verbose:
+ command.extend([">", "/dev/null", "2>&1"])
+ return " ".join(command)
+
+ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str:
+ """Return bootstrapped tree files name"""
+ file_name = tmp + "/" + basename + ".bootstrapped_trees"
+ return file_name
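
A minimal usage sketch of the new VeryFastTree wrapper; the file names below are
hypothetical, and a VeryFastTree executable must be discoverable on the PATH:

    from gubbins.treebuilders import VeryFastTree

    # build the shell command Gubbins would run for one tree-building iteration
    builder = VeryFastTree(threads=4, model="GTRGAMMA")
    cmd = builder.tree_building_command("iteration_1.snp_sites.aln", "", "iteration_1")
    # cmd is a single shell string along the lines of:
    # VeryFastTree -nt -rawdist -gtr -gamma -seed <seed> -threads 4 -nosupport
    #   -out iteration_1.tre -log iteration_1.log iteration_1.snp_sites.aln > /dev/null 2>&1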
=====================================
python/setup.py
=====================================
@@ -23,6 +23,7 @@ setuptools.setup(
'scripts/generate_files_for_clade_analysis.py',
'scripts/count_recombinations_per_gene.py',
'scripts/extract_recombinant_sequences.py',
+ 'scripts/extract_gubbins_clade_statistics.py',
'../R/scripts/plot_gubbins.R'
],
tests_require=[
@@ -33,7 +34,8 @@ setuptools.setup(
"multiprocess >= 0.70",
"scipy >= 1.5.3",
"numpy >= 1.19",
- "ska >= 1.0"
+ "ska >= 1.0",
+ "numba > 0.0"
],
long_description="""\
Gubbins is a tool that generates a reconstruction of
@@ -53,7 +55,8 @@ setuptools.setup(
"dendropy >= 4.0.2",
"multiprocess >= 0.70",
"scipy >= 1.5.3",
- "numpy >= 1.19"
+ "numpy >= 1.19",
+ "numba > 0.0"
],
license="GPL"
)
=====================================
src/gubbins.c
=====================================
@@ -39,16 +39,16 @@
// given a sample name extract the sequences from the vcf
// compare two sequences to get a pseudo sequence and fill in with differences from the reference sequence
-void run_gubbins(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int num_threads)
+void run_gubbins(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int num_threads)
{
load_sequences_from_multifasta_file(multi_fasta_filename);
- extract_sequences(vcf_filename, tree_filename, multi_fasta_filename,min_snps,original_multi_fasta_filename,window_min, window_max, uncorrected_p_value, trimming_ratio, extensive_search_flag, num_threads);
+ extract_sequences(vcf_filename, tree_filename, multi_fasta_filename,min_snps,original_multi_fasta_filename,window_min, window_max, uncorrected_p_value, trimming_ratio, extensive_search_flag, scaling_flag, num_threads);
create_tree_statistics_file(tree_filename,get_sample_statistics(),number_of_samples_from_parse_phylip());
freeup_memory();
}
-void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[],int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int num_threads)
+void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[],int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int num_threads)
{
FILE *vcf_file_pointer;
vcf_file_pointer=fopen(vcf_filename, "r");
@@ -105,7 +105,12 @@ void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fast
create_fasta_of_snp_sites(tree_filename, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples,internal_nodes);
// Create a new tree with updated distances
- scale_branch_distances(root_node, number_of_filtered_snps);
+ if (scaling_flag == 1) {
+ scale_branch_distances(root_node, number_of_filtered_snps);
+ } else {
+ scale_branch_distances(root_node, length_of_original_genome);
+ }
+
FILE *output_tree_pointer;
output_tree_pointer=fopen(tree_filename, "w");
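
The C change above only switches the denominator passed to scale_branch_distances.
A rough Python rendering of that choice, assuming (as the function name suggests) that
branch lengths are normalised by the count supplied; the actual arithmetic stays in the
C source:

    # illustration only: which count the -s flag selects for branch-length scaling
    def scaling_denominator(scaling_flag, number_of_filtered_snps, length_of_original_genome):
        if scaling_flag == 1:
            # -s given: keep the old behaviour and scale by the filtered SNP count
            # (no correction for invariant sites)
            return number_of_filtered_snps
        # default: scale by the length of the original genome
        return length_of_original_genome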
=====================================
src/gubbins.h
=====================================
@@ -23,8 +23,8 @@
#include "seqUtil.h"
#include "Newickform.h"
-void run_gubbins(char vcf_filename[], char tree_filename[], char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int num_threads);
-void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int n_threads);
+void run_gubbins(char vcf_filename[], char tree_filename[], char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int num_threads);
+void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int n_threads);
char find_first_real_base(int base_position, int number_of_child_sequences, char ** child_sequences);
=====================================
src/main.c
=====================================
@@ -52,6 +52,7 @@ void print_usage(FILE* stream, int exit_code)
" -b Max window size\n"
" -p p value for detecting recombinations\n"
" -i p value ratio for trimming recombinations\n"
+ " -s scale branch lengths without correcting for invariant sites\n"
" -h Display this usage information.\n\n"
);
exit (exit_code);
@@ -87,6 +88,7 @@ int main (int argc, char ** argv)
float uncorrected_p_value = 0.05;
float trimming_ratio = 1.0;
int extensive_search_flag = 0;
+ int scaling_flag = 0;
int num_threads = 1;
program_name = argv[0];
@@ -106,12 +108,13 @@ int main (int argc, char ** argv)
{"trimming_ratio", required_argument, 0, 'i'},
{"extended_search", required_argument, 0, 'x'},
{"ncpu", required_argument, 0, 'n'},
+ {"scaling", required_argument, 0, 's'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long (argc, argv, "hrxv:f:t:m:a:b:p:i:n:",
+ c = getopt_long (argc, argv, "hrxsv:f:t:m:a:b:p:i:n:",
long_options, &option_index);
/* Detect the end of the options. */
if (c == -1)
@@ -136,6 +139,9 @@ int main (int argc, char ** argv)
case 'x':
extensive_search_flag = 1;
break;
+ case 's':
+ scaling_flag = 1;
+ break;
case 'f':
memcpy(original_multi_fasta_filename, optarg, size_of_string(optarg) +1);
break;
@@ -195,6 +201,7 @@ int main (int argc, char ** argv)
uncorrected_p_value,
trimming_ratio,
extensive_search_flag,
+ scaling_flag,
num_threads);
}
else
=====================================
tests/check_gubbins.c
=====================================
@@ -10,7 +10,7 @@ START_TEST (check_gubbins_no_recombinations)
{
remove("../tests/data/no_recombinations.tre");
cp("../tests/data/no_recombinations.tre", "../tests/data/no_recombinations.original.tre");
- run_gubbins("../tests/data/no_recombinations.aln.vcf", "../tests/data/no_recombinations.tre","../tests/data/no_recombinations.aln.snp_sites.aln",3,"../tests/data/no_recombinations.aln.snp_sites.aln",100,10000,0.05,1,0,1);
+ run_gubbins("../tests/data/no_recombinations.aln.vcf", "../tests/data/no_recombinations.tre","../tests/data/no_recombinations.aln.snp_sites.aln",3,"../tests/data/no_recombinations.aln.snp_sites.aln",100,10000,0.05,1,0,0,1);
ck_assert(file_exists("../tests/data/no_recombinations.tre.tab") == 1);
ck_assert(file_exists("../tests/data/no_recombinations.tre.vcf") == 1);
ck_assert(file_exists("../tests/data/no_recombinations.tre.phylip") == 1);
@@ -37,7 +37,7 @@ START_TEST (check_gubbins_one_recombination)
{
remove("../tests/data/one_recombination.tre");
cp("../tests/data/one_recombination.tre", "../tests/data/one_recombination.original.tre");
- run_gubbins("../tests/data/one_recombination.aln.vcf", "../tests/data/one_recombination.tre","../tests/data/one_recombination.aln.snp_sites.aln",3,"../tests/data/one_recombination.aln.snp_sites.aln",100,10000,0.05,1,0,1);
+ run_gubbins("../tests/data/one_recombination.aln.vcf", "../tests/data/one_recombination.tre","../tests/data/one_recombination.aln.snp_sites.aln",3,"../tests/data/one_recombination.aln.snp_sites.aln",100,10000,0.05,1,0,0,1);
ck_assert(file_exists("../tests/data/one_recombination.tre.tab") == 1);
ck_assert(file_exists("../tests/data/one_recombination.tre.vcf") == 1);
ck_assert(file_exists("../tests/data/one_recombination.tre.phylip") == 1);
@@ -73,7 +73,7 @@ START_TEST (check_gubbins_multiple_recombinations)
"../tests/data/multiple_recombinations.jar.aln",
3,
"../tests/data/multiple_recombinations.aln",
- 30,100,0.05,1,0,1);
+ 30,100,0.05,1,0,1,1);
ck_assert(file_exists("../tests/data/multiple_recombinations.tre.tab") == 1);
ck_assert(file_exists("../tests/data/multiple_recombinations.tre.vcf") == 1);
@@ -108,7 +108,7 @@ START_TEST (check_gubbins_multithreaded)
"../tests/data/multiple_recombinations.jar.aln",
3,
"../tests/data/multiple_recombinations.aln",
- 30,100,0.05,1,0,2);
+ 30,100,0.05,1,0,0,2);
ck_assert(file_exists("../tests/data/multiple_recombinations.tre.tab") == 1);
ck_assert(file_exists("../tests/data/multiple_recombinations.tre.vcf") == 1);
@@ -143,7 +143,7 @@ START_TEST (check_recombination_at_root)
"../tests/data/recombination_at_root/recombination_at_root.aln.gaps.snp_sites.aln",
3,
"../tests/data/recombination_at_root/recombination_at_root.aln",
- 100,10000,0.05,1,0,1);
+ 100,10000,0.05,1,0,0,1);
ck_assert(compare_files("../tests/data/recombination_at_root/RAxML_result.recombination_at_root.iteration_1.tab","../tests/data/recombination_at_root/expected_RAxML_result.recombination_at_root.iteration_1.tab") == 1);
View it on GitLab: https://salsa.debian.org/med-team/gubbins/-/commit/88ec10f179c3fd209398e8f409e21cdc94338a72