[med-svn] [Git][med-team/gubbins][upstream] New upstream version 3.4.3

Alexandre Detiste (@detiste-guest) gitlab at salsa.debian.org
Sat Sep 27 14:05:04 BST 2025



Alexandre Detiste pushed to branch upstream at Debian Med / gubbins


Commits:
88ec10f1 by Alexandre Detiste at 2025-09-27T14:26:25+02:00
New upstream version 3.4.3
- - - - -


16 changed files:

- VERSION
- environment.yml
- python/gubbins/PreProcessFasta.py
- python/gubbins/common.py
- python/gubbins/pyjar.py
- python/gubbins/run_gubbins.py
- python/gubbins/tests/test_alignment_python_methods.py
- python/gubbins/tests/test_dependencies.py
- python/gubbins/tests/test_tree_methods.py
- python/gubbins/tests/test_utils.py
- python/gubbins/treebuilders.py
- python/setup.py
- src/gubbins.c
- src/gubbins.h
- src/main.c
- tests/check_gubbins.c


Changes:

=====================================
VERSION
=====================================
@@ -1 +1 @@
-3.4
+3.4.2


=====================================
environment.yml
=====================================
@@ -2,7 +2,6 @@ name: gubbins_env
 channels:
   - conda-forge
   - bioconda
-  - defaults
   - r
 dependencies:
 # python
@@ -30,10 +29,11 @@ dependencies:
   - numba
 # phylogenetics
   - raxml=8.2.12
-  - iqtree>=2.2
+  - iqtree>=2.2,<3
   - rapidnj
   - raxml-ng=1.0.1
   - fasttree=2.1.10
+  - veryfasttree
 # Scripts
   - ska2>=0.3.0
 # R


=====================================
python/gubbins/PreProcessFasta.py
=====================================
@@ -99,7 +99,7 @@ class PreProcessFasta(object):
             if number_of_included_alignments <= 1:
                 sys.exit("Not enough sequences are left after removing duplicates.Please check you input data.")
 
-        with open(output_filename, "w+") as output_handle:
+        with open(output_filename, "w") as output_handle:
             AlignIO.write(MultipleSeqAlignment(output_alignments), output_handle, "fasta")
 
         return taxa_to_remove
@@ -112,3 +112,22 @@ class PreProcessFasta(object):
                 for record in alignment:
                     sequence_names.append(record.id)
         return sequence_names
+    
+    def get_alignment_length(self, format = "fasta"):
+        alignment = AlignIO.read(self.input_filename, format)
+        return alignment.get_alignment_length()
+
+    def get_alignment_information(self, format = "fasta"):
+        sequence_names = []
+        base_frequencies = [0,0,0,0]
+        alignment = AlignIO.read(self.input_filename, format)
+        alignment_length = alignment.get_alignment_length()
+        
+        for record in alignment:
+            sequence_names.append(record.id)
+            for index,base in enumerate(['A','C','G','T']):
+                base_frequencies[index] += (record.seq.count(base) + record.seq.count(base.lower()))
+        
+        base_frequencies = [b/(alignment_length*len(sequence_names)) for b in base_frequencies]
+        
+        return alignment_length, sequence_names, base_frequencies


=====================================
python/gubbins/common.py
=====================================
@@ -40,21 +40,32 @@ from Bio.Seq import Seq
 # Gubbins imports
 from gubbins.PreProcessFasta import PreProcessFasta
 from gubbins.ValidateFastaAlignment import ValidateFastaAlignment
-from gubbins.treebuilders import FastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
+from gubbins.treebuilders import FastTree, VeryFastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
 from gubbins.pyjar import jar, get_base_patterns
 from gubbins import utils
 from gubbins.__init__ import version
 from gubbins.pyjar import jar, get_base_patterns, Pyjar
-from gubbins.treebuilders import FastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
+from gubbins.treebuilders import FastTree, VeryFastTree, IQTree, RAxML, RAxMLNG, RapidNJ, Star
 
 # Phylogenetic models valid for each algorithm
 tree_models = {
     'star': ['JC','GTRCAT','GTRGAMMA'],
-    'raxml': ['JC','K2P','HKY','GTRCAT','GTRGAMMA'],
+    'raxml': ['JC','K2P','HKY','GTRGAMMA'],
     'raxmlng': ['JC','K2P','HKY','GTR','GTRGAMMA'],
     'iqtree': ['JC','K2P','HKY','GTR','GTRGAMMA'],
     'fasttree': ['JC','GTRCAT','GTRGAMMA'],
-    'rapidnj': ['JC','K2P']
+    'rapidnj': ['JC','K2P'],
+    'veryfasttree': ['JC','GTRCAT','GTRGAMMA']
+}
+
+suffix_dict = {
+    'star': '.snp_sites.aln',
+    'raxml': '.snp_sites.aln',
+    'raxmlng': '.phylip',
+    'iqtree': '.phylip',
+    'fasttree': '.snp_sites.aln',
+    'rapidnj': '.snp_sites.aln',
+    'veryfasttree': '.snp_sites.aln'
 }
 
 def parse_and_run(input_args, program_description=""):
@@ -95,21 +106,12 @@ def parse_and_run(input_args, program_description=""):
     # Select the algorithms used for the first iteration
     current_tree_builder, current_model_fitter, current_model, current_recon_model, extra_tree_arguments, extra_model_arguments, custom_model, custom_recon_model = return_algorithm_choices(input_args,1)
     check_model_validity(current_model,current_tree_builder,input_args.mar,current_recon_model,current_model_fitter,custom_model, custom_recon_model)
-    # Initialise tree builder
-    tree_builder = return_algorithm(current_tree_builder, current_model, input_args, node_labels = internal_node_label_prefix, extra = extra_tree_arguments)
-    alignment_suffix = tree_builder.alignment_suffix
-    methods_log = update_methods_log(methods_log, method = tree_builder, step = 'Tree constructor (1st iteration)')
-    # Initialise model fitter
-    model_fitter = return_algorithm(current_model_fitter, current_recon_model, input_args, node_labels = internal_node_label_prefix, extra = extra_model_arguments)
-    methods_log = update_methods_log(methods_log, method = model_fitter, step = 'Model fitter (1st iteration)')
-    # Initialise sequence reconstruction if MAR
-    if input_args.mar:
-        sequence_reconstructor = return_algorithm(input_args.seq_recon, current_recon_model, input_args, node_labels = internal_node_label_prefix, extra = input_args.seq_recon_args)
-        methods_log = update_methods_log(methods_log, method = sequence_reconstructor, step = 'Sequence reconstructor (1st iteration)')
 
     # Initialise IQtree
     tree_dater = IQTree(threads = input_args.threads,
                             model = current_model,
+                            invariant_proportion = 0,
+                            constant_base_counts = [0.0,0.0,0.0,0.0],
                             verbose = input_args.verbose
                         )
 
@@ -126,6 +128,12 @@ def parse_and_run(input_args, program_description=""):
     gaps_alignment_filename = base_filename + ".gaps.snp_sites.aln"
     gaps_vcf_filename = base_filename + ".gaps.vcf"
     joint_sequences_filename = base_filename + ".seq.joint.aln"
+    sequence_names_fn = base_filename + ".gaps.sequence_names.csv"
+    base_patterns_fn = base_filename + ".gaps.base_patterns.csv"
+
+    # Filter the input alignment and save as temporary alignment file
+    # Create temporary directory for storing working copies of input files
+    temp_working_dir = tempfile.mkdtemp(dir=os.getcwd())
 
     # If restarting from a previous run
     starting_iteration = 1
@@ -143,55 +151,57 @@ def parse_and_run(input_args, program_description=""):
                 sys.stderr.write('Resuming Gubbins analysis at iteration ' + str(starting_iteration) + '\n')
             input_args.starting_tree = input_args.resume
             current_tree_name = input_args.starting_tree
+        os.symlink(os.path.join(current_directory,snp_alignment_filename),os.path.join(temp_working_dir,snp_alignment_filename))
 
     # Check if intermediate files from a previous run exist
     intermediate_files = [basename + ".iteration_"]
     if not input_args.no_cleanup and input_args.resume is None:
         utils.delete_files(".", intermediate_files, "", input_args.verbose)
     if utils.do_files_exist(".", intermediate_files, "", input_args.verbose) and input_args.resume is None:
-        sys.exit("Intermediate files from a previous run exist. Please rerun without the --no_cleanup option "
-                 "to automatically delete them or with the --use_time_stamp to add a unique prefix.")
-
-    # Filter the input alignment and save as temporary alignment file
-    # Create temporary directory for storing working copies of input files
-    temp_working_dir = tempfile.mkdtemp(dir=os.getcwd())
+        sys.exit("Intermediate files from a previous run exist. Please rerun without the --no-cleanup option "
+                 "to automatically delete them or with the --use-time-stamp to add a unique prefix.")
 
     # Check if the input files exist and have the right format
     printer.print("\nChecking input alignment file...")
     if not os.path.exists(input_args.alignment_filename):
         sys.exit("The input alignment file " + input_args.alignment_filename + " does not exist")
-    temp_alignment_filename = temp_working_dir + "/" + base_filename
-    shutil.copyfile(input_args.alignment_filename, temp_alignment_filename)
-    input_args.alignment_filename = temp_alignment_filename
-    if not ValidateFastaAlignment(temp_alignment_filename).is_input_fasta_file_valid():
+    if not ValidateFastaAlignment(input_args.alignment_filename).is_input_fasta_file_valid():
         sys.exit("The input alignment file " + input_args.alignment_filename + " is invalid")
+
     # Filter the input alignment and save as temporary alignment file
     printer.print("\nFiltering input alignment...")
-    pre_process_fasta = PreProcessFasta(temp_alignment_filename,
+    temp_alignment_filename = os.path.join(temp_working_dir,base_filename)
+    pre_process_fasta = PreProcessFasta(input_args.alignment_filename,
                                         input_args.verbose,
                                         input_args.filter_percentage)
     taxa_removed = pre_process_fasta.remove_duplicate_sequences_and_sequences_missing_too_much_data(
         temp_alignment_filename, input_args.remove_identical_sequences)
 
+    # Get filtered alignment information
+    filtered_alignment = PreProcessFasta(temp_alignment_filename,
+                                        input_args.verbose,
+                                        0.0)
+    overall_alignment_length, sequence_names_in_alignment, base_frequencies = filtered_alignment.get_alignment_information()
+    number_of_sequences_in_alignment = len(sequence_names_in_alignment)
+    current_alignment_length = overall_alignment_length
+    all_snp_alignment_length = overall_alignment_length
+
     # Check on number of sequences remaining in alignment after validation and processing
     if input_args.pairwise:
-        if number_of_sequences_in_alignment(input_args.alignment_filename) != 2:
+        if number_of_sequences_in_alignment != 2:
             sys.exit("Pairwise mode should only be used for two sequences.")
-    else:
-        if number_of_sequences_in_alignment(input_args.alignment_filename) < 3:
+    elif number_of_sequences_in_alignment < 3:
             sys.exit("Three or more sequences are required for a meaningful phylogenetic analysis.")
 
     # If outgroup is specified, check it is still in the alignment
     if input_args.outgroup is not None:
-        if input_args.outgroup in taxa_removed:
-            sys.stderr.write('Outgroup removed due to proportion of missing bases\n')
+        if input_args.outgroup not in sequence_names_in_alignment:
+            sys.stderr.write('Outgroup not in filtered alignment - may have been removed due to proportion of missing bases\n')
             sys.exit(1)
 
     # Initialise tree dating algorithm if dates supplied
     if input_args.date is not None:
         if os.path.isfile(input_args.date):
-            # Get sequence names from alignment
-            sequence_names_in_alignment = pre_process_fasta.get_sequence_names()
             # Edit taxon names as in tree
             new_date_file = os.path.join(temp_working_dir,basename + '.dates')
             with open(input_args.date,'r') as in_dates, open(new_date_file,'w') as out_dates:
@@ -230,7 +240,9 @@ def parse_and_run(input_args, program_description=""):
     printer.print(["\nRunning Gubbins to detect SNPs...", gubbins_command])
     try:
         subprocess.check_call(gubbins_command, shell=True)
-    except subprocess.SubprocessError:
+    except subprocess.SubprocessError as e:
+        if e.output is not None:
+            print("Gubbins error: " + e.output)
         sys.exit("Gubbins crashed, please ensure you have enough free memory")
     printer.print("...done. Run time: {:.2f} s".format(time.time() - start_time))
     reconvert_fasta_file(snp_alignment_filename, snp_alignment_filename)
@@ -241,12 +253,58 @@ def parse_and_run(input_args, program_description=""):
         printer.print("\n*** Iteration " + str(i) + " ***")
 
         # Define file names
+        alignment_suffix = get_alignment_suffix(current_tree_builder)
         if i == 1:
             previous_tree_name = input_args.starting_tree
             alignment_filename = base_filename + alignment_suffix
+            pre_process_snp_fasta = PreProcessFasta(snp_alignment_filename,
+                                                    input_args.verbose,
+                                                    0.0)
+            all_snp_alignment_length = pre_process_snp_fasta.get_alignment_length()
+            current_alignment_length = all_snp_alignment_length
+            os.symlink(os.path.join(current_directory,snp_alignment_filename),os.path.join(temp_working_dir,snp_alignment_filename))
         else:
             previous_tree_name = current_tree_name
             alignment_filename = previous_tree_name + alignment_suffix
+            pre_process_snp_fasta = PreProcessFasta(alignment_filename,
+                                                    input_args.verbose,
+                                                    0.0)
+            if alignment_suffix.endswith('phylip'):
+                current_alignment_length = pre_process_snp_fasta.get_alignment_length('phylip-relaxed')
+            else:
+                current_alignment_length = pre_process_snp_fasta.get_alignment_length()
+
+        # Initialise model fitter
+        model_fitter = return_algorithm(current_model_fitter,
+                                        current_recon_model,
+                                        input_args,
+                                        all_snp_alignment_length,
+                                        overall_alignment_length,
+                                        base_frequencies,
+                                        node_labels = internal_node_label_prefix,
+                                        extra = extra_model_arguments)
+        methods_log = update_methods_log(methods_log, method = model_fitter, step = 'Model fitter (1st iteration)')
+        # Initialise sequence reconstruction if MAR
+        if input_args.mar:
+            mar_base_frequencies = base_frequencies
+            if input_args.seq_recon == "iqtree":
+                mar_base_frequencies = [0.0,0.0,0.0,0.0]
+            sequence_reconstructor = return_algorithm(input_args.seq_recon,
+                                                      current_recon_model,
+                                                      input_args,
+                                                      all_snp_alignment_length,
+                                                      overall_alignment_length,
+                                                      mar_base_frequencies,
+                                                      node_labels = internal_node_label_prefix,
+                                                      extra = input_args.seq_recon_args)
+            methods_log = update_methods_log(methods_log, method = sequence_reconstructor, step = 'Sequence reconstructor (1st iteration)')
+            # Copy in phylip file used for RAxML MAR
+            if sequence_reconstructor.alignment_suffix.endswith('.phylip'):
+                recon_alignment_file = os.path.join(base_directory,temp_working_dir,os.path.basename(temp_alignment_filename) + '.phylip')
+                if not os.path.isfile(recon_alignment_file):
+                    os.symlink(os.path.abspath(base_filename) + '.phylip',
+                                        recon_alignment_file)
+
 
         # 1.1. Construct the tree-building command depending on the iteration and employed options
         if i == 2 or input_args.resume is not None:
@@ -258,46 +316,84 @@ def parse_and_run(input_args, program_description=""):
                 current_model = select_best_models(alignment_filename,
                                                     basename,
                                                     current_tree_builder,
+                                                    all_snp_alignment_length,
+                                                    overall_alignment_length,
+                                                    base_frequencies,
                                                     input_args)
                 input_args.model = current_model
                 if current_tree_builder != 'iqtree':
                     check_model_validity(current_model,current_tree_builder,input_args.mar,current_recon_model,current_model_fitter,custom_model, custom_recon_model)
                 printer.print("Phylogeny will be constructed with a " + current_model + " model")
             # Initialise tree builder
-            tree_builder = return_algorithm(current_tree_builder, current_model, input_args, node_labels = internal_node_label_prefix, extra = extra_tree_arguments)
-            alignment_suffix = tree_builder.alignment_suffix
+            tree_builder = return_algorithm(current_tree_builder,
+                                            current_model,
+                                            input_args,
+                                            current_alignment_length,
+                                            overall_alignment_length,
+                                            base_frequencies,
+                                            node_labels = internal_node_label_prefix,
+                                            extra = extra_tree_arguments)
             methods_log = update_methods_log(methods_log, method = tree_builder, step = 'Tree constructor (later iterations)')
             # Update date model (should not make a difference)
             if input_args.date is not None:
                 tree_dater.model = current_model
+        else:
+            # Initialise tree builder
+            tree_builder = return_algorithm(current_tree_builder,
+                                            current_model,
+                                            input_args,
+                                            current_alignment_length,
+                                            overall_alignment_length,
+                                            base_frequencies,
+                                            node_labels = internal_node_label_prefix,
+                                            extra = extra_tree_arguments)
+            alignment_suffix = tree_builder.alignment_suffix
+            methods_log = update_methods_log(methods_log, method = tree_builder, step = 'Tree constructor (1st iteration)')
 
         current_basename = basename + ".iteration_" + str(i)
         current_tree_name = current_basename + ".tre"
-        if previous_tree_name and input_args.first_tree_builder != "star":
-            tree_building_command = tree_builder.tree_building_command(
-                os.path.abspath(alignment_filename), os.path.abspath(previous_tree_name), current_basename)
-        else:
-            tree_building_command = tree_builder.tree_building_command(
-                os.path.abspath(alignment_filename), "", current_basename)
         built_tree = temp_working_dir + "/" + tree_builder.tree_prefix + current_basename + tree_builder.tree_suffix
+        if i == starting_iteration:
+            for fn in [gaps_alignment_filename, gaps_vcf_filename, joint_sequences_filename, alignment_filename, base_filename + ".start"]:
+                target_file = os.path.join(base_directory,temp_working_dir,os.path.basename(fn))
+                if not os.path.isfile(target_file):
+                    os.symlink(os.path.abspath(fn), target_file)
+        else:
+            for fn in [previous_tree_name, alignment_filename]:
+                target_path = os.path.join(base_directory,temp_working_dir,os.path.basename(fn))
+                if os.path.isfile(target_path):
+                    os.remove(target_path)
+                os.symlink(os.path.abspath(fn),
+                                        target_path)
 
         # 1.2. Construct the phylogenetic tree
         if input_args.starting_tree is not None and i == 1:
             printer.print("\nCopying the starting tree...")
             shutil.copyfile(input_args.starting_tree, current_tree_name)
         else:
-
-            printer.print(["\nConstructing the phylogenetic tree with " + tree_builder.executable + "...",
-                           tree_building_command])
             if current_tree_builder == "star":
+                tree_building_command = tree_builder.tree_building_command(
+                            os.path.abspath(alignment_filename), "", current_basename)
                 # Move star tree into temp dir
                 shutil.move(tree_builder.tree_prefix + current_basename + tree_builder.tree_suffix,
                             built_tree)
             else:
                 try:
+                    # Prepare tmp directory for tree building and sequence reconstruction
                     os.chdir(temp_working_dir)
+                    printer.print("Entering working directory " + temp_working_dir)
+                    if previous_tree_name:
+                        tree_building_command = tree_builder.tree_building_command(
+                            os.path.abspath(alignment_filename), os.path.abspath(previous_tree_name), current_basename)
+                    else:
+                        tree_building_command = tree_builder.tree_building_command(
+                            os.path.abspath(alignment_filename), "", current_basename)
+                    printer.print(["\nConstructing the phylogenetic tree with " + tree_builder.executable + "...",
+                           tree_building_command])
                     subprocess.check_call(tree_building_command, shell=True)
-                except subprocess.SubprocessError:
+                except subprocess.SubprocessError as e:
+                    if e.output is not None:
+                        sys.stderr.write('Error: ' + e.output + '\n')
                     sys.exit("Failed while building the tree.")
                 os.chdir(current_directory)
             shutil.copyfile(built_tree, current_tree_name)
@@ -313,7 +409,7 @@ def parse_and_run(input_args, program_description=""):
 
         # 3.1. Construct the command for ancestral state reconstruction depending on the iteration and employed options
         ancestral_sequence_basename = current_basename + ".internal"
-        current_tree_name_with_internal_nodes = current_tree_name + ".internal"
+        current_tree_name_with_internal_nodes = os.path.join(temp_working_dir, current_tree_name + ".internal")
 
         if not input_args.mar:
         
@@ -329,14 +425,18 @@ def parse_and_run(input_args, program_description=""):
                                                             get_base_patterns(base_filename,
                                                                                 input_args.verbose,
                                                                                 threads = input_args.threads)
+                for fn in [sequence_names_fn, base_patterns_fn]:
+                        os.symlink(os.path.join(base_directory,fn),
+                                os.path.join(base_directory,temp_working_dir,os.path.basename(fn)))
                 # 3.3b. Record in methods log (just once)
                 pyjar_method = Pyjar(current_model)
                 methods_log = update_methods_log(methods_log, method = pyjar_method, step = 'Sequence reconstructor')
 
             # 3.4a. Re-fit full polymorphism alignment to new tree
+            os.chdir(temp_working_dir)
             model_fitting_command = model_fitter.model_fitting_command(snp_alignment_filename,
-                                                                os.path.abspath(temp_rooted_tree),
-                                                                temp_working_dir + '/' + current_basename)
+                                                                        os.path.abspath(temp_rooted_tree),
+                                                                        temp_working_dir + '/' + current_basename)
             printer.print(["\nFitting substitution model to tree...", model_fitting_command])
             try:
                 subprocess.check_call(model_fitting_command, shell = True)
@@ -364,10 +464,10 @@ def parse_and_run(input_args, program_description=""):
                     # If this fails, continue to generate rest of output
                     sys.stderr.write("Unable to use time calibrated tree for sequence reconstruction in "
                     " iteration " + str(i))
-            else:
-                # Set root of reconstruction tree to match that of the current tree
-                # Cannot just midpoint root both, because the branch lengths differ between them
-                harmonise_roots(recontree_filename, temp_rooted_tree, algorithm = model_fitter.name)
+
+            # Set root of reconstruction tree to match that of the current tree
+            # Cannot just midpoint root both, because the branch lengths differ between them
+            harmonise_roots(recontree_filename, temp_rooted_tree, algorithm = model_fitter.name)
             
             printer.print(["\nRunning joint ancestral reconstruction with pyjar"])
             jar(sequence_names = ordered_sequence_names, # complete polymorphism alignment
@@ -389,33 +489,41 @@ def parse_and_run(input_args, program_description=""):
                                                   temp_rooted_tree,
                                                   current_tree_name_with_internal_nodes,
                                                   "pyjar")
+            os.chdir(current_directory)
             printer.print(["\nDone transfer"])
             
         else:
 
             # 3.2b. Marginal ancestral reconstruction with RAxML, RAxML-NG or IQTree
-            sequence_reconstruction_command = sequence_reconstructor.internal_sequence_reconstruction_command(
-                os.path.abspath(base_filename + alignment_suffix), os.path.abspath(temp_rooted_tree),
-                ancestral_sequence_basename)
-            raw_internal_sequence_filename \
-                = temp_working_dir + "/" + sequence_reconstructor.asr_prefix \
-                + ancestral_sequence_basename + sequence_reconstructor.asr_suffix
-            processed_internal_sequence_filename = temp_working_dir + "/" + ancestral_sequence_basename + ".aln"
-            raw_internal_rooted_tree_filename \
-                = temp_working_dir + "/" + sequence_reconstructor.asr_tree_prefix \
-                + ancestral_sequence_basename + sequence_reconstructor.asr_tree_suffix
+            os.chdir(temp_working_dir)
+            sequence_reconstruction_command = \
+                  sequence_reconstructor.internal_sequence_reconstruction_command(
+                                              os.path.abspath(base_filename + alignment_suffix),
+                                              os.path.abspath(temp_rooted_tree),
+                                              ancestral_sequence_basename)
+            raw_internal_sequence_filename = os.path.join(base_directory,
+                                              temp_working_dir,
+                                              os.path.basename(sequence_reconstructor.asr_prefix + ancestral_sequence_basename + sequence_reconstructor.asr_suffix))
+            processed_internal_sequence_filename = os.path.join(base_directory,
+                                              temp_working_dir,
+                                              os.path.basename(ancestral_sequence_basename + ".aln"))
+                                              
+            raw_internal_rooted_tree_filename  = os.path.join(base_directory,
+                                              temp_working_dir,
+                                              os.path.basename(sequence_reconstructor.asr_tree_prefix + ancestral_sequence_basename + sequence_reconstructor.asr_tree_suffix))
 
             # 3.3b. Reconstruct the ancestral sequence
             printer.print(["\nReconstructing ancestral sequences with " + sequence_reconstructor.executable + "...",
                            sequence_reconstruction_command])
-            os.chdir(temp_working_dir)
             try:
                 subprocess.check_call(sequence_reconstruction_command, shell=True)
-            except subprocess.SubprocessError:
-                sys.exit("Failed while reconstructing the ancestral sequences.")
+            except subprocess.SubprocessError as e:
+                if e.output is None:
+                    e.output = 'Unspecified error'
+                sys.exit("Failed while reconstructing the ancestral sequences: " + e.output)
             os.chdir(current_directory)
             # 3.4b. Join ancestral sequences with given sequences
-            current_tree_name_with_internal_nodes = current_tree_name + ".internal"
+            current_tree_name_with_internal_nodes = os.path.join(temp_working_dir,current_tree_name + ".internal")
             sequence_reconstructor.convert_raw_ancestral_states_to_fasta(raw_internal_sequence_filename,
                                                                          processed_internal_sequence_filename)
             concatenate_fasta_files([snp_alignment_filename, processed_internal_sequence_filename],
@@ -453,11 +561,13 @@ def parse_and_run(input_args, program_description=""):
         gubbins_command = create_gubbins_command(
             gubbins_exec, gaps_alignment_filename, gaps_vcf_filename, current_tree_name,
             input_args.alignment_filename, input_args.min_snps, input_args.min_window_size, input_args.max_window_size,
-            input_args.p_value, input_args.trimming_ratio, input_args.extensive_search, input_args.threads)
+            input_args.p_value, input_args.trimming_ratio, input_args.extensive_search, tree_builder.invariant_site_correction, input_args.threads)
         printer.print(["\nRunning Gubbins to detect recombinations...", gubbins_command])
         try:
             subprocess.check_call(gubbins_command, shell=True)
-        except subprocess.SubprocessError:
+        except subprocess.SubprocessError as e:
+            if e.output is None:
+                print("Gubbins failed: " + e.output)
             sys.exit("Failed while running Gubbins. Please ensure you have enough free memory")
         printer.print("...done. Run time: {:.2f} s".format(time.time() - start_time))
         shutil.copyfile(current_tree_name, current_tree_name_with_internal_nodes)
@@ -492,8 +602,8 @@ def parse_and_run(input_args, program_description=""):
             bootstrap_command = tree_builder.bootstrapping_command(os.path.abspath(final_aln), os.path.abspath(current_tree_name), temp_working_dir + "/" + current_basename)
             try:
                 subprocess.check_call(bootstrap_command, shell=True)
-            except subprocess.SubprocessError:
-                sys.exit("Failed while running bootstrap analysis.")
+            except subprocess.SubprocessError as e:
+                sys.exit("Failed while running bootstrap analysis: " + e.output)
             transfer_bootstraps_to_tree(temp_working_dir + "/" + current_basename + ".tre.bootstrapped",
                                                     os.path.abspath(current_tree_name),
                                                     current_basename + ".tre.bootstrapped",
@@ -505,7 +615,13 @@ def parse_and_run(input_args, program_description=""):
             if current_tree_builder == "raxmlng":
                 bootstrap_utility = tree_builder
             else:
-                bootstrap_utility = return_algorithm("raxmlng", current_model, input_args, node_labels = "")
+                bootstrap_utility = return_algorithm("raxmlng",
+                                                      current_model,
+                                                      input_args,
+                                                      current_alignment_length,
+                                                      overall_alignment_length,
+                                                      base_frequencies,
+                                                      node_labels = "")
             # Generate alignments for bootstrapping if FastTree being used
             if current_tree_builder == "fasttree":
                 bootstrap_aln = generate_bootstrap_alignments(bootstrap_aln,
@@ -515,8 +631,10 @@ def parse_and_run(input_args, program_description=""):
             bootstrap_command = tree_builder.bootstrapping_command(os.path.abspath(bootstrap_aln), os.path.abspath(current_tree_name), current_basename, os.path.abspath(temp_working_dir))
             try:
                 subprocess.check_call(bootstrap_command, shell=True)
-            except subprocess.SubprocessError:
-                sys.exit("Failed while running bootstrap analysis.")
+            except subprocess.SubprocessError as e:
+                if e.output is None:
+                    e.output = 'Unspecified error'
+                sys.exit("Failed while running bootstrap analysis: " + e.output)
             # Annotate the final tree using the bootstraps
             bootstrapped_trees_file = tree_builder.get_bootstrapped_trees_file(temp_working_dir,current_basename)
             annotation_command = bootstrap_utility.annotate_tree_using_bootstraps_command(os.path.abspath(final_aln),
@@ -527,8 +645,10 @@ def parse_and_run(input_args, program_description=""):
                                                                                             transfer = input_args.transfer_bootstrap)
             try:
                 subprocess.check_call(annotation_command, shell=True)
-            except subprocess.SubprocessError:
-                sys.exit("Failed while annotating final tree with bootstrapping results.")
+            except subprocess.SubprocessError as e:
+                if e.output is None:
+                    e.output = 'Unspecified error'
+                sys.exit("Failed while annotating final tree with bootstrapping results: " + e.output)
         printer.print("...done. Run time: {:.2f} s".format(time.time() - start_time))
 
     # 7. Run node branch support analysis if requested
@@ -539,8 +659,10 @@ def parse_and_run(input_args, program_description=""):
                                                 os.path.abspath(temp_working_dir))
         try:
             subprocess.check_call(sh_test_command, shell=True)
-        except subprocess.SubprocessError:
-            sys.exit("Failed while running SH test.")
+        except subprocess.SubprocessError as e:
+            if e.output is None:
+                e.output = 'Unspecified error'
+            sys.exit("Failed while running SH test: " + e.output)
         reformat_sh_support(current_tree_name,
                             os.path.abspath(temp_working_dir),
                             current_tree_name,
@@ -557,15 +679,18 @@ def parse_and_run(input_args, program_description=""):
                                                     outgroup = input_args.outgroup)
         try:
             subprocess.check_call(dating_command, shell=True)
-        except subprocess.SubprocessError:
+        except subprocess.SubprocessError as e:
             # If this fails, continue to generate rest of output
-            sys.stderr.write("Failed running tree time calibration with LSD.")
+            if e.output is None:
+                e.output = 'Unspecified error'
+            sys.stderr.write("Failed running tree time calibration with LSD: " + e.output)
             input_args.date = None
 
     # Create the final output
     printer.print("\nCreating the final output...")
     if input_args.prefix is None:
         input_args.prefix = basename
+    shutil.copyfile(os.path.join(temp_working_dir, current_tree_name + ".internal"), current_tree_name + ".internal")
     output_filenames_to_final_filenames = translation_of_filenames_to_final_filenames(
         current_tree_name, input_args.prefix)
     utils.rename_files(output_filenames_to_final_filenames)
@@ -743,18 +868,63 @@ def return_algorithm_choices(args,i):
     # Return choices
     return current_tree_builder, current_model_fitter, current_model, current_recon_model, extra_tree_arguments, extra_recon_arguments, custom_model, current_recon_model
 
-def return_algorithm(algorithm_choice, model, input_args, node_labels = None, extra = None):
+def return_algorithm(algorithm_choice, model, input_args, snp_aln_length, complete_aln_length, base_frequencies, node_labels = None, extra = None):
     initialised_algorithm = None
+    constant_sites = (complete_aln_length - snp_aln_length)
+    constant_base_counts = [int(b*constant_sites) for b in base_frequencies]
     if algorithm_choice == "fasttree":
-        initialised_algorithm = FastTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
+        initialised_algorithm = FastTree(threads = input_args.threads,
+                                          model = model,
+                                          seed = input_args.seed,
+                                          bootstrap = input_args.bootstrap,
+                                          verbose = input_args.verbose,
+                                          additional_args = extra)
+    elif algorithm_choice == "veryfasttree":
+        initialised_algorithm = VeryFastTree(threads = input_args.threads,
+                                              model = model,
+                                              seed = input_args.seed,
+                                              bootstrap = input_args.bootstrap,
+                                              verbose = input_args.verbose,
+                                              additional_args = extra)
     elif algorithm_choice == "raxml":
-        initialised_algorithm = RAxML(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
+        initialised_algorithm = RAxML(threads = input_args.threads,
+                                      model = model,
+                                      constant_base_counts = constant_base_counts,
+                                      partition_length = snp_aln_length,
+                                      seed = input_args.seed,
+                                      bootstrap = input_args.bootstrap,
+                                      internal_node_prefix = node_labels,
+                                      verbose = input_args.verbose,
+                                      invariant_site_correction = input_args.invariant_site_correction,
+                                      additional_args = extra)
     elif algorithm_choice == "raxmlng":
-        initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
+        initialised_algorithm = RAxMLNG(threads = input_args.threads,
+                                        model = model,
+                                        seed = input_args.seed,
+                                        constant_base_counts = constant_base_counts,
+                                        bootstrap = input_args.bootstrap,
+                                        internal_node_prefix = node_labels,
+                                        verbose = input_args.verbose,
+                                        invariant_site_correction = input_args.invariant_site_correction,
+                                        additional_args = extra)
     elif algorithm_choice == "iqtree":
-        initialised_algorithm = IQTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra)
+        initialised_algorithm = IQTree(threads = input_args.threads,
+                                        model = model,
+                                        seed = input_args.seed,
+                                        invariant_proportion = constant_sites/complete_aln_length,
+                                        constant_base_counts = constant_base_counts,
+                                        bootstrap = input_args.bootstrap,
+                                        internal_node_prefix = node_labels,
+                                        verbose = input_args.verbose,
+                                        use_best = (model is None and input_args.best_model),
+                                        invariant_site_correction = input_args.invariant_site_correction,
+                                        additional_args = extra)
     elif algorithm_choice == "rapidnj":
-        initialised_algorithm = RapidNJ(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
+        initialised_algorithm = RapidNJ(threads = input_args.threads,
+                                        model = model,
+                                        bootstrap = input_args.bootstrap,
+                                        verbose = input_args.verbose,
+                                        additional_args = extra)
     elif algorithm_choice == "star":
         initialised_algorithm = Star()
     else:
@@ -762,9 +932,13 @@ def return_algorithm(algorithm_choice, model, input_args, node_labels = None, ex
         sys.exit()
     return initialised_algorithm
 
-def select_best_models(snp_alignment_filename,basename,current_tree_builder,input_args):
+def select_best_models(snp_alignment_filename,basename,current_tree_builder,all_snp_alignment_length,complete_aln_length,base_frequencies,input_args):
+    constant_sites = (complete_aln_length - all_snp_alignment_length)
+    constant_base_counts = [int(b*constant_sites) for b in base_frequencies]
     model_tester = IQTree(threads = input_args.threads,
                             model = 'GTR',
+                            invariant_proportion = constant_sites/complete_aln_length,
+                            constant_base_counts = constant_base_counts,
                             verbose = input_args.verbose
                     )
     model_test_command = model_tester.run_model_comparison(snp_alignment_filename,basename)
@@ -798,27 +972,17 @@ def select_best_models(snp_alignment_filename,basename,current_tree_builder,inpu
 
 def create_gubbins_command(gubbins_exec, alignment_filename, vcf_filename, current_tree_name,
                            original_alignment_filename, min_snps, min_window_size, max_window_size,
-                           p_value, trimming_ratio, extensive_search, num_threads):
+                           p_value, trimming_ratio, extensive_search, invariant_site_correction, num_threads):
     command = [gubbins_exec, "-r", "-v", vcf_filename, "-a", str(min_window_size),
                "-b", str(max_window_size), "-f", original_alignment_filename, "-t", current_tree_name,
                "-m", str(min_snps), "-p", str(p_value), "-i", str(trimming_ratio), "-n", str(num_threads)]
     if extensive_search:
-            command.append("-x")
+        command.append("-x")
+    if not invariant_site_correction:
+        command.append("-s")
     command.append(alignment_filename)
     return " ".join(command)
 
-def number_of_sequences_in_alignment(filename):
-    return len(get_sequence_names_from_alignment(filename))
-
-
-def get_sequence_names_from_alignment(filename):
-    sequence_names = []
-    with open(filename, "r") as handle:
-        for record in SeqIO.parse(handle, "fasta"):
-            sequence_names.append(record.id)
-    return sequence_names
-
-
 def is_starting_tree_valid(starting_tree):
     try:
         Phylo.read(starting_tree, 'newick')
@@ -919,7 +1083,6 @@ def reroot_tree_with_outgroup(tree_name, outgroups):
     clade_outgroups = get_monophyletic_outgroup(tree_name, outgroups)
     tree = dendropy.Tree.get_from_path(tree_name, 'newick', preserve_underscores=True)
     outgroup_mrca = tree.mrca(taxon_labels=clade_outgroups)
-    print('Edge length is: ' + str(outgroup_mrca.edge.length))
     tree.reroot_at_edge(outgroup_mrca.edge,
                         length1 = outgroup_mrca.edge.length/2,
                         length2 = outgroup_mrca.edge.length/2,
@@ -1096,6 +1259,7 @@ def transfer_internal_node_labels_to_tree(source_tree_filename, destination_tree
                                                                                     destination_tree,
                                                                                     is_bipartitions_updated=False)
     if len(missing_bipartitions) > 0:
+        sys.stderr.write('Problem comparing the tree files ' + source_tree_filename + ' and ' + destination_tree_filename + '\n')
         sys.stderr.write('Bipartitions missing when transferring node labels: ' + str([str(x) for x in missing_bipartitions]))
         sys.exit(1)
     
@@ -1395,3 +1559,12 @@ def translation_of_dating_filenames_to_final_filenames(temp_working_dir,
         os.path.join(temp_working_dir,basename + '.timetree.nwk'): prefix + '.final_tree.timetree.tre'
     }
     return dating_files
+
+def get_alignment_suffix(treebuilder_name):
+    suffix = None
+    if (treebuilder_name in suffix_dict):
+        suffix = suffix_dict[treebuilder_name]
+    else:
+        sys.stderr.write('Unknown treebuilding method\n')
+        sys.exit(1)
+    return suffix


=====================================
python/gubbins/pyjar.py
=====================================
@@ -66,7 +66,7 @@ def read_tree(treefile):
 #Calculate Pij from Q matrix and branch length
 def calculate_pij(branch_length,rate_matrix):
     if branch_length==0:
-        pij = numpy.full((4,4), numpy.NINF, dtype = numpy.float32)
+        pij = numpy.full((4,4), -numpy.inf, dtype = numpy.float32)
         numpy.fill_diagonal(pij, 0.0)
     else:
         pij = numpy.array(numpy.log(linalg.expm(numpy.multiply(branch_length,rate_matrix))), dtype = numpy.float32) # modified
@@ -152,6 +152,30 @@ def read_info(infofile, type = 'raxml'):
                     f = [0.25,0.25,0.25,0.25]
                     r = [1.0,1.0,1.0,1.0,1.0,1.0]
             elif type == 'iqtree':
+                # Base frequencies
+                words=line.split()
+                if "pi(A)" in line:
+                    f[0] = float(words[2])
+                elif "pi(C)" in line:
+                    f[1] = float(words[2])
+                elif "pi(G)" in line:
+                    f[2] = float(words[2])
+                elif "pi(T)" in line:
+                    f[3] = float(words[2])
+                # Rate estimates
+                if "A-C:" in line:
+                    r[0] = float(words[1])
+                elif "A-G:" in line:
+                    r[1] = float(words[1])
+                elif "A-T:" in line:
+                    r[2] = float(words[1])
+                elif "C-G:" in line:
+                    r[3] = float(words[1])
+                elif "C-T:" in line:
+                    r[4] = float(words[1])
+                elif "G-T:" in line:
+                    r[5] = float(words[1])
+            
                 if line.startswith('Base frequencies:'):
                     words=line.split()
                     f=[float(words[3]), float(words[5]), float(words[7]), float(words[9])]
@@ -393,7 +417,7 @@ def iterate_over_base_patterns(column,
     Cmat_null = numpy.array([0,1,2,3], dtype = numpy.uint8)
         
     # Reset matrices
-    Lmat.fill(numpy.NINF)
+    Lmat.fill(-numpy.inf)
     Cmat[:] = Cmat_null
 
     # Count unknown bases
@@ -605,7 +629,7 @@ def reconstruct_alignment_column(column_index,
     
     # Generate data structures for reconstructions
     num_nodes = len(tree.nodes())
-    Lmat = numpy.full((num_nodes,4), numpy.NINF, dtype = numpy.float32)
+    Lmat = numpy.full((num_nodes,4), -numpy.inf, dtype = numpy.float32)
     Cmat = numpy.full((num_nodes,4), [0,1,2,3], dtype = numpy.uint8)
     reconstructed_base_indices = numpy.full(num_nodes, 8, dtype = numpy.uint8)
 
@@ -711,7 +735,7 @@ def jar(sequence_names = None,
     child_nodes_array = numpy.empty(num_nodes, dtype=object)
     leaf_node_list = []
     node_labels = numpy.empty(num_nodes, dtype=object)
-    node_pij = numpy.full((num_nodes,16), numpy.NINF, dtype=numpy.float32)
+    node_pij = numpy.full((num_nodes,16), -numpy.inf, dtype=numpy.float32)
     postordered_nodes = numpy.arange(num_nodes, dtype=numpy.int32)
     seed_node = None
     seed_node_edge_truncation = True


=====================================
python/gubbins/run_gubbins.py
=====================================
@@ -58,13 +58,13 @@ def parse_input_args():
     treeGroup = parser.add_argument_group('Tree building options')
     treeGroup.add_argument('--tree-builder',    '-t', help='Application to use for tree building',
                                                       default='raxml',
-                                                      choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'hybrid', 'rapidnj'])
+                                                      choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'hybrid', 'rapidnj','veryfasttree'])
     treeGroup.add_argument('--tree-args',             help='Quoted string of further arguments passed to tree building algorithm'
                                                       ' (start string with a space if there is a risk of being interpreted as a flag)',
                                                       default = None)
     treeGroup.add_argument('--first-tree-builder',    help='Application to use for building the first tree',
                                                       default=None,
-                                                      choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'rapidnj', 'star'])
+                                                      choices=['raxml', 'raxmlng', 'iqtree', 'iqtree-fast', 'fasttree', 'rapidnj', 'star','veryfasttree'])
     treeGroup.add_argument('--first-tree-args',       help='Further arguments passed to first tree building algorithm',
                                                       default = None)
     treeGroup.add_argument('--outgroup',        '-o', help='Outgroup name for rerooting. A list of comma separated '
@@ -78,6 +78,9 @@ def parse_input_args():
                                                       action = 'store_true')
     treeGroup.add_argument('--seed',                  help='Set seed for reproducibility of analysis',
                                                       default = None, type = int)
+    treeGroup.add_argument('--invariant-site-correction',
+                                                      help='Correct for invariant sites',
+                                                      default = False, action = 'store_true')
                                                           
     modelGroup = parser.add_argument_group('Nucleotide substitution model options')
     modelGroup.add_argument('--model',          '-M', help='Nucleotide substitution model (not all available for all '


=====================================
python/gubbins/tests/test_alignment_python_methods.py
=====================================
@@ -9,6 +9,7 @@ import unittest
 import filecmp
 import os
 from gubbins import common
+from gubbins.PreProcessFasta import PreProcessFasta
 
 modules_dir = os.path.dirname(os.path.abspath(common.__file__))
 data_dir = os.path.join(modules_dir, 'tests', 'data')
@@ -16,11 +17,20 @@ data_dir = os.path.join(modules_dir, 'tests', 'data')
 
 class TestAlignmentMethods(unittest.TestCase):
 
-    def test_number_of_sequences_in_alignment(self):
-        assert common.number_of_sequences_in_alignment(os.path.join(data_dir, 'small_alignment.aln')) == 5
+    def test_length_of_alignment(self):
+        preprocessfasta = PreProcessFasta(os.path.join(data_dir, 'small_alignment.aln'))
+        overall_alignment_length, sequence_names_in_alignment, base_frequencies = preprocessfasta.get_alignment_information()
+        assert overall_alignment_length == 4
+
+    def test_sequence_composition(self):
+        preprocessfasta = PreProcessFasta(os.path.join(data_dir, 'small_alignment.aln'))
+        overall_alignment_length, sequence_names_in_alignment, base_frequencies = preprocessfasta.get_alignment_information()
+        assert base_frequencies == [0.4,0.2,0.2,0.2]
 
     def test_get_sequence_names_from_alignment(self):
-        assert common.get_sequence_names_from_alignment(os.path.join(data_dir, 'small_alignment.aln')) == \
+        preprocessfasta = PreProcessFasta(os.path.join(data_dir, 'small_alignment.aln'))
+        overall_alignment_length, sequence_names_in_alignment, base_frequencies = preprocessfasta.get_alignment_information()
+        assert sequence_names_in_alignment == \
                ['sequence1', 'sequence2', 'sequence3', 'sequence4', 'sequence5']
 
     def test_reinsert_gaps_into_fasta_file(self):


=====================================
python/gubbins/tests/test_dependencies.py
=====================================
@@ -91,17 +91,40 @@ class TestExternalDependencies(unittest.TestCase):
         self.cleanup('multiple_recombinations')
         assert exit_code == 0
 
+    def test_veryfasttree(self):
+        exit_code = 1
+        parser = run_gubbins.parse_input_args()
+        common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
+                                                    "--verbose", "--iterations", "3",
+                                                    "--threads", "1",
+                                                    os.path.join(data_dir, 'multiple_recombinations.aln')]))
+        exit_code = self.check_for_output_files('multiple_recombinations')
+        self.cleanup('multiple_recombinations')
+        assert exit_code == 0
+
+    def test_veryfasttree_seed(self):
+        exit_code = 1
+        parser = run_gubbins.parse_input_args()
+        common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
+                                                    "--verbose", "--iterations", "3",
+                                                    "--seed","42",
+                                                    "--threads", "1",
+                                                    os.path.join(data_dir, 'multiple_recombinations.aln')]))
+        exit_code = self.check_for_output_files('multiple_recombinations')
+        self.cleanup('multiple_recombinations')
+        assert exit_code == 0
+
     # Test resuming a default analysis
-    def test_fasttree_resume(self):
+    def test_veryfasttree_resume(self):
         exit_code = 1
         parser = run_gubbins.parse_input_args()
-        common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
+        common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
                                                     "--verbose", "--iterations", "3",
                                                     "--threads", "1",
                                                     "--no-cleanup",
                                                     "--prefix", "original",
                                                     os.path.join(data_dir, 'multiple_recombinations.aln')]))
-        common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
+        common.parse_and_run(parser.parse_args(["--tree-builder", "veryfasttree",
                                             "--verbose", "--iterations", "3",
                                             "--resume", "multiple_recombinations.iteration_1.tre",
                                             "--threads", "1",
@@ -635,6 +658,7 @@ class TestExternalDependencies(unittest.TestCase):
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
                                                     "--verbose", "--iterations", "3",
+                                                    "--model","HKY",
                                                     "--bootstrap","10",
                                                     "--threads", "1",
                                                     os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -647,6 +671,7 @@ class TestExternalDependencies(unittest.TestCase):
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
                                                     "--verbose", "--iterations", "3",
+                                                    "--model","HKY",
                                                     "--bootstrap","1000",
                                                     "--threads", "1",
                                                     os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -671,6 +696,7 @@ class TestExternalDependencies(unittest.TestCase):
         exit_code = 1
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng",
+                                                    "--model","HKY",
                                                     "--verbose", "--iterations", "3",
                                                     "--bootstrap","100",
                                                     "--threads", "1",
@@ -695,6 +721,7 @@ class TestExternalDependencies(unittest.TestCase):
         exit_code = 1
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
+                                                    "--model","JC",
                                                     "--model-fitter", "raxml",
                                                     "--verbose", "--iterations", "3",
                                                     "--bootstrap","10",
@@ -708,6 +735,7 @@ class TestExternalDependencies(unittest.TestCase):
         exit_code = 1
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng",
+                                                    "--model","HKY",
                                                     "--verbose", "--iterations", "3",
                                                     "--bootstrap","100",
                                                     "--threads", "1",
@@ -721,6 +749,7 @@ class TestExternalDependencies(unittest.TestCase):
         exit_code = 1
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
+                                                    "--model","HKY",
                                                     "--verbose", "--iterations", "3",
                                                     "--bootstrap","10",
                                                     "--threads", "1",
@@ -734,6 +763,7 @@ class TestExternalDependencies(unittest.TestCase):
         exit_code = 1
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
+                                                    "--model","HKY",
                                                     "--verbose", "--iterations", "3",
                                                     "--bootstrap","1000",
                                                     "--threads", "1",
@@ -749,6 +779,7 @@ class TestExternalDependencies(unittest.TestCase):
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
                                                     "--verbose", "--iterations", "3",
+                                                    "--model","HKY",
                                                     "--threads", "1",
                                                     "--sh-test",
                                                     os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -761,6 +792,7 @@ class TestExternalDependencies(unittest.TestCase):
         parser = run_gubbins.parse_input_args()
         common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
                                                     "--verbose", "--iterations", "3",
+                                                    "--model","HKY",
                                                     "--threads", "1",
                                                     "--sh-test",
                                                     os.path.join(data_dir, 'bootstrapping_test.aln')]))
@@ -845,12 +877,9 @@ class TestExternalDependencies(unittest.TestCase):
         regex_to_remove = prefix + ".*"
         for file in glob.glob(regex_to_remove):
             os.remove(file)
-        tmp_to_remove = "./tmp*/*"
-        for file in glob.glob(tmp_to_remove):
-            os.remove(file)
         for dir in glob.glob("./tmp*"):
             if os.path.isdir(dir):
-                os.rmdir(dir)
+                shutil.rmtree(dir)
 
 if __name__ == "__main__":
     unittest.main(buffer=True)


=====================================
python/gubbins/tests/test_tree_methods.py
=====================================
@@ -40,6 +40,10 @@ class TestTreeMethods(unittest.TestCase):
         algorithm = treebuilders.RAxMLNG()
         assert algorithm.name == 'RAxMLNG'
 
+    def initialise_VeryFastTree(self):
+        algorithm = treebuilders.VeryFastTree()
+        assert algorithm.name == 'VeryFastTree'
+
     def test_robinson_foulds_distance(self):
         # two tree with different distances
         assert common.robinson_foulds_distance(os.path.join(data_dir, 'robinson_foulds_distance_tree1.tre'),


=====================================
python/gubbins/tests/test_utils.py
=====================================
@@ -20,7 +20,7 @@ data_dir = os.path.join(modules_dir, 'tests', 'data')
 class TestUtilities(unittest.TestCase):
 
     def test_gubbins_command(self):
-        assert common.create_gubbins_command('AAA', 'BBB', 'CCC', 'DDD', 'EEE', 5, 10, 200, 0.05, 1.0, 0, 1) \
+        assert common.create_gubbins_command('AAA', 'BBB', 'CCC', 'DDD', 'EEE', 5, 10, 200, 0.05, 1.0, 0, 1, 1) \
                == 'AAA -r -v CCC -a 10 -b 200 -f EEE -t DDD -m 5 -p 0.05 -i 1.0 -n 1 BBB'
 
     def test_translation_of_filenames_to_final_filenames(self):


=====================================
python/gubbins/treebuilders.py
=====================================
@@ -40,6 +40,7 @@ class Star:
         self.model = "-"
         self.version = "unspecified"
         self.citation = "no citation"
+        self.invariant_site_correction = False
     
     def tree_building_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
         # Extract taxon names from alignment
@@ -50,8 +51,8 @@ class Star:
 
         # Write tree
         star_tree_string = "("
-        star_tree_string = star_tree_string + ':0.9,'.join(taxon_names)
-        star_tree_string = star_tree_string + ':1);' # mid point rooting fails with equidistant taxa
+        star_tree_string = star_tree_string + ':0.09,'.join(taxon_names)
+        star_tree_string = star_tree_string + ':0.1);' # mid point rooting fails with equidistant taxa
 
         # Print to file
         output_tree = basename + self.tree_suffix
@@ -72,6 +73,7 @@ class RapidNJ:
         self.model = model
         self.additional_args = additional_args
         self.bootstrap = bootstrap
+        self.invariant_site_correction = False
 
         # Construct command
         self.executable = "rapidnj"
@@ -141,6 +143,7 @@ class FastTree:
         self.bootstrap = bootstrap
         self.additional_args = additional_args
         self.seed = utils.set_seed(seed)
+        self.invariant_site_correction = False
 
         # Identify executable
         self.potential_executables = ["FastTreeMP","fasttreeMP","FastTree", "fasttree"]
@@ -156,6 +159,7 @@ class FastTree:
         # Function for returning base command
         command = [self.executable]
         command.extend(["-nt"])
+        command.extend(["-rawdist"]) # https://morgannprice.github.io/fasttree/
         if self.model == 'JC':
             command.extend(["-nocat"])
         elif self.model == 'GTR':
@@ -259,7 +263,7 @@ class FastTree:
 class IQTree:
     """Class for operations with the IQTree executable"""
 
-    def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, use_best=False, additional_args = None):
+    def __init__(self, threads: 1, model: str, bootstrap = 0, invariant_proportion = 0, constant_base_counts = [0.0,0.0,0.0,0.0], seed = None, internal_node_prefix="", verbose=False, use_best=False, invariant_site_correction = True, additional_args = None):
         """Initialises the object"""
         self.verbose = verbose
         self.threads = threads
@@ -276,6 +280,7 @@ class IQTree:
         self.use_best = use_best
         self.seed = utils.set_seed(seed)
         self.additional_args = additional_args
+        self.invariant_site_correction = (invariant_site_correction if max(constant_base_counts) > 0 else False)
     
         # Construct base command
         self.executable = "iqtree"
@@ -291,23 +296,33 @@ class IQTree:
         # Set parallelisation
         command.extend(["-T", str(self.threads)])
 
-        # Add flags
+        # Define model
         command.extend(["-safe","-redo"])
         if self.use_best:
             pass
-        elif self.model == 'JC':
-            command.extend(["-m", "JC"])
-        elif self.model == 'K2P':
-            command.extend(["-m", "K2P"])
-        elif self.model == 'HKY':
-            command.extend(["-m", "HKY"])
-        elif self.model == 'GTR':
-            command.extend(["-m","GTR"])
-        elif self.model == 'GTRGAMMA':
-            command.extend(["-m","GTR+G4"])
         else:
-            command.extend(["-m",self.model])
+            invariant_site_string = ""
+            if invariant_site_correction:
+                #invariant_site_string = "+I{" + str(invariant_proportion) + "}"
+                invariant_site_string = "+I"
+            if self.model == 'JC':
+                command.extend(["-m", "JC"+invariant_site_string])
+            elif self.model == 'K2P':
+                command.extend(["-m", "K2P"+invariant_site_string])
+            elif self.model == 'HKY':
+                command.extend(["-m", "HKY"+invariant_site_string])
+            elif self.model == 'GTR':
+                command.extend(["-m","GTR"+invariant_site_string])
+            elif self.model == 'GTRGAMMA':
+                command.extend(["-m","GTR+G4"+invariant_site_string])
+            else:
+                command.extend(["-m",self.model])
         command.extend(["-seed",self.seed])
+        
+        # Account for invariant sites
+        if invariant_site_correction:
+            command.extend(["-fconst",','.join([str(x) for x in constant_base_counts])])
+        
         # Additional arguments
         if self.additional_args is not None:
             command.extend([self.additional_args])
@@ -371,7 +386,7 @@ class IQTree:
 
     def get_info_filename(self, tmp: str, basename: str) -> str:
         """Returns the name of the file containing the fitted model parameters"""
-        fn = tmp + '/' + basename + '.log'
+        fn = tmp + '/' + basename + '.iqtree'
         return fn
     
     def get_recontree_filename(self, tmp: str, basename: str) -> str:
@@ -437,7 +452,7 @@ class IQTree:
 class RAxML:
     """Class for operations with the RAxML executable"""
 
-    def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, additional_args = None):
+    def __init__(self, threads: 1, model='GTRCAT', constant_base_counts = [0,0,0,0], partition_length = 1, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, invariant_site_correction = True, additional_args = None):
         """Initialises the object"""
         self.verbose = verbose
         self.threads = threads
@@ -452,7 +467,10 @@ class RAxML:
         self.internal_node_prefix = internal_node_prefix
         self.bootstrap = bootstrap
         self.seed = utils.set_seed(seed)
+        self.constant_base_counts = constant_base_counts
+        self.partition_length = partition_length
         self.additional_args = additional_args
+        self.invariant_site_correction = (invariant_site_correction if max(constant_base_counts) > 0 else False)
 
         self.single_threaded_executables = ['raxmlHPC-AVX2', 'raxmlHPC-AVX', 'raxmlHPC-SSE3', 'raxmlHPC']
         self.multi_threaded_executables = ['raxmlHPC-PTHREADS-AVX2', 'raxmlHPC-PTHREADS-AVX',
@@ -473,18 +491,25 @@ class RAxML:
 
         # Add flags
         command.extend(["-safe"])
+        invariant_site_string = ""
+        model_string = "GTRGAMMA"
+        if self.invariant_site_correction:
+            model_string = "ASC_GTRGAMMA"
+            invariant_site_string = "--asc-corr=stamatakis"
         if self.model == 'JC':
-            command.extend(["-m", "GTRCAT","--JC69"])
+            command.extend(["-m", model_string,"--JC69",invariant_site_string])
         elif self.model == 'K2P':
-            command.extend(["-m", "GTRCAT","--K80"])
+            command.extend(["-m", model_string,"--K80",invariant_site_string])
         elif self.model == 'HKY':
-            command.extend(["-m", "GTRCAT","--HKY85"])
-        elif self.model == 'GTRCAT':
-            command.extend(["-m","GTRCAT", "-V"])
+            command.extend(["-m", model_string,"--HKY85",invariant_site_string])
         elif self.model == 'GTRGAMMA':
-            command.extend(["-m","GTRGAMMA"])
+            command.extend(["-m", model_string,invariant_site_string])
         else:
-            command.extend(["-m", self.model])
+            if self.model.startswith("ASC_"):
+                command.extend(["-m", self.model,"--asc-corr=stamatakis"])
+            else:
+                self.invariant_site_correction = False
+                command.extend(["-m", self.model])
         command.extend(["-p",self.seed])
         # Additional arguments
         if self.additional_args is not None:
@@ -503,10 +528,29 @@ class RAxML:
                 break
         return version
 
+    def generate_partition_files(self, command: list, basename: str) -> list:
+        """Generate the partition files enumerating invariant site counts"""
+        current_wd = os.getcwd()
+        if self.invariant_site_correction:
+            partitions_fn = 'invariant_sites.' + os.path.basename(basename) + '.partitions'
+            partition_fn = 'invariant_sites.' + os.path.basename(basename) + '.partition'
+            with open(partitions_fn,'w') as partitions_file:
+                partitions_file.write('[asc~' + partition_fn + '], ASC_DNA, p1=1-' + str(self.partition_length) + '\n')
+                partitions_file.flush()
+            with open(partition_fn,'w') as partition_file:
+                partition_file.write(' '.join([str(x) for x in self.constant_base_counts]) + '\n')
+                partition_file.flush()
+            command.extend(["-q", partitions_fn])
+        return command
+        
     def tree_building_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
         """Constructs the command to call the RAxML executable for tree building"""
         command = self.base_command.copy()
-        command.extend(["-f", "d", "-p", str(1)])
+        if self.invariant_site_correction:
+            # Write partition input files - https://cme.h-its.org/exelixis/resource/download/NewManual.pdf
+            command = self.generate_partition_files(command,basename)
+            # Complete command string
+            command.extend(["-f", "d", "-p", str(1)])
         command.extend(["-s", alignment_filename, "-n", basename])
         if input_tree:
             command.extend(["-t", input_tree])
@@ -517,6 +561,7 @@ class RAxML:
     def internal_sequence_reconstruction_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
         """Constructs the command to call the RAxML executable for ancestral sequence reconstruction"""
         command = self.base_command.copy()
+        command = self.generate_partition_files(command,basename)
         command.extend(["-f", "A", "-p", str(1)])
         command.extend(["-s", alignment_filename, "-n", basename])
         command.extend(["-t", input_tree])
@@ -572,6 +617,7 @@ class RAxML:
     def model_fitting_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
         """Fits a nucleotide substitution model to a tree and an alignment"""
         command = self.base_command.copy()
+        command = self.generate_partition_files(command,basename)
         command.extend(["-s", alignment_filename, "-n", os.path.basename(basename) + '_reconstruction', "-t", input_tree])
         command.extend(["-f e"])
         command.extend(["-w",os.path.dirname(basename)])
@@ -581,6 +627,7 @@ class RAxML:
         """Runs a bootstrapping analysis and annotates the nodes of a summary tree"""
         # Run bootstraps
         command = self.base_command.copy()
+        command = self.generate_partition_files(command,basename)
         command.extend(["-s", alignment_filename, "-n", basename + ".bootstrapped_trees"])
         command.extend(["-w",tmp])
         command.extend(["-x",self.seed])
@@ -594,6 +641,7 @@ class RAxML:
     def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
         """Runs a single branch support test"""
         command = self.base_command.copy()
+        command = self.generate_partition_files(command,basename)
         command.extend(["-f", "J"])
         command.extend(["-s", alignment_filename, "-n", input_tree + ".sh_support"])
         command.extend(["-t", input_tree])
@@ -610,7 +658,7 @@ class RAxML:
 class RAxMLNG:
     """Class for operations with the RAxML executable"""
 
-    def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, additional_args = None):
+    def __init__(self, threads: 1, model: str, constant_base_counts = [0,0,0,0], bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, invariant_site_correction = True, additional_args = None):
         """Initialises the object"""
         self.verbose = verbose
         self.threads = threads
@@ -625,7 +673,9 @@ class RAxMLNG:
         self.internal_node_prefix = internal_node_prefix
         self.bootstrap = bootstrap
         self.seed = utils.set_seed(seed)
+        self.constant_base_counts = constant_base_counts
         self.additional_args = additional_args
+        self.invariant_site_correction = (invariant_site_correction if max(constant_base_counts) > 0 else False)
 
         self.single_threaded_executables = ['raxml-ng']
         self.multi_threaded_executables = ['raxml-ng']
@@ -644,16 +694,19 @@ class RAxMLNG:
 
         # Add model
         command.extend(["--model"])
+        invariant_site_string = ""
+        if invariant_site_correction:
+            invariant_site_string = "+ASC_STAM{" + '/'.join([str(x) for x in constant_base_counts]) + "}"
         if self.model == 'JC':
-            command.extend(["JC"])
+            command.extend(["JC"+invariant_site_string])
         elif self.model == 'K2P':
-            command.extend(["K80"])
+            command.extend(["K80"+invariant_site_string])
         elif self.model == 'HKY':
-            command.extend(["HKY"])
+            command.extend(["HKY"+invariant_site_string])
         elif self.model == 'GTR':
-            command.extend(["GTR"])
+            command.extend(["GTR"+invariant_site_string])
         elif self.model == 'GTRGAMMA':
-            command.extend(["GTR+G"])
+            command.extend(["GTR+G"+invariant_site_string])
         else:
             command.extend([self.model])
         command.extend(["--seed",self.seed])
@@ -795,3 +848,130 @@ class RAxMLNG:
         """Return bootstrapped tree files name"""
         file_name = tmp + "/" + basename + ".raxml.bootstraps"
         return file_name
+
+class VeryFastTree:
+    """Class for operations with the VeryFastTree executable"""
+
+    def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', seed = None, verbose = False, additional_args = None):
+        """Initialises the object"""
+        self.verbose = verbose
+        self.threads = threads
+        self.model = model
+        self.tree_prefix = ""
+        self.tree_suffix = ".tre"
+        self.alignment_suffix = ".snp_sites.aln"
+        self.bootstrap = bootstrap
+        self.additional_args = additional_args
+        self.seed = utils.set_seed(seed)
+        self.invariant_site_correction = False
+
+        # Identify executable
+        self.potential_executables = ["VeryFastTree", "veryfasttree"]
+        self.executable = utils.choose_executable(self.potential_executables)
+        if self.executable is None:
+            sys.exit("No usable version of VeryFastTree could be found.")
+
+        # Reproducibility
+        self.name = 'VeryFastTree'
+        self.version = self.get_version(self.executable)
+        self.citation = "https://doi.org/10.1093/gigascience/giae055"
+
+        # Function for returning base command
+        command = [self.executable]
+        command.extend(["-nt"])
+        command.extend(["-rawdist"]) # https://morgannprice.github.io/fasttree/
+        if self.model == 'JC':
+            command.extend(["-nocat"])
+        elif self.model == 'GTR':
+            command.extend(["-gtr","-nocat"])
+        elif self.model == 'GTRGAMMA':
+            command.extend(["-gtr","-gamma"])
+        elif self.model == 'GTRCAT':
+            command.extend(["-gtr"])
+        else:
+            command.extend([self.model])
+        command.extend(["-seed",self.seed])
+        # Set number threads
+        command.extend(["-threads",str(self.threads)])
+        # Additional arguments
+        if self.additional_args is not None:
+            command.extend([self.additional_args])
+        # Define final command
+        self.base_command = command
+
+    def get_version(self,exe) -> str:
+        """Gets the version of the tree building algorithm being used"""
+        version = "Not determined"
+        version_message = subprocess.run([exe], capture_output=True)
+        for line in version_message.stderr.decode().splitlines():
+            if line.startswith('VeryFastTree'):
+                info = line.split()
+                version = info[1]
+                break
+        return version
+
+    def tree_building_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
+        """Constructs the command to call the VeryFastTree executable"""
+        command = self.base_command.copy()
+        # N.B. version 4.0.4 - intree appears to cause a problem so is not used in command construction
+#        if input_tree:
+#            command.extend(["-intree", input_tree])
+        output_tree = basename + self.tree_suffix
+        command.extend(["-nosupport"])
+        command.extend(["-out", output_tree])
+        command.extend(["-log", basename + '.log'])
+        command.append(alignment_filename)
+        if not self.verbose:
+            command.extend([">", "/dev/null", "2>&1"])
+        return " ".join(command)
+    
+    def get_info_filename(self, tmp: str, basename: str) -> str:
+        """Returns the name of the file containing the fitted model parameters"""
+        fn = tmp + '/' + basename + '.log'
+        return fn
+    
+    def get_recontree_filename(self, tmp: str, basename: str) -> str:
+        """Returns the name of the tree generated by model fitting"""
+        fn = tmp + '/' + basename + '.treefile'
+        return fn
+
+    def model_fitting_command(self, alignment_filename: str, input_tree: str, basename: str) -> str:
+        """Fits a nucleotide substitution model to a tree and an alignment"""
+        command = self.base_command.copy()
+        command.extend(["-mllen","-nome"])
+        command.extend(["-nosupport"])
+        command.extend(["-intree",input_tree])
+        command.extend(["-log", basename + ".log"])
+        command.extend(["-out", basename + ".treefile"])
+        command.extend([alignment_filename])
+        return " ".join(command)
+        
+    def bootstrapping_command(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
+        """Runs a bootstrapping analysis and annotates the nodes of a summary tree"""
+        command = self.base_command.copy()
+        output_tree = basename + self.tree_suffix
+        command.extend(["-nosupport"])
+        command.extend(["-intree1",input_tree]) # http://www.microbesonline.org/fasttree/
+        command.extend(["-out", tmp + "/" + basename + ".bootstrapped_trees"])
+        command.extend(["-log", basename + ".log"])
+        command.extend(["-n", str(self.bootstrap)])
+        command.append(alignment_filename + ".bootstrapping.aln")
+        if not self.verbose:
+            command.extend([">", "/dev/null", "2>&1"])
+        return " ".join(command)
+    
+    def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
+        """Runs a single branch support test"""
+        command = self.base_command.copy()
+        command.extend(["-mllen","-nome"])
+        command.extend(["-intree",input_tree])
+        command.extend(["-out",tmp + "/" + input_tree + ".sh_support"])
+        command.extend([alignment_filename])
+        if not self.verbose:
+            command.extend([">", "/dev/null", "2>&1"])
+        return " ".join(command)
+
+    def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str:
+        """Return bootstrapped tree files name"""
+        file_name = tmp + "/" + basename + ".bootstrapped_trees"
+        return file_name


=====================================
python/setup.py
=====================================
@@ -23,6 +23,7 @@ setuptools.setup(
         'scripts/generate_files_for_clade_analysis.py',
         'scripts/count_recombinations_per_gene.py',
         'scripts/extract_recombinant_sequences.py',
+        'scripts/extract_gubbins_clade_statistics.py',
         '../R/scripts/plot_gubbins.R'
     ],
     tests_require=[
@@ -33,7 +34,8 @@ setuptools.setup(
         "multiprocess >= 0.70",
         "scipy >= 1.5.3",
         "numpy >= 1.19",
-        "ska >= 1.0"
+        "ska >= 1.0",
+        "numba > 0.0"
     ],
     long_description="""\
       Gubbins is a tool that generates a reconstruction of
@@ -53,7 +55,8 @@ setuptools.setup(
         "dendropy  >= 4.0.2",
         "multiprocess >= 0.70",
         "scipy >= 1.5.3",
-        "numpy >= 1.19"
+        "numpy >= 1.19",
+        "numba > 0.0"
     ],
     license="GPL"
 )


=====================================
src/gubbins.c
=====================================
@@ -39,16 +39,16 @@
 // given a sample name extract the sequences from the vcf
 // compare two sequences to get pseudo sequnece and fill in with difference from reference sequence
 
-void run_gubbins(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int num_threads)
+void run_gubbins(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int num_threads)
 {
 	load_sequences_from_multifasta_file(multi_fasta_filename);
-	extract_sequences(vcf_filename, tree_filename, multi_fasta_filename,min_snps,original_multi_fasta_filename,window_min, window_max, uncorrected_p_value, trimming_ratio, extensive_search_flag, num_threads);
+	extract_sequences(vcf_filename, tree_filename, multi_fasta_filename,min_snps,original_multi_fasta_filename,window_min, window_max, uncorrected_p_value, trimming_ratio, extensive_search_flag, scaling_flag, num_threads);
 	create_tree_statistics_file(tree_filename,get_sample_statistics(),number_of_samples_from_parse_phylip());
 	freeup_memory();
 }
 
 
-void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[],int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int num_threads)
+void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[],int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int num_threads)
 {
 	FILE *vcf_file_pointer;
 	vcf_file_pointer=fopen(vcf_filename, "r");
@@ -105,7 +105,12 @@ void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fast
 	create_fasta_of_snp_sites(tree_filename, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples,internal_nodes);
 	
 	// Create an new tree with updated distances
-	scale_branch_distances(root_node, number_of_filtered_snps);
+  if (scaling_flag == 1) {
+    scale_branch_distances(root_node, number_of_filtered_snps);
+  } else {
+    scale_branch_distances(root_node, length_of_original_genome);
+  }
+	
 
 	FILE *output_tree_pointer;
 	output_tree_pointer=fopen(tree_filename, "w");


=====================================
src/gubbins.h
=====================================
@@ -23,8 +23,8 @@
 #include "seqUtil.h"
 #include "Newickform.h"
 
-void run_gubbins(char vcf_filename[], char tree_filename[], char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int num_threads);
-void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int n_threads);
+void run_gubbins(char vcf_filename[], char tree_filename[], char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int num_threads);
+void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[], int min_snps, char original_multi_fasta_filename[], int window_min, int window_max, float uncorrected_p_value, float trimming_ratio, int extensive_search_flag, int scaling_flag, int n_threads);
 char find_first_real_base(int base_position,  int number_of_child_sequences, char ** child_sequences);
 
 


=====================================
src/main.c
=====================================
@@ -52,6 +52,7 @@ void print_usage(FILE* stream, int exit_code)
            "  -b    Max window size\n"
            "  -p    p value for detecting recombinations\n"
            "  -i    p value ratio for trimming recombinations\n"
+           "  -s    scale branch lengths without correcting for invariant sites\n"
            "  -h    Display this usage information.\n\n"
 );
   exit (exit_code);
@@ -87,6 +88,7 @@ int main (int argc, char ** argv)
     float uncorrected_p_value = 0.05;
     float trimming_ratio = 1.0;
     int extensive_search_flag = 0;
+    int scaling_flag = 0;
     int num_threads = 1;
     program_name = argv[0];
   
@@ -106,12 +108,13 @@ int main (int argc, char ** argv)
             {"trimming_ratio",      required_argument, 0, 'i'},
             {"extended_search",     required_argument, 0, 'x'},
             {"ncpu",                required_argument, 0, 'n'},
+            {"scaling",             required_argument, 0, 's'},
 
             {0, 0, 0, 0}
         };
         /* getopt_long stores the option index here. */
         int option_index = 0;
-        c = getopt_long (argc, argv, "hrxv:f:t:m:a:b:p:i:n:",
+        c = getopt_long (argc, argv, "hrxsv:f:t:m:a:b:p:i:n:",
                            long_options, &option_index);
         /* Detect the end of the options. */
         if (c == -1)
@@ -136,6 +139,9 @@ int main (int argc, char ** argv)
             case 'x':
                 extensive_search_flag = 1;
                 break;
+            case 's':
+                scaling_flag = 1;
+                break;
             case 'f':
                 memcpy(original_multi_fasta_filename, optarg, size_of_string(optarg) +1);
                 break;
@@ -195,6 +201,7 @@ int main (int argc, char ** argv)
                     uncorrected_p_value,
                     trimming_ratio,
                     extensive_search_flag,
+                    scaling_flag,
                     num_threads);
     }
     else


=====================================
tests/check_gubbins.c
=====================================
@@ -10,7 +10,7 @@ START_TEST (check_gubbins_no_recombinations)
 {
 	remove("../tests/data/no_recombinations.tre");
 	cp("../tests/data/no_recombinations.tre", "../tests/data/no_recombinations.original.tre");
-	run_gubbins("../tests/data/no_recombinations.aln.vcf", "../tests/data/no_recombinations.tre","../tests/data/no_recombinations.aln.snp_sites.aln",3,"../tests/data/no_recombinations.aln.snp_sites.aln",100,10000,0.05,1,0,1);
+	run_gubbins("../tests/data/no_recombinations.aln.vcf", "../tests/data/no_recombinations.tre","../tests/data/no_recombinations.aln.snp_sites.aln",3,"../tests/data/no_recombinations.aln.snp_sites.aln",100,10000,0.05,1,0,0,1);
 	ck_assert(file_exists("../tests/data/no_recombinations.tre.tab") == 1);
 	ck_assert(file_exists("../tests/data/no_recombinations.tre.vcf") == 1);
 	ck_assert(file_exists("../tests/data/no_recombinations.tre.phylip") == 1);
@@ -37,7 +37,7 @@ START_TEST (check_gubbins_one_recombination)
 {
 	remove("../tests/data/one_recombination.tre");
 	cp("../tests/data/one_recombination.tre", "../tests/data/one_recombination.original.tre");
-	run_gubbins("../tests/data/one_recombination.aln.vcf", "../tests/data/one_recombination.tre","../tests/data/one_recombination.aln.snp_sites.aln",3,"../tests/data/one_recombination.aln.snp_sites.aln",100,10000,0.05,1,0,1);
+	run_gubbins("../tests/data/one_recombination.aln.vcf", "../tests/data/one_recombination.tre","../tests/data/one_recombination.aln.snp_sites.aln",3,"../tests/data/one_recombination.aln.snp_sites.aln",100,10000,0.05,1,0,0,1);
 	ck_assert(file_exists("../tests/data/one_recombination.tre.tab") == 1);
 	ck_assert(file_exists("../tests/data/one_recombination.tre.vcf") == 1);
 	ck_assert(file_exists("../tests/data/one_recombination.tre.phylip") == 1);
@@ -73,7 +73,7 @@ START_TEST (check_gubbins_multiple_recombinations)
                 "../tests/data/multiple_recombinations.jar.aln",
                 3,
                 "../tests/data/multiple_recombinations.aln",
-                30,100,0.05,1,0,1);
+                30,100,0.05,1,0,1,1);
 
 	ck_assert(file_exists("../tests/data/multiple_recombinations.tre.tab") == 1);
 	ck_assert(file_exists("../tests/data/multiple_recombinations.tre.vcf") == 1);
@@ -108,7 +108,7 @@ START_TEST (check_gubbins_multithreaded)
                 "../tests/data/multiple_recombinations.jar.aln",
                 3,
                 "../tests/data/multiple_recombinations.aln",
-                30,100,0.05,1,0,2);
+                30,100,0.05,1,0,0,2);
 
   ck_assert(file_exists("../tests/data/multiple_recombinations.tre.tab") == 1);
   ck_assert(file_exists("../tests/data/multiple_recombinations.tre.vcf") == 1);
@@ -143,7 +143,7 @@ START_TEST (check_recombination_at_root)
                 "../tests/data/recombination_at_root/recombination_at_root.aln.gaps.snp_sites.aln",
                 3,
                 "../tests/data/recombination_at_root/recombination_at_root.aln",
-                100,10000,0.05,1,0,1);
+                100,10000,0.05,1,0,0,1);
 
     ck_assert(compare_files("../tests/data/recombination_at_root/RAxML_result.recombination_at_root.iteration_1.tab","../tests/data/recombination_at_root/expected_RAxML_result.recombination_at_root.iteration_1.tab") == 1);
 



View it on GitLab: https://salsa.debian.org/med-team/gubbins/-/commit/88ec10f179c3fd209398e8f409e21cdc94338a72

-- 
View it on GitLab: https://salsa.debian.org/med-team/gubbins/-/commit/88ec10f179c3fd209398e8f409e21cdc94338a72
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250927/a0ed8402/attachment-0001.htm>


More information about the debian-med-commit mailing list