[med-svn] [gubbins] 01/03: Imported Upstream version 2.1.0
Andreas Tille
tille at debian.org
Fri Aug 5 21:53:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository gubbins.
commit 63f5943e24e8a4818a457721343347e0ceaea01d
Author: Andreas Tille <tille at debian.org>
Date: Fri Aug 5 23:46:56 2016 +0200
Imported Upstream version 2.1.0
---
CHANGELOG | 5 ++
INSTALL.md | 2 +
VERSION | 2 +-
python/gubbins/RAxMLExecutable.py | 12 +++-
python/gubbins/common.py | 2 +-
.../input_alignment.fasta | 2 +-
python/gubbins/tests/test_external_dependancies.py | 1 +
python/scripts/run_gubbins.py | 2 +
src/branch_sequences.c | 83 +++++++---------------
src/branch_sequences.h | 1 -
src/string_cat.c | 9 +--
tests/check_branch_sequences.c | 18 ++---
12 files changed, 53 insertions(+), 86 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
index 69f2409..223b398 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+v2.1.0 - 22 July 2016
+------
+Use GTRCAT model by default in RAxML instead of GTRGAMMA (massive speedup).
+C code optimisations in Gubbins.
+
v2.0.0 - 26 May 2016
------
Reconstruct internal sequences by default using RAxML rather than fastML.
diff --git a/INSTALL.md b/INSTALL.md
index 41e524f..f3de67b 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -97,6 +97,8 @@ autoreconf -i
./configure
make
sudo make install
+cd python
+sudo python3 setup.py install
```
## OSX/Linux/Windows - Virtual Machine
diff --git a/VERSION b/VERSION
index 227cea2..7ec1d6d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.0
+2.1.0
diff --git a/python/gubbins/RAxMLExecutable.py b/python/gubbins/RAxMLExecutable.py
index a27188b..2401a32 100644
--- a/python/gubbins/RAxMLExecutable.py
+++ b/python/gubbins/RAxMLExecutable.py
@@ -23,18 +23,24 @@ import subprocess
import re
class RAxMLExecutable(object):
- def __init__(self, threads, verbose = False ):
+ def __init__(self, threads, model = 'GTRCAT', verbose = False ):
self.verbose = verbose
self.threads = threads
self.single_threaded_executables = ['raxmlHPC-AVX','raxmlHPC-SSE3','raxmlHPC']
self.multi_threaded_executables = ['raxmlHPC-PTHREADS-AVX','raxmlHPC-PTHREADS-SSE3','raxmlHPC-PTHREADS']
+ self.model = model
self.raxml_executable = self.select_executable_based_on_threads()
- self.tree_building_parameters = ' -f d -p 1 -m GTRGAMMA '
+ self.tree_building_parameters_gtrgamma = ' -f d -p 1 -m GTRGAMMA '
+ self.tree_building_parameters_gtrcat = ' -f d -p 1 -m GTRCAT -V '
self.internal_sequence_parameters = ' -f A -p 1 -m GTRGAMMA '
def tree_building_command(self):
- command = self.raxml_executable + self.threads_parameter() + self.tree_building_parameters
+ tree_building_parameters = self.tree_building_parameters_gtrcat
+ if self.model == 'GTRGAMMA':
+ tree_building_parameters =self.tree_building_parameters_gtrgamma
+
+ command = self.raxml_executable + self.threads_parameter() + tree_building_parameters
if self.verbose:
print("Tree building command: "+command)
return command
diff --git a/python/gubbins/common.py b/python/gubbins/common.py
index e1ae21d..b1f2e4e 100644
--- a/python/gubbins/common.py
+++ b/python/gubbins/common.py
@@ -102,7 +102,7 @@ class GubbinsCommon():
def parse_and_run(self):
# Default parameters
- raxml_executable_obj = RAxMLExecutable(self.args.threads, self.args.verbose)
+ raxml_executable_obj = RAxMLExecutable(self.args.threads, self.args.raxml_model, self.args.verbose)
fasttree_executables = ['FastTree','fasttree']
FASTTREE_EXEC = GubbinsCommon.choose_executable(fasttree_executables)
diff --git a/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta b/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta
index 6d75ec7..2367ec0 100644
--- a/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta
+++ b/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta
@@ -9,4 +9,4 @@ CCCTT
>E
CCTTT
>F
-CCGGG
\ No newline at end of file
+CCGGG
diff --git a/python/gubbins/tests/test_external_dependancies.py b/python/gubbins/tests/test_external_dependancies.py
index ef03d04..6066171 100644
--- a/python/gubbins/tests/test_external_dependancies.py
+++ b/python/gubbins/tests/test_external_dependancies.py
@@ -212,6 +212,7 @@ class TestExternalDependancies(unittest.TestCase):
parser.add_argument('--threads', '-c', help='Number of threads to run with RAXML, but only if a PTHREADS version is available', type=int, default = 1)
parser.add_argument('--converge_method', '-z', help='Criteria to use to know when to halt iterations [weighted_robinson_foulds|robinson_foulds|recombination]', default = 'weighted_robinson_foulds')
parser.add_argument('--version', action='version', version=str(pkg_resources.get_distribution("gubbins").version))
+ parser.add_argument('--raxml_model', '-r', help='RAxML model [GTRGAMMA|GTRCAT], default GTRCAT', default = 'GTRCAT')
return parser
def default_arg_parse(self):
diff --git a/python/scripts/run_gubbins.py b/python/scripts/run_gubbins.py
index 592a64d..35fc07c 100755
--- a/python/scripts/run_gubbins.py
+++ b/python/scripts/run_gubbins.py
@@ -21,6 +21,7 @@
import sys
sys.path.append(".")
+sys.path.append("..")
import argparse
import pkg_resources
from gubbins import common
@@ -44,6 +45,7 @@ parser.add_argument('--converge_method', '-z', help='Criteria to use to know wh
parser.add_argument('--version', action='version', version=str(pkg_resources.get_distribution("gubbins").version))
parser.add_argument('--min_window_size', '-a', help='Minimum window size, default 100', type=int, default = 100)
parser.add_argument('--max_window_size', '-b', help='Maximum window size, default 10000', type=int, default = 10000)
+parser.add_argument('--raxml_model', '-r', help='RAxML model [GTRGAMMA|GTRCAT], default GTRCAT', default = 'GTRCAT')
gubbins_runner = common.GubbinsCommon(parser.parse_args())
gubbins_runner.parse_and_run()
diff --git a/src/branch_sequences.c b/src/branch_sequences.c
index fcdbb2b..a7989e1 100644
--- a/src/branch_sequences.c
+++ b/src/branch_sequences.c
@@ -78,37 +78,31 @@ int get_list_of_snp_indices_which_fall_in_downstream_recombinations(int ** curre
{
int num_snps_in_recombinations =0;
int i = 0;
+
+ // loop over each block
for(i = 0; i<num_blocks; i++ )
{
int current_index = 0;
+ // convert the starting coordinates of block to the nearest SNP index
current_index = find_starting_index(current_block_coordinates[0][i],snp_locations,0, number_of_snps);
- int j;
- for(j = current_index; (j < number_of_snps && snp_locations[j] <= current_block_coordinates[1][i]); j++)
+ //make sure that the index begins at start of block
+ int beginning_j = current_index;
+ for(beginning_j = current_index; snp_locations[beginning_j] < current_block_coordinates[0][i];beginning_j++)
+ {
+ }
+
+ int j;
+ // starting at the beginning index of block, count all the snps until the end of the block.
+ for(j = beginning_j; (j < number_of_snps && snp_locations[j] <= current_block_coordinates[1][i]); j++)
{
- if(snp_locations[j] >= current_block_coordinates[0][i] && snp_locations[j] <= current_block_coordinates[1][i])
- {
- int k = 0;
- int seen_before = 0;
- // has this snp index been flagged before?
- for(k =0; k < num_snps_in_recombinations; k++)
- {
- if(snps_in_recombinations[k] == j)
- {
- seen_before = 1;
- break;
- }
- }
- if(seen_before == 0)
- {
- snps_in_recombinations[num_snps_in_recombinations] = j;
- num_snps_in_recombinations++;
- }
- }
+ snps_in_recombinations[num_snps_in_recombinations] = j;
+ num_snps_in_recombinations++;
}
}
+
+ // may contain duplications
return num_snps_in_recombinations;
-
}
@@ -573,11 +567,6 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
// Set up the window counter with 1 value per base in the branch
int * window_count;
window_count = (int *) calloc((genome_size+1),sizeof(int));
- int i;
- for(i =0; i< genome_size; i++)
- {
- window_count[i] = 0;
- }
// Integer array with location of gaps
int * gaps_in_original_genome_space;
@@ -626,7 +615,7 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
int in_block = 0;
int block_lower_bound = 0;
// Scan across the pileup and record where blocks are above the cutoff
-
+ int i;
for(i = 0; i < genome_size; i++)
{
// Just entered the start of a block
@@ -1043,12 +1032,14 @@ int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int lengt
int * bases_to_be_excluded;
bases_to_be_excluded = (int*) calloc((length_of_sequence + 1),sizeof(int));
+ int genome_length = length_of_sequence;
int i = 0;
for(i = 0; i<length_of_sequence; i++)
{
if(sequence[i] == 'N' || sequence[i] == '-' )
{
bases_to_be_excluded[i] = 1;
+ genome_length--;
}
}
@@ -1064,41 +1055,15 @@ int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int lengt
int block_index = 0;
for(block_index = block_coordinates[0][j]; block_index <= block_coordinates[1][j]; block_index++ )
{
- bases_to_be_excluded[block_index-1] = 1;
+ if(bases_to_be_excluded[block_index-1] == 0)
+ {
+ bases_to_be_excluded[block_index-1] = 1;
+ genome_length--;
+ }
}
}
- int genome_length = 0;
- for(i = 0; i<length_of_sequence; i++)
- {
- if(bases_to_be_excluded[i] == 0 )
- {
- genome_length++;
- }
- }
return genome_length;
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/branch_sequences.h b/src/branch_sequences.h
index 64a3591..c8f6847 100644
--- a/src/branch_sequences.h
+++ b/src/branch_sequences.h
@@ -47,7 +47,6 @@ int get_list_of_snp_indices_which_fall_in_downstream_recombinations(int ** curre
int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int length_of_sequence, int ** block_coordinates, int num_blocks);
-
#define WINDOW_SNP_MODE_TARGET 10
#define RANDOMNESS_DAMPNER 0.05
#define MAX_SAMPLE_NAME_SIZE 1024
diff --git a/src/string_cat.c b/src/string_cat.c
index 50b3df3..17f3d5b 100644
--- a/src/string_cat.c
+++ b/src/string_cat.c
@@ -21,16 +21,9 @@
#include <stdlib.h>
#include <string.h>
-
int size_of_string(char *input_string)
{
- int i = 0;
-
- while( input_string[i] != '\0')
- {
- i++;
- }
- return i;
+ return strlen(input_string);
}
void concat_strings_created_with_malloc(char *input_string, char *string_to_concat)
diff --git a/tests/check_branch_sequences.c b/tests/check_branch_sequences.c
index b6f8c90..78b9eca 100644
--- a/tests/check_branch_sequences.c
+++ b/tests/check_branch_sequences.c
@@ -153,22 +153,19 @@ START_TEST (check_get_list_of_snp_indices_which_fall_in_downstream_recombination
{
int ** block_coords;
block_coords = (int **) malloc(2*sizeof(int*));
- block_coords[0] = (int*) malloc((4)*sizeof(int ));
- block_coords[1] = (int*) malloc((4)*sizeof(int ));
+ block_coords[0] = (int*) malloc((2)*sizeof(int ));
+ block_coords[1] = (int*) malloc((2)*sizeof(int ));
block_coords[0][0] = 5;
block_coords[1][0] = 10;
block_coords[0][1] = 30;
block_coords[1][1] = 35;
- block_coords[0][2] = 20;
- block_coords[1][2] = 25;
- block_coords[0][3] = 7;
- block_coords[1][3] = 15;
+
int snp_locations[16] = {1,4,5,6,7,10,11,15,19,20,29,30,35,36,40,50};
- int * snps_in_recombinations = (int *) calloc((16 +1),sizeof(int));
+ int * snps_in_recombinations = (int *) calloc((16 +1),sizeof(int));
int num_snps_in_recombinations = 0;
- num_snps_in_recombinations = get_list_of_snp_indices_which_fall_in_downstream_recombinations(block_coords,4,snp_locations,16, snps_in_recombinations);
- fail_unless(num_snps_in_recombinations == 9);
+ num_snps_in_recombinations = get_list_of_snp_indices_which_fall_in_downstream_recombinations(block_coords,2,snp_locations,16, snps_in_recombinations);
+ fail_unless(num_snps_in_recombinations == 6);
fail_unless(snps_in_recombinations[0] == 2);
fail_unless(snps_in_recombinations[1] == 3);
@@ -176,9 +173,6 @@ START_TEST (check_get_list_of_snp_indices_which_fall_in_downstream_recombination
fail_unless(snps_in_recombinations[3] == 5);
fail_unless(snps_in_recombinations[4] == 11);
fail_unless(snps_in_recombinations[5] == 12);
- fail_unless(snps_in_recombinations[6] == 9);
- fail_unless(snps_in_recombinations[7] == 6);
- fail_unless(snps_in_recombinations[8] == 7);
}
END_TEST
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gubbins.git
More information about the debian-med-commit
mailing list