[med-svn] [roary] 01/07: New upstream version 3.11.0+dfsg
Sascha Steinbiss
satta at debian.org
Wed Oct 11 08:51:48 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository roary.
commit 622661bfc8d75dd459351df08730a74ea63c24ed
Author: Sascha Steinbiss <satta at debian.org>
Date: Wed Oct 11 10:17:28 2017 +0200
New upstream version 3.11.0+dfsg
---
.travis.yml | 11 ++-
dist.ini | 4 +-
install_dependencies.sh | 12 ++--
lib/Bio/Roary/CommandLine/Roary.pm | 2 +-
lib/Bio/Roary/External/CheckTools.pm | 8 +--
lib/Bio/Roary/MergeMultifastaAlignments.pm | 2 +-
lib/Bio/Roary/PrepareInputFiles.pm | 81 +++++++++++++++-------
lib/Bio/Roary/ReformatInputGFFs.pm | 72 +++++++++++++++----
t/Bio/Roary/ReformatInputGFFs.t | 12 ++--
t/data/expected_core_gene_alignment_core0.66.aln | 2 +-
t/data/overall_gene_presence_absence.csv | 42 +++++------
.../reformat_input_gffs/expected_fixed_query_2.gff | 10 +--
12 files changed, 168 insertions(+), 90 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 8896bae..5789ffe 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,20 +8,19 @@ addons:
cache:
directories:
- build/parallel-20160722
- - build/parallel-20130922
+ - build/parallel-20170822
- build/bedtools2
- - build/cd-hit-v4.6.6-2016-0711
+ - build/cd-hit-v4.6.8-2017-0621
- build/prank-msa-master
- - build/ncbi-blast-2.4.0+
+ - build/ncbi-blast-2.6.0+
- build/mcl-14-137
- build/fasttree
perl:
- "5.14"
- - "5.20"
- - "5.24"
+ - "5.26"
env:
- PARALLEL_VERSION=20160722
- - PARALLEL_VERSION=20130922
+ - PARALLEL_VERSION=20170822
install:
- "source ./install_dependencies.sh"
script: "ROARY_FULL_TESTS=1 dzil test"
diff --git a/dist.ini b/dist.ini
index 4d8b414..e673cd7 100644
--- a/dist.ini
+++ b/dist.ini
@@ -1,9 +1,9 @@
name = Bio-Roary
-version = 3.9.1
author = Andrew J. Page <ap13 at sanger.ac.uk>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
copyright_year = 2013
+version = 3.11.0
main_module = lib/Bio/Roary.pm
[MetaResources]
@@ -12,7 +12,7 @@ repository.web = http://sanger-pathogens.github.io/Roary/
repository.url = https://github.com/sanger-pathogens/Roary.git
repository.type = git
-[@Basic]
+[@Starter]
[PruneCruft]
[ExtraTests]
[AutoPrereqs]
diff --git a/install_dependencies.sh b/install_dependencies.sh
index 41aecc2..253ed7b 100755
--- a/install_dependencies.sh
+++ b/install_dependencies.sh
@@ -15,16 +15,16 @@ BEDTOOLS_VERSION="2.26.0"
BEDTOOLS_DOWNLOAD_FILENAME="bedtools-${BEDTOOLS_VERSION}.tar.gz"
BEDTOOLS_URL="https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VERSION}/${BEDTOOLS_DOWNLOAD_FILENAME}"
-CDHIT_SHORT_VERSION="4.6.6"
-CDHIT_LONG_VERSION="4.6.6-2016-0711"
+CDHIT_SHORT_VERSION="4.6.8"
+CDHIT_LONG_VERSION="4.6.8-2017-0621"
CDHIT_DOWNLOAD_FILENAME="cd-hit-${CDHIT_SHORT_VERSION}.tar.gz"
-CDHIT_URL="https://github.com/weizhongli/cdhit/releases/download/V${CDHIT_SHORT_VERSION}/cd-hit-v${CDHIT_LONG_VERSION}.tar.gz"
+CDHIT_URL="https://github.com/weizhongli/cdhit/releases/download/V${CDHIT_SHORT_VERSION}/cd-hit-v${CDHIT_LONG_VERSION}-source.tar.gz"
PRANK_VERSION="0.140603"
PRANK_DOWNLOAD_FILENAME="prank-msa-master.tar.gz"
PRANK_URL="https://github.com/ariloytynoja/prank-msa/archive/master.tar.gz"
-BLAST_VERSION="2.4.0"
+BLAST_VERSION="2.6.0"
BLAST_DOWNLOAD_FILENAME="ncbi-blast-${BLAST_VERSION}+-x64-linux.tar.gz"
BLAST_URL="ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VERSION}/${BLAST_DOWNLOAD_FILENAME}"
@@ -32,11 +32,11 @@ MCL_VERSION="14-137"
MCL_DOWNLOAD_FILENAME="mcl-${MCL_VERSION}.tar.gz"
MCL_URL="http://micans.org/mcl/src/mcl-${MCL_VERSION}.tar.gz"
-FASTTREE_VERSION="2.1.9"
+FASTTREE_VERSION="2.1.10"
FASTTREE_DOWNLOAD_FILENAME="FastTree-${FASTTREE_VERSION}.c"
FASTTREE_URL="http://microbesonline.org/fasttree/FastTree-${FASTTREE_VERSION}.c"
-MAFFT_VERSION="7.271"
+MAFFT_VERSION="7.310"
MAFFT_DOWNLOAD_FILENAME="mafft-${MAFFT_VERSION}-without-extensions-src.tgz"
MAFFT_URL="http://mafft.cbrc.jp/alignment/software/${MAFFT_DOWNLOAD_FILENAME}"
diff --git a/lib/Bio/Roary/CommandLine/Roary.pm b/lib/Bio/Roary/CommandLine/Roary.pm
index 3052b6c..e2335a0 100644
--- a/lib/Bio/Roary/CommandLine/Roary.pm
+++ b/lib/Bio/Roary/CommandLine/Roary.pm
@@ -154,7 +154,7 @@ sub BUILD {
$self->perc_identity($perc_identity);
if ( $perc_identity < 50 ) {
$self->logger->error(
-"The percentage identity is too low. Either somethings wrong with your data, like contamination, or your doing something that the software isnt designed to support."
+"The percentage identity is too low. Either something is wrong with your data, like contamination, or your doing something that the software isnt designed to support."
);
}
}
diff --git a/lib/Bio/Roary/External/CheckTools.pm b/lib/Bio/Roary/External/CheckTools.pm
index b05593a..dc01fd5 100644
--- a/lib/Bio/Roary/External/CheckTools.pm
+++ b/lib/Bio/Roary/External/CheckTools.pm
@@ -57,17 +57,17 @@ my %tools = (
},
'mafft' => {
GETVER => "mafft --version < /dev/null 2>&1",
- REGEXP => qr/v($BIDEC) /,
+ REGEXP => qr/(\d+\.\d+) /,
NEEDED => 1,
},
'kraken' => {
GETVER => "kraken --version | head -n 1",
- REGEXP => qr/Kraken version kraken-(\d+\.\d+\.\d+.*)/,
+ REGEXP => qr/(\d+\.\d+\.\d+.*)/,
NEEDED => 0,
},
'kraken-report' => {
GETVER => "kraken-report --version | head -n 1",
- REGEXP => qr/Kraken version kraken-(\d+\.\d+\.\d+.*)/,
+ REGEXP => qr/(\d+\.\d+\.\d+.*)/,
NEEDED => 0,
},
'Rscript' => {
@@ -78,7 +78,7 @@ my %tools = (
},
# prank version also performs an update check so cant use it
- 'prank' => { NEEDED => 1 },
+ 'prank' => { NEEDED => 0 },
# now just the standard unix tools we need
'grep' => { NEEDED => 1 },
diff --git a/lib/Bio/Roary/MergeMultifastaAlignments.pm b/lib/Bio/Roary/MergeMultifastaAlignments.pm
index f74d4ac..1355cf5 100644
--- a/lib/Bio/Roary/MergeMultifastaAlignments.pm
+++ b/lib/Bio/Roary/MergeMultifastaAlignments.pm
@@ -83,7 +83,7 @@ sub _sequence_for_sample_from_gene_file {
sub _padded_string_for_gene_file {
my ( $self, $gene_file ) = @_;
return '' unless ( defined( $self->_gene_lengths->{$gene_file} ) );
- return 'N' x ( $self->_gene_lengths->{$gene_file} );
+ return '-' x ( $self->_gene_lengths->{$gene_file} );
}
sub _create_merged_sequence_for_sample {
diff --git a/lib/Bio/Roary/PrepareInputFiles.pm b/lib/Bio/Roary/PrepareInputFiles.pm
index bedb9bb..aaf2a19 100644
--- a/lib/Bio/Roary/PrepareInputFiles.pm
+++ b/lib/Bio/Roary/PrepareInputFiles.pm
@@ -18,32 +18,39 @@ use Moose;
use Bio::Roary::Exceptions;
use Bio::Roary::ExtractProteomeFromGFFs;
use Bio::Roary::FilterUnknownsFromFasta;
-use Cwd qw(getcwd);
+use Cwd qw(getcwd);
use File::Temp;
-
-has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
-has 'job_runner' => ( is => 'ro', isa => 'Str', default => 'Local' );
-has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
-has '_input_gff_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_gff_files' );
-has '_input_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files' );
-has '_input_fasta_files_filtered' =>
- ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files_filtered' );
+use Log::Log4perl qw(:easy);
+
+has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'job_runner' => ( is => 'ro', isa => 'Str', default => 'Local' );
+has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
+has '_input_gff_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_gff_files' );
+has '_input_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files' );
+has '_input_fasta_files_filtered' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files_filtered' );
has '_input_fasta_files_filtered_obj' =>
- ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' );
+ ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' );
-has '_derived_fasta_files' =>
- ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__derived_fasta_files' );
+has '_derived_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__derived_fasta_files' );
has '_extract_proteome_obj' => (
is => 'ro',
isa => 'Bio::Roary::ExtractProteomeFromGFFs',
lazy => 1,
builder => '_build__extract_proteome_obj'
);
-has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
-has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
-has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
-has '_fasta_filter_obj' => ( is => 'ro', isa => 'Bio::Roary::FilterUnknowsFromFasta', lazy => 1, builder => '_fasta_filter_obj' );
-has 'working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
+has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
+has '_fasta_filter_obj' => ( is => 'ro', isa => 'Bio::Roary::FilterUnknowsFromFasta', lazy => 1, builder => '_fasta_filter_obj' );
+has 'working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' );
+
+sub _build_logger {
+ my ($self) = @_;
+ Log::Log4perl->easy_init($ERROR);
+ my $logger = get_logger();
+ return $logger;
+}
sub _build__input_gff_files {
my ($self) = @_;
@@ -54,6 +61,33 @@ sub _build__input_gff_files {
sub _build__input_fasta_files {
my ($self) = @_;
my @fasta_files = grep( !/\.gff$/, @{ $self->input_files } );
+
+ my @validated_fasta_files;
+
+ for my $fasta_file (@fasta_files) {
+ eval {
+ my $inseq = Bio::SeqIO->new(
+ -file => $fasta_file,
+ -format => 'fasta',
+ -alphabet => 'protein'
+ );
+ while ( my $seq = $inseq->next_seq ) {
+
+ # do something to force the reading.
+ $seq->seq;
+ }
+ };
+ if ($@) {
+ $self->logger->warn(
+ "Input file doesnt have a .gff extension and isnt a protein FASTA file so excluding it from further analysis: $fasta_file"
+ );
+ }
+ else {
+ push( @validated_fasta_files, $fasta_file );
+ }
+
+ }
+
return \@fasta_files;
}
@@ -62,11 +96,10 @@ sub _build__input_fasta_files_filtered_obj {
return Bio::Roary::FilterUnknownsFromFasta->new( fasta_files => $self->_input_fasta_files );
}
-sub _build__input_fasta_files_filtered
-{
- my ($self) = @_;
- return undef if ( !defined( $self->_input_fasta_files ) );
- return $self->_input_fasta_files_filtered_obj->filtered_fasta_files();
+sub _build__input_fasta_files_filtered {
+ my ($self) = @_;
+ return undef if ( !defined( $self->_input_fasta_files ) );
+ return $self->_input_fasta_files_filtered_obj->filtered_fasta_files();
}
sub _build__extract_proteome_obj {
@@ -77,8 +110,8 @@ sub _build__extract_proteome_obj {
apply_unknowns_filter => $self->apply_unknowns_filter,
translation_table => $self->translation_table,
cpus => $self->cpus,
- verbose => $self->verbose,
- working_directory => $self->working_directory,
+ verbose => $self->verbose,
+ working_directory => $self->working_directory,
);
}
diff --git a/lib/Bio/Roary/ReformatInputGFFs.pm b/lib/Bio/Roary/ReformatInputGFFs.pm
index 0ae52ce..4d1dd03 100644
--- a/lib/Bio/Roary/ReformatInputGFFs.pm
+++ b/lib/Bio/Roary/ReformatInputGFFs.pm
@@ -18,10 +18,12 @@ Take in gff files and add suffix where a gene id is seen twice
use Moose;
use Bio::Roary::Exceptions;
use Cwd;
+use File::Copy;
use Log::Log4perl qw(:easy);
use Bio::Tools::GFF;
use File::Path qw(make_path);
use File::Basename;
+use Digest::MD5::File qw(file_md5_hex);
has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' );
@@ -42,47 +44,93 @@ sub fix_duplicate_gene_ids {
my ($self) = @_;
my %gene_ids_seen_before;
+
+ my %file_md5s;
+
for my $file ( @{ $self->gff_files } ) {
-
+ my $digest = file_md5_hex($file);
+
+ if(defined($file_md5s{$digest}))
+ {
+ $self->logger->warn(
+ "Input files have identical MD5 hashes, only using the first file: ".$file_md5s{$digest}." == ".$file
+ );
+ next;
+ }
+ else
+ {
+ $file_md5s{$digest} = $file;
+ }
+
my $ids_seen = 0;
my $ids_from_file = $self->_get_ids_for_gff_file($file);
if ( @{$ids_from_file} < 1 ) {
- $self->logger->warn(
+ $self->logger->error(
"Input GFF file doesnt contain annotation we can use so excluding it from the analysis: $file"
);
}
else {
for my $gene_id ( @{$ids_from_file} ) {
if ( $gene_ids_seen_before{$gene_id} ) {
- $self->logger->warn(
- "Input file contains duplicate gene IDs, attempting to fix by adding a unique suffix. New GFF in the fixed_input_files directory. $file "
+ $self->logger->error(
+ "Input file contains duplicate gene IDs, attempting to fix by adding a unique suffix, new GFF in the fixed_input_files directory: $file "
);
- my $updated_file = $self->_add_suffix_to_gene_ids_and_return_new_file($file);
+ my $updated_file = $self->_add_suffix_to_gene_ids_and_return_new_file($file, $digest);
push( @{ $self->fixed_gff_files }, $updated_file ) if ( defined($updated_file) );
$ids_seen = 1;
last;
}
$gene_ids_seen_before{$gene_id}++;
}
+
+ # We know its a valid GFF file since we could open it and extract IDs.
+ # We need to make sure the filenames end in .gff. If it contained duplicate IDs, then they are fixed so nothing to do, but
+ # if they didnt, then we have to double check and repair if necessary.
if ( $ids_seen == 0 ) {
- push( @{ $self->fixed_gff_files }, $file );
+
+
+ push( @{ $self->fixed_gff_files }, $self->_fix_gff_file_extension($file) );
}
}
}
return 1;
}
+sub _fix_gff_file_extension
+{
+ my ( $self, $input_file ) = @_;
+
+ my ( $filename, $directories, $suffix ) = fileparse( $input_file, qr/\.[^.]*/ );
+ return $input_file if($suffix eq '.gff');
+
+
+ make_path( $self->output_directory ) if ( !( -d $self->output_directory ) );
+ my $output_file = $self->output_directory . '/' . $filename . '.gff';
+ copy($input_file, $output_file) or $self->logger->error("Couldnt copy file with invalid gff extention: $input_file -> $output_file");
+ return $output_file;
+}
+
+
sub _add_suffix_to_gene_ids_and_return_new_file {
- my ( $self, $input_file ) = @_;
+ my ( $self, $input_file, $digest ) = @_;
my ( $filename, $directories, $suffix ) = fileparse( $input_file, qr/\.[^.]*/ );
make_path( $self->output_directory ) if ( !( -d $self->output_directory ) );
- my $output_file = $self->output_directory . '/' . $filename . $suffix;
+ my $output_file = $self->output_directory . '/' . $filename . '.gff';
open( my $input_gff_fh, $input_file );
open( my $out_gff_fh, '>', $output_file );
+
+ # There is a chance that there can be a collision here, but its remote.
+ my $random_locus_tag = "".$digest;
+
+ $self->logger->warn(
+ "Renamed GFF file from: $input_file -> $output_file" );
+ $self->logger->warn(
+ "Locus tag used is '$random_locus_tag' for file: $input_file" );
my $found_fasta = 0;
+ my $gene_counter = 1;
while (<$input_gff_fh>) {
my $line = $_;
@@ -102,15 +150,15 @@ sub _add_suffix_to_gene_ids_and_return_new_file {
if ( $tags[$i] =~ /^(ID=["']?)([^;"']+)(["']?)/ ) {
my $current_id = $2;
$current_id .= '___' . $self->suffix_counter;
- $tags[$i] = $1 . $current_id . $3;
- $self->suffix_counter( $self->suffix_counter + 1 );
+ $tags[$i] = $1 .$random_locus_tag.'_'. $gene_counter . $3;
+ $gene_counter++;
$found_id++;
last;
}
}
if ( $found_id == 0 ) {
- unshift( @tags, 'ID=id___' . $self->suffix_counter );
- $self->suffix_counter( $self->suffix_counter + 1 );
+ unshift( @tags, 'ID=' . $random_locus_tag.'_'. $gene_counter );
+ $gene_counter++;
}
$cells[8] = join( ';', @tags );
print {$out_gff_fh} join( "\t", @cells );
diff --git a/t/Bio/Roary/ReformatInputGFFs.t b/t/Bio/Roary/ReformatInputGFFs.t
index 16ad53b..dd68fc1 100755
--- a/t/Bio/Roary/ReformatInputGFFs.t
+++ b/t/Bio/Roary/ReformatInputGFFs.t
@@ -45,21 +45,19 @@ ok(( -e 'fixed_input_files/query_2.gff'), 'fixed file should exist');
compare_ok('fixed_input_files/query_2.gff', 't/data/reformat_input_gffs/expected_fixed_query_2.gff', 'fixed file should have expected changes');
remove_tree('fixed_input_files');
-
-ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff', 't/data/reformat_input_gffs/query_2.gff', 't/data/reformat_input_gffs/query_3.gff']), 'initialise with 3 input gffs');
+ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff', 't/data/reformat_input_gffs/query_2.gff', 't/data/reformat_input_gffs/query_3.gff']), 'initialise with 3 input gffs, 2 identical duplicates');
ok(!( -d 'fixed_input_files'), 'Directory shouldnt exist before running');
ok($obj->fix_duplicate_gene_ids, 'fix duplicates with 3 input gffs');
-ok(( -d 'fixed_input_files'), 'Directory should exist because there is 2 gffs thats fixed');
-is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff','fixed_input_files/query_2.gff','fixed_input_files/query_3.gff' ] ,'list of gff files 2 in the fixed directory');
+ok(( -d 'fixed_input_files'), 'Directory should exist because there are 2 gffs thats fixed');
+is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff','fixed_input_files/query_2.gff' ] ,'list of gff files 2 in the fixed directory');
ok(( -e 'fixed_input_files/query_2.gff'), 'fixed file should exist');
-ok(( -e 'fixed_input_files/query_3.gff'), 'fixed file should exist');
+ok(!( -e 'fixed_input_files/query_3.gff'), 'fixed file should exist');
compare_ok('fixed_input_files/query_2.gff','t/data/reformat_input_gffs/expected_fixed_query_2.gff', 'fixed file should have expected changes');
-compare_ok('fixed_input_files/query_3.gff', 't/data/reformat_input_gffs/expected_fixed_query_3.gff', 'fixed file should have expected changes');
remove_tree('fixed_input_files');
ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/real_1.gff']), 'initialise with 1 gff that has shown to have a bug');
-ok(my $fixed_file = $obj->_add_suffix_to_gene_ids_and_return_new_file('t/data/reformat_input_gffs/real_1.gff'), 'fix duplicates');
+ok(my $fixed_file = $obj->_add_suffix_to_gene_ids_and_return_new_file('t/data/reformat_input_gffs/real_1.gff', 'id__'), 'fix duplicates');
ok(( -e 'fixed_input_files/real_1.gff'), 'fixed file should exist');
compare_ok('fixed_input_files/real_1.gff', 't/data/reformat_input_gffs/expected_real_1.gff', 'fixed file should have expected changes');
remove_tree('fixed_input_files');
diff --git a/t/data/expected_core_gene_alignment_core0.66.aln b/t/data/expected_core_gene_alignment_core0.66.aln
index e542299..9309965 100644
--- a/t/data/expected_core_gene_alignment_core0.66.aln
+++ b/t/data/expected_core_gene_alignment_core0.66.aln
@@ -3,4 +3,4 @@ TTTTT
>query_2
GGGGG
>query_3
-NNNNN
+-----
diff --git a/t/data/overall_gene_presence_absence.csv b/t/data/overall_gene_presence_absence.csv
index d828265..fd16e85 100644
--- a/t/data/overall_gene_presence_absence.csv
+++ b/t/data/overall_gene_presence_absence.csv
@@ -1,22 +1,22 @@
"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2","query_5"
-"hly","","Alpha-toxin","2","2","1","1","1","1","7","","959","959","959","1_1","2_1___1",""
-"group_10","","hypothetical protein","2","2","1","1","10","1","6","","227","227","227","abc_00010","abc_00010___10",""
-"group_11","","C4-dicarboxylate transporter/malic acid transport protein","2","2","1","1","11","1","5","","947","947","947","abc_00011","abc_00011___11",""
-"group_12","","hypothetical protein","2","2","1","1","12","1","4","","188","188","188","abc_00012","abc_00012___12",""
-"group_13","","Gonococcal growth inhibitor III","2","2","1","1","13","1","3","","134","134","134","abc_00014","abc_00014___14",""
-"group_14","","Gonococcal growth inhibitor III","2","2","1","1","14","1","2","","134","134","134","1_6","2_7___15",""
-"yfnB","","Putative HAD-hydrolase yfnB","2","2","1","1","15","1","1","","686","686","686","abc_00016","abc_00016___16",""
-"group_2","","hypothetical protein","2","2","1","1","2","1","8","","146","146","146","abc_00002","abc_00002___2",""
-"group_3","","hypothetical protein","2","2","1","1","3","1","9","","197","197","197","abc_00003","abc_00003___3",""
-"group_4","","superantigen-like protein","2","2","1","1","4","1","10","","716","716","716","abc_00004","abc_00004___4",""
-"speH","","hypothetical protein","2","2","1","1","5","1","11","","725","725","725","1_2","2_2___5",""
-"group_6","","superantigen-like protein","2","2","1","1","6","1","12","","725","725","725","abc_00006","abc_00006___6",""
-"argF","","Ornithine carbamoyltransferase","2","2","1","1","7","1","13","","1001","1001","1001","1_3","2_3___7",""
-"arcC1","","Carbamate kinase 1","2","2","1","1","8","1","14","","935","935","935","abc_00008","abc_00008___8",""
-"group_9","","16S ribosomal RNA","2","2","1","1","9","1","15","","1556","1556","1556","abc_01705","abc_01705___9",""
-"group_16","","hypothetical protein","1","1","1","2","1","2","6","","146","146","146","","","abc_50002"
-"group_17","argF","Ornithine carbamoyltransferase","1","1","1","2","6","2","5","","1001","1001","1001","","","3_3"
-"group_18","","hypothetical protein","1","1","1","2","5","2","4","","227","227","227","","","abc_50010"
-"group_19","","hypothetical protein","1","1","1","2","4","2","3","","188","188","188","","","abc_50012"
-"group_20","","Gonococcal growth inhibitor III","1","1","1","2","3","2","2","","134","134","134","","","abc_50014"
-"group_21","yfnB","Putative HAD-hydrolase yfnB","1","1","1","2","2","2","1","","686","686","686","","","3_5"
+"hly","","Alpha-toxin","2","2","1","1","14","1","1","","959","959","959","1_1","05a85fcc1cbac7027ac3689992006154_1",""
+"group_10","","hypothetical protein","2","2","1","1","6","1","11","","227","227","227","abc_00010","05a85fcc1cbac7027ac3689992006154_10",""
+"group_11","","C4-dicarboxylate transporter/malic acid transport protein","2","2","1","1","5","1","10","","947","947","947","abc_00011","05a85fcc1cbac7027ac3689992006154_11",""
+"group_12","","hypothetical protein","2","2","1","1","4","1","9","","188","188","188","abc_00012","05a85fcc1cbac7027ac3689992006154_12",""
+"group_13","","Gonococcal growth inhibitor III","2","2","1","1","3","1","8","","134","134","134","abc_00014","05a85fcc1cbac7027ac3689992006154_14",""
+"group_14","","Gonococcal growth inhibitor III","2","2","1","1","2","1","7","","134","134","134","1_6","05a85fcc1cbac7027ac3689992006154_15",""
+"yfnB","","Putative HAD-hydrolase yfnB","2","2","1","1","1","1","6","","686","686","686","abc_00016","05a85fcc1cbac7027ac3689992006154_16",""
+"group_2","","hypothetical protein","2","2","1","1","15","1","2","","146","146","146","abc_00002","05a85fcc1cbac7027ac3689992006154_2",""
+"group_3","","hypothetical protein","2","2","1","1","13","1","3","","197","197","197","abc_00003","05a85fcc1cbac7027ac3689992006154_3",""
+"group_4","","superantigen-like protein","2","2","1","1","12","1","4","","716","716","716","abc_00004","05a85fcc1cbac7027ac3689992006154_4",""
+"speH","","hypothetical protein","2","2","1","1","11","1","5","","725","725","725","1_2","05a85fcc1cbac7027ac3689992006154_5",""
+"group_6","","superantigen-like protein","2","2","1","1","10","1","15","","725","725","725","abc_00006","05a85fcc1cbac7027ac3689992006154_6",""
+"argF","","Ornithine carbamoyltransferase","2","2","1","1","9","1","14","","1001","1001","1001","1_3","05a85fcc1cbac7027ac3689992006154_7",""
+"arcC1","","Carbamate kinase 1","2","2","1","1","8","1","13","","935","935","935","abc_00008","05a85fcc1cbac7027ac3689992006154_8",""
+"group_9","","16S ribosomal RNA","2","2","1","1","7","1","12","","1556","1556","1556","abc_01705","05a85fcc1cbac7027ac3689992006154_9",""
+"group_16","","hypothetical protein","1","1","1","2","6","2","3","","146","146","146","","","abc_50002"
+"group_17","argF","Ornithine carbamoyltransferase","1","1","1","2","5","2","4","","1001","1001","1001","","","3_3"
+"group_18","","hypothetical protein","1","1","1","2","4","2","5","","227","227","227","","","abc_50010"
+"group_19","","hypothetical protein","1","1","1","2","3","2","6","","188","188","188","","","abc_50012"
+"group_20","","Gonococcal growth inhibitor III","1","1","1","2","2","2","2","","134","134","134","","","abc_50014"
+"group_21","yfnB","Putative HAD-hydrolase yfnB","1","1","1","2","1","2","1","","686","686","686","","","3_5"
diff --git a/t/data/reformat_input_gffs/expected_fixed_query_2.gff b/t/data/reformat_input_gffs/expected_fixed_query_2.gff
index 1cabef6..a425bb4 100644
--- a/t/data/reformat_input_gffs/expected_fixed_query_2.gff
+++ b/t/data/reformat_input_gffs/expected_fixed_query_2.gff
@@ -1,10 +1,10 @@
##gff-version 3
##sequence-region abc|SC|contig000001 1 15000
-abc|SC|contig000001 Prodigal:2.60 CDS 172 1131 . - 0 ID=1_1___1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
-abc|SC|contig000001 Prodigal:2.60 CDS 1804 1950 . + 0 ID=abc_00002___2;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
-abc|SC|contig000001 Prodigal:2.60 CDS 1934 2131 . + 0 inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;ID=abc_00003___3;product=hypothetical protein;protein_id=gnl|SC|abc_00003
-abc|SC|contig000001 Prodigal:2.60 CDS 2621 3337 . - 0 ID=abc_00004___4;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
-abc|SC|contig000001 Prodigal:2.60 CDS 3445 4170 . - 0 gene=speH;ID=1_2___5;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001 Prodigal:2.60 CDS 172 1131 . - 0 ID=5d3897f59edf296200f1c7de895509e1_1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001 Prodigal:2.60 CDS 1804 1950 . + 0 ID=5d3897f59edf296200f1c7de895509e1_2;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001 Prodigal:2.60 CDS 1934 2131 . + 0 inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;ID=5d3897f59edf296200f1c7de895509e1_3;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001 Prodigal:2.60 CDS 2621 3337 . - 0 ID=5d3897f59edf296200f1c7de895509e1_4;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001 Prodigal:2.60 CDS 3445 4170 . - 0 gene=speH;ID=5d3897f59edf296200f1c7de895509e1_5;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
##FASTA
>abc|SC|contig000001
ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/roary.git
More information about the debian-med-commit
mailing list