[med-svn] [roary] 01/04: New upstream version 3.9.0+dfsg
Sascha Steinbiss
satta at debian.org
Thu Aug 10 19:34:25 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository roary.
commit ae627224f3f022731407e4700c986d6702680a5f
Author: Sascha Steinbiss <satta at debian.org>
Date: Thu Aug 10 21:25:02 2017 +0200
New upstream version 3.9.0+dfsg
---
README.md | 20 ++++-
bin/roary-unique_genes_per_sample | 19 +++++
dist.ini | 2 +-
lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm | 92 ++++++++++++++++++++++
lib/Bio/Roary/UniqueGenesPerSample.pm | 80 +++++++++++++++++++
t/Bio/Roary/UniqueGenesPerSample.t | 37 +++++++++
.../clustered_proteins_valid | 6 ++
.../expected_unique_genes_per_sample.tsv | 4 +
8 files changed, 257 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index a0758a8..e65364c 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Roary is a high speed stand alone pan genome pipeline, which takes annotated ass
Theres are a number of dependancies required for Roary, with instructions specific to the type of system you have:
* Ubuntu/Debian
* CentOS/RedHat
+* Bioconda - OSX/Linux
+* Galaxy
* Homebrew/Linuxbrew - OSX/Linux
* Guix - Linux
* Virtual Machine - OSX/Linux/Windows
@@ -43,6 +45,20 @@ Some of the software versions in apt are quite old so follow the instructions fo
## CentOS/RedHat
To install the dependancies, the easiest way is to install [LinuxBrew](http://brew.sh/linuxbrew/) using the steps for Fedora, then follow the steps below for installing Roary on LinuxBrew.
+## Bioconda - OSX/Linux
+Install conda. Then install bioconda and roary:
+
+```
+conda config --add channels r
+conda config --add channels defaults
+conda config --add channels conda-forge
+conda config --add channels bioconda
+conda install roary
+```
+
+## Galaxy
+Roary is available from the Galaxy toolshed (as is Prokka).
+
## Homebrew/Linuxbrew - OSX/Linux
Assuming you have [homebrew](http://brew.sh/) (OSX) or [linuxbrew](http://brew.sh/linuxbrew/) (Linux) setup and installed on your system:
@@ -64,7 +80,7 @@ Roary wont run natively on Windows but we have created virtual machine which has
ftp://ftp.sanger.ac.uk/pub/pathogens/pathogens-vm/pathogens-vm.latest.ova
-More importantly though, if your trying to do bioinformatics on Windows, your not going to get very far and you should seriously consider upgrading to Linux.
+More importantly though, if you're trying to do bioinformatics on Windows, you're not going to get very far and you should seriously consider upgrading to Linux.
## Docker - OSX/Linux/Windows/Cloud
We have a docker container which gets automatically built from the latest version of Roary in Debian Med. To install it:
@@ -107,7 +123,7 @@ bedtools cd-hit blast mcl GNUparallel prank mafft fasttree
```
## Ancient systems and versions of perl
-The code will not work with perl 5.8 or below (pre-modern perl). We no longer test against 5.10 (released 2007). If your running a very old verison of Linux, your also in trouble.
+The code will not work with perl 5.8 or below (pre-modern perl). We no longer test against 5.10 (released 2007). If you're running a very old version of Linux, you're also in trouble.
# Versions of software we test against
* Perl 5.14, 5.16, 5.20, 5.24
diff --git a/bin/roary-unique_genes_per_sample b/bin/roary-unique_genes_per_sample
new file mode 100755
index 0000000..bb926b8
--- /dev/null
+++ b/bin/roary-unique_genes_per_sample
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::UniqueGenesPerSample;
+
+# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+# PODNAME: roary-unique_genes_per_sample
+
+=head1 SYNOPSIS
+
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=cut
+
+use Cwd qw(abs_path);
+# Put ./lib and ./t/lib (resolved against the current working directory) at the
+# front of @INC. The BEGIN blocks run at compile time, i.e. before the Roary
+# module below is loaded.
+BEGIN { unshift( @INC, abs_path('./lib') ) }
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }
+use Bio::Roary::CommandLine::UniqueGenesPerSample;
+
+# Hand the raw command line arguments and the script name to the command line
+# class and run it.
+Bio::Roary::CommandLine::UniqueGenesPerSample->new(args => \@ARGV, script_name => $0)->run;
diff --git a/dist.ini b/dist.ini
index 5d2900f..39fc6d2 100644
--- a/dist.ini
+++ b/dist.ini
@@ -1,5 +1,5 @@
name = Bio-Roary
-version = 3.8.2
+version = 3.9.0
author = Andrew J. Page <ap13 at sanger.ac.uk>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
diff --git a/lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm b/lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm
new file mode 100644
index 0000000..5a8fd6c
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm
@@ -0,0 +1,92 @@
+undef $VERSION;
+
+package Bio::Roary::CommandLine::UniqueGenesPerSample;
+
+# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=head1 SYNOPSIS
+
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::UniqueGenesPerSample;
+
+extends 'Bio::Roary::CommandLine::Common';
+
+# Raw argument list and invoking script name; both must be supplied by the caller.
+has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
+has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
+
+# Settings that BUILD may overwrite from the parsed command line options.
+has 'clustered_proteins' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
+has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'unique_genes_per_sample.tsv' );
+has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
+# No default: stays undefined unless BUILD records a problem; run checks it.
+has '_error_message' => ( is => 'rw', isa => 'Str' );
+
+# Moose construction hook: parse the command line held in 'args' and copy the
+# recognised options onto the object.
+#
+# Options: -o/--output, -c/--clustered_proteins, -v/--verbose, -h/--help.
+# Dies with the usage text when help is requested. A missing clustered
+# proteins file is not fatal here; it is recorded in _error_message so that
+# run can report it.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $clustered_proteins, $output_filename, $verbose, $help );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'             => \$output_filename,
+        'c|clustered_proteins=s' => \$clustered_proteins,
+        'v|verbose'              => \$verbose,
+        'h|help'                 => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+
+    $self->help($help) if ( defined($help) );
+    ( !$self->help ) or die $self->usage_text;
+
+    $self->output_filename($output_filename) if ( defined($output_filename) );
+
+    # Honour the documented default: when -c is omitted the attribute keeps
+    # its default value, and that path is what gets checked for existence.
+    $self->clustered_proteins($clustered_proteins) if ( defined($clustered_proteins) );
+    if ( !-e $self->clustered_proteins ) {
+        $self->_error_message("Error: Cant access the clustered proteins file");
+    }
+}
+
+# Execute the command: report any problem recorded while parsing the options,
+# otherwise build the worker object and write the frequency report.
+# Prints the error to STDOUT and dies with the usage text on failure.
+sub run {
+    my $self = shift;
+
+    my $problem = $self->_error_message;
+    if ( defined $problem ) {
+        print $problem . "\n";
+        die $self->usage_text;
+    }
+
+    return Bio::Roary::UniqueGenesPerSample->new(
+        clustered_proteins => $self->clustered_proteins,
+        output_filename    => $self->output_filename,
+    )->write_unique_frequency;
+}
+
+# Return the usage/help text as a single multi-line string. BUILD dies with it
+# when help is requested and run dies with it after reporting an error.
+sub usage_text {
+ my ($self) = @_;
+
+ return <<USAGE;
+Usage: roary-unique_genes_per_sample [options] -c clustered_proteins
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+Options: -o STR output filename [unique_genes_per_sample.tsv]
+ -c STR clusters filename [clustered_proteins]
+ -v verbose output to STDOUT
+ -h this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/UniqueGenesPerSample.pm b/lib/Bio/Roary/UniqueGenesPerSample.pm
new file mode 100644
index 0000000..86ed762
--- /dev/null
+++ b/lib/Bio/Roary/UniqueGenesPerSample.pm
@@ -0,0 +1,80 @@
+package Bio::Roary::UniqueGenesPerSample;
+
+# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=head1 SYNOPSIS
+
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+ use Bio::Roary::UniqueGenesPerSample;
+
+ my $obj = Bio::Roary::UniqueGenesPerSample->new(
+ clustered_proteins => 'clustered_proteins',
+ output_filename => 'output_filename',
+ );
+ $obj->write_unique_frequency;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+
+# Path of the clustered proteins file to read.
+has 'clustered_proteins' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
+# Path of the tab separated report to write.
+has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'unique_genes_per_sample.tsv' );
+
+# Write handle on output_filename; lazy, so the file is only opened on first use.
+has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
+
+# Builder for _output_fh: open output_filename for writing and return the
+# handle. Throws Bio::Roary::Exceptions::CouldntWriteToFile if the open fails.
+sub _build__output_fh {
+    my $self = shift;
+
+    my $path = $self->output_filename;
+    my $handle;
+    unless ( open( $handle, '>', $path ) ) {
+        Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
+    }
+    return $handle;
+}
+
+# Count, per sample, the clusters that contain exactly one gene.
+#
+# A counted line of the clustered proteins file looks like
+#   group_17585: 14520_6#21_00645
+# i.e. a cluster name, ": ", and a single gene id of the form
+# <sample>_<digits>. Lines shorter than 6 characters, lines listing more than
+# one gene, and gene ids without a trailing _<digits> part are ignored.
+#
+# Returns a hash reference mapping sample name to the number of such clusters.
+# Throws Bio::Roary::Exceptions::FileNotFound if the input cannot be opened.
+sub _sample_to_gene_freq {
+    my ($self) = @_;
+
+    # Three-argument open with an explicit read mode: the path is always
+    # treated as a plain file name, never as a mode prefix or a pipe.
+    open( my $input_fh, '<', $self->clustered_proteins )
+      or Bio::Roary::Exceptions::FileNotFound->throw( error => "Couldnt read input file:" . $self->clustered_proteins );
+
+    my %sample_to_gene_freq;
+
+    # A lexical line variable keeps the caller's $_ untouched.
+    while ( my $line = <$input_fh> ) {
+        chomp $line;
+        next if ( length($line) < 6 );
+
+        # its either an invalid line or theres more than 1 gene in the cluster
+        next unless ( $line =~ /^.+: ([^\s]+)$/ );
+        my $gene_id = $1;
+
+        # gene id may not be valid so ignore
+        next unless ( $gene_id =~ /^(.+)_[\d]+$/ );
+        my $sample_name = $1;
+
+        $sample_to_gene_freq{$sample_name}++;
+    }
+    close($input_fh);
+
+    return \%sample_to_gene_freq;
+}
+
+# Write one "<sample>\t<count>" line per sample to the output file, ordered by
+# descending count and then by sample name, and close the file.
+#
+# Returns 1 on success. Throws Bio::Roary::Exceptions::CouldntWriteToFile if
+# the output file cannot be opened or if closing it reports a write failure.
+sub write_unique_frequency {
+    my ($self) = @_;
+
+    # Read the input first so a bad input file never creates the output file.
+    my $freq_for = $self->_sample_to_gene_freq;
+    my $out_fh   = $self->_output_fh;
+
+    my @samples = sort { $freq_for->{$b} <=> $freq_for->{$a} || $a cmp $b } keys %{$freq_for};
+    for my $sample (@samples) {
+        print {$out_fh} $sample . "\t" . $freq_for->{$sample} . "\n";
+    }
+
+    # Buffered write errors only surface at close, so its result is checked.
+    close($out_fh)
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/t/Bio/Roary/UniqueGenesPerSample.t b/t/Bio/Roary/UniqueGenesPerSample.t
new file mode 100755
index 0000000..8cfcf49
--- /dev/null
+++ b/t/Bio/Roary/UniqueGenesPerSample.t
@@ -0,0 +1,37 @@
+#!/usr/bin/env perl
+# Tests for Bio::Roary::UniqueGenesPerSample: per-sample counting of single
+# gene clusters and the sorted report written from those counts.
+use strict;
+use warnings;
+use Test::Files;
+# NOTE(review): Data::Dumper is loaded but not used anywhere in this script.
+use Data::Dumper;
+
+# Load the modules from ./lib and make the scripts in ./bin reachable via PATH.
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+ use Test::Most;
+ use_ok('Bio::Roary::UniqueGenesPerSample');
+}
+
+# output_filename is not passed, so the attribute default is used for the report.
+ok(
+ my $obj = Bio::Roary::UniqueGenesPerSample->new(
+ clustered_proteins => 't/data/unique_genes_per_sample/clustered_proteins_valid',
+ ),
+ 'Initialise object'
+);
+
+# Only clusters holding a single gene are counted towards a sample.
+is_deeply($obj->_sample_to_gene_freq, {
+ '11111_4#44' => 1,
+ '123_4#5' => 2,
+ '999_4#5' => 1,
+ '22222_6#21' => 1
+ }, 'sample frequencies');
+
+
+ok($obj->write_unique_frequency, 'create output file');
+ok(-e $obj->output_filename, 'output file exists');
+
+compare_ok($obj->output_filename, 't/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv', 'got expected unique gene frequency');
+
+# Remove the generated report so the test leaves no file behind.
+unlink($obj->output_filename);
+
+done_testing();
diff --git a/t/data/unique_genes_per_sample/clustered_proteins_valid b/t/data/unique_genes_per_sample/clustered_proteins_valid
new file mode 100644
index 0000000..3fc178d
--- /dev/null
+++ b/t/data/unique_genes_per_sample/clustered_proteins_valid
@@ -0,0 +1,6 @@
+group_2: 123_4#5_02659 999_4#5_02659
+group_2: 123_4#5_02654
+group_8: 999_4#5_02651
+group_7: 123_4#5_02674
+nagK: 11111_4#44_01973
+dnaA: 22222_6#21_00645
diff --git a/t/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv b/t/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv
new file mode 100644
index 0000000..2f9bfc9
--- /dev/null
+++ b/t/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv
@@ -0,0 +1,4 @@
+123_4#5 2
+11111_4#44 1
+22222_6#21 1
+999_4#5 1
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/roary.git
More information about the debian-med-commit
mailing list