[med-svn] [roary] 01/04: New upstream version 3.9.0+dfsg

Sascha Steinbiss satta at debian.org
Thu Aug 10 19:34:25 UTC 2017


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository roary.

commit ae627224f3f022731407e4700c986d6702680a5f
Author: Sascha Steinbiss <satta at debian.org>
Date:   Thu Aug 10 21:25:02 2017 +0200

    New upstream version 3.9.0+dfsg
---
 README.md                                          | 20 ++++-
 bin/roary-unique_genes_per_sample                  | 19 +++++
 dist.ini                                           |  2 +-
 lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm  | 92 ++++++++++++++++++++++
 lib/Bio/Roary/UniqueGenesPerSample.pm              | 80 +++++++++++++++++++
 t/Bio/Roary/UniqueGenesPerSample.t                 | 37 +++++++++
 .../clustered_proteins_valid                       |  6 ++
 .../expected_unique_genes_per_sample.tsv           |  4 +
 8 files changed, 257 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a0758a8..e65364c 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Roary is a high speed stand alone pan genome pipeline, which takes annotated ass
 Theres are a number of dependancies required for Roary, with instructions specific to the type of system you have:
 * Ubuntu/Debian
 * CentOS/RedHat
+* Bioconda - OSX/Linux
+* Galaxy
 * Homebrew/Linuxbrew - OSX/Linux
 * Guix - Linux
 * Virtual Machine - OSX/Linux/Windows
@@ -43,6 +45,20 @@ Some of the software versions in apt are quite old so follow the instructions fo
 ## CentOS/RedHat
 To install the dependancies, the easiest way is to install [LinuxBrew](http://brew.sh/linuxbrew/) using the steps for Fedora, then follow the steps below for installing Roary on LinuxBrew.
 
+## Bioconda - OSX/Linux
+Install conda. Then install bioconda and roary:
+
+```
+conda config --add channels r
+conda config --add channels defaults
+conda config --add channels conda-forge
+conda config --add channels bioconda
+conda install roary
+```
+
+## Galaxy
+Roary is available from the Galaxy toolshed (as is Prokka).
+
 ## Homebrew/Linuxbrew - OSX/Linux
 Assuming you have [homebrew](http://brew.sh/) (OSX) or [linuxbrew](http://brew.sh/linuxbrew/) (Linux) setup and installed on your system:
 
@@ -64,7 +80,7 @@ Roary wont run natively on Windows but we have created virtual machine which has
 
 ftp://ftp.sanger.ac.uk/pub/pathogens/pathogens-vm/pathogens-vm.latest.ova
 
-More importantly though, if your trying to do bioinformatics on Windows, your not going to get very far and you should seriously consider upgrading to Linux.
+More importantly though, if you're trying to do bioinformatics on Windows, you're not going to get very far and you should seriously consider upgrading to Linux.
 
 ## Docker - OSX/Linux/Windows/Cloud
 We have a docker container which gets automatically built from the latest version of Roary in Debian Med. To install it:
@@ -107,7 +123,7 @@ bedtools cd-hit blast mcl GNUparallel prank mafft fasttree
 ```
 
 ## Ancient systems and versions of perl
-The code will not work with perl 5.8 or below (pre-modern perl). We no longer test against 5.10 (released 2007). If your running a very old verison of Linux, your also in trouble.
+The code will not work with perl 5.8 or below (pre-modern perl). We no longer test against 5.10 (released 2007). If you're running a very old version of Linux, you're also in trouble.
 
 # Versions of software we test against
 * Perl 5.14, 5.16, 5.20, 5.24
diff --git a/bin/roary-unique_genes_per_sample b/bin/roary-unique_genes_per_sample
new file mode 100755
index 0000000..bb926b8
--- /dev/null
+++ b/bin/roary-unique_genes_per_sample
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::UniqueGenesPerSample;
+
+# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+# PODNAME: roary-unique_genes_per_sample
+
+=head1 SYNOPSIS
+
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=cut
+
+use Cwd qw(abs_path);
+
+# Put ./lib and ./t/lib (resolved against the current working directory) at
+# the front of the module search path, with ./t/lib searched first.
+BEGIN {
+    unshift @INC, abs_path('./t/lib'), abs_path('./lib');
+}
+use Bio::Roary::CommandLine::UniqueGenesPerSample;
+
+# Pass the raw command line and the script name to the command-line class,
+# then run it.
+my $command = Bio::Roary::CommandLine::UniqueGenesPerSample->new(
+    args        => \@ARGV,
+    script_name => $0,
+);
+$command->run;
diff --git a/dist.ini b/dist.ini
index 5d2900f..39fc6d2 100644
--- a/dist.ini
+++ b/dist.ini
@@ -1,5 +1,5 @@
 name    = Bio-Roary
-version = 3.8.2
+version = 3.9.0
 author  = Andrew J. Page <ap13 at sanger.ac.uk>
 license = GPL_3
 copyright_holder = Wellcome Trust Sanger Institute
diff --git a/lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm b/lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm
new file mode 100644
index 0000000..5a8fd6c
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/UniqueGenesPerSample.pm
@@ -0,0 +1,92 @@
+undef $VERSION;
+
+package Bio::Roary::CommandLine::UniqueGenesPerSample;
+
+# ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=head1 SYNOPSIS
+
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::UniqueGenesPerSample;
+
+extends 'Bio::Roary::CommandLine::Common';
+
+# Values supplied by the calling script: the raw argument list and the
+# script's own name. 'help' is filled in by BUILD when -h/--help is seen.
+has 'args' => (
+    is       => 'ro',
+    isa      => 'ArrayRef',
+    required => 1,
+);
+has 'script_name' => (
+    is       => 'ro',
+    isa      => 'Str',
+    required => 1,
+);
+has 'help' => (
+    is      => 'rw',
+    isa     => 'Bool',
+    default => 0,
+);
+
+# Settings that BUILD overrides from the parsed command-line switches.
+has 'clustered_proteins' => (
+    is      => 'rw',
+    isa     => 'Str',
+    default => 'clustered_proteins',
+);
+has 'output_filename' => (
+    is      => 'rw',
+    isa     => 'Str',
+    default => 'unique_genes_per_sample.tsv',
+);
+has 'verbose' => (
+    is      => 'rw',
+    isa     => 'Bool',
+    default => 0,
+);
+
+# Left undefined unless BUILD detects a problem; run() checks it.
+has '_error_message' => (
+    is  => 'rw',
+    isa => 'Str',
+);
+
+# Moose BUILD hook: parses the switches held in 'args' and copies them onto
+# the object's attributes.
+#
+#   -o | --output STR              output filename
+#   -c | --clustered_proteins STR  clustered proteins file
+#   -v | --verbose                 verbose logging
+#   -h | --help                    die with the usage text
+#
+# A missing clustered proteins file is not fatal here: the problem is stored
+# in _error_message so that run() can report it together with the usage text.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $clustered_proteins, $output_filename, $verbose, $help );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'             => \$output_filename,
+        'c|clustered_proteins=s' => \$clustered_proteins,
+        'v|verbose'              => \$verbose,
+        'h|help'                 => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+
+        # 'logger' is not declared in this file; presumably it is inherited
+        # from Bio::Roary::CommandLine::Common - confirm there.
+        $self->logger->level(10000);
+    }
+
+    $self->help($help) if ( defined($help) );
+    ( !$self->help ) or die $self->usage_text;
+
+    $self->output_filename($output_filename) if ( defined($output_filename) );
+
+    # Apply -c when it was given, then test whichever filename is in effect.
+    # Checking the attribute rather than the raw option lets the documented
+    # default ('clustered_proteins') be used when -c is omitted.
+    $self->clustered_proteins($clustered_proteins) if ( defined($clustered_proteins) );
+    if ( !-e $self->clustered_proteins ) {
+        $self->_error_message("Error: Cant access the clustered proteins file");
+    }
+}
+
+# Entry point called by the script. If BUILD recorded a problem, print it and
+# die with the usage text; otherwise build the worker object and have it
+# write the per-sample frequency file.
+sub run {
+    my ($self) = @_;
+
+    my $problem = $self->_error_message;
+    if ( defined($problem) ) {
+        print $problem . "\n";
+        die $self->usage_text;
+    }
+
+    my %worker_args = (
+        clustered_proteins => $self->clustered_proteins,
+        output_filename    => $self->output_filename,
+    );
+    my $worker = Bio::Roary::UniqueGenesPerSample->new(%worker_args);
+
+    return $worker->write_unique_frequency;
+}
+
+# Returns the help text shown for -h and after a command-line error.
+sub usage_text {
+    my ($self) = @_;
+
+    # The text contains nothing to interpolate, so a literal heredoc is used.
+    my $usage = <<'USAGE';
+Usage: roary-unique_genes_per_sample [options] -c clustered_proteins
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+Options: -o STR output filename [unique_genes_per_sample.tsv]
+         -c STR clusters filename [clustered_proteins]
+         -v     verbose output to STDOUT
+         -h     this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+
+    return $usage;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/UniqueGenesPerSample.pm b/lib/Bio/Roary/UniqueGenesPerSample.pm
new file mode 100644
index 0000000..86ed762
--- /dev/null
+++ b/lib/Bio/Roary/UniqueGenesPerSample.pm
@@ -0,0 +1,80 @@
+package Bio::Roary::UniqueGenesPerSample;
+
+# ABSTRACT:  Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+
+=head1 SYNOPSIS
+
+Take in the clustered file and produce a sorted file with the frequency of each samples unique genes
+   use Bio::Roary::UniqueGenesPerSample;
+   
+   my $obj = Bio::Roary::UniqueGenesPerSample->new(
+     clustered_proteins   => 'clustered_proteins',
+     output_filename   => 'output_filename',
+   );
+   $obj->write_unique_frequency;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+
+# Input file of clusters and the file the per-sample counts are written to.
+has 'clustered_proteins' => (
+    is      => 'rw',
+    isa     => 'Str',
+    default => 'clustered_proteins',
+);
+has 'output_filename' => (
+    is      => 'rw',
+    isa     => 'Str',
+    default => 'unique_genes_per_sample.tsv',
+);
+
+# Write handle on output_filename, opened on first use.
+has '_output_fh' => (
+    is      => 'ro',
+    lazy    => 1,
+    builder => '_build__output_fh',
+);
+
+# Builder for _output_fh: opens output_filename for writing (truncating any
+# existing file) and returns the handle. Throws
+# Bio::Roary::Exceptions::CouldntWriteToFile when the file cannot be opened.
+sub _build__output_fh {
+    my ($self) = @_;
+
+    my $handle;
+    unless ( open( $handle, '>', $self->output_filename ) ) {
+        Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
+    }
+    return $handle;
+}
+
+# Counts, per sample, the clusters that contain exactly one gene.
+#
+# Each input line is expected to look like
+#   group_17585: 14520_6#21_00645
+# i.e. a cluster name, ": ", then gene ids. Only lines with a single gene id
+# are counted; the sample name is the gene id with its trailing "_<digits>"
+# removed. Lines shorter than 6 characters, lines with several gene ids and
+# gene ids without a numeric suffix are skipped.
+#
+# Returns a hash reference mapping sample name => number of such clusters.
+# Throws Bio::Roary::Exceptions::FileNotFound if the input cannot be opened.
+sub _sample_to_gene_freq {
+    my ($self) = @_;
+
+    # Three-argument open: the filename is never interpreted as a mode or a
+    # pipe, whatever characters it starts or ends with.
+    open( my $input_fh, '<', $self->clustered_proteins )
+      or Bio::Roary::Exceptions::FileNotFound->throw( error => "Couldnt read input file:" . $self->clustered_proteins );
+
+    my %sample_to_gene_freq;
+    while ( my $line = <$input_fh> ) {
+        chomp $line;
+
+        # Tolerate files with Windows line endings; a trailing \r would
+        # otherwise stop the single-gene pattern below from matching.
+        $line =~ s/\r\z//;
+        next if ( length($line) < 6 );
+
+        # Anything else is an invalid line or a cluster with more than 1 gene.
+        next unless ( $line =~ /^.+: ([^\s]+)$/ );
+        my $gene_id = $1;
+
+        # The gene id may not be valid, in which case it is ignored.
+        next unless ( $gene_id =~ /^(.+)_[\d]+$/ );
+        my $sample_name = $1;
+
+        $sample_to_gene_freq{$sample_name}++;
+    }
+    close($input_fh);
+
+    return \%sample_to_gene_freq;
+}
+
+# Writes one "sample<TAB>count" line per sample to the output file, ordered by
+# descending count and then by sample name, and closes the file.
+#
+# Returns 1 on success. Throws Bio::Roary::Exceptions::CouldntWriteToFile if a
+# line cannot be written or the file cannot be closed; buffered write errors
+# such as a full disk often only surface at close.
+sub write_unique_frequency {
+    my ($self) = @_;
+
+    # Read the input before touching the output, so an unreadable input does
+    # not leave an empty output file behind.
+    my %sample_to_gene_freq = %{ $self->_sample_to_gene_freq };
+    my $output_fh           = $self->_output_fh;
+
+    for my $sample ( sort { $sample_to_gene_freq{$b} <=> $sample_to_gene_freq{$a} || $a cmp $b } keys %sample_to_gene_freq ) {
+        print {$output_fh} $sample . "\t" . $sample_to_gene_freq{$sample} . "\n"
+          or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
+    }
+
+    close($output_fh)
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/t/Bio/Roary/UniqueGenesPerSample.t b/t/Bio/Roary/UniqueGenesPerSample.t
new file mode 100755
index 0000000..8cfcf49
--- /dev/null
+++ b/t/Bio/Roary/UniqueGenesPerSample.t
@@ -0,0 +1,37 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::Files;
+use Data::Dumper;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::UniqueGenesPerSample');
+}
+
+# Fixture with single-gene and multi-gene clusters, and the file the module
+# is expected to produce from it.
+my $input_file    = 't/data/unique_genes_per_sample/clustered_proteins_valid';
+my $expected_file = 't/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv';
+
+my $obj = Bio::Roary::UniqueGenesPerSample->new( clustered_proteins => $input_file );
+ok( $obj, 'Initialise object' );
+
+# Only clusters holding a single gene contribute to a sample's count.
+my %expected_freq = (
+    '11111_4#44' => 1,
+    '123_4#5'    => 2,
+    '999_4#5'    => 1,
+    '22222_6#21' => 1,
+);
+is_deeply( $obj->_sample_to_gene_freq, \%expected_freq, 'sample frequencies' );
+
+ok( $obj->write_unique_frequency, 'create output file' );
+ok( -e $obj->output_filename,     'output file exists' );
+
+compare_ok( $obj->output_filename, $expected_file, 'got expected unique gene frequency' );
+
+# Remove the file written into the working directory.
+unlink( $obj->output_filename );
+
+done_testing();
diff --git a/t/data/unique_genes_per_sample/clustered_proteins_valid b/t/data/unique_genes_per_sample/clustered_proteins_valid
new file mode 100644
index 0000000..3fc178d
--- /dev/null
+++ b/t/data/unique_genes_per_sample/clustered_proteins_valid
@@ -0,0 +1,6 @@
+group_2: 123_4#5_02659 999_4#5_02659
+group_2: 123_4#5_02654
+group_8: 999_4#5_02651
+group_7: 123_4#5_02674
+nagK: 11111_4#44_01973
+dnaA: 22222_6#21_00645
diff --git a/t/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv b/t/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv
new file mode 100644
index 0000000..2f9bfc9
--- /dev/null
+++ b/t/data/unique_genes_per_sample/expected_unique_genes_per_sample.tsv
@@ -0,0 +1,4 @@
+123_4#5	2
+11111_4#44	1
+22222_6#21	1
+999_4#5	1

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/roary.git



More information about the debian-med-commit mailing list