[med-svn] [Git][med-team/allelecount][upstream] New upstream version 4.3.0

Andreas Tille (@tille) gitlab at salsa.debian.org
Thu Oct 7 21:06:34 BST 2021



Andreas Tille pushed to branch upstream at Debian Med / allelecount


Commits:
60b41071 by Andreas Tille at 2021-09-10T09:45:17+02:00
New upstream version 4.3.0
- - - - -


9 changed files:

- CHANGES.md
- perl/Makefile.PL
- + perl/bin/alleleCounterToJson.pl
- perl/lib/Sanger/CGP/AlleleCount.pm
- + perl/lib/Sanger/CGP/AlleleCount/ToJson.pm
- perl/t/2_pl_compile.t
- + perl/t/tojson.t
- + testData/test_ac_out.txt
- + testData/test_loci.txt


Changes:

=====================================
CHANGES.md
=====================================
@@ -1,5 +1,9 @@
 # CHANGES
 
+## v4.3.0
+
+* Add script to convert allelecount output to JSON
+
 ## v4.2.1
 
 * Update so docker and native install use same install scripts behind the scenes


=====================================
perl/Makefile.PL
=====================================
@@ -28,7 +28,7 @@ WriteMakefile(
   NAME          => 'alleleCount',
   LICENSE       => 'agpl_3', # http://search.cpan.org/~dagolden/CPAN-Meta-2.142690/lib/CPAN/Meta/Spec.pm#license
   VERSION_FROM  => 'lib/Sanger/CGP/AlleleCount.pm',
-  EXE_FILES     => [qw( bin/alleleCounter.pl )],
+  EXE_FILES     => [qw( bin/alleleCounter.pl bin/alleleCounterToJson.pl )],
   PREREQ_PM     => {
                      'Const::Fast' => 0.014,
                      'Try::Tiny' => 0.19,
@@ -38,5 +38,6 @@ WriteMakefile(
                      'Devel::Cover' => 1.09,
                      'Pod::Coverage' => 0.23,
                      'IPC::System::Simple' => 1.25,
+                     'JSON' =>  2.90,
                    }
 );


=====================================
perl/bin/alleleCounterToJson.pl
=====================================
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+##########LICENCE##########
+# Copyright (c) 2014-2021 Genome Research Ltd.
+#
+# Author: CASM/Cancer IT <cgphelp at sanger.ac.uk>
+#
+# This file is part of alleleCount.
+#
+# alleleCount is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##########LICENCE##########
+
+use strict;
+use Carp;
+use English qw( -no_match_vars );
+use warnings FATAL => 'all';
+
+use Getopt::Long 'GetOptions';
+use Pod::Usage;
+
+use Sanger::CGP::AlleleCount;
+use Sanger::CGP::AlleleCount::ToJson;
+
+{
+  my $options = option_builder(); # parses the command line; may exit for -h/-v or on bad usage
+  $options->{'o'} = '/dev/stdout' unless(defined $options->{'o'}); # no -o supplied: send the JSON to standard output
+  run($options);
+}
+
+sub run { # Write the JSON conversion to the -o file, or to STDOUT when -o is absent or was given without a value.
+  my ($options) = @_;
+  my $json_string = Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($options->{'a'}, $options->{'l'});
+  my $OUT; # stays undefined when there is no usable output path; STDOUT is used in that case
+  if(defined $options->{'o'} && length $options->{'o'}){ # a bare '-o' yields '' because the option spec is ':s'
+    open($OUT, '>', $options->{'o'}) or croak("Error opening file for output: $!");
+  }
+  print {$OUT ? $OUT : \*STDOUT} $json_string or croak("Error writing JSON output: $!");
+  if(defined $OUT){
+    close($OUT) or croak("Error closing output file for JSON conversion: $!");
+  }
+}
+
+
+sub option_builder {
+  # Parse @ARGV and return a hashref keyed by short option name (h, l, a, o, v).
+  # Exits via pod2usage for -h, unparseable options or a missing -l/-a; -v prints the version and exits.
+  my %opts;
+
+  GetOptions(
+        'h|help'    => \$opts{'h'},
+        'l|locus-file=s' => \$opts{'l'},
+        'a|allelecount-file=s' => \$opts{'a'},
+        'o|output-file:s' => \$opts{'o'},
+        'v|version'   => \$opts{'v'},
+  ) or pod2usage(2); # GetOptions returns false on unknown or malformed options; treat that as a usage error
+
+  pod2usage(0) if($opts{'h'});
+  if($opts{'v'}){
+    print Sanger::CGP::AlleleCount->VERSION."\n";
+    exit;
+  }
+  pod2usage(1) if(!$opts{'l'} || !$opts{'a'});
+  croak("Locus file ".$opts{'l'}." does not exist.") if(! -e $opts{'l'});
+  croak("Allele count output file ".$opts{'a'}." does not exist.") if(! -e $opts{'a'});
+  return \%opts;
+}
+
+__END__
+
+=head1 NAME
+
+alleleCounterToJson.pl - Generate a JSON format file from the tab-separated allele count output
+
+=head1 SYNOPSIS
+
+alleleCounterToJson.pl
+
+  Required:
+
+    -locus-file          -l     File containing SNP positions used for allelecounter
+    -allelecount-file    -a     Allelecounter output file
+
+  Optional:
+    -output-file         -o      Output file (default: stdout)
+    -help                -h      This message
+    -version             -v      Version number
+
+=cut


=====================================
perl/lib/Sanger/CGP/AlleleCount.pm
=====================================
@@ -25,7 +25,7 @@ package Sanger::CGP::AlleleCount;
 use strict;
 
 use base 'Exporter';
-our $VERSION = '4.2.1';
+our $VERSION = '4.3.0';
 our @EXPORT = qw($VERSION);
 
 1;


=====================================
perl/lib/Sanger/CGP/AlleleCount/ToJson.pm
=====================================
@@ -0,0 +1,130 @@
+package Sanger::CGP::AlleleCount::ToJson;
+
+##########LICENCE##########
+# Copyright (c) 2014-2021 Genome Research Ltd.
+#
+# Author: CASM/Cancer IT <cgphelp at sanger.ac.uk>
+#
+# This file is part of alleleCount.
+#
+# alleleCount is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##########LICENCE##########
+
+
+use strict;
+
+use Carp;
+use English qw( -no_match_vars );
+use warnings FATAL => 'all';
+
+use JSON;
+use IO::Zlib;
+
+use Sanger::CGP::AlleleCount;
+
+use Const::Fast qw(const);
+
+const my %ALLELECOUNT_CONST => (
+  MIN_PROP => 0.21,
+  MIN_READS => 5,
+);
+
+sub allelecount_val { # Accessor for a single named entry of %ALLELECOUNT_CONST (MIN_PROP, MIN_READS).
+  my $key = shift;
+  return $ALLELECOUNT_CONST{$key};
+}
+
+=item new
+
+Null constructor
+
+=cut
+
+sub new { # Build an instance with no attributes, blessed into the invoking class.
+  my $class = shift;
+
+  my %state = ();
+  return bless \%state, $class;
+}
+
+=item alleleCountToJson
+
+Convert allele count file result format to JSON
+
+=cut
+
+sub alleleCountToJson{
+  # Build a JSON object string mapping each locus name to its called genotype.
+  #   $countsfile - allele count output, opened through IO::Zlib; fields chr, pos, A, C, G, T, good depth
+  #   $snpsfile   - plain-text loci file; fields chr, pos, name
+  # Croaks on I/O failure and when a counted position has no name in the loci file.
+  my ($countsfile, $snpsfile) = @_;
+  my %name_at;     # chr => pos => locus name
+  my %genotype_of; # locus name => genotype; starts empty so a counts file with no rows encodes as '{}'
+
+  #TODO open gzipped file....
+  open(my $SNPS, '<', $snpsfile) or croak("Error opening allele count locus file '$snpsfile' for JSON conversion: $!");
+  while(my $line = <$SNPS>){
+    next if($line =~ m/^\s*#/ || $line !~ m/\S/); # comment and blank lines carry no locus
+    chomp($line);
+    my ($chr,$pos,$name) = split(/\s+/,$line);
+    $name_at{$chr}{$pos} = $name;
+  }
+  close($SNPS) or croak("Error closing allele count locus file '$snpsfile' for JSON conversion: $!");
+
+  my $fh = IO::Zlib->new; # direct method call; the indirect form 'new IO::Zlib' is ambiguous to parse
+  $fh->open($countsfile, "rb") or croak("Error trying to open allele count file '$countsfile' for JSON conversion: $!\n");
+  while(my $line = <$fh>){
+    next if($line =~ m/^\s*#/ || $line !~ m/\S/); # comment and blank lines carry no counts
+    chomp($line);
+    # The count lexicals are deliberately not named $a/$b: those names belong to sort.
+    my ($chr,$pos,$n_a,$n_c,$n_g,$n_t,$good) = split(/\s+/,$line);
+    my $nom = $name_at{$chr}{$pos};
+    # Without this check an unknown position dies on a fatal 'uninitialized value' warning that names nothing.
+    croak("No locus name for position $chr:$pos of '$countsfile' in '$snpsfile'\n") unless(defined $nom);
+    $genotype_of{$nom} = _calculate_genotype_from_allele_count($n_a,$n_c,$n_g,$n_t,$good);
+  }
+  $fh->close;
+
+  return encode_json(\%genotype_of);
+}
+
+sub _calculate_genotype_from_allele_count{
+  # Turn the four per-base counts and the good depth into a genotype string.
+  # Yields q{.} when the depth is below MIN_READS or no base reaches MIN_PROP of the depth,
+  # a doubled letter when exactly one base qualifies, otherwise the two best-supported
+  # qualifying bases joined in alphabetical order.
+  my ($a_a,$a_c,$a_g,$a_t,$good) = @_;
+  return q{.} if($good < allelecount_val('MIN_READS'));
+
+  my $min_prop = allelecount_val('MIN_PROP');
+  my @candidates = (['A', $a_a], ['C', $a_c], ['G', $a_g], ['T', $a_t]);
+  my @kept = grep { $_->[1]/$good >= $min_prop } @candidates;
+
+  my $call;
+  if(@kept > 1) {
+    my ($first, $second) = sort {$b->[1]<=>$a->[1]} @kept; # highest counts first
+    $call = join(q{}, sort {$a cmp $b} $first->[0], $second->[0]);
+  }
+  elsif(@kept == 1) {
+    $call = $kept[0][0] x 2;
+  }
+  else {
+    $call = q{.};
+  }
+  croak("Error calculating genotype from allele counts $a_a,$a_c,$a_g,$a_t,$good.\n") if((length $call)>2 || (length $call) == 0);
+  return $call;
+}
+
+1;


=====================================
perl/t/2_pl_compile.t
=====================================
@@ -49,16 +49,17 @@ for(@scripts) {
     next;
   }
   my $message = "Compilation check: $script";
+  my $output = "";
   my $command = "$perl -c $script";
   my ($pid, $process);
   try {
     $pid = open $process, $command.' 2>&1 |';
-    while(<$process>){};
+    while(<$process>){ $output.=$_;};
     close $process;
     pass($message);
   }
   catch {
-    fail($message);
+    fail($message."\n".$output);
   };
 }
 


=====================================
perl/t/tojson.t
=====================================
@@ -0,0 +1,67 @@
+##########LICENCE##########
+# Copyright (c) 2014-2020 Genome Research Ltd.
+#
+# Author: CASM/Cancer IT <cgphelp at sanger.ac.uk>
+#
+# This file is part of alleleCount.
+#
+# alleleCount is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##########LICENCE##########
+
+use strict;
+use Test::More;
+use Test::Fatal;
+use Const::Fast qw(const);
+use File::Temp qw(tempdir);
+use File::Slurp;
+
+use Sanger::CGP::AlleleCount::ToJson;
+
+
+const my $MOD => 'Sanger::CGP::AlleleCount::ToJson'; # module under test, handed to new_ok()
+const my $EXP_JSON  => '{"rs2369898":"CT"}'; # expected conversion of the single-locus fixture files
+# NOTE(review): none of the *_RESULT constants below is referenced by the assertions visible in this test.
+const my @DEFAULT_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
+                              [qw(22 16165776 1 12 0 0 13)]
+                              );
+const my @PBQ20_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
+                            [qw(22 16165776 2 17 0 0 19)]
+                              );
+
+const my @MAPQ0_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
+                            [qw(22 16165776 1 17 0 0 18)]
+                              );
+
+const my @DEFAULT_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
+                                  [qw(22 16165776 12 1 13)]
+                                  );
+const my @PBQ20_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
+                                  [qw(22 16165776 17 2 19)]
+                                  );
+const my @MAPQ0_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
+                                  [qw(22 16165776 17 1 18)]
+                                  );
+
+use FindBin qw($Bin);
+my $data_root = "$Bin/../../testData"; # fixture directory, resolved relative to this test script
+
+my $loci = "$data_root/test_loci.txt";
+my $ac_output = "$data_root/test_ac_out.txt";
+
+new_ok($MOD); # no options
+
+# Test::More::is takes (got, expected, name); the computed value goes first so failure diagnostics are labelled correctly.
+is(Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($ac_output, $loci), $EXP_JSON, "Check conversion to JSON");
+
+done_testing();
+


=====================================
testData/test_ac_out.txt
=====================================
@@ -0,0 +1 @@
+chr14	96772879	0	31	0	31	62


=====================================
testData/test_loci.txt
=====================================
@@ -0,0 +1 @@
+chr14	96772879	rs2369898



View it on GitLab: https://salsa.debian.org/med-team/allelecount/-/commit/60b41071e5516e275a7adea934fd49d950b5761b

-- 
View it on GitLab: https://salsa.debian.org/med-team/allelecount/-/commit/60b41071e5516e275a7adea934fd49d950b5761b
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211007/ef721b3e/attachment-0001.htm>


More information about the debian-med-commit mailing list