[med-svn] [Git][med-team/allelecount][upstream] New upstream version 4.3.0
Andreas Tille (@tille)
gitlab at salsa.debian.org
Thu Oct 7 21:06:34 BST 2021
Andreas Tille pushed to branch upstream at Debian Med / allelecount
Commits:
60b41071 by Andreas Tille at 2021-09-10T09:45:17+02:00
New upstream version 4.3.0
- - - - -
9 changed files:
- CHANGES.md
- perl/Makefile.PL
- + perl/bin/alleleCounterToJson.pl
- perl/lib/Sanger/CGP/AlleleCount.pm
- + perl/lib/Sanger/CGP/AlleleCount/ToJson.pm
- perl/t/2_pl_compile.t
- + perl/t/tojson.t
- + testData/test_ac_out.txt
- + testData/test_loci.txt
Changes:
=====================================
CHANGES.md
=====================================
@@ -1,5 +1,9 @@
# CHANGES
+## v4.3.0
+
+* Add script to convert allelecount output to JSON
+
## v4.2.1
* Update so docker and native install use same install scripts behind the scenes
=====================================
perl/Makefile.PL
=====================================
@@ -28,7 +28,7 @@ WriteMakefile(
NAME => 'alleleCount',
LICENSE => 'agpl_3', # http://search.cpan.org/~dagolden/CPAN-Meta-2.142690/lib/CPAN/Meta/Spec.pm#license
VERSION_FROM => 'lib/Sanger/CGP/AlleleCount.pm',
- EXE_FILES => [qw( bin/alleleCounter.pl )],
+ EXE_FILES => [qw( bin/alleleCounter.pl bin/alleleCounterToJson.pl )],
PREREQ_PM => {
'Const::Fast' => 0.014,
'Try::Tiny' => 0.19,
@@ -38,5 +38,6 @@ WriteMakefile(
'Devel::Cover' => 1.09,
'Pod::Coverage' => 0.23,
'IPC::System::Simple' => 1.25,
+ 'JSON' => 2.90,
}
);
=====================================
perl/bin/alleleCounterToJson.pl
=====================================
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+##########LICENCE##########
+# Copyright (c) 2014-2021 Genome Research Ltd.
+#
+# Author: CASM/Cancer IT <cgphelp at sanger.ac.uk>
+#
+# This file is part of alleleCount.
+#
+# alleleCount is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##########LICENCE##########
+
+use strict;
+use Carp;
+use English qw( -no_match_vars );
+use warnings FATAL => 'all';
+
+use Getopt::Long 'GetOptions';
+use Pod::Usage;
+
+use Sanger::CGP::AlleleCount;
+use Sanger::CGP::AlleleCount::ToJson;
+
+# Script entry point: parse the command line, send output to stdout when no
+# output file was given, then run the conversion.
+{
+  my $options = option_builder();
+  if(!defined $options->{'o'}){
+    $options->{'o'} = '/dev/stdout';
+  }
+  run($options);
+}
+
+# Convert the allele count output to JSON and write it to the requested
+# destination.
+#
+# $options - hashref as returned by option_builder(); the keys read here are
+#            'a' (allele count file), 'l' (locus file) and 'o' (output path).
+#
+# The -o option takes an optional value, so a bare '-o' produces an empty
+# string. In that case (or when 'o' is undefined) the JSON is written to
+# STDOUT instead of printing to a filehandle that was never opened.
+# Croaks if the output file cannot be opened, written or closed.
+sub run {
+  my ($options) = @_;
+  my $json_string = Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($options->{'a'}, $options->{'l'});
+  my $outfile = $options->{'o'};
+  if(defined $outfile && length $outfile){
+    open(my $OUT, '>', $outfile) or croak("Error opening file for output: $!");
+    print {$OUT} $json_string or croak("Error writing JSON to output file: $!");
+    # Buffered write errors only surface at close, so the check matters here.
+    close($OUT) or croak("Error closing output file for JSON conversion: $!");
+  }
+  else {
+    print {*STDOUT} $json_string or croak("Error writing JSON to stdout: $!");
+  }
+  return;
+}
+
+
+# Parse and validate the command line options from @ARGV.
+#
+# Returns a hashref keyed by the short option names 'h', 'l', 'a', 'o', 'v'.
+# Exits through pod2usage() when help is requested (status 0), when a required
+# option is missing (status 1) or when GetOptions rejects the command line
+# (status 2). Prints the version and exits when -v is given.
+# Croaks if the locus file or the allele count file does not exist.
+sub option_builder {
+  my %opts;
+
+  # GetOptions returns false on unknown or malformed options; stop with the
+  # usage message rather than continuing with a partially parsed command line.
+  GetOptions(
+    'h|help' => \$opts{'h'},
+    'l|locus-file=s' => \$opts{'l'},
+    'a|allelecount-file=s' => \$opts{'a'},
+    'o|output-file:s' => \$opts{'o'},
+    'v|version' => \$opts{'v'},
+  ) or pod2usage(2);
+
+  pod2usage(0) if($opts{'h'});
+  if($opts{'v'}){
+    print Sanger::CGP::AlleleCount->VERSION."\n";
+    exit;
+  }
+  pod2usage(1) if(!$opts{'l'} || !$opts{'a'});
+  croak("Locus file ".$opts{'l'}." does not exist.") if(! -e $opts{'l'});
+  croak("Allele count output file ".$opts{'a'}." does not exist.") if(! -e $opts{'a'});
+  return \%opts;
+}
+
+__END__
+
+=head1 NAME
+
+alleleCounterToJson.pl - Generate JSON format file from the tab separated format
+
+=head1 SYNOPSIS
+
+alleleCounterToJson.pl
+
+ Required:
+
+ -locus-file -l File containing SNP positions used for allelecounter
+ -allelecount-file -a Allelecounter output file
+
+ Optional:
+ -output-file -o Output file (default: stdout)
+ -help -h This message
+ -version -v Version number
+
+=cut
=====================================
perl/lib/Sanger/CGP/AlleleCount.pm
=====================================
@@ -25,7 +25,7 @@ package Sanger::CGP::AlleleCount;
use strict;
use base 'Exporter';
-our $VERSION = '4.2.1';
+our $VERSION = '4.3.0';
our @EXPORT = qw($VERSION);
1;
=====================================
perl/lib/Sanger/CGP/AlleleCount/ToJson.pm
=====================================
@@ -0,0 +1,130 @@
+package Sanger::CGP::AlleleCount::ToJson;
+
+##########LICENCE##########
+# Copyright (c) 2014-2021 Genome Research Ltd.
+#
+# Author: CASM/Cancer IT <cgphelp at sanger.ac.uk>
+#
+# This file is part of alleleCount.
+#
+# alleleCount is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##########LICENCE##########
+
+
+use strict;
+
+use Carp;
+use English qw( -no_match_vars );
+use warnings FATAL => 'all';
+
+use JSON;
+use IO::Zlib;
+
+use Sanger::CGP::AlleleCount;
+
+use Const::Fast qw(const);
+
+# Thresholds used when calling a genotype from allele counts:
+#   MIN_PROP  - minimum fraction of the good depth an allele's count must reach
+#               to be included in the genotype call
+#   MIN_READS - minimum good depth; below this the genotype is reported as '.'
+const my %ALLELECOUNT_CONST => (
+ MIN_PROP => 0.21,
+ MIN_READS => 5,
+);
+
+# Accessor for the genotyping thresholds held in %ALLELECOUNT_CONST.
+# Takes the name of the threshold and returns its value.
+sub allelecount_val {
+  my $threshold_name = shift;
+  my $value = $ALLELECOUNT_CONST{$threshold_name};
+  return $value;
+}
+
+=item new
+
+Null constructor. Takes no options and returns an empty hash reference blessed
+into the invoking class.
+
+=cut
+
+sub new {
+  my ($class) = @_;
+  return bless {}, $class;
+}
+
+=item alleleCountToJson
+
+Convert allele count file result format to JSON.
+
+Arguments are the allele count output file (read through IO::Zlib) and the
+locus file that names each position (whitespace separated: chromosome,
+position, name). Lines starting with '#' and blank lines are ignored in both
+files.
+
+Returns a JSON object string mapping each locus name to the genotype called
+from its allele counts. Keys are emitted in sorted order so the output is
+reproducible.
+
+Croaks if either file cannot be opened, if a locus line has no position, or if
+a position in the allele count file has no name in the locus file.
+
+=cut
+
+sub alleleCountToJson{
+  my ($countsfile, $snpsfile) = @_;
+  my %name_of;     # chromosome -> position -> locus name
+  my %genotype_of; # locus name -> genotype string
+
+  #TODO open gzipped file....
+  open(my $SNPS, '<', $snpsfile) or croak("Error opening allele count locus file '$snpsfile' for JSON conversion: $!");
+  while(my $line = <$SNPS>){
+    next if($line =~ m/^\s*#/);
+    next if($line !~ m/\S/); # blank line
+    # split on q{ } ignores leading whitespace and the trailing newline
+    my ($chr,$pos,$name) = split(q{ }, $line);
+    croak("Malformed line in allele count locus file '$snpsfile': $line") if(!defined $pos);
+    $name_of{$chr}{$pos} = $name;
+  }
+  close($SNPS) or croak("Error closing allele count locus file '$snpsfile' for JSON conversion: $!");
+
+  my $fh = IO::Zlib->new;
+  if($fh->open($countsfile, "rb")){
+    while(my $line = <$fh>){
+      next if($line =~ m/^\s*#/);
+      next if($line !~ m/\S/); # blank line
+      my ($chr,$pos,$count_a,$count_c,$count_g,$count_t,$good) = split(q{ }, $line);
+      croak("Malformed line in allele count file '$countsfile': $line") if(!defined $pos);
+      my $nom = exists $name_of{$chr} ? $name_of{$chr}{$pos} : undef;
+      # Fail with a clear message instead of an 'uninitialized value' warning
+      # when the counts file contains a position the locus file does not name.
+      croak("No locus name found in '$snpsfile' for position $chr:$pos from '$countsfile'.\n") if(!defined $nom);
+      $genotype_of{$nom} = _calculate_genotype_from_allele_count($count_a,$count_c,$count_g,$count_t,$good);
+    }
+    $fh->close;
+  }else{
+    croak("Error trying to open allele count file '$countsfile' for JSON conversion: $!\n");
+  }
+  # canonical() sorts the keys; an empty input encodes as '{}'.
+  my $jsonstr = JSON->new->utf8->canonical->encode(\%genotype_of);
+  return $jsonstr;
+}
+
+# Call a two-letter genotype from the per-base counts at one position.
+#
+# Arguments: counts for A, C, G and T followed by the good depth.
+# Returns '.' when the good depth is below MIN_READS or when no allele reaches
+# MIN_PROP of the good depth; a doubled letter (e.g. 'CC') when exactly one
+# allele qualifies; otherwise the two qualifying alleles with the highest
+# counts, joined in alphabetical order (e.g. 'CT').
+sub _calculate_genotype_from_allele_count{
+  my ($a_a,$a_c,$a_g,$a_t,$good) = @_;
+  return q{.} if($good < allelecount_val('MIN_READS'));
+
+  # Keep [base, count] pairs for every allele that reaches the minimum proportion.
+  my @passing;
+  for my $candidate (['A', $a_a], ['C', $a_c], ['G', $a_g], ['T', $a_t]) {
+    if($candidate->[1]/$good >= allelecount_val('MIN_PROP')) {
+      push @passing, $candidate;
+    }
+  }
+
+  my $genotype;
+  if(@passing == 0) {
+    $genotype = q{.};
+  }
+  elsif(@passing == 1) {
+    my $base = $passing[0][0];
+    $genotype = $base.$base;
+  }
+  else {
+    # Highest counts first, then the top two bases in alphabetical order.
+    my @by_count = sort {$b->[1]<=>$a->[1]} @passing;
+    my @top_two = sort {$a cmp $b} ($by_count[0][0], $by_count[1][0]);
+    $genotype = join(q{}, @top_two);
+  }
+
+  my $geno_length = length $genotype;
+  if($geno_length > 2 || $geno_length == 0) {
+    croak("Error calculating genotype from allele counts $a_a,$a_c,$a_g,$a_t,$good.\n");
+  }
+  return $genotype;
+
+}
+
+1;
=====================================
perl/t/2_pl_compile.t
=====================================
@@ -49,16 +49,17 @@ for(@scripts) {
next;
}
my $message = "Compilation check: $script";
+ my $output = "";
my $command = "$perl -c $script";
my ($pid, $process);
try {
$pid = open $process, $command.' 2>&1 |';
- while(<$process>){};
+ while(<$process>){ $output.=$_;};
close $process;
pass($message);
}
catch {
- fail($message);
+ fail($message."\n".$output);
};
}
=====================================
perl/t/tojson.t
=====================================
@@ -0,0 +1,67 @@
+##########LICENCE##########
+# Copyright (c) 2014-2020 Genome Research Ltd.
+#
+# Author: CASM/Cancer IT <cgphelp at sanger.ac.uk>
+#
+# This file is part of alleleCount.
+#
+# alleleCount is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##########LICENCE##########
+
+use strict;
+use Test::More;
+use Test::Fatal;
+use Const::Fast qw(const);
+use File::Temp qw(tempdir);
+use File::Slurp;
+
+use Sanger::CGP::AlleleCount::ToJson;
+
+
+# Module under test and the JSON expected from the testData fixtures.
+const my $MOD => 'Sanger::CGP::AlleleCount::ToJson';
+const my $EXP_JSON => '{"rs2369898":"CT"}';
+
+# NOTE(review): the *_RESULT and *_RESULT_SNP6 fixtures below are not
+# referenced by any assertion in this test file.
+const my @DEFAULT_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
+ [qw(22 16165776 1 12 0 0 13)]
+ );
+const my @PBQ20_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
+ [qw(22 16165776 2 17 0 0 19)]
+ );
+
+const my @MAPQ0_RESULT => ( ['#CHR',qw(POS Count_A Count_C Count_G Count_T Good_depth)],
+ [qw(22 16165776 1 17 0 0 18)]
+ );
+
+const my @DEFAULT_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
+ [qw(22 16165776 12 1 13)]
+ );
+const my @PBQ20_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
+ [qw(22 16165776 17 2 19)]
+ );
+const my @MAPQ0_RESULT_SNP6 => (['#CHR',qw(POS Count_Allele_A Count_Allele_B Good_depth)],
+ [qw(22 16165776 17 1 18)]
+ );
+
+use FindBin qw($Bin);
+# Fixtures live in the repository-level testData directory, two levels above
+# the directory holding this test.
+my $data_root = "$Bin/../../testData";
+
+my $loci = "$data_root/test_loci.txt";
+my $ac_output = "$data_root/test_ac_out.txt";
+
+my $obj = new_ok($MOD); # no options
+
+# Test::More::is() takes (got, expected, name); passing them in that order
+# keeps the 'got'/'expected' labels in a failure report correct.
+my $got_json = Sanger::CGP::AlleleCount::ToJson::alleleCountToJson($ac_output, $loci);
+is($got_json, $EXP_JSON, "Check conversion to JSON");
+
+done_testing();
+
=====================================
testData/test_ac_out.txt
=====================================
@@ -0,0 +1 @@
+chr14 96772879 0 31 0 31 62
=====================================
testData/test_loci.txt
=====================================
@@ -0,0 +1 @@
+chr14 96772879 rs2369898
View it on GitLab: https://salsa.debian.org/med-team/allelecount/-/commit/60b41071e5516e275a7adea934fd49d950b5761b
--
View it on GitLab: https://salsa.debian.org/med-team/allelecount/-/commit/60b41071e5516e275a7adea934fd49d950b5761b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211007/ef721b3e/attachment-0001.htm>
More information about the debian-med-commit
mailing list