[med-svn] [libbio-graphics-perl] 01/09: New upstream version 2.40
Andreas Tille
tille at debian.org
Tue Dec 20 12:49:43 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository libbio-graphics-perl.
commit 9eaceee862978f3108b0af30bfb5087724463163
Author: Andreas Tille <tille at debian.org>
Date: Sat Dec 17 19:37:04 2016 +0100
New upstream version 2.40
---
Build.PL | 10 +-
Changes | 4 +
MANIFEST | 9 +-
META.json | 11 +-
META.yml | 137 ++-------
Makefile.PL | 2 +-
README | 1 +
lib/Bio/Graphics.pm | 3 +-
lib/Bio/Graphics/Glyph/gene.pm | 20 +-
lib/Bio/Graphics/Glyph/rainbow_gene.pm | 4 +-
lib/Bio/Graphics/Glyph/segments.pm | 33 +--
lib/Bio/Graphics/Glyph/topoview.pm | 496 +++++++++++++++++++++++++++++++++
scripts/bam_coverage_windows.pl | 131 +++++++++
scripts/coverage_to_topoview.pl | 247 ++++++++++++++++
14 files changed, 940 insertions(+), 168 deletions(-)
diff --git a/Build.PL b/Build.PL
index 9e6c9cf..e89c025 100755
--- a/Build.PL
+++ b/Build.PL
@@ -12,11 +12,13 @@ my $build = Module::Build->new(
requires => {
'Bio::Root::Version' => '1.005009001',
'GD' => 2.30,
- 'Statistics::Descriptive' => 2.6, # required for Bio::Graphics::Wiggle::Loader
- #and tests fail without it
+ 'CGI' => 0,
+ 'Bio::Coordinate::Pair' => '1.005009001',
+ 'Statistics::Descriptive' => 2.6, # required for Bio::Graphics::Wiggle::Loader
+ #and tests fail without it
},
recommends => {
- 'GD::SVG' => 0.32,
+ 'GD::SVG' => 0.32,
'Text::ParseWords' => 3.26, # required for Bio::Graphics::Wiggle::Loader
# 'Bio::SCF' => 1.01, # required for Bio::Graphics::Glyph::trace
},
@@ -26,6 +28,8 @@ my $build = Module::Build->new(
'scripts/glyph_help.pl',
'scripts/search_overview.pl',
'scripts/render_msa.pl',
+ 'scripts/bam_coverage_windows.pl',
+ 'scripts/coverage_to_topoview.pl'
],
create_makefile_pl => 'passthrough',
build_class => 'Module::Build',
diff --git a/Changes b/Changes
index 50ed607..dd66663 100755
--- a/Changes
+++ b/Changes
@@ -1,4 +1,8 @@
Revision history for Perl extension Bio::Graphics.
+2.40
+ - Add CGI as a requirement as it is not part of core perl distributions, post-5.21
+ - Add Bio::Coordinate::Pair as a requirement (now released separately from BioPerl core)
+
2.39
- Turn off image-based regression tests becuase libgd has started to produce visually-identical
images that are different at the byte level.
diff --git a/MANIFEST b/MANIFEST
index afbf76f..25f62a5 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -90,6 +90,7 @@ lib/Bio/Graphics/Glyph/text_in_box.pm
lib/Bio/Graphics/Glyph/three_letters.pm
lib/Bio/Graphics/Glyph/tic_tac_toe.pm
lib/Bio/Graphics/Glyph/toomany.pm
+lib/Bio/Graphics/Glyph/topoview.pm
lib/Bio/Graphics/Glyph/trace.pm
lib/Bio/Graphics/Glyph/track.pm
lib/Bio/Graphics/Glyph/transcript.pm
@@ -114,14 +115,13 @@ lib/Bio/Graphics/RendererI.pm
lib/Bio/Graphics/Util.pm
lib/Bio/Graphics/Wiggle.pm
lib/Bio/Graphics/Wiggle/Loader.pm
-Makefile.PL
MANIFEST
MANIFEST.SKIP
-META.json
-META.yml
README
README.bioperl
+scripts/bam_coverage_windows.pl
scripts/contig_draw.pl
+scripts/coverage_to_topoview.pl
scripts/feature_draw.pl
scripts/frend.pl
scripts/glyph_help.pl
@@ -223,3 +223,6 @@ t/data/t3/version9.png
t/data/wig_data.wig
t/decorated_transcript_t1.pl
t/Wiggle.t
+Makefile.PL
+META.yml
+META.json
diff --git a/META.json b/META.json
index 4b679b6..a066e90 100644
--- a/META.json
+++ b/META.json
@@ -4,7 +4,7 @@
"Lincoln Stein <lincoln.stein at oicr.on.ca>"
],
"dynamic_config" : 1,
- "generated_by" : "Module::Build version 0.4205",
+ "generated_by" : "Module::Build version 0.4218",
"license" : [
"perl_5"
],
@@ -25,7 +25,9 @@
"Text::ParseWords" : "3.26"
},
"requires" : {
+ "Bio::Coordinate::Pair" : "1.005009001",
"Bio::Root::Version" : "1.005009001",
+ "CGI" : "0",
"GD" : "2.3",
"Statistics::Descriptive" : "2.6"
}
@@ -34,7 +36,7 @@
"provides" : {
"Bio::Graphics" : {
"file" : "lib/Bio/Graphics.pm",
- "version" : "2.39"
+ "version" : "2.40"
},
"Bio::Graphics::ConfiguratorI" : {
"file" : "lib/Bio/Graphics/ConfiguratorI.pm"
@@ -289,6 +291,9 @@
"Bio::Graphics::Glyph::toomany" : {
"file" : "lib/Bio/Graphics/Glyph/toomany.pm"
},
+ "Bio::Graphics::Glyph::topoview" : {
+ "file" : "lib/Bio/Graphics/Glyph/topoview.pm"
+ },
"Bio::Graphics::Glyph::trace" : {
"file" : "lib/Bio/Graphics/Glyph/trace.pm"
},
@@ -376,5 +381,5 @@
"http://dev.perl.org/licenses/"
]
},
- "version" : "2.39"
+ "version" : "2.40"
}
diff --git a/META.yml b/META.yml
index 7f229d0..f69ec9e 100644
--- a/META.yml
+++ b/META.yml
@@ -4,355 +4,252 @@ author:
- 'Lincoln Stein <lincoln.stein at oicr.on.ca>'
build_requires: {}
configure_requires:
- Module::Build: 0.42
+ Module::Build: '0.42'
dynamic_config: 1
-generated_by: 'Module::Build version 0.4205, CPAN::Meta::Converter version 2.120351'
+generated_by: 'Module::Build version 0.4218, CPAN::Meta::Converter version 2.150001'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
+ version: '1.4'
name: Bio-Graphics
provides:
Bio::Graphics:
file: lib/Bio/Graphics.pm
- version: 2.39
+ version: '2.40'
Bio::Graphics::ConfiguratorI:
file: lib/Bio/Graphics/ConfiguratorI.pm
- version: 0
Bio::Graphics::DrawTransmembrane:
file: lib/Bio/Graphics/DrawTransmembrane.pm
- version: 0
Bio::Graphics::Feature:
file: lib/Bio/Graphics/Feature.pm
- version: 0
Bio::Graphics::FeatureBase:
file: lib/Bio/Graphics/FeatureBase.pm
- version: 0
Bio::Graphics::FeatureDir:
file: lib/Bio/Graphics/FeatureDir.pm
- version: 0
Bio::Graphics::FeatureFile:
file: lib/Bio/Graphics/FeatureFile.pm
- version: 0
Bio::Graphics::FeatureFile::Iterator:
file: lib/Bio/Graphics/FeatureFile/Iterator.pm
- version: 0
Bio::Graphics::FileSplitter:
file: lib/Bio/Graphics/FeatureDir.pm
- version: 0
Bio::Graphics::GDWrapper:
file: lib/Bio/Graphics/GDWrapper.pm
- version: 0
Bio::Graphics::Glyph:
file: lib/Bio/Graphics/Glyph.pm
- version: 0
Bio::Graphics::Glyph::Factory:
file: lib/Bio/Graphics/Glyph/Factory.pm
- version: 0
Bio::Graphics::Glyph::alignment:
file: lib/Bio/Graphics/Glyph/alignment.pm
- version: 0
Bio::Graphics::Glyph::allele_tower:
file: lib/Bio/Graphics/Glyph/allele_tower.pm
- version: 0
Bio::Graphics::Glyph::anchored_arrow:
file: lib/Bio/Graphics/Glyph/anchored_arrow.pm
- version: 0
Bio::Graphics::Glyph::arrow:
file: lib/Bio/Graphics/Glyph/arrow.pm
- version: 0
Bio::Graphics::Glyph::box:
file: lib/Bio/Graphics/Glyph/box.pm
- version: 0
Bio::Graphics::Glyph::broken_line:
file: lib/Bio/Graphics/Glyph/broken_line.pm
- version: 0
Bio::Graphics::Glyph::cds:
file: lib/Bio/Graphics/Glyph/cds.pm
- version: 0
Bio::Graphics::Glyph::christmas_arrow:
file: lib/Bio/Graphics/Glyph/christmas_arrow.pm
- version: 0
Bio::Graphics::Glyph::cross:
file: lib/Bio/Graphics/Glyph/cross.pm
- version: 0
Bio::Graphics::Glyph::crossbox:
file: lib/Bio/Graphics/Glyph/crossbox.pm
- version: 0
Bio::Graphics::Glyph::dashed_line:
file: lib/Bio/Graphics/Glyph/dashed_line.pm
- version: 0
Bio::Graphics::Glyph::decorated_gene:
file: lib/Bio/Graphics/Glyph/decorated_gene.pm
- version: 0
Bio::Graphics::Glyph::decorated_transcript:
file: lib/Bio/Graphics/Glyph/decorated_transcript.pm
- version: 0
Bio::Graphics::Glyph::diamond:
file: lib/Bio/Graphics/Glyph/diamond.pm
- version: 0
Bio::Graphics::Glyph::dna:
file: lib/Bio/Graphics/Glyph/dna.pm
- version: 0
Bio::Graphics::Glyph::dot:
file: lib/Bio/Graphics/Glyph/dot.pm
- version: 0
Bio::Graphics::Glyph::dumbbell:
file: lib/Bio/Graphics/Glyph/dumbbell.pm
- version: 0
Bio::Graphics::Glyph::ellipse:
file: lib/Bio/Graphics/Glyph/ellipse.pm
- version: 0
Bio::Graphics::Glyph::ex:
file: lib/Bio/Graphics/Glyph/ex.pm
- version: 0
Bio::Graphics::Glyph::extending_arrow:
file: lib/Bio/Graphics/Glyph/extending_arrow.pm
- version: 0
Bio::Graphics::Glyph::fb_shmiggle:
file: lib/Bio/Graphics/Glyph/fb_shmiggle.pm
- version: 0
Bio::Graphics::Glyph::fixedwidth:
file: lib/Bio/Graphics/Glyph/fixedwidth.pm
- version: 0
Bio::Graphics::Glyph::flag:
file: lib/Bio/Graphics/Glyph/flag.pm
- version: 0
Bio::Graphics::Glyph::gene:
file: lib/Bio/Graphics/Glyph/gene.pm
- version: 0
Bio::Graphics::Glyph::generic:
file: lib/Bio/Graphics/Glyph/generic.pm
- version: 0
Bio::Graphics::Glyph::graded_segments:
file: lib/Bio/Graphics/Glyph/graded_segments.pm
- version: 0
Bio::Graphics::Glyph::group:
file: lib/Bio/Graphics/Glyph/group.pm
- version: 0
Bio::Graphics::Glyph::hat:
file: lib/Bio/Graphics/Glyph/hat.pm
- version: 0
Bio::Graphics::Glyph::heat_map:
file: lib/Bio/Graphics/Glyph/heat_map.pm
- version: 0
Bio::Graphics::Glyph::heat_map_ideogram:
file: lib/Bio/Graphics/Glyph/heat_map_ideogram.pm
- version: 0
Bio::Graphics::Glyph::heterogeneous_segments:
file: lib/Bio/Graphics/Glyph/heterogeneous_segments.pm
- version: 0
Bio::Graphics::Glyph::hidden:
file: lib/Bio/Graphics/Glyph/hidden.pm
- version: 0
Bio::Graphics::Glyph::hybrid_plot:
file: lib/Bio/Graphics/Glyph/hybrid_plot.pm
- version: 1.0
+ version: '1.0'
Bio::Graphics::Glyph::ideogram:
file: lib/Bio/Graphics/Glyph/ideogram.pm
- version: 0
Bio::Graphics::Glyph::image:
file: lib/Bio/Graphics/Glyph/image.pm
- version: 0
Bio::Graphics::Glyph::lightning:
file: lib/Bio/Graphics/Glyph/lightning.pm
- version: 0
Bio::Graphics::Glyph::line:
file: lib/Bio/Graphics/Glyph/line.pm
- version: 0
Bio::Graphics::Glyph::merge_parts:
file: lib/Bio/Graphics/Glyph/merge_parts.pm
- version: 0
Bio::Graphics::Glyph::merged_alignment:
file: lib/Bio/Graphics/Glyph/merged_alignment.pm
- version: 0
Bio::Graphics::Glyph::minmax:
file: lib/Bio/Graphics/Glyph/minmax.pm
- version: 0
Bio::Graphics::Glyph::operon:
file: lib/Bio/Graphics/Glyph/operon.pm
- version: 0
Bio::Graphics::Glyph::oval:
file: lib/Bio/Graphics/Glyph/oval.pm
- version: 0
Bio::Graphics::Glyph::pairplot:
file: lib/Bio/Graphics/Glyph/pairplot.pm
- version: 0
Bio::Graphics::Glyph::pentagram:
file: lib/Bio/Graphics/Glyph/pentagram.pm
- version: 0
Bio::Graphics::Glyph::phylo_align:
file: lib/Bio/Graphics/Glyph/phylo_align.pm
- version: 0
Bio::Graphics::Glyph::pinsertion:
file: lib/Bio/Graphics/Glyph/pinsertion.pm
- version: 0
Bio::Graphics::Glyph::point_glyph:
file: lib/Bio/Graphics/Glyph/point_glyph.pm
- version: 0
Bio::Graphics::Glyph::primers:
file: lib/Bio/Graphics/Glyph/primers.pm
- version: 0
Bio::Graphics::Glyph::processed_transcript:
file: lib/Bio/Graphics/Glyph/processed_transcript.pm
- version: 0
Bio::Graphics::Glyph::protein:
file: lib/Bio/Graphics/Glyph/protein.pm
- version: 0
Bio::Graphics::Glyph::ragged_ends:
file: lib/Bio/Graphics/Glyph/ragged_ends.pm
- version: 0
Bio::Graphics::Glyph::rainbow_gene:
file: lib/Bio/Graphics/Glyph/rainbow_gene.pm
- version: 0
Bio::Graphics::Glyph::read_pair:
file: lib/Bio/Graphics/Glyph/read_pair.pm
- version: 0
Bio::Graphics::Glyph::redgreen_box:
file: lib/Bio/Graphics/Glyph/redgreen_box.pm
- version: 0
Bio::Graphics::Glyph::redgreen_segment:
file: lib/Bio/Graphics/Glyph/redgreen_segment.pm
- version: 0
Bio::Graphics::Glyph::repeating_shape:
file: lib/Bio/Graphics/Glyph/repeating_shape.pm
- version: 0
Bio::Graphics::Glyph::rndrect:
file: lib/Bio/Graphics/Glyph/rndrect.pm
- version: 0
Bio::Graphics::Glyph::ruler_arrow:
file: lib/Bio/Graphics/Glyph/ruler_arrow.pm
- version: 0
Bio::Graphics::Glyph::saw_teeth:
file: lib/Bio/Graphics/Glyph/saw_teeth.pm
- version: 0
Bio::Graphics::Glyph::scale:
file: lib/Bio/Graphics/Glyph/scale.pm
- version: 0
Bio::Graphics::Glyph::segmented_keyglyph:
file: lib/Bio/Graphics/Glyph/segmented_keyglyph.pm
- version: 0
Bio::Graphics::Glyph::segments:
file: lib/Bio/Graphics/Glyph/segments.pm
- version: 0
Bio::Graphics::Glyph::smoothing:
file: lib/Bio/Graphics/Glyph/smoothing.pm
- version: 0
Bio::Graphics::Glyph::so_transcript:
file: lib/Bio/Graphics/Glyph/so_transcript.pm
- version: 0
Bio::Graphics::Glyph::span:
file: lib/Bio/Graphics/Glyph/span.pm
- version: 0
Bio::Graphics::Glyph::spectrogram:
file: lib/Bio/Graphics/Glyph/spectrogram.pm
- version: 0
Bio::Graphics::Glyph::splice_site:
file: lib/Bio/Graphics/Glyph/splice_site.pm
- version: 0
Bio::Graphics::Glyph::stackedplot:
file: lib/Bio/Graphics/Glyph/stackedplot.pm
- version: 0
Bio::Graphics::Glyph::ternary_plot:
file: lib/Bio/Graphics/Glyph/ternary_plot.pm
- version: 0
Bio::Graphics::Glyph::text_in_box:
file: lib/Bio/Graphics/Glyph/text_in_box.pm
- version: 0
Bio::Graphics::Glyph::three_letters:
file: lib/Bio/Graphics/Glyph/three_letters.pm
- version: 0
Bio::Graphics::Glyph::tic_tac_toe:
file: lib/Bio/Graphics/Glyph/tic_tac_toe.pm
- version: 0
Bio::Graphics::Glyph::toomany:
file: lib/Bio/Graphics/Glyph/toomany.pm
- version: 0
+ Bio::Graphics::Glyph::topoview:
+ file: lib/Bio/Graphics/Glyph/topoview.pm
Bio::Graphics::Glyph::trace:
file: lib/Bio/Graphics/Glyph/trace.pm
- version: 0
Bio::Graphics::Glyph::track:
file: lib/Bio/Graphics/Glyph/track.pm
- version: 0
Bio::Graphics::Glyph::transcript:
file: lib/Bio/Graphics/Glyph/transcript.pm
- version: 0
Bio::Graphics::Glyph::transcript2:
file: lib/Bio/Graphics/Glyph/transcript2.pm
- version: 0
Bio::Graphics::Glyph::translation:
file: lib/Bio/Graphics/Glyph/translation.pm
- version: 0
Bio::Graphics::Glyph::triangle:
file: lib/Bio/Graphics/Glyph/triangle.pm
- version: 0
Bio::Graphics::Glyph::two_bolts:
file: lib/Bio/Graphics/Glyph/two_bolts.pm
- version: 0
Bio::Graphics::Glyph::vista_plot:
file: lib/Bio/Graphics/Glyph/vista_plot.pm
- version: 1.0
+ version: '1.0'
Bio::Graphics::Glyph::wave:
file: lib/Bio/Graphics/Glyph/wave.pm
- version: 0
Bio::Graphics::Glyph::weighted_arrow:
file: lib/Bio/Graphics/Glyph/weighted_arrow.pm
- version: 0
Bio::Graphics::Glyph::whiskerplot:
file: lib/Bio/Graphics/Glyph/whiskerplot.pm
- version: 0
Bio::Graphics::Glyph::wiggle_box:
file: lib/Bio/Graphics/Glyph/wiggle_box.pm
- version: 0
Bio::Graphics::Glyph::wiggle_data:
file: lib/Bio/Graphics/Glyph/wiggle_data.pm
- version: 0
Bio::Graphics::Glyph::wiggle_density:
file: lib/Bio/Graphics/Glyph/wiggle_density.pm
- version: 0
Bio::Graphics::Glyph::wiggle_whiskers:
file: lib/Bio/Graphics/Glyph/wiggle_whiskers.pm
- version: 0
Bio::Graphics::Glyph::wiggle_xyplot:
file: lib/Bio/Graphics/Glyph/wiggle_xyplot.pm
- version: 0
Bio::Graphics::Glyph::xyplot:
file: lib/Bio/Graphics/Glyph/xyplot.pm
- version: 0
Bio::Graphics::Layout:
file: lib/Bio/Graphics/Layout.pm
- version: 0
Bio::Graphics::Layout::Contour:
file: lib/Bio/Graphics/Layout.pm
- version: 0
Bio::Graphics::Math:
file: lib/Bio/Graphics/Layout.pm
- version: 0
Bio::Graphics::Panel:
file: lib/Bio/Graphics/Panel.pm
- version: 0
Bio::Graphics::Pictogram:
file: lib/Bio/Graphics/Pictogram.pm
- version: 0
Bio::Graphics::RendererI:
file: lib/Bio/Graphics/RendererI.pm
- version: 0
Bio::Graphics::Util:
file: lib/Bio/Graphics/Util.pm
- version: 0
Bio::Graphics::Wiggle:
file: lib/Bio/Graphics/Wiggle.pm
- version: 1.0
+ version: '1.0'
Bio::Graphics::Wiggle::Loader:
file: lib/Bio/Graphics/Wiggle/Loader.pm
- version: 0
recommends:
- GD::SVG: 0.32
- Text::ParseWords: 3.26
+ GD::SVG: '0.32'
+ Text::ParseWords: '3.26'
requires:
- Bio::Root::Version: 1.005009001
- GD: 2.3
- Statistics::Descriptive: 2.6
+ Bio::Coordinate::Pair: '1.005009001'
+ Bio::Root::Version: '1.005009001'
+ CGI: '0'
+ GD: '2.3'
+ Statistics::Descriptive: '2.6'
resources:
license: http://dev.perl.org/licenses/
-version: 2.39
+version: '2.40'
diff --git a/Makefile.PL b/Makefile.PL
index 34def82..5e79e67 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,4 +1,4 @@
-# Note: this file was auto-generated by Module::Build::Compat version 0.4205
+# Note: this file was auto-generated by Module::Build::Compat version 0.4218
unless (eval "use Module::Build::Compat 0.02; 1" ) {
print "This module requires Module::Build to install itself.\n";
diff --git a/README b/README
index 4cf6084..f0a99a2 100755
--- a/README
+++ b/README
@@ -1,5 +1,6 @@
Bio::Graphics - Generate GD images of Bio::Seq objects
+
SYNOPSIS
This is a simple GD-based renderer (diagram drawer) for DNA and
diff --git a/lib/Bio/Graphics.pm b/lib/Bio/Graphics.pm
index a02c97a..3fb5a68 100755
--- a/lib/Bio/Graphics.pm
+++ b/lib/Bio/Graphics.pm
@@ -2,7 +2,7 @@ package Bio::Graphics;
use strict;
use Bio::Graphics::Panel;
-our $VERSION = '2.39';
+our $VERSION = '2.40';
1;
@@ -129,4 +129,3 @@ it under the same terms as Perl itself. See DISCLAIMER.txt for
disclaimers of warranty.
=cut
-
diff --git a/lib/Bio/Graphics/Glyph/gene.pm b/lib/Bio/Graphics/Glyph/gene.pm
index e9ef7ce..7e21623 100755
--- a/lib/Bio/Graphics/Glyph/gene.pm
+++ b/lib/Bio/Graphics/Glyph/gene.pm
@@ -70,7 +70,7 @@ sub extra_arrow_length {
sub pad_left {
my $self = shift;
my $type = $self->feature->primary_tag;
- return 0 unless $type =~ /gene|mRNA|transcript/;
+ return 0 unless $type =~ /gene|rna|transcript/i;
$self->SUPER::pad_left;
}
@@ -148,6 +148,12 @@ sub maxdepth {
return 2;
}
+sub fixup_glyph {
+ my $self = shift;
+ return unless $self->level == 1;
+ $self->create_implied_utrs if $self->option('implied_utrs');
+ $self->adjust_exons if $self->option('implied_utrs') || $self->option('adjust_exons');
+}
sub _subfeat {
my $class = shift;
@@ -155,7 +161,7 @@ sub _subfeat {
if ($feature->primary_tag =~ /^gene/i) {
my @transcripts;
- for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene/) {
+ for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene transcript/) {
push @transcripts, $feature->get_SeqFeatures($t);
}
return @transcripts if @transcripts;
@@ -176,7 +182,11 @@ sub _subfeat {
@subparts = $feature->get_SeqFeatures($class->option('sub_part'));
}
elsif ($feature->primary_tag =~ /^mRNA/i) {
- @subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR));
+ if ($class->option('implied_utrs') || $class->option('adjust_exons')) {
+ @subparts = $feature->get_SeqFeatures(qw(CDS exon five_prime_UTR three_prime_UTR UTR));
+ } else {
+ @subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR));
+ }
}
else {
@subparts = $feature->get_SeqFeatures('exon');
@@ -299,10 +309,6 @@ options:
Draw strand with little arrows undef (false)
on the intron.
-The B<-adjust_exons> and B<-implied_utrs> options are inherited from
-processed_transcript, but are quietly ignored. Please use the
-processed_transcript glyph for this type of processing.
-
=head1 BUGS
The SO terms are hard-coded. They should be more flexible and should
diff --git a/lib/Bio/Graphics/Glyph/rainbow_gene.pm b/lib/Bio/Graphics/Glyph/rainbow_gene.pm
index a7df05c..62eb929 100755
--- a/lib/Bio/Graphics/Glyph/rainbow_gene.pm
+++ b/lib/Bio/Graphics/Glyph/rainbow_gene.pm
@@ -82,7 +82,7 @@ sub extra_arrow_length {
sub pad_left {
my $self = shift;
my $type = $self->feature->primary_tag;
- return 0 unless $type =~ /gene|mRNA/;
+ return 0 unless $type =~ /gene|rna|transcript/i;
$self->SUPER::pad_left;
}
@@ -167,7 +167,7 @@ sub _subfeat {
if ($feature->primary_tag =~ /^gene/i) {
my @transcripts;
- for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene/) {
+ for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene transcript/) {
push @transcripts, $feature->get_SeqFeatures($t);
}
return @transcripts if @transcripts;
diff --git a/lib/Bio/Graphics/Glyph/segments.pm b/lib/Bio/Graphics/Glyph/segments.pm
index 7626fbb..49df27e 100755
--- a/lib/Bio/Graphics/Glyph/segments.pm
+++ b/lib/Bio/Graphics/Glyph/segments.pm
@@ -268,15 +268,16 @@ sub _split_on_cigar {
my $source_end = $feature->end;
my $ss = $feature->strand;
my $ts = $feature->hit->strand;
+
+ # Render -/- alignments on the + strand
+ if ($ss == -1 && $ts == -1) {
+ $ss = 1;
+ }
+
my $target_start = eval {$feature->hit->start} || return $feature;
my (@parts);
- # BUG: we handle +/+ and -/+ alignments, but not +/- or -/-
- # (i.e. the target has got to have forward strand coordinates)
-
- # forward strand
- if ($ss >= 0) {
for my $event (@$cigar) {
my ($op,$count) = @$event;
if ($op eq 'I' || $op eq 'S' || $op eq 'H') {
@@ -297,28 +298,6 @@ sub _split_on_cigar {
}
}
- # minus strand
- } else {
- for my $event (@$cigar) {
- my ($op,$count) = @$event;
- if ($op eq 'I' || $op eq 'S' || $op eq 'H') {
- $target_start += $count;
- }
- elsif ($op eq 'D' || $op eq 'N') {
- $source_end -= $count;
- }
- elsif ($op eq 'P') {
- # do nothing for pads
- } else { # everything else is assumed to be a match -- revisit
- push @parts,[$source_end-$count+1,$source_end,
- $target_start,$target_start+$count-1];
- $source_end -= $count;
- $target_start += $count;
- }
- }
-
- }
-
my $id = $feature->seq_id;
my $tid = $feature->hit->seq_id;
my @result = map {
diff --git a/lib/Bio/Graphics/Glyph/topoview.pm b/lib/Bio/Graphics/Glyph/topoview.pm
new file mode 100755
index 0000000..607cb2b
--- /dev/null
+++ b/lib/Bio/Graphics/Glyph/topoview.pm
@@ -0,0 +1,496 @@
+package Bio::Graphics::Glyph::topoview;
+
+# Based on the fb_shmiggle glyph by Victor Strelets, FlyBase.org
+# 2009-2010 Victor Strelets, FlyBase.org
+# Sheldon McKay <sheldon.mckay at gmail.com> 2015
+
+
+# "topoview.pm" the TopoView glyph was developed for fast
+# 3D-like demonstration of RNA-seq data consisting of multiple
+# individual subsets. The main purposes were to compact presentation
+# as much as possible (in one reasonably sized track) and
+# to allow easy visual detection of coordinated behavior
+# of the expression profiles of different subsets.
+
+# See http://gmod.org/wiki/Using_the_topoview_Glyph for complete documentation
+
+use strict;
+use GD;
+use base 'Bio::Graphics::Glyph::generic';
+use Text::ParseWords 'shellwords';
+use Data::Dumper;
+use List::Util qw/min max shuffle/;
+use constant DEBUG => 0;
+
+use vars qw/$colors_selected $black $red $white $grey @colors %Indices
+ $black $darkgrey $lightgrey $charcoal/;
+
+sub draw {
+ my $self = shift;
+ my $gd = shift;
+ my ($left,$top,$partno,$total_parts) = @_;
+ my $ft = $self->feature;
+
+ $black = $gd->colorClosest(0,0,0);
+ $lightgrey = $gd->colorClosest(225,225,225);
+ $grey = $gd->colorClosest(200,200,200);
+ $darkgrey = $gd->colorClosest(125,125,125);
+ $charcoal = $gd->colorClosest(75,75,75);
+
+ # User specified edge color or else charcoal gray (black is very harsh)
+ if (my $edge_color = $self->option('edge color')) {
+ my $col = $self->factory->translate_color($edge_color);
+ $self->{fgcolor} = $col;
+ }
+ else {
+ $self->{fgcolor} = $charcoal;
+ }
+
+ my($pnstart,$pnstop) = ($self->panel->start,$self->panel->end); # in seq coordinates
+ my($xf1,$yf1,$xf2,$yf2) = $self->calculate_boundaries($left,$top);
+ my $nseqpoints = $pnstop - $pnstart + 1;
+ my $leftpad = $self->panel->pad_left;
+ $ft->{datadir} ||= $self->option('datadir');
+ my $datadir = $ENV{SERVER_PATH} . $ft->{datadir};
+
+ my($start,$stop) = $self->panel->location2pixel(($ft->{start},$ft->{end}));
+ my $ftscrstop = $stop + $leftpad;
+ my $ftscrstart = $start + $leftpad;
+
+ my $chromosome = $ft->{ref};
+ my $flipped = $self->{flip} ? 1 : 0;
+ my($subsets,$subsetsnames,$signals) = $self->getData($ft,$datadir,$chromosome,$pnstart,$pnstop,$xf1,$xf2,$flipped,$gd);
+
+ my $poly_pkg = $self->polygon_package;
+
+ my @orderedsubsets = @{$subsets};
+ my $nsets = $#orderedsubsets+1;
+
+ # x and y steps
+ my $xstep = $self->option('x_step');
+ my $ystep = $self->option('y_step');
+ unless ($ystep) {
+ $ystep = int(100/$nsets);
+ $ystep = 7 if $ystep >= 7; # empiricaly found - to read lines of tiny fonts
+ $ystep = 12 if $ystep > 12; # empirically found - to preserve topo feel when number of subsets is small
+ }
+ unless ($xstep) {
+ $xstep = 4;
+ }
+ my($xw,$yw) = ( $nsets*$xstep, ($nsets-1)*$ystep );
+
+ my $polybg = $poly_pkg->new();
+ $polybg->addPt($xf1,$yf2-$yw);
+ $polybg->addPt($xf2,$yf2-$yw);
+ $polybg->addPt($xf2-$xw, $yf2);
+ $polybg->addPt($xf1-$xw, $yf2);
+ $gd->polygon($polybg,$lightgrey); # background
+ for( my $xx = $xf1+2; $xx<$xf2; $xx+=6 ) { $gd->line($xx,$yf2-$yw,$xx-$xw,$yf2,$grey); } # grid-helper
+
+ my $xshift = 0;
+ my $yshift = $nsets * $ystep;
+
+ my @screencoords = @{$signals->{screencoords}};
+ my $max_signal = 30;
+ my $koeff = 4;
+ if( my $max = $self->max_score ) {
+ $max_signal = $max;
+ $koeff = 80.0/$max_signal;
+ }
+ my $predictor_cutoff = int($max_signal*0.95); # empirically found
+ my @prevx = ();
+ my @prevy = ();
+ my @prevvals = ();
+ my $profilen = $self->{no_max} ? 1 : 0;
+ my %SPEEDUP = ();
+ my $scrx = 0;
+ my $no_fill = defined $self->option('fill') && !$self->option('fill');
+
+ foreach my $subset ( @orderedsubsets ) {
+ my ($color,$bgcolor,$edgecolor);
+ if ( $self->{bgcolor} ) {
+ $bgcolor = $self->{bgcolor}->{$subset};
+ }
+
+ if ($profilen == 0) {
+ ($color,$edgecolor) = ($darkgrey,$charcoal);
+ }
+ else {
+ $color = $bgcolor;
+ $edgecolor = $self->{fgcolor};
+ }
+
+ $xshift -= $xstep;
+ $yshift -= $ystep;
+ my @values = @{$signals->{$subset}};
+ my($xold,$yold)= ($xf1+$xshift,$yf2-$yshift+1);
+ my $xpos = 0;
+ my $poly = $poly_pkg->new();
+ $poly->addPt($xold,$yold+1);
+ my @allx = ($xold);
+ my @ally = ($yold);
+ my @allvals = (0);
+ my $runx = $xf1 + $xshift;
+
+ foreach my $val ( @values ) {
+ $scrx += $leftpad;
+ my $x = $screencoords[$xpos] + $xshift;
+ my $visval;
+ if( exists $SPEEDUP{$val} ) { $visval = $SPEEDUP{$val}; }
+ else { $visval = int($val*$koeff); $SPEEDUP{$val}= $visval; }
+ my $y = $yf2 - $yshift - $visval;
+ push(@allx,$x);
+ push(@ally,$y);
+ push(@allvals,$visval);
+ if( $xpos>0 ) {
+ $poly->addPt($x,$y+1);
+ }
+ ($xold,$yold)= ($x,$y);
+ $xpos++;
+ }
+ $poly->addPt($xf2+$xshift, $yf2-$yshift+1);
+ unless ($profilen == 0 || $no_fill) {
+ $gd->filledPolygon($poly,$color);
+ }
+ ($xold,$yold)= ($allx[0],$ally[0]);
+ for( my $en =1; $en<=$#allx; $en++ ) {
+ my $x = $allx[$en];
+ my $y = $ally[$en];
+ $gd->line($xold,$yold,$x,$y,$edgecolor);
+ ($xold,$yold)= ($x,$y);
+ }
+
+ # add scale bar to left and mid-point
+ if ($profilen == 1) {
+ my($xmin,$yyy,$ymax) = ($allx[1]-1,$yf2-$yw,$ally[0]);
+ my $xmid = int(($allx[-1] - $xmin)/2);
+ $self->add_scale_bar([$xmin,$xmid],$yyy,$max_signal,$gd);
+ $self->{_xmid} = $xmid;
+ $self->{_ymin} = $ymax;
+ }
+
+ $self->{_key}->{$subsetsnames->{$subset}} = $color;
+
+ $gd->string(GD::Font->Tiny,$xf2+$xshift+3, $yf2-$yshift-5,$subsetsnames->{$subset},$color);
+
+ unless( $profilen ==0 ) { @prevx = @allx; @prevy = @ally; @prevvals = @allvals; }
+ $profilen++;
+ }
+
+ my $hide_key = defined $self->option('show key') && !$self->option('show key');
+ $self->add_subset_key($gd,$subsets) unless $hide_key;
+
+ $gd->flipVertical() if $self->option('flip vertical');
+
+ return;
+}
+
+sub add_scale_bar {
+ my ($self,$xx,$y,$max_signal,$gd) = @_;
+ return if $self->option('flip vertical');
+ for my $x (@$xx) {
+ $gd->string(GD::Font->Tiny,$x-12, $y-3,'0',$black);
+ $gd->line($x-2,$y,$x-2,$y-50,$black);
+ $gd->line($x-4,$y-47,$x-2,$y-50,$black);
+ $gd->line($x,$y-47,$x-2,$y-50,$black);
+ $gd->line($x-4,$y-44,$x-2,$y-44,$black);
+ $gd->string(GD::Font->Tiny,$x-18, $y-47,int($max_signal+0.5),$black);
+ }
+}
+
+sub add_subset_key {
+ my ($self,$gd,$subsets) = @_;
+ my @subsets = grep {!/MAX/} @$subsets;
+ return if $self->option('flip vertical');
+ my $first_x = $self->{_xmid}-250;
+ my $first_y = 12;
+ my $key_colors = $self->{_key};
+ my $font = GD::Font->MediumBold;
+ my $width = $self->width;
+ my $total_key_width = 18;
+
+ my ($longest_string) = sort {$b <=> $a}
+ map {$self->string_width($_,$font)} @subsets;
+
+ my $count;
+ my $x = $first_x;
+ my $y = $first_y;
+
+ my $cutoff = 100;
+ if (@subsets > 8 && !(@subsets %2)) {
+ $cutoff = @subsets/2 + 1;
+ }
+ elsif (@subsets > 8) {
+ $cutoff = int(@subsets/2 + 0.5);
+ }
+
+ for my $subset (@subsets) {
+ if (++$count == $cutoff) {
+ $x = $first_x;
+ $y = $first_y + 12;
+ }
+ my $color = $key_colors->{$subset};
+ my $edgecolor = $self->{fgcolor};
+ my $string_width = $self->string_width($subset,$font);
+ $gd->rectangle($x,$y,$x+10,$y+10,$edgecolor);
+ $gd->filledRectangle($x,$y,$x+10,$y+10,$color);
+ $x += 14;
+ $gd->string($font,$x,$y,$subset,$black);
+ $x += $longest_string + 8;
+ }
+}
+
+#--------------------------
+sub getData {
+ my $self = shift;
+ my($ft,$datadir,$chromosome,$start,$stop,$scrstart,$scrstop,$flipped,$gd) = @_;
+ my $global_max_signal = $self->option('max_score') || 0;
+ my %Signals = ();
+ $self->openDataFiles($datadir);
+
+ my $subset_text = $self->option('subset order');
+ if ($subset_text) {
+ my @words = shellwords($subset_text);
+
+ # subset + color
+ if (!(@words %2) && $words[1] =~ /^[0-9A-F]{6}$/ && $words[2] !~ /^[.0-9]+$/) {
+ while (@words) {
+ push @{$ft->{subsetsorder}}, [splice(@words,0,2)];
+ }
+ }
+ # subset + color + alpha
+ elsif (!(@words %3) && $words[1] =~ /^[0-9A-F]{6}$/) {
+ while (@words) {
+ push @{$ft->{subsetsorder}}, [splice(@words,0,3)];
+ }
+ }
+ # no color specified? Random color for you. Good luck!
+ else {
+ for my $word (@words) {
+ push @{$ft->{subsetsorder}}, [$word,$self->random_color()];
+ }
+ }
+
+ }
+
+ my @subsets = (exists $ft->{'subsetsorder'}) ? @{$ft->{'subsetsorder'}} : sort split(/\t+/,$Indices{'subsets'});
+
+ my $user_max = $self->option('max_score');
+
+ # This bit of code reads in user-specified bgcolor, if provided
+ if ( ref $subsets[0] eq 'ARRAY' ) {
+ for (@subsets) {
+ next unless ref $_ eq 'ARRAY';
+ my ($subset,$color,$alpha) = @$_;
+ $alpha ||= $self->option('fill opacity') || 1.0;
+
+ if ($alpha && $alpha > 1) {
+ die "Alpha must be between zero and 1";
+ }
+
+ # make it hex if it looks like hex
+ if ((length $color == 6) && $color =~ /^[0-9A-F]+$/) {
+ $color = '#'.$color;
+ }
+ my $bgcolor = $self->factory->transparent_color($alpha,$color);
+ my $fgcolor = $self->translate_color($color);
+ $self->{bgcolor}->{$subset} = $bgcolor;
+
+ # We will re-use this array later
+ $_ = $subset;
+ }
+ }
+
+ shift(@subsets) if $subsets[0] eq 'MAX';
+ warn("subsets: @subsets\n") if DEBUG;
+
+ my %SubsetsNames = (exists $ft->{'subsetsnames'}) ? %{$ft->{'subsetsnames'}} : map { $_, $_ } @subsets;
+ $SubsetsNames{MAX}= 'MAX';
+ my $screenstep = ($scrstop-$scrstart+1) * 1.0 / ($stop-$start+1);
+ my $donecoords = 0;
+ my $local_max_signal = 0;
+
+ foreach my $subset ( @subsets ) {
+ my $nstrings = 0;
+ # scan seq ranges offsets to see where to start reading
+ my $key = $subset.':'.$chromosome;
+ my $poskey = $key.':offsets';
+ my $ranges_pos = (exists $Indices{$poskey}) ? int($Indices{$poskey}) : -1;
+ if( $ranges_pos == -1 ) { next; } # no such signal..
+ warn(" positioning for $poskey starts at $ranges_pos\n") if DEBUG;
+ if( $start>=1000000 ) {
+ my $bigstep = int($start/1000000.0);
+ if( exists $Indices{$key.':offsets:'.$bigstep} ) {
+ my $jumpval = $Indices{$key.':offsets:'.$bigstep};
+ warn(" jump in offset search to $jumpval\n") if DEBUG;
+ $ranges_pos = int($jumpval); }
+ }
+ seek(DATF,$ranges_pos,0);
+ my($offset,$offset1)= (0,0);
+ my $lastseqloc = -999999999;
+ my $useoffset = 0;
+ while( (my $strs =<DATF>) ) {
+ $nstrings++ if DEBUG;
+ if( DEBUG ) {
+ chop($strs); warn(" positioning read for coord $start ($strs)\n"); }
+ last unless $strs =~m/^(-?\d+)[ \t]+(\d+)/;
+ my($seqloc,$fileoffset)= ($1,$2);
+ if( DEBUG ) {
+ chop($strs); warn(" positioning read for $poskey => $seqloc, $fileoffset ($strs)\n"); }
+ $offset1 = $offset;
+ $offset = $fileoffset;
+ $lastseqloc = $seqloc;
+ if( $seqloc > $start ) { $useoffset = int($offset1); last; }
+ }
+ warn(" will use offset $useoffset\n") if DEBUG;
+ warn(" (scanned $nstrings offset strings)\n") if DEBUG;
+ if( $useoffset ==0 ) { # data offset cannot be 0 - means didn't find where to read required data..
+ next;
+ my @emptyvals = ();
+ for( my $ii = $scrstart; $ii++ <= $scrstop; ) { push(@emptyvals,0); }
+ $Signals{$subset}= \@emptyvals;
+ }
+ $nstrings = 0;
+ # read signal profile
+ seek(DATF,$useoffset,0);
+ $lastseqloc = -999999999;
+ my $lastsignal = 0;
+ my($scrx,$scrxold)= ($scrstart,$scrstart-1);
+ my $runmax = 0;
+ my @values = ();
+ my @xscreencoords = ();
+
+ while( (my $str =<DATF>) ) {
+ $nstrings++ if DEBUG;
+ unless( $str =~m/^(-?\d+)[ \t]+(\d+)/ ) {
+ warn(" header read: $str") if DEBUG;
+ last; # because no headers were indexed at the beginning of data packs
+ }
+ my($seqloc,$signal)= ($1,$2);
+ my $real_signal = $signal;
+ $signal = $user_max if $user_max && $signal > $user_max;
+ $local_max_signal = $signal if $signal > $local_max_signal;
+
+ warn(" signal read: $seqloc, $signal line: $str") if DEBUG;
+ last if $lastseqloc > $seqloc; # just in case, as all sits merged in one file..
+ if( $seqloc>=$start ) { # current is the next one after the one we need to start from..
+ unless( $lastseqloc == -999999999 ) { # expand previous
+ $lastseqloc = $start-2 if $lastseqloc<$start; # limit empty steps (they may start from -200000)
+ while( $lastseqloc < $seqloc ) { # until another (one we just retrieved) wiggle reading
+ last if $lastseqloc > $stop; # end of subset data
+ next if $lastseqloc++ < $start;
+ # we have actual new seq position in our required range
+ my $scrpos = int($scrx);
+ $runmax = $lastsignal if $runmax < $lastsignal;
+ if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position
+ push(@values,$runmax);
+ push(@xscreencoords,$scrpos) unless $donecoords;
+ #print STDERR Dumper \@xscreencoords unless $donecoords;
+ $scrxold = $scrpos;
+ $runmax = 0;
+ }
+ $scrx += $screenstep; # remember - it is not integer
+ }
+ }
+ }
+ ($lastseqloc,$lastsignal)= ($seqloc,$signal);
+ last if $seqloc > $stop; # end of subset data
+ }
+ if( $lastseqloc < $stop ) { # if on the end of signal profile, but still in screen range
+ # just assume that we are getting one more reading with signal == 0
+ my $signal = 0;
+ while( $lastseqloc++ < $stop ) {
+ my $scrpos = int($scrx);
+ if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position
+ push(@values,$signal);
+ push(@xscreencoords,$scrpos) unless $donecoords;
+ $scrxold = $scrpos;
+ }
+ $scrx += $screenstep;
+ }
+ }
+ warn(" (scanned $nstrings signal strings)\n") if DEBUG;
+ $nstrings = 0;
+ if( $flipped ) {
+ my @ch = reverse @values; @values = @ch;
+ }
+ warn(" ".$subset."=> ". at values." values @values\n") if DEBUG && $#values<1000;
+ $Signals{$subset}= \@values;
+ $Signals{screencoords}= \@xscreencoords unless $donecoords;
+ $donecoords = 1;
+ } # foreach my $subset ( @subsets ) {
+
+ # scaling can be local, user-defined max or global max
+ my $scale = $self->option('autoscale') || 'global';
+ if ($scale eq 'local' && !$user_max) {
+ $self->max_score($local_max_signal);
+ }
+ else {
+ $self->max_score($user_max || $Indices{max_signal});
+ }
+
+ warn(" max_signal => ".$self->max_score." \n") if DEBUG;
+
+ # prepare MAX profile - will be used as a base for exon/UTR prediction
+ $self->{no_max} = defined $self->option('show max') && ! $self->option('show max');
+ unless ($self->{no_max}) {
+ my @maxprofile = ();
+ my @ruler = @{$Signals{screencoords}};
+ for( my $npos = 0; $npos<=$#ruler; $npos++ ) {
+ my $maxval = 0;
+ foreach my $subset ( @subsets ) {
+ my $p = $Signals{$subset};
+ my $val = $p->[$npos];
+ $maxval = $val if $maxval < $val;
+ }
+ push(@maxprofile,$maxval);
+ }
+ $Signals{MAX}= \@maxprofile;
+ warn(" MAX => ". at maxprofile." values @maxprofile\n") if DEBUG && $#maxprofile<1000;
+ unshift(@subsets,'MAX');
+ }
+
+ return(\@subsets,\%SubsetsNames, \%Signals);
+}
+
+#--------------------------
+sub openDataFiles {
+ my $self = shift;
+ my $datadir = shift;
+ $datadir.= '/' unless $datadir =~m|/$|;
+ my $datafile = $datadir.'data.cat';
+ open(DATF,$datafile) || die("cannot open $datafile\n");
+ use BerkeleyDB; # caller should already used proper 'use lib' command with path
+ my $bdbfile = $datadir . 'index.bdbhash';
+ tie %Indices, "BerkeleyDB::Hash", -Filename => $bdbfile, -Flags => DB_RDONLY || warn("can't read BDBHash $bdbfile\n");
+ if( DEBUG ) { foreach my $kk ( sort keys %Indices ) { warn(" $kk => ".$Indices{$kk}."\n"); } }
+ return;
+}
+
+sub min_score {
+ # not implemented
+}
+
+sub max_score {
+ my $self = shift;
+ my $score = shift;
+ $self->{max_score} ||= $score;
+ return $self->{max_score};
+}
+
+sub random_color {
+ my $self = shift;
+ my @nums = 0..9,'A'..'F';
+ my $color;
+ for (0..5) {
+ my @array = shuffle(@nums);
+ my $char = shift @array;
+ $color .= $char;
+ }
+ return $color;
+}
+
+
+1;
+
+
diff --git a/scripts/bam_coverage_windows.pl b/scripts/bam_coverage_windows.pl
new file mode 100755
index 0000000..f593df3
--- /dev/null
+++ b/scripts/bam_coverage_windows.pl
@@ -0,0 +1,131 @@
+#!/usr/bin/perl -w
+use strict;
+use List::Util 'sum';
+use Getopt::Long;
+
+use vars qw/$start $end $current_chr @scores %seen %highest $win $normal $bam/;
+
+use constant WIN => 25;
+
+# A script to make bam coverage windows in WIG/BED 4 format
+# requires that samtools be installed
+# This script operates on one bam file at a time. If you are comparing
+# across bam files of diffenrent sizes (read numbers), take note
+# of the normalization option.
+# Sheldon McKay (sheldon.mckay at gmail.com)
+
+
+BEGIN {
+ die "samtools must be installed!" unless `which samtools`;
+}
+
+
+GetOptions ("normalize=i" => \$normal,
+ "bam=s" => \$bam,
+ "window=i" => \$win);
+
+$bam or usage();
+
+open BAM, "samtools depth $bam |";
+
+$win ||= WIN;
+$start = 1;
+$end = $win;
+
+my $factor = normalization_factor($normal);
+
+chomp(my $name = `basename $bam .bam`);
+print qq(track type=wiggle_0 name="$name" description="read coverage for $bam (window size $win)"\n);
+
+while (<BAM>) {
+ chomp;
+ my ($chr,$coord,$score) = split;
+ $current_chr ||= $chr;
+
+ check_sorted($chr,$coord);
+
+ if ( $chr ne $current_chr ||
+ $coord > $end ) {
+ open_window($chr,$coord);
+ }
+
+ push @scores, $score;
+ $current_chr = $chr;
+}
+
+
+sub open_window {
+ my ($chr,$coord) = @_;
+
+ if ($chr ne $current_chr) {
+ $seen{$current_chr}++;
+ }
+
+ # close the last window, if needed
+ if (@scores > 0) {
+ close_window();
+ }
+
+ $start = nearest_start($coord);
+ $end = $start + $win;
+}
+
+sub close_window {
+ my $sum = sum(@scores) or return 0;
+ my $score = $sum/$win;
+ $score *= $factor;
+ print join("\t",$current_chr,$start,$end,$score), "\n";
+ @scores = ();
+ exit 0 unless $score;
+}
+
+sub nearest_start {
+ my $start = shift;
+
+ return 1 if $start < $win;
+
+ while ($start % $win) {
+ $start--;
+ }
+
+ return $start;
+}
+
+sub normalization_factor {
+ return 1 unless my $nahm = shift;
+ print STDERR "Calculating total number of reads in $bam\n";
+ chomp(my $total = `samtools view -c $bam`);
+ print STDERR "$bam has $total reads\n";
+ return $total/$nahm;
+}
+
+# sanity check for unsorted bam
+sub check_sorted {
+ my ($chr,$coord) = @_;
+
+ return 1 if $coord > ($highest{$chr} || 0);
+ $highest{$chr} = $coord;
+
+ return 1 unless $seen{$chr};
+
+ die_unsorted($chr,$coord);
+}
+
+sub die_unsorted {
+ my ($chr,$coord) = @_;
+ die "$chr $coord: $bam does not appear to be sorted\n",
+ "Please try 'samtools sort' first";
+}
+
+sub usage {
+ die '
+Usage: perl bam_coverage_windows.pl -b bamfile -n 10_000_000 -w 25
+ -b name of bam file to read REQUIRED
+ -w window size (default 25)
+ -n normalized read number -- if you will be comparing multiple bam files
+ select the read number to normalize against.
+ all counts will be adjusted by a factor of:
+ actual readnum/normalized read num
+
+'
+}
diff --git a/scripts/coverage_to_topoview.pl b/scripts/coverage_to_topoview.pl
new file mode 100755
index 0000000..cc432d6
--- /dev/null
+++ b/scripts/coverage_to_topoview.pl
@@ -0,0 +1,247 @@
+#!/usr/bin/perl -w
+use strict;
+use BerkeleyDB;
+use Data::Dumper;
+use Getopt::Long;
+
+# Based on http:/flybase.org/static_pages/docs/software/index_cov_files.pl
+# This script will process the output of bam_coverage_windows.pl
+# to the data structure needed by the topoview glyph
+# Sheldon McKay (sheldon.mckay at gmail.com)
+
+my ($log,$outdir,$help);
+GetOptions (
+ "log" => \$log,
+ "output-dir=s" => \$outdir,
+ "help" => \$help
+);
+
+my $usage = q(
+Usage: perl coverage_to_topoview.pl [-o output_dir] [-h] [-l] file1.wig.gz file2.wig.gz ...
+ -o output directory (default 'topoview')
+ -l use log2 for read counts (recommended)
+ -h this help message
+);
+
+die $usage if !@ARGV || $help;
+
+$outdir ||= 'topoview';
+
+my (%bdb_hash,$max_signal, at SubsetNames);
+
+system "mkdir -p $outdir";
+
+my $outfile = "$outdir/data.cat";
+unlink $outfile if -e $outfile;
+
+open( COV, '>' . $outfile ) || die "Cannot open $outfile!";
+my $hashfile = "$outdir/index.bdbhash";
+unlink $hashfile if -e $hashfile;
+
+%bdb_hash = ();
+tie(%bdb_hash, "BerkeleyDB::Hash",
+ -Filename => $hashfile,
+ -Flags => DB_CREATE);
+
+$max_signal = 0;
+ at SubsetNames = ();
+
+for my $file ( sort @ARGV ) {
+ next unless is_bed4($file);
+ indexCoverageFile($file);
+}
+
+$bdb_hash{'subsets'} = join( "\t", @SubsetNames );
+$bdb_hash{'max_signal'} = $max_signal;
+
+my @all_keys = keys %bdb_hash;
+
+for my $kkey ( sort @all_keys ) {
+ print "\t$kkey => " . $bdb_hash{$kkey} . "\n";
+}
+
+if ( $max_signal > 10000 ) {
+ warn "WARNING: max_signal=$max_signal - TOO HIGH. Consider log2?\n";
+}
+
+untie %bdb_hash;
+chmod( 0666, $hashfile ); # ! sometimes very important
+
+close COV;
+
+
+sub is_bed4 {
+ my $file = shift;
+ my $cat = $file =~ /gz$/ ? 'zcat'
+ : $file =~ /bz2/ ? 'bzcat'
+ : 'cat';
+ open WIG, "$cat $file |" or die "could not open $file: $!";
+
+ my $idx;
+ while (<WIG>) {
+ next if /^track/;
+ last if ++$idx > 9;
+
+ my ($ref,$start,$end,$score, at other) = split "\t";
+ if (@other > 0) {
+ die "Extra fields, I was expecting BED4";
+ }
+ unless ($ref && $start && $end && $score) {
+ die "Not enough fields, I was expecting BED4";
+ }
+ unless (is_numeric($start) && is_numeric($end)) {
+ die "start ($start) and end ($end) are supposed to be numbers";
+ }
+ unless (is_numeric($score)) {
+ die "score ($score) is not numeric"
+ }
+ }
+
+ return 1;
+}
+
+sub is_numeric {
+ no warnings;
+ return defined eval { $_[ 0] == 0 };
+}
+
+sub indexCoverageFile {
+ my $file = shift;
+ my $zcat = get_zcat($file);
+
+ open( INF, "$zcat $file |" ) || die "Can't open $file";
+
+ chomp(my $SubsetName = `basename $file .wig.gz`);
+
+ print STDERR "Subset=$SubsetName\n";
+
+ push( @SubsetNames, $SubsetName );
+
+ my $old_ref = "";
+ my @offsets = ();
+
+ my $step = 1000;
+ my $coordstep = 5000;
+ my $counter = 0;
+ my $offset = tell(COV);
+
+ $bdb_hash{$SubsetName} = $offset; # record offset where new subset data starts
+
+ my $old_signal = 0;
+ my $old_coord = -200000;
+ my $start = 0;
+ my $lastRecordedCoord = -200000;
+
+ my @signals;
+ while (<INF>) {
+ $offset = tell(COV);
+
+ next if /^$/ || /^\#/;
+ my ($ref,$start,$end,$signal) = split;
+
+ $signal = log($signal)/log(2) if $log;
+
+ $start += 1; # zero-based, half-open coords in BED
+
+ $signal = 0 if $signal < 0;
+
+ # New chromosome
+ if ( $ref ne $old_ref ) {
+ print STDERR "chromosome = $ref\n";
+ dumpOffsets( $start, $SubsetName . ':' . $old_ref, @offsets )
+ unless $old_ref eq ""; # previous subset:arm
+ $old_ref = $ref;
+ print COV "# subset=$SubsetName chromosome=$old_ref\n";
+ $offset = tell(COV);
+ $bdb_hash{ $SubsetName . ':' . $old_ref } =
+ $offset; # record offset where new subset:arm data starts
+ @offsets = ("-200000\t$offset");
+ print COV "-200000\t0\n"; # insert one fictive zero read
+ $offset = tell(COV);
+ print COV "0\t0\n"; # insert one more fictive zero read
+ push( @offsets, "0\t$offset" );
+ $counter = 0;
+ $old_signal = 0;
+ $old_coord = 0;
+ $lastRecordedCoord = 0;
+ }
+
+
+ # fill in holes in coverage with 0
+ if ($start > $old_coord+1 && $old_signal > 0) {
+ print COV join("\t",++$old_coord,0), "\n";
+ $old_coord++;
+ $counter++;
+ $offset = tell(COV);
+ $old_signal = 0;
+ }
+
+ if ( $signal == $old_signal) {
+ $old_coord = $end;
+ next;
+ }
+
+ $max_signal = $signal if $max_signal < $signal;
+ if ( $counter++ > $step
+ || $start - $lastRecordedCoord > $coordstep )
+ {
+ push( @offsets, "$start\t$offset" );
+ $counter = 0;
+ $lastRecordedCoord = $start;
+ }
+
+ $old_coord = $end;
+ $old_signal = $signal;
+
+ print COV join("\t",$start,$signal), "\n";
+ }
+
+ # don't forget to dump offsets data on file end..
+ dumpOffsets( $start,$SubsetName . ':' . $old_ref, @offsets )
+ unless $old_ref eq ""; # previous subset:arm
+ close(INF);
+ return;
+}
+
+sub dumpOffsets {
+ my ( $start, $key, @offsetlines ) = @_;
+ print COV "# offsets for $key\n";
+ my $offset = tell(COV);
+ my $prevoffset = $offset;
+ $bdb_hash{ $key . ':offsets' } = $offset
+ ; # record offset where offsets VALUES for subset:arm data start (skip header)
+ my $oldbigstep = 0;
+ foreach my $str (@offsetlines) {
+ print COV $str . "\n";
+ my ( $start, $floffset ) = split( /[ \t]+/, $str );
+
+ # following wasn't working properly..
+ my $newbigstep = int( $start / 1000000.0 );
+ if ( $newbigstep > $oldbigstep ) {
+ $bdb_hash{ $key . ':offsets:' . $newbigstep } =
+ $prevoffset; # one before is the right start
+ $oldbigstep = $newbigstep;
+ }
+ $prevoffset = $offset;
+ $offset = tell(COV);
+ }
+ return;
+}
+
+#***********************************************************
+#
+#***********************************************************
+
+sub get_zcat {
+ my $fullfile = shift;
+ if ( $fullfile =~ /\.gz$/i ) {
+ my $zcat = `which zcat`;
+ if ( $? != 0 ) { $zcat = `which gzcat`; }
+ chomp($zcat);
+ return ($zcat);
+ }
+ elsif ( $fullfile =~ /\.bz2$/i ) { return ('bzcat'); }
+ return ('/bin/cat');
+}
+
+#*******
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbio-graphics-perl.git
More information about the debian-med-commit
mailing list