[med-svn] [Git][med-team/mcl][master] 2 commits: Re-vendor mcxdeblast back

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Wed Nov 16 12:31:53 GMT 2022



Nilesh Patra pushed to branch master at Debian Med / mcl


Commits:
fc7cd979 by Nilesh Patra at 2022-11-16T17:56:11+05:30
Re-vendor mcxdeblast back

- - - - -
ae5328e8 by Nilesh Patra at 2022-11-16T17:56:11+05:30
Upload to unstable

- - - - -


4 changed files:

- debian/changelog
- debian/copyright
- debian/mcl.install
- + debian/mcxdeblast


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+mcl (1:22-282+ds-2) unstable; urgency=medium
+
+  * Team Upload.
+  * Re-vendor mcxdeblast back (Closes: #1024170)
+
+ -- Nilesh Patra <nilesh at debian.org>  Wed, 16 Nov 2022 17:51:43 +0530
+
 mcl (1:22-282+ds-1) unstable; urgency=medium
 
   * New upstream version


=====================================
debian/copyright
=====================================
@@ -40,22 +40,17 @@ Comment: Citation
  An efficient algorithm for large-scale detection of protein families,
  Nucleic Acids Research 30(7):1575-1584 (2002).
 
-Files: src/alien/oxygen/src/mcxdeblast
-Copyright: © 2002-2009 Stijn van Dongen,
-           © 2004 Jason Stajich
-License: GPL-3+
-
-Files: util/rand.*
-Copyright: © 2004-2009 Stijn van Dongen,
-           © 2005 Jochen Voss
-License: GPL-3+
-
 Files: debian/*
 Copyright: © 2001-2007 Joost van Baal <joostvb at debian.org>,
            © 2007-2013 Philipp Benner <philipp at debian.org>,
            © 2014-2017 Andreas Tille <tille at debian.org>
 License: GPL-3+
 
+Files: debian/mcxdeblast
+Copyright: © 2002-2009 Stijn van Dongen,
+           © 2004 Jason Stajich
+License: GPL-3+
+
 License: GPL-3+
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by


=====================================
debian/mcl.install
=====================================
@@ -1,3 +1,4 @@
 usr/bin
-scripts/clx*	usr/share/doc/mcl/scripts
-scripts/minimcl	usr/share/doc/mcl/scripts
+debian/mcxdeblast	usr/bin
+scripts/clx*		usr/share/doc/mcl/scripts
+scripts/minimcl		usr/share/doc/mcl/scripts


=====================================
debian/mcxdeblast
=====================================
@@ -0,0 +1,489 @@
+#!/usr/bin/perl -w
+
+use Getopt::Long;
+
+# (c) Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Stijn van Dongen
+# (c) Copyright 2004 Jason Stajich (column output parsing)
+#
+# Other (C)ontributors;
+#     Dinakarpandian Deendayal      (general comments)
+#     Abel Ureta-Vidal              (general comments)
+#     Daniel Lundin                 (regexp tweak, warning)
+#
+# You can redistribute and/or modify this program under the terms of the GNU
+# General Public License;  either version 3 of the License or (at your option)
+# any later version.
+
+#
+# TODO
+#
+#  -  Optionally read from STDIN
+#  -  Require tab file for line-mode={123,packed}
+#
+#  -  Infer hsp_len from long output (?)
+#  -  (Perhaps) enable section parsing ala tribe, for finer control.   
+#  -  $:: usage is arbitrary and ugly. pipe down.
+#  -  Is the m9/seenlfgt combination correct?
+#  -  Check whether m9 output state is correct if errors occur.
+
+
+# Defaults for every command line option, followed by the option parsing
+# itself. Variables written as $::name are package globals read by the
+# subroutines further down; the 'my' variables are file-level lexicals.
+$^W = 1;
+use strict;
+
+# --sort: order used when the tab file is written.
+$::mode_sort   = 'o';      # a lphabetical
+                           # o ccurrence
+
+# --score: which value getscore() turns into the edge score. Besides the two
+# values listed here, 'r' is accepted by the validation below.
+$::mode_score  = 'e';      # e value
+                           # b it
+
+my $line_mode  =  "";      # default is raw mode, not line mode
+$::name_out    =  '-';     # --out, only consulted in line mode; '-' selects STDOUT
+
+# --bcut / --ecut / --rcut: thresholds applied by getscore().
+$::bcut = 0;
+$::ecut = 0;
+$::rcut = 0;
+
+my $blastfix = "";         # --xi-dat: suffix of the input file name
+my $addfix = "";           # --xo-dat: suffix appended to the output base name
+
+my $user_tabfile = "";     # --tab: user supplied label/index table
+
+# Values accepted for --line-mode.
+my %line_modes = qw ( abc 1 123 1);
+
+my $obase = "";            # --base: base name of the generated files
+my $stdhandler = 0;        # --stdhandler: in raw mode use STDOUT/STDERR instead of files
+
+my $help = 0;              # --help / --apropos
+my $m9 = 0;                # --m9: input is column formatted
+my $abc = 0;               # not referenced again in this script
+
+
+
+# Parse the command line; any unknown or malformed option ends the run with
+# exit status 1.
+if
+(! GetOptions
+   (  "help"            =>    \$help
+   ,  "apropos"         =>    \$help
+   ,  "sort=s"          =>    \$::mode_sort
+   ,  "ecut=f"          =>    \$::ecut
+   ,  "bcut=f"          =>    \$::bcut
+   ,  "rcut=f"          =>    \$::rcut
+   ,  "score=s"         =>    \$::mode_score
+   ,  "m9"              =>    \$m9
+   ,  "tab=s"           =>    \$user_tabfile
+   ,  "xo-dat=s"        =>    \$addfix
+   ,  "xi-dat=s"        =>    \$blastfix
+   ,  "base=s"          =>    \$obase
+   ,  "stdhandler"      =>    \$stdhandler
+   ,  "line-mode=s"     =>    \$line_mode
+   ,  "out=s"           =>    \$::name_out
+   )
+)
+   {  print STDERR "option processing failed\n";
+      exit(1);
+   }
+
+# help() ends with a print, whose true return value lets the exit run.
+help() && exit(0) if $help;
+
+# Reject option values the rest of the script does not understand.
+# --sort accepts a (alphabetical) or o (occurrence).
+die "unknown sort mode <$::mode_sort>"
+   unless $::mode_sort =~ /^[ao]$/;
+# --line-mode, when given, must be one of the keys of %line_modes.
+die "unknown line mode <$line_mode>"
+   if ($line_mode && !defined($line_modes{$line_mode}));
+# --score accepts e (E-value), b (bit score) or r (bit score / hsp length).
+# The message used to say "sort mode", which pointed at the wrong option.
+die "unknown score mode <$::mode_score>"
+   unless $::mode_score =~ /^[ebr]$/;
+   
+
+# The single positional argument names the BLAST file. Unless --base was
+# given, the output base name starts out equal to it.
+my $fname = shift || die "please submit name of blast file\n";
+$obase ||= $fname;
+
+
+# --xi-dat=<suf>: the base name is the file name without ".<suf>". When the
+# file name does not carry the suffix yet, it is appended to the name that
+# will be opened instead.
+if ($blastfix) {
+   my $suffix_re = qr/\Q.$blastfix\E$/;
+   $obase = $fname;
+   $fname .= ".$blastfix" unless $obase =~ s/$suffix_re//;
+}
+
+# --xo-dat=<suf>: extend the base name.
+$obase .= ".$addfix" if $addfix;
+
+# File-level state shared with the subroutines further down.
+my ($gix, $giy);           # query label / hit label used by munge_long
+$::seenlft = {};           # id -> times seen as a query (see getid)
+$::seenrgt = {};           # id -> times seen at all, query or hit (see getid)
+my $tagTocc = {};          # label -> id, assigned by getid on first occurrence
+my $me = "[$0] ";          # prefix for the final diagnostics
+my $lc = 0;                # input line counter maintained by munge_long
+my $fhin = \*STDIN;
+
+# All opens below use the three-argument form so that a file name is always
+# taken literally: with the two-argument form, leading/trailing white space is
+# stripped and characters such as '>' or '|' in a user supplied name change
+# what gets opened.
+if ($fname ne '-') {
+   open(F_BLAST, '<', $fname) || die "cannot open $fname\n";
+   $fhin = \*F_BLAST;
+}
+
+$::TAB_user = {};
+
+if ($user_tabfile) {
+   read_tab($user_tabfile, $::TAB_user);
+}
+
+
+$::f_raw = undef;
+my $f_err = undef;
+
+if ($line_mode) {
+   # Line mode: results go to --out (STDOUT for '-' or an empty name),
+   # error notes go to STDERR.
+   if (!$::name_out || $::name_out eq '-') {
+      $::f_raw = \*STDOUT;
+   }
+   else {
+      open(F_RAW, '>', $::name_out) || die "cannot open $::name_out";
+      $::f_raw = \*F_RAW;
+   }
+   $f_err = \*STDERR;
+
+   # The stream cookies are deliberately left disabled.
+   if ($line_mode eq 'abc') {
+      # print $::f_raw "#aa# (stream abc cookie)\n";
+   }
+   elsif ($line_mode eq '123') {
+      # print $::f_raw "#11# (stream abc cookie)\n";
+   }
+}
+else {
+   # Raw mode: either the standard handles (--stdhandler) or the files
+   # <base>.raw and <base>.err.
+   if ($stdhandler) {
+      $::f_raw = \*STDOUT;
+      $f_err = \*STDERR;
+   }
+   else {
+      open(F_RAW, '>', "$obase.raw") || die "cannot open $obase.raw\n";
+      $::f_raw = \*F_RAW;
+      open(F_ERR, '>', "$obase.err") || die "cannot open $obase.err\n";
+      $f_err = \*F_ERR;
+   }
+}
+
+# Next id handed out by getid when no user tab file is in use.
+$::ID = 0;
+
+# Column formatted input (--m9) and the long report format have separate
+# parsers with the same calling convention.
+my $munger = $m9 ? \&munge_linewise : \&munge_long;
+$munger->($fhin, $::f_raw, $line_mode);
+
+close($::f_raw);
+
+# Number of distinct ids encountered, reported in the final diagnostics.
+$::CT = keys %{ $::seenrgt };
+
+
+my $alnum = 0;             # next index written to the tab file
+my $occTmisc = {};         # occurrence id -> [ tab index, label ]
+
+# Without a user tab file, and only in raw mode, write the companion files:
+#   <base>.tab   tab index and label, one pair per line
+#   <base>.map   matrix mapping each occurrence id to its tab index
+#   <base>.hdr   matrix header giving the dimensions (number of ids)
+# The die messages name the file that actually failed to open; the .map and
+# .tab messages used to mention a non-existent <base>.idx.
+if (!$user_tabfile && !$line_mode) {
+   open(F_HDR, '>', "$obase.hdr") || die "cannot open $obase.hdr\n";    # header
+   open(F_MAP, '>', "$obase.map") || die "cannot open $obase.map\n";    # map file
+   open(F_TAB, '>', "$obase.tab") || die "cannot open $obase.tab\n";    # indices
+   print F_TAB "#<mapped index> <tag>\n";
+
+   if ($::mode_sort eq 'a') {
+      print F_TAB "# sort mode is alphabetical\n";
+      for (sort {$::a cmp $::b; } keys %$tagTocc) {
+         print F_TAB "$alnum $_\n";
+         $occTmisc->{$tagTocc->{$_}} = [ $alnum, $_ ];
+         $alnum++;
+      }
+      print STDERR "Index [$obase.tab] is sorted by alphabetic order\n";
+   }
+   else {
+      # The sort mode was validated to be 'a' or 'o', so this is mode 'o'.
+      print F_TAB "# sort mode is by occurrence\n";
+      for (sort {$tagTocc->{$::a} <=> $tagTocc->{$::b}; } keys %$tagTocc) {
+         print F_TAB "$alnum $_\n";
+         $occTmisc->{$tagTocc->{$_}} = [ $alnum, $_ ];
+         $alnum++;
+      }
+      print STDERR "Index [$obase.tab] is sorted by occurrence order\n";
+      print STDERR "Primary and secondary occurrences are considered equal\n";
+   }
+
+   my $ct = keys %$occTmisc;
+   print F_MAP "(mclheader\nmcltype matrix\ndimensions $ct", 'x',
+               "$ct\n)\n(mclmatrix\nbegin\n";
+   # One row per occurrence id, in numerical order, holding its tab index.
+   for (sort {$::a <=> $::b; } keys %$occTmisc) {
+      # print F_MAP "$_ ", $occTmisc->{$_}[0], " ", $occTmisc->{$_}[1], "\n";
+      print F_MAP  "$_ $occTmisc->{$_}[0] \$\n";
+   }
+   print F_MAP ")\n";
+
+   print F_HDR "(mclheader\nmcltype matrix\ndimensions ";
+   print F_HDR $::ID . 'x' . $::ID;
+   print F_HDR "\n)\n";
+   close F_TAB;
+   close F_HDR;
+   close F_MAP;
+}
+
+
+# Every id recorded by getid should also have been seen as a query. List the
+# ones that were not on the error handle and summarise on STDERR.
+my @not_primary = grep { !$::seenlft->{$_} } sort keys %$::seenrgt;
+
+for (@not_primary) {
+   print $f_err "secondary element $_ not seen as primary element\n";
+   print $f_err "emergency measure: added the element to the primary list\n";
+}
+
+my $n_err = scalar @not_primary;
+
+if (!$n_err) {
+   print STDERR $me,
+   "all secondary elements were also seen as primary elements (check ok)\n";
+}
+else {
+   my @summary =
+   (  "$n_err secondary elements not seen as primary element\n"
+   ,  "I added all of them\n"
+   ,  "There were $::CT elements in all\n"
+   );
+   print STDERR $me, $_ for @summary;
+}
+
+
+# munge_linewise(fh_in, fh_raw, line_mode)
+#
+# Parse column formatted BLAST output read from fh_in, one hit per line,
+# fields separated by white space. Fields 1, 2, 4, 11 and 12 are used as
+# query label, hit label, hsp length, E-value and bit score. Lines starting
+# with '#' are skipped.
+#
+# Output on fh_raw:
+#   raw mode (line_mode false)  one row per run of identical query labels,
+#                               "<idx> <idy>:<score> <idy>:<score> ... $"
+#   line mode                   "<query label>\t<hit label>\t<score>" per hit
+#
+# Ids come from getid(), scores from getscore(). Lines whose query has no id
+# (getid returned a negative value) are skipped entirely; hits without an id
+# are dropped from the output.
+sub munge_linewise {
+
+   my ($fh_in, $fh_raw, $line_mode) = @_;
+
+   # Label of the query whose row is currently open; "" before the first one.
+   my $gix_prev = "";
+
+   while (<$fh_in>) {
+
+      next if /^#/;
+      chomp;
+
+      my ($gix, $giy, $hsp_len, $e, $b) = (split ' ', $_)[0, 1, 3, 10, 11];
+
+      # Blank lines and lines with a single field carry no hit. Passing their
+      # undefined labels on would register a bogus empty label in getid().
+      next unless defined $giy;
+
+      my $idx = getid($gix, 1);
+      next unless $idx >= 0;
+
+      if ($gix_prev ne $gix && !$line_mode) {
+         # Compare against "" rather than testing truth: a query labelled
+         # "0" is a valid previous query and its row must be terminated too.
+         print $fh_raw "\$\n" if $gix_prev ne "";
+         print $fh_raw "$idx ";
+      }
+
+      my $idy = getid($giy, 0);
+      my $s = getscore($e, $b, $hsp_len);
+
+      if ($idy >= 0) {
+         if (!$line_mode) {
+            print $fh_raw "$idy:$s ";
+         }
+         else {
+            print $fh_raw "$gix\t$giy\t$s\n";
+         }
+      }
+      $gix_prev = $gix;
+   }
+
+   # Terminate the last open row.
+   print $fh_raw "\$\n" if $gix_prev ne "" && !$line_mode;
+}
+
+
+# munge_long(fh_in, fh_raw, line_mode)
+#
+# Parse a long format BLAST report read from fh_in with a three state
+# machine (waiting for a 'Query=' line, waiting for the hit table header,
+# reading hit lines). In raw mode (line_mode false) each query produces one
+# row "<idx> <idy>:<score> ... $" on fh_raw; in line mode each hit produces
+# "<hit label>\t<query label>\t<score>".
+#
+# Uses the file-level variables $gix, $giy, $lc and $f_err. Ids come from
+# getid(), scores from getscore().
+sub munge_long {
+
+   my ($fh_in, $fh_raw, $line_mode) = @_;
+
+   # Parser states.
+   my $need_query = 1;
+   my $need_hits = 2;
+   my $need_gi = 3;
+
+   my $state = $need_query;
+
+   while (<$fh_in>) {
+      $lc++;
+      chomp;
+      # Start of a query section: the label is the number following 'gi|'
+      # or, failing that, the first token after 'Query='.
+      if (/^Query=\s+gi\|(\d+(_\d+)?)/ || /^Query=\s+(\S+).*$/) {
+# warn "STATE query $1\n";
+         if ($state != $need_query) {
+            print STDERR "Unexpected 'Query=' line\n";
+         }
+         $gix = $1;
+         my $idx = getid($gix, 1);
+
+         if ($idx >= 0) {
+            print $fh_raw "$idx " if !$line_mode;
+         }
+         $state = $need_hits;
+      }
+      elsif (/^Query=/) {
+         print STDERR "Query string not recognized: $_\n";
+      }
+      # Header of the hit table: hit lines follow.
+      elsif
+      ($state == $need_hits && /sequences producing significant alignments/i) {
+         $state = $need_gi;
+# warn "STATE significant\n";
+      }
+      # Query without hits: close its (empty) row right away.
+      elsif ($state == $need_hits && /no hits found/i) {
+         print STDERR "no hits found for gi $gix\n";
+         print $fh_raw "\$\n" if !$line_mode;
+         $state = $need_query;
+      }
+      # A line of the hit table; lines starting with '>' are excluded.
+      elsif
+      (  $state == $need_gi
+      && ! /^>/
+      && (/^gi\|(\d+(_\d+)?)/ || /^(\S+)\s+.*$/)
+      )
+      {  $giy = $1;
+         my $idy = getid($giy, 0);
+         my ($s, $b, $e);
+
+         # The last two white space separated fields are taken as bit score
+         # and E-value, in that order.
+         if (/(\S+)\s+(\S+)\s*$/) {
+            $b = +$1;
+            $e = +$2;
+         }
+         else {
+            print STDERR "no scores in line $lc [$_]!\n";
+            next;
+         }
+
+         # No hsp length is available here, so 0 is passed for it.
+         $s = getscore($e, $b, 0);
+
+         if ($idy >= 0) {               # fixme, void or explain.
+            if (!$line_mode) {
+               print $fh_raw "$idy:$s ";
+            }
+            else {
+               print $fh_raw "$giy\t$gix\t$s\n";
+            }
+         }
+      }
+      elsif (/^\s*$/) {
+         # paragraph skip does not change state, including the $need_gi case.
+      }
+      elsif (/(Statistics|Parameters):/) {
+         $state = $need_query;
+         # this provides WU-blast compatibility.
+      }
+      # Any other line while reading hits ends the hit table: close the row.
+      elsif ($state == $need_gi) {
+         print $fh_raw "\$\n" if !$line_mode;
+         $state = $need_query;
+      }
+   }
+
+   # Input ended inside a hit table: report it and close the open row.
+   if ($state == $need_gi) {
+      print $f_err "run ended while expecting more secondary scores\n";
+      print STDERR "run ended while expecting more secondary scores\n";
+      print $fh_raw "\$\n" if !$line_mode;
+   }
+}
+
+
+
+
+# read_tab(file, tab)
+#
+# Read a tab file and store its entries in the hash referenced by tab,
+# keyed by label with the index as value. Each data line has the form
+# "<digits><white space><label>"; the label is the rest of the line. Lines
+# whose first non-blank character is '#' are skipped, any other line is
+# reported on STDERR and ignored. Dies when the file cannot be opened.
+#
+# The file is opened with the three-argument form of open on a lexical
+# handle, so the name is taken literally, and the handle is closed again
+# when reading is done.
+sub read_tab {
+   my $file = shift;
+   my $tab = shift;
+   open(my $fh_tab, '<', $file) || die "cannot open $file\n";
+   while (<$fh_tab>) {
+      next if /^\s*#/;
+      if (/^(\d+)\s+(.*)/) {
+         $tab->{$2} = $1;
+      }
+      else {
+         print STDERR "___ cannot parse line: $_";
+      }
+   }
+   close($fh_tab);
+}
+
+
+# getscore(e, b, hl)
+#
+# Turn the E-value e, bit score b and hsp length hl of a hit into a score,
+# depending on $::mode_score:
+#   'e'   -log10(e), capped at 200 (also used when e is not positive);
+#         an E-value written as "e-NN" is read as "1e-NN"
+#   'b'   the bit score
+#   'r'   the bit score divided by hl (only when hl is non-zero)
+# A score that does not exceed the matching cut-off ($::ecut, $::bcut,
+# $::rcut) becomes 0, and 0 is returned when no mode applies.
+sub getscore {
+   my ($e, $b, $hl) = @_;
+
+   if ($::mode_score eq 'e') {
+      $e = "1$e" if $e =~ /^e/;
+      my $neglog = 200;
+      if ($e > 0) {
+         $neglog = -log($e)/log(10);
+         $neglog = 200 if $neglog > 200;
+      }
+      return $neglog > $::ecut ? $neglog : 0;
+   }
+
+   if ($::mode_score eq 'b') {
+      return $b > $::bcut ? $b : 0;
+   }
+
+   if ($::mode_score eq 'r' && $hl) {
+      my $ratio = $b / $hl;
+      return $ratio > $::rcut ? $ratio : 0;
+   }
+
+   return 0;
+}
+
+
+# getid(gi, is_a_query)
+#
+# Return the numerical id for label gi.
+#   - With a user tab file the id is looked up in $::TAB_user; an unknown
+#     label is reported on STDERR and -1 is returned without any counting.
+#   - Otherwise ids are handed out from $::ID in order of first occurrence
+#     and remembered in $tagTocc.
+# Every id returned is counted in $::seenrgt, and in $::seenlft as well
+# when is_a_query is true.
+sub getid {
+   my ($gi, $is_a_query) = @_;
+   my $id;
+
+   if (!$user_tabfile) {
+# warn "$is_a_query $gi <-> $::ID\n";
+      $tagTocc->{$gi} = $::ID++ unless exists($tagTocc->{$gi});
+      $id = $tagTocc->{$gi};
+   }
+   elsif (defined($::TAB_user->{$gi})) {
+      $id = $::TAB_user->{$gi};
+   }
+   else {
+      print STDERR "___ no user tab entry for label <$gi>\n";
+      return -1;
+   }
+
+   $::seenrgt->{$id}++;
+   $::seenlft->{$id}++ if $is_a_query;
+   return $id;
+}
+
+
+# help()
+#
+# Print the usage summary on STDOUT. The print is the last statement, so
+# its (true) result is the return value; the caller relies on that in
+# "help() && exit(0)".
+#
+# The --sort line states occurrence as the default, matching the initial
+# value 'o' of $::mode_sort; it used to claim alphabetic sorting.
+sub help {
+   print <<_help_;
+Usage: mcxdeblast <options> file-name
+where file-name is in BLAST NCBI format.
+mcxdeblast will create
+   base.hdr    [to be read by mcxassemble]
+   base.raw    [to be read by mcxassemble]
+   base.map    [to be read by mcxassemble]
+   base.tab    [to be read by clmformat]
+   base.err    [error log]
+where base is derived from or equal to file-name
+Options:
+   --m9              Expect column (-m 9) input.
+   --line-mode=abc   Output simple ID1 ID2 SCORE format.
+   --score=<b|e|r>   Use bit scores, E-values,
+                        or bit scores normalized by hsp-length
+   --sort=<a|o>      Use alphabetic sorting or occurrence (default).
+   --tab=<fname>     Use user-supplied tab file.
+   --xi-dat=<suf>    Strip <suf> from file-name to create output base name.
+   --xo-dat=<suf>    Add <suf> to base name.
+   --bcut=<val>      Ignore hits below bit score <val>
+   --ecut=<val>      Ignore hits below E-value <val>
+   --rcut=<val>      Ignore hits below raw value <val>
+   --out=<fname>     Output file name ('-' for STDOUT)
+_help_
+}
+



View it on GitLab: https://salsa.debian.org/med-team/mcl/-/compare/09e39c4da245e588aaa2b17c03b64bfff9a904c1...ae5328e84c9476a5c3bf6e85da4f290d041ace8c

-- 
View it on GitLab: https://salsa.debian.org/med-team/mcl/-/compare/09e39c4da245e588aaa2b17c03b64bfff9a904c1...ae5328e84c9476a5c3bf6e85da4f290d041ace8c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221116/6a0bc76a/attachment-0001.htm>


More information about the debian-med-commit mailing list