[med-svn] r12427 - in trunk/packages/hmmer/branches: . lkajan/debian lkajan/debian/patches
Laszlo Kajan
lkajan-guest at alioth.debian.org
Mon Oct 15 13:44:02 UTC 2012
Author: lkajan-guest
Date: 2012-10-15 13:44:02 +0000 (Mon, 15 Oct 2012)
New Revision: 12427
Added:
trunk/packages/hmmer/branches/lkajan/debian/README.source
trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop
trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm
trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test
Modified:
trunk/packages/hmmer/branches/
trunk/packages/hmmer/branches/lkajan/debian/changelog
trunk/packages/hmmer/branches/lkajan/debian/patches/series
Log:
lkajan branch adding two jackhmmer features: restart from an HMM model (--hmmprime) and skip the last search (--chkhmmskip)
Property changes on: trunk/packages/hmmer/branches
___________________________________________________________________
Added: svn:ignore
+ tarballs
build-area
Added: trunk/packages/hmmer/branches/lkajan/debian/README.source
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/README.source (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/README.source 2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,7 @@
+hmmer for Debian
+================
+This branch was created to add --chkhmmskip and --hmmprime options. For justification see [1] and [2].
+
+[1] http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017512.html
+[2] http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017510.html
+
Modified: trunk/packages/hmmer/branches/lkajan/debian/changelog
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/changelog 2012-10-15 13:19:24 UTC (rev 12426)
+++ trunk/packages/hmmer/branches/lkajan/debian/changelog 2012-10-15 13:44:02 UTC (rev 12427)
@@ -1,3 +1,11 @@
+hmmer (3.0-4lkajan1) UNRELEASED; urgency=low
+
+ * Local package.
+ * Added --chkhmmskip and --hmmprime options. This is a branch, for
+ justification see d/README.source.
+
+ -- Laszlo Kajan <lkajan at rostlab.org> Mon, 15 Oct 2012 12:53:17 +0200
+
hmmer (3.0-4) unstable; urgency=low
[ Charles Plessy ]
Added: trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop 2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,74 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: --chkhmmskip option
+Forwarded: http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017501.html
+--- a/src/jackhmmer.c
++++ b/src/jackhmmer.c
+@@ -68,6 +68,7 @@
+ { "--tblout", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save parseable table of per-sequence hits to file <s>", 2 },
+ { "--domtblout", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save parseable table of per-domain hits to file <s>", 2 },
+ { "--chkhmm", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save HMM checkpoints to files <s>-<iteration>.hmm", 2 },
++ { "--chkhmmskip", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "skip search after saving HMM checkpoint for last iteration", 2 },
+ { "--chkali", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save alignment checkpoints to files <s>-<iteration>.sto", 2 },
+ { "--acc", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "prefer accessions over names in output", 2 },
+ { "--noali", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "don't output alignments, so output is smaller", 2 },
+@@ -260,6 +261,7 @@
+ if (esl_opt_IsUsed(go, "--tblout")) fprintf(ofp, "# per-seq hits tabular output: %s\n", esl_opt_GetString(go, "--tblout"));
+ if (esl_opt_IsUsed(go, "--domtblout")) fprintf(ofp, "# per-dom hits tabular output: %s\n", esl_opt_GetString(go, "--domtblout"));
+ if (esl_opt_IsUsed(go, "--chkhmm")) fprintf(ofp, "# HMM checkpoint files output: %s-<i>.hmm\n", esl_opt_GetString(go, "--chkhmm"));
++ if (esl_opt_IsUsed(go, "--chkhmmskip"))fprintf(ofp, "# skipped search after saving HMM checkpoint for last iteration\n");
+ if (esl_opt_IsUsed(go, "--chkali")) fprintf(ofp, "# MSA checkpoint files output: %s-<i>.sto\n", esl_opt_GetString(go, "--chkali"));
+ if (esl_opt_IsUsed(go, "--acc")) fprintf(ofp, "# prefer accessions over names: yes\n");
+ if (esl_opt_IsUsed(go, "--noali")) fprintf(ofp, "# show alignments in output: no\n");
+@@ -599,6 +601,10 @@
+ #endif
+ }
+
++ // lkajan: chkhmmskip
++ if( !( esl_opt_GetBoolean(go, "--chkhmmskip") == TRUE && esl_opt_IsOn(go, "--chkhmm") && iteration == maxiterations ) )
++ {
++
+ #ifdef HMMER_THREADS
+ if (ncpus > 0) sstatus = thread_loop(threadObj, queue, dbfp);
+ else sstatus = serial_loop(info, dbfp);
+@@ -619,6 +625,8 @@
+ sstatus, dbfp->filename);
+ }
+
++ } // lkajan: chkhmmskip
++
+ /* merge the results of the search results */
+ for (i = 1; i < infocnt; ++i)
+ {
+--- a/documentation/man/jackhmmer.man
++++ b/documentation/man/jackhmmer.man
+@@ -615,6 +615,21 @@
+ .SH OTHER OPTIONS
+
+ .TP
++.BI --chkhmmskip
++Skip search after saving HMM checkpoint for last iteration. Example: to save
++the model after
++.I 3
++iterations, you would use "\-N 4 \-\-chkhmmskip", and the HMM model is saved to
++.I <chkhmm_prefix>-4.hmm
++\&. This is because model
++.I N
++is saved at the start of iteration
++.I N
++and is built from the results of iteration
++.I N-1
++\&.
++
++.TP
+ .B --nonull2
+ Turn off the null2 score corrections for biased composition.
+
+@@ -685,7 +700,7 @@
+ support. This is the default, but it may have been turned off at
+ compile-time for your site or machine for some reason.
+
+-
++.TP
+ .BI --stall
+ For debugging the MPI master/worker version: pause after start, to
+ enable the developer to attach debuggers to the running master and
Added: trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm 2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,132 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: --hmmprime option
+Forwarded: http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017501.html
+--- a/src/jackhmmer.c
++++ b/src/jackhmmer.c
+@@ -62,6 +62,7 @@
+ /* name type default env range toggles reqs incomp help docgroup*/
+ { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 1 },
+ { "-N", eslARG_INT, "5", NULL, "n>0", NULL, NULL, NULL, "set maximum number of iterations to <n>", 1 },
++ { "--hmmprime", eslARG_INFILE, NULL, NULL, NULL, NULL, NULL, NULL, "HMM checkpoint to prime search with", 1 },
+ /* Control of output */
+ { "-o", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "direct output to file <f>, not stdout", 2 },
+ { "-A", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save multiple alignment of hits to file <s>", 2 },
+@@ -256,6 +257,7 @@
+ fprintf(ofp, "# query sequence file: %s\n", qfile);
+ fprintf(ofp, "# target sequence database: %s\n", dbfile);
+ if (esl_opt_IsUsed(go, "-N")) fprintf(ofp, "# maximum iterations set to: %d\n", esl_opt_GetInteger(go, "-N"));
++ if (esl_opt_IsUsed(go, "--hmmprime")) fprintf(ofp, "# priming HMM checkpoint: %s\n", esl_opt_GetString(go, "--hmmprime"));
+ if (esl_opt_IsUsed(go, "-o")) fprintf(ofp, "# output directed to file: %s\n", esl_opt_GetString(go, "-o"));
+ if (esl_opt_IsUsed(go, "-A")) fprintf(ofp, "# MSA of hits saved to file: %s\n", esl_opt_GetString(go, "-A"));
+ if (esl_opt_IsUsed(go, "--tblout")) fprintf(ofp, "# per-seq hits tabular output: %s\n", esl_opt_GetString(go, "--tblout"));
+@@ -401,6 +403,7 @@
+ int qformat = eslSQFILE_UNKNOWN; /* format of qfile */
+ int dbformat = eslSQFILE_UNKNOWN; /* format of dbfile */
+ ESL_SQFILE *qfp = NULL; /* open qfile */
++ P7_HMMFILE *hfp = NULL; /* open input HMM file */
+ ESL_SQFILE *dbfp = NULL; /* open dbfile */
+ ESL_ALPHABET *abc = NULL; /* sequence alphabet */
+ P7_BUILDER *bld = NULL; /* HMM construction configuration */
+@@ -414,6 +417,7 @@
+ int nnew_targets;
+ int prv_msa_nseq;
+ int status = eslOK;
++ int hstatus = eslOK;
+ int qstatus = eslOK;
+ int sstatus = eslOK;
+
+@@ -480,6 +484,15 @@
+ else if (status != eslOK) esl_fatal ("Unexpected error %d opening sequence file %s\n", status, cfg->qfile);
+ qsq = esl_sq_CreateDigital(abc);
+
++ // lkajan: open HMM file
++ char *hmmfile = esl_opt_GetString(go, "--hmmprime"); // query HMM file
++ if( hmmfile )
++ {
++ status = p7_hmmfile_Open(hmmfile, NULL, &hfp);
++ if (status == eslENOTFOUND) p7_Fail("Failed to open hmm file %s for reading.\n", hmmfile);
++ else if (status == eslEFORMAT) p7_Fail("Unrecognized format, trying to open hmm file %s for reading.\n", hmmfile);
++ else if (status != eslOK) p7_Fail("Unexpected error %d in opening hmm file %s.\n", status, hmmfile);
++ }
+ #ifdef HMMER_THREADS
+ /* initialize thread data */
+ if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu");
+@@ -530,13 +543,22 @@
+ while ((qstatus = esl_sqio_Read(qfp, qsq)) == eslOK)
+ {
+ P7_HMM *hmm = NULL; /* HMM - only needed if checkpointed */
++ P7_HMM *phmm = NULL; /* priming HMM - only needed if primed */
+ P7_HMM **ret_hmm = NULL; /* HMM - only needed if checkpointed */
++ P7_PROFILE *gm = NULL;
+ P7_OPROFILE *om = NULL; /* optimized query profile */
+ P7_TRACE *qtr = NULL; /* faux trace for query sequence */
+ ESL_MSA *msa = NULL; /* multiple alignment of included hits */
+
+ if (esl_opt_IsOn(go, "--chkhmm")) ret_hmm = &hmm;
+
++ // lkajan: read in an HMM and prime search with it - here, so that the below 'continue' does not affect this
++ if(hfp)
++ {
++ hstatus = p7_hmmfile_Read(hfp, &abc, &phmm);
++ if (hstatus != eslOK){ p7_Fail("Failed to read from hmm file %s.\n", hmmfile); phmm = NULL; }
++ }
++
+ nquery++;
+ if (qsq->n == 0) continue; /* skip zero length queries as if they aren't even present. */
+
+@@ -560,6 +582,20 @@
+ {
+ p7_SingleBuilder(bld, qsq, info->bg, ret_hmm, &qtr, NULL, &om); /* bypass HMM - only need model */
+
++ // lkajan: from hmmsearch.c:425
++ if( phmm )
++ {
++ p7_hmm_Destroy(hmm);
++ p7_oprofile_Destroy(om);
++ p7_profile_Destroy(gm);
++
++ hmm = p7_hmm_Clone(phmm);
++ gm = p7_profile_Create (hmm->M, abc);
++ om = p7_oprofile_Create(hmm->M, abc);
++ p7_ProfileConfig(hmm, info->bg, gm, 100, p7_LOCAL); /* 100 is a dummy length for now; and MSVFilter requires local mode */
++ p7_oprofile_Convert(gm, om); /* <om> is now p7_LOCAL, multihit */
++ }
++
+ prv_msa_nseq = 1;
+ }
+ else
+@@ -699,6 +735,8 @@
+
+ esl_msa_Destroy(msa);
+ p7_oprofile_Destroy(om);
++ p7_profile_Destroy(gm);
++ p7_hmm_Destroy(phmm);
+ p7_trace_Destroy(qtr);
+ esl_sq_Reuse(qsq);
+ esl_keyhash_Reuse(kh);
+@@ -732,6 +770,7 @@
+ esl_keyhash_Destroy(kh);
+ esl_sqfile_Close(qfp);
+ esl_sqfile_Close(dbfp);
++ p7_hmmfile_Close(hfp);
+ esl_sq_Destroy(qsq);
+ esl_stopwatch_Destroy(w);
+ p7_builder_Destroy(bld);
+--- a/documentation/man/jackhmmer.man
++++ b/documentation/man/jackhmmer.man
+@@ -62,6 +62,15 @@
+ .B phmmer
+ search.
+
++.TP
++.BI --hmmprime " <hmmfile>"
++Prime search with HMM model
++.I <hmmfile>
++\&. Optional. You can obtain the checkpoint HMM
++using the
++.B --chkhmm
++option, for example. A search restarted from an appropriate HMM checkpoint
++yields the same results as an uninterrupted search.
+
+
+
Added: trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test 2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,60 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: tests for --chkhmmskip and --hmmprime
+Forwarded: http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017501.html
+--- a/testsuite/testsuite.sqc
++++ b/testsuite/testsuite.sqc
+@@ -163,6 +163,8 @@
+ 1 exercise j/--tblout @src/jackhmmer@ --tblout %PHMMER.tbl% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise j/--domtblout @src/jackhmmer@ --domtblout %PHMMER.dtbl% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise j/--chkhmm @src/jackhmmer@ --chkhmm %PHMMER.ch% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
++1 exercise j/--chkhmmskip @src/jackhmmer@ -N 2 --chkhmm %PHMMER.ch% --chkhmmskip !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
++1 exercise j/--hmmprime @src/jackhmmer@ --hmmprime %PHMMER.ch-2.hmm% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise j/--chkali @src/jackhmmer@ --chkali %PHMMER.ca% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise j/--acc @src/jackhmmer@ --acc !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise j/--noali @src/jackhmmer@ --noali !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+@@ -286,6 +288,7 @@
+ 1 exercise dup-names !testsuite/i10-duplicate-names.pl! @@ !! %OUTFILES%
+ 1 exercise mapali-again !testsuite/i11-hmmalign-mapali.pl! @@ !! %OUTFILES%
+ 1 exercise delete-corruption !testsuite/i12-delete-corruption.pl! @@ !! %OUTFILES%
++1 exercise jackhmmer-restart /usr/bin/perl !testsuite/i13-jackhmmer-restart.pl! @@ !! %OUTFILES%
+
+ 1 exercise brute-itest @src/itest_brute@
+ 1 exercise hmmpress-itest !src/hmmpress.itest.pl! @src/hmmpress@ %MINIFAM.HMM% %TMPPFX%
+--- /dev/null
++++ b/testsuite/i13-jackhmmer-restart.pl
+@@ -0,0 +1,35 @@
++#!/usr/bin/perl
++# Check that a jackhmmer search restarted from an HMM checkpoint (--chkhmm
++# with --chkhmmskip, then --hmmprime) yields the same alignment as an
++# uninterrupted search.
++#
++# Usage: ./i13-jackhmmer-restart.pl <builddir> <srcdir> <tmpfile prefix>
++# Example: ./i13-jackhmmer-restart.pl .. .. tmpfoo
++#
++# Laszlo Kajan <lkajan at rostlab.org> Fri, 12 Oct 2012 23:30:37 +0200
++use strict;
++use warnings;
++
++my ($builddir, $srcdir, $tmppfx) = @ARGV;
++defined $tmppfx or die "FAIL: usage: $0 <builddir> <srcdir> <tmpfile prefix>\n";
++
++# Verify that we have all the executables we need for the test.
++if (! -x "$builddir/src/jackhmmer") { die "FAIL: didn't find jackhmmer binary in $builddir/src\n"; }
++
++my $jackhmmer = "$builddir/src/jackhmmer --notextw";
++my $io = "-o /dev/null $srcdir/tutorial/HBB_HUMAN $srcdir/tutorial/globins45.fa";
++my @cmds = (
++    "$jackhmmer -N 2 -A $tmppfx.sto $io",                               # uninterrupted reference run
++    "$jackhmmer -N 2 --chkhmm $tmppfx.ch --chkhmmskip $io",             # save checkpoint 2, skip its search
++    "$jackhmmer -N 1 -A $tmppfx.R.sto --hmmprime $tmppfx.ch-2.hmm $io", # one iteration primed with checkpoint 2
++    "sed -i -e '/^#=GF/d;' $tmppfx.sto $tmppfx.R.sto",                  # delete lines starting with #=GF before comparing
++);
++for my $cmd (@cmds) {
++    system($cmd) == 0 or die "FAIL: failed in call '$cmd'\n";
++}
++
++my $diff = "diff -q $tmppfx.sto $tmppfx.R.sto";
++system($diff) == 0 or die "FAIL ('$diff'): files differ\n";
++print "ok\n";
++unlink( "$tmppfx.sto", glob("$tmppfx.ch-*"), "$tmppfx.R.sto" );
++exit 0;
Modified: trunk/packages/hmmer/branches/lkajan/debian/patches/series
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/series 2012-10-15 13:19:24 UTC (rev 12426)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/series 2012-10-15 13:44:02 UTC (rev 12427)
@@ -1,2 +1,5 @@
debian-changes-3.0-1
debian-changes-3.0-2.1
+jackhmmer_chkhmmstop
+jackhmmer_restartfromhmm
+jackhmmer_stop_restart_test
More information about the debian-med-commit
mailing list