[med-svn] r12427 - in trunk/packages/hmmer/branches: . lkajan/debian lkajan/debian/patches

Laszlo Kajan lkajan-guest at alioth.debian.org
Mon Oct 15 13:44:02 UTC 2012


Author: lkajan-guest
Date: 2012-10-15 13:44:02 +0000 (Mon, 15 Oct 2012)
New Revision: 12427

Added:
   trunk/packages/hmmer/branches/lkajan/debian/README.source
   trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop
   trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm
   trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test
Modified:
   trunk/packages/hmmer/branches/
   trunk/packages/hmmer/branches/lkajan/debian/changelog
   trunk/packages/hmmer/branches/lkajan/debian/patches/series
Log:
lkajan branch for features restart from HMM model and skip last search


Property changes on: trunk/packages/hmmer/branches
___________________________________________________________________
Added: svn:ignore
   + tarballs
build-area


Added: trunk/packages/hmmer/branches/lkajan/debian/README.source
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/README.source	                        (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/README.source	2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,7 @@
+hmmer for Debian
+================
+This branch was created to add the --chkhmmskip and --hmmprime options to jackhmmer. For justification see [1] and [2].
+
+[1] http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017512.html
+[2] http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017510.html
+

Modified: trunk/packages/hmmer/branches/lkajan/debian/changelog
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/changelog	2012-10-15 13:19:24 UTC (rev 12426)
+++ trunk/packages/hmmer/branches/lkajan/debian/changelog	2012-10-15 13:44:02 UTC (rev 12427)
@@ -1,3 +1,11 @@
+hmmer (3.0-4lkajan1) UNRELEASED; urgency=low
+
+  * Local package.
+  * Added --chkhmmskip and --hmmprime options. This is a branch, for
+    justification see d/README.source.
+
+ -- Laszlo Kajan <lkajan at rostlab.org>  Mon, 15 Oct 2012 12:53:17 +0200
+
 hmmer (3.0-4) unstable; urgency=low
 
   [ Charles Plessy ]

Added: trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop	                        (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_chkhmmstop	2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,74 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: --chkhmmskip option
+Forwarded: http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017501.html
+--- a/src/jackhmmer.c
++++ b/src/jackhmmer.c
+@@ -68,6 +68,7 @@
+   { "--tblout",     eslARG_OUTFILE, NULL, NULL, NULL,      NULL,    NULL,  NULL,            "save parseable table of per-sequence hits to file <s>",        2 },
+   { "--domtblout",  eslARG_OUTFILE, NULL, NULL, NULL,      NULL,    NULL,  NULL,            "save parseable table of per-domain hits to file <s>",          2 },
+   { "--chkhmm",     eslARG_OUTFILE, NULL, NULL, NULL,      NULL,    NULL,  NULL,            "save HMM checkpoints to files <s>-<iteration>.hmm",            2 },
++  { "--chkhmmskip", eslARG_NONE,   FALSE, NULL, NULL,      NULL,    NULL,  NULL,            "skip search after saving HMM checkpoint for last iteration",   2 },
+   { "--chkali",     eslARG_OUTFILE, NULL, NULL, NULL,      NULL,    NULL,  NULL,            "save alignment checkpoints to files <s>-<iteration>.sto",      2 },
+   { "--acc",        eslARG_NONE,   FALSE, NULL, NULL,      NULL,    NULL,  NULL,            "prefer accessions over names in output",                       2 },
+   { "--noali",      eslARG_NONE,   FALSE, NULL, NULL,      NULL,    NULL,  NULL,            "don't output alignments, so output is smaller",                2 },
+@@ -260,6 +261,7 @@
+   if (esl_opt_IsUsed(go, "--tblout"))    fprintf(ofp, "# per-seq hits tabular output:     %s\n",      esl_opt_GetString(go, "--tblout"));
+   if (esl_opt_IsUsed(go, "--domtblout")) fprintf(ofp, "# per-dom hits tabular output:     %s\n",      esl_opt_GetString(go, "--domtblout"));
+   if (esl_opt_IsUsed(go, "--chkhmm"))    fprintf(ofp, "# HMM checkpoint files output:     %s-<i>.hmm\n", esl_opt_GetString(go, "--chkhmm"));
++  if (esl_opt_IsUsed(go, "--chkhmmskip"))fprintf(ofp, "# skipped search after saving HMM checkpoint for last iteration\n");
+   if (esl_opt_IsUsed(go, "--chkali"))    fprintf(ofp, "# MSA checkpoint files output:     %s-<i>.sto\n", esl_opt_GetString(go, "--chkali"));
+   if (esl_opt_IsUsed(go, "--acc"))       fprintf(ofp, "# prefer accessions over names:    yes\n");
+   if (esl_opt_IsUsed(go, "--noali"))     fprintf(ofp, "# show alignments in output:       no\n");
+@@ -599,6 +601,10 @@
+ #endif
+ 	    }
+ 
++		// lkajan: chkhmmskip
++		if( !( esl_opt_GetBoolean(go, "--chkhmmskip") == TRUE && esl_opt_IsOn(go, "--chkhmm") && iteration == maxiterations ) )
++		{
++
+ #ifdef HMMER_THREADS
+ 	  if (ncpus > 0) sstatus = thread_loop(threadObj, queue, dbfp);
+ 	  else           sstatus = serial_loop(info, dbfp);
+@@ -619,6 +625,8 @@
+ 			sstatus, dbfp->filename);
+ 	    }
+ 
++	    	} // lkajan: chkhmmskip
++
+ 	  /* merge the results of the search results */
+ 	  for (i = 1; i < infocnt; ++i)
+ 	    {
+--- a/documentation/man/jackhmmer.man
++++ b/documentation/man/jackhmmer.man
+@@ -615,6 +615,21 @@
+ .SH OTHER OPTIONS
+ 
+ .TP
++.BI --chkhmmskip
++Skip search after saving HMM checkpoint for last iteration.  Example: to save
++the model after
++.I 3
+iterations, you would use "\-N 4 \-\-chkhmm <chkhmm_prefix> \-\-chkhmmskip", and the HMM model is saved to
++.I <chkhmm_prefix>-4.hmm
++\&. This is because model
++.I N
++is saved at the start of iteration
++.I N
++and is built from the results of iteration
++.I N-1
++\&.
++
++.TP
+ .B --nonull2
+ Turn off the null2 score corrections for biased composition.
+ 
+@@ -685,7 +700,7 @@
+ support. This is the default, but it may have been turned off at
+ compile-time for your site or machine for some reason.
+ 
+-
++.TP
+ .BI --stall
+ For debugging the MPI master/worker version: pause after start, to
+ enable the developer to attach debuggers to the running master and

Added: trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm	                        (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_restartfromhmm	2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,132 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: --hmmprime option
+Forwarded: http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017501.html
+--- a/src/jackhmmer.c
++++ b/src/jackhmmer.c
+@@ -62,6 +62,7 @@
+   /* name           type         default   env  range   toggles     reqs   incomp                             help                                                  docgroup*/
+   { "-h",           eslARG_NONE,   FALSE, NULL, NULL,      NULL,    NULL,  NULL,            "show brief help on version and usage",                         1 },
+   { "-N",           eslARG_INT,      "5", NULL, "n>0",     NULL,    NULL,  NULL,            "set maximum number of iterations to <n>",                      1 },
++  { "--hmmprime",   eslARG_INFILE,  NULL, NULL, NULL,      NULL,    NULL,  NULL,            "HMM checkpoint to prime search with",                          1 },
+ /* Control of output */
+   { "-o",           eslARG_OUTFILE, NULL, NULL, NULL,      NULL,    NULL,  NULL,            "direct output to file <f>, not stdout",                        2 },
+   { "-A",           eslARG_OUTFILE, NULL, NULL, NULL,      NULL,    NULL,  NULL,            "save multiple alignment of hits to file <s>",                  2 },
+@@ -256,6 +257,7 @@
+   fprintf(ofp, "# query sequence file:             %s\n", qfile);
+   fprintf(ofp, "# target sequence database:        %s\n", dbfile);
+   if (esl_opt_IsUsed(go, "-N"))          fprintf(ofp, "# maximum iterations set to:       %d\n",      esl_opt_GetInteger(go, "-N"));
++  if (esl_opt_IsUsed(go, "--hmmprime"))  fprintf(ofp, "# priming HMM checkpoint:          %s\n",      esl_opt_GetString(go, "--hmmprime"));
+   if (esl_opt_IsUsed(go, "-o"))          fprintf(ofp, "# output directed to file:         %s\n",      esl_opt_GetString(go, "-o"));
+   if (esl_opt_IsUsed(go, "-A"))          fprintf(ofp, "# MSA of hits saved to file:       %s\n",      esl_opt_GetString(go, "-A"));
+   if (esl_opt_IsUsed(go, "--tblout"))    fprintf(ofp, "# per-seq hits tabular output:     %s\n",      esl_opt_GetString(go, "--tblout"));
+@@ -401,6 +403,7 @@
+   int              qformat  = eslSQFILE_UNKNOWN;  /* format of qfile                                 */
+   int              dbformat = eslSQFILE_UNKNOWN;  /* format of dbfile                                */
+   ESL_SQFILE      *qfp      = NULL;		  /* open qfile                                      */
++  P7_HMMFILE      *hfp      = NULL;		  /* open input HMM file                             */
+   ESL_SQFILE      *dbfp     = NULL;               /* open dbfile                                     */
+   ESL_ALPHABET    *abc      = NULL;               /* sequence alphabet                               */
+   P7_BUILDER      *bld      = NULL;               /* HMM construction configuration                  */
+@@ -414,6 +417,7 @@
+   int              nnew_targets;
+   int              prv_msa_nseq;
+   int              status   = eslOK;
++  int              hstatus  = eslOK;
+   int              qstatus  = eslOK;
+   int              sstatus  = eslOK;
+ 
+@@ -480,6 +484,15 @@
+   else if (status != eslOK)        esl_fatal ("Unexpected error %d opening sequence file %s\n", status, cfg->qfile);
+   qsq = esl_sq_CreateDigital(abc);
+ 
++  // lkajan: open HMM file
++  char		*hmmfile = esl_opt_GetString(go, "--hmmprime"); // query HMM file
++  if( hmmfile )
++  {
++      status = p7_hmmfile_Open(hmmfile, NULL, &hfp);
++      if      (status == eslENOTFOUND) p7_Fail("Failed to open hmm file %s for reading.\n",                      hmmfile);
++      else if (status == eslEFORMAT)   p7_Fail("Unrecognized format, trying to open hmm file %s for reading.\n", hmmfile);
++      else if (status != eslOK)        p7_Fail("Unexpected error %d in opening hmm file %s.\n", status,          hmmfile);
++  }
+ #ifdef HMMER_THREADS
+   /* initialize thread data */
+   if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu");
+@@ -530,13 +543,22 @@
+   while ((qstatus = esl_sqio_Read(qfp, qsq)) == eslOK)
+     {
+       P7_HMM          *hmm     = NULL;	     /* HMM - only needed if checkpointed        */
++      P7_HMM          *phmm    = NULL;	     /* priming HMM - only needed if primed      */
+       P7_HMM         **ret_hmm = NULL;	     /* HMM - only needed if checkpointed        */
++      P7_PROFILE      *gm      = NULL;
+       P7_OPROFILE     *om      = NULL;       /* optimized query profile                  */
+       P7_TRACE        *qtr     = NULL;       /* faux trace for query sequence            */
+       ESL_MSA         *msa     = NULL;       /* multiple alignment of included hits      */
+       
+       if (esl_opt_IsOn(go, "--chkhmm")) ret_hmm = &hmm;
+ 
++      // lkajan: read in an HMM and prime search with it - here, so that the below 'continue' does not affect this
++      if(hfp)
++      {
++	  hstatus = p7_hmmfile_Read(hfp, &abc, &phmm);
++	  if (hstatus != eslOK){ p7_Fail("Failed to read from hmm file %s.\n", hmmfile); phmm = NULL; }
++      }
++
+       nquery++;
+       if (qsq->n == 0) continue; /* skip zero length queries as if they aren't even present. */
+ 
+@@ -560,6 +582,20 @@
+ 	    {
+ 	      p7_SingleBuilder(bld, qsq, info->bg, ret_hmm, &qtr, NULL, &om); /* bypass HMM - only need model */
+ 
++	      // lkajan: from hmmsearch.c:425
++	      if( phmm )
++	      {
++      		p7_hmm_Destroy(hmm);
++		p7_oprofile_Destroy(om);
++      		p7_profile_Destroy(gm);
++
++		hmm = p7_hmm_Clone(phmm);
++		gm = p7_profile_Create (hmm->M, abc);
++		om = p7_oprofile_Create(hmm->M, abc);
++      		p7_ProfileConfig(hmm, info->bg, gm, 100, p7_LOCAL); /* 100 is a dummy length for now; and MSVFilter requires local mode */
++      		p7_oprofile_Convert(gm, om);                  /* <om> is now p7_LOCAL, multihit */
++	      }
++
+ 	      prv_msa_nseq = 1;
+ 	    }
+ 	  else 
+@@ -699,6 +735,8 @@
+ 
+       esl_msa_Destroy(msa);
+       p7_oprofile_Destroy(om);
++      p7_profile_Destroy(gm);
++      p7_hmm_Destroy(phmm);
+       p7_trace_Destroy(qtr);
+       esl_sq_Reuse(qsq);
+       esl_keyhash_Reuse(kh);
+@@ -732,6 +770,7 @@
+   esl_keyhash_Destroy(kh);
+   esl_sqfile_Close(qfp);
+   esl_sqfile_Close(dbfp);
++  p7_hmmfile_Close(hfp);
+   esl_sq_Destroy(qsq);  
+   esl_stopwatch_Destroy(w);
+   p7_builder_Destroy(bld);
+--- a/documentation/man/jackhmmer.man
++++ b/documentation/man/jackhmmer.man
+@@ -62,6 +62,15 @@
+ .B phmmer
+ search.
+ 
++.TP
++.BI --hmmprime " <hmmfile>"
++Prime search with HMM model
++.I <hmmfile>
++\&. Optional. You can obtain the checkpoint HMM
++using the
++.B --chkhmm
++option, for example. A search restarted from an appropriate HMM checkpoint
++yields the same results as an uninterrupted search.
+ 
+ 
+ 

Added: trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test	                        (rev 0)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/jackhmmer_stop_restart_test	2012-10-15 13:44:02 UTC (rev 12427)
@@ -0,0 +1,60 @@
+Author: Laszlo Kajan <lkajan at rostlab.org>
+Description: tests for --chkhmmskip and --hmmprime
+Forwarded: http://lists.alioth.debian.org/pipermail/debian-med-packaging/2012-October/017501.html
+--- a/testsuite/testsuite.sqc
++++ b/testsuite/testsuite.sqc
+@@ -163,6 +163,8 @@
+ 1 exercise  j/--tblout          @src/jackhmmer@  --tblout    %PHMMER.tbl%  !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise  j/--domtblout       @src/jackhmmer@  --domtblout %PHMMER.dtbl% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise  j/--chkhmm          @src/jackhmmer@  --chkhmm    %PHMMER.ch%   !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
++1 exercise  j/--chkhmmskip      @src/jackhmmer@  -N 2 --chkhmm %PHMMER.ch% --chkhmmskip !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
++1 exercise  j/--hmmprime        @src/jackhmmer@  --hmmprime %PHMMER.ch-2.hmm% !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise  j/--chkali          @src/jackhmmer@  --chkali    %PHMMER.ca%   !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise  j/--acc             @src/jackhmmer@  --acc                     !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+ 1 exercise  j/--noali           @src/jackhmmer@  --noali                   !tutorial/HBB_HUMAN! !tutorial/globins45.fa!
+@@ -286,6 +288,7 @@
+ 1 exercise  dup-names             !testsuite/i10-duplicate-names.pl!    @@ !! %OUTFILES%
+ 1 exercise  mapali-again          !testsuite/i11-hmmalign-mapali.pl!    @@ !! %OUTFILES%
+ 1 exercise  delete-corruption     !testsuite/i12-delete-corruption.pl!  @@ !! %OUTFILES%
++1 exercise  jackhmmer-restart     /usr/bin/perl !testsuite/i13-jackhmmer-restart.pl!  @@ !! %OUTFILES%
+ 
+ 1 exercise  brute-itest           @src/itest_brute@  
+ 1 exercise  hmmpress-itest        !src/hmmpress.itest.pl! @src/hmmpress@ %MINIFAM.HMM% %TMPPFX%
+--- /dev/null
++++ b/testsuite/i13-jackhmmer-restart.pl
+@@ -0,0 +1,35 @@
++#!/usr/bin/perl
++
++# Usage:   ./i13-jackhmmer-restart.pl <builddir> <srcdir> <tmpfile prefix>
++# Example: ./i13-jackhmmer-restart.pl ..         ..       tmpfoo
++#
++# Checks that jackhmmer restarted from a checkpoint HMM (--hmmprime) saves the
++# same alignment as an uninterrupted run (compared after dropping #=GF lines).
++#
++# Laszlo Kajan <lkajan at rostlab.org>  Fri, 12 Oct 2012 23:30:37 +0200
++use strict;
++use warnings;
++
++my ($builddir, $srcdir, $tmppfx) = @ARGV;
++
++# Verify that we have all the executables we need for the test.
++if (! -x "$builddir/src/jackhmmer")   { die "FAIL: didn't find jackhmmer binary in $builddir/src\n";  }
++
++my $cmd = "$builddir/src/jackhmmer --notextw -N 2 -A $tmppfx.sto -o /dev/null $srcdir/tutorial/HBB_HUMAN $srcdir/tutorial/globins45.fa";
++system( $cmd ) == 0 or die("FAIL: failed in call '$cmd'\n");
++
++$cmd = "$builddir/src/jackhmmer --notextw -N 2 --chkhmm $tmppfx.ch --chkhmmskip -o /dev/null $srcdir/tutorial/HBB_HUMAN $srcdir/tutorial/globins45.fa";
++system( $cmd ) == 0 or die("FAIL: failed in call '$cmd'\n");
++
++$cmd = "$builddir/src/jackhmmer --notextw -N 1 -A $tmppfx.R.sto --hmmprime $tmppfx.ch-2.hmm -o /dev/null $srcdir/tutorial/HBB_HUMAN $srcdir/tutorial/globins45.fa";
++system( $cmd ) == 0 or die("FAIL: failed in call '$cmd'\n");
++
++$cmd = "sed -i -e '/^#=GF/d;' $tmppfx.sto $tmppfx.R.sto";
++system( $cmd ) == 0 or die("FAIL: failed in call '$cmd'\n");
++
++$cmd = "diff -q $tmppfx.sto $tmppfx.R.sto";
++system( $cmd ) == 0 or die("FAIL ('$cmd'): files differ\n");
++
++print "ok\n";
++unlink( "$tmppfx.sto", glob("$tmppfx.ch-*"), "$tmppfx.R.sto" );
++exit 0;

Modified: trunk/packages/hmmer/branches/lkajan/debian/patches/series
===================================================================
--- trunk/packages/hmmer/branches/lkajan/debian/patches/series	2012-10-15 13:19:24 UTC (rev 12426)
+++ trunk/packages/hmmer/branches/lkajan/debian/patches/series	2012-10-15 13:44:02 UTC (rev 12427)
@@ -1,2 +1,5 @@
 debian-changes-3.0-1
 debian-changes-3.0-2.1
+jackhmmer_chkhmmstop
+jackhmmer_restartfromhmm
+jackhmmer_stop_restart_test




More information about the debian-med-commit mailing list