[med-svn] r13016 - trunk/packages/genometools/trunk/debian/patches

Sat Feb 16 15:46:10 UTC 2013

Author: sascha-guest
Date: 2013-02-16 15:46:10 +0000 (Sat, 16 Feb 2013)
New Revision: 13016

Added:
   trunk/packages/genometools/trunk/debian/patches/split_manuals
Modified:
   trunk/packages/genometools/trunk/debian/patches/series
Log:
split manuals (LaTeX fails to finish correctly on some archs)


Modified: trunk/packages/genometools/trunk/debian/patches/series
===================================================================

--- trunk/packages/genometools/trunk/debian/patches/series	2013-02-16 10:12:30 UTC (rev 13015)
+++ trunk/packages/genometools/trunk/debian/patches/series	2013-02-16 15:46:10 UTC (rev 13016)
@@ -1,3 +1,4 @@
+split_manuals
 adding_soname
 libbam-fix
 remove-gitignores

Added: trunk/packages/genometools/trunk/debian/patches/split_manuals
===================================================================
--- trunk/packages/genometools/trunk/debian/patches/split_manuals	                        (rev 0)
+++ trunk/packages/genometools/trunk/debian/patches/split_manuals	2013-02-16 15:46:10 UTC (rev 13016)
@@ -0,0 +1,408 @@
+--- a/doc/manuals/matstat.tex
++++ b/doc/manuals/matstat.tex
+@@ -1,2 +1,158 @@
+-\def\BuildMatstat{}
+-\input{uniquesub}
++\documentclass[12pt]{article}
++\usepackage[a4paper,top=20mm,bottom=20mm,left=20mm,right=20mm]{geometry}
++\usepackage{url}
++\usepackage{alltt}
++\usepackage{xspace}
++\usepackage{times}
++\usepackage{listings}
++\usepackage{bbm}
++\usepackage{verbatim}
++\usepackage{optionman}
++
++\newcommand{\Substring}[3]{#1[#2..#3]}
++
++\newcommand{\Program}[0]{\texttt{matstat}\xspace}
++\newcommand{\MS}[1]{\mathit{ms(s,#1)}}
++\title{\Program: a program for computing\\
++       matching statistics\\
++       a manual}
++
++\author{\begin{tabular}{c}
++         \textit{Stefan Kurtz}\\
++         Center for Bioinformatics,\\
++         University of Hamburg
++        \end{tabular}}
++
++\begin{document}
++\maketitle
++
++\section{The program \Program}
++
++The program \Program is called as follows:
++\par
++\noindent\Program [\textit{options}] \Showoption{query} \Showoptionarg{files} [\textit{options}]
++\par
++\Showoptionarg{files} is a white space separated list of at least one
++filename. Any sequence occurring in any file specified in \Showoptionarg{files}
++is called \textit{unit} in the following.
++In addition to the mandatory option \Showoption{query}, the program
++must be called with either option \Showoption{pck} or \Showoption{esa}
++which specify to use a packed index or an enhanced suffix array for
++a given set of subject sequences.
++
++\Program computes the  \textit{matching statistics} for each unit. That is,
++for each position \(i\) in
++each unit, say \(s\) of length \(n\), \(\MS{i}=(l,j)\) is computed. Here
++\(l\) is the largest integer such that \(\Substring{s}{i}{i+l-1}\) matches
++a substring represented by the index and \(j\) is a start position of the
++matching substring in the index. We say that \(l\) is the length of \(\MS{i}\)
++and \(j\) is the subject position of \(\MS{i}\).
++
++The following options are available in \Program:
++
++\begin{Justshowoptions}
++\begin{comment}
++\Option{fmi}{$\Showoptionarg{indexname}$}{
++Use the old implementation of the FMindex. This option is not recommended.
++}
++\end{comment}
++
++\Option{esa}{$\Showoptionarg{indexname}$}{
++Use the given enhanced suffix array to compute the matches.
++}
++
++\Option{pck}{$\Showoptionarg{indexname}$}{
++Use the packed index (an efficient representation of the FMindex)
++to compute the matches.
++}
++
++
++\Option{query}{$\Showoptionarg{files}$}{
++Specify a white space separated list of query files containing the units.
++At least one query file must be given. The files may be in
++gzipped format, in which case they have to end with the suffix \texttt{.gz}.
++}
++
++\Option{min}{$\ell$}{
++Specify the minimum value $\ell$ for the length of the matching statistics.
++That is, for each unit \(s\) and each position \(i\) in \(s\), the program
++reports all values \(i\) and \(\MS{i}\) if the
++length of \(\MS{i}\) is at least \(\ell\).
++}
++
++\Option{max}{$\ell$}{
++Specify the maximum value $\ell$ for the length of the matching statistics.
++That is, for each unit \(s\) and each position \(i\) in \(s\), the program
++reports the values \(i\) and \(\MS{i}\) if the length of \(\MS{i}\)
++is at most \(\ell\).
++}
++
++\Option{output}{(\Showoptionkey{subjectpos}$\mid$\Showoptionkey{querypos}$\mid$\Showoptionkey{sequence})}{
++Specify what to output. At least one of the three keywords
++$\Showoptionkey{subjectpos}$,
++$\Showoptionkey{querypos}$, and
++$\Showoptionkey{sequence}$ must be used.
++Using the keyword $\Showoptionkey{subjectpos}$ shows the
++subject position of the matching statistics.
++Using the keyword $\Showoptionkey{querypos}$ shows the query position.
++Using the keyword $\Showoptionkey{sequence}$ shows the sequence content of the match.
++}
++
++\Helpoption
++
++\end{Justshowoptions}
++The following conditions must be satisfied:
++\begin{enumerate}
++\item
++Either option  \Showoption{min} or option \Showoption{max} must be used.
++\item
++If both options \Showoption{min} and \Showoption{max} are used, then
++the value specified by option \(\Showoption{min}\) must be smaller
++than the value specified by option \(\Showoption{max}\).
++\item
++Either option \Showoption{pck} or \Showoption{esa} must be used. Both cannot
++be combined.
++\end{enumerate}
++
++\section{Examples}
++
++Suppose that in some directory, say \texttt{homo-sapiens}, we have 25 gzipped
++fasta files containing all 24 human chromosomes plus one file with
++mitochondrial sequences. These may have been downloaded from
++\url{ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens_47_36i/dna}.
++
++In the first step, we construct the packed index for the entire genome:
++
++\begin{Output}
++gt packedindex mkindex -dna -dir rev -parts 15 -bsize 10 -locfreq 32
++                       -indexname human-all -db homo-sapiens/*.gz
++\end{Output}
++
++The program runs for almost two hours and delivers
++an index \texttt{human-all} consisting of three files:
++
++\begin{Output}
++ls -lh human-all.*
++-rw-r----- 1 kurtz gistaff   37 2008-01-24 00:47 human-all.al1
++-rw-r----- 1 kurtz gistaff 1.9G 2008-01-24 02:37 human-all.bdx
++-rw-r----- 1 kurtz gistaff 3.4K 2008-01-24 02:37 human-all.prj
++\end{Output}
++
++This is used in the following call to the program \Program:
++
++\begin{Output}
++gt matstat -output subjectpos querypos sequence -min 20 -max 30
++           -query queryfile.fna -pck human-all
++unit 0 (Mus musculus, chr 1, complete sequence)
++22 20 390765125 actgtatctcaaaatataaa
++253 21 258488266 gggaataaacatgtcattgag
++254 20 258488267 ggaataaacatgtcattgag
++275 20 900483549 taattctatttttctttctt
++480 20 1008274536 gcttgaagatcatgatccag
++..
++\end{Output}
++Here, the first column shows the relative positions in unit 0 for which the
++length of the matching statistics is between 20 and 30. The second column is
++the corresponding length value. The third column shows the position of the
++matching sequence in the index, and the fourth shows the sequence content.
++\end{document}
+--- a/doc/manuals/uniquesub.tex
++++ b/doc/manuals/uniquesub.tex
+@@ -7,39 +7,14 @@
+ \usepackage{listings}
+ \usepackage{bbm}
+ \usepackage{verbatim}
+-\usepackage{ifthen}
+-\usepackage{comment}
+ \usepackage{optionman}
+-
+-\ifthenelse{\isundefined{\BuildMatstat}}{%
+-\includecomment{AboutUniquesub}
+-\excludecomment{AboutMatstat}
+-\newcommand{\AboutUniquesubcmd}[1]{#1}
+-\newcommand{\AboutMatstatcmd}[1]{}
+-}{%
+-\includecomment{AboutMatstat}
+-\excludecomment{AboutUniquesub}
+-\newcommand{\AboutMatstatcmd}[1]{#1}
+-\newcommand{\AboutUniquesubcmd}[1]{}
+-}
+-
+ \newcommand{\Substring}[3]{#1[#2..#3]}
+ 
+-\begin{AboutUniquesub}
+ \newcommand{\Program}[0]{\texttt{uniquesub}\xspace}
+ \newcommand{\Mup}[1]{\mathit{mup(s,#1)}}
+ \title{\Program: a program for computing\\
+        minimum unique substrings\\
+        a manual}
+-\end{AboutUniquesub}
+-
+-\begin{AboutMatstat}
+-\newcommand{\Program}[0]{\texttt{matstat}\xspace}
+-\newcommand{\MS}[1]{\mathit{ms(s,#1)}}
+-\title{\Program: a program for computing\\
+-       matching statistics\\
+-       a manual}
+-\end{AboutMatstat}
+ 
+ \author{\begin{tabular}{c}
+          \textit{Stefan Kurtz}\\
+@@ -54,47 +29,35 @@
+ 
+ The program \Program is called as follows:
+ \par
+-\noindent\Program [\textit{options}] \Showoption{query} \Showoptionarg{files} [\textit{options}] 
++\noindent\Program [\textit{options}] \Showoption{query} \Showoptionarg{files} [\textit{options}]
+ \par
+-\Showoptionarg{files} is a white space separated list of at least one 
++\Showoptionarg{files} is a white space separated list of at least one
+ filename. Any sequence occurring in any file specified in \Showoptionarg{files}
+ is called \textit{unit} in the following.
+ In addition to the mandatory option \Showoption{query}, the program
+ must be called with either option \Showoption{pck} or \Showoption{esa}
+-which specify to use a packed index or an enhanced suffix array for 
++which specify to use a packed index or an enhanced suffix array for
+ a given set of subject sequences.
+ 
+-\begin{AboutUniquesub}
+ \Program computes for all positions \(i\) in each unit, say \(s\) of length
+-\(n\), the length \(\Mup{i}\) of the minimum unique prefix 
++\(n\), the length \(\Mup{i}\) of the minimum unique prefix
+ at position \(i\), if it exists. Uniqueness always refers to all substrings
+-represented by the index. \(\Mup{i}\) is defined by the following two 
++represented by the index. \(\Mup{i}\) is defined by the following two
+ statements:
+ \begin{itemize}
+ \item
+ If \(\Substring{s}{i}{n-1}\) is not unique in the index, then \(\Mup{i}=\bot\).
+ That is, it is undefined.
+ \item
+-If \(\Substring{s}{i}{n-1}\) is unique in the index, then \(\Mup{i}=m\), where 
+-\(m\) is the smallest value such that \(i+m-1\leq n-1\) and 
++If \(\Substring{s}{i}{n-1}\) is unique in the index, then \(\Mup{i}=m\), where
++\(m\) is the smallest value such that \(i+m-1\leq n-1\) and
+ \(\Substring{s}{i}{i+m-1}\) occurs exactly once as a substring in the index.
+ \end{itemize}
+-Note that it is possible that for all \(i\in[0,n-1]\) we have 
+-\(\Mup{i}=\bot\), which means that unit \(s\) does not contain any unique 
++Note that it is possible that for all \(i\in[0,n-1]\) we have
++\(\Mup{i}=\bot\), which means that unit \(s\) does not contain any unique
+ substring. In this case, the program reports nothing for the corresponding
+ unit. The program was developed for designing whole genome tiling arrays.
+ The corresponding publication is \cite{GRAE:NIE:KUR:HUY:BIR:STU:FLI:2007}.
+-\end{AboutUniquesub}
+-
+-\begin{AboutMatstat}
+-\Program computes the  \textit{matching statistics} for each unit. That is, 
+-for each position \(i\) in 
+-each unit, say \(s\) of length \(n\), \(\MS{i}=(l,j)\) is computed. Here
+-\(l\) is the largest integer such that \(\Substring{s}{i}{i+l-1}\) matches
+-a substring represented by the index and \(j\) is a start position of the
+-matching substring in the index. We say that \(l\) is the length of \(\MS{i}\)
+-and \(j\) is the subject position of \(\MS{i}\).
+-\end{AboutMatstat}
+ 
+ The following options are available in \Program:
+ 
+@@ -117,58 +80,29 @@
+ 
+ \Option{query}{$\Showoptionarg{files}$}{
+ Specify a white space separated list of query files containing the units.
+-At least one query file must be given. The files may be in 
++At least one query file must be given. The files may be in
+ gzipped format, in which case they have to end with the suffix \texttt{.gz}.
+ }
+ 
+-\begin{AboutUniquesub}
+ \Option{min}{$\ell$}{
+ Specify the minimum length $\ell$ of the minimum unique prefixes.
+-That is, for each unit \(s\) and each positions \(i\) in \(s\), the program 
+-reports the values \(i\) and \(\Mup{i}\) whenever \(\Mup{i}\geq\ell\). 
++That is, for each unit \(s\) and each position \(i\) in \(s\), the program
++reports the values \(i\) and \(\Mup{i}\) whenever \(\Mup{i}\geq\ell\).
+ }
+ 
+ \Option{max}{$\ell$}{
+ Specify the maximum length $\ell$ of the minimum unique prefixes.
+-That is, for each unit \(s\) and each positions \(i\) in \(s\), the program 
++That is, for each unit \(s\) and each position \(i\) in \(s\), the program
+ reports the values \(i\) and \(\Mup{i}\) whenever \(\Mup{i}\leq\ell\).
+ }
+ 
+ \Option{output}{(\Showoptionkey{querypos}$\mid$\Showoptionkey{sequence})}{
+ Specify what to output. At least one of the two keys words
+-$\Showoptionkey{querypos}$ and $\Showoptionkey{sequence}$ must be used. 
++$\Showoptionkey{querypos}$ and $\Showoptionkey{sequence}$ must be used.
+ Using the keyword $\Showoptionkey{querypos}$ shows the query position.
+ Using the keyword $\Showoptionkey{sequence}$ shows the sequence content
+ of the match.
+ }
+-\end{AboutUniquesub}
+-
+-\begin{AboutMatstat}
+-\Option{min}{$\ell$}{
+-Specify the minimum value $\ell$ for the length of the matching statistics.
+-That is, for each unit \(s\) and each position \(i\) in \(s\), the program 
+-reports all values \(i\) and \(\MS{i}\) if the 
+-length of \(\MS{i}\) is at least \(\ell\).
+-}
+-
+-\Option{max}{$\ell$}{
+-Specify the maximum length $\ell$ for the length of the matching statistics.
+-That is, for each unit \(s\) and each positions \(i\) in \(s\), the program 
+-reports the values \(i\) and \(\MS{i}\) if the length of \(\MS{i}\)
+-is at most \(\ell\).
+-}
+-
+-\Option{output}{(\Showoptionkey{subjectpos}$\mid$\Showoptionkey{querypos}$\mid$\Showoptionkey{sequence})}{
+-Specify what to output. At least one of the three keys words
+-$\Showoptionkey{subjectpos}$,
+-$\Showoptionkey{querypos}$, and
+-$\Showoptionkey{sequence}$ must be used.
+-Using the keyword $\Showoptionkey{subjectpos}$ shows the 
+-subject position of the matching statistics.
+-Using the keyword $\Showoptionkey{querypos}$ shows the query position.
+-Using the keyword $\Showoptionkey{sequence}$ shows the sequence content
+-}
+-\end{AboutMatstat}
+ 
+ \Helpoption
+ 
+@@ -189,7 +123,7 @@
+ \section{Examples}
+ 
+ Suppose that in some directory, say \texttt{homo-sapiens}, we have 25 gzipped
+-fasta files containing all 24 human chromomsomes plus one file with 
++fasta files containing all 24 human chromosomes plus one file with
+ mitrochondrial sequences. These may have been downloaded from
+ \url{ftp://ftp.ensembl.org/pub/current_fasta/homo_sapiens_47_36i/dna}.
+ 
+@@ -200,7 +134,7 @@
+                        -indexname human-all -db homo-sapiens/*.gz
+ \end{Output}
+ 
+-The program runs for almost two hours and delivers 
++The program runs for almost two hours and delivers
+ an index \texttt{human-all} consisting of three files:
+ 
+ \begin{Output}
+@@ -212,9 +146,8 @@
+ 
+ This is used in the following call to the program \Program:
+ 
+-\begin{AboutUniquesub}
+ \begin{Output}
+-gt uniquesub -output querypos -min 20 -max 30 -query queryfile.fna 
++gt uniquesub -output querypos -min 20 -max 30 -query queryfile.fna
+              -pck human-all
+ unit 0 (Mus musculus, chr 1, complete sequence)
+ 1007 20
+@@ -227,7 +160,7 @@
+ 
+ For all units \(s\) in the multiple \Fasta file \texttt{queryfile.fna},
+ a line is shown, reporting the number of the unit and the original fasta
+-header. Also, all for positions \(i\) in \(s\) satisfying 
++header. Also, for all positions \(i\) in \(s\) satisfying
+ \(20\leq \Mup{i}\leq 30\), \(i\) and \(\Mup{i}\) is reported.
+ 
+ The first column is the relative position in the unit sequence (counting
+@@ -238,7 +171,7 @@
+ \Showoption{output}:
+ 
+ \begin{Output}
+-gt uniquesub -output querypos sequence -min 20 -max 30 
++gt uniquesub -output querypos sequence -min 20 -max 30
+              -query queryfile.fna -pck human-all
+ unit 0 (Mus musculus, chr 1, complete sequence)
+ 1007 20 ctgacagtttttttttttta
+@@ -248,27 +181,7 @@
+ 1013 21 gttttttttttttactttata
+ ...
+ \end{Output}
+-\end{AboutUniquesub}
+ 
+-\begin{AboutMatstat}
+-\begin{Output}
+-gt matstat -output subjectpos querypos sequence -min 20 -max 30 
+-           -query queryfile.fna -pck human-all
+-unit 0 (Mus musculus, chr 1, complete sequence)
+-22 20 390765125 actgtatctcaaaatataaa
+-253 21 258488266 gggaataaacatgtcattgag
+-254 20 258488267 ggaataaacatgtcattgag
+-275 20 900483549 taattctatttttctttctt
+-480 20 1008274536 gcttgaagatcatgatccag
+-..
+-\end{Output}
+-Here, the first column shows the relative positions in unit 0 for which the
+-length of the matching statistics is between 20 and 30. The second column is
+-the corresponding length value. The third column shows position of the
+-matching sequence in the index, and the fourth shows the sequence content.
+-\end{AboutMatstat}
+-
+-\begin{AboutUniquesub}
+ %\bibliographystyle{plain}
+ %\bibliography{defines,kurtz}
+ \begin{thebibliography}{1}
+@@ -280,5 +193,5 @@
+ \newblock {\em {Bioinformatics}}, {23 ISMB/ECCB 2007}:{i195--i204}, 2007.
+ 
+ \end{thebibliography}
+-\end{AboutUniquesub}
++
+ \end{document}