[irstlm] 33/146: Updated online_documentation_enhancments.patch.

Giulio Paci giuliopaci-guest@moszumanska.debian.org
Tue May 17 07:37:04 UTC 2016


This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to branch master
in repository irstlm.

commit 371f782cb2f210456988a63589a3649ff2cda591
Author: Giulio Paci <giuliopaci@gmail.com>
Date:   Tue Sep 4 03:48:26 2012 +0200

    Updated online_documentation_enhancments.patch.
---
 .../patches/online_documentation_enhancments.patch | 380 ++++++++++-----------
 1 file changed, 188 insertions(+), 192 deletions(-)

diff --git a/debian/patches/online_documentation_enhancments.patch b/debian/patches/online_documentation_enhancments.patch
index 39bce80..a6e9558 100644
--- a/debian/patches/online_documentation_enhancments.patch
+++ b/debian/patches/online_documentation_enhancments.patch
@@ -156,7 +156,7 @@ Forwarded: no
           v)
               verbose="--verbose";
 @@ -107,7 +110,6 @@
- 		 exit 4;
+ 		 exit 4
  	     esac
               ;;
 -  
@@ -522,116 +522,122 @@ Forwarded: no
  
 --- a/src/compile-lm.cpp
 +++ b/src/compile-lm.cpp
-@@ -55,27 +55,29 @@
- void usage(const char *msg = 0) {
- 
-   if (msg) { std::cerr << msg << std::endl; }
+@@ -58,27 +58,29 @@
+   if (msg) {
+     std::cerr << msg << std::endl;
+   }
 -  std::cerr << "Usage: compile-lm [options] input-file.lm [output-file.blm]" << std::endl;
 -  if (!msg) std::cerr << std::endl
--		      << "  compile-lm reads a standard LM file in ARPA format and produces" << std::endl
--		      << "  a compiled representation that the IRST LM toolkit can quickly" << std::endl
--		      << "  read and process. LM file can be compressed with gzip." << std::endl << std::endl;
+-                        << "  compile-lm reads a standard LM file in ARPA format and produces" << std::endl
+-                        << "  a compiled representation that the IRST LM toolkit can quickly" << std::endl
+-                        << "  read and process. LM file can be compressed with gzip." << std::endl << std::endl;
 -  std::cerr << "Options:\n"
--	    << "--text|-t [yes|no]  (output is again in text format)" << std::endl
--	    << "--invert|-i [yes|no]  (build an inverted n-gram binary table for fast access: default no)" << std::endl
--	    << "--filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
--	    << "--keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
--	    << "--eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
--	    << "--randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
--	    << "--dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
--	    << "--score|-s [yes|no]  (computes log-prob scores from standard input)"<< std::endl
--	    << "--debug|-d 1 (verbose output for --eval option)"<< std::endl
--	    << "--sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
--	    << "--memmap|-mm 1 (uses memory map to read a binary LM)"<< std::endl
--	    << "--ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
--	    << "--dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
--	    << "--level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
--	    << "--tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
+-            << "--text|-t [yes|no]  (output is again in text format)" << std::endl
+-            << "--invert|-i [yes|no]  (build an inverted n-gram binary table for fast access: default no)" << std::endl
+-            << "--filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
+-            << "--keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
+-            << "--eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
+-            << "--randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
+-            << "--dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
+-            << "--score|-s [yes|no]  (computes log-prob scores from standard input)"<< std::endl
+-            << "--debug|-d 1 (verbose output for --eval option)"<< std::endl
+-            << "--sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
+-            << "--memmap|-mm 1 (uses memory map to read a binary LM)"<< std::endl
+-            << "--ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
+-            << "--dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
+-            << "--level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
+-            << "--tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
 +  std::cerr << std::endl << "compile-lm - compiles an ARPA format LM into an IRSTLM format one" << std::endl;
 +  std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       compile-lm [options] <input-file.lm> [output-file.blm]" << std::endl;
 +  if (!msg) std::cerr << std::endl << "DESCRIPTION:" << std::endl
-+		      << "       compile-lm reads a standard LM file in ARPA format and produces" << std::endl
-+		      << "       a compiled representation that the IRST LM toolkit can quickly" << std::endl
-+		      << "       read and process. LM file can be compressed with gzip." << std::endl;
-+  std::cerr << std::endl << "OPTIONS:" << std::endl
-+	    << "       --text|-t [yes|no]  (output is again in text format)" << std::endl
-+	    << "       --invert|-i [yes|no]  (build an inverted n-gram binary table for fast access: default no)" << std::endl
-+	    << "       --filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
-+	    << "       --keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
-+	    << "       --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
-+	    << "       --randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
-+	    << "       --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
-+	    << "       --score|-s [yes|no]  (computes log-prob scores of n-grams from standard input)"<< std::endl
-+	    << "       --debug|-d 1 (verbose output for --eval option)"<< std::endl
-+	    << "       --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
-+	    << "       --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
-+	    << "       --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+	    << "       --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+	    << "       --level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
-+	    << "       --tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
++                        << "       compile-lm reads a standard LM file in ARPA format and produces" << std::endl
++                        << "       a compiled representation that the IRST LM toolkit can quickly" << std::endl
++                        << "       read and process. LM file can be compressed with gzip." << std::endl;
++ std::cerr << std::endl << "OPTIONS:" << std::endl
++            << "       --text|-t [yes|no]  (output is again in text format)" << std::endl
++            << "       --invert|-i [yes|no]  (build an inverted n-gram binary table for fast access: default no)" << std::endl
++            << "       --filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
++            << "       --keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
++            << "       --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
++            << "       --randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
++            << "       --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
++            << "       --score|-s [yes|no]  (computes log-prob scores of n-grams from standard input)"<< std::endl
++            << "       --debug|-d 1 (verbose output for --eval option)"<< std::endl
++            << "       --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
++            << "       --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
++            << "       --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++            << "       --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++            << "       --level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
++            << "       --tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
  }
  
- bool starts_with(const std::string &s, const std::string &pre) {
+ bool starts_with(const std::string &s, const std::string &pre)
 --- a/src/dict.cpp
 +++ b/src/dict.cpp
-@@ -78,23 +78,26 @@
- 	
- 	if (inp==NULL)
-     {
--		std::cerr << "\nUsage: \ndict -i=inputfile [options]\n";
--		std::cerr << "(inputfile can be a corpus or a dictionary)\n\n";
--		std::cerr << "Options:\n";
--		std::cerr << "-o=outputfile\n";
--		std::cerr << "-f=[yes|no] (output word frequencies, default is false)\n";
--		std::cerr << "-sort=[yes|no] (sort dictionary by frequency, default is false)\n";
--		std::cerr << "-pf=<freq>  (prune words with frequency below <freq>\n";
--		std::cerr << "-pr=<rank>  (prune words with frequency rank above <rank>\n";
--		std::cerr << "-is= (interruption symbol) \n";
--		std::cerr << "-c=[yes|no] (show dictionary growth curve)\n";
--		std::cerr << "-cs=curvesize (default 10)\n";
--		std::cerr << "-t=testfile (compute OOV rates on test corpus)\n";
--		std::cerr << "-LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)\n";
--		std::cerr << "-listOOV=[yes|no] (print OOV words to stderr, default is false)\n\n";
-+  std::cerr << std::endl << "dict - extracts a dictionary" << std::endl;
-+  std::cerr << std::endl << "USAGE:"  << std::endl
+@@ -77,20 +77,23 @@
+   GetParams(&argc, &argv, (char*) NULL);
+ 
+   if (inp==NULL) {
+-    std::cerr << "\nUsage: \ndict -i=inputfile [options]\n";
+-    std::cerr << "(inputfile can be a corpus or a dictionary)\n\n";
+-    std::cerr << "Options:\n";
+-    std::cerr << "-o=outputfile\n";
+-    std::cerr << "-f=[yes|no] (output word frequencies, default is false)\n";
+-    std::cerr << "-sort=[yes|no] (sort dictionary by frequency, default is false)\n";
+-    std::cerr << "-pf=<freq>  (prune words with frequency below <freq>\n";
+-    std::cerr << "-pr=<rank>  (prune words with frequency rank above <rank>\n";
+-    std::cerr << "-is= (interruption symbol) \n";
+-    std::cerr << "-c=[yes|no] (show dictionary growth curve)\n";
+-    std::cerr << "-cs=curvesize (default 10)\n";
+-    std::cerr << "-t=testfile (compute OOV rates on test corpus)\n";
+-    std::cerr << "-LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)\n";
+-    std::cerr << "-listOOV=[yes|no] (print OOV words to stderr, default is false)\n\n";
++    std::cerr << std::endl << "dict - extracts a dictionary" << std::endl;
++    std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       dict -i=<inputfile> [options]" << std::endl;
-+  std::cerr << std::endl << "DESCRIPTION:" << std::endl
++    std::cerr << std::endl << "DESCRIPTION:" << std::endl
 +	    << "       dict extracts a dictionary from a corpus or a dictionary." << std::endl;
-+  std::cerr << std::endl << "OPTIONS:" << std::endl;
-+  std::cerr << "       -o=outputfile" << std::endl;
-+  std::cerr << "       -f=[yes|no] (output word frequencies, default is false)" << std::endl;
-+  std::cerr << "       -sort=[yes|no] (sort dictionary by frequency, default is false)" << std::endl;
-+  std::cerr << "       -pf=<freq>  (prune words with frequency below <freq>" << std::endl;
-+  std::cerr << "       -pr=<rank>  (prune words with frequency rank above <rank>" << std::endl;
-+  std::cerr << "       -is= (interruption symbol)" << std::endl;
-+  std::cerr << "       -c=[yes|no] (show dictionary growth curve)" << std::endl;
-+  std::cerr << "       -cs=curvesize (default 10)" << std::endl;
-+  std::cerr << "       -t=testfile (compute OOV rates on test corpus)" << std::endl;
-+  std::cerr << "       -LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)" << std::endl;
-+  std::cerr << "       -listOOV=[yes|no] (print OOV words to stderr, default is false)" << std::endl << std::endl;
- 		
- 		
--		exit(1);
-+  exit(1);
-     };
- 	
- 	// options compatibility issues:
++    std::cerr << std::endl << "OPTIONS:" << std::endl;
++    std::cerr << "       -o=outputfile" << std::endl;
++    std::cerr << "       -f=[yes|no] (output word frequencies, default is false)" << std::endl;
++    std::cerr << "       -sort=[yes|no] (sort dictionary by frequency, default is false)" << std::endl;
++    std::cerr << "       -pf=<freq>  (prune words with frequency below <freq>" << std::endl;
++    std::cerr << "       -pr=<rank>  (prune words with frequency rank above <rank>" << std::endl;
++    std::cerr << "       -is= (interruption symbol)" << std::endl;
++    std::cerr << "       -c=[yes|no] (show dictionary growth curve)" << std::endl;
++    std::cerr << "       -cs=curvesize (default 10)" << std::endl;
++    std::cerr << "       -t=testfile (compute OOV rates on test corpus)" << std::endl;
++    std::cerr << "       -LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)" << std::endl;
++    std::cerr << "       -listOOV=[yes|no] (print OOV words to stderr, default is false)" << std::endl << std::endl;
+ 
+ 
+     exit(1);
 --- a/src/interpolate-lm.cpp
 +++ b/src/interpolate-lm.cpp
-@@ -51,27 +51,28 @@
- 
- void usage(const char *msg = 0) {
-   if (msg) { std::cerr << msg << std::endl; }
+@@ -62,27 +62,29 @@
+   if (msg) {
+     std::cerr << msg << std::endl;
+   }
 -  std::cerr << "Usage: interpolate-lm [options] lm-list-file [lm-list-file.out]" << std::endl;
 -  if (!msg) std::cerr << std::endl
--		      << "  interpolate-lm reads a LM list file including interpolation weights " << std::endl
--		      << "  with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
--		      << "  It estimates new weights on a development text, " << std::endl
--		      << "  computes the perplexity on an evaluation text, " << std::endl
--		      << "  computes probabilities of n-grams read from stdin." << std::endl
--		      << "  It reads LMs in ARPA and IRSTLM binary format." << std::endl  << std::endl;
--			
+-                        << "  interpolate-lm reads a LM list file including interpolation weights " << std::endl
+-                        << "  with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
+-                        << "  It estimates new weights on a development text, " << std::endl
+-                        << "  computes the perplexity on an evaluation text, " << std::endl
+-                        << "  computes probabilities of n-grams read from stdin." << std::endl
+-                        << "  It reads LMs in ARPA and IRSTLM binary format." << std::endl  << std::endl;
++  std::cerr << std::endl << "interpolate-lm - interpolates language models" << std::endl;
++  std::cerr << std::endl << "USAGE:"  << std::endl
++	    << "       interpolate-lm [options] <lm-list-file> [lm-list-file.out]" << std::endl;
++  if (!msg) std::cerr << std::endl << "DESCRIPTION:" << std::endl
++                        << "       interpolate-lm reads a LM list file including interpolation weights " << std::endl
++                        << "       with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
++                        << "       It estimates new weights on a development text, " << std::endl
++                        << "       computes the perplexity on an evaluation text, " << std::endl
++                        << "       computes probabilities of n-grams read from stdin." << std::endl
++                        << "       It reads LMs in ARPA and IRSTLM binary format." << std::endl;
+ 
 -  std::cerr << "Options:\n"
 -            << "--learn|-l text-file learn optimal interpolation for text-file"<< std::endl
 -            << "--order|-o n         order of n-grams used in --learn (optional)"<< std::endl
@@ -643,41 +649,30 @@ Forwarded: no
 -            << "--memmap| -mm 1      use memory map to read a binary LM" << std::endl
 -            << "--ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
 -            << "--dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
--            << "--level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl; 
-+  std::cerr << std::endl << "interpolate-lm - interpolates language models" << std::endl;
-+  std::cerr << std::endl << "USAGE:"  << std::endl
-+	    << "       interpolate-lm [options] <lm-list-file> [lm-list-file.out]" << std::endl;
-+  if (!msg) std::cerr << std::endl << "DESCRIPTION:" << std::endl
-+		      << "       interpolate-lm reads a LM list file including interpolation weights " << std::endl
-+		      << "       with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
-+		      << "       It estimates new weights on a development text, " << std::endl
-+		      << "       computes the perplexity on an evaluation text, " << std::endl
-+		      << "       computes probabilities of n-grams read from stdin." << std::endl
-+		      << "       It reads LMs in ARPA and IRSTLM binary format." << std::endl;
+-            << "--level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
 +  std::cerr << std::endl << "OPTIONS:" << std::endl
-+	    << "       --learn|-l text-file learn optimal interpolation for text-file"<< std::endl
-+	    << "       --order|-o n order of n-grams used in --learn (optional)"<< std::endl
-+	    << "       --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
-+	    << "       --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
-+	    << "       --score|-s [yes|no]  (computes log-prob scores of n-grams from standard input)"<< std::endl
-+	    << "       --debug|-d [1-3] verbose output for --eval option (see compile-lm)"<< std::endl
-+	    << "       --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
-+	    << "       --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
-+	    << "       --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+	    << "       --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+	    << "       --level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
++            << "       --learn|-l text-file learn optimal interpolation for text-file"<< std::endl
++            << "       --order|-o n order of n-grams used in --learn (optional)"<< std::endl
++            << "       --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
++            << "       --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
++            << "       --score|-s [yes|no]  (computes log-prob scores of n-grams from standard input)"<< std::endl
++            << "       --debug|-d [1-3] verbose output for --eval option (see compile-lm)"<< std::endl
++            << "       --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
++            << "       --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
++            << "       --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++            << "       --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++            << "       --level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
  }
  
  
 --- a/src/ngt.cpp
 +++ b/src/ngt.cpp
-@@ -125,9 +125,35 @@
+@@ -125,8 +125,34 @@
    GetParams(&argc, &argv, (char*) NULL);
-   
-   if (inp==NULL){
+ 
+   if (inp==NULL) {
 -    cerr <<"No input was specified\n";
 -    exit(1);
--  };
 +  std::cerr << std::endl << "ngt - collects n-grams" << std::endl;
 +  std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       ngt -i=<inputfile> [options]" << std::endl;
@@ -706,38 +701,39 @@ Forwarded: no
 +  std::cerr << "       -iknstat=file (filename to save IKN statistics)" << std::endl;
 +  std::cerr << std::endl;
 +  exit(1);
-+    };
- 	
+   };
+ 
    if (out==NULL)
-     cerr << "Warning: no output file specified!\n";
 --- a/src/plsa.cpp
 +++ b/src/plsa.cpp
-@@ -124,28 +124,55 @@
- 	GetParams(&argc, &argv, (char*) NULL);
- 	
- 	if (argc==1 || help){
--		cerr <<"plsa: IRSTLM tool for Probabilistic Latent Semantic Analysis LM inference\n\n";
+@@ -123,29 +123,57 @@
+ 
+   GetParams(&argc, &argv, (char*) NULL);
+ 
+-  if (argc==1 || help) {
+-    cerr <<"plsa: IRSTLM tool for Probabilistic Latent Semantic Analysis LM inference\n\n";
+ 
+-    cerr <<"Usage (1): plsa -c=<collection> -d=<dictionary> -m=<model> -t=<topics> -it=<iter>\n\n";
+-    cerr <<"Train a PLSA model. Parameters specify collection and dictionary filenames\n";
+-    cerr <<"number of EM iterations, number of topics, and model filename. The collection\n";
+-    cerr <<"must begin with the number of documents and documents should be separated\n";
+-    cerr <<"with the </d> tag. The begin document tag <d> is not considered.\n";
+-    cerr <<"Example:\n";
+-    cerr <<"3\n";
+-    cerr <<"<d> hello world ! </d>\n";
+-    cerr <<"<d> good morning good afternoon </d>\n";
+-    cerr <<"<d> welcome aboard </d>\n\n";
 -
--		cerr <<"Usage (1): plsa -c=<collection> -d=<dictionary> -m=<model> -t=<topics> -it=<iter>\n\n";
--		cerr <<"Train a PLSA model. Parameters specify collection and dictionary filenames\n";
--		cerr <<"number of EM iterations, number of topics, and model filename. The collection\n";
--		cerr <<"must begin with the number of documents and documents should be separated\n";
--		cerr <<"with the </d> tag. The begin document tag <d> is not considered.\n";
--		cerr <<"Example:\n";
--		cerr <<"3\n";
--		cerr <<"<d> hello world ! </d>\n";
--		cerr <<"<d> good morning good afternoon </d>\n";
--		cerr <<"<d> welcome aboard </d>\n\n";
+-    cerr <<"Usage (2): plsa -c=<text collection> -d=<dictionary> -b=<binary collection>\n\n";
+-    cerr <<"Binarize a textual document collection to speed-up training (1)\n";
+-    cerr <<"\n";
 -
--		cerr <<"Usage (2): plsa -c=<text collection> -d=<dictionary> -b=<binary collection>\n\n";
--		cerr <<"Binarize a textual document collection to speed-up training (1)\n";
--		cerr <<"\n";
--		
--		cerr <<"Usage (3): plsa -d=<dictionary> -m=<model> -t=<topics> -inf=<text> -f=<features> -it=<iterations>\n\n";
--		cerr <<"Infer a full 1-gram distribution from a model and a small text. The 1-gram\n";
--		cerr <<"is saved in the feature file. The 1-gram\n";
--		cerr <<"\n";
--		exit(1);	
+-    cerr <<"Usage (3): plsa -d=<dictionary> -m=<model> -t=<topics> -inf=<text> -f=<features> -it=<iterations>\n\n";
+-    cerr <<"Infer a full 1-gram distribution from a model and a small text. The 1-gram\n";
+-    cerr <<"is saved in the feature file. The 1-gram\n";
+-    cerr <<"\n";
+-    exit(1);
++	if (argc==1 || help){
 +  std::cerr << std::endl << "plsa - performs probabilistic latent semantic analysis LM inference" << std::endl;
 +  std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       plsa -c=<text_collection> -d=<dictionary> -m=<model> -t=<topics> -it=<iter> [options]" << std::endl
@@ -787,15 +783,15 @@ Forwarded: no
 +  std::cerr <<"           text. The 1-gram is saved in the feature file. The 1-gram" << std::endl;
 +  std::cerr << std::endl;
 +  exit(1);
- 	}
- 	
- 	if (!dictfile)
+   }
+ 
+   if (!dictfile) {
 --- a/src/prune-lm.cpp
 +++ b/src/prune-lm.cpp
-@@ -41,16 +41,20 @@
- 
- void usage(const char *msg = 0) {
-   if (msg) { std::cerr << msg << std::endl; }
+@@ -44,16 +44,20 @@
+   if (msg) {
+     std::cerr << msg << std::endl;
+   }
 -  std::cerr << "Usage: prune-lm [--threshold=th2,th3,...] [--abs=1|0] input-file [output-file]" << std::endl << std::endl;
 -  std::cerr << "    prune-lm reads a LM in either ARPA or compiled format and" << std::endl;
 -  std::cerr << "    prunes out n-grams (n=2,3,..) for which backing-off to the" << std::endl;
@@ -825,19 +821,19 @@ Forwarded: no
  
 --- a/src/quantize-lm.cpp
 +++ b/src/quantize-lm.cpp
-@@ -74,17 +74,25 @@
- 
- void usage(const char *msg = 0) {
-   if (msg) { std::cerr << msg << std::endl; }
+@@ -78,15 +78,23 @@
+   if (msg) {
+     std::cerr << msg << std::endl;
+   }
 -  std::cerr << "Usage: quantize-lm input-file.lm [output-file.qlm [tmpfile]] " << std::endl;
 -  if (!msg) std::cerr << std::endl
--    << "  quantize-lm reads a standard LM file in ARPA format and produces" << std::endl
--    << "  a version of it with quantized probabilities and back-off weights"<< std::endl
--    << "  that the IRST LMtoolkit can compile. Accepts LMs with .gz suffix." << std::endl
--    << "  You can specify the output file to be created and also the pathname " << std::endl
--    << "  of a temporary file used by the program. As default, the temporary "  << std::endl 
--    << "  file is created in the /tmp directory. Output file can be " << std::endl
--    << "  written to standard output by using the special name -. "  << std::endl;
+-                        << "  quantize-lm reads a standard LM file in ARPA format and produces" << std::endl
+-                        << "  a version of it with quantized probabilities and back-off weights"<< std::endl
+-                        << "  that the IRST LMtoolkit can compile. Accepts LMs with .gz suffix." << std::endl
+-                        << "  You can specify the output file to be created and also the pathname " << std::endl
+-                        << "  of a temporary file used by the program. As default, the temporary "  << std::endl
+-                        << "  file is created in the /tmp directory. Output file can be " << std::endl
+-                        << "  written to standard output by using the special name -. "  << std::endl;
 +  std::cerr << std::endl << "quantize-lm - quantizes probabilities and back-off weights" << std::endl;
 +  std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       quantize-lm <input-file.lm> [<output-file.qlm> [<tmpfile>]]" << std::endl;
@@ -850,36 +846,35 @@ Forwarded: no
 +    << "       file is created in the /tmp directory. Output file can be" << std::endl
 +    << "       written to standard output by using the special name -."  << std::endl;
 +  std::cerr << std::endl;
-   }
- 
++  }
++
 +void handle_option(const std::string& opt, int argc, const char **argv, int& argi)
 +{
 +  if (opt == "--help" || opt == "-h") { usage(); exit(1); }
-+}
-+
+ }
  
- int main(int argc, const char **argv)
- {
-@@ -95,6 +103,7 @@
+ 
+@@ -102,6 +110,7 @@
    std::vector<std::string> files;
    for (int i=1; i < argc; i++) {
      std::string opt = argv[i];
 +    if(opt[0] == '-') handle_option(opt, argc, argv, i);
      files.push_back(opt);
    }
-   if (files.size() > 3) { usage("Too many arguments"); exit(1); }
+   if (files.size() > 3) {
 --- a/src/score-lm.cpp
 +++ b/src/score-lm.cpp
-@@ -30,12 +30,16 @@
- 
+@@ -31,13 +31,17 @@
  
- void usage() {
--	std::cerr <<	"Usage: score-lm -lm <model> [-dub <dub>] [-mm 1]\n"
--			"       score sentences with a language model\n"
--			"       -lm      language model to use (must be specified)\n"
--			"       -dub     dictionary upper bound (default: 10000000)\n"
--			"       -level   max level to load from the language models (default: 1000, meaning the actual LM order)\n"
--			"       -mm 1    memory-mapped access to lm\n";
+ void usage()
+ {
+-  std::cerr <<	"Usage: score-lm -lm <model> [-dub <dub>] [-mm 1]\n"
+-            "       score sentences with a language model\n"
+-            "       -lm      language model to use (must be specified)\n"
+-            "       -dub     dictionary upper bound (default: 10000000)\n"
+-            "       -level   max level to load from the language models (default: 1000, meaning the actual LM order)\n"
+-            "       -mm 1    memory-mapped access to lm\n";
+-  exit(1);
 +  std::cerr << std::endl << "score-lm - scores sentences with a language model" << std::endl;
 +  std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       score-lm -lm <model>  [options]" << std::endl;
@@ -890,17 +885,18 @@ Forwarded: no
 +  std::cerr << "           meaning the actual LM order)" << std::endl;
 +  std::cerr << "       -mm 1    memory-mapped access to lm (default: 0)" << std::endl;
 +  std::cerr << std::endl;
- 	exit(1);
++	exit(1);
  }
  
+ int main(int argc, char **argv)
 --- a/src/tlm.cpp
 +++ b/src/tlm.cpp
-@@ -236,8 +236,53 @@
- 	
- 	if (!trainfile || !lmtype)
-     {
--		cerr <<"Missing parameters\n";
--		exit(1);
+@@ -240,8 +240,53 @@
+   GetParams(&argc, &argv, (char*) NULL);
+ 
+   if (!lmtype || (!trainfile && lmtype!=MIXTURE)) {
+-    cerr <<"Missing parameters\n";
+-    exit(1);
 +  std::cerr << std::endl << "tlm - trains and tests language models" << std::endl;
 +  std::cerr << std::endl << "USAGE:"  << std::endl
 +	    << "       tlm [options]" << std::endl;
@@ -948,6 +944,6 @@ Forwarded: no
 +  std::cerr << "       -Beta|-beta=<double> (default: -1.0)" << std::endl;
 +  std::cerr << std::endl;
 +  exit(1);
-     };
- 	
- 	
+   };
+ 
+ 

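Every hunk above rewrites a tool's ad-hoc usage message into the same USAGE/DESCRIPTION/OPTIONS layout. A minimal, self-contained C++ sketch of that layout follows; the tool name "example-tool" and its single option are placeholders for illustration only and are not part of the commit.

#include <iostream>

// Sketch of the uniform help-message layout the patch applies across the
// IRSTLM tools: one-line summary, then USAGE, DESCRIPTION, and OPTIONS
// sections. "example-tool" is hypothetical.
void usage(const char *msg = 0)
{
  if (msg) {
    std::cerr << msg << std::endl;
  }
  std::cerr << std::endl << "example-tool - does something useful" << std::endl;
  std::cerr << std::endl << "USAGE:" << std::endl
            << "       example-tool [options] <input-file>" << std::endl;
  // As in the patched tools, the long description is printed only when
  // usage() is called without an error message.
  if (!msg)
    std::cerr << std::endl << "DESCRIPTION:" << std::endl
              << "       example-tool reads <input-file> and writes a result." << std::endl;
  std::cerr << std::endl << "OPTIONS:" << std::endl
            << "       --help|-h  (print this help message)" << std::endl;
}

int main(int argc, char **argv)
{
  if (argc < 2) {
    usage("Missing parameters");
    return 1;
  }
  return 0;
}
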
-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git