[irstlm] 33/146: Updated online_documentation_enhancments.patch.
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:37:04 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to branch master
in repository irstlm.
commit 371f782cb2f210456988a63589a3649ff2cda591
Author: Giulio Paci <giuliopaci at gmail.com>
Date: Tue Sep 4 03:48:26 2012 +0200
Updated online_documentation_enhancments.patch.
---
.../patches/online_documentation_enhancments.patch | 380 ++++++++++-----------
1 file changed, 188 insertions(+), 192 deletions(-)
diff --git a/debian/patches/online_documentation_enhancments.patch b/debian/patches/online_documentation_enhancments.patch
index 39bce80..a6e9558 100644
--- a/debian/patches/online_documentation_enhancments.patch
+++ b/debian/patches/online_documentation_enhancments.patch
@@ -156,7 +156,7 @@ Forwarded: no
v)
verbose="--verbose";
@@ -107,7 +110,6 @@
- exit 4;
+ exit 4
esac
;;
-
@@ -522,116 +522,122 @@ Forwarded: no
--- a/src/compile-lm.cpp
+++ b/src/compile-lm.cpp
-@@ -55,27 +55,29 @@
- void usage(const char *msg = 0) {
-
- if (msg) { std::cerr << msg << std::endl; }
+@@ -58,27 +58,29 @@
+ if (msg) {
+ std::cerr << msg << std::endl;
+ }
- std::cerr << "Usage: compile-lm [options] input-file.lm [output-file.blm]" << std::endl;
- if (!msg) std::cerr << std::endl
-- << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl
-- << " a compiled representation that the IRST LM toolkit can quickly" << std::endl
-- << " read and process. LM file can be compressed with gzip." << std::endl << std::endl;
+- << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl
+- << " a compiled representation that the IRST LM toolkit can quickly" << std::endl
+- << " read and process. LM file can be compressed with gzip." << std::endl << std::endl;
- std::cerr << "Options:\n"
-- << "--text|-t [yes|no] (output is again in text format)" << std::endl
-- << "--invert|-i [yes|no] (build an inverted n-gram binary table for fast access: default no)" << std::endl
-- << "--filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
-- << "--keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
-- << "--eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
-- << "--randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
-- << "--dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
-- << "--score|-s [yes|no] (computes log-prob scores from standard input)"<< std::endl
-- << "--debug|-d 1 (verbose output for --eval option)"<< std::endl
-- << "--sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
-- << "--memmap|-mm 1 (uses memory map to read a binary LM)"<< std::endl
-- << "--ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-- << "--dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-- << "--level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
-- << "--tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
+- << "--text|-t [yes|no] (output is again in text format)" << std::endl
+- << "--invert|-i [yes|no] (build an inverted n-gram binary table for fast access: default no)" << std::endl
+- << "--filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
+- << "--keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
+- << "--eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
+- << "--randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
+- << "--dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
+- << "--score|-s [yes|no] (computes log-prob scores from standard input)"<< std::endl
+- << "--debug|-d 1 (verbose output for --eval option)"<< std::endl
+- << "--sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
+- << "--memmap|-mm 1 (uses memory map to read a binary LM)"<< std::endl
+- << "--ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
+- << "--dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
+- << "--level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
+- << "--tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
+ std::cerr << std::endl << "compile-lm - compiles an ARPA format LM into an IRSTLM format one" << std::endl;
+ std::cerr << std::endl << "USAGE:" << std::endl
+ << " compile-lm [options] <input-file.lm> [output-file.blm]" << std::endl;
+ if (!msg) std::cerr << std::endl << "DESCRIPTION:" << std::endl
-+ << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl
-+ << " a compiled representation that the IRST LM toolkit can quickly" << std::endl
-+ << " read and process. LM file can be compressed with gzip." << std::endl;
-+ std::cerr << std::endl << "OPTIONS:" << std::endl
-+ << " --text|-t [yes|no] (output is again in text format)" << std::endl
-+ << " --invert|-i [yes|no] (build an inverted n-gram binary table for fast access: default no)" << std::endl
-+ << " --filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
-+ << " --keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
-+ << " --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
-+ << " --randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
-+ << " --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
-+ << " --score|-s [yes|no] (computes log-prob scores of n-grams from standard input)"<< std::endl
-+ << " --debug|-d 1 (verbose output for --eval option)"<< std::endl
-+ << " --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
-+ << " --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
-+ << " --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+ << " --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+ << " --level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
-+ << " --tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
++ << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl
++ << " a compiled representation that the IRST LM toolkit can quickly" << std::endl
++ << " read and process. LM file can be compressed with gzip." << std::endl;
++ std::cerr << std::endl << "OPTIONS:" << std::endl
++ << " --text|-t [yes|no] (output is again in text format)" << std::endl
++ << " --invert|-i [yes|no] (build an inverted n-gram binary table for fast access: default no)" << std::endl
++ << " --filter|-f wordlist (filter a binary language model with a word list)"<< std::endl
++ << " --keepunigrams|-ku [yes|no] (filter by keeping all unigrams in the table: default yes)"<< std::endl
++ << " --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
++ << " --randcalls|-r N (computes N random calls on the eval text-file)"<< std::endl
++ << " --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
++ << " --score|-s [yes|no] (computes log-prob scores of n-grams from standard input)"<< std::endl
++ << " --debug|-d 1 (verbose output for --eval option)"<< std::endl
++ << " --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
++ << " --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
++ << " --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++ << " --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++ << " --level|l <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl
++ << " --tmpdir <directory> (directory for temporary computation, default is either the environment variable TMP if defined or \"/tmp\")" << std::endl;
}
- bool starts_with(const std::string &s, const std::string &pre) {
+ bool starts_with(const std::string &s, const std::string &pre)
--- a/src/dict.cpp
+++ b/src/dict.cpp
-@@ -78,23 +78,26 @@
-
- if (inp==NULL)
- {
-- std::cerr << "\nUsage: \ndict -i=inputfile [options]\n";
-- std::cerr << "(inputfile can be a corpus or a dictionary)\n\n";
-- std::cerr << "Options:\n";
-- std::cerr << "-o=outputfile\n";
-- std::cerr << "-f=[yes|no] (output word frequencies, default is false)\n";
-- std::cerr << "-sort=[yes|no] (sort dictionary by frequency, default is false)\n";
-- std::cerr << "-pf=<freq> (prune words with frequency below <freq>\n";
-- std::cerr << "-pr=<rank> (prune words with frequency rank above <rank>\n";
-- std::cerr << "-is= (interruption symbol) \n";
-- std::cerr << "-c=[yes|no] (show dictionary growth curve)\n";
-- std::cerr << "-cs=curvesize (default 10)\n";
-- std::cerr << "-t=testfile (compute OOV rates on test corpus)\n";
-- std::cerr << "-LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)\n";
-- std::cerr << "-listOOV=[yes|no] (print OOV words to stderr, default is false)\n\n";
-+ std::cerr << std::endl << "dict - extracts a dictionary" << std::endl;
-+ std::cerr << std::endl << "USAGE:" << std::endl
+@@ -77,20 +77,23 @@
+ GetParams(&argc, &argv, (char*) NULL);
+
+ if (inp==NULL) {
+- std::cerr << "\nUsage: \ndict -i=inputfile [options]\n";
+- std::cerr << "(inputfile can be a corpus or a dictionary)\n\n";
+- std::cerr << "Options:\n";
+- std::cerr << "-o=outputfile\n";
+- std::cerr << "-f=[yes|no] (output word frequencies, default is false)\n";
+- std::cerr << "-sort=[yes|no] (sort dictionary by frequency, default is false)\n";
+- std::cerr << "-pf=<freq> (prune words with frequency below <freq>\n";
+- std::cerr << "-pr=<rank> (prune words with frequency rank above <rank>\n";
+- std::cerr << "-is= (interruption symbol) \n";
+- std::cerr << "-c=[yes|no] (show dictionary growth curve)\n";
+- std::cerr << "-cs=curvesize (default 10)\n";
+- std::cerr << "-t=testfile (compute OOV rates on test corpus)\n";
+- std::cerr << "-LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)\n";
+- std::cerr << "-listOOV=[yes|no] (print OOV words to stderr, default is false)\n\n";
++ std::cerr << std::endl << "dict - extracts a dictionary" << std::endl;
++ std::cerr << std::endl << "USAGE:" << std::endl
+ << " dict -i=<inputfile> [options]" << std::endl;
-+ std::cerr << std::endl << "DESCRIPTION:" << std::endl
++ std::cerr << std::endl << "DESCRIPTION:" << std::endl
+ << " dict extracts a dictionary from a corpus or a dictionary." << std::endl;
-+ std::cerr << std::endl << "OPTIONS:" << std::endl;
-+ std::cerr << " -o=outputfile" << std::endl;
-+ std::cerr << " -f=[yes|no] (output word frequencies, default is false)" << std::endl;
-+ std::cerr << " -sort=[yes|no] (sort dictionary by frequency, default is false)" << std::endl;
-+ std::cerr << " -pf=<freq> (prune words with frequency below <freq>" << std::endl;
-+ std::cerr << " -pr=<rank> (prune words with frequency rank above <rank>" << std::endl;
-+ std::cerr << " -is= (interruption symbol)" << std::endl;
-+ std::cerr << " -c=[yes|no] (show dictionary growth curve)" << std::endl;
-+ std::cerr << " -cs=curvesize (default 10)" << std::endl;
-+ std::cerr << " -t=testfile (compute OOV rates on test corpus)" << std::endl;
-+ std::cerr << " -LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)" << std::endl;
-+ std::cerr << " -listOOV=[yes|no] (print OOV words to stderr, default is false)" << std::endl << std::endl;
-
-
-- exit(1);
-+ exit(1);
- };
-
- // options compatibility issues:
++ std::cerr << std::endl << "OPTIONS:" << std::endl;
++ std::cerr << " -o=outputfile" << std::endl;
++ std::cerr << " -f=[yes|no] (output word frequencies, default is false)" << std::endl;
++ std::cerr << " -sort=[yes|no] (sort dictionary by frequency, default is false)" << std::endl;
++ std::cerr << " -pf=<freq> (prune words with frequency below <freq>" << std::endl;
++ std::cerr << " -pr=<rank> (prune words with frequency rank above <rank>" << std::endl;
++ std::cerr << " -is= (interruption symbol)" << std::endl;
++ std::cerr << " -c=[yes|no] (show dictionary growth curve)" << std::endl;
++ std::cerr << " -cs=curvesize (default 10)" << std::endl;
++ std::cerr << " -t=testfile (compute OOV rates on test corpus)" << std::endl;
++ std::cerr << " -LoadFactor=<value> (set the load factor for cache; it should be a positive real value; if not defined a default value is used)" << std::endl;
++ std::cerr << " -listOOV=[yes|no] (print OOV words to stderr, default is false)" << std::endl << std::endl;
+
+
+ exit(1);
--- a/src/interpolate-lm.cpp
+++ b/src/interpolate-lm.cpp
-@@ -51,27 +51,28 @@
-
- void usage(const char *msg = 0) {
- if (msg) { std::cerr << msg << std::endl; }
+@@ -62,27 +62,29 @@
+ if (msg) {
+ std::cerr << msg << std::endl;
+ }
- std::cerr << "Usage: interpolate-lm [options] lm-list-file [lm-list-file.out]" << std::endl;
- if (!msg) std::cerr << std::endl
-- << " interpolate-lm reads a LM list file including interpolation weights " << std::endl
-- << " with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
-- << " It estimates new weights on a development text, " << std::endl
-- << " computes the perplexity on an evaluation text, " << std::endl
-- << " computes probabilities of n-grams read from stdin." << std::endl
-- << " It reads LMs in ARPA and IRSTLM binary format." << std::endl << std::endl;
--
+- << " interpolate-lm reads a LM list file including interpolation weights " << std::endl
+- << " with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
+- << " It estimates new weights on a development text, " << std::endl
+- << " computes the perplexity on an evaluation text, " << std::endl
+- << " computes probabilities of n-grams read from stdin." << std::endl
+- << " It reads LMs in ARPA and IRSTLM binary format." << std::endl << std::endl;
++ std::cerr << std::endl << "interpolate-lm - interpolates language models" << std::endl;
++ std::cerr << std::endl << "USAGE:" << std::endl
++ << " interpolate-lm [options] <lm-list-file> [lm-list-file.out]" << std::endl;
++ if (!msg) std::cerr << std::endl << "DESCRIPTION:" << std::endl
++ << " interpolate-lm reads a LM list file including interpolation weights " << std::endl
++ << " with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
++ << " It estimates new weights on a development text, " << std::endl
++ << " computes the perplexity on an evaluation text, " << std::endl
++ << " computes probabilities of n-grams read from stdin." << std::endl
++ << " It reads LMs in ARPA and IRSTLM binary format." << std::endl;
+
- std::cerr << "Options:\n"
- << "--learn|-l text-file learn optimal interpolation for text-file"<< std::endl
- << "--order|-o n order of n-grams used in --learn (optional)"<< std::endl
@@ -643,41 +649,30 @@ Forwarded: no
- << "--memmap| -mm 1 use memory map to read a binary LM" << std::endl
- << "--ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
- << "--dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-- << "--level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
-+ std::cerr << std::endl << "interpolate-lm - interpolates language models" << std::endl;
-+ std::cerr << std::endl << "USAGE:" << std::endl
-+ << " interpolate-lm [options] <lm-list-file> [lm-list-file.out]" << std::endl;
-+ if (!msg) std::cerr << std::endl << "DESCRIPTION:" << std::endl
-+ << " interpolate-lm reads a LM list file including interpolation weights " << std::endl
-+ << " with the format: N\\n w1 lm1 \\n w2 lm2 ...\\n wN lmN\n" << std::endl
-+ << " It estimates new weights on a development text, " << std::endl
-+ << " computes the perplexity on an evaluation text, " << std::endl
-+ << " computes probabilities of n-grams read from stdin." << std::endl
-+ << " It reads LMs in ARPA and IRSTLM binary format." << std::endl;
+- << "--level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
+ std::cerr << std::endl << "OPTIONS:" << std::endl
-+ << " --learn|-l text-file learn optimal interpolation for text-file"<< std::endl
-+ << " --order|-o n order of n-grams used in --learn (optional)"<< std::endl
-+ << " --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
-+ << " --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
-+ << " --score|-s [yes|no] (computes log-prob scores of n-grams from standard input)"<< std::endl
-+ << " --debug|-d [1-3] verbose output for --eval option (see compile-lm)"<< std::endl
-+ << " --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
-+ << " --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
-+ << " --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+ << " --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
-+ << " --level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
++ << " --learn|-l text-file learn optimal interpolation for text-file"<< std::endl
++ << " --order|-o n order of n-grams used in --learn (optional)"<< std::endl
++ << " --eval|-e text-file (computes perplexity of text-file and returns)"<< std::endl
++ << " --dub dict-size (dictionary upperbound to compute OOV word penalty: default 10^7)"<< std::endl
++ << " --score|-s [yes|no] (computes log-prob scores of n-grams from standard input)"<< std::endl
++ << " --debug|-d [1-3] verbose output for --eval option (see compile-lm)"<< std::endl
++ << " --sentence [yes|no] (compute pperplexity at sentence level (identified through the end symbol)"<< std::endl
++ << " --memmap|-mm 1 (uses memory map to read a binary LM)" << std::endl
++ << " --ngram_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++ << " --dict_load_factor <value> (set the load factor for ngram cache ; it should be a positive real value; if not defined a default value is used)" << std::endl
++ << " --level|lev <value> (set the maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken)" << std::endl;
}
--- a/src/ngt.cpp
+++ b/src/ngt.cpp
-@@ -125,9 +125,35 @@
+@@ -125,8 +125,34 @@
GetParams(&argc, &argv, (char*) NULL);
-
- if (inp==NULL){
+
+ if (inp==NULL) {
- cerr <<"No input was specified\n";
- exit(1);
-- };
+ std::cerr << std::endl << "ngt - collects n-grams" << std::endl;
+ std::cerr << std::endl << "USAGE:" << std::endl
+ << " ngt -i=<inputfile> [options]" << std::endl;
@@ -706,38 +701,39 @@ Forwarded: no
+ std::cerr << " -iknstat=file (filename to save IKN statistics)" << std::endl;
+ std::cerr << std::endl;
+ exit(1);
-+ };
-
+ };
+
if (out==NULL)
- cerr << "Warning: no output file specified!\n";
--- a/src/plsa.cpp
+++ b/src/plsa.cpp
-@@ -124,28 +124,55 @@
- GetParams(&argc, &argv, (char*) NULL);
-
- if (argc==1 || help){
-- cerr <<"plsa: IRSTLM tool for Probabilistic Latent Semantic Analysis LM inference\n\n";
+@@ -123,29 +123,57 @@
+
+ GetParams(&argc, &argv, (char*) NULL);
+
+- if (argc==1 || help) {
+- cerr <<"plsa: IRSTLM tool for Probabilistic Latent Semantic Analysis LM inference\n\n";
+
+- cerr <<"Usage (1): plsa -c=<collection> -d=<dictionary> -m=<model> -t=<topics> -it=<iter>\n\n";
+- cerr <<"Train a PLSA model. Parameters specify collection and dictionary filenames\n";
+- cerr <<"number of EM iterations, number of topics, and model filename. The collection\n";
+- cerr <<"must begin with the number of documents and documents should be separated\n";
+- cerr <<"with the </d> tag. The begin document tag <d> is not considered.\n";
+- cerr <<"Example:\n";
+- cerr <<"3\n";
+- cerr <<"<d> hello world ! </d>\n";
+- cerr <<"<d> good morning good afternoon </d>\n";
+- cerr <<"<d> welcome aboard </d>\n\n";
-
-- cerr <<"Usage (1): plsa -c=<collection> -d=<dictionary> -m=<model> -t=<topics> -it=<iter>\n\n";
-- cerr <<"Train a PLSA model. Parameters specify collection and dictionary filenames\n";
-- cerr <<"number of EM iterations, number of topics, and model filename. The collection\n";
-- cerr <<"must begin with the number of documents and documents should be separated\n";
-- cerr <<"with the </d> tag. The begin document tag <d> is not considered.\n";
-- cerr <<"Example:\n";
-- cerr <<"3\n";
-- cerr <<"<d> hello world ! </d>\n";
-- cerr <<"<d> good morning good afternoon </d>\n";
-- cerr <<"<d> welcome aboard </d>\n\n";
+- cerr <<"Usage (2): plsa -c=<text collection> -d=<dictionary> -b=<binary collection>\n\n";
+- cerr <<"Binarize a textual document collection to speed-up training (1)\n";
+- cerr <<"\n";
-
-- cerr <<"Usage (2): plsa -c=<text collection> -d=<dictionary> -b=<binary collection>\n\n";
-- cerr <<"Binarize a textual document collection to speed-up training (1)\n";
-- cerr <<"\n";
--
-- cerr <<"Usage (3): plsa -d=<dictionary> -m=<model> -t=<topics> -inf=<text> -f=<features> -it=<iterations>\n\n";
-- cerr <<"Infer a full 1-gram distribution from a model and a small text. The 1-gram\n";
-- cerr <<"is saved in the feature file. The 1-gram\n";
-- cerr <<"\n";
-- exit(1);
+- cerr <<"Usage (3): plsa -d=<dictionary> -m=<model> -t=<topics> -inf=<text> -f=<features> -it=<iterations>\n\n";
+- cerr <<"Infer a full 1-gram distribution from a model and a small text. The 1-gram\n";
+- cerr <<"is saved in the feature file. The 1-gram\n";
+- cerr <<"\n";
+- exit(1);
++ if (argc==1 || help){
+ std::cerr << std::endl << "plsa - performs probabilistic latent semantic analysis LM inference" << std::endl;
+ std::cerr << std::endl << "USAGE:" << std::endl
+ << " plsa -c=<text_collection> -d=<dictionary> -m=<model> -t=<topics> -it=<iter> [options]" << std::endl
@@ -787,15 +783,15 @@ Forwarded: no
+ std::cerr <<" text. The 1-gram is saved in the feature file. The 1-gram" << std::endl;
+ std::cerr << std::endl;
+ exit(1);
- }
-
- if (!dictfile)
+ }
+
+ if (!dictfile) {
--- a/src/prune-lm.cpp
+++ b/src/prune-lm.cpp
-@@ -41,16 +41,20 @@
-
- void usage(const char *msg = 0) {
- if (msg) { std::cerr << msg << std::endl; }
+@@ -44,16 +44,20 @@
+ if (msg) {
+ std::cerr << msg << std::endl;
+ }
- std::cerr << "Usage: prune-lm [--threshold=th2,th3,...] [--abs=1|0] input-file [output-file]" << std::endl << std::endl;
- std::cerr << " prune-lm reads a LM in either ARPA or compiled format and" << std::endl;
- std::cerr << " prunes out n-grams (n=2,3,..) for which backing-off to the" << std::endl;
@@ -825,19 +821,19 @@ Forwarded: no
--- a/src/quantize-lm.cpp
+++ b/src/quantize-lm.cpp
-@@ -74,17 +74,25 @@
-
- void usage(const char *msg = 0) {
- if (msg) { std::cerr << msg << std::endl; }
+@@ -78,15 +78,23 @@
+ if (msg) {
+ std::cerr << msg << std::endl;
+ }
- std::cerr << "Usage: quantize-lm input-file.lm [output-file.qlm [tmpfile]] " << std::endl;
- if (!msg) std::cerr << std::endl
-- << " quantize-lm reads a standard LM file in ARPA format and produces" << std::endl
-- << " a version of it with quantized probabilities and back-off weights"<< std::endl
-- << " that the IRST LMtoolkit can compile. Accepts LMs with .gz suffix." << std::endl
-- << " You can specify the output file to be created and also the pathname " << std::endl
-- << " of a temporary file used by the program. As default, the temporary " << std::endl
-- << " file is created in the /tmp directory. Output file can be " << std::endl
-- << " written to standard output by using the special name -. " << std::endl;
+- << " quantize-lm reads a standard LM file in ARPA format and produces" << std::endl
+- << " a version of it with quantized probabilities and back-off weights"<< std::endl
+- << " that the IRST LMtoolkit can compile. Accepts LMs with .gz suffix." << std::endl
+- << " You can specify the output file to be created and also the pathname " << std::endl
+- << " of a temporary file used by the program. As default, the temporary " << std::endl
+- << " file is created in the /tmp directory. Output file can be " << std::endl
+- << " written to standard output by using the special name -. " << std::endl;
+ std::cerr << std::endl << "quantize-lm - quantizes probabilities and back-off weights" << std::endl;
+ std::cerr << std::endl << "USAGE:" << std::endl
+ << " quantize-lm <input-file.lm> [<output-file.qlm> [<tmpfile>]]" << std::endl;
@@ -850,36 +846,35 @@ Forwarded: no
+ << " file is created in the /tmp directory. Output file can be" << std::endl
+ << " written to standard output by using the special name -." << std::endl;
+ std::cerr << std::endl;
- }
-
++ }
++
+void handle_option(const std::string& opt, int argc, const char **argv, int& argi)
+{
+ if (opt == "--help" || opt == "-h") { usage(); exit(1); }
-+}
-+
+ }
- int main(int argc, const char **argv)
- {
-@@ -95,6 +103,7 @@
+
+@@ -102,6 +110,7 @@
std::vector<std::string> files;
for (int i=1; i < argc; i++) {
std::string opt = argv[i];
+ if(opt[0] == '-') handle_option(opt, argc, argv, i);
files.push_back(opt);
}
- if (files.size() > 3) { usage("Too many arguments"); exit(1); }
+ if (files.size() > 3) {
--- a/src/score-lm.cpp
+++ b/src/score-lm.cpp
-@@ -30,12 +30,16 @@
-
+@@ -31,13 +31,17 @@
- void usage() {
-- std::cerr << "Usage: score-lm -lm <model> [-dub <dub>] [-mm 1]\n"
-- " score sentences with a language model\n"
-- " -lm language model to use (must be specified)\n"
-- " -dub dictionary upper bound (default: 10000000)\n"
-- " -level max level to load from the language models (default: 1000, meaning the actual LM order)\n"
-- " -mm 1 memory-mapped access to lm\n";
+ void usage()
+ {
+- std::cerr << "Usage: score-lm -lm <model> [-dub <dub>] [-mm 1]\n"
+- " score sentences with a language model\n"
+- " -lm language model to use (must be specified)\n"
+- " -dub dictionary upper bound (default: 10000000)\n"
+- " -level max level to load from the language models (default: 1000, meaning the actual LM order)\n"
+- " -mm 1 memory-mapped access to lm\n";
+- exit(1);
+ std::cerr << std::endl << "score-lm - scores sentences with a language model" << std::endl;
+ std::cerr << std::endl << "USAGE:" << std::endl
+ << " score-lm -lm <model> [options]" << std::endl;
@@ -890,17 +885,18 @@ Forwarded: no
+ std::cerr << " meaning the actual LM order)" << std::endl;
+ std::cerr << " -mm 1 memory-mapped access to lm (default: 0)" << std::endl;
+ std::cerr << std::endl;
- exit(1);
++ exit(1);
}
+ int main(int argc, char **argv)
--- a/src/tlm.cpp
+++ b/src/tlm.cpp
-@@ -236,8 +236,53 @@
-
- if (!trainfile || !lmtype)
- {
-- cerr <<"Missing parameters\n";
-- exit(1);
+@@ -240,8 +240,53 @@
+ GetParams(&argc, &argv, (char*) NULL);
+
+ if (!lmtype || (!trainfile && lmtype!=MIXTURE)) {
+- cerr <<"Missing parameters\n";
+- exit(1);
+ std::cerr << std::endl << "tlm - trains and tests language models" << std::endl;
+ std::cerr << std::endl << "USAGE:" << std::endl
+ << " tlm [options]" << std::endl;
@@ -948,6 +944,6 @@ Forwarded: no
+ std::cerr << " -Beta|-beta=<double> (default: -1.0)" << std::endl;
+ std::cerr << std::endl;
+ exit(1);
- };
-
-
+ };
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits
mailing list