[Debtags-commits] [svn] r1381 - in tagcoll/trunk: . tagcoll tests
Enrico Zini
enrico at costa.debian.org
Tue Sep 27 15:26:49 UTC 2005
Author: enrico
Date: Tue Sep 27 15:26:49 2005
New Revision: 1381
Modified:
tagcoll/trunk/ (props changed)
tagcoll/trunk/tagcoll/experiments.cc
tagcoll/trunk/tagcoll/experiments.h
tagcoll/trunk/tests/normalize.cc
Log:
r5427 at viaza: enrico | 2005-09-26 08:28:08 -0500
Normalization implemented with tag removals only; now supports scoring of tags
Modified: tagcoll/trunk/tagcoll/experiments.cc
==============================================================================
--- tagcoll/trunk/tagcoll/experiments.cc (original)
+++ tagcoll/trunk/tagcoll/experiments.cc Tue Sep 27 15:26:49 2005
@@ -46,7 +46,27 @@
namespace Tagcoll {
template<typename ITEM, typename TAG>
-void Normalizer<ITEM,TAG>::buildGraph()
+bool Normalizer<ITEM,TAG>::addToGraph(const Scores<TAG>& scores, const OpSet<TAG>& ts1, const OpSet<TAG>& ts2) // Links ts1 and ts2 in distGraph when ts2 is a known tagset within scored distance 1.0 of ts1; returns true iff the link was added.
+{
+ if (this->tagsets.find(ts2) != this->tagsets.end() // ts2 must be present in this->tagsets
+ && scores.distance(ts1, ts2) <= 1.0) // and its scored distance from ts1 must not exceed 1.0
+ {
+ distGraph[ts2].push_back(ts1); // record the edge in both directions
+ distGraph[ts1].push_back(ts2);
+
+ // Try removing more tags to see if there is still something with distance <= 1.0
+ for (typename OpSet<TAG>::const_iterator i = ts2.begin();
+ i != ts2.end(); i++)
+ addToGraph(scores, ts1, ts2 - *i); // recurse on ts2 with one more tag taken out; the recursive result is ignored. NOTE(review): a subset reachable through several removal orders looks like it gets pushed once per path, and the search stops at any set missing from tagsets -- confirm this is intended.
+
+ return true;
+ }
+ return false; // ts2 is not in tagsets, or it is farther than 1.0 from ts1
+}
+
+
+template<typename ITEM, typename TAG>
+void Normalizer<ITEM,TAG>::buildGraph(const Scores<TAG>& scores)
{
distGraph.clear();
@@ -56,12 +76,26 @@
j != i->first.end(); j++)
{
OpSet<TAG> test = i->first - *j;
+ if (addToGraph(scores, i->first, test))
+ {
+ }
+ }
+
+ /*
+ -- Build algorithm for fixed-score distance of 1
+ for (typename tagsets_t::const_iterator i = this->tagsets.begin();
+ i != this->tagsets.end(); i++)
+ for (typename OpSet<TAG>::const_iterator j = i->first.begin();
+ j != i->first.end(); j++)
+ {
+ OpSet<TAG> test = i->first - *j;
if (this->tagsets.find(test) != this->tagsets.end())
{
distGraph[test].push_back(i->first);
distGraph[i->first].push_back(test);
}
}
+ */
}
template<typename ITEM, typename TAG>
@@ -128,9 +162,11 @@
template<typename ITEM, typename TAG>
void Normalizer<ITEM,TAG>::normalize()
{
+ /*
cerr << "Building graph..." << endl;
buildGraph();
cerr << "Built graph." << endl;
+ */
bool done = false;
@@ -186,6 +222,7 @@
namespace Tagcoll {
template class Normalizer<std::string, std::string>;
+ template class Scores<std::string>;
}
#endif
Modified: tagcoll/trunk/tagcoll/experiments.h
==============================================================================
--- tagcoll/trunk/tagcoll/experiments.h (original)
+++ tagcoll/trunk/tagcoll/experiments.h Tue Sep 27 15:26:49 2005
@@ -24,12 +24,58 @@
*/
#include <tagcoll/CardinalityStore.h>
+#include <tagcoll/Expression.h>
#include <vector>
namespace Tagcoll
{
+template <typename TAG>
+class Scores // Assigns a float score to each tag via an ordered list of (expression, score) rules, and sums those scores to measure the distance between two tag sets.
+{
+protected:
+ struct Score // One scoring rule: an expression paired with the score it yields.
+ {
+ Expression expr; // built from the string passed to the constructor
+ float score; // value returned for a tag that this rule's expression accepts
+ Score(const std::string& expr, float score) :
+ expr(expr), score(score) {}
+ };
+ float defaultScore; // returned by operator() when no rule matches
+ std::vector<Score> scores; // rules, kept in the order they were added
+public:
+ Scores(float def = 1.0) : defaultScore(def) {} // def: score for tags matched by no rule
+
+ void add(const std::string& expr, float score) // Appends a rule; earlier rules take precedence in operator().
+ {
+ scores.push_back(Score(expr, score));
+ }
+
+ float operator()(const TAG& tag) const // Returns the score of the first rule whose expression accepts a set holding only `tag`, or defaultScore if none does.
+ {
+ for (typename std::vector<Score>::const_iterator i = scores.begin();
+ i != scores.end(); i++)
+ {
+ OpSet<TAG> tags; // NOTE(review): this one-tag set does not depend on i, yet it is rebuilt on every iteration
+ tags += tag;
+ if (i->expr(tags)) // first rule that accepts the one-tag set wins
+ return i->score;
+ }
+ return defaultScore;
+ }
+
+ float distance(const OpSet<TAG>& ts1, const OpSet<TAG>& ts2) const // Returns the sum of per-tag scores over (ts1 - ts2) + (ts2 - ts1).
+ {
+ float res = 0;
+ OpSet<TAG> diff = (ts1 - ts2) + (ts2 - ts1); // presumably the symmetric difference, if OpSet's - and + are set difference and union -- confirm against OpSet
+ for (typename OpSet<TAG>::const_iterator i = diff.begin();
+ i != diff.end(); i++)
+ res += (*this)(*i); // score each differing tag with operator()
+ return res;
+ }
+};
+
template <typename ITEM, typename TAG>
class Normalizer : public CardinalityStore<ITEM, TAG>
{
@@ -43,7 +89,7 @@
typedef std::map< OpSet<TAG>, std::vector< OpSet<TAG> > > distgraph_t;
distgraph_t distGraph;
- void buildGraph();
+ bool addToGraph(const Scores<TAG>& scores, const OpSet<TAG>& ts1, const OpSet<TAG>& ts2);
bool mergeTagsets(const OpSet<TAG>& ts1, const OpSet<TAG>& ts2);
void removeAfterMerge(const OpSet<TAG>& ts, const OpSet<TAG>& merged);
@@ -54,6 +100,8 @@
merge_threshold(7),
min_threshold(2) {}
+ void buildGraph(const Scores<TAG>& scores);
+
void normalize();
};
Modified: tagcoll/trunk/tests/normalize.cc
==============================================================================
--- tagcoll/trunk/tests/normalize.cc (original)
+++ tagcoll/trunk/tests/normalize.cc Tue Sep 27 15:26:49 2005
@@ -17,8 +17,14 @@
Normalizer<string, string> norm;
StdioParserInput in(stdin, "(stdin)");
+ Scores<string> scores(0.7);
+ scores.add("culture::* || use::*", 1.1);
+ scores.add("implemented-in::*", 0.3);
+ scores.add("*::TODO", 0.1);
+
TextFormat<string, string>::parse(conv, conv, in, norm);
+ norm.buildGraph(scores);
norm.normalize();
TextFormat<string, string> writer(conv, conv, stdout);
More information about the Debtags-commits
mailing list