[Debtags-commits] [svn] r1381 - in tagcoll/trunk: . tagcoll tests

Enrico Zini enrico at costa.debian.org
Tue Sep 27 15:26:49 UTC 2005


Author: enrico
Date: Tue Sep 27 15:26:49 2005
New Revision: 1381

Modified:
   tagcoll/trunk/   (props changed)
   tagcoll/trunk/tagcoll/experiments.cc
   tagcoll/trunk/tagcoll/experiments.h
   tagcoll/trunk/tests/normalize.cc
Log:
 r5427 at viaza:  enrico | 2005-09-26 08:28:08 -0500
 Normalization implemented with tag removals only; now supports scoring of tags


Modified: tagcoll/trunk/tagcoll/experiments.cc
==============================================================================
--- tagcoll/trunk/tagcoll/experiments.cc	(original)
+++ tagcoll/trunk/tagcoll/experiments.cc	Tue Sep 27 15:26:49 2005
@@ -46,7 +46,27 @@
 namespace Tagcoll {
 
 template<typename ITEM, typename TAG>
-void Normalizer<ITEM,TAG>::buildGraph()
+bool Normalizer<ITEM,TAG>::addToGraph(const Scores<TAG>& scores, const OpSet<TAG>& ts1, const OpSet<TAG>& ts2)
+{
+	if (this->tagsets.find(ts2) != this->tagsets.end()
+		&& scores.distance(ts1, ts2) <= 1.0)
+	{
+		distGraph[ts2].push_back(ts1);
+		distGraph[ts1].push_back(ts2);
+
+		// Recurse on ts2 minus each of its tags, looking for further stored tagsets with distance <= 1.0 from ts1
+		for (typename OpSet<TAG>::const_iterator i = ts2.begin();
+				i != ts2.end(); i++)
+			addToGraph(scores, ts1, ts2 - *i);
+
+		return true;
+	}
+	return false;
+}
+
+
+template<typename ITEM, typename TAG>
+void Normalizer<ITEM,TAG>::buildGraph(const Scores<TAG>& scores)
 {
 	distGraph.clear();
 
@@ -56,12 +76,26 @@
 				j != i->first.end(); j++)
 		{
 			OpSet<TAG> test = i->first - *j;
+			if (addToGraph(scores, i->first, test))
+			{
+			}
+		}
+
+	/*
+    -- Previous build algorithm, for a fixed distance of 1 (links tagsets that differ by one removed tag)
+	for (typename tagsets_t::const_iterator i = this->tagsets.begin();
+			i != this->tagsets.end(); i++)
+		for (typename OpSet<TAG>::const_iterator j = i->first.begin();
+				j != i->first.end(); j++)
+		{
+			OpSet<TAG> test = i->first - *j;
 			if (this->tagsets.find(test) != this->tagsets.end())
 			{
 				distGraph[test].push_back(i->first);
 				distGraph[i->first].push_back(test);
 			}
 		}
+	*/
 }
 
 template<typename ITEM, typename TAG>
@@ -128,9 +162,11 @@
 template<typename ITEM, typename TAG>
 void Normalizer<ITEM,TAG>::normalize()
 {
+	/*
 	cerr << "Building graph..." << endl;
 	buildGraph();
 	cerr << "Built graph." << endl;
+	*/
 
 	bool done = false;
 
@@ -186,6 +222,7 @@
 
 namespace Tagcoll {
 	    template class Normalizer<std::string, std::string>;
+	    template class Scores<std::string>;
 }
 #endif
 

Modified: tagcoll/trunk/tagcoll/experiments.h
==============================================================================
--- tagcoll/trunk/tagcoll/experiments.h	(original)
+++ tagcoll/trunk/tagcoll/experiments.h	Tue Sep 27 15:26:49 2005
@@ -24,12 +24,58 @@
  */
 
 #include <tagcoll/CardinalityStore.h>
+#include <tagcoll/Expression.h>
 
 #include <vector>
 
 namespace Tagcoll
 {
 
+template <typename TAG>
+class Scores
+{
+protected:
+	struct Score
+	{
+		Expression expr;
+		float score;
+		Score(const std::string& expr, float score) :
+			expr(expr), score(score) {}
+	};
+	float defaultScore;
+	std::vector<Score> scores;
+public:
+	Scores(float def = 1.0) : defaultScore(def) {}
+
+	void add(const std::string& expr, float score)
+	{
+		scores.push_back(Score(expr, score));
+	}
+
+	float operator()(const TAG& tag) const
+	{
+		for (typename std::vector<Score>::const_iterator i = scores.begin();
+				i != scores.end(); i++)
+		{
+			OpSet<TAG> tags;
+			tags += tag;
+			if (i->expr(tags))
+				return i->score;
+		}
+		return defaultScore;
+	}
+
+	float distance(const OpSet<TAG>& ts1, const OpSet<TAG>& ts2) const
+	{
+		float res = 0;
+		OpSet<TAG> diff = (ts1 - ts2) + (ts2 - ts1);
+		for (typename OpSet<TAG>::const_iterator i = diff.begin();
+				i != diff.end(); i++)
+			res += (*this)(*i);
+		return res;
+	}
+};
+
 template <typename ITEM, typename TAG>
 class Normalizer : public CardinalityStore<ITEM, TAG>
 {
@@ -43,7 +89,7 @@
 	typedef std::map< OpSet<TAG>, std::vector< OpSet<TAG> > > distgraph_t;
 	distgraph_t distGraph;
 
-	void buildGraph();
+	bool addToGraph(const Scores<TAG>& scores, const OpSet<TAG>& ts1, const OpSet<TAG>& ts2);
 	bool mergeTagsets(const OpSet<TAG>& ts1, const OpSet<TAG>& ts2);
 	void removeAfterMerge(const OpSet<TAG>& ts, const OpSet<TAG>& merged);
 
@@ -54,6 +100,8 @@
 		merge_threshold(7),
 		min_threshold(2) {}
 
+	void buildGraph(const Scores<TAG>& scores);
+
 	void normalize();
 };
 

Modified: tagcoll/trunk/tests/normalize.cc
==============================================================================
--- tagcoll/trunk/tests/normalize.cc	(original)
+++ tagcoll/trunk/tests/normalize.cc	Tue Sep 27 15:26:49 2005
@@ -17,8 +17,14 @@
 		Normalizer<string, string> norm;
 		StdioParserInput in(stdin, "(stdin)");
 
+		Scores<string> scores(0.7);
+		scores.add("culture::* || use::*", 1.1);
+		scores.add("implemented-in::*", 0.3);
+		scores.add("*::TODO", 0.1);
+
 		TextFormat<string, string>::parse(conv, conv, in, norm);
 
+		norm.buildGraph(scores);
 		norm.normalize();
 
 		TextFormat<string, string> writer(conv, conv, stdout);



More information about the Debtags-commits mailing list