[Debtags-commits] [svn] r1819 - in debtags/1.6.0: . tools

Enrico Zini enrico at costa.debian.org
Sun Jul 9 07:51:14 UTC 2006


Author: enrico
Date: Sun Jul  9 07:51:12 2006
New Revision: 1819

Modified:
   debtags/1.6.0/   (props changed)
   debtags/1.6.0/tools/debtags.cc
Log:
 r3007 at viaza:  enrico | 2006-07-08 20:27:48 +0200
 Added a faster variant of the smart search algorithm (can be further optimized)


Modified: debtags/1.6.0/tools/debtags.cc
==============================================================================
--- debtags/1.6.0/tools/debtags.cc	(original)
+++ debtags/1.6.0/tools/debtags.cc	Sun Jul  9 07:51:12 2006
@@ -755,6 +755,33 @@
 			return metric.discriminance(t1, itemCount) < metric.discriminance(t2, itemCount);
 		}
 	};
+	class RelevanceOrder	// Comparison functor: orders tags by second.get(tag) / first.get(tag), ascending
+	{
+		const TagMetrics& first;	// divisor metrics; get(tag) is presumably never 0 for compared tags -- TODO confirm
+		const TagMetrics& second;	// dividend metrics
+	public:
+		RelevanceOrder(const TagMetrics& first, const TagMetrics& second)
+			: first(first), second(second) {}
+		bool operator()(const Tag& t1, const Tag& t2) const	// const: comparing never modifies the functor
+		{
+			// The 10000 factor keeps four decimal digits of the ratio when Number is an integer type;
+			// replace 10000* with (double) to be formally precise, at the cost of floating point arithmetic
+			return 10000 * second.get(t1) / first.get(t1) < 10000 * second.get(t2) / first.get(t2);
+		}
+	};
+
+	#if 0
+	TagMetrics<Tag, double> jumpsFrom(const TagMetrics<Tag, Number>& other) const
+	{
+		TagMetrics<Tag, double> ratios;	// maps each tag of *this to its value divided (as double) by other.get(tag)
+		for (typename TagMetrics<Tag, Number>::const_iterator it = this->begin(); it != this->end(); ++it)
+		{
+			//cout << it->first.fullname() << " was " << other.get(it->first) << " is " << it->second << " kept " << it->second * 100 / other.get(it->first) << "%" << endl;
+			ratios.add(it->first, static_cast<double>(it->second) / other.get(it->first));
+		}
+		return ratios;
+	}
+	#endif
 
 public:
 	void add(const Tag& tag, const Number& val)
@@ -816,6 +843,7 @@
 		return res;
 	}
 
+
 	vector<Tag> tagsSortedByMetrics() const
 	{
 		vector<Tag> res;
@@ -834,6 +862,15 @@
 		return res;
 	}
 
+	vector<Tag> tagsSortedByRelevance(const TagMetrics<Tag, Number>& other) const
+	{
+		vector<Tag> res;	// receives every tag stored in *this; sorted before being returned
+		for (typename TagMetrics::const_iterator i = this->begin(); i != this->end(); ++i)
+			res.push_back(i->first);
+		std::sort(res.begin(), res.end(), RelevanceOrder(other, *this));	// ascending this->get(tag) / other.get(tag), see RelevanceOrder
+		return res;
+	}
+
 	template<typename COLL>
 	static TagMetrics<Tag, Number> computeFromTags(const COLL& coll)
 	{
@@ -1136,8 +1173,15 @@
 		// Compute the set of tags that better represent the keyword search
 		TagMetrics<Tag, int> metrics1 = TagMetrics<Tag, int>::computeFromTags(fullColl);
 		TagMetrics<Tag, int> metrics2 = TagMetrics<Tag, int>::computeFromTags(filtered);
+#if 0
+		// Use the jump algorithm
 		TagMetrics<Tag, double> jumps = metrics2.jumpsFrom(metrics1);
 		interesting = jumps.tagsSortedByMetrics();
+#else
+		// Use the reduction percentage algorithm (a bit faster, and more
+		// optimizable)
+		interesting = metrics2.tagsSortedByRelevance(metrics1);
+#endif
 	}
 
 public:



More information about the Debtags-commits mailing list