[Debtags-commits] [svn] r1819 - in debtags/1.6.0: . tools
Enrico Zini
enrico at costa.debian.org
Sun Jul 9 07:51:14 UTC 2006
Author: enrico
Date: Sun Jul 9 07:51:12 2006
New Revision: 1819
Modified:
debtags/1.6.0/ (props changed)
debtags/1.6.0/tools/debtags.cc
Log:
r3007 at viaza: enrico | 2006-07-08 20:27:48 +0200
Added a faster variant of the smart search algorithm (can be further optimized)
Modified: debtags/1.6.0/tools/debtags.cc
==============================================================================
--- debtags/1.6.0/tools/debtags.cc (original)
+++ debtags/1.6.0/tools/debtags.cc Sun Jul 9 07:51:12 2006
@@ -755,6 +755,33 @@
return metric.discriminance(t1, itemCount) < metric.discriminance(t2, itemCount);
}
};
+ class RelevanceOrder // Comparison functor: orders tags by the ratio second.get(tag) / first.get(tag), smallest ratio first.
+ {
+ const TagMetrics& first; // metrics supplying the denominator of the ratio
+ const TagMetrics& second; // metrics supplying the numerator of the ratio
+ public:
+ RelevanceOrder(const TagMetrics& first, const TagMetrics& second) // Stores references only, so both metrics objects must outlive this functor.
+ : first(first), second(second) {}
+ bool operator()(const Tag& t1, const Tag& t2) // Returns true when the scaled ratio of t1 is strictly smaller than that of t2.
+ {
+ // The numerator is scaled by 10000 before dividing, presumably so that integer division keeps some fractional precision (assumes get() returns an integer type -- TODO confirm).
+ // Replacing "10000 *" with a (double) cast would be formally precise, at the cost of using floating-point arithmetic.
+ return 10000 * second.get(t1) / first.get(t1) < 10000 * second.get(t2) / first.get(t2); // NOTE(review): no guard here against first.get(t) == 0 or overflow of 10000 * second.get(t) -- confirm callers rule these out.
+ }
+ };
+
+ #if 0 // Compiled out: everything up to the matching #endif is kept for reference only.
+ TagMetrics<Tag, double> jumpsFrom(const TagMetrics<Tag, Number>& other) const // Builds a new metrics object mapping each tag of this object to (its value here) / (its value in other), as a double.
+ {
+ TagMetrics<Tag, double> res;
+ for (typename TagMetrics<Tag, Number>::const_iterator i = this->begin(); i != this->end(); ++i) // visit every (tag, value) entry of this object
+ {
+ //cout << i->first.fullname() << " was " << other.get(i->first) << " is " << i->second << " kept " << i->second * 100 / other.get(i->first) << "%" << endl;
+ res.add(i->first, (double)i->second / other.get(i->first)); // cast first so the division is done in floating point; NOTE(review): no guard against other.get() returning 0.
+ }
+ return res;
+ }
+ #endif
public:
void add(const Tag& tag, const Number& val)
@@ -816,6 +843,7 @@
return res;
}
+
vector<Tag> tagsSortedByMetrics() const
{
vector<Tag> res;
@@ -834,6 +862,15 @@
return res;
}
+ vector<Tag> tagsSortedByRelevance(const TagMetrics<Tag, Number>& other) // Returns every tag of this object, sorted with RelevanceOrder: increasing ratio of this object's value to other's value.
+ {
+ vector<Tag> res;
+ for (typename TagMetrics::const_iterator i = this->begin(); i != this->end(); ++i) // collect the tag (key) of each entry in this object
+ res.push_back(i->first);
+ std::sort(res.begin(), res.end(), RelevanceOrder(other, *this)); // other is the comparator's "first" (denominator), *this its "second" (numerator)
+ return res;
+ }
+
template<typename COLL>
static TagMetrics<Tag, Number> computeFromTags(const COLL& coll)
{
@@ -1136,8 +1173,15 @@
// Compute the set of tags that better represent the keyword search
TagMetrics<Tag, int> metrics1 = TagMetrics<Tag, int>::computeFromTags(fullColl);
TagMetrics<Tag, int> metrics2 = TagMetrics<Tag, int>::computeFromTags(filtered);
+#if 0
+ // Use the jump algorithm
TagMetrics<Tag, double> jumps = metrics2.jumpsFrom(metrics1);
interesting = jumps.tagsSortedByMetrics();
+#else
+ // Use the reduction percentage algorithm (a bit faster, and more
+ // optimizable)
+ interesting = metrics2.tagsSortedByRelevance(metrics1);
+#endif
}
public:
More information about the Debtags-commits
mailing list