[Debtags-commits] [svn] r1814 - in debtags/1.6.0: . tools

Enrico Zini enrico at costa.debian.org
Sat Jul 8 09:37:01 UTC 2006


Author: enrico
Date: Sat Jul  8 09:36:59 2006
New Revision: 1814

Modified:
   debtags/1.6.0/   (props changed)
   debtags/1.6.0/tools/debtags.cc
Log:
 r2997 at viaza:  enrico | 2006-07-08 11:14:15 +0200
 Allow choosing both by keyword-related tags and by most discriminant tags


Modified: debtags/1.6.0/tools/debtags.cc
==============================================================================
--- debtags/1.6.0/tools/debtags.cc	(original)
+++ debtags/1.6.0/tools/debtags.cc	Sat Jul  8 09:36:59 2006
@@ -752,7 +752,7 @@
 		bool operator()(const Tag& t1, const Tag& t2)
 		{
 			// Returns true if t1 precedes t2, and false otherwise
-			return abs(itemCount / 2 - metric.get(t2)) < abs(itemCount / 2 - metric.get(t1));
+			return metric.discriminance(t1, itemCount) < metric.discriminance(t2, itemCount);
 		}
 	};
 
@@ -775,6 +775,13 @@
 			return i->second;
 	}
 
+	// Get the minimum number of packages that would be eliminated by choosing
+	// this tag either as 'wanted' or as 'unwanted'
+	Number discriminance(const Tag& tag, Number itemCount) const
+	{
+		return get(tag) < itemCount - get(tag) ? get(tag) : itemCount - get(tag);
+	}
+
 	TagMetrics<Tag, Number> rankMetrics() const
 	{
 		vector<Tag> sorted = tagsSortedByMetrics();
@@ -887,6 +894,7 @@
 
 	coll::Fast<Package, Tag> fullColl;
 	coll::Fast<Package, Tag> coll;
+	TagMetrics<Tag, int> collMetrics;
 
 	std::string pattern;
 	std::set<Tag> wanted;
@@ -960,6 +968,7 @@
 		return Filter<OUT>(*this, out);
 	}
 
+	#if 0
 	void autoSelect(const std::vector<Tag>& tags, size_t maxAuto = 5, size_t maxUser = 7)
 	{
 		interesting.clear();
@@ -983,57 +992,108 @@
 		for (size_t i = tags.size() - autoCount - 1; i >= tags.size() - autoCount - userCount; --i)
 			interesting.push_back(tags[i]);
 	}
+	#endif
 
-	void showTags()
+	void showSet(const std::set<Tag>& tags, const std::string& type)
 	{
-		tagsInMenu.clear();
-		int idx = 1;
-
-		for (std::set<Tag>::const_iterator i = wanted.begin();
-				i != wanted.end(); ++i)
+		for (std::set<Tag>::const_iterator i = tags.begin();
+				i != tags.end(); ++i)
 		{
-			cout << idx << ") " << i->fullname() << " (wanted)" << endl;
 			tagsInMenu.push_back(*i);
-			++idx;
+			cout << tagsInMenu.size() << ") " << i->fullname() << " (" << type << ")" << endl;
 		}
+	}
 
-		for (std::set<Tag>::const_iterator i = unwanted.begin();
-				i != unwanted.end(); ++i)
+	void showInteresting(int max = 7)
+	{
+		for (std::vector<Tag>::const_reverse_iterator i = interesting.rbegin();
+				i != interesting.rend() && max > 0; ++i)
 		{
-			cout << idx << ") " << i->fullname() << " (unwanted)" << endl;
+			using namespace wibble::operators;
+			if (utils::set_contains(wanted, *i)
+				|| utils::set_contains(unwanted, *i)
+				|| utils::set_contains(ignored, *i)
+				|| collMetrics.get(*i) == 0)
+				continue;
+			//size_t itemCount = coll.itemCount();
+			//int discr = collMetrics.discriminance(*i, itemCount) * 200 / itemCount;
+			//int discr = collMetrics.discriminance(*i, itemCount);
 			tagsInMenu.push_back(*i);
-			++idx;
+			cout << tagsInMenu.size() << ") " << i->fullname()
+			     << " (" << collMetrics.get(*i) << "/" << coll.itemCount() << ")" << endl;
+			--max;
 		}
+	}
 
-		for (std::vector<Tag>::const_iterator i = interesting.begin();
-				i != interesting.end(); ++i)
-		{
-			cout << idx << ") " << i->fullname() << endl;
-			tagsInMenu.push_back(*i);
-			++idx;
-		}
+	void showDiscriminant(int max = 7)
+	{
+		// Compute the most interesting tags by discriminance
+		vector<Tag> discr = collMetrics.tagsSortedByDiscriminance(coll.itemCount());
 
-		for (std::set<Tag>::const_iterator i = ignored.begin();
-				i != ignored.end(); ++i)
+		for (std::vector<Tag>::const_reverse_iterator i = discr.rbegin();
+				i != discr.rend() && max > 0; ++i)
 		{
-			cout << idx << ") " << i->fullname() << " (ignored)" << endl;
+			using namespace wibble::operators;
+			if (utils::set_contains(wanted, *i)
+				|| utils::set_contains(unwanted, *i)
+				|| utils::set_contains(ignored, *i))
+				continue;
+			//size_t itemCount = coll.itemCount();
+			//int discr = collMetrics.discriminance(*i, itemCount) * 200 / itemCount;
+			//int discr = collMetrics.discriminance(*i, itemCount);
 			tagsInMenu.push_back(*i);
-			++idx;
+			cout << tagsInMenu.size() << ") " << i->fullname()
+			     << " (" << collMetrics.get(*i) << "/" << coll.itemCount() << ")" << endl;
+			--max;
 		}
 	}
 
+	void showTags()
+	{
+		tagsInMenu.clear();
+
+		showSet(wanted, "wanted");
+		showSet(unwanted, "unwanted");
+		showSet(ignored, "ignored");
+		cout << endl;
+		showInteresting();
+		cout << endl;
+		showDiscriminant();
+	}
+
 	void refilter()
 	{
 		// Regenerate coll
 		coll = coll::Fast<Package, Tag>();
 		fullColl.output(filter(inserter(coll)));
 
-		// Compute the most interesting tags
-		TagMetrics<Tag, int> collMetrics = TagMetrics<Tag, int>::computeFromTags(coll);
-		vector<Tag> tags = collMetrics.tagsSortedByDiscriminance(coll.itemCount());
+		collMetrics = TagMetrics<Tag, int>::computeFromTags(coll);
+
+#if 0
+		// Compute the most interesting tags by discriminance
+		interesting = collMetrics.tagsSortedByDiscriminance(coll.itemCount());
 
 		// Select them as interesting to be displayed
 		autoSelect(tags, 0);
+
+		// Compute the most interesting tags by jumps
+		coll::Fast<Package, Tag> coll1;
+		for (coll::Fast<Package, Tag>::const_iterator i = coll.begin();
+				i != coll.end(); ++i)
+			if (patternMatch(i->first))
+				coll1.insert(wibble::singleton(i->first), i->second);
+		
+		// Compute the set of tags that better represent the keyword search
+		TagMetrics<Tag, int> metrics2 = TagMetrics<Tag, int>::computeFromTags(coll1);
+		TagMetrics<Tag, int> jumps = metrics2.jumpsFrom(collMetrics);
+		interesting = jumps.tagsSortedByMetrics();
+
+		//metrics1.dump("BEF ", cout);
+		//metrics2.dump("AFT ", cout);
+		//jumps.dump("JMP ", cout);
+
+		//autoSelect(tags, 0);
+#endif
 	}
 
 public:
@@ -1050,15 +1110,16 @@
 
 		// Compute the set of tags that better represent the keyword search
 		TagMetrics<Tag, int> metrics1 = TagMetrics<Tag, int>::computeFromTags(fullColl);
-		TagMetrics<Tag, int> metrics2 = TagMetrics<Tag, int>::computeFromTags(coll);
-		TagMetrics<Tag, int> jumps = metrics2.jumpsFrom(metrics1);
-		vector<Tag> tags = jumps.tagsSortedByMetrics();
+		collMetrics = TagMetrics<Tag, int>::computeFromTags(coll);
+		TagMetrics<Tag, int> jumps = collMetrics.jumpsFrom(metrics1);
+		interesting = jumps.tagsSortedByMetrics();
+		coll = fullColl;
 
 		//metrics1.dump("BEF ", cout);
 		//metrics2.dump("AFT ", cout);
 		//jumps.dump("JMP ", cout);
 
-		autoSelect(tags, 1);
+		//autoSelect(tags, 1);
 		//autoSelect(tags, 0);
 	}
 
@@ -1091,53 +1152,57 @@
 			showTags();
 			cout << coll.itemCount() << " packages selected so far." << endl;
 			string ans;
-			bool badAnswer = true;
-			do {
-				badAnswer = false;
-				// TODO: allow to add tags based on a keyword search on coll
-				cout << "Your choice (+#, -#, =#, View, Done, Quit): ";
-				cin >> ans;
-
-				if (ans == "")
-					badAnswer = true;
-				else if (ans[0] == '+') {
-					int idx = strtoul(ans.substr(1).c_str(), NULL, 10);
-					if (idx < 0 || (unsigned)idx >= tagsInMenu.size())
-						badAnswer = true;
-					else
-					{
-						Tag tag = tagsInMenu[idx - 1];
-						cout << "Selected: " << tag.fullname() << endl;
-						wanted.insert(tag);
-						unwanted.erase(tag);
-						ignored.erase(tag);
-						refilter();
-					}
-				} else if (ans[0] == '-') {
-					int idx = strtoul(ans.substr(1).c_str(), NULL, 10);
-					if (idx < 0 || (unsigned)idx >= tagsInMenu.size())
-						badAnswer = true;
-					else
-					{
-						Tag tag = tagsInMenu[idx - 1];
-						cout << "Selected: " << tag.fullname() << endl;
-						wanted.erase(tag);
-						unwanted.insert(tag);
-						ignored.erase(tag);
-						refilter();
-					}
-				} else if (ans[0] == '=') {
+			bool changed = false;
+
+			// TODO: allow to add tags based on a keyword search on coll
+			cout << "Your choice (+#, -#, =#, View, Done, Quit): ";
+			if (!getline(cin, ans))
+			{
+				cout << endl;
+				done = true;
+			}
+
+			while (ans != "")
+			{
+				// Split the answer by spaces
+				string rest;
+				size_t pos = ans.find(" ");
+				if (pos != string::npos)
+				{
+					// Skip spaces
+					for ( ; pos < ans.size() && isspace(ans[pos]); ++pos)
+						;
+					rest = ans.substr(pos);
+					ans = ans.substr(0, pos);
+				}
+				if (ans[0] == '+' || ans[0] == '-' || ans[0] == '=') {
 					int idx = strtoul(ans.substr(1).c_str(), NULL, 10);
-					if (idx < 0 || (unsigned)idx >= tagsInMenu.size())
-						badAnswer = true;
+					if (idx <= 0 || (unsigned)idx > tagsInMenu.size())
+						cout << "Tag " << idx << " was not on the menu." << endl;
 					else
 					{
 						Tag tag = tagsInMenu[idx - 1];
-						cout << "Selected: " << tag.fullname() << endl;
-						wanted.erase(tag);
-						unwanted.erase(tag);
-						ignored.insert(tag);
-						refilter();
+						cout << "Understood " << ans << " as " << ans[0] << tag.fullname() << endl;
+
+						switch (ans[0])
+						{
+							case '+':
+								wanted.insert(tag);
+								unwanted.erase(tag);
+								ignored.erase(tag);
+								break;
+							case '-':
+								wanted.erase(tag);
+								unwanted.insert(tag);
+								ignored.erase(tag);
+								break;
+							case '=':
+								wanted.erase(tag);
+								unwanted.erase(tag);
+								ignored.insert(tag);
+								break;
+						}
+						changed = true;
 					}
 				} else if (ans == "V" || ans == "v") {
 					coll.output(PackagePrinter(ept.tagmap(), PackagePrinter::SHORT));
@@ -1146,10 +1211,13 @@
 					done = true;
 				} else if (ans == "Q" || ans == "q") {
 					done = true;
-				} else
-					badAnswer = true;
-			} while (badAnswer);
-
+				} else {
+					cout << "Ignoring command \"" << ans << "\"" << endl;
+				}
+				ans = rest;
+			}
+			if (changed)
+				refilter();
 		}
 	}
 



More information about the Debtags-commits mailing list