[Debtags-commits] [svn] r1397 - debtags/trunk/tools

Enrico Zini enrico at costa.debian.org
Fri Oct 21 22:30:45 UTC 2005


Author: enrico
Date: Fri Oct 21 22:30:44 2005
New Revision: 1397

Modified:
   debtags/trunk/tools/Printer.h
   debtags/trunk/tools/debtags.cc
Log:
Implemented first prototype of smart search

Modified: debtags/trunk/tools/Printer.h
==============================================================================
--- debtags/trunk/tools/Printer.h	(original)
+++ debtags/trunk/tools/Printer.h	Fri Oct 21 22:30:44 2005
@@ -93,10 +93,10 @@
 	virtual void consumeItem(const Package& pkg, const Tagcoll::OpSet<Tag>& tags)
 	{
 	}
-	virtual void consumeItemsUntagged(const Tagcoll::OpSet<Package>& pkg)
+	virtual void consumeItemsUntagged(const Tagcoll::OpSet<Package>& pkgs)
 	{
 	}
-	virtual void consumeItems(const Tagcoll::OpSet<Package>& pkg, const Tagcoll::OpSet<Tag>& tags)
+	virtual void consumeItems(const Tagcoll::OpSet<Package>& pkgs, const Tagcoll::OpSet<Tag>& tags)
 	{
 	}
 

Modified: debtags/trunk/tools/debtags.cc
==============================================================================
--- debtags/trunk/tools/debtags.cc	(original)
+++ debtags/trunk/tools/debtags.cc	Fri Oct 21 22:30:44 2005
@@ -35,12 +35,15 @@
 #include <apt-front/cache/component/debtags/update.h>
 #include <apt-front/cache/component/debtags/utils.h>
 #include <apt-front/cache/entity/tag.h>
+#include <apt-front/predicate/predicate.h>
+#include <apt-front/predicate/factory.h>
 #include <apt-front/utils/paths.h>
 
 #include <tagcoll/StdioParserInput.h>
 #include <tagcoll/TextFormat.h>
 #include <tagcoll/SmartHierarchy.h>
 #include <tagcoll/InputMerger.h>
+#include <tagcoll/experiments.h>
 
 #include "Environment.h"
 #include "CommandlineParser.h"
@@ -63,6 +66,19 @@
 using namespace aptFront::cache::component::debtags;
 using namespace Tagcoll;
 
+// Write the fullname() of every tag in the set to out, separated by ", ".
+template<typename TAG, typename Traits>
+basic_ostream<char, Traits>& operator<<(basic_ostream<char, Traits>& out, const Tagcoll::OpSet<TAG>& tags)
+{
+	for (typename Tagcoll::OpSet<TAG>::const_iterator i = tags.begin(); i != tags.end(); ++i)
+		if (i == tags.begin())
+			out << i->fullname();
+		else
+			out << ", " << i->fullname();
+	return out;
+}
+
+
 component::PackageTags& debtagsInit(bool write = false)
 {
 	    aptFront::init();
@@ -85,15 +101,6 @@
 		return Global::get().debtags();
 }
 
-/*
-void wantTagDatabase() throw ()
-{
-	if (!Debtags::Environment::get().hasTagDatabase())
-		fatal_error(
-				"The tag database has not yet been generated: you need to run \"debtags update\" (as root) before using the package tags\n");
-}
-*/
-
 void wantTagDatabase() throw ()
 {
 	if (!component::PackageTags::hasTagDatabase())
@@ -407,6 +414,33 @@
 
 }
 
+// Single-item consumer that hands each received item, together with its tags(), to a Consumer<PKG, TAG>.
+template< typename PKG, typename TAG >
+class TagcollConsumerAdaptor : public utils::ConsumerImpl< PKG, TagcollConsumerAdaptor<PKG, TAG> >
+{
+protected:
+	Consumer<PKG, TAG>& m_out;  // destination; held by reference, so it must outlive the adaptor
+public:
+	explicit TagcollConsumerAdaptor( Consumer<PKG, TAG>& out) : m_out( out ) {}
+	virtual void consume( const PKG& a ) {
+		if (a != PKG())  // items equal to a default-constructed PKG are not forwarded
+			m_out.consume(a, a.tags());
+	}
+};
+
+// Consumer that increments an external int once for every item it receives.
+template< typename ITEM >
+class RangeCounter : public utils::ConsumerImpl< ITEM, RangeCounter<ITEM> >
+{
+protected:
+	int* count;  // not owned; dereferenced unchecked, so it must be non-null and outlive the counter
+public:
+	// explicit: a plain int* must never convert silently into a counter
+	explicit RangeCounter(int* count) : count(count) {}
+	virtual void consume(const ITEM&) {  // the item itself is ignored
+		++*count;
+	}
+};
 
 class Searcher
 {
@@ -434,6 +468,139 @@
 		printer->flush();
 		return filter.countMatched();
 	}
+
+	// Send every package accepted by p to the printer; returns the number of items in the filtered range.
+	int output(predicate::Predicate<Package> p)
+	{
+		aptFront::cache::component::Packages& pkgs = Global::get().packages();
+		utils::Range<Package> matches = filteredRange(utils::Range<Package>(range(pkgs.packagesBegin(), pkgs.packagesEnd())), p);
+		matches.output(TagcollConsumerAdaptor<Package, Tag>(*printer));
+		printer->flush();
+		int matched = 0;
+		RangeCounter<Package> tally(&matched);
+		matches.output(tally);  // second pass over the same range, only to count
+		return matched;
+	}
+};
+
+// Forwards search results to a wrapped Printer while tallying their tags, to suggest related packages.
+class SmartSearcher : public Printer<Package, Tag>, private CardinalityStore<Package, Tag>
+{
+protected:
+	component::PackageTags& debtags;  // its tagdb() is queried by outputRelated()
+	Printer<Package, Tag>* printer;  // wrapped printer; not owned, dereferenced unchecked
+	map<Tag, size_t> tagCount;  // per tag, how many consumed packages carried it
+	OpSet<Tag> m_topTags;  // tags selected by the last outputRelated() call
+
+	virtual void consumeItemUntagged(const Package& pkg)
+	{
+		printer->consume(pkg);
+		CardinalityStore<Package, Tag>::consumeItemUntagged(pkg);
+	}
+	// Besides forwarding, count one occurrence of every tag of pkg.
+	virtual void consumeItem(const Package& pkg, const Tagcoll::OpSet<Tag>& tags)
+	{
+		printer->consume(pkg, tags);
+		CardinalityStore<Package, Tag>::consumeItem(pkg, tags);
+		for (OpSet<Tag>::const_iterator i = tags.begin(); i != tags.end(); ++i)
+			++tagCount[*i];
+	}
+	virtual void consumeItemsUntagged(const Tagcoll::OpSet<Package>& pkgs)
+	{
+		printer->consume(pkgs);
+		CardinalityStore<Package, Tag>::consumeItemsUntagged(pkgs);
+	}
+	// Besides forwarding, count every tag once per package in pkgs.
+	virtual void consumeItems(const Tagcoll::OpSet<Package>& pkgs, const Tagcoll::OpSet<Tag>& tags)
+	{
+		printer->consume(pkgs, tags);
+		CardinalityStore<Package, Tag>::consumeItems(pkgs, tags);
+		for (OpSet<Tag>::const_iterator i = tags.begin(); i != tags.end(); ++i)
+			tagCount[*i] += pkgs.size();
+	}
+
+	// Return up to `count` (tag, occurrences) pairs with the highest counts, in descending order.
+	vector< pair<Tag, size_t> > getTopTags(size_t count)
+	{
+		vector< pair<Tag, size_t> > res;
+		for (map<Tag, size_t>::const_iterator i = tagCount.begin(); i != tagCount.end(); ++i)
+		{
+			pair<Tag, size_t> hand = *i;  // entry being inserted; swapped with each smaller entry it passes
+			for (size_t j = 0; j < count; ++j)
+			{
+				if (j >= res.size())
+				{
+					res.push_back(hand);
+					break;
+				}
+				else if (hand.second > res[j].second)
+				{
+					pair<Tag, size_t> tmp = res[j];
+					res[j] = hand;
+					hand = tmp;
+				}
+			}
+		}
+		return res;
+	}
+
+public:
+	SmartSearcher(component::PackageTags& debtags, Printer<Package, Tag>* printer) :
+		debtags(debtags), printer(printer) {}
+	// Reset the CardinalityStore base to a default-constructed one and drop tag counts and top tags.
+	void clear()
+	{
+		CardinalityStore<Package, Tag>::operator=(CardinalityStore<Package, Tag>());
+		tagCount.clear();
+		m_topTags.clear();
+	}
+
+	OpSet<Tag> topTags() const { return m_topTags; }
+	// Print the packages whose tagset is closest to the most frequent tags seen so far; returns how many.
+	int outputRelated()
+	{
+		const size_t relevantTags = 5;
+		vector< pair<Tag, size_t> > topTagList = getTopTags(relevantTags);
+		Scores<Tag> scores;
+		vector< OpSet<Tag> > closest;  // not named `tagsets`: that would shadow the inherited member
+		float distance = relevantTags;
+		int count = 0;
+
+		m_topTags.clear();
+		for (vector< pair<Tag, size_t> >::const_iterator i = topTagList.begin(); i != topTagList.end(); ++i)
+			m_topTags += i->first;
+
+		// Keep only the tagsets in this->tagsets at the smallest distance from m_topTags
+		for (tagsets_t::const_iterator i = this->tagsets.begin(); i != this->tagsets.end(); ++i)
+		{
+			float d = scores.distance(m_topTags, i->first);
+			if (d <= distance)
+			{
+				if (d < distance)
+				{
+					closest.clear();  // a strictly closer tagset supersedes the ones kept so far
+					distance = d;
+				}
+				closest.push_back(i->first);
+			}
+		}
+
+		// Print the packages the tag database returns for each of those tagsets
+		for (vector< OpSet<Tag> >::const_iterator i = closest.begin(); i != closest.end(); ++i)
+		{
+			OpSet<Package> pkgs = debtags.tagdb().getItems(*i);
+			printer->consume(pkgs, *i);
+			count += pkgs.size();
+		}
+		return count;
+	}
+
+	virtual void flush()
+	{
+		printer->flush();
+	}
+};
 
 
@@ -1005,7 +1172,7 @@
 	}
 };
 
-enum valid_command { UPDATE, CHECK, TAGSHOW, TAGSEARCH, TAGCAT, SHOW, RELATED, CAT, SEARCH, GREP, INSTALL, MKPATCH, MAINTAINERS, TAG, SUBMIT, TODO, SCORE, FACETCOLL, STATS, TODOREPORT };
+enum valid_command { UPDATE, CHECK, TAGSHOW, TAGSEARCH, TAGCAT, SHOW, RELATED, CAT, SEARCH, GREP, INSTALL, MKPATCH, MAINTAINERS, TAG, SUBMIT, TODO, SCORE, FACETCOLL, STATS, TODOREPORT, SMARTSEARCH };  // command ids passed to opts.addCommand(); SMARTSEARCH backs "ssearch"
 
 int main(int argc, const char* argv[])
 {
@@ -1054,7 +1221,13 @@
 				"  facetcoll     Print the tagged collection where each package is tagged with\n"
 				"                its facets only\n"
 				"  stats         Print statistics about Debtags\n"
-				"  todoreport    Print a report of packages needing work\n");
+				"  todoreport    Print a report of packages needing work\n"
+				"  ssearch <word [word1 [+tag [-tag1 ...]]]>\n"
+				"                Perform a keyword search integrated with related packages.\n"
+				"                A + prefix indicates a wanted tag.  A - prefix indicates\n"
+				"                an unwanted tag.  Other words indicate keywords to search.\n"
+				"                Remember to use '--' before unwanted tags to avoid to have\n"
+				"                them interpreted as commandline switches.\n");
 
 
 		/*
@@ -1094,6 +1267,7 @@
 		opts.addCommand("facetcoll", (int)FACETCOLL);
 		opts.addCommand("stats", (int)STATS);
 		opts.addCommand("todoreport", (int)TODOREPORT);
+		opts.addCommand("ssearch", (int)SMARTSEARCH);
 
 		// Process the commandline
 		valid_command cmd = (valid_command)opts.parse(argc, argv);
@@ -1129,7 +1303,7 @@
 				Searcher searcher(debtags, printer.get());
 				searcher.output();
 
-				break;
+				return 0;
 			}
 
 			// search [-v] <tag expression>\n"
@@ -1683,6 +1857,47 @@
 
 				break;
 			}
+			// ssearch <word [word1 [word2 ...]]>
+			// Perform a keyword search integrated with related packages
+			case SMARTSEARCH:
+			{
+				component::PackageTags& debtags = debtagsInit();
+				wantTagDatabase();
+				APTPrinter printer;
+				SmartSearcher smart(debtags, &printer);
+				Searcher searcher(debtags, &smart);
+				predicate::Predicate<Package> p = predicate::True<Package>();
+					//predicate::Factory<Package>::description(args.next());
+				while (args.hasNext())
+				{
+					string arg = args.next();
+					switch (arg[0])
+					{
+						case '+':
+							p = p and predicate::Factory<Package>::tag(
+									Global::get().tags().tagByName(arg.substr(1)));
+							break;
+						case '-':
+							p = p and not predicate::Factory<Package>::tag(
+									Global::get().tags().tagByName(arg.substr(1)));
+							break;
+						default:
+							p = p and predicate::Factory<Package>::description(arg);
+							break;
+					}
+				}
+
+				int step1 = searcher.output(p);
+				int step2 = smart.outputRelated();
+
+				cout << step1 << " normal matches plus " << step2 << " related packages." << endl;
+				OpSet<Tag> topTags = smart.topTags();
+				cout << "Top tags were: " << topTags << endl;
+
+				return step1 + step2 > 0 ? 0 : 1;
+			}
+			
+
 		}
 
 		return 0;



More information about the Debtags-commits mailing list