[Debtags-commits] [svn] r1595 - in tagcoll/trunk: . tagcoll

Enrico Zini enrico at costa.debian.org
Mon Feb 20 20:34:42 UTC 2006


Author: enrico
Date: Mon Feb 20 20:34:40 2006
New Revision: 1595

Modified:
   tagcoll/trunk/   (props changed)
   tagcoll/trunk/tagcoll/IntDiskIndex.cc
   tagcoll/trunk/tagcoll/IntDiskIndex.h
   tagcoll/trunk/tagcoll/OpSet.h
Log:
 r7412 at viaza:  enrico | 2006-02-20 21:34:18 +0100
 Optimized IntDiskIndex::getItems[tags] as well


Modified: tagcoll/trunk/tagcoll/IntDiskIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/IntDiskIndex.cc	(original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.cc	Mon Feb 20 20:34:40 2006
@@ -26,13 +26,13 @@
 
 //#define TRACE_ISI
 
-#ifdef TRACE_ISI
+#ifdef TRACE_IS
 #include <iostream>
 #endif
 
-class IntSetIntersection : public std::list< std::pair<size_t, const int*> >
+class IntSets : public std::list< std::pair<size_t, const int*> >
 {
-#ifdef TRACE_ISI
+#ifdef TRACE_IS
 	void print(const std::string& title)
 	{
 		cerr << " * " << title << ":" << endl;
@@ -133,7 +133,8 @@
 		}
 	}
 
-	vector<int> intersect()
+	template<typename T>
+	OpSet<T> intersect(const Converter<int, T>& conv)
 	{
 		print("begin");
 		/*
@@ -141,7 +142,7 @@
 		 *   and move on
 		 * * Else, advance the minor ones until they all get the same
 		 */
-		vector<int> res;
+		OpSet<T> res;
 		while (size() > 1)
 		{
 			print("pre-flatten");
@@ -150,7 +151,7 @@
 			if (size() > 1)
 			{
 				// Store the common item
-				res.push_back(*(begin()->second));
+				res += conv(*(begin()->second));
 				//cerr << "Selected: " << *(begin()->second) << endl;
 
 				// Advance all lists
@@ -166,6 +167,87 @@
 		}
 		return res;
 	}
+
+	int extractmin()	// Returns the smallest current head value over all lists and advances every list whose head equals it
+	{
+		// Find the minimum head item (dereferences begin(): the caller must ensure this set is not empty)
+		int min = *(begin()->second);
+		for (const_iterator i = begin(); i != end(); i++)
+			if (*(i->second) < min)
+				min = *(i->second);
+
+		// Advance all the lists which have the minimum item as first item, so a value shared by several lists is returned once
+		iterator i = begin();
+		while (i != end())
+		{
+			iterator next = i; ++next;	// save the successor first; presumably advance(i) may drop an exhausted list and invalidate i - TODO confirm
+			if (*(i->second) == min)
+				advance(i);
+			i = next;
+		}
+
+		return min;
+	}
+
+	template<typename T>
+	class MergeIterator	// Single-pass iterator that pulls ints from an IntSets via extractmin() and yields them converted through conv
+	{
+		IntSets& is;	// source set; shared (by reference) between all iterators created from it
+		const Converter<int, T>& conv;
+		int val;	// current value, or -1 once the iteration is over
+
+	public:
+		// End iterator: val == -1 is the end sentinel (assumes -1 is never a stored value - TODO confirm)
+		MergeIterator(IntSets& is, const Converter<int, T>& conv) : is(is), conv(conv), val(-1) {}
+
+		MergeIterator(IntSets& is, const Converter<int, T>& conv, int val) : is(is), conv(conv), val(val) {}
+
+		MergeIterator& operator++()
+		{
+			if (is.empty())
+				val = -1;
+			else
+				val = is.extractmin();
+			return *this;
+		}
+		MergeIterator operator++(int)
+		{
+			// Keep an iterator on the old value, then step forward.  The
+			// copy must be built with conv: there is no (IntSets&, int)
+			// constructor.  It shares `is`, so only dereference it.
+			MergeIterator old(is, conv, val);
+			++*this;
+			return old;
+		}
+		T operator*() { return conv(val); }
+
+		bool operator==(const MergeIterator& mi) const { return val == -1 && mi.val == -1; }	// equal only when both are at the end
+		bool operator!=(const MergeIterator& mi) const { return val != -1 || mi.val != -1; }
+	};
+	
+	template<typename T>
+	MergeIterator<T> mergeBegin(const Converter<int, T>& conv)	// Start of the merged sequence; extracts the first value from this set right away
+	{
+		return empty() ? MergeIterator<T>(*this, conv) : MergeIterator<T>(*this, conv, extractmin());	// extractmin() dereferences begin(), so an empty set must yield the end iterator
+	}
+
+	template<typename T>
+	MergeIterator<T> mergeEnd(const Converter<int, T>& conv)	// End marker matching mergeBegin(): uses the two-argument constructor, which sets val to -1
+	{
+		return MergeIterator<T>(*this, conv);
+	}
+
+	template<typename T>
+	OpSet<T> merge(const Converter<int, T>& conv)	// Builds an OpSet<T> from the converted values of all lists; iterating calls extractmin(), so this set is consumed
+	{
+		/* Alternative implementation through an intermediate vector, currently disabled:
+		vector<T> sortedmerge;
+		while (!empty())
+			sortedmerge.push_back(conv(extractmin()));
+		return OpSet<T>(sortedmerge.begin(), sortedmerge.end());
+		*/
+		return OpSet<T>(mergeBegin<T>(conv), mergeEnd<T>(conv));	// relies on OpSet's two-argument (range) constructor
+	}
 };
 
 template<typename ITEM, typename TAG>
@@ -177,18 +259,29 @@
 		return getItemsHavingTag(*tags.begin());
 
 	// Create a vector with the item lists
-	IntSetIntersection items;
+	IntSets items;
 	for (typename OpSet<TAG>::const_iterator i = tags.begin(); i != tags.end(); i++)
 	{
 		int id = fromtag(*i);
 		items.push_back(make_pair(tagidx.size(id), tagidx.data(id)));
 	}
-	std::vector<int> ires = items.intersect();
-	OpSet<ITEM> res;
-	for (std::vector<int>::const_iterator i = ires.begin(); i != ires.end(); i++)
-		res += toitem(*i);
+	return items.intersect<ITEM>(*m_toitem);
+}
 
-	return res;
+template<typename ITEM, typename TAG>
+OpSet<TAG> IntDiskIndex<ITEM, TAG>::getTagsOfItems(const OpSet<ITEM>& items) const	// Returns the merged tag set of all the given items
+{
+	if (items.empty())
+		return OpSet<TAG>();	// the result type is OpSet<TAG>, not OpSet<ITEM>
+
+	// Collect one (size, data) tag id list per item, looked up in pkgidx
+	IntSets tags;
+	for (typename OpSet<ITEM>::const_iterator i = items.begin(); i != items.end(); ++i)	// items holds ITEMs, so iterate with the ITEM iterator type
+	{
+		int id = fromitem(*i);
+		tags.push_back(make_pair(pkgidx.size(id), pkgidx.data(id)));
+	}
+	return tags.merge<TAG>(*m_totag);	// merge all the lists into one OpSet<TAG>, converting ids through *m_totag
 }
 
 

Modified: tagcoll/trunk/tagcoll/IntDiskIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/IntDiskIndex.h	(original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.h	Mon Feb 20 20:34:40 2006
@@ -84,15 +84,7 @@
 			return OpSet<TAG>();
 	}
 
-	virtual OpSet<TAG> getTagsOfItems(const OpSet<ITEM>& items) const
-	{
-		// FIXME: reimplement
-		OpSet<TAG> res;
-		for (typename OpSet<ITEM>::const_iterator i = items.begin();
-				i != items.end(); i++)
-			res += getTagsOfItem(*i);
-		return res;
-	}
+	virtual OpSet<TAG> getTagsOfItems(const OpSet<ITEM>& items) const;
 
 public:
 	/**

Modified: tagcoll/trunk/tagcoll/OpSet.h
==============================================================================
--- tagcoll/trunk/tagcoll/OpSet.h	(original)
+++ tagcoll/trunk/tagcoll/OpSet.h	Mon Feb 20 20:34:40 2006
@@ -61,6 +61,11 @@
 	using std::set<T>::begin;
 	using std::set<T>::end;
 
+	OpSet() : std::set<T>() {}
+
+	template<typename A, typename B>
+	OpSet(A a, B b) : std::set<T>(a, b) {}
+
 	/** Return true if the tag set contains tag, else false */
 	bool contains(const T& item) const { return find(item) != end(); }
 



More information about the Debtags-commits mailing list