[Aptitude-svn-commit] r4060 - in branches/aptitude-0.3/aptitude: . src/generic

Daniel Burrows dburrows at costa.debian.org
Tue Sep 6 22:06:54 UTC 2005


Author: dburrows
Date: Tue Sep  6 22:06:52 2005
New Revision: 4060

Modified:
   branches/aptitude-0.3/aptitude/ChangeLog
   branches/aptitude-0.3/aptitude/src/generic/setset.h
Log:
Speed up the dist-upgrade-with-rejects test case significantly
by preallocating space to store the hit counts in find_subset.

Modified: branches/aptitude-0.3/aptitude/ChangeLog
==============================================================================
--- branches/aptitude-0.3/aptitude/ChangeLog	(original)
+++ branches/aptitude-0.3/aptitude/ChangeLog	Tue Sep  6 22:06:52 2005
@@ -1,5 +1,18 @@
 2005-09-06  Daniel Burrows  <dburrows at debian.org>
 
+	* src/generic/setset.h:
+
+	  Significantly speed up the operation of find_subset by
+	  preallocating space to store its hit counts.  This means that it
+	  has to pass over the whole conflict set, but it appears that
+	  it's better to do that than to allocate; I get a speedup of 2-3
+	  times out of this change, which is fairly important when you
+	  consider that find_subset is invoked almost a million times and
+	  is where most of the time of the test case I'm considering is
+	  spent.
+
+	  Note that this makes find_subset NOT threadsafe.
+
 	* doc/cs/aptitude.xml:
 
 	  Correct several XML errors in the Czech documentation.

Modified: branches/aptitude-0.3/aptitude/src/generic/setset.h
==============================================================================
--- branches/aptitude-0.3/aptitude/src/generic/setset.h	(original)
+++ branches/aptitude-0.3/aptitude/src/generic/setset.h	Tue Sep  6 22:06:52 2005
@@ -24,6 +24,13 @@
 // R to an equivalent element of T" as well as its dual.  (with R
 // being the universal relation, this is just a straight subset
 // relation, but other general types of subsumption are also posible)
+//
+// WARNING: this class is completely non-threadsafe.  Because
+// find_subset is very heavily used (over a million calls in many test
+// cases), it has a number of optimizations, including one that means
+// YOU MAY NOT HAVE MULTIPLE SIMULTANEOUS CALLERS OF FIND_SUBSET as it
+// uses pre-allocated data structures to speed up its operation (by a
+// factor of 3 or so).
 
 #ifndef SETSET_H
 #define SETSET_H
@@ -59,9 +66,25 @@
 class setset
 {
 private:
-  std::vector<imm::set<Val, Compare> > entries;
+  struct entry
+  {
+    imm::set<Val, Compare> s;
+    /** Field used in the subset-testing algorithm. */
+    mutable unsigned int hit_count;
+
+    entry(const imm::set<Val, Compare> &_s)
+      :s(_s), hit_count(0)
+    {
+    }
+
+    entry()
+      :hit_count(0)
+    {
+    }
+  };
 
-  typedef std::vector<imm::set<Val, Compare> > entries_list;
+  typedef std::vector<entry> entries_list;
+  entries_list entries;
 
   typedef std::pair<typename entries_list::size_type, Val> index_entry;
 
@@ -69,11 +92,103 @@
 
   index_type sets_by_key;
 
+  void reset_counts() const
+  {
+    for(typename entries_list::const_iterator i = entries.begin();
+	i != entries.end(); ++i)
+      i->hit_count = 0;
+  }
 
 public:
-  typedef typename entries_list::const_iterator const_iterator;
+  class const_iterator
+  {
+    typename entries_list::const_iterator real_iter;
+  public:
+    const_iterator(const typename entries_list::const_iterator &_real_iter)
+      :real_iter(_real_iter)
+    {
+    }
+
+    const_iterator()
+    {
+    }
+
+    const imm::set<Val, Compare> &operator*() const
+    {
+      return real_iter->s;
+    }
+
+    const imm::set<Val, Compare> *operator->() const
+    {
+      return &real_iter->s;
+    }
+
+    const_iterator &operator++()
+    {
+      ++real_iter;
+      return *this;
+    }
+
+    const_iterator &operator--()
+    {
+      --real_iter;
+      return *this;
+    }
+
+    bool operator<(const const_iterator &other) const
+    {
+      return real_iter < other.real_iter;
+    }
+
+    bool operator<=(const const_iterator &other) const
+    {
+      return real_iter <= other.real_iter;
+    }
+
+    bool operator>(const const_iterator &other) const
+    {
+      return real_iter > other.real_iter;
+    }
+
+    bool operator>=(const const_iterator &other) const
+    {
+      return real_iter >= other.real_iter;
+    }
+
+    bool operator==(const const_iterator &other) const
+    {
+      return real_iter == other.real_iter;
+    }
+
+    bool operator!=(const const_iterator &other) const
+    {
+      return real_iter != other.real_iter;
+    }
+
+    const_iterator operator-(typename entries_list::size_type x) const
+    {
+      return real_iter - x;
+    }
+
+    const_iterator operator+(typename entries_list::size_type x) const
+    {
+      return real_iter + x;
+    }
+
+    const_iterator &operator-=(typename entries_list::size_type x)
+    {
+      real_iter -= x;
+      return *this;
+    }
+
+    const_iterator &operator+=(typename entries_list::size_type x)
+    {
+      real_iter += x;
+      return *this;
+    }
+  };
 
-  typedef typename index_type::size_type size_type;
+  typedef typename entries_list::size_type size_type;
 
 private:
   // Used to construct a set traversal that populates the sets_by_key
@@ -100,41 +215,24 @@
   template<typename R>
   struct tally_intersections
   {
-    HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int> &counts;
-
+    const entries_list &entries;
     const index_type &sets_by_key;
 
     const R &r;
 
-    inline typename index_type::const_iterator find_val(const Val &v) const
-    {
-      return sets_by_key.find(v);
-    }
-
-    inline void process_entry(const typename std::vector<index_entry>::const_iterator &vi,
-			      const Val &v) const
-    {
-      typename HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int>::iterator found
-	= counts.find(vi->first);
-
-      if(found == counts.end())
-	counts[vi->first] = 1;
-      else
-	++found->second;
-    }
   public:
-    tally_intersections(HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int> &_counts,
+    tally_intersections(const entries_list &_entries,
 			const index_type &_sets_by_key,
 			const R &_r)
-      :counts(_counts), sets_by_key(_sets_by_key), r(_r)
+      :entries(_entries), sets_by_key(_sets_by_key), r(_r)
     {
     }
 
-    // For each set containing v, add 1 to its tally in counts.
+    // For each set containing v, add 1 to its hit count.
     void operator()(const Val &v) const
     {
       typename index_type::const_iterator found
-	= find_val(v);
+	= sets_by_key.find(v);
 
       if(found != sets_by_key.end())
 	{
@@ -143,7 +241,7 @@
 	  for(typename std::vector<index_entry>::const_iterator vi
 		= vals.begin(); vi != vals.end(); ++vi)
 	    if(r(vi->second, v))
-	      process_entry(vi, v);
+	      ++entries[vi->first].hit_count;
 	}
     }
   };
@@ -199,20 +297,16 @@
   const_iterator find_subset(const imm::set<Val, Compare> &s,
 			     const R &r) const
   {
+    reset_counts();
     // For each element that intersects s, count how many times it
     // intersects.  If every element of a set intersects s (i.e., the
     // count is equal to the set's size) then it is a subset of s.
-    HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int> counts;
-    s.for_each(tally_intersections<R>(counts, sets_by_key, r));
+    s.for_each(tally_intersections<R>(entries, sets_by_key, r));
 
-    for(typename HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int>::const_iterator ci
-	  = counts.begin(); ci != counts.end(); ++ci)
-      {
-	typename entries_list::const_iterator found = entries.begin()+ci->first;
-
-	if(found->size() == ci->second)
-	  return found;
-      }
+    for(typename entries_list::const_iterator i
+	  = entries.begin(); i != entries.end(); ++i)
+      if(i->s.size() == i->hit_count)
+        return i;
 
     return end();
   }
@@ -370,7 +464,7 @@
 
   const_iterator end() const
   {
-    return S.end();
+    return const_iterator(S.end());
   }
 
   size_type size() const



More information about the Aptitude-svn-commit mailing list