[Aptitude-svn-commit] r4060 - in branches/aptitude-0.3/aptitude: .
src/generic
Daniel Burrows
dburrows at costa.debian.org
Tue Sep 6 22:06:54 UTC 2005
Author: dburrows
Date: Tue Sep 6 22:06:52 2005
New Revision: 4060
Modified:
branches/aptitude-0.3/aptitude/ChangeLog
branches/aptitude-0.3/aptitude/src/generic/setset.h
Log:
Speed up the dist-upgrade-with-rejects test case significantly
by preallocating space to store the hit counts in find_subset.
Modified: branches/aptitude-0.3/aptitude/ChangeLog
==============================================================================
--- branches/aptitude-0.3/aptitude/ChangeLog (original)
+++ branches/aptitude-0.3/aptitude/ChangeLog Tue Sep 6 22:06:52 2005
@@ -1,5 +1,18 @@
2005-09-06 Daniel Burrows <dburrows at debian.org>
+ * src/generic/setset.h:
+
+ Significantly speed up the operation of find_subset by
+ preallocating space to store its hit counts. This means that it
+ has to pass over the whole conflict set, but it appears that
+ it's better to do that than to allocate; I get a speedup of 2-3
+ times out of this change, which is fairly important when you
+ consider that find_subset is invoked almost a million times and
+ is where most of the time of the test case I'm considering is
+ spent.
+
+ Note that this makes find_subset NOT threadsafe.
+
* doc/cs/aptitude.xml:
Correct several XML errors in the Czech documentation.
Modified: branches/aptitude-0.3/aptitude/src/generic/setset.h
==============================================================================
--- branches/aptitude-0.3/aptitude/src/generic/setset.h (original)
+++ branches/aptitude-0.3/aptitude/src/generic/setset.h Tue Sep 6 22:06:52 2005
@@ -24,6 +24,13 @@
// R to an equivalent element of T" as well as its dual. (with R
// being the universal relation, this is just a straight subset
// relation, but other general types of subsumption are also posible)
+//
+// WARNING: this class is completely non-threadsafe. Because
+// find_subset is very heavily used (over a million calls in many test
+// cases), it has a number of optimizations, including one that means
+// YOU MAY NOT HAVE MULTIPLE SIMULTANEOUS CALLERS OF FIND_SUBSET as it
+// uses pre-allocated data structures to speed up its operation (by a
+// factor of 3 or so).
#ifndef SETSET_H
#define SETSET_H
@@ -59,9 +66,25 @@
class setset
{
private:
- std::vector<imm::set<Val, Compare> > entries;
+ struct entry
+ {
+ imm::set<Val, Compare> s;
+ /** Field used in the subset-testing algorithm. */
+ mutable unsigned int hit_count;
+
+ entry(const imm::set<Val, Compare> &_s)
+ :s(_s), hit_count(0)
+ {
+ }
+
+ entry()
+ :hit_count(0)
+ {
+ }
+ };
- typedef std::vector<imm::set<Val, Compare> > entries_list;
+ typedef std::vector<entry> entries_list;
+ entries_list entries;
typedef std::pair<typename entries_list::size_type, Val> index_entry;
@@ -69,11 +92,103 @@
index_type sets_by_key;
+ void reset_counts() const
+ {
+ for(typename entries_list::const_iterator i = entries.begin();
+ i != entries.end(); ++i)
+ i->hit_count = 0;
+ }
public:
- typedef typename entries_list::const_iterator const_iterator;
+ class const_iterator
+ {
+ typename entries_list::const_iterator real_iter;
+ public:
+ const_iterator(const typename entries_list::const_iterator &_real_iter)
+ :real_iter(_real_iter)
+ {
+ }
+
+ const_iterator()
+ {
+ }
+
+ const imm::set<Val, Compare> &operator*() const
+ {
+ return real_iter->s;
+ }
+
+ const imm::set<Val, Compare> *operator->() const
+ {
+ return &real_iter->s;
+ }
+
+ const_iterator &operator++()
+ {
+ ++real_iter;
+ return *this;
+ }
+
+ const_iterator &operator--()
+ {
+ --real_iter;
+ return *this;
+ }
+
+ bool operator<(const const_iterator &other) const
+ {
+ return real_iter < other.real_iter;
+ }
+
+ bool operator<=(const const_iterator &other) const
+ {
+ return real_iter <= other.real_iter;
+ }
+
+ bool operator>(const const_iterator &other) const
+ {
+ return real_iter > other.real_iter;
+ }
+
+ bool operator>=(const const_iterator &other) const
+ {
+ return real_iter >= other.real_iter;
+ }
+
+ bool operator==(const const_iterator &other) const
+ {
+ return real_iter == other.real_iter;
+ }
+
+ bool operator!=(const const_iterator &other) const
+ {
+ return real_iter != other.real_iter;
+ }
+
+ const_iterator operator-(typename entries_list::size_type x) const
+ {
+ return real_iter - x;
+ }
+
+ const_iterator operator+(typename entries_list::size_type x) const
+ {
+ return real_iter + x;
+ }
+
+ const_iterator &operator-=(typename entries_list::size_type x)
+ {
+ real_iter -= x;
+ return *this;
+ }
+
+ const_iterator &operator+=(typename entries_list::size_type x)
+ {
+ real_iter += x;
+ return *this;
+ }
+ };
- typedef typename index_type::size_type size_type;
+ typedef typename entries_list::size_type size_type;
private:
// Used to construct a set traversal that populates the sets_by_key
@@ -100,41 +215,24 @@
template<typename R>
struct tally_intersections
{
- HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int> &counts;
-
+ const entries_list &entries;
const index_type &sets_by_key;
const R &r;
- inline typename index_type::const_iterator find_val(const Val &v) const
- {
- return sets_by_key.find(v);
- }
-
- inline void process_entry(const typename std::vector<index_entry>::const_iterator &vi,
- const Val &v) const
- {
- typename HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int>::iterator found
- = counts.find(vi->first);
-
- if(found == counts.end())
- counts[vi->first] = 1;
- else
- ++found->second;
- }
public:
- tally_intersections(HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int> &_counts,
+ tally_intersections(const entries_list &_entries,
const index_type &_sets_by_key,
const R &_r)
- :counts(_counts), sets_by_key(_sets_by_key), r(_r)
+ :entries(_entries), sets_by_key(_sets_by_key), r(_r)
{
}
- // For each set containing v, add 1 to its tally in counts.
+ // For each set containing v, add 1 to its hit count.
void operator()(const Val &v) const
{
typename index_type::const_iterator found
- = find_val(v);
+ = sets_by_key.find(v);
if(found != sets_by_key.end())
{
@@ -143,7 +241,7 @@
for(typename std::vector<index_entry>::const_iterator vi
= vals.begin(); vi != vals.end(); ++vi)
if(r(vi->second, v))
- process_entry(vi, v);
+ ++entries[vi->first].hit_count;
}
}
};
@@ -199,20 +297,16 @@
const_iterator find_subset(const imm::set<Val, Compare> &s,
const R &r) const
{
+ reset_counts();
// For each element that intersects s, count how many times it
// intersects. If every element of a set intersects s (i.e., the
// count is equal to the set's size) then it is a subset of s.
- HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int> counts;
- s.for_each(tally_intersections<R>(counts, sets_by_key, r));
+ s.for_each(tally_intersections<R>(entries, sets_by_key, r));
- for(typename HASH_NAMESPACE::hash_map<typename entries_list::size_type, unsigned int>::const_iterator ci
- = counts.begin(); ci != counts.end(); ++ci)
- {
- typename entries_list::const_iterator found = entries.begin()+ci->first;
-
- if(found->size() == ci->second)
- return found;
- }
+ for(typename entries_list::const_iterator i
+ = entries.begin(); i != entries.end(); ++i)
+ if(i->s.size() == i->hit_count)
+ return i;
return end();
}
@@ -370,7 +464,7 @@
const_iterator end() const
{
- return S.end();
+ return const_iterator(S.end());
}
size_type size() const
More information about the Aptitude-svn-commit
mailing list