[Debtags-commits] [svn] r1557 - in tagcoll/trunk: tagcoll tests
Enrico Zini
enrico at costa.debian.org
Fri Feb 10 01:12:03 UTC 2006
Author: enrico
Date: Fri Feb 10 01:12:01 2006
New Revision: 1557
Added:
tagcoll/trunk/tagcoll/IntDiskIndex.cc
- copied, changed from r1555, tagcoll/trunk/tagcoll/TDBDiskIndex.cc
tagcoll/trunk/tagcoll/IntDiskIndex.h
- copied, changed from r1555, tagcoll/trunk/tagcoll/TDBDiskIndex.h
Modified:
tagcoll/trunk/tagcoll/IntIndex.cc
tagcoll/trunk/tagcoll/IntIndex.h
tagcoll/trunk/tagcoll/Makefile.am
tagcoll/trunk/tagcoll/OpSet.cc
tagcoll/trunk/tagcoll/test-utils.cc
tagcoll/trunk/tests/test-utils.h
Log:
Refactored the test code a bit
Created IntDiskIndex to make use of IntIndex to provide a high-performance on-disk mmapped index for item<->tag mapping
Copied: tagcoll/trunk/tagcoll/IntDiskIndex.cc (from r1555, tagcoll/trunk/tagcoll/TDBDiskIndex.cc)
==============================================================================
--- tagcoll/trunk/tagcoll/TDBDiskIndex.cc (original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.cc Fri Feb 10 01:12:01 2006
@@ -1,7 +1,7 @@
/*
* Fast index for tag data
*
- * Copyright (C) 2005 Enrico Zini <enrico at debian.org>
+ * Copyright (C) 2006 Enrico Zini <enrico at debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,256 +18,315 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <tagcoll/TDBDiskIndex.h>
-
-#include <tdb.h>
-#include <fcntl.h> // O_RDONLY
-#include <string.h> // strlen
-#include <errno.h>
-#include <assert.h>
-
-/*
-#include <stdlib.h>
-*/
+#include <tagcoll/IntDiskIndex.h>
using namespace std;
using namespace Tagcoll;
template<class ITEM, class TAG>
-TDBDiskIndex<ITEM, TAG>::TDBDiskIndex(
- const std::string& pkgidx,
- const std::string& tagidx,
- Converter<ITEM, std::string>& fromitem,
- Converter<TAG, std::string>& fromtag,
- Converter<std::string, ITEM>& toitem,
- Converter<std::string, TAG>& totag,
- bool write) :
- pkgdb(pkgidx), tagdb(tagidx),
+IntDiskIndex<ITEM, TAG>::IntDiskIndex(
+ const std::string& pkgidxfile,
+ const std::string& tagidxfile,
+ Converter<ITEM, int>& fromitem,
+ Converter<TAG, int>& fromtag,
+ Converter<int, ITEM>& toitem,
+ Converter<int, TAG>& totag) :
+ pkgidx(pkgidxfile), tagidx(tagidxfile),
fromitem(fromitem), fromtag(fromtag),
- toitem(toitem), totag(totag)
-{
- if (write)
- {
- pkgdb.open(0, O_RDWR | O_CREAT);
- tagdb.open(0, O_RDWR | O_CREAT);
- } else {
- pkgdb.open(0, O_RDONLY);
- tagdb.open(0, O_RDONLY);
- }
-}
+ toitem(toitem), totag(totag) {}
template<class ITEM, class TAG>
-bool TDBDiskIndex<ITEM, TAG>::hasTag(const TAG& tag) const
+bool IntDiskIndex<ITEM, TAG>::hasTag(const TAG& tag) const
{
- return tagdb.has(fromtag(tag));
+ return tagidx.size(fromtag(tag)) > 0;
}
template<class ITEM, class TAG>
-OpSet<ITEM> TDBDiskIndex<ITEM, TAG>::getItemsHavingTag(const TAG& tag) const
+OpSet<ITEM> IntDiskIndex<ITEM, TAG>::getItemsHavingTag(const TAG& tag) const
{
- return toitem(tagdb.getStringSet(fromtag(tag)));
+ OpSet<ITEM> res;
+ int itag = fromtag(tag);
+ for (unsigned int i = 0; i < tagidx.size(itag); i++)
+ res += toitem(tagidx.data(itag)[i]);
+ return res;
}
template<class ITEM, class TAG>
-OpSet<TAG> TDBDiskIndex<ITEM, TAG>::getTagsOfItem(const ITEM& item) const
+OpSet<TAG> IntDiskIndex<ITEM, TAG>::getTagsOfItem(const ITEM& item) const
{
if (item != ITEM())
- return totag(pkgdb.getStringSet(fromitem(item)));
+ {
+ OpSet<TAG> res;
+ int iitem = fromitem(item);
+ for (unsigned int i = 0; i < pkgidx.size(iitem); i++)
+ res += totag(pkgidx.data(iitem)[i]);
+ return res;
+ }
else
return OpSet<TAG>();
}
-#ifndef INSTANTIATING_TEMPLATES
-template<>
-OpSet<string> TDBDiskIndex<string, string>::getItemsHavingTag(const string& tag) const
+template<class ITEM, class TAG>
+int IntDiskIndex<ITEM, TAG>::getCardinality(const TAG& tag) const
{
- return tagdb.getStringSet(tag);
+ return tagidx.size(fromtag(tag));
}
-template<>
-OpSet<string> TDBDiskIndex<string, string>::getTagsOfItem(const string& item) const
+template<class ITEM, class TAG>
+OpSet<ITEM> IntDiskIndex<ITEM, TAG>::getTaggedItems() const
{
- return pkgdb.getStringSet(item);
+ OpSet<ITEM> res;
+ for (unsigned int i = 0; i < pkgidx.size(); i++)
+ if (pkgidx.size(i) > 0)
+ res += toitem(i);
+ return res;
}
-#endif
-
template<class ITEM, class TAG>
-int TDBDiskIndex<ITEM, TAG>::getCardinality(const TAG& tag) const
+OpSet<TAG> IntDiskIndex<ITEM, TAG>::getAllTags() const
{
- return tagdb.getStringSet(fromtag(tag)).size();
+ OpSet<TAG> res;
+ for (unsigned int i = 0; i < tagidx.size(); i++)
+ res += totag(i);
+ return res;
}
-static int collect_items(TDB_CONTEXT*, TDB_DATA key, TDB_DATA, void* data)
-{
- if (key.dsize >= 1)
- {
- OpSet<string>* coll = (OpSet<string>*)data;
- (*coll) += string(key.dptr, key.dsize);
- }
- return 0;
-}
-static int collect_tags(TDB_CONTEXT*, TDB_DATA key, TDB_DATA, void* data)
+template<class ITEM, class TAG>
+void IntDiskIndex<ITEM, TAG>::output(Consumer<ITEM, TAG>& consumer) const
{
- if (key.dsize >= 1)
- {
- OpSet<string>* coll = (OpSet<string>*)data;
- (*coll) += string(key.dptr, key.dsize);
- }
- return 0;
+ for (unsigned int i = 0; i < pkgidx.size(); i++)
+ if (pkgidx.size(i) > 0)
+ {
+ OpSet<TAG> tags;
+ for (unsigned int j = 0; j < pkgidx.size(i); j++)
+ tags += totag(pkgidx.data(i)[j]);
+ consumer.consume(toitem(i), tags);
+ }
}
template<class ITEM, class TAG>
-OpSet<ITEM> TDBDiskIndex<ITEM, TAG>::getTaggedItems() const
+void IntDiskIndex<ITEM, TAG>::applyChange(const PatchList<ITEM, TAG>& change)
{
- OpSet<string> res;
- pkgdb.traverse(collect_items, &res);
- return toitem(res);
}
template<class ITEM, class TAG>
-OpSet<TAG> TDBDiskIndex<ITEM, TAG>::getAllTags() const
+void IntDiskIndex<ITEM, TAG>::consumeItem(const ITEM& item, const OpSet<TAG>& tags)
{
- OpSet<string> res;
- tagdb.traverse(collect_tags, &res);
- return totag(res);
}
template<class ITEM, class TAG>
-struct out_data
+void IntDiskIndex<ITEM, TAG>::consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tags)
{
- Converter<std::string, ITEM>& toitem;
- Converter<std::string, TAG>& totag;
- Consumer<ITEM, TAG>& consumer;
+}
- out_data(
- Converter<std::string, ITEM>& toitem,
- Converter<std::string, TAG>& totag,
- Consumer<ITEM, TAG>& consumer) throw ()
- : toitem(toitem), totag(totag), consumer(consumer) {}
-};
template<class ITEM, class TAG>
-static int outputter(TDB_CONTEXT* db, TDB_DATA key, TDB_DATA val, void* data) throw ()
+IntDiskIndexer<ITEM, TAG>::IntDiskIndexer(
+ Converter<ITEM, int>& fromitem,
+ Converter<TAG, int>& fromtag,
+ Converter<int, ITEM>& toitem,
+ Converter<int, TAG>& totag)
+ : fromitem(fromitem), fromtag(fromtag),
+ toitem(toitem), totag(totag) {}
+
+template<class ITEM, class TAG>
+void IntDiskIndexer<ITEM, TAG>::consumeItem(const ITEM& item, const OpSet<TAG>& tags)
{
- if (key.dsize >= 1)
+ int iitem = fromitem(item);
+ for (typename OpSet<TAG>::iterator i = tags.begin(); i != tags.end(); i++)
{
- out_data<ITEM, TAG>* d = (out_data<ITEM, TAG>*)data;
- // Deserialize the key into a string
- string item(key.dptr, key.dsize);
-
- // Deserialize the tags into a string list
- OpSet<string> tags = TDBFile::deserialize_stringset(val);
-
- // Send the data to the consumer
- ITEM it = d->toitem(item);
- if (it != ITEM())
- d->consumer.consume(it, d->totag(tags));
+ int itag = fromtag(*i);
+ pkgidx.map(iitem, itag);
+ tagidx.map(itag, iitem);
}
- return 0;
}
template<class ITEM, class TAG>
-void TDBDiskIndex<ITEM, TAG>::output(Consumer<ITEM, TAG>& consumer) const
+void IntDiskIndexer<ITEM, TAG>::write(const std::string& pkgidxfile, const std::string& tagidxfile)
{
- out_data<ITEM, TAG> data(toitem, totag, consumer);
- pkgdb.traverse(outputter<ITEM, TAG>, &data);
+ pkgidx.write(pkgidxfile);
+ tagidx.write(tagidxfile);
}
-template<class ITEM, class TAG>
-void TDBDiskIndex<ITEM, TAG>::applyChange(const PatchList<ITEM, TAG>& change)
-{
- for (typename PatchList<ITEM, TAG>::const_iterator i = change.begin(); i != change.end(); i++)
- {
- // Save the previous tagset in `rev'
- OpSet<TAG> prevTags = getTags(i->first);
- OpSet<TAG> nextTags = i->second.apply(prevTags);
- string sitem = fromitem(i->first);
- OpSet<string> stags = fromtag(nextTags);
- OpSet<string> sprev_tags = fromtag(prevTags);
- // Set the new tagset in the item
- pkgdb.setStringSet(sitem, stags);
+#ifndef INSTANTIATING_TEMPLATES
+#include <string>
- // Fix the itemsets in the involved tags
- OpSet<string> t = sprev_tags - stags;
- for (OpSet<string>::const_iterator j = t.begin(); j != t.end(); j++)
- {
- OpSet<string> items = tagdb.getStringSet(*j) - sitem;
- if (items.empty())
- tagdb.remove(*j);
- else
- tagdb.setStringSet(*j, items);
- }
- t = stags - sprev_tags;
- for (OpSet<string>::const_iterator j = t.begin(); j != t.end(); j++)
- tagdb.setStringSet(*j, tagdb.getStringSet(*j) + sitem);
- }
+namespace Tagcoll {
+ template class IntDiskIndex<int, int>;
}
+#endif
-template<class ITEM, class TAG>
-void TDBDiskIndex<ITEM, TAG>::consumeItem(const ITEM& item, const OpSet<TAG>& tags)
+
+#ifdef COMPILE_TESTSUITE
+
+#include <tests/test-utils.h>
+#include <string>
+#include <map>
+
+namespace tut {
+using namespace tut_tagcoll;
+using namespace std;
+
+static const char* pkgfname = "tagcoll_intdiskindex_pkgs.tmp";
+static const char* tagfname = "tagcoll_intdiskindex_tags.tmp";
+
+class BigMap
{
- string sitem = fromitem(item);
- OpSet<string> stags = fromtag(tags);
- OpSet<string> prev_stags = pkgdb.getStringSet(sitem);
+ int seq;
+ map<int, string> tostring;
+ map<string, int> toint;
+public:
+ BigMap() : seq(0) {}
+
+ void map(int a, const string& b)
+ {
+ tostring[a] = b;
+ toint[b] = a;
+ }
- // Add the tags to the item
- pkgdb.setStringSet(sitem, prev_stags + stags);
+ int get(const string& val)
+ {
+ std::map<string, int>::const_iterator i = toint.find(val);
+ if (i == toint.end())
+ {
+ map(seq, val);
+ return seq++;
+ } else
+ return i->second;
+ }
- // Add the item to the tags
- for (typename OpSet<string>::const_iterator i = stags.begin(); i != stags.end(); i++)
- tagdb.setStringSet(*i, tagdb.getStringSet(*i) + sitem);
+ string get(int val)
+ {
+ std::map<int, string>::const_iterator i = tostring.find(val);
+ gen_ensure(i != tostring.end());
+ return i->second;
+ }
+};
}
-template<class ITEM, class TAG>
-void TDBDiskIndex<ITEM, TAG>::consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tags)
+namespace Tagcoll {
+
+template<>
+class Converter<int, std::string>
{
- OpSet<string> sitems = fromitem(items);
- OpSet<string> stags = fromtag(tags);
+ tut::BigMap& map;
+
+public:
+ Converter(tut::BigMap& map) : map(map) {}
+ std::string operator()(const int& item) { return map.get(item); }
+ OpSet<std::string> operator()(const OpSet<int>& items)
+ {
+ OpSet<string> res;
- for (typename OpSet<string>::const_iterator i = sitems.begin(); i != sitems.end(); i++)
- // Add the tags to the item
- pkgdb.setStringSet(*i, pkgdb.getStringSet(*i) + stags);
+ for (OpSet<int>::const_iterator i = items.begin();
+ i != items.end(); i++)
+ {
+ string t = (*this)(*i);
+ if (t != string())
+ res += t;
+ }
- for (typename OpSet<string>::const_iterator i = stags.begin(); i != stags.end(); i++)
- // Add the items to the tag
- tagdb.setStringSet(*i, tagdb.getStringSet(*i) + sitems);
-}
+ return res;
+ }
+};
+template<>
+class Converter<std::string, int>
+{
+ tut::BigMap& map;
-#ifndef INSTANTIATING_TEMPLATES
-#include <string>
+public:
+ Converter(tut::BigMap& map) : map(map) {}
-namespace Tagcoll {
- template class TDBDiskIndex<std::string, std::string>;
-}
-#endif
+ int operator()(const std::string& item) { return map.get(item); }
+ OpSet<int> operator()(const OpSet<std::string>& items) // convert a set of strings to the set of their ids
+ {
+ OpSet<int> res;
+ for (OpSet<string>::const_iterator i = items.begin();
+ i != items.end(); i++)
+ {
+ int t = (*this)(*i); // map.get() allocates a fresh id for unknown strings
+ // BigMap ids start at 0, so 0 is a valid id and must not be filtered out
+ res += t;
+ }
-#ifdef COMPILE_TESTSUITE
+ return res;
+ }
+};
-#include <tests/test-utils.h>
+}
namespace tut {
-using namespace tut_tagcoll;
-struct tagcoll_tdbdiskindex_shar {
+struct tagcoll_intdiskindex_shar {
+ BigMap items;
+ BigMap tags;
+ Converter<string, int> conv1;
+ Converter<string, int> conv2;
+ Converter<int, string> conv3;
+ Converter<int, string> conv4;
+
+ tagcoll_intdiskindex_shar()
+ : conv1(items), conv2(tags), conv3(items), conv4(tags)
+ {
+ IntDiskIndexer<string, string> indexer(conv1, conv2, conv3, conv4);
+ output_test_collection(indexer);
+ indexer.write(pkgfname, tagfname);
+ }
+ ~tagcoll_intdiskindex_shar() // fixture teardown
+ {
+ unlink(pkgfname); // remove the index files written by the constructor's indexer.write()
+ unlink(tagfname);
+ }
};
-TESTGRP(tagcoll_tdbdiskindex);
+TESTGRP(tagcoll_intdiskindex);
+
+#include <iostream>
+
+void outts(const OpSet<string>& s)
+{
+ for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
+ if (i == s.begin())
+ cerr << *i;
+ else
+ cerr << ", " << *i;
+}
template<> template<>
void to::test<1>()
{
- Converter<string, string> a;
- TDBDiskIndex<string, string> coll("pkgidx.test", "tagidx.test", a, a, a, a);
+ IntDiskIndex<string, string> idx(pkgfname, tagfname, conv1, conv2, conv3, conv4);
+
+#if 0
+ cerr << "Items: ";
+ OpSet<string> s = idx.getTaggedItems();
+ outts(s);
+ cerr << endl;
+ for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
+ {
+ cerr << " " << *i << ": ";
+ outts(idx.getTags(*i));
+ cerr << endl;
+ }
- test_tagged_collection(coll);
- unlink("pkgidx.test");
- unlink("tagidx.test");
+ cerr << "Tags: ";
+ s = idx.getAllTags();
+ outts(s);
+ cerr << endl;
+ for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
+ {
+ cerr << " " << *i << ": ";
+ outts(idx.getItems(*i));
+ cerr << endl;
+ }
+#endif
+
+ test_tagged_collection_ro(idx);
}
}
Copied: tagcoll/trunk/tagcoll/IntDiskIndex.h (from r1555, tagcoll/trunk/tagcoll/TDBDiskIndex.h)
==============================================================================
--- tagcoll/trunk/tagcoll/TDBDiskIndex.h (original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.h Fri Feb 10 01:12:01 2006
@@ -1,12 +1,12 @@
-#ifndef TAGCOLL_TDB_DISK_INDEX_H
-#define TAGCOLL_TDB_DISK_INDEX_H
+#ifndef TAGCOLL_INT_DISK_INDEX_H
+#define TAGCOLL_INT_DISK_INDEX_H
/** \file
* Fast on-disk index for tag data
*/
/*
- * Copyright (C) 2005 Enrico Zini <enrico at debian.org>
+ * Copyright (C) 2006 Enrico Zini <enrico at debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -26,10 +26,7 @@
#include <tagcoll/Collection.h>
#include <tagcoll/Serializer.h>
#include <tagcoll/Exception.h>
-#include <tagcoll/TDBFile.h>
-#include <map>
-
-#include <string>
+#include <tagcoll/IntIndex.h>
namespace Tagcoll
{
@@ -47,15 +44,15 @@
* TDBDiskIndex to access it afterwards.
*/
template<class ITEM, class TAG>
-class TDBDiskIndex : public Collection<ITEM, TAG>
+class IntDiskIndex : public Collection<ITEM, TAG>
{
protected:
- TDBFile pkgdb;
- TDBFile tagdb;
- mutable Converter<ITEM, std::string>& fromitem;
- mutable Converter<TAG, std::string>& fromtag;
- mutable Converter<std::string, ITEM>& toitem;
- mutable Converter<std::string, TAG>& totag;
+ IntIndex pkgidx;
+ IntIndex tagidx;
+ mutable Converter<ITEM, int>& fromitem;
+ mutable Converter<TAG, int>& fromtag;
+ mutable Converter<int, ITEM>& toitem;
+ mutable Converter<int, TAG>& totag;
virtual void consumeItem(const ITEM& item, const OpSet<TAG>& tags);
virtual void consumeItems(const OpSet<ITEM>& items, const OpSet<TAG>& tags);
@@ -81,15 +78,14 @@
* throw an exception if invoked.
* It defaults to true.
*/
- TDBDiskIndex(
- const std::string& pkgidx,
- const std::string& tagidx,
- Converter<ITEM, std::string>& fromitem,
- Converter<TAG, std::string>& fromtag,
- Converter<std::string, ITEM>& toitem,
- Converter<std::string, TAG>& totag,
- bool write = true);
- virtual ~TDBDiskIndex() {}
+ IntDiskIndex(
+ const std::string& pkgidxfile,
+ const std::string& tagidxfile,
+ Converter<ITEM, int>& fromitem,
+ Converter<TAG, int>& fromtag,
+ Converter<int, ITEM>& toitem,
+ Converter<int, TAG>& totag);
+ virtual ~IntDiskIndex() {}
virtual bool hasTag(const TAG& tag) const;
@@ -103,6 +99,32 @@
virtual void applyChange(const PatchList<ITEM, TAG>& change);
};
+template<class ITEM, class TAG>
+class IntDiskIndexer : public Consumer<ITEM, TAG>
+{
+protected:
+ IntIndexer pkgidx;
+ IntIndexer tagidx;
+ mutable Converter<ITEM, int>& fromitem;
+ mutable Converter<TAG, int>& fromtag;
+ mutable Converter<int, ITEM>& toitem;
+ mutable Converter<int, TAG>& totag;
+
+ virtual void consumeItemUntagged(const ITEM& item) {}
+ virtual void consumeItem(const ITEM& item, const OpSet<TAG>& tags);
+
+public:
+ IntDiskIndexer(
+ Converter<ITEM, int>& fromitem,
+ Converter<TAG, int>& fromtag,
+ Converter<int, ITEM>& toitem,
+ Converter<int, TAG>& totag);
+ virtual ~IntDiskIndexer() {}
+
+ void write(const std::string& pkgidx, const std::string& tagidx);
+};
+
+
};
// vim:set ts=4 sw=4:
Modified: tagcoll/trunk/tagcoll/IntIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/IntIndex.cc (original)
+++ tagcoll/trunk/tagcoll/IntIndex.cc Fri Feb 10 01:12:01 2006
@@ -71,7 +71,8 @@
for (const_iterator i = begin(); i != end(); i++)
bufsize += i->size();
- return bufsize * sizeof(int);
+ // Then the int with the number of items
+ return (bufsize + 1) * sizeof(int);
}
void IntIndexer::encode(int* buf) const
@@ -84,6 +85,7 @@
for (set<int>::const_iterator j = (*this)[i].begin(); j != (*this)[i].end(); j++)
buf[pos++] = *j;
}
+ buf[pos++] = size();
}
void IntIndexer::write(const std::string& filename)
@@ -171,29 +173,37 @@
namespace tut {
using namespace tut_tagcoll;
+static const char* fname = "tagcoll_intindex.tmp";
+
struct tagcoll_intindex_shar {
+ tagcoll_intindex_shar() {
+ // Create the index
+ IntIndexer indexer;
+ indexer.map(4, 1);
+ indexer.map(4, 2);
+ indexer.map(2, 1);
+ indexer.map(0, 5);
+ indexer.map(0, 8);
+ indexer.map(0, 1);
+ indexer.map(0, 7);
+ indexer.write(fname);
+ }
+ ~tagcoll_intindex_shar() {
+ // Delete the test index
+ unlink(fname);
+ }
};
TESTGRP(tagcoll_intindex);
template<> template<>
void to::test<1>()
{
- static const char* fname = "tagcoll_intindex.tmp";
-
- // Create the index
- IntIndexer indexer;
- indexer.map(4, 1);
- indexer.map(4, 2);
- indexer.map(2, 1);
- indexer.map(0, 5);
- indexer.map(0, 8);
- indexer.map(0, 1);
- indexer.map(0, 7);
- indexer.write(fname);
-
// Read the index
IntIndex index(fname);
+ // Check the number of mapped items
+ ensure_equals(index.size(), 5);
+
// Check that the arrays have the right size
ensure_equals(index.size(0), 4);
ensure_equals(index.size(1), 0);
@@ -209,9 +219,6 @@
ensure_equals(index.data(2)[0], 1);
ensure_equals(index.data(4)[0], 1);
ensure_equals(index.data(4)[1], 2);
-
- // Delete the test index
- //unlink(fname);
}
}
Modified: tagcoll/trunk/tagcoll/IntIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/IntIndex.h (original)
+++ tagcoll/trunk/tagcoll/IntIndex.h Fri Feb 10 01:12:01 2006
@@ -59,8 +59,9 @@
IntIndex(const std::string& filename);
~IntIndex();
- const int* data(unsigned int val) { return m_buf + m_buf[val] + 1; }
- int size(unsigned int val) { return m_buf[m_buf[val]]; }
+ const int* data(unsigned int val) const { return m_buf + m_buf[val] + 1; }
+ unsigned int size(unsigned int val) const { return val < size() ? m_buf[m_buf[val]] : 0; }
+ unsigned int size() const { return m_buf[m_size / sizeof(int) - 1]; }
};
/**
Modified: tagcoll/trunk/tagcoll/Makefile.am
==============================================================================
--- tagcoll/trunk/tagcoll/Makefile.am (original)
+++ tagcoll/trunk/tagcoll/Makefile.am Fri Feb 10 01:12:01 2006
@@ -39,6 +39,7 @@
TDBReadonlyDiskIndex.h \
\
IntIndex.h \
+ IntDiskIndex.h \
\
Filters.h \
Implications.h \
@@ -78,6 +79,7 @@
TDBReadonlyDiskIndex.cc \
\
IntIndex.cc \
+ IntDiskIndex.cc \
\
Filters.cc \
Implications.cc \
Modified: tagcoll/trunk/tagcoll/OpSet.cc
==============================================================================
--- tagcoll/trunk/tagcoll/OpSet.cc (original)
+++ tagcoll/trunk/tagcoll/OpSet.cc Fri Feb 10 01:12:01 2006
@@ -160,6 +160,7 @@
#ifndef INSTANTIATING_TEMPLATES
template class OpSet<string>;
+template class OpSet<int>;
#endif
#ifdef COMPILE_TESTSUITE
Modified: tagcoll/trunk/tagcoll/test-utils.cc
==============================================================================
--- tagcoll/trunk/tagcoll/test-utils.cc (original)
+++ tagcoll/trunk/tagcoll/test-utils.cc Fri Feb 10 01:12:01 2006
@@ -39,6 +39,12 @@
using namespace Tagcoll;
using namespace tut;
+void _gen_ensure(const Location& loc, bool res)
+{
+ if (!res)
+ throw failure(loc.locstr());
+}
+
void outputCollection(const std::string& str, Tagcoll::Consumer<string, string>& cons)
{
StringParserInput input(str);
@@ -94,6 +100,235 @@
}
}
+void __output_test_collection(const Location& loc, Consumer<string, string>& tc)
+{
+ OpSet<string> tagset;
+
+ tc.consume("gnocco");
+
+ tagset += "tomato"; tagset += "mozzarella";
+ tc.consume("margherita", tagset);
+
+ tagset += "mushrooms";
+ tc.consume("funghi", tagset);
+
+ tagset.clear();
+ tagset += "garlic";
+ tagset += "rosemerry";
+ tc.consume("rosmarino", tagset);
+}
+
+void __test_tagged_collection_ro(const Location& loc, Collection<string, string>& tc)
+{
+ OpSet<string> s, s1;
+
+ // hasTag
+ inner_ensure(tc.hasTag("tomato"));
+ inner_ensure(tc.hasTag("mozzarella"));
+ inner_ensure(tc.hasTag("mushrooms"));
+ inner_ensure(tc.hasTag("garlic"));
+ inner_ensure(tc.hasTag("rosemerry"));
+ inner_ensure(!tc.hasTag("ketchup"));
+
+ // getTags(item)
+ s = tc.getTags("funghi");
+ inner_ensure_contains(s, string("tomato"));
+ inner_ensure_contains(s, string("mozzarella"));
+ inner_ensure_contains(s, string("mushrooms"));
+ inner_ensure_not_contains(s, string("garlic"));
+ inner_ensure_not_contains(s, string("rosemerry"));
+
+ s = tc.getTags("margherita");
+ inner_ensure_contains(s, string("tomato"));
+ inner_ensure_contains(s, string("mozzarella"));
+ inner_ensure_not_contains(s, string("mushrooms"));
+ inner_ensure_not_contains(s, string("garlic"));
+ inner_ensure_not_contains(s, string("rosemerry"));
+
+ s = tc.getTags("rosmarino");
+ inner_ensure(!s.contains("tomato"));
+ inner_ensure(!s.contains("mozzarella"));
+ inner_ensure(!s.contains("mushrooms"));
+ inner_ensure(s.contains("garlic"));
+ inner_ensure(s.contains("rosemerry"));
+
+ s = tc.getTags("gnocco");
+ inner_ensure(s.empty());
+
+ // getTags(items)
+ s1.clear();
+ s1 += "funghi"; s1 += "margherita";
+ s = tc.getTags(s1);
+ inner_ensure(s.contains("tomato"));
+ inner_ensure(s.contains("mozzarella"));
+ inner_ensure(s.contains("mushrooms"));
+ inner_ensure(!s.contains("garlic"));
+ inner_ensure(!s.contains("rosemerry"));
+
+ s1.clear();
+ s1 += "rosmarino"; s1 += "margherita";
+ s = tc.getTags(s1);
+ inner_ensure(s.contains("tomato"));
+ inner_ensure(s.contains("mozzarella"));
+ inner_ensure(!s.contains("mushrooms"));
+ inner_ensure(s.contains("garlic"));
+ inner_ensure(s.contains("rosemerry"));
+}
+
+void __test_tagged_collection(const Location& loc, Collection<string, string>& tc)
+{
+ // Test handling of untagged items (they are not stored)
+ tc.consume("untagged");
+ inner_ensure(tc.getTags("untagged").empty());
+
+ // Test handling of tagged items
+ OpSet<string> tagset;
+ tagset += "tag1"; tagset += "tag2";
+ tc.consume("tagged", tagset);
+ inner_ensure(tc.getTaggedItems().contains("tagged"));
+ //inner_ensure(tc.hasTag("tag1"));
+ //inner_ensure(tc.hasTag("tag2"));
+ tagset = tc.getTags("tagged");
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ OpSet<string> itemset = tc.getItems("tag1");
+ inner_ensure(itemset.contains("tagged"));
+ itemset = tc.getItems("tag2");
+ inner_ensure(itemset.contains("tagged"));
+ tagset = tc.getAllTags();
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ tagset.clear();
+ tagset += "tag1";
+ tagset = tc.getCompanionTags(tagset);
+ inner_ensure(!tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+
+ // Test handling of changes
+ PatchList<string, string> change;
+ Patch<string, string> p("tagged");
+ tagset.clear();
+ p.remove("tag1");
+ p.remove("tag2");
+ change.addPatch(p);
+
+ tc.applyChange(change);
+
+ // "tagged" should now be untagged
+ inner_ensure(tc.getTags("tagged").empty());
+
+ tc.applyChange(change.getReverse());
+
+ // "tagged" should now be as before
+ //inner_ensure(tc.hasTag("tag1"));
+ //inner_ensure(tc.hasTag("tag2"));
+ tagset = tc.getTags("tagged");
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ itemset = tc.getItems("tag1");
+ inner_ensure(itemset.contains("tagged"));
+ itemset = tc.getItems("tag2");
+ inner_ensure(itemset.contains("tagged"));
+ tagset = tc.getAllTags();
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ tagset.clear();
+ tagset += "tag1";
+ tagset = tc.getCompanionTags(tagset);
+ inner_ensure(!tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+
+ // Try a patch that adds a tag
+ change = PatchList<string, string>();
+ p = Patch<string, string>("tagged");
+ p.add("tag3");
+ change.addPatch(p);
+ tc.applyChange(change);
+
+ //inner_ensure(tc.hasTag("tag1"));
+ //inner_ensure(tc.hasTag("tag2"));
+ //inner_ensure(tc.hasTag("tag3"));
+ tagset = tc.getTags("tagged");
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+ itemset = tc.getItems("tag1");
+ inner_ensure(itemset.contains("tagged"));
+ itemset = tc.getItems("tag2");
+ inner_ensure(itemset.contains("tagged"));
+ itemset = tc.getItems("tag3");
+ inner_ensure(itemset.contains("tagged"));
+ tagset = tc.getAllTags();
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+ tagset.clear();
+ tagset += "tag1";
+ tagset = tc.getCompanionTags(tagset);
+ inner_ensure(!tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+
+ // Try a patch that adds some items
+ change = PatchList<string, string>();
+ p = Patch<string, string>("tagged1");
+ p.add("tag1");
+ p.add("tag2");
+ p.add("tag4");
+ change.addPatch(p);
+ tc.applyChange(change);
+
+ tagset = tc.getTags("tagged1");
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(!tagset.contains("tag3"));
+ inner_ensure(tagset.contains("tag4"));
+ itemset = tc.getItems("tag1");
+ inner_ensure(itemset.contains("tagged1"));
+ itemset = tc.getItems("tag2");
+ inner_ensure(itemset.contains("tagged1"));
+ itemset = tc.getItems("tag3");
+ inner_ensure(!itemset.contains("tagged1"));
+ itemset = tc.getItems("tag4");
+ inner_ensure(!itemset.contains("tagged"));
+ inner_ensure(itemset.contains("tagged1"));
+ tagset = tc.getAllTags();
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+ inner_ensure(tagset.contains("tag4"));
+ tagset.clear();
+ tagset += "tag1";
+ tagset = tc.getCompanionTags(tagset);
+ inner_ensure(!tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+ inner_ensure(tagset.contains("tag4"));
+
+ // And reverse it
+ tc.applyChange(change.getReverse());
+
+ itemset = tc.getItems("tag1");
+ inner_ensure(!itemset.contains("tagged1"));
+ itemset = tc.getItems("tag2");
+ inner_ensure(!itemset.contains("tagged1"));
+ itemset = tc.getItems("tag3");
+ inner_ensure(!itemset.contains("tagged1"));
+ inner_ensure(tc.getItems("tag4") == OpSet<string>());
+ tagset = tc.getAllTags();
+ inner_ensure(tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+ inner_ensure(!tagset.contains("tag4"));
+ tagset.clear();
+ tagset += "tag1";
+ tagset = tc.getCompanionTags(tagset);
+ inner_ensure(!tagset.contains("tag1"));
+ inner_ensure(tagset.contains("tag2"));
+ inner_ensure(tagset.contains("tag3"));
+ inner_ensure(!tagset.contains("tag4"));
+}
+
}
#endif
Modified: tagcoll/trunk/tests/test-utils.h
==============================================================================
--- tagcoll/trunk/tests/test-utils.h (original)
+++ tagcoll/trunk/tests/test-utils.h Fri Feb 10 01:12:01 2006
@@ -54,196 +54,113 @@
TestConsumer() : items(0), tags(0) {}
};
+class Location // source position of a test assertion, optionally nested inside a shared test helper
+{
+ string file; // file, line and expression text of the outer call site
+ int line;
+ string str;
+ string testfile; // file, line and expression text of the inner check; testfile is empty when not nested
+ int testline;
+ string teststr;
+public:
+ Location(const std::string& file, int line, const std::string& str)
+ : file(file), line(line), str(str), testline(0) {}
+ Location(const Location& loc,
+ const std::string& testfile, int testline, const std::string& str) :
+ file(loc.file), line(loc.line), str(loc.str),
+ testfile(testfile), testline(testline), teststr(str) {}
+ string locstr() const // "file:line(expr)[testfile:testline(testexpr)]: " -- the bracketed part only when nested
+ {
+ std::stringstream ss;
+ ss << file << ":" << line << "(" << str << ")";
+ if (!testfile.empty())
+ ss << "[" << testfile << ":" << testline << "(" << teststr << ")]";
+ ss << ": ";
+ return ss.str();
+ }
+ string msg(const std::string& m) const // location prefix followed by the message m
+ {
+ return locstr() + m; // locstr() already ends with ": "
+ }
+};
+
void outputCollection(const std::string& str, Tagcoll::Consumer<string, string>& cons);
+#define ensure_coll_equal(a, b) \
+ __tc_ensure_coll_equal(__FILE__, __LINE__, #a " == " #b, a, b)
void __tc_ensure_coll_equal(std::string f, int l, std::string s,
const Tagcoll::Collection<string, string>& c1,
const Tagcoll::Collection<string, string>& c2);
-#define ensure_coll_equal(a, b) \
- __tc_ensure_coll_equal(__FILE__, __LINE__, #a " == " #b, a, b)
-
-inline static std::string __ensure_errmsg(std::string f, int l, std::string msg)
+#define gen_ensure(x) _gen_ensure(Location(__FILE__, __LINE__, #x), (x))
+#define inner_ensure(x) _gen_ensure(Location(loc, __FILE__, __LINE__, #x), (x))
+void _gen_ensure(const Location& loc, bool res);
+
+#define ensure_contains(a, b) \
+ _ensure_contains(Location(__FILE__, __LINE__, #a " contains " #b), a, b)
+#define inner_ensure_contains(a, b) \
+ _ensure_contains(Location(loc, __FILE__, __LINE__, #a " contains " #b), a, b)
+template<class T>
+void _ensure_contains(const Location& loc, const OpSet<T>& s, const T& item)
{
- char buf[64];
- snprintf(buf, 63, "%d", l);
- buf[63] = 0;
- std::string ln = buf;
- f.append(":");
- f.append(ln);
- f.append(": '");
- f.append(msg);
- f.append("'");
- return f;
+ if (!s.contains(item))
+ {
+ std::stringstream ss;
+ ss << "tagset ";
+ for (typename OpSet<T>::const_iterator i = s.begin();
+ i != s.end(); i++)
+ if (i == s.begin())
+ ss << *i;
+ else
+ ss << ", " << *i;
+ ss << " does not contain " << item;
+
+ throw failure(loc.msg(ss.str()));
+ }
}
-#define gen_ensure(x) ensure (__ensure_errmsg(__FILE__, __LINE__, #x).c_str(), (x))
-inline static std::string __tc_ensure_errmsg(std::string f, int l, std::string f1, int l1, std::string msg)
+#define ensure_not_contains(a, b) \
+ _ensure_not_contains(Location(__FILE__, __LINE__, #a " does not contain " #b), a, b)
+#define inner_ensure_not_contains(a, b) \
+ _ensure_not_contains(Location(loc, __FILE__, __LINE__, #a " does not contain " #b), a, b)
+template<class T>
+void _ensure_not_contains(const Location& loc, const OpSet<T>& s, const T& item) // throws failure if s contains item
{
- return f + ":" + fmt(l) + ": '" + f1 + ":" + fmt(l1) + ": " + msg + "'";
+ if (s.contains(item)) // failure case: the item was expected to be absent
+ {
+ std::stringstream ss;
+ ss << "tagset ";
+ for (typename OpSet<T>::const_iterator i = s.begin();
+ i != s.end(); i++)
+ if (i == s.begin())
+ ss << *i;
+ else
+ ss << ", " << *i;
+ ss << " contains " << item; // report what was actually found
+
+ throw failure(loc.msg(ss.str()));
+ }
}
#ifdef TEST_TAGCOLL
-inline static void __test_tagged_collection(std::string f, int l, Collection<string, string>& tc)
-{
-#define ttc_ensure(x) ensure (__tc_ensure_errmsg(f, l, __FILE__, __LINE__, #x).c_str(), (x))
- // Test handling of untagged items (they are not stored)
- tc.consume("untagged");
- ttc_ensure(tc.getTags("untagged").empty());
-
- // Test handling of tagged items
- OpSet<string> tagset;
- tagset += "tag1"; tagset += "tag2";
- tc.consume("tagged", tagset);
- ttc_ensure(tc.getTaggedItems().contains("tagged"));
- //ttc_ensure(tc.hasTag("tag1"));
- //ttc_ensure(tc.hasTag("tag2"));
- tagset = tc.getTags("tagged");
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- OpSet<string> itemset = tc.getItems("tag1");
- ttc_ensure(itemset.contains("tagged"));
- itemset = tc.getItems("tag2");
- ttc_ensure(itemset.contains("tagged"));
- tagset = tc.getAllTags();
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- tagset.clear();
- tagset += "tag1";
- tagset = tc.getCompanionTags(tagset);
- ttc_ensure(!tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
-
- // Test handling of changes
- PatchList<string, string> change;
- Patch<string, string> p("tagged");
- tagset.clear();
- p.remove("tag1");
- p.remove("tag2");
- change.addPatch(p);
-
- tc.applyChange(change);
-
- // "tagged" should now be untagged
- ttc_ensure(tc.getTags("tagged").empty());
-
- tc.applyChange(change.getReverse());
-
- // "tagged" should now be as before
- //ttc_ensure(tc.hasTag("tag1"));
- //ttc_ensure(tc.hasTag("tag2"));
- tagset = tc.getTags("tagged");
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- itemset = tc.getItems("tag1");
- ttc_ensure(itemset.contains("tagged"));
- itemset = tc.getItems("tag2");
- ttc_ensure(itemset.contains("tagged"));
- tagset = tc.getAllTags();
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- tagset.clear();
- tagset += "tag1";
- tagset = tc.getCompanionTags(tagset);
- ttc_ensure(!tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
-
- // Try a patch that adds a tag
- change = PatchList<string, string>();
- p = Patch<string, string>("tagged");
- p.add("tag3");
- change.addPatch(p);
- tc.applyChange(change);
-
- //ttc_ensure(tc.hasTag("tag1"));
- //ttc_ensure(tc.hasTag("tag2"));
- //ttc_ensure(tc.hasTag("tag3"));
- tagset = tc.getTags("tagged");
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
- itemset = tc.getItems("tag1");
- ttc_ensure(itemset.contains("tagged"));
- itemset = tc.getItems("tag2");
- ttc_ensure(itemset.contains("tagged"));
- itemset = tc.getItems("tag3");
- ttc_ensure(itemset.contains("tagged"));
- tagset = tc.getAllTags();
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
- tagset.clear();
- tagset += "tag1";
- tagset = tc.getCompanionTags(tagset);
- ttc_ensure(!tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
-
- // Try a patch that adds some items
- change = PatchList<string, string>();
- p = Patch<string, string>("tagged1");
- p.add("tag1");
- p.add("tag2");
- p.add("tag4");
- change.addPatch(p);
- tc.applyChange(change);
-
- tagset = tc.getTags("tagged1");
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(!tagset.contains("tag3"));
- ttc_ensure(tagset.contains("tag4"));
- itemset = tc.getItems("tag1");
- ttc_ensure(itemset.contains("tagged1"));
- itemset = tc.getItems("tag2");
- ttc_ensure(itemset.contains("tagged1"));
- itemset = tc.getItems("tag3");
- ttc_ensure(!itemset.contains("tagged1"));
- itemset = tc.getItems("tag4");
- ttc_ensure(!itemset.contains("tagged"));
- ttc_ensure(itemset.contains("tagged1"));
- tagset = tc.getAllTags();
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
- ttc_ensure(tagset.contains("tag4"));
- tagset.clear();
- tagset += "tag1";
- tagset = tc.getCompanionTags(tagset);
- ttc_ensure(!tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
- ttc_ensure(tagset.contains("tag4"));
-
- // And reverse it
- tc.applyChange(change.getReverse());
-
- itemset = tc.getItems("tag1");
- ttc_ensure(!itemset.contains("tagged1"));
- itemset = tc.getItems("tag2");
- ttc_ensure(!itemset.contains("tagged1"));
- itemset = tc.getItems("tag3");
- ttc_ensure(!itemset.contains("tagged1"));
- ttc_ensure(tc.getItems("tag4") == OpSet<string>());
- tagset = tc.getAllTags();
- ttc_ensure(tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
- ttc_ensure(!tagset.contains("tag4"));
- tagset.clear();
- tagset += "tag1";
- tagset = tc.getCompanionTags(tagset);
- ttc_ensure(!tagset.contains("tag1"));
- ttc_ensure(tagset.contains("tag2"));
- ttc_ensure(tagset.contains("tag3"));
- ttc_ensure(!tagset.contains("tag4"));
-#undef ttc_ensure
-}
+
+// Output a test collection for later testing with test_tagged_collection_ro
+//
+// output_test_collection(x) wraps the call site (__FILE__, __LINE__) and the
+// stringified argument in a Location. The inner_ variant also passes a
+// Location named `loc`, which must be in scope where the macro is expanded.
+
+#define output_test_collection(x) (__output_test_collection(Location(__FILE__, __LINE__, #x), (x)))
+#define inner_output_test_collection(x) (__output_test_collection(Location(loc, __FILE__, __LINE__, #x), (x)))
+void __output_test_collection(const Location& loc, Consumer<string, string>& tc);
+
+// test_tagged_collection_ro(x): calls __test_tagged_collection_ro on x with a
+// Location built from the call site and the stringified argument. Intended
+// for collections filled by output_test_collection.
+#define test_tagged_collection_ro(x) (__test_tagged_collection_ro(Location(__FILE__, __LINE__, #x), (x)))
+// Variant that also passes a Location named `loc` taken from the scope in
+// which the macro is expanded.
+#define inner_test_tagged_collection_ro(x) (__test_tagged_collection_ro(Location(loc, __FILE__, __LINE__, #x), (x)))
+// Declaration only; the definition is not part of this header.
+void __test_tagged_collection_ro(const Location& loc, Collection<string, string>& tc);
+
+
+// test_tagged_collection(x): calls __test_tagged_collection on x with a
+// Location built from the call site and the stringified argument.
+#define test_tagged_collection(x) (__test_tagged_collection(Location(__FILE__, __LINE__, #x), (x)))
+// Declaration only; the definition is not part of this header.
+void __test_tagged_collection(const Location& loc, Collection<string, string>& tc);
+
#endif
}
-#define test_tagged_collection(x) (__test_tagged_collection(__FILE__, __LINE__, (x)))
-
/*
namespace tut {
static void aptInit () {
More information about the Debtags-commits
mailing list