[Debtags-commits] [svn] r1581 - in tagcoll/trunk: . debian tagcoll
tools
Enrico Zini
enrico at costa.debian.org
Tue Feb 14 10:13:22 UTC 2006
Author: enrico
Date: Tue Feb 14 10:13:19 2006
New Revision: 1581
Modified:
tagcoll/trunk/ (props changed)
tagcoll/trunk/README
tagcoll/trunk/configure.ac
tagcoll/trunk/debian/changelog
tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc
tagcoll/trunk/tagcoll/BasicStringDiskIndex.h
tagcoll/trunk/tagcoll/IntDiskIndex.cc
tagcoll/trunk/tagcoll/IntDiskIndex.h
tagcoll/trunk/tagcoll/IntIndex.cc
tagcoll/trunk/tagcoll/IntIndex.h
tagcoll/trunk/tagcoll/MMapIndex.cc
tagcoll/trunk/tagcoll/MMapIndex.h
tagcoll/trunk/tagcoll/StringIndex.cc
tagcoll/trunk/tagcoll/StringIndex.h
tagcoll/trunk/tools/tagidx.cc
Log:
r7316 at viaza: enrico | 2006-02-14 01:05:17 +0100
Refactored the MMap indexes to fit more indexes inside the same file
Modified: tagcoll/trunk/README
==============================================================================
--- tagcoll/trunk/README (original)
+++ tagcoll/trunk/README Tue Feb 14 10:13:19 2006
@@ -176,7 +176,6 @@
- Make tagidx usable to power the central database on Alioth:
+ mmap-based fast index
+ patch directory
- -
- Create more IntIndex specific optimized methods instead of using the default
ones
Modified: tagcoll/trunk/configure.ac
==============================================================================
--- tagcoll/trunk/configure.ac (original)
+++ tagcoll/trunk/configure.ac Tue Feb 14 10:13:19 2006
@@ -1,6 +1,6 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(tagcoll, 1.5.2, [enrico at debian.org])
+AC_INIT(tagcoll, 1.6, [enrico at debian.org])
AC_CONFIG_SRCDIR([configure.ac])
AM_CONFIG_HEADER(config.h)
AM_INIT_AUTOMAKE([foreign])
Modified: tagcoll/trunk/debian/changelog
==============================================================================
--- tagcoll/trunk/debian/changelog (original)
+++ tagcoll/trunk/debian/changelog Tue Feb 14 10:13:19 2006
@@ -1,4 +1,4 @@
-tagcoll (1.5.2-1) unstable; urgency=low
+tagcoll (1.6-1) unstable; urgency=low
* New upstream version
* Added grep and items commands
Modified: tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc (original)
+++ tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc Tue Feb 14 10:13:19 2006
@@ -31,35 +31,13 @@
using namespace Tagcoll;
-BasicStringDiskIndex::BasicStringDiskIndex(const std::string& dir)
- : IntDiskIndex<std::string, std::string>(
- dir + "/pkgtags.idx", dir + "/tagpkgs.idx",
- pkgs, tags,
- pkgs, tags),
- dir(dir), pkgs(dir + "/pkgs.idx"), tags(dir + "/tags.idx")
+BasicStringDiskIndex::BasicStringDiskIndex(const std::string& file)
+ : MasterMMapIndex(file), IntDiskIndex<std::string, std::string>(
+ *this, 0, 1, pkgs, tags, pkgs, tags),
+ pkgs(*this, 2), tags(*this, 3)
{
}
-static void unlink(const std::string& file)
-{
- if (unlink(file.c_str()) == -1)
- throw SystemException(errno, "Deleting file " + file);
-}
-
-void BasicStringDiskIndex::remove(const std::string& dir)
-{
- unlink(dir + "/pkgtags.idx");
- unlink(dir + "/tagpkgs.idx");
- unlink(dir + "/pkgs.idx");
- unlink(dir + "/tags.idx");
- if (rmdir(dir.c_str()) == -1)
- if (errno == ENOTEMPTY)
- cerr << "Directory " + dir + " was not empty: skipping removing it";
- else
- throw SystemException(errno, "Removing directory " + dir);
-}
-
-
void BasicStringDiskIndexer::consumeItemUntagged(const string& item)
{
cache.consume(item);
@@ -74,22 +52,24 @@
this->tags.map(i->c_str());
}
-void BasicStringDiskIndexer::write(const std::string& dir)
+void BasicStringDiskIndexer::write(const std::string& file)
{
- // First write the string indexes
- pkgs.write(dir + "/pkgs.idx");
- tags.write(dir + "/tags.idx");
-
- // Then create string->int mappers that use the string indexes
- StringIndex ipkgs(dir + "/pkgs.idx");
- StringIndex itags(dir + "/tags.idx");
+ MasterMMapIndexer master(file);
// Feed the mapped strings to the IntDiskIndexer
- IntDiskIndexer<string, string> idx(ipkgs, itags);
+ IntDiskIndexer<string, string> idx(pkgs, tags);
cache.output(idx);
- // Finally write the pkg<->tag mappings
- idx.write(dir + "/pkgtags.idx", dir + "/tagpkgs.idx");
+ // Append package index and tag index
+ master.append(idx.pkgIndexer());
+ master.append(idx.tagIndexer());
+
+ // Then append the two string indexes
+ master.append(pkgs);
+ master.append(tags);
+
+ // Finally commit
+ master.commit();
}
@@ -115,19 +95,18 @@
cerr << ", " << *i;
}
-static const string dir = "test-basicdiskindex-collection";
+static const string fname = "test-basicdiskindex-collection";
struct tagcoll_basicstringdiskindex_shar {
tagcoll_basicstringdiskindex_shar()
{
- mkdir(dir.c_str(), 0777);
BasicStringDiskIndexer indexer;
output_test_collection(indexer);
- indexer.write(dir);
+ indexer.write(fname);
}
~tagcoll_basicstringdiskindex_shar()
{
- BasicStringDiskIndex::remove(dir);
+ unlink(fname.c_str());
}
};
TESTGRP(tagcoll_basicstringdiskindex);
@@ -135,7 +114,7 @@
template<> template<>
void to::test<1>()
{
- BasicStringDiskIndex idx(dir);
+ BasicStringDiskIndex idx(fname);
#if 0
cerr << "Items: ";
Modified: tagcoll/trunk/tagcoll/BasicStringDiskIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/BasicStringDiskIndex.h (original)
+++ tagcoll/trunk/tagcoll/BasicStringDiskIndex.h Tue Feb 14 10:13:19 2006
@@ -40,10 +40,9 @@
* It allows to efficiently query a collection without having to store it all
* into memory.
*/
-class BasicStringDiskIndex : public IntDiskIndex<std::string, std::string>
+class BasicStringDiskIndex : public MasterMMapIndex, public IntDiskIndex<std::string, std::string>
{
protected:
- std::string dir;
StringIndex pkgs;
StringIndex tags;
@@ -54,10 +53,10 @@
* @param dir
* The directory with the index files
*/
- BasicStringDiskIndex(const std::string& dir);
+ BasicStringDiskIndex(const std::string& file);
virtual ~BasicStringDiskIndex() {}
- const std::string& directory() const { return dir; }
+ const std::string& filename() const { return m_filename; }
static void remove(const std::string& dir);
};
@@ -75,7 +74,7 @@
public:
virtual ~BasicStringDiskIndexer() {}
- void write(const std::string& dir);
+ void write(const std::string& file);
};
Modified: tagcoll/trunk/tagcoll/IntDiskIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/IntDiskIndex.cc (original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.cc Tue Feb 14 10:13:19 2006
@@ -44,13 +44,6 @@
}
}
-template<class ITEM, class TAG>
-void IntDiskIndexer<ITEM, TAG>::write(const std::string& pkgidxfile, const std::string& tagidxfile)
-{
- pkgidx.write(pkgidxfile);
- tagidx.write(tagidxfile);
-}
-
#ifndef INSTANTIATING_TEMPLATES
#include <string>
@@ -71,8 +64,7 @@
using namespace tut_tagcoll;
using namespace std;
-static const char* pkgfname = "tagcoll_intdiskindex_pkgs.tmp";
-static const char* tagfname = "tagcoll_intdiskindex_tags.tmp";
+static const char* fname = "tagcoll_intdiskindex.tmp";
class BigMap
{
@@ -144,20 +136,25 @@
tagcoll_intdiskindex_shar()
: conv1(items), conv2(tags), conv3(items), conv4(tags)
{
+ MasterMMapIndexer master(fname);
+
IntDiskIndexer<string, string> indexer(conv1, conv2);
output_test_collection(indexer);
- indexer.write(pkgfname, tagfname);
+
+ master.append(indexer.pkgIndexer());
+ master.append(indexer.tagIndexer());
+ master.commit();
}
~tagcoll_intdiskindex_shar()
{
- unlink("pkgfname.test");
- unlink("tagfname.test");
+ unlink(fname);
}
};
TESTGRP(tagcoll_intdiskindex);
#include <iostream>
+#if 0
static void outts(const OpSet<string>& s)
{
for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
@@ -166,11 +163,14 @@
else
cerr << ", " << *i;
}
+#endif
template<> template<>
void to::test<1>()
{
- IntDiskIndex<string, string> idx(pkgfname, tagfname, conv1, conv2, conv3, conv4);
+ MasterMMapIndex master(fname);
+
+ IntDiskIndex<string, string> idx(master, 0, 1, conv1, conv2, conv3, conv4);
#if 0
cerr << "Items: ";
Modified: tagcoll/trunk/tagcoll/IntDiskIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/IntDiskIndex.h (original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.h Tue Feb 14 10:13:19 2006
@@ -79,9 +79,9 @@
public:
/**
- * Create a new TDBDiskIndex
+ * Create a new IntDiskIndex
*
- * @param pkgidx
+ * @param filename
* The file name of the package index
* @param tagidx
* The file name of the tag index
@@ -96,13 +96,13 @@
* It defaults to true.
*/
IntDiskIndex(
- const std::string& pkgidxfile,
- const std::string& tagidxfile,
+ const MasterMMapIndex& master,
+ int pkgindex, int tagindex,
const Converter<ITEM, int>& fromitem,
const Converter<TAG, int>& fromtag,
const Converter<int, ITEM>& toitem,
const Converter<int, TAG>& totag)
- : pkgidx(pkgidxfile), tagidx(tagidxfile),
+ : pkgidx(master, pkgindex), tagidx(master, tagindex),
fromitem(fromitem), fromtag(fromtag),
toitem(toitem), totag(totag) {}
virtual ~IntDiskIndex() {}
@@ -165,7 +165,8 @@
const Converter<TAG, int>& fromtag);
virtual ~IntDiskIndexer() {}
- void write(const std::string& pkgidx, const std::string& tagidx);
+ const MMapIndexer& pkgIndexer() const { return pkgidx; }
+ const MMapIndexer& tagIndexer() const { return tagidx; }
};
Modified: tagcoll/trunk/tagcoll/IntIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/IntIndex.cc (original)
+++ tagcoll/trunk/tagcoll/IntIndex.cc Tue Feb 14 10:13:19 2006
@@ -60,6 +60,8 @@
struct tagcoll_intindex_shar {
tagcoll_intindex_shar() {
+ MasterMMapIndexer master(fname);
+
// Create the index
IntIndexer indexer;
indexer.map(4, 1);
@@ -69,7 +71,8 @@
indexer.map(0, 8);
indexer.map(0, 1);
indexer.map(0, 7);
- indexer.write(fname);
+ master.append(indexer);
+ master.commit();
}
~tagcoll_intindex_shar() {
// Delete the test index
@@ -81,8 +84,10 @@
template<> template<>
void to::test<1>()
{
+ MasterMMapIndex master(fname);
+
// Read the index
- IntIndex index(fname);
+ IntIndex index(master, 0);
// Check the number of mapped items
ensure_equals(index.size(), 5u);
Modified: tagcoll/trunk/tagcoll/IntIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/IntIndex.h (original)
+++ tagcoll/trunk/tagcoll/IntIndex.h Tue Feb 14 10:13:19 2006
@@ -55,7 +55,7 @@
inline size_t ofs(int val) const { return buf()[val]; }
public:
- IntIndex(const std::string& filename) : MMapIndex(filename) {}
+ IntIndex(const MasterMMapIndex& master, int idx) : MMapIndex(master, idx) {}
const int* data(int val) const { return (val >= 0 && (unsigned)val < size()) ? buf() + ofs(val) + 1 : 0; }
size_t size(int val) const { return (val >= 0 && (unsigned)val < size()) ? buf()[ofs(val)] : 0; }
Modified: tagcoll/trunk/tagcoll/MMapIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/MMapIndex.cc (original)
+++ tagcoll/trunk/tagcoll/MMapIndex.cc Tue Feb 14 10:13:19 2006
@@ -19,6 +19,7 @@
*/
#include <tagcoll/MMapIndex.h>
+#include <tagcoll/stringf.h>
#include <stdlib.h>
#include <stdio.h>
@@ -32,8 +33,9 @@
using namespace std;
using namespace Tagcoll;
+using namespace stringf;
-MMapIndex::MMapIndex(const std::string& filename) : m_filename(filename), m_fd(-1), m_buf(0)
+MasterMMapIndex::MasterMMapIndex(const std::string& filename) : m_filename(filename), m_fd(-1), m_buf(0)
{
// Open the file
if ((m_fd = open(m_filename.c_str(), O_RDONLY)) == -1)
@@ -55,66 +57,56 @@
}
}
-MMapIndex::~MMapIndex()
+MasterMMapIndex::~MasterMMapIndex()
{
// Unmap and close the file
munmap((void*)m_buf, m_size);
close(m_fd);
}
-void MMapIndexer::write(const std::string& filename)
+MMapIndex::MMapIndex(const MasterMMapIndex& master, size_t idx)
+ : m_master(master), m_buf(m_master.m_buf), m_size(*(int*)m_buf)
+{
+ // Jump to the idx-th index
+ for (size_t i = 0; i < idx; i++)
+ {
+ m_buf = m_buf + m_size + sizeof(int);
+ if (m_buf > master.m_buf + master.m_size)
+ throw NotFoundException("looking for subindex " + fmt(idx) + " in " + master.m_filename);
+ m_size = *(int*)m_buf;
+ }
+
+ // Work with the subindex data, skipping the subindex chain size word
+ m_buf += sizeof(int);
+}
+
+
+MasterMMapIndexer::MasterMMapIndexer(const std::string& filename)
+ : finalname(filename)
{
// Create a temporary file next to the target file
char name[filename.size() + 8];
memcpy(name, filename.data(), filename.size());
memcpy(name + filename.size(), ".XXXXXX", 8);
- int fd = mkstemp(name);
+
+ fd = mkstemp(name);
+ tmpname = name;
if (fd == -1)
- throw SystemException(errno, "creating temporary file " + filename + ".XXXXXX");
+ throw SystemException(errno, "creating temporary file " + tmpname);
-#if 0
- int size = encodedSize();
- int buf[size];
- encode(buf);
-#else
- // Enlarge the temporary file to fit the data
- int size = encodedSize();
- if (lseek(fd, size - 1, SEEK_SET) == -1)
- {
- close(fd);
- unlink(name);
- throw SystemException(errno, string("enlarging file (seek) ") + name);
- }
+}
- // Write one byte at the end to actually resize the file
- if (::write(fd, &fd, 1) == -1)
- {
+MasterMMapIndexer::~MasterMMapIndexer()
+{
+ if (fd != -1)
close(fd);
- unlink(name);
- throw SystemException(errno, string("enlarging file (write) ") + name);
- }
- // Map the file into memory
- void* buf = mmap(0, size, PROT_WRITE, MAP_SHARED, fd, 0);
- if (buf == MAP_FAILED)
- {
- close(fd);
- unlink(name);
- throw SystemException(errno, string("mmapping file ") + name);
- }
-
- // Write the index data to the file
- encode((char*)buf);
-
- // Unmap the file
- if (munmap(buf, size) == -1)
- {
- close(fd);
- unlink(name);
- throw SystemException(errno, string("munmapping file ") + name);
- }
-#endif
+ if (!tmpname.empty())
+ unlink(tmpname.c_str());
+}
+void MasterMMapIndexer::commit()
+{
// Give the file the right permissions according to umask
// Read the current umask
@@ -122,21 +114,32 @@
umask(mask);
// Set the file permissions
if (fchmod(fd, 0666 & ~mask) == -1)
- {
- close(fd);
- unlink(name);
- throw SystemException(errno, string("setting permissions on file ") + name);
- }
+ throw SystemException(errno, string("setting permissions on file ") + tmpname);
// Close the file
close(fd);
+ fd = -1;
// Rename to the final file name, performing the atomic update
- if (rename(name, filename.c_str()) == -1)
- {
- unlink(name);
- throw SystemException(errno, string("renaming file ") + name + " into " + filename);
- }
+ if (rename(tmpname.c_str(), finalname.c_str()) == -1)
+ throw SystemException(errno, string("renaming file ") + tmpname + " into " + finalname);
+ tmpname.clear();
+}
+
+void MasterMMapIndexer::append(const MMapIndexer& idx)
+{
+ // Get the encoded data
+ int size = idx.encodedSize();
+ char buf[size];
+ idx.encode(buf);
+
+ // Write the size word that prefixes this subindex's data
+ if (::write(fd, &size, sizeof(int)) != sizeof(int))
+ throw SystemException(errno, "writing size word to file " + tmpname);
+
+ // Write the index data
+ if (::write(fd, buf, size) != size)
+ throw SystemException(errno, "writing subindex to file " + tmpname);
}
@@ -152,7 +155,7 @@
class TestIndex : public MMapIndex
{
public:
- TestIndex(const std::string& filename) : MMapIndex(filename) {}
+ TestIndex(MasterMMapIndex& master, int idx) : MMapIndex(master, idx) {}
const char* get() const { return m_buf; }
unsigned int size() const { return m_size; }
@@ -179,8 +182,14 @@
struct tagcoll_mmapindex_shar {
tagcoll_mmapindex_shar() {
- TestIndexer indexer("pippo");
- indexer.write(fname);
+ TestIndexer index1("pippo");
+ TestIndexer index2("pluto");
+ TestIndexer index3("paperino");
+ MasterMMapIndexer master(fname);
+ master.append(index1);
+ master.append(index2);
+ master.append(index3);
+ master.commit();
}
~tagcoll_mmapindex_shar() {
// Delete the test index
@@ -193,10 +202,19 @@
template<> template<>
void to::test<1>()
{
- TestIndex index(fname);
+ MasterMMapIndex master(fname);
- ensure_equals(index.size(), 6u);
- ensure_equals(string(index.get()), string("pippo"));
+ TestIndex index1(master, 0);
+ gen_ensure_equals(index1.size(), 6u);
+ gen_ensure_equals(string(index1.get()), string("pippo"));
+
+ TestIndex index2(master, 1);
+ gen_ensure_equals(index2.size(), 6u);
+ gen_ensure_equals(string(index2.get()), string("pluto"));
+
+ TestIndex index3(master, 2);
+ gen_ensure_equals(index3.size(), 9u);
+ gen_ensure_equals(string(index3.get()), string("paperino"));
}
}
Modified: tagcoll/trunk/tagcoll/MMapIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/MMapIndex.h (original)
+++ tagcoll/trunk/tagcoll/MMapIndex.h Tue Feb 14 10:13:19 2006
@@ -30,10 +30,17 @@
namespace Tagcoll
{
+class MMapIndex;
+
/**
* Performs the memory management and mmapping tasks for mmapped indexes.
+ *
+ * One MMap can contain many indexes. Indexes come chained one after the
+ * other, prefixed by an int that specifies their length:
+ *
+ * [size of index 1][index 1][size of index 2][index 2]...
*/
-class MMapIndex
+class MasterMMapIndex
{
protected:
std::string m_filename;
@@ -42,12 +49,25 @@
const char* m_buf;
public:
- MMapIndex(const std::string& filename);
- ~MMapIndex();
+ MasterMMapIndex(const std::string& filename);
+ ~MasterMMapIndex();
+
+ friend class MMapIndex;
+};
+
+class MMapIndex
+{
+protected:
+ const MasterMMapIndex& m_master;
+ const char* m_buf;
+ size_t m_size;
+
+public:
+ MMapIndex(const MasterMMapIndex& master, size_t idx);
};
/**
- * Simple framework for implementing indexers.
+ * Interface for indexers.
*/
class MMapIndexer
{
@@ -60,11 +80,32 @@
/// Write the index data in the given buffer, which should be at least
/// encodedSize bytes
virtual void encode(char* buf) const = 0;
+};
+
+/**
+ * Master index writer. It allows writing many indexes in the same file,
+ * atomically: the file is created as a tempfile and renamed to the destination
+ * filename by commit(); if destroyed uncommitted, the tempfile is removed.
+ */
+class MasterMMapIndexer
+{
+protected:
+ std::string finalname;
+ std::string tmpname;
+ int fd;
+
+public:
+ MasterMMapIndexer(const std::string& filename);
+ ~MasterMMapIndexer();
- /// Atomically write the index to the given file
- void write(const std::string& filename);
+ /// Close the file and perform the final rename
+ void commit();
+
+ /// Append one subindex
+ void append(const MMapIndexer& idx);
};
+
};
// vim:set ts=4 sw=4:
Modified: tagcoll/trunk/tagcoll/StringIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/StringIndex.cc (original)
+++ tagcoll/trunk/tagcoll/StringIndex.cc Tue Feb 14 10:13:19 2006
@@ -45,13 +45,52 @@
return begin;
}
+
+int StringIndexer::operator()(const std::string& item) const
+{
+ int begin, end;
+
+ /* Binary search */
+ begin = -1, end = data.size();
+ while (end - begin > 1)
+ {
+ int cur = (end + begin) / 2;
+ if (data[cur] > item)
+ end = cur;
+ else
+ begin = cur;
+ }
+
+ if (begin == -1 || data[begin] != item)
+ //throw NotFoundException(string("looking for the ID of string ") + str);
+ return -1;
+ else
+ return begin;
+}
+
+void StringIndexer::map(const std::string& str)
+{
+ if ((*this)(str) != -1)
+ return;
+
+ // Insertion sort
+ int pos = data.size();
+ data.push_back(string());
+ for (; pos > 0; pos--)
+ if (data[pos - 1] > str)
+ data[pos] = data[pos - 1];
+ else
+ break;
+ data[pos] = str;
+}
+
int StringIndexer::encodedSize() const
{
// First the size of the offset array
- int bufsize = size() * sizeof(int);
+ int bufsize = data.size() * sizeof(int);
// Then the size of all the 0-terminated strings
- for (const_iterator i = begin(); i != end(); i++)
+ for (vector<string>::const_iterator i = data.begin(); i != data.end(); i++)
bufsize += i->size() + 1;
// Then the int with the number of items
@@ -60,9 +99,9 @@
void StringIndexer::encode(char* buf) const
{
- int pos = size() * sizeof(int);
+ int pos = data.size() * sizeof(int);
int idx = 0;
- for (const_iterator i = begin(); i != end(); i++)
+ for (vector<string>::const_iterator i = data.begin(); i != data.end(); i++)
{
((int*)buf)[idx++] = pos;
memcpy(buf + pos, i->c_str(), i->size() + 1);
@@ -83,6 +122,8 @@
struct tagcoll_stringindex_shar {
tagcoll_stringindex_shar() {
// Create the index
+ MasterMMapIndexer master(fname);
+
StringIndexer indexer;
indexer.map("pizza");
indexer.map("spaghetti");
@@ -91,7 +132,10 @@
indexer.map("polpettone");
indexer.map("friggione");
indexer.map("arrosto");
- indexer.write(fname);
+ indexer.map("pizza");
+ indexer.map("lasagne");
+ master.append(indexer);
+ master.commit();
}
~tagcoll_stringindex_shar() {
// Delete the test index
@@ -103,8 +147,10 @@
template<> template<>
void to::test<1>()
{
+ MasterMMapIndex master(fname);
+
// Read the index
- StringIndex index(fname);
+ StringIndex index(master, 0);
// Check the number of mapped items
ensure_equals(index.size(), 7u);
Modified: tagcoll/trunk/tagcoll/StringIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/StringIndex.h (original)
+++ tagcoll/trunk/tagcoll/StringIndex.h Tue Feb 14 10:13:19 2006
@@ -49,7 +49,7 @@
int offset(int val) const { return ((const int*)m_buf)[val]; }
public:
- StringIndex(const std::string& filename) : MMapIndex(filename) {}
+ StringIndex(const MasterMMapIndex& master, int idx) : MMapIndex(master, idx) {}
virtual ~StringIndex() {}
virtual std::string operator()(const int& item) const { return data(item); }
@@ -63,14 +63,17 @@
/**
* Creates an on-disk index to use for IntIndex
*/
-class StringIndexer : public std::set<std::string>, public MMapIndexer
+class StringIndexer : public MMapIndexer, public Converter<int, std::string>, public Converter<std::string, int>
{
+protected:
+ std::vector<std::string> data;
+
public:
+ virtual std::string operator()(const int& item) const { return data[item]; }
+ virtual int operator()(const std::string& item) const;
+
/// Store the key->val mapping into the indexer
- void map(const std::string& str)
- {
- insert(str);
- }
+ void map(const std::string& str);
/// Return the size of the encoded index data
int encodedSize() const;
Modified: tagcoll/trunk/tools/tagidx.cc
==============================================================================
--- tagcoll/trunk/tools/tagidx.cc (original)
+++ tagcoll/trunk/tools/tagidx.cc Tue Feb 14 10:13:19 2006
@@ -27,13 +27,13 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
-#define APPNAME PACKAGE
#else
#warning No config.h found: using fallback values
-#define APPNAME __FILE__
#define VERSION "unknown"
#endif
+#define APPNAME "tagidx"
+
#include "CommandlineParser.h"
#include <tagcoll/BasicStringDiskIndex.h>
#include <tagcoll/ItemGrouper.h>
@@ -46,25 +46,16 @@
#include <pwd.h>
#include <dirent.h>
#include <time.h>
+#include <stdlib.h> // getenv
#include <errno.h>
#include <tagcoll/StdioParserInput.h>
#include <tagcoll/TextFormat.h>
#include <iostream>
+#include <sstream>
#if 0
-#include <stdio.h>
-
-#include <stdlib.h> // getenv
-
-
-#include <tagcoll/stringf.h>
-#include <tagcoll/Exception.h>
-
-#include <tagcoll/CardinalityStore.h>
-#include <tagcoll/SmartHierarchy.h>
-
#include <tagcoll/Consumer.h>
#include <tagcoll/Filter.h>
#include <tagcoll/InputMerger.h>
@@ -72,16 +63,22 @@
#include <tagcoll/Filters.h>
#include <tagcoll/Patches.h>
#include <tagcoll/DerivedTags.h>
-
-#include <tagcoll/Serializer.h>
#include <tagcoll/Expression.h>
-
#include <algorithm>
#endif
using namespace std;
using namespace Tagcoll;
+string get_username()
+{
+ struct passwd* pw = getpwuid(getuid());
+ if (pw != NULL)
+ return pw->pw_name;
+ else
+ return string();
+}
+
void mkpath(const std::string& dir)
{
size_t sep = dir.rfind('/');
@@ -243,12 +240,12 @@
BasicStringDiskIndex ro_idx;
public:
- Index(const std::string& dir)
+ Index(const std::string& file)
: PatchCollection<std::string, std::string>(ro_idx),
- ro_idx(dir)
+ ro_idx(file)
{
// Read the patches
- setChanges(readPatchesFromDir(dir + "/patches"));
+ setChanges(readPatchesFromDir(file + ".patches"));
}
virtual ~Index() {}
@@ -256,7 +253,7 @@
void addPatch(const PatchList<std::string, std::string>& patch, const std::string& name, bool overwrite = false)
{
TrivialConverter<string, string> conv;
- string patchfile = ro_idx.directory() + "/patches/" + name;
+ string patchfile = ro_idx.filename() + ".patches/" + name;
int fd = open(patchfile.c_str(), O_CREAT | O_WRONLY | (overwrite ? O_TRUNC : O_EXCL), 0666);
if (fd == -1)
@@ -272,9 +269,9 @@
addChanges(patch);
}
- static void remove(const std::string& dir)
+ static void remove(const std::string& file)
{
- string patchdir = dir + "/patches";
+ string patchdir = file + ".patches";
removePatchesFromDir(patchdir);
if (rmdir(patchdir.c_str()) == -1)
@@ -283,7 +280,7 @@
else
throw SystemException(errno, "Removing directory " + patchdir);
- BasicStringDiskIndex::remove(dir);
+ unlink(file.c_str());
}
};
@@ -292,15 +289,34 @@
public:
virtual ~Indexer() {}
- void write(const std::string& dir)
+ void write(const std::string& file)
{
- BasicStringDiskIndexer::write(dir);
+ BasicStringDiskIndexer::write(file);
// Create the patches directory if it does not exist yet
- mkpath(dir + "/patches");
+ mkpath(file + ".patches");
}
};
+void addPatch(Index& idx, PatchList<string, string>& patch)
+{
+ static int seq = 1;
+
+ if (patch.size() > 0)
+ {
+ // Compute a default name from username, current time and pid
+ char timestr[50];
+ time_t t = time(NULL);
+ strftime(timestr, 50, "%Y%m%d-%H%M%S", localtime(&t));
+
+ std::stringstream ss;
+ ss << get_username() << "-" << timestr << "-" << getpid() << "-" << seq++;
+
+ idx.addPatch(patch, ss.str(), false);
+ } else
+ cerr << "ignoring request to add an empty patch" << endl;
+}
+
#if 0
PatchList<string, string> readPatches(const string& file)
throw (FileException, ParserException)
@@ -641,16 +657,7 @@
}
}
-string get_username()
-{
- struct passwd* pw = getpwuid(getuid());
- if (pw != NULL)
- return pw->pw_name;
- else
- return string();
-}
-
-string indexdir(CommandlineParserWithCommand& opts)
+string indexname(CommandlineParserWithCommand& opts)
{
if (opts.get("index").defined())
return opts.get("index").stringVal();
@@ -752,8 +759,7 @@
{
case CREATE:
{
- string dir = indexdir(opts);
- mkpath(dir);
+ string fname = indexname(opts);
Indexer indexer;
@@ -763,14 +769,14 @@
else
readCollection("-", indexer);
- indexer.write(dir);
+ indexer.write(fname);
break;
}
case ADDPATCH:
{
- string dir = indexdir(opts);
- Index idx(dir);
+ string fname = indexname(opts);
+ Index idx(fname);
if (args.hasNext())
{
@@ -792,50 +798,42 @@
TrivialConverter<string, string> conv;
StdioParserInput input(stdin, "(stdin)");
PatchList<string, string> patch = TextFormat<string, string>::parsePatch(conv, conv, input);
- // Compute a default name from username and current time
- char name[50];
- time_t t = time(NULL);
- strftime(name, 50, "%Y%m%d-%H%M%S", localtime(&t));
- if (patch.size() > 0)
- {
- idx.addPatch(patch, get_username() + "-" + name, opts.get("force").defined());
- } else
- cerr << "not adding empty patch" << endl;
+
+ addPatch(idx, patch);
}
break;
}
case COMPACT:
{
- string dir = indexdir(opts);
+ string fname = indexname(opts);
Indexer indexer;
// Output the patched collection to the indexer
{
- Index idx(dir);
+ Index idx(fname);
idx.output(indexer);
}
// Delete the old index and patches
- Index::remove(dir);
+ Index::remove(fname);
// Write the new index
- mkpath(dir);
- indexer.write(dir);
+ indexer.write(fname);
break;
}
case REMOVE:
{
- string dir = indexdir(opts);
- Index::remove(dir);
+ string fname = indexname(opts);
+ Index::remove(fname);
break;
}
case CAT:
{
- string dir = indexdir(opts);
- Index idx(dir);
+ string fname = indexname(opts);
+ Index idx(fname);
output(opts, idx);
break;
More information about the Debtags-commits
mailing list