[Debtags-commits] [svn] r1581 - in tagcoll/trunk: . debian tagcoll tools

Enrico Zini enrico at costa.debian.org
Tue Feb 14 10:13:22 UTC 2006


Author: enrico
Date: Tue Feb 14 10:13:19 2006
New Revision: 1581

Modified:
   tagcoll/trunk/   (props changed)
   tagcoll/trunk/README
   tagcoll/trunk/configure.ac
   tagcoll/trunk/debian/changelog
   tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc
   tagcoll/trunk/tagcoll/BasicStringDiskIndex.h
   tagcoll/trunk/tagcoll/IntDiskIndex.cc
   tagcoll/trunk/tagcoll/IntDiskIndex.h
   tagcoll/trunk/tagcoll/IntIndex.cc
   tagcoll/trunk/tagcoll/IntIndex.h
   tagcoll/trunk/tagcoll/MMapIndex.cc
   tagcoll/trunk/tagcoll/MMapIndex.h
   tagcoll/trunk/tagcoll/StringIndex.cc
   tagcoll/trunk/tagcoll/StringIndex.h
   tagcoll/trunk/tools/tagidx.cc
Log:
 r7316 at viaza:  enrico | 2006-02-14 01:05:17 +0100
 Refactored the MMap indexes to fit more indexes inside the same file


Modified: tagcoll/trunk/README
==============================================================================
--- tagcoll/trunk/README	(original)
+++ tagcoll/trunk/README	Tue Feb 14 10:13:19 2006
@@ -176,7 +176,6 @@
  - Make tagidx usable to power the central database on Alioth:
     + mmap-based fast index
     + patch directory
-    - 
 
  - Create more IntIndex specific optimized methods instead of using the default
    ones

Modified: tagcoll/trunk/configure.ac
==============================================================================
--- tagcoll/trunk/configure.ac	(original)
+++ tagcoll/trunk/configure.ac	Tue Feb 14 10:13:19 2006
@@ -1,6 +1,6 @@
 dnl Process this file with autoconf to produce a configure script.
 
-AC_INIT(tagcoll, 1.5.2, [enrico at debian.org])
+AC_INIT(tagcoll, 1.6, [enrico at debian.org])
 AC_CONFIG_SRCDIR([configure.ac])
 AM_CONFIG_HEADER(config.h)
 AM_INIT_AUTOMAKE([foreign])

Modified: tagcoll/trunk/debian/changelog
==============================================================================
--- tagcoll/trunk/debian/changelog	(original)
+++ tagcoll/trunk/debian/changelog	Tue Feb 14 10:13:19 2006
@@ -1,4 +1,4 @@
-tagcoll (1.5.2-1) unstable; urgency=low
+tagcoll (1.6-1) unstable; urgency=low
 
   * New upstream version
      * Added grep and items commands

Modified: tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc	(original)
+++ tagcoll/trunk/tagcoll/BasicStringDiskIndex.cc	Tue Feb 14 10:13:19 2006
@@ -31,35 +31,13 @@
 using namespace Tagcoll;
 
 
-BasicStringDiskIndex::BasicStringDiskIndex(const std::string& dir)
-	: IntDiskIndex<std::string, std::string>(
-			  dir + "/pkgtags.idx", dir + "/tagpkgs.idx",
-			  pkgs, tags,
-			  pkgs, tags),
-	  dir(dir), pkgs(dir + "/pkgs.idx"), tags(dir + "/tags.idx")
+BasicStringDiskIndex::BasicStringDiskIndex(const std::string& file)
+	: MasterMMapIndex(file), IntDiskIndex<std::string, std::string>(
+			*this, 0, 1, pkgs, tags, pkgs, tags),
+	  pkgs(*this, 2), tags(*this, 3)
 {
 }
 
-static void unlink(const std::string& file)
-{
-	if (unlink(file.c_str()) == -1)
-		throw SystemException(errno, "Deleting file " + file);
-}
-
-void BasicStringDiskIndex::remove(const std::string& dir)
-{
-	unlink(dir + "/pkgtags.idx");
-	unlink(dir + "/tagpkgs.idx");
-	unlink(dir + "/pkgs.idx");
-	unlink(dir + "/tags.idx");
-	if (rmdir(dir.c_str()) == -1)
-		if (errno == ENOTEMPTY)
-			cerr << "Directory " + dir + " was not empty: skipping removing it";
-		else
-			throw SystemException(errno, "Removing directory " + dir);
-}
-
-
 void BasicStringDiskIndexer::consumeItemUntagged(const string& item)
 {
 	cache.consume(item);
@@ -74,22 +52,24 @@
 		this->tags.map(i->c_str());
 }
 
-void BasicStringDiskIndexer::write(const std::string& dir)
+void BasicStringDiskIndexer::write(const std::string& file)
 {
-	// First write the string indexes
-	pkgs.write(dir + "/pkgs.idx");
-	tags.write(dir + "/tags.idx");
-
-	// Then create string->int mappers that use the string indexes
-	StringIndex ipkgs(dir + "/pkgs.idx");
-	StringIndex itags(dir + "/tags.idx");
+	MasterMMapIndexer master(file);
 
 	// Feed the mapped strings to the IntDiskIndexer
-	IntDiskIndexer<string, string> idx(ipkgs, itags);
+	IntDiskIndexer<string, string> idx(pkgs, tags);
 	cache.output(idx);
 
-	// Finally write the pkg<->tag mappings
-	idx.write(dir + "/pkgtags.idx", dir + "/tagpkgs.idx");
+	// Append package index and tag index
+	master.append(idx.pkgIndexer());
+	master.append(idx.tagIndexer());
+
+	// Then append the two string indexes
+	master.append(pkgs);
+	master.append(tags);
+
+	// Finally commit
+	master.commit();
 }
 
 
@@ -115,19 +95,18 @@
 			cerr << ", " << *i;
 }
 
-static const string dir = "test-basicdiskindex-collection";
+static const string fname = "test-basicdiskindex-collection";
 
 struct tagcoll_basicstringdiskindex_shar {
 	tagcoll_basicstringdiskindex_shar()
 	{
-		mkdir(dir.c_str(), 0777);
 		BasicStringDiskIndexer indexer;
 		output_test_collection(indexer);
-		indexer.write(dir);
+		indexer.write(fname);
 	}
 	~tagcoll_basicstringdiskindex_shar()
 	{
-		BasicStringDiskIndex::remove(dir);
+		unlink(fname.c_str());
 	}
 };
 TESTGRP(tagcoll_basicstringdiskindex);
@@ -135,7 +114,7 @@
 template<> template<>
 void to::test<1>()
 {
-	BasicStringDiskIndex idx(dir);
+	BasicStringDiskIndex idx(fname);
 
 #if 0
 	cerr << "Items: ";

Modified: tagcoll/trunk/tagcoll/BasicStringDiskIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/BasicStringDiskIndex.h	(original)
+++ tagcoll/trunk/tagcoll/BasicStringDiskIndex.h	Tue Feb 14 10:13:19 2006
@@ -40,10 +40,9 @@
  * It allows to efficiently query a collection without having to store it all
  * into memory.
  */
-class BasicStringDiskIndex : public IntDiskIndex<std::string, std::string>
+class BasicStringDiskIndex : public MasterMMapIndex, public IntDiskIndex<std::string, std::string>
 {
 protected:
-	std::string dir;
 	StringIndex pkgs;
 	StringIndex tags;
 
@@ -54,10 +53,10 @@
 	 * @param dir
 	 *   The directory with the index files
 	 */
-	BasicStringDiskIndex(const std::string& dir);
+	BasicStringDiskIndex(const std::string& file);
 	virtual ~BasicStringDiskIndex() {}
 
-	const std::string& directory() const { return dir; }
+	const std::string& filename() const { return m_filename; }
 
 	static void remove(const std::string& dir);
 };
@@ -75,7 +74,7 @@
 public:
 	virtual ~BasicStringDiskIndexer() {}
 
-	void write(const std::string& dir);
+	void write(const std::string& file);
 };
 
 

Modified: tagcoll/trunk/tagcoll/IntDiskIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/IntDiskIndex.cc	(original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.cc	Tue Feb 14 10:13:19 2006
@@ -44,13 +44,6 @@
 	}
 }
 
-template<class ITEM, class TAG>
-void IntDiskIndexer<ITEM, TAG>::write(const std::string& pkgidxfile, const std::string& tagidxfile)
-{
-	pkgidx.write(pkgidxfile);
-	tagidx.write(tagidxfile);
-}
-
 
 #ifndef INSTANTIATING_TEMPLATES
 #include <string>
@@ -71,8 +64,7 @@
 using namespace tut_tagcoll;
 using namespace std;
 
-static const char* pkgfname = "tagcoll_intdiskindex_pkgs.tmp";
-static const char* tagfname = "tagcoll_intdiskindex_tags.tmp";
+static const char* fname = "tagcoll_intdiskindex.tmp";
 
 class BigMap
 {
@@ -144,20 +136,25 @@
 	tagcoll_intdiskindex_shar()
 		: conv1(items), conv2(tags), conv3(items), conv4(tags)
 	{
+		MasterMMapIndexer master(fname);
+
 		IntDiskIndexer<string, string> indexer(conv1, conv2);
 		output_test_collection(indexer);
-		indexer.write(pkgfname, tagfname);
+		
+		master.append(indexer.pkgIndexer());
+		master.append(indexer.tagIndexer());
+		master.commit();
 	}
 	~tagcoll_intdiskindex_shar()
 	{
-		unlink("pkgfname.test");
-		unlink("tagfname.test");
+		unlink(fname);
 	}
 };
 TESTGRP(tagcoll_intdiskindex);
 
 #include <iostream>
 
+#if 0
 static void outts(const OpSet<string>& s)
 {
 	for (OpSet<string>::const_iterator i = s.begin(); i != s.end(); i++)
@@ -166,11 +163,14 @@
 		else
 			cerr << ", " << *i;
 }
+#endif
 
 template<> template<>
 void to::test<1>()
 {
-	IntDiskIndex<string, string> idx(pkgfname, tagfname, conv1, conv2, conv3, conv4);
+	MasterMMapIndex master(fname);
+
+	IntDiskIndex<string, string> idx(master, 0, 1, conv1, conv2, conv3, conv4);
 
 #if 0
 	cerr << "Items: ";

Modified: tagcoll/trunk/tagcoll/IntDiskIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/IntDiskIndex.h	(original)
+++ tagcoll/trunk/tagcoll/IntDiskIndex.h	Tue Feb 14 10:13:19 2006
@@ -79,9 +79,9 @@
 
 public:
 	/**
-	 * Create a new TDBDiskIndex
+	 * Create a new IntDiskIndex
 	 *
-	 * @param pkgidx
+	 * @param filename
 	 *   The file name of the package index
 	 * @param tagidx
 	 *   The file name of the tag index
@@ -96,13 +96,13 @@
 	 *   It defaults to true.
 	 */
 	IntDiskIndex(
-			const std::string& pkgidxfile,
-			const std::string& tagidxfile,
+			const MasterMMapIndex& master,
+			int pkgindex, int tagindex,
 			const Converter<ITEM, int>& fromitem,
 			const Converter<TAG, int>& fromtag,
 			const Converter<int, ITEM>& toitem,
 			const Converter<int, TAG>& totag)
-		:   pkgidx(pkgidxfile), tagidx(tagidxfile),
+		:   pkgidx(master, pkgindex), tagidx(master, tagindex),
 			fromitem(fromitem), fromtag(fromtag),
 			toitem(toitem), totag(totag) {}
 	virtual ~IntDiskIndex() {}
@@ -165,7 +165,8 @@
 			const Converter<TAG, int>& fromtag);
 	virtual ~IntDiskIndexer() {}
 
-	void write(const std::string& pkgidx, const std::string& tagidx);
+	const MMapIndexer& pkgIndexer() const { return pkgidx; }
+	const MMapIndexer& tagIndexer() const { return tagidx; }
 };
 
 

Modified: tagcoll/trunk/tagcoll/IntIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/IntIndex.cc	(original)
+++ tagcoll/trunk/tagcoll/IntIndex.cc	Tue Feb 14 10:13:19 2006
@@ -60,6 +60,8 @@
 
 struct tagcoll_intindex_shar {
 	tagcoll_intindex_shar() {
+		MasterMMapIndexer master(fname);
+
 		// Create the index
 		IntIndexer indexer;
 		indexer.map(4, 1);
@@ -69,7 +71,8 @@
 		indexer.map(0, 8);
 		indexer.map(0, 1);
 		indexer.map(0, 7);
-		indexer.write(fname);
+		master.append(indexer);
+		master.commit();
 	}
 	~tagcoll_intindex_shar() {
 		// Delete the test index
@@ -81,8 +84,10 @@
 template<> template<>
 void to::test<1>()
 {
+	MasterMMapIndex master(fname);
+
 	// Read the index
-	IntIndex index(fname);
+	IntIndex index(master, 0);
 
 	// Check the number of mapped items
 	ensure_equals(index.size(), 5u);

Modified: tagcoll/trunk/tagcoll/IntIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/IntIndex.h	(original)
+++ tagcoll/trunk/tagcoll/IntIndex.h	Tue Feb 14 10:13:19 2006
@@ -55,7 +55,7 @@
 	inline size_t ofs(int val) const { return buf()[val]; }
 
 public:
-	IntIndex(const std::string& filename) : MMapIndex(filename) {}
+	IntIndex(const MasterMMapIndex& master, int idx) : MMapIndex(master, idx) {}
 
 	const int* data(int val) const { return (val >= 0 && (unsigned)val < size()) ? buf() + ofs(val) + 1 : 0; }
 	size_t size(int val) const { return (val >= 0 && (unsigned)val < size()) ? buf()[ofs(val)] : 0; }

Modified: tagcoll/trunk/tagcoll/MMapIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/MMapIndex.cc	(original)
+++ tagcoll/trunk/tagcoll/MMapIndex.cc	Tue Feb 14 10:13:19 2006
@@ -19,6 +19,7 @@
  */
 
 #include <tagcoll/MMapIndex.h>
+#include <tagcoll/stringf.h>
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -32,8 +33,9 @@
 
 using namespace std;
 using namespace Tagcoll;
+using namespace stringf;
 
-MMapIndex::MMapIndex(const std::string& filename) : m_filename(filename), m_fd(-1), m_buf(0)
+MasterMMapIndex::MasterMMapIndex(const std::string& filename) : m_filename(filename), m_fd(-1), m_buf(0)
 {
 	// Open the file
 	if ((m_fd = open(m_filename.c_str(), O_RDONLY)) == -1)
@@ -55,66 +57,56 @@
 	}
 }
 
-MMapIndex::~MMapIndex()
+MasterMMapIndex::~MasterMMapIndex()
 {
 	// Unmap and close the file
 	munmap((void*)m_buf, m_size);
 	close(m_fd);
 }
 
-void MMapIndexer::write(const std::string& filename)
+MMapIndex::MMapIndex(const MasterMMapIndex& master, size_t idx)
+	: m_master(master), m_buf(m_master.m_buf), m_size(*(int*)m_buf)
+{
+	// Jump to the idx-th index
+	for (size_t i = 0; i < idx; i++)
+	{
+		m_buf = m_buf + m_size + sizeof(int);
+		if (m_buf > master.m_buf + master.m_size)
+			throw NotFoundException("looking for subindex " + fmt(idx) + " in " + master.m_filename);
+		m_size = *(int*)m_buf;
+	}
+	
+	// Work with the subindex data, skipping the subindex chain size word
+	m_buf += sizeof(int);
+}
+
+
+MasterMMapIndexer::MasterMMapIndexer(const std::string& filename)
+	: finalname(filename)
 {
 	// Create a temporary file next to the target file
 	char name[filename.size() + 8];
 	memcpy(name, filename.data(), filename.size());
 	memcpy(name + filename.size(), ".XXXXXX", 8);
-	int fd = mkstemp(name);
+
+	fd = mkstemp(name);
+	tmpname = name;
 	if (fd == -1)
-		throw SystemException(errno, "creating temporary file " + filename + ".XXXXXX");
+		throw SystemException(errno, "creating temporary file " + tmpname);
 
-#if 0
-	int size = encodedSize();
-	int buf[size];
-	encode(buf);
-#else
-	// Enlarge the temporary file to fit the data
-	int size = encodedSize();
-	if (lseek(fd, size - 1, SEEK_SET) == -1)
-	{
-		close(fd);
-		unlink(name);
-		throw SystemException(errno, string("enlarging file (seek) ") + name);
-	}
+}
 
-	// Write one byte at the end to actually resize the file
-	if (::write(fd, &fd, 1) == -1)
-	{
+MasterMMapIndexer::~MasterMMapIndexer()
+{
+	if (fd != -1)
 		close(fd);
-		unlink(name);
-		throw SystemException(errno, string("enlarging file (write) ") + name);
-	}
 
-	// Map the file into memory
-	void* buf = mmap(0, size, PROT_WRITE, MAP_SHARED, fd, 0);
-	if (buf == MAP_FAILED)
-	{
-		close(fd);
-		unlink(name);
-		throw SystemException(errno, string("mmapping file ") + name);
-	}
-
-	// Write the index data to the file
-	encode((char*)buf);
-
-	// Unmap the file
-	if (munmap(buf, size) == -1)
-	{
-		close(fd);
-		unlink(name);
-		throw SystemException(errno, string("munmapping file ") + name);
-	}
-#endif
+	if (!tmpname.empty())
+		unlink(tmpname.c_str());
+}
 
+void MasterMMapIndexer::commit()
+{
 	// Give the file the right permissions according to umask
 
 	// Read the current umask
@@ -122,21 +114,32 @@
 	umask(mask);
 	// Set the file permissions
 	if (fchmod(fd, 0666 & ~mask) == -1)
-	{
-		close(fd);
-		unlink(name);
-		throw SystemException(errno, string("setting permissions on file ") + name);
-	}
+		throw SystemException(errno, string("setting permissions on file ") + tmpname);
 
 	// Close the file
 	close(fd);
+	fd = -1;
 
 	// Rename to the final file name, performing the atomic update
-	if (rename(name, filename.c_str()) == -1)
-	{
-		unlink(name);
-		throw SystemException(errno, string("renaming file ") + name + " into " + filename);
-	}
+	if (rename(tmpname.c_str(), finalname.c_str()) == -1)
+		throw SystemException(errno, string("renaming file ") + tmpname + " into " + finalname);
+	tmpname.clear();
+}
+
+void MasterMMapIndexer::append(const MMapIndexer& idx)
+{
+	// Get the encoded data
+	int size = idx.encodedSize();
+	char buf[size];
+	idx.encode(buf);
+
+	// Write the size word that prefixes the subindex data
+	if (::write(fd, &size, sizeof(int)) != sizeof(int))
+		throw SystemException(errno, "writing size word to file " + tmpname);
+	
+	// Write the index data
+	if (::write(fd, buf, size) != size)
+		throw SystemException(errno, "writing subindex to file " + tmpname);
 }
 
 
@@ -152,7 +155,7 @@
 class TestIndex : public MMapIndex
 {
 public:
-	TestIndex(const std::string& filename) : MMapIndex(filename) {}
+	TestIndex(MasterMMapIndex& master, int idx) : MMapIndex(master, idx) {}
 
 	const char* get() const { return m_buf; }
 	unsigned int size() const { return m_size; }
@@ -179,8 +182,14 @@
 
 struct tagcoll_mmapindex_shar {
 	tagcoll_mmapindex_shar() {
-		TestIndexer indexer("pippo");
-		indexer.write(fname);
+		TestIndexer index1("pippo");
+		TestIndexer index2("pluto");
+		TestIndexer index3("paperino");
+		MasterMMapIndexer master(fname);
+		master.append(index1);
+		master.append(index2);
+		master.append(index3);
+		master.commit();
 	}
 	~tagcoll_mmapindex_shar() {
 		// Delete the test index
@@ -193,10 +202,19 @@
 template<> template<>
 void to::test<1>()
 {
-	TestIndex index(fname);
+	MasterMMapIndex master(fname);
 
-	ensure_equals(index.size(), 6u);
-	ensure_equals(string(index.get()), string("pippo"));
+	TestIndex index1(master, 0);
+	gen_ensure_equals(index1.size(), 6u);
+	gen_ensure_equals(string(index1.get()), string("pippo"));
+
+	TestIndex index2(master, 1);
+	gen_ensure_equals(index2.size(), 6u);
+	gen_ensure_equals(string(index2.get()), string("pluto"));
+
+	TestIndex index3(master, 2);
+	gen_ensure_equals(index3.size(), 9u);
+	gen_ensure_equals(string(index3.get()), string("paperino"));
 }
 
 }

Modified: tagcoll/trunk/tagcoll/MMapIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/MMapIndex.h	(original)
+++ tagcoll/trunk/tagcoll/MMapIndex.h	Tue Feb 14 10:13:19 2006
@@ -30,10 +30,17 @@
 namespace Tagcoll
 {
 
+class MMapIndex;
+
 /**
  * Performs the memory management and mmapping tasks for mmapped indexes.
+ *
+ * One MMap can contain many indexes.  Indexes come chained one after the
+ * other, prefixed by an int that specifies their length:
+ *
+ * [size of index 1][index1][size of index 2][index2]...
  */
-class MMapIndex
+class MasterMMapIndex
 {
 protected:
 	std::string m_filename;
@@ -42,12 +49,25 @@
 	const char* m_buf;
 
 public:
-	MMapIndex(const std::string& filename);
-	~MMapIndex();
+	MasterMMapIndex(const std::string& filename);
+	~MasterMMapIndex();
+	
+	friend class MMapIndex;
+};
+
+class MMapIndex
+{
+protected:
+	const MasterMMapIndex& m_master;
+	const char* m_buf;
+	size_t m_size;
+
+public:
+	MMapIndex(const MasterMMapIndex& master, size_t idx);
 };
 
 /**
- * Simple framework for implementing indexers.
+ * Interface for indexers.
  */
 class MMapIndexer
 {
@@ -60,11 +80,32 @@
 	/// Write the index data in the given buffer, which should be at least
 	/// encodedSize bytes
 	virtual void encode(char* buf) const = 0;
+};
+
+/**
+ * Master index writer.  It allows writing many indexes to the same file
+ * atomically: the file is created as a tempfile and renamed to the destination
+ * filename by commit(); destruction without commit() removes the tempfile.
+ */
+class MasterMMapIndexer
+{
+protected:
+	std::string finalname;
+	std::string tmpname;
+	int fd;
+
+public:
+	MasterMMapIndexer(const std::string& filename);
+	~MasterMMapIndexer();
 
-	/// Atomically write the index to the given file
-	void write(const std::string& filename);
+	/// Close the file and perform the final rename
+	void commit();
+
+	/// Append one subindex
+	void append(const MMapIndexer& idx);
 };
 
+
 };
 
 // vim:set ts=4 sw=4:

Modified: tagcoll/trunk/tagcoll/StringIndex.cc
==============================================================================
--- tagcoll/trunk/tagcoll/StringIndex.cc	(original)
+++ tagcoll/trunk/tagcoll/StringIndex.cc	Tue Feb 14 10:13:19 2006
@@ -45,13 +45,52 @@
 		return begin;
 }
 
+
+int StringIndexer::operator()(const std::string& item) const
+{
+	int begin, end;
+
+	/* Binary search */
+	begin = -1, end = data.size();
+	while (end - begin > 1)
+	{
+		int cur = (end + begin) / 2;
+		if (data[cur] > item)
+			end = cur;
+		else
+			begin = cur;
+	}
+
+	if (begin == -1 || data[begin] != item)
+		//throw NotFoundException(string("looking for the ID of string ") + str);
+		return -1;
+	else
+		return begin;
+}
+
+void StringIndexer::map(const std::string& str)
+{
+	if ((*this)(str) != -1)
+		return;
+
+	// Insertion sort
+	int pos = data.size();
+	data.push_back(string());
+	for (; pos > 0; pos--)
+		if (data[pos - 1] > str)
+			data[pos] = data[pos - 1];
+		else
+			break;
+	data[pos] = str;
+}
+
 int StringIndexer::encodedSize() const
 {
 	// First the size of the offset array
-	int bufsize = size() * sizeof(int);
+	int bufsize = data.size() * sizeof(int);
 	
 	// Then the size of all the 0-terminated strings
-	for (const_iterator i = begin(); i != end(); i++)
+	for (vector<string>::const_iterator i = data.begin(); i != data.end(); i++)
 		bufsize += i->size() + 1;
 
 	// Then the int with the number of items
@@ -60,9 +99,9 @@
 
 void StringIndexer::encode(char* buf) const
 {
-	int pos = size() * sizeof(int);
+	int pos = data.size() * sizeof(int);
 	int idx = 0;
-	for (const_iterator i = begin(); i != end(); i++)
+	for (vector<string>::const_iterator i = data.begin(); i != data.end(); i++)
 	{
 		((int*)buf)[idx++] = pos;
 		memcpy(buf + pos, i->c_str(), i->size() + 1);
@@ -83,6 +122,8 @@
 struct tagcoll_stringindex_shar {
 	tagcoll_stringindex_shar() {
 		// Create the index
+		MasterMMapIndexer master(fname);
+
 		StringIndexer indexer;
 		indexer.map("pizza");
 		indexer.map("spaghetti");
@@ -91,7 +132,10 @@
 		indexer.map("polpettone");
 		indexer.map("friggione");
 		indexer.map("arrosto");
-		indexer.write(fname);
+		indexer.map("pizza");
+		indexer.map("lasagne");
+		master.append(indexer);
+		master.commit();
 	}
 	~tagcoll_stringindex_shar() {
 		// Delete the test index
@@ -103,8 +147,10 @@
 template<> template<>
 void to::test<1>()
 {
+	MasterMMapIndex master(fname);
+
 	// Read the index
-	StringIndex index(fname);
+	StringIndex index(master, 0);
 
 	// Check the number of mapped items
 	ensure_equals(index.size(), 7u);

Modified: tagcoll/trunk/tagcoll/StringIndex.h
==============================================================================
--- tagcoll/trunk/tagcoll/StringIndex.h	(original)
+++ tagcoll/trunk/tagcoll/StringIndex.h	Tue Feb 14 10:13:19 2006
@@ -49,7 +49,7 @@
 	int offset(int val) const { return ((const int*)m_buf)[val]; }
 	
 public:
-	StringIndex(const std::string& filename) : MMapIndex(filename) {}
+	StringIndex(const MasterMMapIndex& master, int idx) : MMapIndex(master, idx) {}
 	virtual ~StringIndex() {}
 
 	virtual std::string operator()(const int& item) const { return data(item); }
@@ -63,14 +63,17 @@
 /**
  * Creates an on-disk index to use for IntIndex
  */
-class StringIndexer : public std::set<std::string>, public MMapIndexer
+class StringIndexer : public MMapIndexer, public Converter<int, std::string>, public Converter<std::string, int>
 {
+protected:
+	std::vector<std::string> data;
+
 public:
+	virtual std::string operator()(const int& item) const { return data[item]; }
+	virtual int operator()(const std::string& item) const;
+
 	/// Store the key->val mapping into the indexer
-	void map(const std::string& str)
-	{
-		insert(str);
-	}
+	void map(const std::string& str);
 	
 	/// Return the size of the encoded index data
 	int encodedSize() const;

Modified: tagcoll/trunk/tools/tagidx.cc
==============================================================================
--- tagcoll/trunk/tools/tagidx.cc	(original)
+++ tagcoll/trunk/tools/tagidx.cc	Tue Feb 14 10:13:19 2006
@@ -27,13 +27,13 @@
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
-#define APPNAME PACKAGE
 #else
 #warning No config.h found: using fallback values
-#define APPNAME __FILE__
 #define VERSION "unknown"
 #endif
 
+#define APPNAME "tagidx"
+
 #include "CommandlineParser.h"
 #include <tagcoll/BasicStringDiskIndex.h>
 #include <tagcoll/ItemGrouper.h>
@@ -46,25 +46,16 @@
 #include <pwd.h>
 #include <dirent.h>
 #include <time.h>
+#include <stdlib.h>	// getenv
 #include <errno.h>
 
 #include <tagcoll/StdioParserInput.h>
 #include <tagcoll/TextFormat.h>
 
 #include <iostream>
+#include <sstream>
 
 #if 0
-#include <stdio.h>
-
-#include <stdlib.h>	// getenv
-
-
-#include <tagcoll/stringf.h>
-#include <tagcoll/Exception.h>
-
-#include <tagcoll/CardinalityStore.h>
-#include <tagcoll/SmartHierarchy.h>
-
 #include <tagcoll/Consumer.h>
 #include <tagcoll/Filter.h>
 #include <tagcoll/InputMerger.h>
@@ -72,16 +63,22 @@
 #include <tagcoll/Filters.h>
 #include <tagcoll/Patches.h>
 #include <tagcoll/DerivedTags.h>
-
-#include <tagcoll/Serializer.h>
 #include <tagcoll/Expression.h>
-
 #include <algorithm>
 #endif
 
 using namespace std;
 using namespace Tagcoll;
 
+string get_username()
+{
+	struct passwd* pw = getpwuid(getuid());
+	if (pw != NULL)
+		return pw->pw_name;
+	else
+		return string();
+}
+
 void mkpath(const std::string& dir)
 {
 	size_t sep = dir.rfind('/');
@@ -243,12 +240,12 @@
 	BasicStringDiskIndex ro_idx;
 
 public:
-	Index(const std::string& dir)
+	Index(const std::string& file)
 		: PatchCollection<std::string, std::string>(ro_idx),
-			ro_idx(dir)
+			ro_idx(file)
 	{
 		// Read the patches
-		setChanges(readPatchesFromDir(dir + "/patches"));
+		setChanges(readPatchesFromDir(file + ".patches"));
 	}
 
 	virtual ~Index() {}
@@ -256,7 +253,7 @@
 	void addPatch(const PatchList<std::string, std::string>& patch, const std::string& name, bool overwrite = false)
 	{
 		TrivialConverter<string, string> conv;
-		string patchfile = ro_idx.directory() + "/patches/" + name;
+		string patchfile = ro_idx.filename() + ".patches/" + name;
 		
 		int fd = open(patchfile.c_str(), O_CREAT | O_WRONLY | (overwrite ? O_TRUNC : O_EXCL), 0666);
 		if (fd == -1)
@@ -272,9 +269,9 @@
 		addChanges(patch);
 	}
 
-	static void remove(const std::string& dir)
+	static void remove(const std::string& file)
 	{
-		string patchdir = dir + "/patches";
+		string patchdir = file + ".patches";
 		removePatchesFromDir(patchdir);
 
 		if (rmdir(patchdir.c_str()) == -1)
@@ -283,7 +280,7 @@
 			else
 				throw SystemException(errno, "Removing directory " + patchdir);
 
-		BasicStringDiskIndex::remove(dir);
+		unlink(file.c_str());
 	}
 };
 
@@ -292,15 +289,34 @@
 public:
 	virtual ~Indexer() {}
 
-	void write(const std::string& dir)
+	void write(const std::string& file)
 	{
-		BasicStringDiskIndexer::write(dir);
+		BasicStringDiskIndexer::write(file);
 
 		// Create the patches directory if it does not exist yet
-		mkpath(dir + "/patches");
+		mkpath(file + ".patches");
 	}
 };
 
+void addPatch(Index& idx, PatchList<string, string>& patch)
+{
+	static int seq = 1;
+
+	if (patch.size() > 0)
+	{
+		// Compute a default name from username, current time and pid
+		char timestr[50];
+		time_t t = time(NULL);
+		strftime(timestr, 50, "%Y%m%d-%H%M%S", localtime(&t));
+
+		std::stringstream ss;
+		ss << get_username() << "-" << timestr << "-" << getpid() << "-" << seq++;
+	
+		idx.addPatch(patch, ss.str(), false);
+	} else
+		cerr << "ignoring request to add an empty patch" << endl;
+}
+
 #if 0
 PatchList<string, string> readPatches(const string& file)
 	throw (FileException, ParserException)
@@ -641,16 +657,7 @@
 	}
 }
 
-string get_username()
-{
-	struct passwd* pw = getpwuid(getuid());
-	if (pw != NULL)
-		return pw->pw_name;
-	else
-		return string();
-}
-
-string indexdir(CommandlineParserWithCommand& opts)
+string indexname(CommandlineParserWithCommand& opts)
 {
 	if (opts.get("index").defined())
 		return opts.get("index").stringVal();
@@ -752,8 +759,7 @@
 		{
 			case CREATE:
 			{
-				string dir = indexdir(opts);
-				mkpath(dir);
+				string fname = indexname(opts);
 
 				Indexer indexer;
 				
@@ -763,14 +769,14 @@
 				else
 					readCollection("-",  indexer);
 
-				indexer.write(dir);
+				indexer.write(fname);
 
 				break;
 			}
 			case ADDPATCH:
 			{
-				string dir = indexdir(opts);
-				Index idx(dir);
+				string fname = indexname(opts);
+				Index idx(fname);
 
 				if (args.hasNext())
 				{
@@ -792,50 +798,42 @@
 					TrivialConverter<string, string> conv;
 					StdioParserInput input(stdin, "(stdin)");
 					PatchList<string, string> patch = TextFormat<string, string>::parsePatch(conv, conv, input);
-					// Compute a default name from username and current time
-					char name[50];
-					time_t t = time(NULL);
-					strftime(name, 50, "%Y%m%d-%H%M%S", localtime(&t));
-					if (patch.size() > 0)
-					{
-						idx.addPatch(patch, get_username() + "-" + name, opts.get("force").defined());
-					} else
-						cerr << "not adding empty patch" << endl;
+
+					addPatch(idx, patch);
 				}
 
 				break;
 			}
 			case COMPACT:
 			{
-				string dir = indexdir(opts);
+				string fname = indexname(opts);
 
 				Indexer indexer;
 
 				// Output the patched collection to the indexer
 				{
-					Index idx(dir);
+					Index idx(fname);
 					idx.output(indexer);
 				}
 
 				// Delete the old index and patches
-				Index::remove(dir);
+				Index::remove(fname);
 				
 				// Write the new index
-				mkpath(dir);
-				indexer.write(dir);
+				indexer.write(fname);
 
 				break;
 			}
 			case REMOVE:
 			{
-				string dir = indexdir(opts);
-				Index::remove(dir);
+				string fname = indexname(opts);
+				Index::remove(fname);
 				break;
 			}
 			case CAT:
 			{
-				string dir = indexdir(opts);
-				Index idx(dir);
+				string fname = indexname(opts);
+				Index idx(fname);
 
 				output(opts, idx);
 				break;



More information about the Debtags-commits mailing list