[med-svn] [hat-trie] 01/07: Imported Upstream version 0.0~git25f9e946

Sascha Steinbiss satta at debian.org
Wed Jun 22 17:12:31 UTC 2016


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository hat-trie.

commit 81e9ce8ed8a695d64de04d5c9a20f1971f0d3c43
Author: Sascha Steinbiss <satta at debian.org>
Date:   Wed Jun 22 16:45:55 2016 +0000

    Imported Upstream version 0.0~git25f9e946
---
 .gitignore               |  21 ++
 .travis.yml              |   6 +
 COPYING                  |  19 ++
 Makefile.am              |  10 +
 README.md                |  47 +++
 TODO                     |   6 +
 configure.ac             |  34 ++
 hat-trie-0.1.pc.in       |  12 +
 m4/.gitignore            |   0
 src/Makefile.am          |  11 +
 src/ahtable.c            | 564 ++++++++++++++++++++++++++++++++
 src/ahtable.h            | 115 +++++++
 src/common.h             |  22 ++
 src/hat-trie.c           | 711 +++++++++++++++++++++++++++++++++++++++++
 src/hat-trie.h           |  74 +++++
 src/misc.c               |  46 +++
 src/misc.h               |  22 ++
 src/murmurhash3.c        |  77 +++++
 src/murmurhash3.h        |  12 +
 src/pstdint.h            | 813 +++++++++++++++++++++++++++++++++++++++++++++++
 test/Makefile.am         |  15 +
 test/bench_sorted_iter.c |  69 ++++
 test/check_ahtable.c     | 222 +++++++++++++
 test/check_hattrie.c     | 270 ++++++++++++++++
 test/str_map.c           | 241 ++++++++++++++
 test/str_map.h           |  54 ++++
 26 files changed, 3493 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6a9dcba
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+*.la
+*.lo
+*.o
+*~
+.DS_Store
+.deps
+.libs
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache
+config.*
+configure
+depcomp
+hat-trie-*.pc
+hat-trie-*.tar.gz
+install-sh
+libtool
+ltmain.sh
+m4
+missing
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..4c10ab2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,6 @@
+language: c
+compiler:
+    - clang
+    - gcc
+before_script: autoreconf -i
+script: ./configure && make && make check
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..bbc6dc3
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,19 @@
+Copyright (C) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..9df925f
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,10 @@
+
+SUBDIRS = src test
+
+EXTRA_DIST = README.md COPYING
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = hat-trie-0.1.pc
+
+ACLOCAL_AMFLAGS=-I m4
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f0bee4f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,47 @@
+
+Hat-Trie
+========
+
+[![Build Status](https://travis-ci.org/dcjones/hat-trie.svg)](https://travis-ci.org/dcjones/hat-trie)
+
+This is an ANSI C99 implementation of the HAT-trie data structure of Askitis and
+Sinha, an extremely efficient (space and time) modern variant of tries.
+
+The version implemented here maps arrays of bytes to words (i.e., unsigned
+longs), which can be used to store counts, pointers, etc, or not used at all if
+you simply want to maintain a set of unique strings.
+
+For details see,
+
+  1. Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
+     structure for strings. Proceedings of the thirtieth Australasian conference on
+     Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc.
+
+  2. Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
+     string hash tables. String Processing and Information Retrieval (pp.
+     91–102). Springer.
+
+
+Installation
+------------
+
+    git clone git@github.com:dcjones/hat-trie.git
+    cd hat-trie
+    autoreconf -i
+    ./configure
+    make install
+
+To use the library, include `hat-trie.h` and link using `-lhat-trie`.
+
+
+Tests
+-----
+
+Build and run the tests:
+
+    make check
+
+Other Language Bindings
+-----------------------
+ * Ruby - https://github.com/luikore/triez
+ * Python - https://github.com/kmike/hat-trie
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..be8bd3a
--- /dev/null
+++ b/TODO
@@ -0,0 +1,6 @@
+
+todo:
+  * Deletion in ahtable.
+  * Deletion in hattrie.
+
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..870b786
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,34 @@
+
+AC_INIT([hat-trie], [0.1.0], [dcjones at cs.washington.edu])
+AM_INIT_AUTOMAKE([foreign])
+m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
+AC_CONFIG_MACRO_DIR([m4])
+
+base_CFLAGS="-std=c99 -Wall -Wextra -pedantic"
+opt_CFLAGS="${base_CFLAGS} -O3"
+dbg_CFLAGS="${base_CFLAGS} -g -O0"
+
+AC_ARG_ENABLE([debugging],
+              [AS_HELP_STRING([--enable-debugging],
+	                      [enable debugging info (default is no)])],
+              [], [enable_debugging=no])
+
+AS_IF([test "x$enable_debugging" = xyes],
+      [CFLAGS="$dbg_CFLAGS"],
+      [CFLAGS="$opt_CFLAGS"])
+
+
+AC_PROG_CC
+AC_PROG_CPP
+AC_PROG_INSTALL
+AC_PROG_LN_S
+AC_PROG_MAKE_SET
+AC_DISABLE_SHARED
+AC_PROG_LIBTOOL
+
+AC_C_BIGENDIAN([AC_MSG_ERROR([Big-endian systems are not currently supported.])])
+AC_HEADER_STDBOOL
+
+AC_CONFIG_FILES([hat-trie-0.1.pc Makefile src/Makefile test/Makefile])
+AC_OUTPUT
+
diff --git a/hat-trie-0.1.pc.in b/hat-trie-0.1.pc.in
new file mode 100644
index 0000000..b694008
--- /dev/null
+++ b/hat-trie-0.1.pc.in
@@ -0,0 +1,12 @@
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: @PACKAGE_NAME@
+Description: An efficient trie implementation.
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}
+Libs: -L${libdir} -lhat-trie
+
diff --git a/m4/.gitignore b/m4/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..942bc65
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,11 @@
+
+lib_LTLIBRARIES = libhat-trie.la
+
+libhat_trie_la_SOURCES = common.h \
+                         ahtable.h        ahtable.c \
+                         hat-trie.h       hat-trie.c \
+                         misc.h           misc.c \
+                         murmurhash3.h    murmurhash3.c
+
+pkginclude_HEADERS = hat-trie.h ahtable.h common.h pstdint.h
+
diff --git a/src/ahtable.c b/src/ahtable.c
new file mode 100644
index 0000000..c0f6fb3
--- /dev/null
+++ b/src/ahtable.c
@@ -0,0 +1,564 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ * See ahtable.h for description of the Array Hash Table.
+ *
+ */
+
+#include "ahtable.h"
+#include "misc.h"
+#include "murmurhash3.h"
+#include <assert.h>
+#include <string.h>
+
+const double ahtable_max_load_factor = 100000.0; /* arbitrarily large number => effectively never resize */
+const size_t ahtable_initial_size = 4096; /* slot count used by ahtable_create() and ahtable_clear() */
+/* Decode the key length stored at the start of entry s (see ins_key). */
+static size_t keylen(slot_t s) {
+    uint16_t k;
+    /* Low bit clear: the length fits in this single byte. */
+    if (!(0x1 & *s)) return (size_t) (*s >> 1);
+    /* Two-byte length. s may be unaligned, so copy instead of casting. */
+    memcpy(&k, s, sizeof(k));
+    return (size_t) (k >> 1);
+}
+
+/* Create an empty table with the default slot count (ahtable_initial_size). */
+ahtable_t* ahtable_create()
+{
+    return ahtable_create_n(ahtable_initial_size);
+}
+
+/* Create an empty table with n slots; release it with ahtable_free(). */
+ahtable_t* ahtable_create_n(size_t n)
+{
+    ahtable_t* table = malloc_or_die(sizeof(ahtable_t));
+    table->flag = 0;
+    table->c0 = table->c1 = '\0';
+
+    table->n = n;
+    table->m = 0;
+    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n); /* key count at which get_key() expands the table */
+    table->slots = malloc_or_die(n * sizeof(slot_t));
+    memset(table->slots, 0, n * sizeof(slot_t)); /* no slot owns a buffer yet */
+    /* slot_sizes[j] is the number of bytes in use in slots[j] */
+    table->slot_sizes = malloc_or_die(n * sizeof(size_t));
+    memset(table->slot_sizes, 0, n * sizeof(size_t));
+
+    return table;
+}
+
+/* Free every slot buffer, both per-slot arrays and the table itself; NULL is a no-op. */
+void ahtable_free(ahtable_t* table)
+{
+    if (table == NULL) return;
+    size_t i;
+    for (i = 0; i < table->n; ++i) free(table->slots[i]);
+    free(table->slots);
+    free(table->slot_sizes);
+    free(table);
+}
+
+/* Number of key/value pairs currently stored in the table. */
+size_t ahtable_size(const ahtable_t* table)
+{
+    return table->m;
+}
+
+/* Bytes accounted to the table: the struct, both per-slot arrays, and the bytes in use in every slot. */
+size_t ahtable_sizeof(const ahtable_t* table)
+{
+    size_t nbytes = sizeof(ahtable_t) +
+                    table->n * (sizeof(size_t) + sizeof(slot_t));
+    size_t i;
+    for (i = 0; i < table->n; ++i) {
+        nbytes += table->slot_sizes[i]; /* used bytes, as tracked by get_key()/ahtable_del() */
+    }
+    return nbytes;
+}
+
+/* Remove all entries and shrink the table back to ahtable_initial_size slots. */
+void ahtable_clear(ahtable_t* table)
+{
+    for (size_t i = 0; i < table->n; ++i) free(table->slots[i]);
+    table->n = ahtable_initial_size;
+    table->m = 0; /* the table is empty again; ahtable_size() and the iterators rely on this */
+    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n); /* n changed, so refresh the resize threshold */
+    table->slots = realloc_or_die(table->slots, table->n * sizeof(slot_t));
+    memset(table->slots, 0, table->n * sizeof(slot_t));
+    table->slot_sizes = realloc_or_die(table->slot_sizes, table->n * sizeof(size_t));
+    memset(table->slot_sizes, 0, table->n * sizeof(size_t));
+}
+
+/** Inserts a key with a zeroed value at position s of a slot, and returns a
+  * pointer to the space immediately after. *val is set to the new value.
+  */
+static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val)
+{
+    /* The length field holds 15 bits, so longer keys cannot be encoded. */
+    assert(len <= 0x7fff);
+    // key length
+    if (len < 128) {
+        s[0] = (unsigned char) (len << 1);
+        s += 1;
+    }
+    else {
+        /* The least significant bit is set to indicate that two bytes are being
+         * used to store the key length. s may be unaligned, so copy it in. */
+        uint16_t k = (uint16_t) ((len << 1) | 0x1);
+        memcpy(s, &k, sizeof(k));
+        s += 2;
+    }
+    // key
+    memcpy(s, key, len * sizeof(unsigned char));
+    s += len;
+    // value: zero-initialised and handed back to the caller through val
+    *val = (value_t*) s;
+    **val = 0;
+    s += sizeof(value_t);
+    return s;
+}
+
+/* Double the number of slots and rehash every entry into freshly allocated slot buffers. */
+static void ahtable_expand(ahtable_t* table)
+{
+    /* Resizing a table is essentially building a brand new one.
+     * One little shortcut we can take on the memory allocation front is to
+     * figure out how much memory each slot needs in advance.
+     */
+    assert(table->n > 0);
+    size_t new_n = 2 * table->n;
+    size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t));
+    memset(slot_sizes, 0, new_n * sizeof(size_t));
+    /* pass 1: tally the bytes each new slot will need */
+    const char* key;
+    size_t len = 0;
+    size_t m = 0;
+    ahtable_iter_t* i = ahtable_iter_begin(table, false);
+    while (!ahtable_iter_finished(i)) {
+        key = ahtable_iter_key(i, &len);
+        slot_sizes[hash(key, len) % new_n] +=
+            len + sizeof(value_t) + (len >= 128 ? 2 : 1); /* key + value + length prefix */
+
+        ++m;
+        ahtable_iter_next(i);
+    }
+    assert(m == table->m);
+    ahtable_iter_free(i);
+
+
+    /* allocate slots */
+    slot_t* slots = malloc_or_die(new_n * sizeof(slot_t));
+    size_t j;
+    for (j = 0; j < new_n; ++j) {
+        if (slot_sizes[j] > 0) {
+            slots[j] = malloc_or_die(slot_sizes[j]);
+        }
+        else slots[j] = NULL;
+    }
+
+    /* rehash values. A few shortcuts can be taken here as well, as we know
+     * there are no duplicate keys. Instead of the regular insertion routine,
+     * we keep a write cursor (slots_next) per slot and simply append keys.
+     * */
+    slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t));
+    memcpy(slots_next, slots, new_n * sizeof(slot_t)); /* every cursor starts at the beginning of its slot */
+    size_t h;
+    m = 0;
+    value_t* u;
+    value_t* v;
+    i = ahtable_iter_begin(table, false);
+    while (!ahtable_iter_finished(i)) {
+
+        key = ahtable_iter_key(i, &len);
+        h = hash(key, len) % new_n;
+
+        slots_next[h] = ins_key(slots_next[h], key, len, &u);
+        v = ahtable_iter_val(i);
+        *u = *v; /* ins_key zeroed the new value; carry the old one over */
+
+        ++m;
+        ahtable_iter_next(i);
+    }
+    assert(m == table->m);
+    ahtable_iter_free(i);
+
+    /* release the old storage and swap the new arrays in */
+    free(slots_next);
+    for (j = 0; j < table->n; ++j) free(table->slots[j]);
+
+    free(table->slots);
+    table->slots = slots;
+
+    free(table->slot_sizes);
+    table->slot_sizes = slot_sizes;
+
+    table->n = new_n;
+    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);
+}
+
+/* Find key in its slot; if absent, append it with a zero value when insert_missing is set, otherwise return NULL. */
+static value_t* get_key(ahtable_t* table, const char* key, size_t len, bool insert_missing)
+{
+    /* if we are at capacity, preemptively resize */
+    if (insert_missing && table->m >= table->max_m) {
+        ahtable_expand(table);
+    }
+
+
+    uint32_t i = hash(key, len) % table->n;
+    size_t k;
+    slot_t s;
+    value_t* val;
+
+    /* search the array for our key */
+    s = table->slots[i];
+    while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) {
+        /* get the key length */
+        k = keylen(s);
+        s += k < 128 ? 1 : 2;
+
+        /* skip keys whose length differs from ours */
+        if (k != len) {
+            s += k + sizeof(value_t);
+            continue;
+        }
+
+        /* key found: its value is stored right after the key bytes. */
+        if (memcmp(s, key, len) == 0) {
+            return (value_t*) (s + len);
+        }
+        /* same length but different bytes: move on to the next entry. */
+        else {
+            s += k + sizeof(value_t);
+            continue;
+        }
+    }
+
+
+    if (insert_missing) {
+        /* the key was not found, so we must insert it. */
+        size_t new_size = table->slot_sizes[i];
+        new_size += 1 + (len >= 128 ? 1 : 0);    // key length
+        new_size += len * sizeof(unsigned char); // key
+        new_size += sizeof(value_t);             // value
+
+        table->slots[i] = realloc_or_die(table->slots[i], new_size);
+
+        ++table->m;
+        ins_key(table->slots[i] + table->slot_sizes[i], key, len, &val); /* append at the old end of the slot */
+        table->slot_sizes[i] = new_size;
+
+        return val;
+    }
+    else return NULL;
+}
+
+/* Return a pointer to the value for key, inserting the key with a zero value if it is missing. */
+value_t* ahtable_get(ahtable_t* table, const char* key, size_t len)
+{
+    return get_key(table, key, len, true);
+}
+
+/* Return a pointer to the value for key, or NULL if the key is not in the table. */
+value_t* ahtable_tryget(ahtable_t* table, const char* key, size_t len )
+{
+    return get_key(table, key, len, false);
+}
+
+/* Delete key from the table. Returns 0 if the key was removed, -1 if it was not present. */
+int ahtable_del(ahtable_t* table, const char* key, size_t len)
+{
+    uint32_t i = hash(key, len) % table->n;
+    size_t k;
+    slot_t s;
+
+    /* search the array for our key */
+    s = table->slots[i];
+    while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) {
+        /* get the key length */
+        k = keylen(s);
+        s += k < 128 ? 1 : 2;
+
+        /* skip keys whose length differs from ours */
+        if (k != len) {
+            s += k + sizeof(value_t);
+            continue;
+        }
+
+        /* key found. */
+        if (memcmp(s, key, len) == 0) {
+            /* close the gap; only the used size shrinks, the buffer is not reallocated */
+            unsigned char* t = s + len + sizeof(value_t); /* first byte after this entry */
+            s -= k < 128 ? 1 : 2; /* back up to the entry's length prefix */
+            memmove(s, t, table->slot_sizes[i] - (size_t) (t - table->slots[i]));
+            table->slot_sizes[i] -= (size_t) (t - s);
+            --table->m;
+            return 0;
+        }
+        /* same length but different bytes: move on to the next entry. */
+        else {
+            s += k + sizeof(value_t);
+            continue;
+        }
+    }
+
+    // Key was not found. Do nothing.
+    return -1;
+}
+
+
+/* qsort() comparator over slot_t entries: orders by key bytes; when one key is a prefix of the other, the shorter sorts first. */
+static int cmpkey(const void* a_, const void* b_)
+{
+    slot_t a = *(slot_t*) a_;
+    slot_t b = *(slot_t*) b_;
+
+    size_t ka = keylen(a), kb = keylen(b);
+
+    a += ka < 128 ? 1 : 2; /* step over the length prefix to the key bytes */
+    b += kb < 128 ? 1 : 2;
+
+    int c = memcmp(a, b, ka < kb ? ka : kb);
+    return c == 0 ? (int) ka - (int) kb : c;
+}
+
+
+/* Sorted/unsorted iterators are kept private and exposed by passing the
+sorted flag to ahtable_iter_begin. */
+
+typedef struct ahtable_sorted_iter_t_
+{
+    const ahtable_t* table; // parent
+    slot_t* xs; // one pointer per entry (to its length prefix), sorted with cmpkey
+    size_t i; // index into xs of the current entry
+} ahtable_sorted_iter_t;
+
+/* Build a sorted iterator: collect a pointer to every entry in the table, then sort the pointers by key. */
+static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* table)
+{
+    ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t));
+    i->table = table;
+    i->xs = malloc_or_die(table->m * sizeof(slot_t)); /* room for table->m entry pointers */
+    i->i = 0;
+
+    slot_t s;
+    size_t j, k, u;
+    for (j = 0, u = 0; j < table->n; ++j) {
+        s = table->slots[j];
+        while (s < table->slots[j] + table->slot_sizes[j]) {
+            i->xs[u++] = s;
+            k = keylen(s);
+            s += k < 128 ? 1 : 2;
+            s += k + sizeof(value_t); /* step over the key and value to the next entry */
+        }
+    }
+
+    qsort(i->xs, table->m, sizeof(slot_t), cmpkey);
+
+    return i;
+}
+
+/* True once the iterator has moved past the last of the table's m entries. */
+static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i)
+{
+    return i->i >= i->table->m;
+}
+
+/* Advance to the next entry in sorted order; does nothing once finished. */
+static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i)
+{
+    if (ahtable_sorted_iter_finished(i)) return;
+    ++i->i;
+}
+
+/* Free the pointer array and the iterator; the table itself is left untouched. NULL is a no-op. */
+static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i)
+{
+    if (i == NULL) return;
+    free(i->xs);
+    free(i);
+}
+
+/* Key bytes (not NUL-terminated) of the current entry, or NULL when finished; *len gets the key length if len is non-NULL. */
+static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len)
+{
+    if (ahtable_sorted_iter_finished(i)) return NULL;
+
+    slot_t s = i->xs[i->i];
+    size_t k = keylen(s); /* decode locally: len is optional and must not be dereferenced when NULL */
+    if (len) *len = k;
+    return (const char*) (s + (k < 128 ? 1 : 2));
+}
+
+/* Pointer to the value of the current entry, or NULL when finished. */
+static value_t*  ahtable_sorted_iter_val(ahtable_sorted_iter_t* i)
+{
+    if (ahtable_sorted_iter_finished(i)) return NULL;
+
+    slot_t s = i->xs[i->i];
+    size_t k = keylen(s);
+    /* the value follows the length prefix and the k key bytes */
+    s += k < 128 ? 1 : 2;
+    s += k;
+
+    return (value_t*) s;
+}
+
+/* Iterator that walks the slots in index order, entry by entry, without sorting. */
+typedef struct ahtable_unsorted_iter_t_
+{
+    const ahtable_t* table; // parent
+    size_t i;           // slot index
+    slot_t s;           // position of the current entry within slot i
+} ahtable_unsorted_iter_t;
+
+/* Create an unsorted iterator positioned on the first entry of the first non-empty slot. */
+static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* table)
+{
+    ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t));
+    i->table = table;
+    /* if every slot is empty, i->i ends at n and the iterator starts out finished */
+    for (i->i = 0; i->i < i->table->n; ++i->i) {
+        i->s = table->slots[i->i];
+        if ((size_t) (i->s - table->slots[i->i]) >= table->slot_sizes[i->i]) continue; /* i->s is the slot start, so this skips slots with no bytes in use */
+        break;
+    }
+
+    return i;
+}
+
+/* True once the slot index has run past the last slot. */
+static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i)
+{
+    return i->i >= i->table->n;
+}
+
+/* Advance to the next entry, moving on to the next non-empty slot when the current one is exhausted. */
+static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i)
+{
+    if (ahtable_unsorted_iter_finished(i)) return;
+
+    /* get the key length */
+    size_t k = keylen(i->s);
+    i->s += k < 128 ? 1 : 2;
+
+    /* skip to the next key */
+    i->s += k + sizeof(value_t);
+    /* past the used bytes of this slot: look for the next slot that holds data */
+    if ((size_t) (i->s - i->table->slots[i->i]) >= i->table->slot_sizes[i->i]) {
+        do {
+            ++i->i;
+        } while(i->i < i->table->n &&
+                i->table->slot_sizes[i->i] == 0);
+
+        if (i->i < i->table->n) i->s = i->table->slots[i->i];
+        else i->s = NULL; /* no slots left: the iterator is now finished */
+    }
+}
+
+/* Free the iterator; it owns no other memory. */
+static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i)
+{
+    free(i);
+}
+
+/* Key bytes (not NUL-terminated) of the current entry, or NULL when finished; *len gets the key length if len is non-NULL. */
+static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len)
+{
+    if (ahtable_unsorted_iter_finished(i)) return NULL;
+
+    /* i->s points at the length prefix of the current entry. The prefix is
+     * decoded by the shared keylen() helper rather than by a second copy of
+     * the one-/two-byte logic, so the encoding lives in one place. */
+    slot_t s = i->s;
+    size_t k = keylen(s);
+
+    /* Step over the prefix: one byte for keys shorter than 128 bytes, two
+     * bytes otherwise (the same rule ins_key() uses when writing it). */
+    s += k < 128 ? 1 : 2;
+
+    /* len is optional. */
+    if(len) *len = k;
+    return (const char*) s;
+}
+
+/* Pointer to the value stored with the current entry, or NULL when finished. */
+static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i)
+{
+    if (ahtable_unsorted_iter_finished(i)) return NULL;
+
+    /* i->s points at the length prefix of the current entry. Decode it with
+     * the shared keylen() helper instead of repeating the one-/two-byte
+     * logic here. */
+    slot_t s = i->s;
+    size_t k = keylen(s);
+
+    /* The value sits directly behind the length prefix (one byte for keys
+     * shorter than 128 bytes, otherwise two) and the k key bytes. */
+    s += k < 128 ? 1 : 2;
+    s += k;
+
+    /* NOTE: entries are byte-packed, so this address is generally not
+     * aligned for value_t. */
+    return (value_t*) s;
+}
+
+/* Public iterator handle: a tag plus a pointer to the private sorted or unsorted iterator. */
+struct ahtable_iter_t_
+{
+    bool sorted; // selects which member of the union below is valid
+    union {
+        ahtable_unsorted_iter_t* unsorted;
+        ahtable_sorted_iter_t* sorted;
+    } i;
+};
+
+/* Create an iterator over table, visiting keys in cmpkey order when sorted is true and in slot order otherwise. */
+ahtable_iter_t* ahtable_iter_begin(const ahtable_t* table, bool sorted) {
+    ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t));
+    i->sorted = sorted;
+    if (sorted) i->i.sorted   = ahtable_sorted_iter_begin(table);
+    else        i->i.unsorted = ahtable_unsorted_iter_begin(table);
+    return i;
+}
+
+/* Advance the iterator to the next entry (a no-op once it has finished). */
+void ahtable_iter_next(ahtable_iter_t* i)
+{
+    if (i->sorted) ahtable_sorted_iter_next(i->i.sorted);
+    else           ahtable_unsorted_iter_next(i->i.unsorted);
+}
+
+/* True when there is no current entry left to read. */
+bool ahtable_iter_finished(ahtable_iter_t* i)
+{
+    if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted);
+    else           return ahtable_unsorted_iter_finished(i->i.unsorted);
+}
+
+/* Free the iterator and its private state; NULL is a no-op. */
+void ahtable_iter_free(ahtable_iter_t* i)
+{
+    if (i == NULL) return;
+    if (i->sorted) ahtable_sorted_iter_free(i->i.sorted);
+    else           ahtable_unsorted_iter_free(i->i.unsorted);
+    free(i);
+}
+
+/* Key bytes of the current entry (NULL when finished); the key length is reported through len. */
+const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len)
+{
+    if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len);
+    else           return ahtable_unsorted_iter_key(i->i.unsorted, len);
+}
+
+/* Pointer to the value of the current entry (NULL when finished). */
+value_t* ahtable_iter_val(ahtable_iter_t* i)
+{
+    if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted);
+    else           return ahtable_unsorted_iter_val(i->i.unsorted);
+}
+
diff --git a/src/ahtable.h b/src/ahtable.h
new file mode 100644
index 0000000..15e8e21
--- /dev/null
+++ b/src/ahtable.h
@@ -0,0 +1,115 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ *
+ * This is an implementation of the 'cache-conscious' hash tables described in,
+ *
+ *    Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
+ *    string hash tables. String Processing and Information Retrieval (pp.
+ *    91–102). Springer.
+ *
+ *    http://naskitis.com/naskitis-spire05.pdf
+ *
+ * Briefly, the idea behind an Array Hash Table is, as opposed to separate
+ * chaining with linked lists, to store keys contiguously in one big array,
+ * thereby improving the caching behavior, and reducing space requirements.
+ *
+ * ahtable keeps a fixed number (array) of slots, each of which contains a
+ * variable number of key/value pairs. Each key is preceded by its length--
+ * one byte for lengths < 128 bytes, and TWO bytes for longer keys. The least
+ * significant bit of the first byte indicates, if set, that the size is two
+ * bytes. The slot number where a key/value pair goes is determined by finding
+ * the murmurhashed integer value of its key, modulus the number of slots.
+ * The number of slots expands in a stepwise fashion when the number of
+ * key/value pairs reaches an arbitrarily large number.
+ *
+ * +-------+-------+-------+-------+-------+-------+
+ * |   0   |   1   |   2   |   3   |  ...  |   N   |
+ * +-------+-------+-------+-------+-------+-------+
+ *     |       |       |       |               |
+ *     v       |       |       v               v
+ *    NULL     |       |     4html[VALUE]     etc.
+ *             |       v
+ *             |     5space[VALUE]4jury[VALUE]
+ *             v
+ *           6justice[VALUE]3car[VALUE]4star[VALUE]
+ *
+ */
+
+#ifndef HATTRIE_AHTABLE_H
+#define HATTRIE_AHTABLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include "pstdint.h"
+#include "common.h"
+
+typedef unsigned char* slot_t;
+
+typedef struct ahtable_t_
+{
+    /* these fields are reserved for hattrie to fiddle with */
+    uint8_t flag;
+    unsigned char c0;
+    unsigned char c1;
+
+    size_t n;        // number of slots
+    size_t m;        // number of key/value pairs stored
+    size_t max_m;    // number of stored keys before we resize
+
+    size_t*  slot_sizes; // n entries: bytes in use in each slot
+    slot_t*  slots;      // n entries: byte buffers of packed key/value entries
+} ahtable_t;
+
+extern const double ahtable_max_load_factor;
+extern const size_t ahtable_initial_size;
+
+ahtable_t* ahtable_create   (void);         // Create an empty hash table.
+ahtable_t* ahtable_create_n (size_t n);     // Create an empty hash table, with
+                                            //  n slots reserved.
+
+void       ahtable_free   (ahtable_t*);       // Free all memory used by a table.
+void       ahtable_clear  (ahtable_t*);       // Remove all entries.
+size_t     ahtable_size   (const ahtable_t*); // Number of stored keys.
+size_t     ahtable_sizeof (const ahtable_t*); // Memory used by the table in bytes.
+
+
+/** Find the given key in the table, inserting it if it does not exist, and
+ * returning a pointer to its value.
+ *
+ * This pointer is not guaranteed to be valid after additional calls to
+ * ahtable_get, ahtable_del, ahtable_clear, or other functions that modify the
+ * table.
+ */
+value_t* ahtable_get (ahtable_t*, const char* key, size_t len);
+
+
+/* Find a given key in the table, return a NULL pointer if it does not exist. */
+value_t* ahtable_tryget (ahtable_t*, const char* key, size_t len);
+
+/* Remove a key from the table. Returns 0 if it was removed, -1 if it was not found. */
+int ahtable_del(ahtable_t*, const char* key, size_t len);
+
+
+typedef struct ahtable_iter_t_ ahtable_iter_t;
+
+ahtable_iter_t* ahtable_iter_begin     (const ahtable_t*, bool sorted);
+void            ahtable_iter_next      (ahtable_iter_t*);
+bool            ahtable_iter_finished  (ahtable_iter_t*);
+void            ahtable_iter_free      (ahtable_iter_t*);
+const char*     ahtable_iter_key       (ahtable_iter_t*, size_t* len);
+value_t*        ahtable_iter_val       (ahtable_iter_t*);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/common.h b/src/common.h
new file mode 100644
index 0000000..7a3116a
--- /dev/null
+++ b/src/common.h
@@ -0,0 +1,22 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ *
+ * Common typedefs, etc.
+ *
+ */
+
+
+#ifndef HATTRIE_COMMON_H
+#define HATTRIE_COMMON_H
+
+#include "pstdint.h"
+
+// an unsigned integer type wide enough to hold a pointer
+typedef uintptr_t value_t;
+
+#endif
+
+
diff --git a/src/hat-trie.c b/src/hat-trie.c
new file mode 100644
index 0000000..6121bb7
--- /dev/null
+++ b/src/hat-trie.c
@@ -0,0 +1,711 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ */
+
+#include "hat-trie.h"
+#include "ahtable.h"
+#include "misc.h"
+#include "pstdint.h"
+#include <assert.h>
+#include <string.h>
+
+#define HT_UNUSED(x) ((void) (x)) /* marks x as used without assigning it to itself */
+
+/* maximum number of keys that may be stored in a bucket before it is burst */
+static const size_t MAX_BUCKET_SIZE = 16384;
+#define NODE_MAXCHAR 0xff // largest key byte used as a child index (0x7f for 7-bit ASCII)
+#define NODE_CHILDS (NODE_MAXCHAR+1) // number of child slots in a trie node
+// bits of a node's flag byte
+static const uint8_t NODE_TYPE_TRIE          = 0x1; // node is a trie_node_t
+static const uint8_t NODE_TYPE_PURE_BUCKET   = 0x2; // bucket for one leading byte (c0 == c1), stored keys omit it
+static const uint8_t NODE_TYPE_HYBRID_BUCKET = 0x4; // bucket for a range of leading bytes, keys stored whole
+static const uint8_t NODE_HAS_VAL            = 0x8; // the trie node's val is in use
+
+
+struct trie_node_t_;
+
+/* Nodes may be trie nodes or buckets. This union allows us to keep a
+ * non-specific pointer. */
+typedef union node_ptr_
+{
+    ahtable_t*           b;    // the node seen as a bucket
+    struct trie_node_t_* t;    // the node seen as a trie node
+    uint8_t*             flag; // the node's first byte (assumes flag is the first member of both types)
+} node_ptr;
+
+/* A trie node: one child slot per possible key byte. */
+typedef struct trie_node_t_
+{
+    uint8_t flag; // NODE_TYPE_TRIE, plus NODE_HAS_VAL while val is in use
+
+    /* the value for the key that is consumed on a trie node */
+    value_t val;
+
+    /* Map a character to either a trie_node_t or a ahtable_t. The first byte
+     * must be examined to determine which. */
+    node_ptr xs[NODE_CHILDS];
+
+} trie_node_t;
+
+struct hattrie_t_
+{
+    node_ptr root; // root node
+    size_t m;      // number of stored keys
+};
+
+
+// Number of keys currently stored in the trie (the running count kept in T->m).
+size_t hattrie_size(const hattrie_t* T)
+{
+    return T->m;
+}
+
+/* Bytes used by the subtree rooted at node. A bucket reports ahtable_sizeof();
+ * a trie node adds its own size to that of each child, where a run of adjacent
+ * slots referring to the same child is counted once. */
+static size_t node_sizeof(node_ptr node)
+{
+    const trie_node_t* trie = node.t;
+    size_t total, slot;
+    if (!(*node.flag & NODE_TYPE_TRIE)) return ahtable_sizeof(node.b);
+
+    total = sizeof(trie_node_t) + node_sizeof(trie->xs[0]);
+    for (slot = 1; slot < NODE_CHILDS; ++slot) {
+        if (trie->xs[slot].t == trie->xs[slot - 1].t) continue;
+        total += node_sizeof(trie->xs[slot]);
+    }
+    return total;
+}
+
+// Bytes used by the trie: the hattrie_t header plus everything below the root.
+size_t hattrie_sizeof(const hattrie_t* T)
+{
+    return sizeof(hattrie_t) + node_sizeof(T->root);
+}
+
+
+/* Allocate a trie node that holds no value and whose NODE_CHILDS slots all
+ * refer to child. T is accepted only so a per-trie allocator can be added. */
+static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child)
+{
+    trie_node_t* fresh;
+    node_ptr* slot;
+
+    HT_UNUSED(T); /* unused now */
+
+    fresh = malloc_or_die(sizeof *fresh);
+    fresh->flag = NODE_TYPE_TRIE;
+    fresh->val  = 0;
+    for (slot = fresh->xs; slot != fresh->xs + NODE_CHILDS; ++slot) *slot = child;
+    return fresh;
+}
+
+/* Walk down trie nodes, one key byte per step, until a bucket is found or only
+ * brk bytes remain, advancing *p, *k and *l. Returns the node selected by **k,
+ * or *p itself once the key is used up, so that k[len] is never read. */
+static node_ptr hattrie_consume(node_ptr *p, const char **k, size_t *l, unsigned brk)
+{
+    node_ptr node = p->t->xs[(unsigned char) **k];
+    while (*node.flag & NODE_TYPE_TRIE && *l > brk) {
+        ++*k;
+        --*l;
+        *p = node;
+        if (*l == 0) break; /* key exhausted: node is the trie node that owns it */
+        node = node.t->xs[(unsigned char) **k];
+    }
+    assert(*p->flag & NODE_TYPE_TRIE);
+    return node;
+}
+
+/* Flag trie node n as holding a value (the first use counts a new key in T)
+ * and return the address of that value. */
+static inline value_t* hattrie_useval(hattrie_t *T, node_ptr n)
+{
+    trie_node_t* trie = n.t;
+
+    if ((trie->flag & NODE_HAS_VAL) == 0) { trie->flag |= NODE_HAS_VAL; ++T->m; }
+    return &trie->val;
+}
+
+/* Drop the value held by trie node n: 0 on success, -1 if it held none. */
+static inline int hattrie_clrval(hattrie_t *T, node_ptr n)
+{
+    trie_node_t* trie = n.t;
+    if (!(trie->flag & NODE_HAS_VAL)) return -1;
+
+    trie->flag &= ~NODE_HAS_VAL;
+    trie->val   = 0;
+    T->m       -= 1;
+    return 0;
+}
+
+/* find the trie node or bucket that key leads to; flag == NULL means not found */
+static node_ptr hattrie_find(hattrie_t* T, const char **key, size_t *len)
+{
+    node_ptr parent = T->root;
+    assert(*parent.flag & NODE_TYPE_TRIE);
+    /* the empty key leads to the root; NODE_HAS_VAL is not checked on this path */
+    if (*len == 0) return parent;
+    /* brk = 1 keeps the last key byte, so node is always selected by a byte of the key */
+    node_ptr node = hattrie_consume(&parent, key, len, 1);
+
+    /* key ends on this trie node: a hit only if it holds a value, else flag = NULL */
+    if (*node.flag & NODE_TYPE_TRIE) {
+        if (!(node.t->flag & NODE_HAS_VAL)) {
+            node.flag = NULL;
+        }
+        return node;
+    }
+
+    /* pure bucket holds only key suffixes, skip current char */
+    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
+        *key += 1;
+        *len -= 1;
+    }
+
+    /* do not scan bucket, it's not needed for this operation */
+    return node;
+}
+
+/* Create an empty trie: a root trie node over one shared, empty hybrid bucket. */
+hattrie_t* hattrie_create(void)
+{
+    hattrie_t* T = malloc_or_die(sizeof(hattrie_t));
+    node_ptr node;
+
+    node.b = ahtable_create();
+    node.b->flag = NODE_TYPE_HYBRID_BUCKET;
+    node.b->c0 = 0x00;
+    node.b->c1 = NODE_MAXCHAR;
+    T->m = 0;
+    T->root.t = alloc_trie_node(T, node); /* all root slots refer to that bucket */
+    return T;
+}
+
+/* Free node and its whole subtree; adjacent slots sharing a child free it once. */
+static void hattrie_free_node(node_ptr node)
+{
+    size_t slot;
+
+    if (!(*node.flag & NODE_TYPE_TRIE)) {
+        ahtable_free(node.b);
+        return;
+    }
+
+    for (slot = 0; slot < NODE_CHILDS; ++slot) {
+        node_ptr child = node.t->xs[slot];
+        /* XXX: recursion might not be the best choice: a trie can get very deep. */
+        if (slot != 0 && child.t == node.t->xs[slot - 1].t) continue;
+        if (child.t != NULL) hattrie_free_node(child);
+    }
+    free(node.t);
+}
+
+/* Free a trie and everything below its root; NULL is ignored, as by free(). */
+void hattrie_free(hattrie_t* T)
+{
+    if (T != NULL) hattrie_free_node(T->root);
+    free(T);
+}
+
+/* Remove all entries: free every node, rebuild the empty root, zero the count. */
+void hattrie_clear(hattrie_t* T)
+{
+    hattrie_free_node(T->root);
+    node_ptr node = { .b = ahtable_create() };
+    node.b->flag = NODE_TYPE_HYBRID_BUCKET;
+    node.b->c0 = 0x00;
+    node.b->c1 = NODE_MAXCHAR;
+    T->root.t = alloc_trie_node(T, node);
+    T->m = 0; /* nothing is stored any more, so hattrie_size() must report 0 */
+}
+
+
+/* Perform one split operation on the given node with the given parent: a pure
+ * bucket is pushed below a new trie node, a hybrid bucket is cut in two. */
+static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
+{
+    /* only buckets may be split */
+    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
+           *node.flag & NODE_TYPE_HYBRID_BUCKET);
+
+    assert(*parent.flag & NODE_TYPE_TRIE);
+
+    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
+        /* put a new trie node in the bucket's slot; all its slots refer to the bucket */
+        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);
+
+        /* if the bucket had an empty key, move it to the new trie node */
+        value_t* val = ahtable_tryget(node.b, NULL, 0);
+        if (val) {
+            parent.t->xs[node.b->c0].t->val     = *val;
+            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
+            *val = 0;
+            ahtable_del(node.b, NULL, 0);
+        }
+        /* the bucket becomes a hybrid bucket serving every byte below the new node */
+        node.b->c0   = 0x00;
+        node.b->c1   = NODE_MAXCHAR;
+        node.b->flag = NODE_TYPE_HYBRID_BUCKET;
+
+        return;
+    }
+
+    /* This is a hybrid bucket. Perform a proper split. */
+
+    /* count the number of occurrences of every leading character */
+    unsigned int cs[NODE_CHILDS]; // occurrence count for leading chars
+    memset(cs, 0, NODE_CHILDS * sizeof(unsigned int));
+    size_t len;
+    const char* key;
+
+    ahtable_iter_t* i = ahtable_iter_begin(node.b, false);
+    while (!ahtable_iter_finished(i)) {
+        key = ahtable_iter_key(i, &len);
+        assert(len > 0);
+        cs[(unsigned char) key[0]] += 1;
+        ahtable_iter_next(i);
+    }
+    ahtable_iter_free(i);
+
+    /* choose a split point j: extend the left range while that balances the halves */
+    unsigned int left_m, right_m, all_m;
+    unsigned char j = node.b->c0;
+    all_m   = ahtable_size(node.b);
+    left_m  = cs[j];
+    right_m = all_m - left_m;
+    int d;
+
+    while (j + 1 < node.b->c1) {
+        d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1]));
+        if (d <= abs(left_m - right_m) && left_m + cs[j + 1] < all_m) {
+            j += 1;
+            left_m  += cs[j];
+            right_m -= cs[j];
+        }
+        else break;
+    }
+
+    /* now split into two nodes corresponding to the ranges [c0, j] and
+     * [j + 1, c1] of the old bucket, respectively. */
+
+
+    /* create new left and right nodes */
+
+    /* TODO: Add a special case if either node is a hybrid bucket containing all
+     * the keys. In such a case, do not build a new table, just use the old one.
+     * */
+    size_t num_slots;
+
+    /* size each new table so that its share of the keys stays under the max load factor */
+    for (num_slots = ahtable_initial_size;
+            (double) left_m > ahtable_max_load_factor * (double) num_slots;
+            num_slots *= 2);
+
+    node_ptr left, right;
+    left.b  = ahtable_create_n(num_slots);
+    left.b->c0   = node.b->c0;
+    left.b->c1   = j;
+    left.b->flag = left.b->c0 == left.b->c1 ?
+                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;
+
+
+    for (num_slots = ahtable_initial_size;
+            (double) right_m > ahtable_max_load_factor * (double) num_slots;
+            num_slots *= 2);
+
+    right.b = ahtable_create_n(num_slots);
+    right.b->c0   = j + 1;
+    right.b->c1   = node.b->c1;
+    right.b->flag = right.b->c0 == right.b->c1 ?
+                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;
+
+
+    /* update the parent's pointer */
+
+    unsigned int c;
+    for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left;
+    for (; c <= node.b->c1; ++c)      parent.t->xs[c] = right;
+
+
+    /* a side that became a pure bucket stores its keys without the leading byte */
+    /* distribute keys to the new left or right node */
+    value_t* u;
+    value_t* v;
+    i = ahtable_iter_begin(node.b, false);
+    while (!ahtable_iter_finished(i)) {
+        key = ahtable_iter_key(i, &len);
+        u   = ahtable_iter_val(i);
+        assert(len > 0);
+
+        /* left */
+        if ((unsigned char) key[0] <= j) {
+            if (*left.flag & NODE_TYPE_PURE_BUCKET) {
+                v = ahtable_get(left.b, key + 1, len - 1);
+            }
+            else {
+                v = ahtable_get(left.b, key, len);
+            }
+            *v = *u;
+        }
+
+        /* right */
+        else {
+            if (*right.flag & NODE_TYPE_PURE_BUCKET) {
+                v = ahtable_get(right.b, key + 1, len - 1);
+            }
+            else {
+                v = ahtable_get(right.b, key, len);
+            }
+            *v = *u;
+        }
+
+        ahtable_iter_next(i);
+    }
+
+    ahtable_iter_free(i);
+    ahtable_free(node.b);
+}
+
+/* Find the given key, inserting it if it does not exist, and return a pointer
+ * to its value.
+ *
+ * A key that ends exactly on a trie node (the empty key ends on the root) is
+ * kept in that node's val; any other key goes into the bucket reached from the
+ * last trie node. A full bucket is split before the key is added to it. */
+value_t* hattrie_get(hattrie_t* T, const char* key, size_t len)
+{
+    node_ptr parent = T->root;
+    assert(*parent.flag & NODE_TYPE_TRIE);
+
+    /* The empty key lives on the root. hattrie_useval() flags it as present
+     * and counts it, exactly like any other key held by a trie node. */
+    if (len == 0) return hattrie_useval(T, parent);
+
+    /* consume all trie nodes, now parent must be trie and child anything */
+    node_ptr node = hattrie_consume(&parent, &key, &len, 0);
+    assert(*parent.flag & NODE_TYPE_TRIE);
+
+    /* If the key has been consumed on a trie node, that node is parent and it
+     * owns the value. Only parent is used here; node need not belong to this
+     * key at all, whatever type it has. */
+    if (len == 0) return hattrie_useval(T, parent);
+
+    /* preemptively split the bucket if it is full */
+    while (ahtable_size(node.b) >= MAX_BUCKET_SIZE) {
+        hattrie_split(T, parent, node);
+
+        /* after the split, the node pointer is invalidated, so we search from
+         * the parent again. */
+        node = hattrie_consume(&parent, &key, &len, 0);
+
+        /* a trie node created by the split may have consumed the rest of the
+         * key */
+        if (len == 0) return hattrie_useval(T, parent);
+    }
+
+    assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET);
+
+    assert(len > 0);
+
+    size_t m_old = node.b->m;
+    value_t* val;
+    /* a pure bucket stores keys without their leading byte */
+    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
+        val = ahtable_get(node.b, key + 1, len - 1);
+    }
+    else {
+        val = ahtable_get(node.b, key, len);
+    }
+
+    /* carry any growth of the bucket over to the trie's key count */
+    T->m += (node.b->m - m_old);
+
+    return val;
+}
+
+
+/* Look key up without inserting it. Returns a pointer to its value, or NULL
+ * when hattrie_find() or the bucket it leads to does not have the key. */
+value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
+{
+    /* key and len are advanced to the part that a bucket stores */
+    const node_ptr hit = hattrie_find(T, &key, &len);
+
+    if (hit.flag == NULL) return NULL;
+
+    /* a trie node carries the value itself */
+    if (*hit.flag & NODE_TYPE_TRIE) return &hit.t->val;
+
+    /* anything else is a bucket, which still has to be searched */
+    return ahtable_tryget(hit.b, key, len);
+}
+
+// Delete key from the trie. Returns -1 when no node holds it; hat-trie.h documents 0 for success.
+int hattrie_del(hattrie_t* T, const char* key, size_t len)
+{
+    node_ptr parent = T->root;
+    HT_UNUSED(parent); /* parent is otherwise only read by the assert below */
+    assert(*parent.flag & NODE_TYPE_TRIE);
+
+    /* find node for deletion; key and len are advanced to the part a bucket stores */
+    node_ptr node = hattrie_find(T, &key, &len);
+    if (node.flag == NULL) {
+        return -1;
+    }
+
+    /* if consumed on a trie node, clear the value */
+    if (*node.flag & NODE_TYPE_TRIE) {
+        return hattrie_clrval(T, node);
+    }
+
+    /* remove from bucket; the trie's count drops by however much the bucket shrank */
+    size_t m_old = ahtable_size(node.b);
+    int ret =  ahtable_del(node.b, key, len);
+    T->m -= (m_old - ahtable_size(node.b));
+
+    /* merge empty buckets */
+    /*! \todo */
+
+    return ret;
+}
+
+
+/* plan for iteration:
+ * This is tricky, as we have no parent pointers currently, and I would like to
+ * avoid adding them. That means maintaining an explicit stack of the nodes
+ * that are still to be visited; this is one entry of that stack.
+ */
+
+typedef struct hattrie_node_stack_t_
+{
+    unsigned char   c; // a key byte whose slot in the parent refers to node
+    size_t level;      // depth of node: the root is 0, its children are 1, ...
+
+    node_ptr node;     // trie node or bucket waiting to be visited
+    struct hattrie_node_stack_t_* next; // entry below this one, NULL at the bottom
+
+} hattrie_node_stack_t;
+
+
+struct hattrie_iter_t_
+{
+    char* key;      // buffer in which the current key is assembled
+    size_t keysize; // space reserved for the key
+    size_t level;   // number of leading bytes of key that come from trie nodes
+
+    /* keep track of keys stored in trie nodes */
+    bool    has_nil_key; // the current key is one held by a trie node
+    value_t nil_val;     // copy of that trie node's value
+
+    const hattrie_t* T;  // trie being iterated
+    bool sorted;         // passed to ahtable_iter_begin() for every bucket
+    ahtable_iter_t* i;   // iterator over the current bucket, or NULL
+    hattrie_node_stack_t* stack; // nodes still to be visited
+};
+
+/* Record c as the key byte at depth level (key[level - 1]) and make level the
+ * current prefix length, growing the key buffer as far as needed. */
+static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c)
+{
+    if (i->keysize < level) {
+        /* double until it fits, as hattrie_iter_key() does; a single doubling
+         * is not guaranteed to reach level */
+        while (i->keysize < level) i->keysize *= 2;
+        i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
+    }
+
+    if (level > 0) i->key[level - 1] = c;
+    i->level = level;
+}
+
+// Pop one pending node: expand a trie node, or start iterating a bucket.
+static void hattrie_iter_nextnode(hattrie_iter_t* i)
+{
+    if (i->stack == NULL) return;
+
+    /* pop the stack */
+    node_ptr node;
+    hattrie_node_stack_t* next;
+    unsigned char   c;
+    size_t level;
+
+    node  = i->stack->node;
+    next  = i->stack->next;
+    c     = i->stack->c;
+    level = i->stack->level;
+
+    free(i->stack);
+    i->stack = next;
+
+    if (*node.flag & NODE_TYPE_TRIE) {
+        hattrie_iter_pushchar(i, level, c);
+        /* a value on the trie node itself is reported as a key of its own */
+        if(node.t->flag & NODE_HAS_VAL) {
+            i->has_nil_key = true;
+            i->nil_val = node.t->val;
+        }
+
+        /* push all child nodes from right to left */
+        int j;
+        for (j = NODE_MAXCHAR; j >= 0; --j) {
+
+            /* skip repeated pointers to hybrid bucket */
+            if (j < NODE_MAXCHAR && node.t->xs[j].t == node.t->xs[j + 1].t) continue;
+
+            // push stack
+            next = i->stack;
+            i->stack = malloc_or_die(sizeof(hattrie_node_stack_t));
+            i->stack->node  = node.t->xs[j];
+            i->stack->next  = next;
+            i->stack->level = level + 1;
+            i->stack->c     = (unsigned char) j;
+        }
+    }
+    else {
+        if (*node.flag & NODE_TYPE_PURE_BUCKET) {
+            hattrie_iter_pushchar(i, level, c);
+        }
+        else {
+            i->level = level - 1; /* hybrid keys keep their leading byte: prefix ends one level up */
+        }
+
+        i->i = ahtable_iter_begin(node.b, i->sorted);
+    }
+}
+
+// Create an iterator positioned on the first key; sorted is passed on to ahtable_iter_begin().
+hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted)
+{
+    hattrie_iter_t* i = malloc_or_die(sizeof(hattrie_iter_t));
+    i->T = T;
+    i->sorted = sorted;
+    i->i = NULL;
+    i->keysize = 16;
+    i->key = malloc_or_die(i->keysize * sizeof(char));
+    i->level   = 0;
+    i->has_nil_key = false;
+    i->nil_val     = 0;
+
+    i->stack = malloc_or_die(sizeof(hattrie_node_stack_t));
+    i->stack->next   = NULL;
+    i->stack->node   = T->root;
+    i->stack->c      = '\0';
+    i->stack->level  = 0;
+
+    /* pop nodes until a bucket with keys or a trie node with a value turns up */
+    while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
+           i->stack != NULL ) {
+        /* NOTE(review): i->i can be NULL here; relies on ahtable_iter_free() accepting NULL */
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+        hattrie_iter_nextnode(i);
+    }
+
+    if (i->i != NULL && ahtable_iter_finished(i->i)) {
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+    }
+
+    return i;
+}
+
+// Advance the iterator to the next key; does nothing once it is finished.
+void hattrie_iter_next(hattrie_iter_t* i)
+{
+    if (hattrie_iter_finished(i)) return;
+
+    if (i->i != NULL && !ahtable_iter_finished(i->i)) {
+        ahtable_iter_next(i->i);
+    }
+    else if (i->has_nil_key) {
+        i->has_nil_key = false;
+        i->nil_val = 0;
+        hattrie_iter_nextnode(i);
+    }
+
+    while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
+           i->stack != NULL ) {
+        /* NOTE(review): i->i can be NULL here; relies on ahtable_iter_free() accepting NULL */
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+        hattrie_iter_nextnode(i);
+    }
+
+    if (i->i != NULL && ahtable_iter_finished(i->i)) {
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+    }
+}
+
+// True once no node is pending, no bucket is being iterated and no trie-node key is current.
+bool hattrie_iter_finished(hattrie_iter_t* i)
+{
+    return i->stack == NULL && i->i == NULL && !i->has_nil_key;
+}
+
+/* Free an iterator with its bucket iterator, node stack and key; NULL is ignored. */
+void hattrie_iter_free(hattrie_iter_t* i)
+{
+    hattrie_node_stack_t* frame;
+
+    if (i == NULL) return;
+    if (i->i != NULL) ahtable_iter_free(i->i);
+
+    for (frame = i->stack; frame != NULL; frame = i->stack) {
+        i->stack = frame->next;
+        free(frame);
+    }
+
+    free(i->key);
+    free(i);
+}
+
+/* Return the current key (trie prefix plus the part stored in the bucket),
+ * NUL-terminated in a buffer owned by the iterator; NULL when finished. If
+ * len is not NULL it receives the key length, terminator excluded. */
+const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len)
+{
+    if (hattrie_iter_finished(i)) return NULL;
+
+    /* a key held by a trie node has no bucket part */
+    const char* subkey = NULL;
+    size_t sublen = 0;
+    if (!i->has_nil_key) subkey = ahtable_iter_key(i->i, &sublen);
+
+    if (i->keysize < i->level + sublen + 1) {
+        while (i->keysize < i->level + sublen + 1) i->keysize *= 2;
+        i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
+    }
+
+    /* subkey may be NULL when there is nothing to append, and memcpy() must
+     * not be given a null pointer even for zero bytes */
+    if (sublen > 0) memcpy(i->key + i->level, subkey, sublen);
+    i->key[i->level + sublen] = '\0';
+
+    if (len) *len = i->level + sublen;
+    return i->key;
+}
+
+// Value of the current key, or NULL when finished; for a trie-node key this points at the iterator's copy.
+value_t* hattrie_iter_val(hattrie_iter_t* i)
+{
+    if (i->has_nil_key) return &i->nil_val;
+
+    if (hattrie_iter_finished(i)) return NULL;
+
+    return ahtable_iter_val(i->i);
+}
+
+
+// Equal means: same trie, same sorted flag and the same bucket-iterator pointer (positions are not compared).
+bool hattrie_iter_equal(const hattrie_iter_t* a,
+                        const hattrie_iter_t* b)
+{
+    return a->T      == b->T &&
+           a->sorted == b->sorted &&
+           a->i      == b->i;
+}
diff --git a/src/hat-trie.h b/src/hat-trie.h
new file mode 100644
index 0000000..b6b0653
--- /dev/null
+++ b/src/hat-trie.h
@@ -0,0 +1,74 @@
+/*
+ * This file is part of hat-trie
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ *
+ * This is an implementation of the HAT-trie data structure described in,
+ *
+ *    Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
+ *    structure for strings. Proceedings of the thirtieth Australasian conference on
+ *    Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc.
+ *
+ * The HAT-trie is in essence a hybrid data structure, combining tries and hash
+ * tables in a clever way to try to get the best of both worlds.
+ *
+ */
+
+#ifndef HATTRIE_HATTRIE_H
+#define HATTRIE_HATTRIE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "common.h"
+#include <stdlib.h>
+#include <stdbool.h>
+
+typedef struct hattrie_t_ hattrie_t;
+
+hattrie_t* hattrie_create (void);             // Create an empty hat-trie.
+void       hattrie_free   (hattrie_t*);       // Free all memory used by a trie.
+hattrie_t* hattrie_dup    (const hattrie_t*); // Duplicate an existing trie. NOTE(review): no definition in hat-trie.c.
+void       hattrie_clear  (hattrie_t*);       // Remove all entries.
+size_t     hattrie_size   (const hattrie_t*); // Number of stored keys.
+size_t     hattrie_sizeof (const hattrie_t*); // Memory used by the structure in bytes.
+
+
+/** Find the given key in the trie, inserting it if it does not exist, and
+ * returning a pointer to its value.
+ *
+ * This pointer is not guaranteed to be valid after additional calls to
+ * hattrie_get, hattrie_del, hattrie_clear, or other functions that modify the
+ * trie.
+ */
+value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
+
+
+/** Find a given key in the trie, returning a NULL pointer if it does not
+ * exist. */
+value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
+
+/** Delete a given key from trie. Returns 0 if successful or -1 if not found.
+ */
+int hattrie_del(hattrie_t* T, const char* key, size_t len);
+
+typedef struct hattrie_iter_t_ hattrie_iter_t;
+
+hattrie_iter_t* hattrie_iter_begin     (const hattrie_t*, bool sorted);
+void            hattrie_iter_next      (hattrie_iter_t*);
+bool            hattrie_iter_finished  (hattrie_iter_t*);
+void            hattrie_iter_free      (hattrie_iter_t*);
+const char*     hattrie_iter_key       (hattrie_iter_t*, size_t* len);
+value_t*        hattrie_iter_val       (hattrie_iter_t*);
+
+/* Return true if two iterators are equal. */
+bool            hattrie_iter_equal     (const hattrie_iter_t* a,
+                                        const hattrie_iter_t* b);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/misc.c b/src/misc.c
new file mode 100644
index 0000000..0530c34
--- /dev/null
+++ b/src/misc.c
@@ -0,0 +1,46 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ */
+
+#include "misc.h"
+#include <stdlib.h>
+
+/* malloc() that prints a message and exits when n bytes cannot be allocated. */
+void* malloc_or_die(size_t n)
+{
+    void* const block = malloc(n);
+
+    /* a NULL result for a zero-byte request is not a failure */
+    if (block != NULL || n == 0) return block;
+    fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
+    exit(EXIT_FAILURE);
+}
+
+/* realloc() that prints a message and exits when n bytes cannot be allocated. */
+void* realloc_or_die(void* ptr, size_t n)
+{
+    void* const block = realloc(ptr, n);
+
+    /* a NULL result for a zero-byte request is not a failure */
+    if (block != NULL || n == 0) return block;
+    fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
+    exit(EXIT_FAILURE);
+}
+
+/* fopen() that prints a message and exits when the file cannot be opened. */
+FILE* fopen_or_die(const char* path, const char* mode)
+{
+    FILE* const stream = fopen(path, mode);
+
+    if (stream != NULL) return stream;
+
+    fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode);
+    exit(EXIT_FAILURE);
+}
+
+
+
+
diff --git a/src/misc.h b/src/misc.h
new file mode 100644
index 0000000..7223b8b
--- /dev/null
+++ b/src/misc.h
@@ -0,0 +1,22 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ * misc :
+ * miscellaneous functions.
+ *
+ */
+
+#ifndef LINESET_MISC_H
+#define LINESET_MISC_H
+
+#include <stdio.h>
+
+void* malloc_or_die(size_t);
+void* realloc_or_die(void*, size_t);
+FILE* fopen_or_die(const char*, const char*);
+
+#endif
+
+
diff --git a/src/murmurhash3.c b/src/murmurhash3.c
new file mode 100644
index 0000000..cb24c8f
--- /dev/null
+++ b/src/murmurhash3.c
@@ -0,0 +1,77 @@
+/* This is MurmurHash3. The original C++ code was placed in the public domain
+ * by its author, Austin Appleby. */
+
+#include "murmurhash3.h"
+
+/* Final mixing step of the hash: xor-shift and multiply rounds applied to the
+ * accumulated state h. */
+static inline uint32_t fmix(uint32_t h)
+{
+    h = (h ^ (h >> 16)) * 0x85ebca6b;
+    h = (h ^ (h >> 13)) * 0xc2b2ae35;
+    h = h ^ (h >> 16);
+
+    return h;
+}
+
+/* Rotate x left by r bits; r is taken modulo 32 so that r == 0 is well defined. */
+static inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+    return (x << ((unsigned) r & 31u)) | (x >> ((0u - (unsigned) r) & 31u));
+}
+
+
+/* MurmurHash3 (32-bit) of the len_ bytes at data, using a fixed seed. data may
+ * have any alignment: every 4-byte block is copied byte by byte into a uint32_t
+ * rather than being read through a uint32_t pointer. */
+uint32_t hash(const char* data, size_t len_)
+{
+    const int len = (int) len_;
+    const int nblocks = len / 4;
+    const unsigned char* p = (const unsigned char*) data;
+
+    uint32_t h1 = 0xc062fb4a;
+
+    uint32_t c1 = 0xcc9e2d51;
+    uint32_t c2 = 0x1b873593;
+
+    //----------
+    // body
+
+    int i;
+    for(i = 0; i < nblocks; i++, p += 4)
+    {
+        /* native byte order, so the value equals that of a direct 32-bit load */
+        uint32_t k1;
+        unsigned char* dst = (unsigned char*) &k1;
+        dst[0] = p[0]; dst[1] = p[1]; dst[2] = p[2]; dst[3] = p[3];
+
+        k1 *= c1;
+        k1 = rotl32(k1, 15);
+        k1 *= c2;
+
+        h1 ^= k1;
+        h1 = rotl32(h1, 13);
+        h1 = h1*5+0xe6546b64;
+    }
+
+    //----------
+    // tail: the 0-3 bytes after the last whole block (p points at them now)
+
+    uint32_t k1 = 0;
+    switch(len & 3)
+    {
+        case 3: k1 ^= (uint32_t) p[2] << 16; /* fall through */
+        case 2: k1 ^= (uint32_t) p[1] << 8;  /* fall through */
+        case 1: k1 ^= p[0];
+              k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
+    }
+
+    //----------
+    // finalization
+
+    h1 ^= len;
+    h1 = fmix(h1);
+    return h1;
+}
+
diff --git a/src/murmurhash3.h b/src/murmurhash3.h
new file mode 100644
index 0000000..37fbf41
--- /dev/null
+++ b/src/murmurhash3.h
@@ -0,0 +1,12 @@
+
+#ifndef MURMURHASH3_H
+#define MURMURHASH3_H
+
+#include <stdlib.h>
+
+#include "pstdint.h"
+
+uint32_t hash(const char* data, size_t len);
+
+#endif
+
diff --git a/src/pstdint.h b/src/pstdint.h
new file mode 100644
index 0000000..18a26b5
--- /dev/null
+++ b/src/pstdint.h
@@ -0,0 +1,813 @@
+/*  A portable stdint.h
+ ****************************************************************************
+ *  BSD License:
+ ****************************************************************************
+ *
+ *  Copyright (c) 2005-2014 Paul Hsieh
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ *  Version 0.1.14
+ *
+ *  The ANSI C standard committee, for the C99 standard, specified the
+ *  inclusion of a new standard include file called stdint.h.  This is
+ *  a very useful and long desired include file which contains several
+ *  very precise definitions for integer scalar types that is
+ *  critically important for making portable several classes of
+ *  applications including cryptography, hashing, variable length
+ *  integer libraries and so on.  But for most developers it's likely
+ *  useful just for programming sanity.
+ *
+ *  The problem is that most compiler vendors have decided not to
+ *  implement the C99 standard, and the next C++ language standard
+ *  (which has a lot more mindshare these days) will be a long time in
+ *  coming and its unknown whether or not it will include stdint.h or
+ *  how much adoption it will have.  Either way, it will be a long time
+ *  before all compilers come with a stdint.h and it also does nothing
+ *  for the extremely large number of compilers available today which
+ *  do not include this file, or anything comparable to it.
+ *
+ *  So that's what this file is all about.  It's an attempt to build a
+ *  single universal include file that works on as many platforms as
+ *  possible to deliver what stdint.h is supposed to.  A few things
+ *  that should be noted about this file:
+ *
+ *    1) It is not guaranteed to be portable and/or present an identical
+ *       interface on all platforms.  The extreme variability of the
+ *       ANSI C standard makes this an impossibility right from the
+ *       very get go. Its really only meant to be useful for the vast
+ *       majority of platforms that possess the capability of
+ *       implementing usefully and precisely defined, standard sized
+ *       integer scalars.  Systems which are not intrinsically 2s
+ *       complement may produce invalid constants.
+ *
+ *    2) There is an unavoidable use of non-reserved symbols.
+ *
+ *    3) Other standard include files are invoked.
+ *
+ *    4) This file may come in conflict with future platforms that do
+ *       include stdint.h.  The hope is that one or the other can be
+ *       used with no real difference.
+ *
+ *    5) In the current version, if your platform can't represent
+ *       int32_t, int16_t and int8_t, it just dumps out with a compiler
+ *       error.
+ *
+ *    6) 64 bit integers may or may not be defined.  Test for their
+ *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ *       Note that this is different from the C99 specification which
+ *       requires the existence of 64 bit support in the compiler.  If
+ *       this is not defined for your platform, yet it is capable of
+ *       dealing with 64 bits then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities.
+ *
+ *    7) (u)intptr_t may or may not be defined.  Test for its presence
+ *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
+ *       for your platform, then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities, not
+ *       because its optional.
+ *
+ *    8) The following might not be defined even if your platform is
+ *       capable of defining it:
+ *
+ *       WCHAR_MIN
+ *       WCHAR_MAX
+ *       (u)int64_t
+ *       PTRDIFF_MIN
+ *       PTRDIFF_MAX
+ *       (u)intptr_t
+ *
+ *    9) The following have not been defined:
+ *
+ *       WINT_MIN
+ *       WINT_MAX
+ *
+ *   10) The criteria for defining (u)int_least(*)_t isn't clear,
+ *       except for systems which don't have a type that precisely
+ *       defined 8, 16, or 32 bit types (which this include file does
+ *       not support anyways). Default definitions have been given.
+ *
+ *   11) The criteria for defining (u)int_fast(*)_t isn't something I
+ *       would trust to any particular compiler vendor or the ANSI C
+ *       committee.  It is well known that "compatible systems" are
+ *       commonly created that have very different performance
+ *       characteristics from the systems they are compatible with,
+ *       especially those whose vendors make both the compiler and the
+ *       system.  Default definitions have been given, but its strongly
+ *       recommended that users never use these definitions for any
+ *       reason (they do *NOT* deliver any serious guarantee of
+ *       improved performance -- not in this file, nor any vendor's
+ *       stdint.h).
+ *
+ *   12) The following macros:
+ *
+ *       PRINTF_INTMAX_MODIFIER
+ *       PRINTF_INT64_MODIFIER
+ *       PRINTF_INT32_MODIFIER
+ *       PRINTF_INT16_MODIFIER
+ *       PRINTF_LEAST64_MODIFIER
+ *       PRINTF_LEAST32_MODIFIER
+ *       PRINTF_LEAST16_MODIFIER
+ *       PRINTF_INTPTR_MODIFIER
+ *
+ *       are strings which have been defined as the modifiers required
+ *       for the "d", "u" and "x" printf formats to correctly output
+ *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
+ *       defined if INT64_MAX is not defined.  These are an extension
+ *       beyond what C99 specifies must be in stdint.h.
+ *
+ *       In addition, the following macros are defined:
+ *
+ *       PRINTF_INTMAX_HEX_WIDTH
+ *       PRINTF_INT64_HEX_WIDTH
+ *       PRINTF_INT32_HEX_WIDTH
+ *       PRINTF_INT16_HEX_WIDTH
+ *       PRINTF_INT8_HEX_WIDTH
+ *       PRINTF_INTMAX_DEC_WIDTH
+ *       PRINTF_INT64_DEC_WIDTH
+ *       PRINTF_INT32_DEC_WIDTH
+ *       PRINTF_INT16_DEC_WIDTH
+ *       PRINTF_INT8_DEC_WIDTH
+ *
+ *       Which specifies the maximum number of characters required to
+ *       print the number of that type in either hexadecimal or decimal.
+ *       These are an extension beyond what C99 specifies must be in
+ *       stdint.h.
+ *
+ *  Compilers tested (all with 0 warnings at their highest respective
+ *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ *  This file should be considered a work in progress.  Suggestions for
+ *  improvements, especially those which increase coverage are strongly
+ *  encouraged.
+ *
+ *  Acknowledgements
+ *
+ *  The following people have made significant contributions to the
+ *  development and testing of this file:
+ *
+ *  Chris Howie
+ *  John Steele Scott
+ *  Dave Thorup
+ *  John Dill
+ *  Florian Wobbe
+ *  Christopher Sean Morrison
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
+/*
+ *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
+ *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
+ */
+
+#if ((defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED /* also makes the fallback definitions further down get skipped */
+# if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__))
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "l"
+#  endif
+#  ifndef PRINTF_INT32_MODIFIER
+#   define PRINTF_INT32_MODIFIER ""
+#  endif
+# else /* NOTE(review): taken by every target other than GCC on x86_64/ppc64, including other 64-bit ones -- confirm the modifiers match there */
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+#  ifndef PRINTF_INT32_MODIFIER
+#   define PRINTF_INT32_MODIFIER "l"
+#  endif
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH
+#  define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+#  define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+#  define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+#  define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+#  define PRINTF_INT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+#  define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+#  define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+#  define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+
+/*
+ *  Something really weird is going on with Open Watcom.  Just pull some of
+ *  these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+#  if !defined (INT64_C)
+#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
+#  endif
+#  if !defined (UINT64_C)
+#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
+#  endif
+#  if !defined (INT32_C)
+#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
+#  endif
+#  if !defined (UINT32_C)
+#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
+#  endif
+#  if !defined (INT16_C)
+#   define INT16_C(x)   (x)
+#  endif
+#  if !defined (UINT16_C)
+#   define UINT16_C(x)  (x)
+#  endif
+#  if !defined (INT8_C)
+#   define INT8_C(x)   (x)
+#  endif
+#  if !defined (UINT8_C)
+#   define UINT8_C(x)  (x)
+#  endif
+#  if !defined (UINT64_MAX)
+#   define UINT64_MAX  18446744073709551615ULL
+#  endif
+#  if !defined (INT64_MAX)
+#   define INT64_MAX  9223372036854775807LL
+#  endif
+#  if !defined (UINT32_MAX)
+#   define UINT32_MAX  4294967295UL
+#  endif
+#  if !defined (INT32_MAX)
+#   define INT32_MAX  2147483647L
+#  endif
+#  if !defined (INTMAX_MAX)
+#   define INTMAX_MAX INT64_MAX
+#  endif
+#  if !defined (INTMAX_MIN)
+#   define INTMAX_MIN INT64_MIN /* NOTE(review): INT64_MIN is not defined in this section; presumably comes from the system stdint.h */
+#  endif
+# endif
+#endif /* system <stdint.h> branch */
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0) /* all bits set in a size_t */
+#endif
+
+/*
+ *  Deduce the type assignments from limits.h under the assumption that
+ *  integer sizes in bits are powers of 2, and follow the ANSI
+ *  definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#if !defined(uint8_t) && !defined(_UINT8_T)
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
+    typedef unsigned char uint8_t; /* chosen only when unsigned char is exactly 8 bits wide */
+#   define UINT8_C(v) ((uint8_t) v) /* NOTE(review): v is not parenthesized here, unlike UINT16_C */
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN INT8_C(0x80) /* expanded at use, so INT8_C only has to exist by then; relies on 0x80 converting to a negative int8_t */
+#endif
+#if !defined(int8_t) && !defined(_INT8_T)
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
+    typedef signed char int8_t; /* chosen only when signed char tops out at 0x7f */
+#   define INT8_C(v) ((int8_t) v) /* NOTE(review): v is not parenthesized here, unlike INT16_C */
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#if !defined(uint16_t) && !defined(_UINT16_T)
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
+  typedef unsigned int uint16_t; /* unsigned int is tried first, then unsigned short */
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "" /* int-sized, so no length modifier */
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX)
+  typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN INT16_C(0x8000) /* expanded at use; relies on 0x8000 converting to a negative int16_t */
+#endif
+#if !defined(int16_t) && !defined(_INT16_T)
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
+  typedef signed int int16_t; /* signed int is tried first, then signed short */
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX)
+  typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#if !defined(uint32_t) && !defined(_UINT32_T)
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
+  typedef unsigned long uint32_t; /* unsigned long is tried first, then unsigned int, then unsigned short */
+# define UINT32_C(v) v ## UL /* pastes a suffix, so v must be a bare integer literal */
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX)
+  typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX)
+  typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+#ifndef INT32_MIN
+# define INT32_MIN INT32_C(0x80000000) /* NOTE(review): built from a hex literal plus the INT32_C suffix; verify it yields a negative value where long is 32 bits */
+#endif
+#if !defined(int32_t) && !defined(_INT32_T)
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
+  typedef signed long int32_t; /* signed long is tried first, then signed int, then signed short */
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX)
+  typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX)
+  typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+/*
+ *  The macro stdint_int64_defined is temporarily used to record
+ *  whether or not 64 integer support is available.  It must be
+ *  defined for any 64 integer extensions for new platforms that are
+ *  added.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
+#  define stdint_int64_defined /* C99 or later: long long is available */
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# endif
+#endif
+
+#if !defined (stdint_int64_defined) /* not C99: fall back to per-compiler 64-bit types */
+# if defined(__GNUC__)
+#  define stdint_int64_defined
+   __extension__ typedef long long int64_t;
+   __extension__ typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+#  define stdint_int64_defined
+   typedef __int64 int64_t; /* these compilers spell the 64-bit type __int64 */
+   typedef unsigned __int64 uint64_t;
+#  define UINT64_C(v) v ## UI64
+#  define  INT64_C(v) v ## I64
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "I64"
+#  endif
+# endif
+#endif /* if no branch matched, stdint_int64_defined stays undefined and no 64-bit types exist */
+
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#ifndef ULONG_LONG_MAX /* NOTE(review): unlike LONG_LONG_MAX above, not guarded on UINT64_C being defined */
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (INT64_MIN) && defined (INT64_C)
+# define INT64_MIN INT64_C (-9223372036854775808)
+#endif
+#if !defined (UINT64_MAX) && defined (INT64_C) /* gated on INT64_C although the definition uses UINT64_C */
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ *  Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16" /* widths are string literals, not integers */
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "20" /* decimal counterparts of the hex widths above */
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+
+/*
+ *  Ok, lets not worry about 128 bit integers for now.  Moore's law says
+ *  we don't need to worry about that until about 2040 at which point
+ *  we'll have bigger things to worry about.
+ */
+
+#ifdef stdint_int64_defined
+  typedef int64_t intmax_t; /* widest type is the 64-bit one when available */
+  typedef uint64_t uintmax_t;
+# define  INTMAX_MAX   INT64_MAX
+# define  INTMAX_MIN   INT64_MIN
+# define UINTMAX_MAX  UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define  INTMAX_C(v)  INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else
+  typedef int32_t intmax_t; /* no 64-bit type found: the 32-bit types are the widest */
+  typedef uint32_t uintmax_t;
+# define  INTMAX_MAX   INT32_MAX
+# define UINTMAX_MAX  UINT32_MAX /* NOTE(review): this branch defines no INTMAX_MIN, unlike the 64-bit branch */
+# define UINTMAX_C(v) UINT32_C(v)
+# define  INTMAX_C(v)  INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif
+
+/*
+ *  Because this file currently only supports platforms which have
+ *  precise powers of 2 as bit sizes for the default integers, the
+ *  least definitions are all trivial.  Its possible that a future
+ *  version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined
+  typedef   int8_t   int_least8_t; /* each least type is simply the exact-width type */
+  typedef  uint8_t  uint_least8_t;
+  typedef  int16_t  int_least16_t;
+  typedef uint16_t uint_least16_t;
+  typedef  int32_t  int_least32_t;
+  typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define  UINT_LEAST8_MAX  UINT8_MAX
+# define   INT_LEAST8_MAX   INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define  INT_LEAST16_MAX  INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define  INT_LEAST32_MAX  INT32_MAX
+# define   INT_LEAST8_MIN   INT8_MIN
+# define  INT_LEAST16_MIN  INT16_MIN
+# define  INT_LEAST32_MIN  INT32_MIN
+# ifdef stdint_int64_defined
+    typedef  int64_t  int_least64_t; /* 64-bit least types exist only when a 64-bit type was found */
+    typedef uint64_t uint_least64_t;
+#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+#   define UINT_LEAST64_MAX UINT64_MAX
+#   define  INT_LEAST64_MAX  INT64_MAX
+#   define  INT_LEAST64_MIN  INT64_MIN
+# endif
+#endif
+#undef stdint_least_defined /* temporary marker, not left defined after this point */
+
+/*
+ *  The ANSI C committee pretending to know or specify anything about
+ *  performance is the epitome of misguided arrogance.  The mandate of
+ *  this file is to *ONLY* ever support that absolute minimum
+ *  definition of the fast integer types, for compatibility purposes.
+ *  No extensions, and no attempt to suggest what may or may not be a
+ *  faster integer type will ever be made in this file.  Developers are
+ *  warned to stay away from these types when using this or any other
+ *  stdint.h.
+ */
+
+typedef   int_least8_t   int_fast8_t; /* each fast type is an alias of the matching least type */
+typedef  uint_least8_t  uint_fast8_t;
+typedef  int_least16_t  int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef  int_least32_t  int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define  UINT_FAST8_MAX  UINT_LEAST8_MAX
+#define   INT_FAST8_MAX   INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define  INT_FAST16_MAX  INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define  INT_FAST32_MAX  INT_LEAST32_MAX
+#define   INT_FAST8_MIN   INT_LEAST8_MIN
+#define  INT_FAST16_MIN  INT_LEAST16_MIN
+#define  INT_FAST32_MIN  INT_LEAST32_MIN
+#ifdef stdint_int64_defined
+  typedef  int_least64_t  int_fast64_t; /* only when a 64-bit type was found */
+  typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define  INT_FAST64_MAX  INT_LEAST64_MAX
+# define  INT_FAST64_MIN  INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined /* temporary marker is dropped here; later code tests INT64_MAX instead */
+
+/*
+ *  Whatever piecemeal, per compiler thing we can do about the wchar_t
+ *  type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
+# include <wchar.h>
+# ifndef WCHAR_MIN
+#  define WCHAR_MIN 0 /* fallback used only if <wchar.h> did not define it */
+# endif
+# ifndef WCHAR_MAX
+#  define WCHAR_MAX ((wchar_t)-1) /* NOTE(review): the maximum only if wchar_t is unsigned, consistent with WCHAR_MIN 0 above */
+# endif
+#endif
+
+/*
+ *  Whatever piecemeal, per compiler/platform thing we can do about the
+ *  (u)intptr_t types and limits.
+ */
+
+#if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T)
+# define STDINT_H_UINTPTR_T_DEFINED /* the platform already supplies uintptr_t; skip the section below */
+#endif
+
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__)
+#  define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
+#    define stdint_intptr_bits 16
+#  else
+#    define stdint_intptr_bits 32
+#  endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc64__) /* NOTE(review): __ppc64__ is already matched by the 64-bit branch, so it can never select this one */
+#  define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what did Intel do about x86-64? */
+# else
+/* #error "This platform might not be supported yet" */
+# endif
+
+# ifdef stdint_intptr_bits
+#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
+#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c) /* extra level so stdint_intptr_bits is expanded before pasting */
+#  ifndef PRINTF_INTPTR_MODIFIER
+#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+#  endif
+#  ifndef PTRDIFF_MAX
+#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef PTRDIFF_MIN
+#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef UINTPTR_MAX
+#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MAX
+#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MIN
+#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef INTPTR_C
+#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+#  endif
+#  ifndef UINTPTR_C
+#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+#  endif
+  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; /* e.g. uint64_t when stdint_intptr_bits is 64 */
+  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
+# else
+/* TODO -- This following is likely wrong for some platforms, and does
+   nothing for the definition of uintptr_t. */
+  typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+/*
+ *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ */
+
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) /* shifts 1 into the top bit, then subtracts 1; NOTE(review): depends on that shift wrapping as on a 2s complement machine */
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/*
+ *  Please compile with the maximum warning settings to make sure macros are not
+ *  defined more than once.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z)
+
+#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0); /* e.g. uint8_t u8 = UINT8_C (0); */
+#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0); /* e.g. int8_t i8 = INT8_C (0); */
+
+#define DECL(us,bits) glue3(DECL,us,) (bits) /* dispatches to DECLU or DECLI */
+
+#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits) /* flips the zero-valued uN and compares it with UINTN_MAX */
+
+int main () {
+	DECL(I,8)
+	DECL(U,8)
+	DECL(I,16)
+	DECL(U,16)
+	DECL(I,32)
+	DECL(U,32)
+#ifdef INT64_MAX
+	DECL(I,64)
+	DECL(U,64)
+#endif
+	intmax_t imax = INTMAX_C(0);
+	uintmax_t umax = UINTMAX_C(0);
+	char str0[256], str1[256];
+
+	sprintf (str0, "%d %x\n", 0, ~0); /* reference text; every check below must format to the same string */
+
+	sprintf (str1, "%d %x\n",  i8, ~0); /* the trailing ~0 is printed after the typed value in every check */
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+	sprintf (str1, "%u %x\n",  u8, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+	sprintf (str1, "%d %x\n",  i16, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+	sprintf (str1, "%u %x\n",  u16, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+#ifdef INT64_MAX
+	sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0); /* only i64 is format-checked; u64 is used by TESTUMAX(64) only */
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+#endif
+	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
+
+	TESTUMAX(8);
+	TESTUMAX(16);
+	TESTUMAX(32);
+#ifdef INT64_MAX
+	TESTUMAX(64);
+#endif
+
+	return EXIT_SUCCESS; /* mismatches are only printed; the exit status is always success */
+}
+
+#endif
diff --git a/test/Makefile.am b/test/Makefile.am
new file mode 100644
index 0000000..30a5e31
--- /dev/null
+++ b/test/Makefile.am
@@ -0,0 +1,15 @@
+
+TESTS = check_ahtable check_hattrie
+check_PROGRAMS = check_ahtable check_hattrie bench_sorted_iter
+
+check_ahtable_SOURCES  = check_ahtable.c str_map.c
+check_ahtable_LDADD    = $(top_builddir)/src/libhat-trie.la
+check_ahtable_CPPFLAGS = -I$(top_builddir)/src
+
+check_hattrie_SOURCES  = check_hattrie.c str_map.c
+check_hattrie_LDADD    = $(top_builddir)/src/libhat-trie.la
+check_hattrie_CPPFLAGS = -I$(top_builddir)/src
+
+bench_sorted_iter_SOURCES  = bench_sorted_iter.c
+bench_sorted_iter_LDADD    = $(top_builddir)/src/libhat-trie.la
+bench_sorted_iter_CPPFLAGS = -I$(top_builddir)/src
diff --git a/test/bench_sorted_iter.c b/test/bench_sorted_iter.c
new file mode 100644
index 0000000..0271bcb
--- /dev/null
+++ b/test/bench_sorted_iter.c
@@ -0,0 +1,69 @@
+
+/* A quick test of the degree to which ordered iteration is slower than unordered. */
+
+#include "../src/hat-trie.h"
+#include <stdio.h>
+#include <time.h>
+
+
+/* Fill x[0..len-1] with random printable ASCII (0x20-0x7e) and NUL-terminate at x[len]; x needs len + 1 bytes. */
+void randstr(char* x, size_t len)
+{
+    x[len] = '\0';
+    while (len > 0) { /* fills from the last character back to the first */
+        x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
+    }
+}
+
+int main() /* fills a hat-trie with random keys, then times unsorted and sorted iteration over it */
+{
+    hattrie_t* T = hattrie_create();
+    const size_t n = 1000000;  // how many strings
+    const size_t m_low  = 50;  // minimum length of each string
+    const size_t m_high = 500; // maximum length of each string
+    char x[501]; /* key buffer: the longest generated key plus its terminator fits */
+
+    size_t i, m;
+    for (i = 0; i < n; ++i) {
+        m = m_low + rand() % (m_high - m_low); /* length in [m_low, m_high - 1] */
+        randstr(x, m);
+        *hattrie_get(T, x, m) = 1; /* store 1 through the pointer hattrie_get returns for this key */
+    }
+
+    hattrie_iter_t* it;
+    clock_t t0, t;
+    const size_t repetitions = 100; /* full passes over the trie per timed run */
+    size_t r;
+
+    /* iterate in unsorted order */
+    fprintf(stderr, "iterating out of order ... ");
+    t0 = clock();
+    for (r = 0; r < repetitions; ++r) {
+        it = hattrie_iter_begin(T, false);
+        while (!hattrie_iter_finished(it)) {
+            hattrie_iter_next(it); /* only advances; keys and values are not read */
+        }
+        hattrie_iter_free(it);
+    }
+    t = clock();
+    fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC);
+
+
+    /* iterate in sorted order */
+    fprintf(stderr, "iterating in order ... ");
+    t0 = clock();
+    for (r = 0; r < repetitions; ++r) {
+        it = hattrie_iter_begin(T, true);
+        while (!hattrie_iter_finished(it)) {
+            hattrie_iter_next(it);
+        }
+        hattrie_iter_free(it);
+    }
+    t = clock();
+    fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC);
+
+
+    hattrie_free(T);
+
+    return 0;
+}
diff --git a/test/check_ahtable.c b/test/check_ahtable.c
new file mode 100644
index 0000000..f61132b
--- /dev/null
+++ b/test/check_ahtable.c
@@ -0,0 +1,222 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "str_map.h"
+#include "../src/ahtable.h"
+
+/* Write len random printable characters into x, followed by a NUL. */
+void randstr(char* x, size_t len)
+{
+    size_t pos;
+    for (pos = len, x[pos] = '\0'; pos > 0; --pos) {
+        x[pos - 1] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
+    }
+}
+
+
+const size_t n = 100000;  // how many unique strings
+const size_t m_low  = 50;  // minimum length of each string
+const size_t m_high = 500; // exclusive upper bound on string length
+const size_t k = 200000;  // number of insertions
+char** xs;                // the n generated keys (allocated in setup, freed in teardown)
+
+ahtable_t* T;             // table under test
+str_map* M;               // reference map holding the expected tally per key
+
+
+void setup()
+{
+    /* Generate n random keys with lengths in [m_low, m_high - 1]. */
+    const size_t spread = m_high - m_low;
+    size_t idx;
+    fprintf(stderr, "generating %zu keys ... ", n);
+    xs = malloc(n * sizeof *xs);
+    for (idx = 0; idx < n; ++idx) {
+        const size_t keylen = m_low + rand() % spread;
+        xs[idx] = malloc(keylen + 1);  /* +1 for the NUL randstr appends */
+        randstr(xs[idx], keylen);
+    }
+    T = ahtable_create();
+    M = str_map_create();
+    fprintf(stderr, "done.\n");
+}
+
+
+void teardown()
+{
+    /* Release both containers first, then the keys generated by setup(). */
+    char** cur;
+    ahtable_free(T);
+    str_map_destroy(M);
+    for (cur = xs; cur != xs + n; ++cur) {
+        free(*cur);
+    }
+    free(xs);
+}
+
+
+void test_ahtable_insert()
+{
+    fprintf(stderr, "inserting %zu keys ... \n", k);
+    /* k random increments mirrored in M, followed by k/100 random deletions */
+    size_t i, j;
+    value_t* u;
+    value_t  v;
+
+    for (j = 0; j < k; ++j) {
+        i = rand() % n;
+
+        /* bump the expected tally in the reference map */
+        v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
+        str_map_set(M, xs[i], strlen(xs[i]), v);
+
+        /* bump the tally in the table under test */
+        u = ahtable_get(T, xs[i], strlen(xs[i]));
+        *u += 1;
+
+        /* both containers must now report the same count for this key */
+        if (*u != v) {
+            fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
+                            *u, v);
+        }
+    }
+
+    fprintf(stderr, "sizeof: %zu\n", ahtable_sizeof(T));
+
+    /* delete k/100 random keys; the loop counter is j, i is only the key index */
+    for (j = 0; j < k/100; ++j) {
+        i = rand() % n;
+        ahtable_del(T, xs[i], strlen(xs[i]));
+        str_map_del(M, xs[i], strlen(xs[i]));
+        u = ahtable_tryget(T, xs[i], strlen(xs[i]));
+        if (u) {
+            fprintf(stderr, "[error] deleted node found in ahtable\n");
+        }
+    }
+
+    fprintf(stderr, "done.\n");
+}
+
+
+void test_ahtable_iteration()
+{
+    fprintf(stderr, "iterating through %zu keys ... \n", k);
+    /* walk T with an unsorted iterator and compare every entry against M */
+    ahtable_iter_t* i = ahtable_iter_begin(T, false);
+
+    size_t count = 0;
+    value_t* u;
+    value_t  v;
+
+    size_t len;
+    const char* key;
+
+    while (!ahtable_iter_finished(i)) {
+        ++count;
+        /* fetch the current entry and the tally the reference map holds for it */
+        key = ahtable_iter_key(i, &len);
+        u   = ahtable_iter_val(i);
+        v   = str_map_get(M, key, len);
+        /* v == 0: M has no live tally for this key (absent, or already visited) */
+        if (*u != v) {
+            if (v == 0) {
+                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
+            }
+            else {
+                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
+            }
+        }
+
+        // this way we will see an error if the same key is iterated through
+        // twice
+        str_map_set(M, key, len, 0);
+
+        ahtable_iter_next(i);
+    }
+    /* the number of entries visited must equal the number of pairs held by M */
+    if (count != M->m) {
+        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
+                count, M->m);
+    }
+
+    ahtable_iter_free(i);
+
+    fprintf(stderr, "done.\n");
+}
+
+
+int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
+{   /* Order by the common prefix, then by length; <0, 0 or >0 like memcmp. */
+    int c = memcmp(a, b, ka < kb ? ka : kb);
+    return c != 0 ? c : (ka > kb) - (ka < kb);  /* no size_t -> int truncation */
+}
+
+
+void test_ahtable_sorted_iteration()
+{
+    fprintf(stderr, "iterating in order through %zu keys ... \n", k);
+    ahtable_iter_t* i = ahtable_iter_begin(T, true);
+    size_t count = 0;
+    value_t* u;
+    value_t  v;
+
+    /* copy of the previously visited key, kept for the ordering check */
+    char* prev_key = malloc(m_high + 1);
+    size_t prev_len = 0;
+    const char *key = NULL;
+    size_t len = 0;
+
+    while (!ahtable_iter_finished(i)) {
+        /* key is still NULL on the first pass; memcpy from NULL is undefined */
+        if (key != NULL) {
+            memcpy(prev_key, key, len);
+        }
+        prev_len = len;
+        ++count;
+
+        key = ahtable_iter_key(i, &len);
+        if (count > 1 && cmpkey(prev_key, prev_len, key, len) > 0) {
+            fprintf(stderr, "[error] iteration is not correctly ordered.\n");
+        }
+        u  = ahtable_iter_val(i);
+        v  = str_map_get(M, key, len);
+        if (*u != v) {
+            if (v == 0) {
+                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
+            }
+            else {
+                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
+            }
+        }
+
+        // zero the tally so a key visited twice shows up as a mismatch
+        str_map_set(M, key, len, 0);
+        ahtable_iter_next(i);
+    }
+
+    if (count != M->m) {
+        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
+                count, M->m);
+    }
+    ahtable_iter_free(i);
+    free(prev_key);
+    fprintf(stderr, "done.\n");
+}
+
+
+int main()
+{
+    setup();  /* pass 1: fresh keys and containers, checked with unsorted iteration */
+    test_ahtable_insert();
+    test_ahtable_iteration();
+    teardown();
+    /* pass 2: rebuilt from scratch, checked with sorted iteration */
+    setup();
+    test_ahtable_insert();
+    test_ahtable_sorted_iteration();
+    teardown();
+    /* failures are only reported on stderr; the exit status is always 0 */
+    return 0;
+}
diff --git a/test/check_hattrie.c b/test/check_hattrie.c
new file mode 100644
index 0000000..5bb6b38
--- /dev/null
+++ b/test/check_hattrie.c
@@ -0,0 +1,270 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "str_map.h"
+#include "../src/hat-trie.h"
+
+/* Generate a random printable-ASCII string of length len in x (NUL included). */
+void randstr(char* x, size_t len)
+{
+    char* cursor = &x[len];
+    *cursor = '\0';
+    while (cursor > x) {
+        *--cursor = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
+    }
+}
+
+const size_t n = 100000;  // how many unique strings
+const size_t m_low  = 50;  // minimum length of each string
+const size_t m_high = 500; // exclusive upper bound on string length
+const size_t k = 200000;  // number of insertions
+const size_t d = 50000;   // number of deletions
+
+char** xs;                // the n generated keys (owned; freed in teardown)
+char** ds;                // d pointers into xs chosen for deletion (aliases, not copies)
+
+hattrie_t* T;             // trie under test
+str_map* M;               // reference map holding the expected tally per key
+
+
+void setup()
+{
+    size_t idx;
+
+    fprintf(stderr, "generating %zu keys ... ", n);
+    xs = malloc(n * sizeof *xs);
+    ds = malloc(d * sizeof *ds);
+    /* n random keys, each with a length in [m_low, m_high - 1] */
+    for (idx = 0; idx < n; ++idx) {
+        const size_t keylen = m_low + rand() % (m_high - m_low);
+        xs[idx] = malloc(keylen + 1);
+        randstr(xs[idx], keylen);
+    }
+    /* d deletion candidates; these alias entries of xs and may repeat */
+    for (idx = 0; idx < d; ++idx) {
+        ds[idx] = xs[rand() % n];
+    }
+    T = hattrie_create();
+    M = str_map_create();
+    fprintf(stderr, "done.\n");
+}
+
+
+void teardown()
+{
+    /* ds only aliases strings owned by xs, so just its array is freed. */
+    char** cur;
+    hattrie_free(T);
+    str_map_destroy(M);
+    for (cur = xs; cur != xs + n; ++cur) {
+        free(*cur);
+    }
+    free(xs);
+    free(ds);
+}
+
+
+void test_hattrie_insert()
+{
+    fprintf(stderr, "inserting %zu keys ... \n", k);
+    /* k random increments mirrored in M, then deletion of the d keys listed in ds */
+    size_t i, j;
+    value_t* u;
+    value_t  v;
+
+    for (j = 0; j < k; ++j) {
+        i = rand() % n;
+
+        /* bump the expected tally in the reference map */
+        v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
+        str_map_set(M, xs[i], strlen(xs[i]), v);
+
+        /* bump the tally in the trie under test */
+        u = hattrie_get(T, xs[i], strlen(xs[i]));
+        *u += 1;
+
+        /* both containers must now report the same count for this key */
+        if (*u != v) {
+            fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
+                            *u, v);
+        }
+    }
+
+    fprintf(stderr, "sizeof: %zu\n", hattrie_sizeof(T));
+    /* remove each ds entry from both containers; a lookup afterwards must miss */
+    fprintf(stderr, "deleting %zu keys ... \n", d);
+    for (j = 0; j < d; ++j) {
+        str_map_del(M, ds[j], strlen(ds[j]));
+        hattrie_del(T, ds[j], strlen(ds[j]));
+        u = hattrie_tryget(T, ds[j], strlen(ds[j]));
+        if (u) {
+            fprintf(stderr, "[error] item %zu still found in trie after delete\n",
+                    j);
+        }
+    }
+
+    fprintf(stderr, "done.\n");
+}
+
+
+
+void test_hattrie_iteration()
+{
+    fprintf(stderr, "iterating through %zu keys ... \n", k);
+    /* walk T with an unsorted iterator and compare every entry against M */
+    hattrie_iter_t* i = hattrie_iter_begin(T, false);
+
+    size_t count = 0;
+    value_t* u;
+    value_t  v;
+
+    size_t len;
+    const char* key;
+
+    while (!hattrie_iter_finished(i)) {
+        ++count;
+        /* fetch the current entry ... */
+        key = hattrie_iter_key(i, &len);
+        u   = hattrie_iter_val(i);
+        /* ... and the tally the reference map holds for the same key */
+        v = str_map_get(M, key, len);
+        /* v == 0: M has no live tally for this key (absent, or already visited) */
+        if (*u != v) {
+            if (v == 0) {
+                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
+            }
+            else {
+                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
+            }
+        }
+
+        // this way we will see an error if the same key is iterated through
+        // twice
+        str_map_set(M, key, len, 0);
+
+        hattrie_iter_next(i);
+    }
+    /* the number of entries visited must equal the number of pairs held by M */
+    if (count != M->m) {
+        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
+                count, M->m);
+    }
+
+    hattrie_iter_free(i);
+
+    fprintf(stderr, "done.\n");
+}
+
+
+int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
+{   /* Order by the common prefix, then by length; <0, 0 or >0 like memcmp. */
+    int c = memcmp(a, b, ka < kb ? ka : kb);
+    return c != 0 ? c : (ka > kb) - (ka < kb);  /* no size_t -> int truncation */
+}
+
+
+void test_hattrie_sorted_iteration()
+{
+    fprintf(stderr, "iterating in order through %zu keys ... \n", k);
+    /* walk T in sorted order, checking both the ordering and the tallies in M */
+    hattrie_iter_t* i = hattrie_iter_begin(T, true);
+
+    size_t count = 0;
+    value_t* u;
+    value_t  v;
+    /* key_copy: bytes of the current key; prev_key: bytes of the one before it */
+    char* key_copy = malloc(m_high + 1);
+    char* prev_key = malloc(m_high + 1);
+    memset(prev_key, 0, m_high + 1);
+    size_t prev_len = 0;
+
+    const char *key = NULL;
+    size_t len = 0;
+
+    while (!hattrie_iter_finished(i)) {
+        memcpy(prev_key, key_copy, len);
+        prev_key[len] = '\0';
+        prev_len = len;
+        ++count;
+
+        key = hattrie_iter_key(i, &len);
+
+        /* memory for key may be changed on iter, copy it (len bytes, no NUL) */
+        memcpy(key_copy, key, len);
+        /* there is no predecessor to compare against on the first element */
+        if (count > 1 && cmpkey(prev_key, prev_len, key, len) > 0) {
+            fprintf(stderr, "[error] iteration is not correctly ordered.\n");
+        }
+
+        u = hattrie_iter_val(i);
+        v = str_map_get(M, key, len);
+
+        if (*u != v) {
+            if (v == 0) {
+                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
+            }
+            else {
+                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
+            }
+        }
+
+        // this way we will see an error if the same key is iterated through
+        // twice
+        str_map_set(M, key, len, 0);
+
+        hattrie_iter_next(i);
+    }
+
+    if (count != M->m) {
+        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
+                count, M->m);
+    }
+
+    hattrie_iter_free(i);
+    free(prev_key);
+    free(key_copy);
+
+    fprintf(stderr, "done.\n");
+}
+
+
+void test_trie_non_ascii()
+{
+    fprintf(stderr, "checking non-ascii... \n");
+    /* uses its own local trie (shadowing the file-level T) and a high-bit key */
+    value_t* u;
+    hattrie_t* T = hattrie_create();
+    const char* txt = "\x81\x70";
+
+    u = hattrie_get(T, txt, strlen(txt));
+    *u = 10;
+    /* tryget yields NULL for a missing key, so test the pointer before reading */
+    u = hattrie_tryget(T, txt, strlen(txt));
+    if (u == NULL || *u != 10) {
+        fprintf(stderr, "can't store non-ascii strings\n");
+    }
+    hattrie_free(T);
+
+    fprintf(stderr, "done.\n");
+}
+
+
+
+
+int main()
+{
+    test_trie_non_ascii();  /* stand-alone check; builds and frees its own trie */
+
+    setup();  /* pass 1: fresh keys and containers, checked with unsorted iteration */
+    test_hattrie_insert();
+    test_hattrie_iteration();
+    teardown();
+    /* pass 2: rebuilt from scratch, checked with sorted iteration */
+    setup();
+    test_hattrie_insert();
+    test_hattrie_sorted_iteration();
+    teardown();
+    /* failures are only reported on stderr; the exit status is always 0 */
+    return 0;
+}
diff --git a/test/str_map.c b/test/str_map.c
new file mode 100644
index 0000000..68303a3
--- /dev/null
+++ b/test/str_map.c
@@ -0,0 +1,241 @@
+
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ */
+
+
+#include "str_map.h"
+#include "misc.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+
+static const size_t INITIAL_TABLE_SIZE = 16;
+static const double MAX_LOAD = 0.77;
+
+
+/*
+ * Paul Hsieh's SuperFastHash
+ * http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+
+#undef get16bits  /* get16bits(d): read the two bytes at d as one 16-bit value */
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+    || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+/* Otherwise assemble the value bytewise, with byte 0 as the low-order byte. */
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+        +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+
+static uint32_t hash(const char * data, size_t len) {
+    uint32_t hash = len, tmp;
+    int rem;
+    /* 32-bit hash of the len bytes at data; empty or NULL input hashes to 0 */
+    if (len <= 0 || data == NULL) return 0;
+    /* rem: the 0-3 leftover bytes; len becomes the number of whole 4-byte groups */
+    rem = len & 3;
+    len >>= 2;
+
+    /* Main loop: fold in one 4-byte group (two 16-bit reads) per pass */
+    for (;len > 0; len--) {
+        hash  += get16bits (data);
+        tmp    = (get16bits (data+2) << 11) ^ hash;
+        hash   = (hash << 16) ^ tmp;
+        data  += 2*sizeof (uint16_t);
+        hash  += hash >> 11;
+    }
+
+    /* Handle end cases: mix in the 1-3 leftover bytes (nothing to do for 0) */
+    switch (rem) {
+        case 3: hash += get16bits (data);
+                hash ^= hash << 16;
+                hash ^= data[sizeof (uint16_t)] << 18;
+                hash += hash >> 11;
+                break;
+        case 2: hash += get16bits (data);
+                hash ^= hash << 11;
+                hash += hash >> 17;
+                break;
+        case 1: hash += *data;
+                hash ^= hash << 10;
+                hash += hash >> 1;
+    }
+    /* NOTE(review): the single-byte reads above use plain char, so bytes >= 0x80 may sign-extend where char is signed - confirm */
+    /* Force "avalanching" of final 127 bits */
+    hash ^= hash << 3;
+    hash += hash >> 5;
+    hash ^= hash << 4;
+    hash += hash >> 17;
+    hash ^= hash << 25;
+    hash += hash >> 6;
+
+    return hash;
+}
+
+
+
+static void rehash(str_map* T, size_t new_n);
+static void clear(str_map*);
+
+
+
+str_map* str_map_create()
+{
+    /* An empty map: INITIAL_TABLE_SIZE NULL buckets and no stored pairs. */
+    const size_t bytes = INITIAL_TABLE_SIZE * sizeof(str_map_pair*);
+    str_map* map = malloc_or_die(sizeof *map);
+    map->n = INITIAL_TABLE_SIZE;
+    map->m = 0;
+    map->max_m = map->n * MAX_LOAD;
+    map->A = memset(malloc_or_die(bytes), 0, bytes);
+    return map;
+}
+
+
+void str_map_destroy(str_map* T)
+{
+    /* Destroying a NULL map is a no-op. */
+    if (T == NULL) { return; }
+    clear(T);      /* frees every pair and its key */
+    free(T->A);    /* then the bucket array */
+    free(T);       /* and finally the map itself */
+}
+
+
+
+void clear(str_map* T)
+{
+    /* Free every chained pair (and its key), leaving all buckets NULL. */
+    size_t slot;
+    for (slot = 0; slot < T->n; slot++) {
+        str_map_pair *node, *rest;
+        for (node = T->A[slot]; node != NULL; node = rest) {
+            rest = node->next;  /* read before the node is released */
+            free(node->key);
+            free(node);
+        }
+        T->A[slot] = NULL;
+    }
+    T->m = 0;
+}
+
+
+static void insert_without_copy(str_map* T, str_map_pair* V)
+{   /* Push the existing pair V onto the front of its bucket's chain. */
+    str_map_pair** const head = &T->A[hash(V->key, V->keylen) % T->n];
+    V->next = *head;
+    *head = V;
+    ++T->m;
+}
+
+
+
+static void rehash(str_map* T, size_t new_n)
+{
+    str_map U;  /* scratch map that owns the new bucket array of new_n slots */
+    U.n = new_n;
+    U.m = 0;
+    U.max_m = U.n * MAX_LOAD;
+    U.A = malloc_or_die(U.n * sizeof(str_map_pair*));
+    memset(U.A, 0, U.n * sizeof(str_map_pair*));
+    /* Move every pair into U, reusing the nodes (no keys are copied or freed). */
+    str_map_pair *j, *k;
+    size_t i;
+    for (i = 0; i < T->n; i++) {
+        j = T->A[i];
+        while (j) {
+            k = j->next;  /* saved first: insert_without_copy overwrites j->next */
+            insert_without_copy(&U, j);
+            j = k;
+        }
+        T->A[i] = NULL;
+    }
+    /* Adopt U's table; T->m is left alone because the pair count is unchanged. */
+    free(T->A);
+    T->A = U.A;
+    T->n = U.n;
+    T->max_m = U.max_m;
+}
+
+
+void str_map_set(str_map* T, const char* key, size_t keylen, value_t value)
+{
+    str_map_pair** bucket;
+    str_map_pair* node;
+    /* Grow first, so the bucket chosen below belongs to the final table. */
+    if (T->m >= T->max_m) {
+        rehash(T, T->n * 2);
+    }
+    bucket = &T->A[hash(key, keylen) % T->n];
+    /* Overwrite in place when the key is already stored. */
+    for (node = *bucket; node != NULL; node = node->next) {
+        if (node->keylen != keylen || memcmp(node->key, key, keylen) != 0) {
+            continue;
+        }
+        node->value = value;
+        return;
+    }
+
+    /* Otherwise prepend a new pair holding a private copy of the key bytes. */
+    node = malloc_or_die(sizeof *node);
+    node->key = malloc_or_die(keylen);
+    memcpy(node->key, key, keylen);
+    node->keylen = keylen;
+    node->value  = value;
+    node->next = *bucket;
+    *bucket = node;
+    T->m++;
+}
+
+
+value_t str_map_get(const str_map* T, const char* key, size_t keylen)
+{
+    /* Returns the value stored under key, or 0 when the key is absent. */
+    const str_map_pair* node = T->A[hash(key, keylen) % T->n];
+
+    /* Walk the chain until an entry matches in both length and bytes. */
+    while (node != NULL) {
+        if (node->keylen == keylen && memcmp(node->key, key, keylen) == 0) {
+            break;
+        }
+        node = node->next;
+    }
+
+    /* node is the match, or NULL once the chain is exhausted */
+    return node != NULL ? node->value : 0;
+}
+
+void str_map_del(str_map* T, const char* key, size_t keylen)
+{
+    /* Unlink and free the pair stored under key.
+     * A key that is not present is silently ignored. */
+    str_map_pair** link = &T->A[hash(key, keylen) % T->n];
+
+    /* link always addresses the pointer that leads to the current pair. */
+    while (*link != NULL) {
+        str_map_pair* node = *link;
+
+        /* Not this one: step to the next link in the chain. */
+        if (node->keylen != keylen
+            || memcmp(node->key, key, keylen) != 0) {
+            link = &node->next;
+            continue;
+        }
+
+        /* Found it: splice the pair out of the chain, then release it. */
+        *link = node->next;
+        free(node->key);
+        free(node);
+        --T->m;
+        return;
+    }
+}
+
diff --git a/test/str_map.h b/test/str_map.h
new file mode 100644
index 0000000..7d000d5
--- /dev/null
+++ b/test/str_map.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones at cs.washington.edu>
+ *
+ * hash :
+ * A quick and simple hash table mapping strings to things.
+ *
+ */
+
+
+#ifndef ISOLATOR_STR_MAP_H
+#define ISOLATOR_STR_MAP_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "common.h"
+
+
+typedef struct str_map_pair_
+{
+    char*         key;     /* private copy of the key bytes (no NUL terminator) */
+    size_t        keylen;  /* number of bytes in key */
+    value_t       value;   /* value stored under key */
+
+    struct str_map_pair_* next;  /* next pair in the same bucket chain, or NULL */
+} str_map_pair;
+
+/* The map itself: an array of n bucket chains holding m pairs in total. */
+typedef struct
+{
+    str_map_pair** A; /* table proper */
+    size_t n;         /* table size */
+    size_t m;         /* hashed items */
+    size_t max_m;     /* max hashed items before rehash */
+} str_map;
+
+
+/* Keys are (pointer, length) byte ranges; str_map_get returns 0 for absent keys. */
+str_map* str_map_create(void);
+void     str_map_destroy(str_map*);
+void     str_map_set(str_map*, const char* key, size_t keylen, value_t value);
+value_t  str_map_get(const str_map*, const char* key, size_t keylen);
+void     str_map_del(str_map* T, const char* key, size_t keylen);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/hat-trie.git



More information about the debian-med-commit mailing list