[med-svn] [khmer] 01/06: Imported Upstream version 1.4+dfsg

Michael Crusoe misterc-guest at moszumanska.debian.org
Thu May 28 00:34:06 UTC 2015


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository khmer.

commit f031ff07d8530f0fdc544100cad742d511757261
Author: Michael R. Crusoe <michael.crusoe at gmail.com>
Date:   Tue May 26 16:44:24 2015 -0400

    Imported Upstream version 1.4+dfsg
---
 .gitignore                                         |    3 +
 CONTRIBUTING.md                                    |    2 +
 ChangeLog                                          |  712 ++++-
 Doxyfile.in                                        |    2 +-
 Makefile                                           |   90 +-
 README.rst                                         |   23 +-
 doc/{LICENSE.txt => LICENSE.rst}                   |    0
 doc/Makefile.bak                                   |    4 -
 doc/README.html                                    |  403 ---
 doc/{citations.txt => citations.rst}               |    0
 doc/conf.py                                        |   25 +-
 doc/{contributors.txt => contributors.rst}         |    0
 .../{CODE_OF_CONDUCT.txt => CODE_OF_CONDUCT.rst}   |    0
 ...to-testing.txt => a-quick-guide-to-testing.rst} |    5 +
 doc/dev/{codebase-guide.txt => codebase-guide.rst} |    0
 ...review.txt => coding-guidelines-and-review.rst} |   39 +-
 doc/dev/{crazy-ideas.txt => crazy-ideas.rst}       |    0
 doc/dev/{details.txt => details.rst}               |    0
 doc/dev/{development.txt => development.rst}       |    0
 ...mer-developers.txt => for-khmer-developers.rst} |    0
 .../{getting-started.txt => getting-started.rst}   |   68 +-
 doc/dev/{hackathon.txt => hackathon.rst}           |    0
 doc/dev/{index.txt => index.rst}                   |    3 +-
 doc/dev/{release.txt => release.rst}               |   17 +-
 doc/dev/scripts-and-sandbox.rst                    |  120 +
 doc/{index.txt => index.rst}                       |    8 +-
 doc/introduction.rst                               |   99 +
 doc/introduction.txt                               |   93 -
 doc/release-notes/index.rst                        |   12 +
 doc/release-notes/release-1.0.1.md                 |    2 +
 doc/release-notes/release-1.0.1.rst                |   71 +
 doc/release-notes/release-1.0.md                   |    2 +
 doc/release-notes/release-1.0.rst                  |  110 +
 doc/release-notes/release-1.1.md                   |    2 +
 doc/release-notes/release-1.1.rst                  |   77 +
 doc/release-notes/release-1.2.md                   |   12 +-
 doc/release-notes/release-1.2.rst                  |  106 +
 doc/release-notes/release-1.3.md                   |    2 +
 doc/release-notes/release-1.3.rst                  |   62 +
 doc/release-notes/release-1.4.md                   |  237 ++
 doc/release-notes/release-1.4.rst                  |  257 ++
 doc/requirements.txt                               |    1 +
 doc/{roadmap.txt => roadmap.rst}                   |    0
 doc/user/{biblio.txt => biblio.rst}                |    0
 doc/user/{blog-posts.txt => blog-posts.rst}        |   10 +-
 ...ng-table-sizes.txt => choosing-table-sizes.rst} |    8 +-
 doc/user/{examples.txt => examples.rst}            |    0
 doc/user/{galaxy.txt => galaxy.rst}                |    0
 doc/user/getting-help.rst                          |   54 +
 doc/user/{guide.txt => guide.rst}                  |    6 +-
 doc/user/{index.txt => index.rst}                  |    1 +
 doc/user/{install.txt => install.rst}              |    4 +-
 doc/user/{known-issues.txt => known-issues.rst}    |    0
 ...ning-big-data.txt => partitioning-big-data.rst} |    0
 doc/user/{scripts.txt => scripts.rst}              |    6 +
 examples/stamps/do.sh                              |   44 +-
 jenkins-build.sh                                   |   14 +-
 khmer/__init__.py                                  |  131 +-
 khmer/_khmermodule.cc                              | 2758 ++++++++++++--------
 khmer/_version.py                                  |  110 +-
 khmer/{file.py => kfile.py}                        |   66 +-
 khmer/khmer_args.py                                |   31 +-
 khmer/load_pe.py                                   |   53 -
 khmer/thread_utils.py                              |   28 +-
 khmer/utils.py                                     |  155 +-
 lib/.check_openmp.cc                               |   17 +
 lib/.gitignore                                     |   11 +-
 lib/Makefile                                       |  342 ++-
 lib/counting.cc                                    |  118 +-
 lib/counting.hh                                    |   16 +-
 lib/get_version.py                                 |   22 +-
 lib/graphtest.cc                                   |    2 +-
 lib/hashbits.cc                                    |   55 +-
 lib/hashbits.hh                                    |   14 +-
 lib/hashtable.cc                                   |  138 +-
 lib/hashtable.hh                                   |   33 +-
 lib/hllcounter.cc                                  |  461 ++++
 lib/hllcounter.hh                                  |  221 ++
 lib/ht-diff.cc                                     |    2 +-
 lib/khmer.hh                                       |    7 +-
 lib/khmer.pc.in                                    |   14 +
 lib/khmer_exception.hh                             |   37 +-
 lib/kmer_hash.cc                                   |   66 +-
 lib/kmer_hash.hh                                   |   10 +-
 lib/labelhash.cc                                   |    2 +-
 lib/labelhash.hh                                   |    2 +-
 lib/perf_metrics.cc                                |    2 +-
 lib/perf_metrics.hh                                |    2 +-
 lib/primes.hh                                      |    2 +-
 lib/read_aligner.cc                                |    4 +-
 lib/read_aligner.hh                                |    2 +-
 lib/read_parsers.cc                                |   60 +-
 lib/read_parsers.hh                                |   44 +-
 lib/subset.cc                                      |   23 +-
 lib/subset.hh                                      |    2 +-
 lib/test-CacheManager.cc                           |  189 --
 lib/test-Colors.cc                                 |    2 +-
 lib/test-HashTables.cc                             |    2 +-
 lib/test-Parser.cc                                 |    2 +-
 lib/test-StreamReader.cc                           |  173 --
 lib/test-compile.cc                                |   17 +
 lib/{test_read_aligner.cc => test-read-aligner.cc} |    4 +-
 lib/trace_logger.cc                                |    2 +-
 lib/trace_logger.hh                                |    2 +-
 sandbox/README.rst                                 |   56 +-
 sandbox/abundance-hist-by-position.py              |    2 +-
 sandbox/assembly-diff-2.py                         |    2 +-
 sandbox/assembly-diff.py                           |    2 +-
 sandbox/assemstats3.py                             |    2 +-
 ...intersection.py => bloom-count-intersection.py} |    2 +-
 sandbox/{bloom_count.py => bloom-count.py}         |    2 +-
 sandbox/build-sparse-graph.py                      |    2 +-
 sandbox/calc-best-assembly.py                      |    6 +-
 sandbox/calc-error-profile.py                      |    6 +-
 sandbox/calc-median-distribution.py                |    2 +-
 sandbox/collect-reads.py                           |   29 +-
 sandbox/collect-variants.py                        |   13 +-
 sandbox/combine-pe.py                              |   66 -
 sandbox/compare-partitions.py                      |   68 -
 sandbox/correct-errors.py                          |   21 +-
 sandbox/count-within-radius.py                     |   60 -
 sandbox/degree-by-position.py                      |   47 -
 sandbox/dn-identify-errors.py                      |  147 --
 sandbox/ec.py                                      |   60 -
 sandbox/error-correct-pass2.py                     |   85 -
 sandbox/extract-single-partition.py                |    2 +-
 sandbox/fasta-to-abundance-hist.py                 |    2 +-
 sandbox/filter-below-abund.py                      |    2 +-
 sandbox/filter-median-and-pct.py                   |    4 +-
 sandbox/filter-median.py                           |    4 +-
 sandbox/find-high-abund-kmers.py                   |    2 +-
 sandbox/find-unpart.py                             |   53 -
 sandbox/graph-size.py                              |    2 +-
 sandbox/hi-lo-abundance-by-position.py             |    2 +-
 sandbox/make-coverage.py                           |   47 +
 sandbox/multi-rename.py                            |    2 +-
 sandbox/normalize-by-align.py                      |  150 --
 sandbox/normalize-by-median-pct.py                 |    6 +-
 sandbox/print-stoptags.py                          |    2 +-
 sandbox/print-tagset.py                            |    2 +-
 sandbox/read_aligner.py                            |   64 -
 sandbox/readstats.py                               |   51 -
 sandbox/renumber-partitions.py                     |    2 +-
 sandbox/saturate-by-median.py                      |   27 +-
 sandbox/shuffle-fasta.py                           |   27 -
 sandbox/shuffle-reverse-rotary.py                  |    2 +-
 sandbox/slice-reads-by-coverage.py                 |   11 +-
 sandbox/split-fasta.py                             |    2 +-
 sandbox/split-sequences-by-length.py               |    4 +-
 sandbox/stoptag-abundance-hist.py                  |    2 +-
 sandbox/stoptags-by-position.py                    |    2 +-
 sandbox/strip-partition.py                         |    2 +-
 sandbox/subset-report.py                           |    2 +-
 sandbox/sweep-files.py                             |   10 +-
 sandbox/sweep-out-reads-with-contigs.py            |    2 +-
 sandbox/sweep-reads.py                             |   38 +-
 sandbox/sweep-reads2.py                            |    2 +-
 sandbox/sweep-reads3.py                            |    6 +-
 sandbox/to-casava-1.8-fastq.py                     |   61 -
 sandbox/trim-low-abund.py                          |  236 --
 sandbox/uniqify-sequences.py                       |   67 -
 sandbox/unique-kmers.py                            |   96 +
 sandbox/write-interleave.py                        |   29 -
 sandbox/write-trimmomatic.py                       |    2 +-
 scripts/abundance-dist-single.py                   |   29 +-
 scripts/abundance-dist.py                          |   35 +-
 scripts/annotate-partitions.py                     |   14 +-
 scripts/count-median.py                            |   58 +-
 scripts/count-overlap.py                           |   29 +-
 scripts/do-partition.py                            |   24 +-
 scripts/extract-long-sequences.py                  |   24 +-
 scripts/extract-paired-reads.py                    |   83 +-
 scripts/extract-partitions.py                      |   30 +-
 scripts/fastq-to-fasta.py                          |    5 +-
 scripts/filter-abund-single.py                     |   14 +-
 scripts/filter-abund.py                            |   21 +-
 scripts/filter-stoptags.py                         |   14 +-
 scripts/find-knots.py                              |   17 +-
 scripts/galaxy/gedlab.py                           |    4 +-
 scripts/interleave-reads.py                        |   65 +-
 scripts/load-graph.py                              |   23 +-
 scripts/load-into-counting.py                      |   54 +-
 scripts/make-initial-stoptags.py                   |    8 +-
 scripts/merge-partitions.py                        |   12 +-
 scripts/normalize-by-median.py                     |  215 +-
 scripts/partition-graph.py                         |   14 +-
 scripts/readstats.py                               |  184 ++
 scripts/sample-reads-randomly.py                   |   73 +-
 scripts/split-paired-reads.py                      |  125 +-
 scripts/trim-low-abund.py                          |  331 +++
 setup.cfg                                          |    4 +-
 setup.py                                           |   92 +-
 tests/khmer_tst_utils.py                           |   14 +-
 .../{badversion-k12.kh => badversion-k12.ct}       |  Bin
 tests/test-data/casava_18-pe.fq                    |   16 +
 tests/test-data/{normC20k20.kh => normC20k20.ct}   |  Bin
 ...-mixed.fq.pe => old-style-format-w-comments.fq} |   12 +-
 tests/test-data/overlap.curve                      |  200 --
 tests/test-data/paired-broken.fq.1                 |   12 +
 tests/test-data/paired-broken.fq.2                 |    9 +
 tests/test-data/paired-broken2.fq.1                |    8 +
 tests/test-data/paired-broken2.fq.2                |   13 +
 tests/test-data/paired-broken3.fq.1                |   12 +
 tests/test-data/paired-broken3.fq.2                |    9 +
 .../{paired-mixed.fq => paired-mixed-2.fq}         |    6 +-
 tests/test-data/paired-mixed-broken.fq             |    4 +
 tests/test-data/paired-mixed.fq                    |    4 +-
 tests/test-data/paired-mixed.fq.pe                 |    4 +-
 tests/test-data/random-20-a.fq                     |    4 +-
 tests/test-data/simple_1.fa                        |    6 -
 tests/test-data/simple_2.fa                        |    8 -
 tests/test-data/simple_3.fa                        |    4 -
 tests/test-data/test-abund-read-2.fq               |    4 +-
 ...red-mixed.fq.pe => test-abund-read-2.paired.fq} |   52 +-
 ...ed-mixed.fq.pe => test-abund-read-2.paired2.fq} |   52 +-
 tests/test-data/test-abund-read-paired.fa          |    4 +-
 ...aired-mixed.fq.pe => test-abund-read-paired.fq} |    4 +-
 tests/test-data/test-colors.fa                     |    8 -
 tests/test-data/test-est.fa                        |    2 -
 tests/test-data/test-graph3.fa                     |    8 -
 tests/test-data/test-graph4.fa                     |   12 -
 tests/test-data/test-graph6.fa                     |  464 ----
 tests/test-data/test-reads.fq.gz                   |  Bin 0 -> 763372 bytes
 tests/test-data/truncated.fq                       |    5 +
 tests/test_c_wrapper.py                            |   48 -
 tests/test_counting_hash.py                        |  144 +-
 tests/test_counting_single.py                      |   13 +-
 tests/test_filter.py                               |    2 +-
 tests/test_functions.py                            |  233 +-
 tests/test_graph.py                                |   24 +-
 tests/test_hashbits.py                             |   11 +-
 tests/test_hashbits_obj.py                         |   13 +-
 tests/test_hll.py                                  |  250 ++
 tests/test_labelhash.py                            |   11 +-
 tests/test_lump.py                                 |    2 +-
 tests/test_read_aligner.py                         |   10 +-
 tests/test_read_parsers.py                         |   78 +-
 tests/test_sandbox_scripts.py                      |  169 +-
 tests/test_script_arguments.py                     |   26 +-
 tests/test_scripts.py                              | 1524 +++++++++--
 tests/test_subset_graph.py                         |   30 +-
 tests/test_threaded_sequence_processor.py          |    6 +-
 tests/test_version.py                              |    2 +-
 third-party/.gitignore                             |    6 +
 third-party/smhasher/MurmurHash3.cc                |  147 ++
 third-party/smhasher/MurmurHash3.h                 |   34 +
 versioneer.py                                      |  331 ++-
 247 files changed, 9760 insertions(+), 5896 deletions(-)

diff --git a/.gitignore b/.gitignore
index 671ff12..543df72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,6 @@ pip-log.txt
 sphinx-contrib
 compile_commands.json
 .DS_Store
+pylint_report.txt
+pep8_report.txt
+pep257_report.txt
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 49916ac..5e5024d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1 +1,3 @@
 See [our development docs](https://khmer.readthedocs.org/en/latest/dev/).
+
+Be sure to copy and paste the [checklist](https://khmer.readthedocs.org/en/latest/dev/coding-guidelines-and-review.html#checklist) into the pull request comment.
diff --git a/ChangeLog b/ChangeLog
index 6019397..ec2c779 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,711 @@
+2015-05-13  Scott Sievert <sieve121 at umn.edu>
+
+   * changed "doc/LICENSE.txt" to "LICENSE" in tests/*, scripts/*, lib/*,
+   sandbox/*, khmer/*
+
+2015-05-13  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * doc/dev/getting-started.rst: added missing dev tools to install list
+
+2015-05-12  Kevin Murray  <spam at kdmurray.id.au>
+
+   * scripts/load-into-counting.py,tests/test_scripts.py: Add the number of
+   reads processed to the machine-readable output files of --summary-info.
+
+2015-05-11  Titus Brown  <titus at idyll.org>
+
+   * scripts/sample-reads-randomly.py: fixed boundary error in
+   sample-reads-randomly.py.
+   * tests/test_scripts.py: updated tests to correspond with correct
+   behavior of sample-reads-randomly.py.
+
+2015-04-23  Lex Nederbragt  <lex.nederbragt at ibv.uio.no>
+
+   * tests/test_scripts.py: added a test for extract-partitions:
+   whitespace in fasta header.
+
+2015-04-21  Daniel Standage  <daniel.standage at gmail.com>
+
+   * scripts/sample-reads-randomly.py: use broken paired reader to provide
+   paired-end read support.
+   * tests/test_scripts.py: change test results to compensate for the change in
+   implementation.
+
+2015-04-17  Jessica Mizzi  <mizzijes at msu.edu>
+
+   * tests/test_scripts.py: split test_extract_long_sequences 
+   into test_extract_long_sequences_fa and test_extract_long_sequences_fq
+
+2015-04-15  Elmar Bucher <buchere at ohsu.edu>
+
+   * khmer/doc/dev/getting-started.rst: add install information for OS X
+   users (MacPorts and Homebrew) as well as Linux users (Debian and
+   Ubuntu); also add a copyright header.
+
+2015-04-15  Susan Steinman  <steinman.tutoring at gmail.com>
+
+   * khmer/tests/khmer_tst_utils.py,doc/dev/a-quick-guide-to-testing.rst:
+      edited docstring and docs to remind people to make sure tests test
+      errors correctly
+
+2015-04-15  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * sandbox/make-coverage.py: tweak for importability
+
+2015-04-15  Sherine Awad  <sherine.awad at gmail.com>
+
+   * sandbox/make-coverage.py: restored, was deleted by accident
+
+2015-04-15  Susan Steinman  <steinman.tutoring at gmail.com>
+
+   * khmer/tests/test_scripts.py: changed tests that use `runscript` with
+      `fail_okay=True` to use asserts to confirm the correct failure type
+
+2015-04-15  Sarah Guermond  <sarah.guermond at gmail.com>
+
+   * doc/dev/getting-started.rst: clarified dev communication
+
+2015-04-15  Sarah Guermond  <sarah.guermond at gmail.com>
+
+   * scripts/trim-low-abund.py: implemented STDOUT output, redirected
+   existing print statements to STDERR, fixed existing & new PEP 8 issues 
+   * tests/test_scripts.py: added test for above changes
+
+2015-04-15  Andreas Härpfer  <ahaerpfer at gmail.com>
+
+   * doc/conf.py: disable Sphinx smart rendering
+
+2015-04-15  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * lib/hashtable.cc: remove memory leak
+   * scripts/readstats.py,tests/test_scripts.py: fix PEP8 violations
+
+2015-04-15  Susan Steinman  <steinman.tutoring at gmail.com>
+
+   * khmer/scripts/normalize-by-median.py: pass individual arg values to
+      functions instead of the argparse object
+
+2015-04-15  Thomas Fenzl  <thomas.fenzl at gmx.net>
+
+   * scripts/{count-overlap.py,readstats.py},tests/test_scripts.py:
+   added a --csv option to readstats;
+   updated documentation for count-overlap
+   * khmer/_khmermodule.cc: fixed missing error handling 
+   for hashbits_count_overlap
+
+2015-04-15  en zyme  <en_zyme at outlook.com>
+
+   * khmer/khmer/kfile.py: check_file_status() -> check_input_files()
+   * khmer/sandbox/{collect-reads,sweep-reads}.py,
+     khmer/scripts/{abundance-dist-single, abundance-dist, annotate-partitions,
+     count-median, count-overlap, do-partition, extract-paired-reads, 
+     extract-partitions, filter-abund-single, filter-abund, filter-stoptags,
+     find-knots, interleave-reads, load-graph, load-into-counting, 
+     make-initial-stoptags, merge-partitions, partition-graph,
+     sample-reads-randomly, split-paired-reads}.py:
+       check_file_status() -> check_input_files()
+   * khmer/tests/test_functions.py: check_file_status() -> check_input_files()
+
+2015-04-15  Andreas Härpfer  <ahaerpfer at gmail.com>
+
+   * khmer/utils.py: fix record checks to account for comments in old style
+   FASTQ data.
+   * tests/test-data/old-style-format-w-comments.fq: new test data.
+   * tests/test_scripts.py: add test against new test data.
+
+2015-04-15  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * doc/dev/release.txt: update release instructions to more thoroughly run
+   tests.
+
+2015-04-14  Susan Steinman  <steinman.tutoring at gmail.com>
+
+   * khmer/scripts/normalize-by-median.py: allow paired and unpaired
+      files to be normalized together; separate function for error checking
+   * khmer/tests/test_scripts.py: created test for paired/unpaired data
+
+2015-04-14  Scott Fay  <scott.a.fay at gmail.com>
+
+   * doc/user/getting-help.rst: added to user docs
+   * doc/index.rst: added link to getting-help doc
+   * README.rst: added link to getting-help doc
+
+2015-04-14  Scott Fay  <scott.a.fay at gmail.com>
+
+   * doc/index.rst: added GitHub repo and release notes page to main docs page
+
+2015-04-14  Susan Steinman  <steinman.tutoring at gmail.com>
+
+   * khmer/__init__.py,sandbox/{collect-reads,collect-variants,
+   saturate-by-median}.py,scripts/{do-partition,filter-abund-single,load-graph,
+   load-into-counting,normalize-by-median,trim-low-abund}.py: moved the
+   check-max-collisions logic out into khmer/__init__.py.
+   * khmer/tests/test_scripts.py: modified tests to account for new error
+   message
+
+2015-04-14  Josiah Seaman  <josiah at dnaskittle.com>
+
+   * lib/hashbits.cc: added Doxygen comments
+
+2015-04-14  Sarah Guermond  <sarah.guermond at gmail.com>
+
+   * doc/dev/coding-guidelines-and-review.rst: added copyright question
+   to commit checklist.
+
+2015-04-14  Andreas Härpfer  <ahaerpfer at gmail.com>
+
+   * */*.py: Make docstrings PEP 257 compliant.
+
+2015-04-14  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * khmer/_khmermodule.cc: catch more exceptions
+   * tests/test_{sandbox_scripts,subset_graph}.py: make tests more resilient
+
+2015-04-14  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * lib/counting.cc: Make CountingHash::abundance_distribution threadsafe
+   * khmer/_khmermodule.cc: remove newly unnecessary check for exception
+   * tests/test_scripts.py: added test to confirm the above
+
+2015-04-14  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * khmer/{__init__.py,_khmermodule.cc},lib/{counting,hashbits,hashtable,
+   subset}.cc: catch IO errors and report them.
+   * tests/test_hashbits.py: remove write to fixed path in /tmp
+   * tests/test_scripts.py: added test for empty counting table file
+
+2015-04-13  Thomas Fenzl  <thomas.fenzl at gmx.net>
+
+   * lib/{khmer_exception.hh,{counting,hashbits,hashtable,subset}.cc}: changed 
+   khmer_exception to use std::string to fix memory management.
+
+2015-04-13  Elmar Bucher  <buchere at ohsu.edu>
+
+   * scripts/normalize-by-median.py (main): introduced warning for when at
+   least two input files are named the same.
+
+2015-04-13  Andreas Härpfer  <ahaerpfer at gmail.com>
+
+   * doc/dev/getting-started.rst: clarify Conda usage
+
+2015-04-13  Daniel Standage  <daniel.standage at gmail.com>
+
+   * scripts/normalize-by-median.py: Added support to the diginorm script for
+   sending output to terminal (stdout) when using the conventional - as the
+   output filename. Also removed --append option.
+   * tests/test_scripts.py: Added functional test for diginorm stdout, removed
+   test of --append option.
+
+2015-04-13  Scott Fay  <scott.a.fay at gmail.com>
+
+   * scripts/filter-abund.py: added checking of input_table by
+   `check_file_status()`
+
+2015-04-13  David Lin
+
+   * scripts/abundance-dist.py: disambiguate documentation for force and 
+   squash options
+
+2015-04-13  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * README.rst,doc/index.rst: added link to gitter.im chat room
+   * doc/README.rst: removed ancient, outdated, and unused file
+
+2015-04-13  Thomas Fenzl  <thomas.fenzl at gmx.net>
+
+   * khmer/_khmermodule.cc: removed unused find_all_tags_truncate_on_abundance
+   from python api
+
+2015-04-10  Will Trimble
+
+   * tests/test_script_arguments.py: added a test to check for the empty file
+   warning when checking if a file exists
+
+2015-04-10  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * tests/test_scripts.py: added test for check_file_writable using
+   load_into_counting
+
+2015-04-10  Phillip Garland  <pgarland at gmail.com>
+
+   * khmer/file.py (check_file_writable): new function to check writability
+   * scripts/load-into-counting.py (main): early check to see if output is
+   writable
+
+2015-04-07  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+    * README.rst: add a ReadTheDocs badge
+
+2015-04-06  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * jenkins-build.sh: updated OS X warning flag to quiet the build a bit
+
+2015-04-06  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * Makefile: added 'convert-release-notes' target for MD->RST conversion
+   * doc/{,release-notes}/index.rst: include release notes in documentation
+   * doc/release-notes/*.rst: added pandoc converted versions of release notes
+   * jenkins-build.sh: use the Sphinx method to install doc dependencies
+
+2015-04-05  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * setup.py: use the release version of screed 0.8
+
+2015-04-05  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * doc/*/*.txt: all documentation sources have been renamed to use the rst
+   extension to indicate that they are reStructuredText files. This enables
+   use of rich text editors on GitHub and elsewhere.
+   * doc/conf.py: update Sphinx configuration to reflect this change
+   * doc/requirements.txt: added hint to install version 3.4.1 of Setuptools;
+   this file is used by ReadTheDocs only.
+
+2015-04-05  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * ChangeLog, lib/read_aligner.cc, sandbox/sweep-reads.py: fixed spelling
+   errors.
+
+2015-04-05  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/read_parsers.{cc,hh}: Work around an issue (#884) in SeqAn 1.4.x
+   handling of truncated sequence files. Also revamp exceptions
+   * khmer/_khmermodule.cc: Use new/updated exceptions handling malformed
+   FASTA/Q files.
+   * tests/test_read_parsers.py: add a test of parsing of truncated fastq
+   files
+
+2015-04-03  Luiz Irber  <irberlui at msu.edu>
+
+   * lib/hllcounter.cc: Use a for loop instead of transform in the merge
+   method; now works on C++11.
+
+2015-04-01  Luiz Irber  <irberlui at msu.edu>
+
+   * third-party/smhasher/MurmurHash3.{cc,h}: remove unused code, fix warnings.
+
+2015-04-01  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * Doxyfile.in: make documentation generation reproducible by removing
+   the timestamp
+
+2015-04-01  Alex Hyer  <theonehyer at gmail.com>
+
+   * scripts/find-knots.py: added force argument to check_file_status()
+   call in main().
+
+2015-03-31  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/read_parsers.{cc,hh}: add read counting to IParser and subclasses
+   * khmer/_khmermodule.cc,tests/test_read_parsers.py: add 'num_reads'
+   attribute to khmer.ReadParser objects in python land, and test it.
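
   A minimal sketch of the new attribute in use (the input file name and
   the full-iteration pattern are illustrative assumptions, not from the
   entry itself):

       import khmer

       parser = khmer.ReadParser("reads.fq")  # placeholder input file
       for read in parser:
           pass                               # consume every read
       print(parser.num_reads)                # reads seen by the parser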
+
+2015-03-28  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/hashbits.hh: Add Hashbits::n_tables() accessor
+
+2015-03-27  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * lib/read_parsers.{cc,hh}: Obfuscate SeqAn SequenceStream objects with a
+   wrapper struct, to avoid #include-ing the SeqAn headers.
+   * lib/Makefile: Don't install the SeqAn headers.
+
+2015-03-27  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/Makefile: Add libkhmer targets, clean up
+   * lib/get_version.py: Rewrite to use versioneer.py
+   * lib/.gitignore,third-party/.gitignore: Add more compiled outputs
+   * lib/.check_openmp.cc: add source that checks compiler for openmp support.
+   * lib/khmer.pc.in: add pkg-config file for khmer
+
+2015-03-23  Kevin Murray  <spam at kdmurray.id.au>
+
+   * lib/counting.hh: Add CountingHash::n_tables() accessor
+
+2015-03-16  Jessica Mizzi  <mizzijes at msu.edu>
+
+    * khmer/kfile.py: Added an error and system exit for nonexistent files
+    * tests/{test_scripts,test_functions}.py: Added tests for
+    check_file_status for file existence and force option
+
+2015-03-15  Kevin Murray  <spam at kdmurray.id.au>  &  Titus Brown  <titus at idyll.org>
+
+   * tests/test_counting_hash.py: Skip get_raw_tables test if python doesn't
+   have the memoryview type/function.
+
+2015-03-11  Erich Schwarz  <ems394 at cornell.edu>
+
+   * doc/introduction.txt: added URLs and brief descriptions for
+   khmer-relevant documentation, pointing to
+   http://khmer-protocols.readthedocs.org and
+   http://khmer-recipes.readthedocs.org.
+
+2015-03-10  Camille Scott  <camille.scott.w at gmail.com>
+
+   * lib/counting.hh, khmer/_khmermodule.cc: Expose the raw tables of
+   count-min sketches to the world of python using a buffer interface.
+   * tests/test_counting_hash.py: Tests of the above functionality.
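
   A hedged sketch of what the buffer interface enables; the
   get_raw_tables() name is taken from the test mentioned in the
   2015-03-15 entry above, and the constructor arguments are illustrative:

       import khmer

       ct = khmer.new_counting_hash(20, 1e6, 4)  # k=20, table size, 4 tables
       ct.consume("ATGGCTGTAGCTAGCTAGCATCGT")    # count one sequence
       tables = ct.get_raw_tables()              # one buffer per table
       view = memoryview(tables[0])              # zero-copy view of the counts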
+
+2015-03-08  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * Makefile: make the 'pep8' target more verbose
+   * jenkins-build.sh: specify setuptools version
+   * scripts/{abundance-dist,annotate-partitions,count-median,do-partition,
+   extract-paired-reads,extract-partitions,filter-stoptags,find-knots,
+   interleave-reads,merge-partitions,partition-graph,sample-reads-randomly,
+   split-paired-reads}.py,setup.py: fix new PEP8 errors
+   * setup.py: specify that this is a Python 2 only project (for now)
+   * tests/test_{counting_single,subset_graph}.py: make explicit the use of
+   floor division behavior.
+
+2015-03-06  Titus Brown  <titus at idyll.org>
+
+   * sandbox/{collect-reads.py,saturate-by-median.py}: update for 'force'
+   argument in khmer.kfile functions, so that khmer-recipes compile.
+
+2015-03-02  Titus Brown  <titus at idyll.org>
+
+   * sandbox/{combine-pe.py,compare-partitions.py,count-within-radius.py,
+   degree-by-position.py,dn-identify-errors.py,ec.py,error-correct-pass2.py,
+   find-unpart.py,normalize-by-align.py,read-aligner.py,shuffle-fasta.py,
+   to-casava-1.8-fastq.py,uniqify-sequences.py}: removed from sandbox/ as
+   obsolete/unmaintained.
+   * sandbox/README.rst: updated to reflect the promotion of readstats.py
+   and trim-low-abund.py from sandbox/ to scripts/.
+   * doc/dev/scripts-and-sandbox.txt: updated to reflect sandbox/ script name
+   preferences, and note to remove from README.rst when moved over to scripts/.
+
+2015-02-27  Kevin Murray  <spam at kdmurray.id.au>
+
+   * scripts/load-into-counting.py: Be verbose in the help text, to clarify
+   what the -b flag does.
+
+2015-02-25  Hussien Alameldin  <hussien at msu.edu>
+
+   * sandbox/bloom_count.py: renamed to bloom-count.py
+   * sandbox/bloom_count_intersection.py: renamed to
+     bloom-count-intersection.py
+   * sandbox/read_aligner.py: renamed to read-aligner.py
+
+2015-02-26  Tamer A. Mansour  <drtamermansour at gmail.com>
+
+   * scripts/abundance-dist-single.py: Use CSV format for the histogram.
+   Includes column headers.
+   * scripts/count-overlap.py: Use CSV format for the curve file output.
+   Includes column headers.
+   * tests/test_scripts.py: add test functions for the --csv option in
+   abundance-dist-single.py and count-overlap.py
+
+2015-02-26  Jacob Fenton  <bocajnotnef at gmail.com>
+
+   * doc/introduction.txt, doc/user/choosing-table-sizes.txt: updated docs
+   to reference the correct links and names
+
+2015-02-25  Aditi Gupta  <agupta at msu.edu>
+
+   * sandbox/{collect-reads.py, correct-errors.py, 
+   normalize-by-median-pct.py, slice-reads-by-coverage.py, 
+   sweep-files.py, sweep-reads3.py, to-casava-1.8-fastq.py}: 
+   Replaced 'accuracy' with 'quality'. Fixes #787.
+
+2015-02-25  Tamer A. Mansour  <drtamermansour at gmail.com>
+
+   * scripts/normalize-by-median.py: changed the default behavior to
+   overwrite the sequences output file. Also added a new argument --append to
+   append new reads to the output file.
+   * tests/test_scripts.py: add a test for the --append option in
+   normalize-by-median.py
+
+2015-02-25  Hussien Alameldin  <hussien at msu.edu>
+
+   * khmer/khmer_args.py: add 'hll' citation entry "Irber and Brown,
+     unpublished." to the _alg dict.
+   * sandbox/unique-kmers.py: add call to 'info' with 'hll' in the
+     algorithms list.
+
+2015-02-24  Luiz Irber  <irberlui at msu.edu>
+
+    * khmer/_khmermodule.cc: expose HLL internals as read-only attributes.
+    * lib/hllcounter.{cc,hh}: simplify error checking, add getters for HLL.
+    * tests/test_hll.py: add test cases for increasing coverage, also fix
+    some of the previous ones using the new HLL read-only attributes.
+
+2015-02-24  Luiz Irber  <irberlui at msu.edu>
+
+   * khmer/_khmermodule.cc: Fix coding style violations.
+
+2015-02-24  Luiz Irber  <irberlui at msu.edu>
+
+   * khmer/_khmermodule.cc: Update extension to use recommended practices,
+   PyLong instead of PyInt, Type initialization, PyBytes instead of PyString.
+   Replace common initialization with explicit type structs, and all types
+   conform to the CPython checklist.
+
+2015-02-24  Tamer A. Mansour  <drtamermansour at gmail.com>
+
+   * scripts/abundance-dist.py: Use CSV format for the histogram. Includes
+   column headers.
+   * tests/test_scripts.py: add coverage for the new --csv option in
+   abundance-dist.py
+
+2015-02-24  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * jenkins-build.sh: remove examples/stamps/do.sh testing for now; takes too
+   long to run on every build. Related to #836
+
+2015-02-24  Kevin Murray  <spam at kdmurray.id.au>
+
+   * scripts/interleave-reads.py: Make the output file name print nicely.
+
+2015-02-23  Titus Brown  <titus at idyll.org>
+
+   * khmer/utils.py: added 'check_is_left' and 'check_is_right' functions;
+   fixed bug in check_is_pair.
+   * tests/test_functions.py: added tests for now-fixed bug in check_is_pair,
+   as well as 'check_is_left' and 'check_is_right'.
+   * scripts/interleave-reads.py: updated to handle Casava 1.8 formatting.
+   * scripts/split-paired-reads.py: fixed bug where sequences with bad names
+   got dropped; updated to properly handle Casava 1.8 names in FASTQ files.
+   * scripts/count-median.py: added '--csv' output format; updated to properly
+   handle Casava 1.8 FASTQ format when '--csv' is specified.
+   * scripts/normalize-by-median.py: replaced pair checking with
+   utils.check_is_pair(), which properly handles Casava 1.8 FASTQ format.
+   * tests/test_scripts.py: updated script tests to check Casava 1.8
+   formatting; fixed extract-long-sequences.py test.
+   * scripts/{extract-long-sequences.py,extract-paired-reads.py,
+   fastq-to-fasta.py,readstats.py,sample-reads-randomly.py,trim-low-abund.py},
+   khmer/thread_utils.py: updated to handle Casava 1.8 FASTQ format by
+   setting parse_description=False in screed.open(...).
+   * tests/test-data/{paired-mixed.fq,paired-mixed.fq.pe,random-20-a.fq,
+   test-abund-read-2.fq,test-abund-read-2.paired2.fq,test-abund-read-paired.fa,
+   test-abund-read-paired.fq}: switched some sequences over to Casava 1.8
+   format, to test format handling.
+   * tests/test-data/{casava_18-pe.fq,test-reads.fq.gz}: new test file for
+   Casava 1.8 format handling.
+   * tests/test-data/{overlap.curve,paired-mixed.fq.1,paired-mixed.fq.2,
+   simple_1.fa,simple_2.fa,simple_3.fa,test-colors.fa,test-est.fa,
+   test-graph3.fa,test-graph4.fa,test-graph6.fa}: removed no-longer used
+   test files.
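
   The common mechanism in the script updates above is keeping the full
   Casava 1.8 read name intact; a minimal sketch, assuming a FASTQ file
   with names like "@read1 1:N:0:TTAGGC":

       import screed

       for record in screed.open("reads.fq", parse_description=False):
           # record.name keeps the " 1:N:0:..." suffix, which is needed
           # to tell read 1 from read 2 in Casava 1.8 output
           print(record.name)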
+
+2015-02-23  Titus Brown  <titus at idyll.org>
+
+   * setup.cfg: set !linux flag by default, to avoid running tests that
+   request too much memory when 'nosetests' is run.  (This is an OS difference
+   where Mac OS X attempts to allocate as much memory as requested, while
+   on Linux it just crashes).
+
+2015-02-23  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * khmer/{__init__.py,_khmermodule.cc},lib/{hashbits,hashtable}.{cc,hh},
+   tests/test_{c_wrapper,read_parsers}.py: remove unused callback
+   functionality
+
+2015-02-23  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * setup.py: point to the latest screed release candidate to work around
+   versioneer bug.
+
+2015-02-23  Tamer A. Mansour  <drtamermansour at gmail.com>
+
+   * examples/stamps/do.sh: the argument --savehash was changed to
+   --savetable, and the file mode was changed to u+x
+   * jenkins-build.sh: add a test to check for the do.sh file
+
+2015-02-23  Kevin Murray  <spam at kdmurray.id.au>
+
+   * khmer/load_pe.py: Remove unused/undocumented module. See #784
+
+2015-02-21  Hussien Alameldin  <hussien at msu.edu>
+
+   * sandbox/normalize-by-align.py: added 2013-2015 copyright header
+   * sandbox/read_aligner.py: added 2013-2015 copyright header
+   * sandbox/slice-reads-by-coverage.py: added 2014 copyright header
+
+2015-02-21  Hussien Alameldin  <hussien at msu.edu>
+
+   * sandbox/calc-best-assembly.py, collect-variants.py, graph-size.py:
+   set executable bits using "chmod +x"
+
+2015-02-21  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * khmer/_khmermodule.cc,lib/read_parsers.cc: Rename the 'accuracy' attribute
+   of ReadParser Reads to 'quality'
+   * tests/test_read_parsers.py: update test to match
+
+2015-02-21  Rhys Kidd  <rhyskidd at gmail.com>
+
+   * sandbox/{calc-best-assembly,calc-error-profile,normalize-by-align,
+   read_aligner,slice-reads-by-coverage}.py: reference /usr/bin/env python2
+   in the #! line.
+
+2015-02-21  Rhys Kidd  <rhyskidd at gmail.com>
+
+   * sandbox/sweep-paired-reads.py: remove empty script
+
+2015-02-20  Titus Brown  <titus at idyll.org>
+
+   * doc/dev/scripts-and-sandbox.txt: policies for sandbox/ and scripts/
+   content, and a process for adding new command line scripts into scripts/.
+   * doc/dev/index.txt: added scripts-and-sandbox to developer doc index.
+
+2015-02-20  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+    * khmer/_khmermodule.cc: convert C++ out-of-memory exceptions to Python
+    out-of-memory exceptions.
+    * tests/test_{counting_hash,counting_single,hashbits_obj,labelhash,
+    scripts}.py: partial tests for the above
+
+2015-02-20  Aditi Gupta  <agupta at msu.edu>
+
+   * doc/dev/coding-guidelines-and-review.txt: fixed spelling errors.
+
+2015-02-19  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+   * doc/dev/coding-guidelines-and-review.txt: added checklist for new CPython
+   types
+   * khmer/_khmermodule.cc: Update ReadAligner to follow the new guidelines
+
+2015-02-19  Daniel Standage  <daniel.standage at gmail.com>
+
+   * Makefile: add a new Makefile target `help` to list and describe all
+   common targets.
+   * khmer/utils.py, tests/test_functions.py: minor style fixes.
+
+2015-02-16  Titus Brown  <titus at idyll.org>
+
+   * khmer/utils.py: added 'check_is_pair', 'broken_paired_reader', and
+   'write_record_pair' functions.
+   * khmer/khmer_args.py: added streaming reference for future algorithms
+   citation.
+   * tests/test_functions.py: added unit tests for 'check_is_pair' and
+   'broken_paired_reader'.
+   * scripts/trim-low-abund.py: upgraded to track pairs properly; added
+   proper get_parser information; moved to scripts/ from sandbox/.
+   * tests/test_scripts.py: added paired-read tests for
+   trim-low-abund.py.
+   * tests/test-data/test-abund-read-2.paired.fq: data for paired-read tests.
+   * scripts/extract-paired-reads.py: removed 'is_pair' in favor of
+   'check_is_pair'; switched to using 'broken_paired_reader'; fixed use
+   of sys.argv.
+   * scripts/sample-reads-randomly.py: removed unused 'output_single' function.
+   * doc/user/scripts.txt: added trim-low-abund.py.
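
   A hedged sketch of the new pairing helpers named above (the yielded
   tuple shape and argument order are assumptions based on this entry):

       import screed
       from khmer.utils import broken_paired_reader, write_record_pair

       with open("pairs-out.fq", "w") as out:
           for n, is_pair, read1, read2 in broken_paired_reader(
                   screed.open("reads.fq")):
               if is_pair:                    # both mates present
                   write_record_pair(read1, read2, out)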
+
+2015-02-13  Qingpeng Zhang  <qingpeng at msu.edu>
+
+   * scripts/sample-reads-randomly.py: fix a string-formatting glitch.
+
+2015-02-11  Titus Brown  <titus at idyll.org>
+
+   * khmer/_khmermodule.cc: fixed k-mer size checking; updated some error
+   messages.
+   * tests/test_graph.py: added test for k-mer size checking in find_all_tags.
+
+2015-02-09  Titus Brown  <titus at idyll.org>
+
+   * scripts/split-paired-reads.py: added -1 and -2 options to allow fine-
+   grained specification of output locations; switch to using write_record
+   instead of script-specific output functionality.
+   * tests/test_scripts.py: added accompanying tests.
+
+2015-02-09  Bede Constantinides  <bede.constantinides at manchester.ac.uk>
+
+   * scripts/split-paired-reads.py: added -o option to allow specification
+   of an output directory
+   * tests/test_scripts.py: added accompanying test for split-paired-reads.py
+
+2015-02-01  Titus Brown  <titus at idyll.org>
+
+   * khmer/_khmermodule.cc: added functions hash_find_all_tags_list and
+   hash_get_tags_and_positions to CountingHash objects.
+   * tests/test_counting_hash.py: added tests for new functionality.
+
+2015-01-25  Titus Brown  <titus at idyll.org>
+
+   * sandbox/correct-errors.py: fixed sequence output so that quality
+   scores length always matches the sequence length; fixed argparse
+   setup to make use of default parameter.
+
+2015-01-25  Titus Brown  <titus at idyll.org>
+
+    * sandbox/readstats.py: fixed non-functional string interpolation at end;
+    added -o to send output to a file; moved to scripts/.
+    * doc/user/scripts.txt: added readstats description.
+    * tests/test_scripts.py: added tests for readstats.py
+
+2015-01-23  Jessica Mizzi  <mizzijes at msu.edu>
+
+    * khmer/utils.py: Added a single write_record function to write FASTA/Q
+    * scripts/{abundance-dist,extract-long-sequences,extract-partitions,
+    interleave-reads,normalize-by-median,sample-reads-randomly}.py: 
+    Replaced FASTA/Q writing method with write_record
+
+2015-01-23  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+    * Makefile: remove the user installs for the `install-dependencies` target
+
+2015-01-23  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+    * README.rst,doc/user/install.txt: clarify that we support Python 2.7.x
+    and not Python 3.
+
+2015-01-21  Luiz Irber  <irberlui at msu.edu>
+
+    * lib/hllcounter.{cc,hh}: Implemented a HyperLogLog counter.
+    * khmer/{_khmermodule.cc, __init__.py}: added HLLCounter class
+    initialization and wrapper.
+    * tests/test_hll.py: added test functions for the new
+    HyperLogLog counter.
+    * sandbox/unique-kmers.py: implemented a CLI script for
+    approximate cardinality estimation using a HyperLogLog counter.
+    * setup.cfg, Makefile, third-party/smhasher/MurmurHash3.{cc,h},
+    lib/kmer_hash.{cc,hh}, setup.py: added MurmurHash3 hash function
+    and configuration.
+    * setup.py: added a function to check if compiler supports OpenMP.
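
   A short sketch of the HyperLogLog counter as described; the
   constructor arguments (error rate, then k-mer size) are assumptions:

       import khmer

       hll = khmer.HLLCounter(0.01, 20)   # ~1% error, k=20
       hll.consume_string("ACGTAGCTAGCTAGCTAGCTAGCT")
       print(hll.estimate_cardinality())  # approx. number of unique 20-mers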
+
+2015-01-14  Reed Cartwright  <cartwright at asu.edu>
+
+    * doc/dev/getting-started.txt: Added install information for
+    Arch Linux
+
+2015-01-14  Michael R. Crusoe  <mcrusoe at msu.edu>
+
+    * doc/user/{blog-posts,guide}.txt,examples/stamps/do.sh,sandbox/{
+    collect-reads,error-correct-pass2,filter-median-and-pct,filter-median,
+    read_aligner,split-sequences-by-length}.py,scripts/{filter-abund,
+    load-into-counting}.py,tests/test_{counting_hash,hashbits,scripts}.py:
+    remove references to ".kh" files, replacing them with ".pt" or ".ct" as
+    appropriate
+    * tests/test-data/{badversion-k12,normC20k20}.kh: renamed to "*.ct"
+
+2015-01-13  Daniel Standage  <daniel.standage at gmail.com>
+
+    * tests/khmer_tst_utils.py, tests/test_sandbox_scripts.py: removed
+    unused module imports
+    * .gitignore: added pylint_report.txt so that it is not accidentally
+    committed after running make diff_pylint_report
+    * khmer/file.py -> khmer/kfile.py: renamed internal file handling
+    module to avoid collisions with the Python builtin file
+    * sandbox/collect-reads.py, sandbox/saturate-by-median.py,
+    sandbox/sweep-files.py, sandbox/sweep-reads.py,
+    scripts/abundance-dist-single.py, scripts/abundance-dist.py,
+    scripts/annotate-partitions.py, scripts/count-median.py,
+    scripts/count-overlap.py, scripts/do-partition.py,
+    scripts/extract-long-sequences.py, scripts/extract-paired-reads.py,
+    scripts/extract-partitions.py, scripts/filter-abund-single.py,
+    scripts/filter-abund.py, scripts/filter-stoptags.py,
+    scripts/find-knots.py, scripts/interleave-reads.py,
+    scripts/load-graph.py, scripts/load-into-counting.py,
+    scripts/make-initial-stoptags.py, scripts/merge-partitions.py,
+    scripts/normalize-by-median.py, scripts/partition-graph.py,
+    scripts/sample-reads-randomly.py, scripts/split-paired-reads.py,
+    tests/test_script_arguments.py, tests/test_scripts.py: changed all
+    occurrences of `file` to `kfile`
+
+2015-01-09  Rhys Kidd  <rhyskidd at gmail.com>
+
+    * lib/khmer.hh: implement generic NONCOPYABLE() macro guard
+    * lib/hashtable.hh: apply NONCOPYABLE macro guard in case future
+    modifications to Hashtable might expose potential memory corruption
+    via the default copy constructor
+
 2014-12-30  Michael Wright  <wrig517 at msu.edu>
 
     * tests/test_scripts.py: Attained complete testing coverage for 
@@ -154,7 +862,7 @@
 2014-12-01  Kevin Murray  <spam at kdmurray.id.au>
 
     * tests/test_scripts.py: Stop a test from making a temporary output file
-    in the current dir by explictly specifying an output file.
+    in the current dir by explicitly specifying an output file.
 
 2014-12-01  Kevin Murray  <spam at kdmurray.id.au>
 
@@ -619,7 +1327,7 @@
 2014-06-16  Michael R. Crusoe  <mcrusoe at msu.edu>
 
     * scripts/{abundance-dist-single,filter-abund-single,load-into-counting,
-    normalize-by-median,load-graph}.py: restore Python 2.6 compatability for
+    normalize-by-median,load-graph}.py: restore Python 2.6 compatibility for
     Debian 6, RedHat 6, SL6, and Ubuntu 10.04 LTS users.
 
 2014-06-15  Titus Brown  <t at idyll.org>
diff --git a/Doxyfile.in b/Doxyfile.in
index 193e9db..d76ba0b 100644
--- a/Doxyfile.in
+++ b/Doxyfile.in
@@ -1170,7 +1170,7 @@ HTML_COLORSTYLE_GAMMA  = 80
 # The default value is: YES.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_TIMESTAMP         = YES
+HTML_TIMESTAMP         = NO
 
 # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
 # documentation will contain sections that can be hidden and shown after the
diff --git a/Makefile b/Makefile
index 2506cf1..4e1d371 100644
--- a/Makefile
+++ b/Makefile
@@ -8,21 +8,38 @@ CPPSOURCES=$(wildcard lib/*.cc lib/*.hh khmer/_khmermodule.cc)
 PYSOURCES=$(wildcard khmer/*.py scripts/*.py)
 SOURCES=$(PYSOURCES) $(CPPSOURCES) setup.py
 DEVPKGS=sphinxcontrib-autoprogram pep8==1.5.7 diff_cover \
-autopep8 pylint coverage gcovr nose screed
+autopep8 pylint coverage gcovr nose screed pep257
 
 GCOVRURL=git+https://github.com/nschum/gcovr.git@never-executed-branches
 VERSION=$(shell git describe --tags --dirty | sed s/v//)
 CPPCHECK=ls lib/*.cc khmer/_khmermodule.cc | grep -v test | cppcheck -DNDEBUG \
 	 -DVERSION=0.0.cppcheck -UNO_UNIQUE_RC --enable=all \
 	 --file-list=- --platform=unix64 --std=c++03 --inline-suppr \
-	 --quiet -Ilib -Ithird-party/bzip2 -Ithird-party/zlib
+	 --quiet -Ilib -Ithird-party/bzip2 -Ithird-party/zlib \
+	 -Ithird-party/smhasher
 
-all: khmer/_khmermodule.so
+UNAME := $(shell uname)
+ifeq ($(UNAME),Linux)
+	TESTATTR='!known_failing,!jenkins'
+else
+	TESTATTR='!known_failing,!jenkins,!linux'
+endif
+
+## all         : default task; compile C++ code, build shared object library
+all: sharedobj
+
+## help        : print this help message and exit
+help: Makefile
+	@sed -n 's/^##//p' $<
+
+## install-dep : install most of the development dependencies via pip
+install-dep: install-dependencies
 
 install-dependencies:
-	pip2 install --user --upgrade $(DEVPKGS) || pip2 install --upgrade \
-		$(DEVPKGS) || pip install --user --upgrade $(DEVPKGS) || pip \
-		install --upgrade $(DEVPKGS)
+	pip2 install --upgrade $(DEVPKGS) || pip install --upgrade $(DEVPKGS)
+
+## sharedobj   : build khmer shared object file
+sharedobj: khmer/_khmermodule.so
 
 khmer/_khmermodule.so: $(CPPSOURCES)
 	./setup.py build_ext --inplace
@@ -32,14 +49,17 @@ coverage-debug: $(CPPSOURCES)
 		build_ext --debug --inplace --libraries gcov
 	touch coverage-debug
 
+## install     : install the khmer module and scripts
 install: FORCE
 	./setup.py build install
 
+## dist        : create a module package for distribution
 dist: dist/khmer-$(VERSION).tar.gz
 
 dist/khmer-$(VERSION).tar.gz: $(SOURCES)
 	./setup.py sdist
 
+## clean       : clean up all temporary / machine-generated files
 clean: FORCE
 	cd lib && ${MAKE} clean || true
 	cd tests && rm -rf khmertest_* || true
@@ -53,6 +73,7 @@ debug: FORCE
 	export CFLAGS="-pg -fprofile-arcs"; python setup.py build_ext --debug \
 		--inplace
 
+## doc         : render documentation in HTML
 doc: build/sphinx/html/index.html
 
 build/sphinx/html/index.html: $(SOURCES) $(wildcard doc/*.txt) doc/conf.py all
@@ -61,6 +82,7 @@ build/sphinx/html/index.html: $(SOURCES) $(wildcard doc/*.txt) doc/conf.py all
 	@echo '--> docs in build/sphinx/html <--'
 	@echo ''
 
+## pdf         : render documentation as a PDF file
 pdf: build/sphinx/latex/khmer.pdf
 
 build/sphinx/latex/khmer.pdf: $(SOURCES) doc/conf.py $(wildcard doc/*.txt)
@@ -72,11 +94,14 @@ build/sphinx/latex/khmer.pdf: $(SOURCES) doc/conf.py $(wildcard doc/*.txt)
 cppcheck-result.xml: $(CPPSOURCES)
 	${CPPCHECK} --xml-version=2 2> cppcheck-result.xml
 
+## cppcheck    : run static analysis on C++ code
 cppcheck: $(CPPSOURCES)
 	${CPPCHECK}
 
+## pep8        : check Python code style
 pep8: $(PYSOURCES) $(wildcard tests/*.py)
-	pep8 --exclude=_version.py setup.py khmer/ scripts/ tests/ || true
+	pep8 --exclude=_version.py  --show-source --show-pep8 setup.py khmer/ \
+		scripts/ tests/ || true
 
 pep8_report.txt: $(PYSOURCES) $(wildcard tests/*.py)
 	pep8 --exclude=_version.py setup.py khmer/ scripts/ tests/ \
@@ -85,17 +110,33 @@ pep8_report.txt: $(PYSOURCES) $(wildcard tests/*.py)
 diff_pep8_report: pep8_report.txt
 	diff-quality --violations=pep8 pep8_report.txt
 
+## pep257      : check Python docstring style
+pep257: $(PYSOURCES) $(wildcard tests/*.py)
+	pep257 --ignore=D100,D101,D102,D103 \
+		setup.py khmer/ scripts/ tests/ || true
+
+pep257_report.txt: $(PYSOURCES) $(wildcard tests/*.py)
+	pep257 setup.py khmer/ scripts/ tests/ \
+		> pep257_report.txt 2>&1 || true
+
+diff_pep257_report: pep257_report.txt
+	diff-quality --violations=pep8 pep257_report.txt
+
+## astyle      : fix most C++ code indentation and formatting
 astyle: $(CPPSOURCES)
 	astyle -A10 --max-code-length=80 $(CPPSOURCES)
 
+## autopep8    : fix most Python code indentation and formatting
 autopep8: $(PYSOURCES) $(wildcard tests/*.py)
 	autopep8 --recursive --in-place --exclude _version.py --ignore E309 \
 		setup.py khmer/*.py scripts/*.py tests/*.py
 
 # A command to automatically run astyle and autopep8 on appropriate files
+## format      : check/fix all code indentation and formatting (runs astyle and autopep8)
 format: astyle autopep8
 	# Do nothing
 
+## pylint      : run static code analysis on Python code
 pylint: $(PYSOURCES) $(wildcard tests/*.py)
 	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
 		setup.py khmer/[!_]*.py khmer/__init__.py scripts/*.py tests \
@@ -129,8 +170,8 @@ coverage-report: .coverage
 	coverage report
 
 coverage-gcovr.xml: coverage-debug .coverage
-	gcovr --root=. --branches --xml --output=coverage-gcovr.xml \
-		--gcov-exclude='.*zlib.*|.*bzip2.*|.*seqan.*' --xml
+	gcovr --root=. --branches --output=coverage-gcovr.xml --xml \
+          --gcov-exclude='.*zlib.*|.*bzip2.*|.*smhasher.*|.*seqan.*'
 
 diff-cover: coverage-gcovr.xml coverage.xml
 	diff-cover coverage-gcovr.xml coverage.xml
@@ -140,8 +181,9 @@ diff-cover.html: coverage-gcovr.xml coverage.xml
 		--html-report diff-cover.html
 
 nosetests.xml: FORCE
-	./setup.py nosetests --with-xunit
+	./setup.py nosetests --with-xunit --attr ${TESTATTR}
 
+## doxygen     : generate documentation of the C++ and Python code
 doxygen: doc/doxygen/html/index.html
 
 doc/doxygen/html/index.html: ${CPPSOURCES} ${PYSOURCES}
@@ -154,14 +196,34 @@ lib:
 	cd lib && \
 	$(MAKE)
 
+# Runs a test of ./lib
+libtest: FORCE
+	rm -rf install_target
+	mkdir -p install_target
+	cd lib && \
+	 $(MAKE) clean && \
+	 $(MAKE) all && \
+	 $(MAKE) install PREFIX=../install_target
+	test -d install_target/include
+	test -f install_target/include/khmer.hh
+	test -d install_target/lib
+	test -f install_target/lib/libkhmer.a
+	$(CXX) -o install_target/test-prog-static -I install_target/include \
+		lib/test-compile.cc install_target/lib/libkhmer.a
+	$(CXX) -o install_target/test-prog-dynamic -I install_target/include \
+		-L install_target/lib lib/test-compile.cc -lkhmer
+	rm -rf install_target
+
+## test        : run the khmer test suite
 test: FORCE
 	./setup.py develop
-	./setup.py nosetests
+	./setup.py nosetests --attr ${TESTATTR}
 
 sloccount.sc: ${CPPSOURCES} ${PYSOURCES} $(wildcard tests/*.py) Makefile
 	sloccount --duplicates --wide --details lib khmer scripts tests \
 		setup.py Makefile > sloccount.sc
 
+## sloccount   : count lines of code
 sloccount: 
 	sloccount lib khmer scripts tests setup.py Makefile
 
@@ -206,4 +268,10 @@ coverity-configure:
 compile_commands.json: clean
 	export PATH=$(shell echo $$PATH | sed 's=/usr/lib/ccache:==g') ; \
 		bear -- ./setup.py build_ext
+
+convert-release-notes:
+	for file in doc/release-notes/*.md; do \
+		pandoc --from=markdown --to=rst $${file} > $${file%%.md}.rst; \
+		done
+
 FORCE:
diff --git a/README.rst b/README.rst
index 993246b..97bd1c4 100644
--- a/README.rst
+++ b/README.rst
@@ -3,21 +3,26 @@ khmer
 
 Welcome to khmer: k-mer counting, filtering and graph traversal FTW!
 
+.. image:: https://readthedocs.org/projects/khmer/badge
+    :target: https://readthedocs.org/projects/khmer/
+    :alt: Documentation Status
 .. image:: https://badge.fury.io/py/khmer.png
     :target: http://badge.fury.io/py/khmer
+    :alt: PyPI Package
 .. image:: https://pypip.in/d/khmer/badge.png
     :target: https://crate.io/packages/khmer
+    :alt: Downloads Counter
 .. image:: https://pypip.in/license/khmer/badge.png
     :target: https://pypi.python.org/pypi/khmer/
     :alt: License
 .. image:: http://ci.ged.msu.edu/job/khmer-master/badge/icon
     :target: http://ci.ged.msu.edu/job/khmer-master/
 .. image:: https://landscape.io/github/ged-lab/khmer/master/landscape.png
-   :target: https://landscape.io/github/ged-lab/khmer/master
-   :alt: Python Code Health
+    :target: https://landscape.io/github/ged-lab/khmer/master
+    :alt: Python Code Health
 .. image:: https://scan.coverity.com/projects/621/badge.svg
-   :target: https://scan.coverity.com/projects/621
-   :alt: Coverity Scan Build Status
+    :target: https://scan.coverity.com/projects/621
+    :alt: Coverity Scan Build Status
 
 The official repository is at
 
@@ -35,7 +40,10 @@ them, please visit the following URLs:
 
 -  Announcements: http://lists.idyll.org/listinfo/khmer-announce
 
-You can contact the current maintainers at khmer-project at idyll.org.
+We chat at https://gitter.im/ged-lab/khmer and the maintainers can be
+contacted at khmer-project at idyll.org.
+
+For help, please see this guide: http://khmer.readthedocs.org/user/getting-help.html
 
 IMPORTANT NOTE: CITE US!
 ------------------------
@@ -47,7 +55,7 @@ citation information.
 INSTALL INSTRUCTIONS:
 ---------------------
 
-khmer requires a 64-bit operating system and Python 2.7+. Linux users will need 
+khmer requires a 64-bit operating system and Python 2.7.x. Linux users will need
 the Python development libraries and gcc. OS X users may need XCode installed.
 
 In short:
@@ -64,9 +72,6 @@ khmer is under the BSD license; see doc/LICENSE.txt. Distribution,
 modification and redistribution, incorporation into other software, and
 pretty much everything else is allowed.
 
-khmer project contributors also edit documents collaboratively at `our wiki 
-<https://github.com/ged-lab/khmer/wiki/_pages>`__.
-
 MRC 2014-05-14
 
 .. vim: set filetype=rst
diff --git a/doc/LICENSE.txt b/doc/LICENSE.rst
similarity index 100%
rename from doc/LICENSE.txt
rename to doc/LICENSE.rst
diff --git a/doc/Makefile.bak b/doc/Makefile.bak
deleted file mode 100644
index 955d223..0000000
--- a/doc/Makefile.bak
+++ /dev/null
@@ -1,4 +0,0 @@
-%.html : %.txt
-	rst2html.py $< $@
-
-all: README.html
diff --git a/doc/README.html b/doc/README.html
deleted file mode 100644
index 897cb09..0000000
--- a/doc/README.html
+++ /dev/null
@@ -1,403 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
-<title></title>
-<style type="text/css">
-
-/*
-:Author: David Goodger (goodger at python.org)
-:Id: $Id: html4css1.css 5196 2007-06-03 20:25:28Z wiemann $
-:Copyright: This stylesheet has been placed in the public domain.
-
-Default cascading style sheet for the HTML output of Docutils.
-
-See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
-customize this style sheet.
-*/
-
-</style>
-</head>
-<body>
-<div class="document">
-
-
-<div class="section" id="khmer-a-simple-k-mer-counting-library">
-<h1>khmer, a simple k-mer counting library</h1>
-<p>khmer is a simple C++ library for counting k-mers in DNA sequences.
-It has a complete Python wrapping and should be pretty darned fast;
-it's intended for genome-scale k-mer counting.</p>
-<p>The current version is <strong>0.2</strong>.  I haven't used it for much myself,
-but the test code functions & it should work as advertised.</p>
-<p>khmer operates by building a 'ktable', a table of 4**k counters.
-It then maps each k-mer into this table with a simple
-(and reversible) hash function.</p>
-<p>Right now, only the Python interface is documented here.  The C++
-interface is essentially identical; if you need to use it and want
-it documented, drop me a line.</p>
-</div>
-<div class="section" id="counting-speed-and-memory-usage">
-<h1>Counting Speed and Memory Usage</h1>
-<p>On the 5 mb <em>Shewanella oneidensis</em> genome, khmer takes less than a second
-to count all k-mers, for any k between 6 and 12.  At 13 it craps out
-because the table goes over my default stack size limit.</p>
-<p>Approximate memory usage can be calculated by finding the size of a
-<tt class="docutils literal"><span class="pre">long</span> <span class="pre">long</span></tt> on your machine and then multiplying that by 4**k.
-For a 12bp wordsize, this works out to 16384*1024; on an Intel-based
-processor running Linux, <tt class="docutils literal"><span class="pre">long</span> <span class="pre">long</span></tt> is 8 bytes, so memory usage
-is approximately 128 mb.</p>
-</div>
-<div class="section" id="python-interface">
-<h1>Python interface</h1>
-<p>Essentially everything requires a <tt class="docutils literal"><span class="pre">ktable</span></tt>.</p>
-<pre class="literal-block">
-import khmer
-ktable = khmer.new_ktable(L)
-</pre>
-<p>These commands will create a new <tt class="docutils literal"><span class="pre">ktable</span></tt> of size 4**L, suitable
-for counting L-mers.</p>
-<p>Each <tt class="docutils literal"><span class="pre">ktable</span></tt> object has a few accessor functions:</p>
-<blockquote>
-<ul class="simple">
-<li><tt class="docutils literal"><span class="pre">ktable.ksize()</span></tt> will return L.</li>
-<li><tt class="docutils literal"><span class="pre">ktable.max_hash()</span></tt> will return the max hash value in the table, 4**L - 1.</li>
-<li><tt class="docutils literal"><span class="pre">ktable.n_entries()</span></tt> will return the number of table entries, 4**L.</li>
-</ul>
-</blockquote>
-<p>The forward and reverse hashing functions are directly accessible:</p>
-<blockquote>
-<ul>
-<li><dl class="first docutils">
-<dt><tt class="docutils literal"><span class="pre">hashval</span> <span class="pre">=</span> <span class="pre">ktable.forward_hash(kmer)</span></tt> will return the hash value</dt>
-<dd><p class="first last">of the given kmer.</p>
-</dd>
-</dl>
-</li>
-<li><dl class="first docutils">
-<dt><tt class="docutils literal"><span class="pre">kmer</span> <span class="pre">=</span> <span class="pre">ktable.reverse_hash(hashval)</span></tt> will return the kmer that hashes</dt>
-<dd><p class="first last">to the given hashval.</p>
-</dd>
-</dl>
-</li>
-</ul>
-</blockquote>
-<p>There are also some counting functions:</p>
-<blockquote>
-<ul class="simple">
-<li><tt class="docutils literal"><span class="pre">ktable.count(kmer)</span></tt> will increment the count associated with the given kmer
-by one.</li>
-<li><tt class="docutils literal"><span class="pre">ktable.consume(sequence)</span></tt> will run through the sequence and count
-each kmer present.</li>
-<li><tt class="docutils literal"><span class="pre">n</span> <span class="pre">=</span> <span class="pre">ktable.get(kmer|hashval)</span></tt> will return the count associated with the
-given kmer string or the given hashval, whichever is passed in.</li>
-<li><tt class="docutils literal"><span class="pre">ktable.set(kmer|hashval,</span> <span class="pre">count)</span></tt> set the count for the given kmer
-string or hashval.</li>
-</ul>
-</blockquote>
-<p>In all of the cases above, 'kmer' is an L-length string, 'hashval' is
-a non-negative integer, and 'sequence' is a DNA sequence containg ONLY
-A/C/G/T.</p>
-<p><strong>Note:</strong> 'N' is not a legal DNA character as far as khmer is concerned!</p>
-<p>And, finally, there are some set operations:</p>
-<blockquote>
-<ul class="simple">
-<li><tt class="docutils literal"><span class="pre">ktable.clear()</span></tt> empties the ktable.</li>
-<li><tt class="docutils literal"><span class="pre">ktable.update(other)</span></tt> adds all of the entries in <tt class="docutils literal"><span class="pre">other</span></tt> into
-<tt class="docutils literal"><span class="pre">ktable</span></tt>.  The wordsize must be the same for both ktables.</li>
-<li><tt class="docutils literal"><span class="pre">intersection</span> <span class="pre">=</span> <span class="pre">ktable.intersect(other)</span></tt> returns a ktable where
-only nonzero entries in both ktables are kept.  The count for ach
-entry is the sum of the counts in <tt class="docutils literal"><span class="pre">ktable</span></tt> and <tt class="docutils literal"><span class="pre">other</span></tt>.</li>
-</ul>
-</blockquote>
-</div>
-<div class="section" id="an-example">
-<h1>An Example</h1>
-<p>This short code example will count all 6-mers present in the given
-DNA sequence, and then print them all out along with their prevalence.</p>
-<pre class="literal-block">
-# make a new ktable, L=6
-ktable = khmer.new_ktable(6)
-
-# count all k-mers in the given string
-ktable.consume("ATGAGAGACACAGGGAGAGACCCAATTAGAGAATTGGACC")
-
-# run through all entries. if they have nonzero presence, print.
-for i in range(0, ktable.n_entries()):
-   n = ktable.get(i)
-   if n:
-      print ktable.reverse_hash(i), "is present", n, "times."
-</pre>
-<p>And that's all, folks... Let me know if there's other functionality that
-you think is important.</p>
-<pre class="literal-block">
-CTB, 3/2005
-</pre>
-</div>
-</div>
-</body>
-</html>
diff --git a/doc/citations.txt b/doc/citations.rst
similarity index 100%
rename from doc/citations.txt
rename to doc/citations.rst
diff --git a/doc/conf.py b/doc/conf.py
index cb5da4b..99017b4 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -32,7 +32,8 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo',
 templates_path = ['_templates']
 
 # The suffix of source filenames.
-source_suffix = '.txt'
+source_suffix = '.rst'
+
 
 # The encoding of source files.
 # source_encoding = 'utf-8'
@@ -56,16 +57,16 @@ Rajaram Srinivasan, Qingpeng Zhang, and C. Titus Brown'''
 
 # The full version, including alpha/beta/rc tags.
 
-import pkg_resources
-try:
-    release = pkg_resources.get_distribution('khmer').version
-except pkg_resources.DistributionNotFound:
-    print 'To build the documentation, The distribution information of khmer' \
-        ' has to be available.  Either install the package into your' \
-        ' development environment or run "setup.py develop" to setup the' \
-        ' metadata.  A virtualenv is recommended!'
-    sys.exit(1)
-del pkg_resources
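+# Compute the release string from the git metadata via versioneer, so the
+# docs can be built from a bare checkout without installing khmer first.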
+sys.path.insert(0, '../')
+import versioneer
+versioneer.VCS = 'git'
+versioneer.versionfile_source = '../khmer/_version.py'
+versioneer.versionfile_build = '../khmer/_version.py'
+versioneer.tag_prefix = 'v'  # tags are like v1.2.0
+versioneer.parentdir_prefix = '..'
+release = versioneer.get_version()
+del versioneer
+sys.path.remove('../')
 
 # The short X.Y version.
 
@@ -154,7 +155,7 @@ html_style = 'labibi.css'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+html_use_smartypants = False
 
 # Custom sidebar templates, maps document names to template names.
 #html_sidebars = {}
diff --git a/doc/contributors.txt b/doc/contributors.rst
similarity index 100%
rename from doc/contributors.txt
rename to doc/contributors.rst
diff --git a/doc/dev/CODE_OF_CONDUCT.txt b/doc/dev/CODE_OF_CONDUCT.rst
similarity index 100%
rename from doc/dev/CODE_OF_CONDUCT.txt
rename to doc/dev/CODE_OF_CONDUCT.rst
diff --git a/doc/dev/a-quick-guide-to-testing.txt b/doc/dev/a-quick-guide-to-testing.rst
similarity index 94%
rename from doc/dev/a-quick-guide-to-testing.txt
rename to doc/dev/a-quick-guide-to-testing.rst
index fee5126..9830c93 100644
--- a/doc/dev/a-quick-guide-to-testing.txt
+++ b/doc/dev/a-quick-guide-to-testing.rst
@@ -58,6 +58,11 @@ We suggest the following approach to writing tests for **new code**:
    your code -- if statements, fence-post bound errors, etc. -- and write
    tests that exercise those bits of code specifically.
 
+#. Make sure that tests which expect a function call to fail (especially
+   with ``fail_ok=True``) are failing for the expected reason: run the
+   code from the command line and check what the behavior is. When
+   troubleshooting such tests, catch the error with ``try: ... except:``
+   or print ``err``.
+
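+   A minimal sketch of checking *why* a call failed (plain Python, no
+   khmer test helpers assumed)::
+
+      def test_fails_for_the_right_reason():
+          try:
+              open('this-file-does-not-exist.fa')
+              assert 0, "the call above was expected to fail"
+          except IOError as err:
+              # inspect the message so an unrelated failure cannot pass
+              assert 'this-file-does-not-exist' in str(err), str(err)
+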
 For adding tests to **old code**, we recommend a mix of two approaches:
 
 #. use `"stupidity driven testing"
diff --git a/doc/dev/codebase-guide.txt b/doc/dev/codebase-guide.rst
similarity index 100%
rename from doc/dev/codebase-guide.txt
rename to doc/dev/codebase-guide.rst
diff --git a/doc/dev/coding-guidelines-and-review.txt b/doc/dev/coding-guidelines-and-review.rst
similarity index 58%
rename from doc/dev/coding-guidelines-and-review.txt
rename to doc/dev/coding-guidelines-and-review.rst
index f5d92e7..59331b8 100644
--- a/doc/dev/coding-guidelines-and-review.txt
+++ b/doc/dev/coding-guidelines-and-review.rst
@@ -58,10 +58,10 @@ Checklist
 Copy and paste the following into a pull request comment when it is
 ready for review::
    
-   - [ ] Is it mergable?
+   - [ ] Is it mergeable?
    - [ ] Did it pass the tests?
    - [ ] If it introduces new functionality in scripts/ is it tested?
-     Check for code coverage.
+     Check for code coverage with `make clean diff-cover`
    - [ ] Is it well formatted? Look at `make pep8`, `make diff_pylint_report`,
      `make cppcheck`, and `make doc` output. Use `make format` and manual
      fixing as needed.
@@ -69,9 +69,44 @@ ready for review::
      without a major version increment. Changing file formats also requires a
      major version number increment.
    - [ ] Is it documented in the ChangeLog?
+     http://en.wikipedia.org/wiki/Changelog#Format
    - [ ] Was a spellchecker run on the source code and documentation after
      changes were made?
+   - [ ] Is the Copyright year up to date?
 
 **Note** that after you submit the comment you can check and uncheck
 the individual boxes on the formatted comment; no need to put x or y
 in the middle.
+
+CPython Checklist
+-----------------
+
+Here's a checklist for new CPython types with future-proofing for Python 3::
+
+   - [ ] the CPython object name is of the form `khmer_${OBJECTNAME}_Object`
+   - [ ] Named struct with `PyObject_HEAD` macro
+   - [ ] `static PyTypeObject khmer_${OBJECTNAME}_Type` with the following
+     entries
+      - [ ] `PyVarObject_HEAD_INIT(NULL, 0)` as the object init (this includes
+        the `ob_size` field).
+      - [ ] all fields should have their name in a comment for readability
+      - [ ] The `tp_name` field is a dotted name with both the module name and
+        the name of the type within the module. Example: `khmer.ReadAligner`
+      - [ ] Deallocator defined and cast to `(destructor)` in tp_dealloc
+        - [ ] The object's deallocator must be
+          `Py_TYPE(obj)->tp_free((PyObject*)obj);`
+      - [ ] Do _not_ define a `tp_getattr`
+      - [ ] BONUS: write methods to present the state of the object via
+        `tp_str` & `tp_repr`
+      - [ ] _Do_ pass in the array of methods in `tp_methods`
+      - [ ] _Do_ define a new method in `tp_new`
+   - [ ] PyMethodDef arrays contain doc strings
+      - [ ] Methods are cast to `PyCFunction`
+   - [ ] Type methods use their type object in the method signature.
+   - [ ] Type creation method decrements the reference to self
+     (`Py_DECREF(self);`) before each error-path exit (`return NULL;`)
+   - [ ] No factory methods; e.g. no `khmer_new_readaligner`-style constructor
+   - [ ] Type object is passed to `PyType_Ready` and its return code is checked
+     in `init_khmer()`
+   - [ ] The reference count for the type object is incremented before adding
+     it to the module: `Py_INCREF(&khmer_${OBJECTNAME}_Type);`.
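+
+From the Python side, a couple of these items can be spot-checked (a
+sketch; it assumes the type is exposed at the package top level, as
+`khmer.ReadAligner` is)::
+
+   import khmer
+
+   # a dotted `tp_name` shows up in repr(): <type 'khmer.ReadAligner'>
+   print repr(khmer.ReadAligner)
+   # a type with `tp_new` is constructed directly; no factory function needed
+   assert isinstance(khmer.ReadAligner, type)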
diff --git a/doc/dev/crazy-ideas.txt b/doc/dev/crazy-ideas.rst
similarity index 100%
rename from doc/dev/crazy-ideas.txt
rename to doc/dev/crazy-ideas.rst
diff --git a/doc/dev/details.txt b/doc/dev/details.rst
similarity index 100%
rename from doc/dev/details.txt
rename to doc/dev/details.rst
diff --git a/doc/dev/development.txt b/doc/dev/development.rst
similarity index 100%
rename from doc/dev/development.txt
rename to doc/dev/development.rst
diff --git a/doc/dev/for-khmer-developers.txt b/doc/dev/for-khmer-developers.rst
similarity index 100%
rename from doc/dev/for-khmer-developers.txt
rename to doc/dev/for-khmer-developers.rst
diff --git a/doc/dev/getting-started.txt b/doc/dev/getting-started.rst
similarity index 83%
rename from doc/dev/getting-started.txt
rename to doc/dev/getting-started.rst
index f7e3e0f..87e3ed1 100644
--- a/doc/dev/getting-started.txt
+++ b/doc/dev/getting-started.rst
@@ -1,3 +1,9 @@
+.. This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+   Copyright (C) Michigan State University, 2009-2015. It is licensed under
+   the three-clause BSD license; see doc/LICENSE.txt.
+   Contact: khmer-project at idyll.org
+
+
 Getting started with khmer development
 ======================================
 
@@ -40,13 +46,17 @@ One-time Preparation
        On recent Debian and Ubuntu this can be done with::
 
            sudo apt-get install python2.7-dev python-virtualenv python-pip gcc \
-           g++
+           g++ git astyle gcovr cppcheck
 
        For RHEL6::
 
            sudo yum install -y python-devel python-pip git gcc gcc-c++ make
            sudo pip install virtualenv
 
+      For Arch Linux::
+      
+          sudo pacman -S python2 python2-pip python2-virtualenv gcc make
+
 #. Get a `GitHub <http://github.com>`__ account.
 
    (We use GitHub to manage khmer contributions.)
@@ -91,13 +101,19 @@ One-time Preparation
 	tar xzf virtualenv*
 	cd virtualenv-*; python2.7 virtualenv.py ../env; cd ..
 
-   `Conda <https://github.com/conda/conda>`__ users on any platform can install
-   virtualenv this way::
+   `Mac ports <https://www.macports.org/>`__ users on the OS X platform can
+   install pip by executing from the command line::
+     
+       sudo port install py27-pip
+     
+   `Homebrew <http://brew.sh/>`__ users on the OS X platform will have pip
+   already installed.
+
 
-       conda install pip
-       hash -r
-       pip install virtualenv
-       python2.7 -m virtualenv env 
+   `Conda <https://github.com/conda/conda>`__ users on any platform
+   should instead create a separate Conda environment::
+
+       conda create -n khmer anaconda
 
 #. Activate the virtualenv and install a few packages::
 
@@ -109,6 +125,32 @@ One-time Preparation
    <https://nose.readthedocs.org/en/latest/>`__, packages we use for
    building the documentation and running the tests.)
 
+   With Conda, activate the previously created environment and install the
+   dependencies::
+
+       source activate khmer
+       cd khmer
+       make install-dependencies
+       
+#. Cppcheck installation:
+   
+   `Debian <https://www.debian.org/>`__ and
+   `Ubuntu <http://www.ubuntu.com/>`__ Linux distro users can
+   install cppcheck by executing from the command line::
+     
+       sudo apt-get install cppcheck
+
+   `Mac ports <https://www.macports.org/>`__ users on the OS X platform can
+   install cppcheck by executing from the command line::
+     
+       sudo port install cppcheck
+
+   `Homebrew <http://brew.sh/>`__ users on the OS X platform can
+   install cppcheck by executing from the command line::
+     
+       brew install cppcheck
+
+
 Building khmer and running the tests
 ------------------------------------
 
@@ -116,6 +158,10 @@ Building khmer and running the tests
 
       source ../env/bin/activate
 
+   ... or for Conda users::
+
+      source activate khmer
+
    You can run this many times without any ill effects.
 
    (This puts you in the development environment.)
@@ -241,13 +287,13 @@ Claiming an issue and starting to develop
 
       git push origin
 
-#. When you are ready to have the pull request reviewed, please add a
-   comment "ready for review!".
+#. When you are ready to have the pull request reviewed, please mention 
+   @luizirber, @camillescott, @mr-c, or @ctb with a comment 'Ready for review!'
 
 #. The khmer team will now review your pull request and communicate
    with you through the pull request page.  Please feel free to add
-   'ping!' in the comments if you are looking for feedback -- this
-   will alert us that you are still on the line -- but we will
+   'ping!' and an @ in the comments if you are looking for feedback 
+   -- this will alert us that you are still on the line -- but we will
    automatically get notified of your pull request and any new
    comments, so use sparingly.
 
diff --git a/doc/dev/hackathon.txt b/doc/dev/hackathon.rst
similarity index 100%
rename from doc/dev/hackathon.txt
rename to doc/dev/hackathon.rst
diff --git a/doc/dev/index.txt b/doc/dev/index.rst
similarity index 96%
rename from doc/dev/index.txt
rename to doc/dev/index.rst
index 5c04041..d5b2b15 100644
--- a/doc/dev/index.txt
+++ b/doc/dev/index.rst
@@ -14,14 +14,15 @@ Contents:
 .. toctree::
    :maxdepth: 1
 
+   CODE_OF_CONDUCT
    getting-started
    a-quick-guide-to-testing
    codebase-guide
    coding-guidelines-and-review
+   scripts-and-sandbox
    for-khmer-developers
    release
 
    details
    development
    crazy-ideas
-   CODE_OF_CONDUCT
diff --git a/doc/dev/release.txt b/doc/dev/release.rst
similarity index 93%
rename from doc/dev/release.txt
rename to doc/dev/release.rst
index 959167a..8e6ed67 100644
--- a/doc/dev/release.txt
+++ b/doc/dev/release.rst
@@ -81,12 +81,13 @@ release makers, following this checklist by MRC.
         git clone --depth 1 --branch v${new_version}-${rc} https://github.com/ged-lab/khmer.git
         cd khmer
         make install-dependencies
-        make install
         make test
         normalize-by-median.py --version 2>&1 | grep ${new_version}-${rc} && \
                 echo 1st manual version check passed
-        cd ..
-        nosetests khmer --attr '!known_failing'
+        pip uninstall -y khmer; pip uninstall -y khmer; make install
+        mkdir ../not-khmer # if there is a subdir named 'khmer' nosetests will execute tests
+        # there instead of the installed khmer module's tests
+        pushd ../not-khmer; nosetests khmer --attr '!known_failing'; popd
 
 
         # Secondly we test via pip
@@ -98,12 +99,12 @@ release makers, following this checklist by MRC.
         cd src/khmer
         make install-dependencies
         make dist
-        make install
         make test
+        cp dist/khmer*tar.gz ../../../testenv3/
+        pip uninstall -y khmer; pip uninstall -y khmer; make install
+        cd ../.. # no subdir named khmer here, safe to run nosetests against the installed khmer module
         normalize-by-median.py --version 2>&1 | grep ${new_version}-${rc} && \
                 echo 2nd manual version check passed
-        cp dist/khmer*tar.gz ../../../testenv3/
-        cd ../..
         nosetests khmer --attr '!known_failing'
 
         # Is the distribution in testenv2 complete enough to build another
@@ -118,7 +119,9 @@ release makers, following this checklist by MRC.
         cd khmer*
         make dist
         make test
-        pushd .. ; nosetests khmer --attr '!known_failing' ; popd
+        pip uninstall -y khmer; pip uninstall -y khmer; make install
+        mkdir ../not-khmer
+        pushd ../not-khmer ; nosetests khmer --attr '!known_failing' ; popd
 
 #. Publish the new release on the testing PyPI server.  You will need
    to change your PyPI credentials as documented here:
diff --git a/doc/dev/scripts-and-sandbox.rst b/doc/dev/scripts-and-sandbox.rst
new file mode 100644
index 0000000..b8a11af
--- /dev/null
+++ b/doc/dev/scripts-and-sandbox.rst
@@ -0,0 +1,120 @@
+Command line scripts, ``scripts/``, and ``sandbox/``
+====================================================
+
+.. note::
+
+   This document applies through khmer/oxli 2.0/3.0 (see
+   :doc:`../roadmap`) - we will revisit when the Python API falls
+   under semantic versioning for oxli 4.0.
+
+khmer has two conflicting goals: first, we want to provide a reliable
+piece of software to our users; and second, we want to be flexible and
+enable exploration of new algorithms and programs.  To this end,
+we've split our command line scripts across two directories,
+``scripts/`` and ``sandbox/``.  The former is the staid, boring, reliable
+code; the latter is a place for exploration.
+
+As a result, we are committed to high test coverage, stringent code
+review, and `Semantic Versioning <http://semver.org/>`__ for files in
+``scripts/``, but explicitly *not* committed to this for files and
+functionality implemented in ``sandbox/``.  So, putting a file into
+``scripts/`` is a big deal, especially since it increases our maintenance
+burden for the indefinite future.
+
+We've roughed out the following process for moving scripts into ``scripts/``:
+
+* Command line scripts start in ``sandbox/``;
+* Once their utility is proven (in a paper, for example), we can propose to
+  move them into ``scripts/``;
+* There's a procedure for moving scripts from ``sandbox/`` into ``scripts/``.
+
+Read on!
+
+Sandbox script requirements and suggestions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All scripts in ``sandbox/`` must:
+
+* be importable (enforced by ``test_import_all`` in
+  ``test_sandbox_scripts.py``)
+* be mentioned in ``sandbox/README.rst``
+* have a hash-bang line (``#! /usr/bin/env python2``) at the top
+* be command-line executable (``chmod a+x``)
+* have a Copyright message (see below)
+* have lowercase names
+* use '-' as a word separator, rather than '_' or CamelCase
+
+All *new* scripts being added to ``sandbox/`` should:
+
+* have decent automated tests
+* be used in a protocol (see khmer-protocols) or a recipe (see khmer-recipes)
+* be pep8 clean and pylint clean-ish (see ``make pep8`` and ``make diff_pylint``).
+
+Command line standard options for scripts/
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All scripts in scripts/ should have the following options, where they
+apply (a minimal sketch follows the list):
+
+* ``--version`` - should always apply
+* ``--help`` - should always apply
+* ``--force`` - override any sanity checks that may prevent the script from running
+* ``--loadtable`` and ``--savetable`` - where appropriate (see khmer_args.py)
+
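+A minimal argparse sketch of these conventions (the option wiring here is
+illustrative; real scripts build their parsers via khmer_args.py)::
+
+   import argparse
+
+   parser = argparse.ArgumentParser(
+       description='Example following the scripts/ conventions.')
+   # --help is generated automatically by argparse
+   parser.add_argument('--version', action='version', version='example 1.4')
+   parser.add_argument('--force', action='store_true',
+                       help='continue past sanity checks')
+   args = parser.parse_args()
+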
+Copyright message
+~~~~~~~~~~~~~~~~~
+
+Our current Copyright message is::
+
+   #
+   # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+   # Copyright (C) Michigan State University, 2009-2015. It is licensed under
+   # the three-clause BSD license; see doc/LICENSE.txt.
+   # Contact: khmer-project at idyll.org
+   #
+
+The beginning year should be the first year that this file existed in
+the repo; the end year should be the last year a coding change was
+made in the file.
+
+Upgrading a script from 'sandbox' to 'scripts'
+----------------------------------------------
+
+First, everything needed (all library support code) should already be
+committed to khmer master after the usual review process; the relevant
+script(s) should be in ``sandbox/``.
+
+Second, an issue should be started explicitly to discuss whether the
+script(s) should be moved from ``sandbox/`` into ``scripts/``.  This issue
+should discuss the general need for this script, outside of a particular
+paper pipeline.  (Note that there is no imperative to move a script
+out of ``sandbox/``; if we think it's useful code to have around and
+want to keep it functioning, we should just add in automated tests and
+otherwise level it up.)
+
+Third, assuming we reach general agreement about moving the script(s)
+into ``scripts/``, start a pull request to do so, referencing the
+issue and containing the following checklist.  The PR should start by
+moving the script from ``sandbox/`` into ``scripts/``, and moving the
+tests out of the ``test_sandbox_scripts.py`` file.
+
+Last but not least, intensive code review may raise more general
+issues that could apply to the entire code base; if contentious or
+needing discussion, these issues may be punted to general issues so as
+to not block a merge.
+
+A checklist for moving a script into the scripts/ directory from sandbox/
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Copy and paste this checklist into the PR, in addition to the normal
+development/PR checklist::
+
+   - [ ] most or all lines of code are covered by automated tests (see output of ``make diff-cover``)
+   - [ ] ``make diff_pylint`` is clean
+   - [ ] the script has been updated with a ``get_parser()`` and added to doc/user/scripts.txt
+   - [ ] argparse help text exists, with an epilog docstring, with examples and options
+   - [ ] standard command line options are implemented
+   - [ ] version and citation information is output to STDERR (`khmer_args.info(...)`)
+   - [ ] support '-' (STDIN) as an input file, if appropriate
+   - [ ] support designation of an output file (including STDOUT), if appropriate
+   - [ ] runtime diagnostic information (progress, etc.) is output to STDERR
+   - [ ] script has been removed from sandbox/README.rst
diff --git a/doc/index.txt b/doc/index.rst
similarity index 93%
rename from doc/index.txt
rename to doc/index.rst
index 0df9675..8135f1f 100644
--- a/doc/index.txt
+++ b/doc/index.rst
@@ -12,8 +12,11 @@ khmer -- k-mer counting & filtering FTW
           Rajaram Srinivasan, Qingpeng Zhang, and C. Titus Brown
 
 :Contact: khmer-project at idyll.org
+:GitHub: https://github.com/ged-lab/khmer
+:Chat: https://gitter.im/ged-lab/khmer
 :License: BSD
 
+
 khmer is a library and suite of command line tools for working with
 DNA sequence.  It is primarily aimed at short-read sequencing data
 such as that produced by the Illumina platform.  khmer takes a k-mer-centric
@@ -21,6 +24,8 @@ approach to sequence analysis, hence the name.
 
 :doc:`user/install`
 
+:doc:`user/getting-help`
+
 There are two mailing lists dedicated to khmer, an announcements-only list and
 a discussion list. To search their archives and sign-up for them, please visit
 the following URLs:
@@ -47,9 +52,8 @@ Contents:
    introduction
    contributors
    citations
-
+   release-notes/index
    user/index
    dev/index
    roadmap
-
    LICENSE
diff --git a/doc/introduction.rst b/doc/introduction.rst
new file mode 100644
index 0000000..a3f0382
--- /dev/null
+++ b/doc/introduction.rst
@@ -0,0 +1,99 @@
+.. vim: set filetype=rst
+
+=====================
+Introduction to khmer
+=====================
+
+Introduction
+============
+
+khmer is a library and toolkit for doing k-mer-based dataset analysis and 
+transformations.  Our focus in developing it has been on scaling assembly of 
+metagenomes and mRNA.
+
+khmer can be used for a number of transformations, including inexact 
+transformations (abundance filtering and error trimming) and exact 
+transformations (graph-size filtering, to throw away disconnected reads; and 
+partitioning, to split reads into disjoint sets).  Of these, only partitioning 
+is not constant memory.  In all cases, the memory required for assembly with 
+Velvet or another de Bruijn graph assembler will be more than the memory 
+required to use our software. Our software will not increase the memory required 
+for Velvet, either, although we may not be able to *decrease* the memory 
+required for assembly for every data set.
+
+Most of khmer relies on an underlying probabilistic data structure known as a 
+`Bloom filter <http://en.wikipedia.org/wiki/Bloom_filter>`__ (also see 
+`Count-Min Sketch <http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf>`__ 
+and `These Are Not The k-mers You're Looking For 
+<http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4111482/>`__), which is essentially 
+a set of hash tables, each of different size, with no collision detection. These 
+hash tables are used to store the presence of specific k-mers and/or their 
+count.  The lack of collision detection means that the Bloom filter may report a 
+k-mer as being "present" when it is not, in fact, in the data set; however, it 
+will never incorrectly report a k-mer as being absent when it *is* present.  
+This one-sided error makes the Bloom filter very useful for certain kinds of 
+operations.
+
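+A toy sketch of the idea (illustrative only; khmer's real implementation
+is in C++ and far more careful)::
+
+   # a few tables of distinct prime sizes; one shared hash function
+   sizes = [999983, 999979, 999961]
+   tables = [[0] * size for size in sizes]
+
+   def add(kmer):
+       for table, size in zip(tables, sizes):
+           table[hash(kmer) % size] += 1
+
+   def get(kmer):
+       # collisions can only inflate a cell, so the minimum over the
+       # tables never under-reports -- the one-sided error described above
+       return min(t[hash(kmer) % s] for t, s in zip(tables, sizes))
+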
+khmer is also independent of K, and currently works for K <= 32.  We will be 
+integrating code for up to K=64 soon.
+
+khmer is implemented in C++ with a Python wrapper, which is what all of the 
+scripts use.
+
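+For instance, the counting layer can be driven directly from Python (a
+sketch against the khmer 1.x API; ``new_counting_hash(k, size, n_tables)``
+allocates the tables described above)::
+
+   import khmer
+
+   counts = khmer.new_counting_hash(20, 10000000, 4)  # k=20, 4 tables
+   counts.consume('ATGGCTAGCTAGCTAGCTAGCTAGCTAGCT')   # count every 20-mer
+   print counts.get('ATGGCTAGCTAGCTAGCTAG')           # -> 1
+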
+Some important documentation for khmer is provided on the Web sites for 
+`khmer-protocols <http://khmer-protocols.readthedocs.org>`__ and `khmer-recipes 
+<http://khmer-recipes.readthedocs.org>`__. khmer-protocols provides detailed 
+protocols for using khmer to analyze either a transcriptome or a metagenome; 
+khmer-recipes provides individual recipes for using khmer in a variety of 
+sequence-oriented tasks such as extracting reads by coverage, estimating a 
+genome or metagenome size from unassembled reads, and error-trimming reads via 
+streaming k-mer abundance.
+
+Using khmer
+===========
+
+khmer comes "out of the box" with a number of scripts that make it
+immediately useful for a few different operations, including:
+
+ - normalizing read coverage ("digital normalization")
+
+ - dividing reads into disjoint sets that do not connect ("partitioning")
+
+ - eliminating reads that will not be used by a de Bruijn graph assembler;
+
+ - removing reads with low- or high-abundance k-mers;
+
+ - trimming reads of certain kinds of sequencing errors;
+
+ - counting k-mers and estimating data set coverage based on k-mer counts;
+
+ - running Velvet and calculating assembly statistics;
+
+ - optimizing assemblies on various parameters;
+
+ - converting FASTA to FASTQ;
+
+and a few other random functions.
+
+Practical considerations
+========================
+
+The most important thing to think about when using khmer is whether or not the 
+transformation or filter you're applying is appropriate for the data you're 
+trying to assemble.  Two of the most powerful operations available in khmer, 
+graph-size filtering and graph partitioning, only make sense for assembly 
+datasets with many theoretically unconnected components.  This is typical of 
+metagenomic data sets.
+
+The second most important consideration is memory usage.  The effectiveness of 
+all of the Bloom filter-based functions (which is everything interesting in 
+khmer!) depends critically on having enough memory to do a good job.  See 
+:doc:`user/choosing-table-sizes` for more information.
+
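+As a rough sketch of the arithmetic (counting tables cost about one byte
+per entry; the numbers here are made up)::
+
+   n_tables = 4                      # number of tables
+   table_size = 2 * 10**9            # entries per table
+   print '%d GB' % (n_tables * table_size / 10**9)   # -> 8 GB
+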
+Copyright and license
+=====================
+
+Portions of khmer are Copyright California Institute of Technology,
+where the exact counting code was first developed; the remainder is
+Copyright Michigan State University.  The code is freely available for
+use and re-use under the BSD License.
diff --git a/doc/introduction.txt b/doc/introduction.txt
deleted file mode 100644
index d3ec27f..0000000
--- a/doc/introduction.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-.. vim: set filetype=rst
-
-=====================
-Introduction to khmer
-=====================
-
-Introduction
-============
-
-khmer is a library and toolkit for doing k-mer-based dataset analysis
-and transformations.  Our focus in developing it has been on scaling
-assembly of metagenomes and mRNA.
-
-khmer can be used for a number of transformations, include inexact
-transformations (abundance filtering and error trimming) and exact
-transformations (graph-size filtering, to throw away disconnected
-reads; and partitioning, to split reads into disjoint sets).  Of
-these, only partitioning is not constant memory.  In all cases, the
-memory required for assembly with Velvet or another de Bruijn graph
-assembler will be more than the memory required to use our
-software. Our software will not increase the memory required for
-Velvet, either, although we may not be able to *decrease* the memory
-required for assembly for every data set.
-
-Most of khmer relies on an underlying probabilistic data structure
-known as a `Bloom filter <http://en.wikipedia.org/wiki/Bloom_filter>`__
-(also see `MinCount Sketch
-<http://www.eecs.harvard.edu/~michaelm/CS222/countmin.pdf>`__), which is
-essentially a set of hash tables, each of different size, with no collision
-detection. These hash tables are used to store the presence of specific k-mers
-and/or their count.  The lack of collision detection means that
-the Bloom filter may report a k-mer as being "present" when it is
-not, in fact, in the data set; however, it will never incorrectly
-report a k-mer as being absent when it *is* present.  This one-sided
-error makes the Bloom filter very useful for certain kinds of
-operations.
-
-khmer is also independent of K, and currently works for K <= 32.  We
-will be integrating code for up to K=64 soon.
-
-khmer is implemented in C++ with a Python wrapper, which is what
-all of the scripts use.
-
-Using khmer
-===========
-
-khmer comes "out of the box" with a number of scripts that make it
-immediately useful for a few different operations, including:
-
- - normalizing read coverage ("digital normalization")
-
- - dividing reads into disjoint sets that do not connect ("partitioning")
-
- - eliminating reads that will not be used by a de Bruijn graph assembler;
-
- - removing reads with low- or high-abundance k-mers;
-
- - trimming reads of certain kinds of sequencing errors;
-
- - counting k-mers and estimating data set coverage based on k-mer counts;
-
- - running Velvet and calculating assembly statistics;
-
- - optimizing assemblies on various parameters;
-
- - converting FASTA to FASTQ;
-
-and a few other random functions.
-
-Practical considerations
-========================
-
-The most important thing to think about when using khmer is whether or
-not the transformation or filter you're applying is appropriate for
-the data you're trying to assemble.  Two of the most powerful
-operations available in khmer, graph-size filtering and graph
-partitioning, only make sense for assembly datasets with many
-theoretically unconnected components.  This is typical of metagenomic
-data sets.
-
-The second most important consideration is memory usage.  The
-effectiveness of all of the Bloom filter-based functions (which is
-everything interesting in khmer!) depends critically on having enough
-memory to do a good job.  See :doc:`user/choosing-table-sizes` for more
-information.
-
-Copyright and license
-=====================
-
-Portions of khmer are Copyright California Institute of Technology,
-where the exact counting code was first developed; the remainder is
-Copyright Michigan State University.  The code is freely available for
-use and re-use under the BSD License.
diff --git a/doc/release-notes/index.rst b/doc/release-notes/index.rst
new file mode 100644
index 0000000..340e2cf
--- /dev/null
+++ b/doc/release-notes/index.rst
@@ -0,0 +1,12 @@
+Release notes
+=============
+
+Contents:
+
+.. toctree::
+   :maxdepth: 0
+   :glob:
+   :titlesonly:
+
+   release-1.*
+
diff --git a/doc/release-notes/release-1.0.1.md b/doc/release-notes/release-1.0.1.md
index b110104..5668731 100644
--- a/doc/release-notes/release-1.0.1.md
+++ b/doc/release-notes/release-1.0.1.md
@@ -1,3 +1,5 @@
+# khmer v1.0.1 release notes
+
 This is a bugfix release. Note: the installation instructions have been slightly simplified.
 
 https://khmer.readthedocs.org/en/v1.0.1/
diff --git a/doc/release-notes/release-1.0.1.rst b/doc/release-notes/release-1.0.1.rst
new file mode 100644
index 0000000..8b57b76
--- /dev/null
+++ b/doc/release-notes/release-1.0.1.rst
@@ -0,0 +1,71 @@
+khmer v1.0.1 release notes
+==========================
+
+This is a bugfix release. Note: the installation instructions have been
+slightly simplified.
+
+https://khmer.readthedocs.org/en/v1.0.1/
+
+New items of note:
+------------------
+
+This release successfully installs and passes its unit tests on Debian
+6.0 "Squeeze", Debian 7.0 "Wheezy", Fedora 19, OS X 7 "Lion", OS X 8
+"Mountain Lion", Red Hat Enterprise Linux 6, Scientific Linux 6, Ubuntu
+10.04 LTS, and Ubuntu 12.04 LTS. Thanks to the `UW-Madison Build and
+Test Lab <https://www.batlab.org/>`__ for their `testing
+infrastructure <http://submit-1.batlab.org/nmi/results/details?runID=247153>`__.
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+-  fixed thread hanging issue #406 @ctb
+-  Explicit python2 invocation #404 @mr-c
+-  MANIFEST.in,setup.py: fix to correct zlib packaging #365 @mr-c
+-  fixed check\_space\_for\_hashtable to use args.n\_tables #382 @ctb
+-  Bug fix: make-initial-stoptags.py error on missing .ht input file,
+   actual input file is .pt #391 @mr-c
+
+Minor updates
+-------------
+
+-  include calc-best-assembly.py in v1.0.1 #409 @ctb
+-  updated normalize-by-median documentation for loadtable #378 @ctb
+-  updated diginorm for new FP rate info; corrected spelling error #398
+   @ctb
+-  Add spellcheck to code review checklist. #397 @ctb
+
+Known Issues
+------------
+
+All of these are pre-existing.
+
+Some users have reported that normalize-by-median.py will utilize more
+memory than it was configured for. This is being investigated in
+https://github.com/ged-lab/khmer/issues/266
+
+Some FASTQ files confuse our parser when running with more than one
+thread (for example, while using load-into-counting.py). If you
+experience this then add "--threads=1" to your command line. This issue
+is being tracked in https://github.com/ged-lab/khmer/issues/249
+
+If your k-mer table (hashfile) gets truncated, perhaps from a full
+filesystem, then our tools currently will get stuck. This is being
+tracked in https://github.com/ged-lab/khmer/issues/247 and
+https://github.com/ged-lab/khmer/issues/246
+
+Paired-end reads from Casava 1.8 currently require renaming for use in
+normalize-by-median and abund-filter when used in paired mode. The
+integration of a fix for this is being tracked in
+https://github.com/ged-lab/khmer/issues/23
+
+annotate-partitions.py only outputs FASTA even if given a FASTQ file.
+This issue is being tracked in
+https://github.com/ged-lab/khmer/issues/46
+
+A user reported that abundance-dist-single.py fails with small files and
+many threads. This issue is being tracked in
+https://github.com/ged-lab/khmer/issues/75
+
+Contributors
+------------
+
+@mr-c, @ctb, @luizirber, @RamRS, @ctSkennerton
diff --git a/doc/release-notes/release-1.0.md b/doc/release-notes/release-1.0.md
index e7fb221..99ce8ab 100644
--- a/doc/release-notes/release-1.0.md
+++ b/doc/release-notes/release-1.0.md
@@ -1,3 +1,5 @@
+# khmer v1.0 release notes
+
 582 changed files with 40,527 additions and 31,772 deletions.
 
 The team has been hard at work since v0.8 to refine the codebase into a stable product.
diff --git a/doc/release-notes/release-1.0.rst b/doc/release-notes/release-1.0.rst
new file mode 100644
index 0000000..94dc769
--- /dev/null
+++ b/doc/release-notes/release-1.0.rst
@@ -0,0 +1,110 @@
+khmer v1.0 release notes
+========================
+
+582 changed files with 40,527 additions and 31,772 deletions.
+
+The team has been hard at work since v0.8 to refine the codebase into a
+stable product.
+
+https://khmer.readthedocs.org/en/latest/
+
+With the 1.0 release we are making a commitment to using Semantic
+Versioning[0]: the version number will reflect the impact of the changes
+between releases. New major versions will likely require you to change
+how you use the project. Minor versions indicate new functionality that
+doesn't impact the existing. Patch versions indicate
+backwards-compatible fixes. Right now we are limiting this promise to
+the command-line interface. A future release will introduce a stable and
+mature Python API to the khmer project and at that time we will extend
+the version system to include that API.
+
+New items of note:
+------------------
+
+CITATION: Each script now outputs information on how to cite it. There
+is a new paper that describes the project overall: MR Crusoe et al., 2014.
+doi: 10.6084/m9.figshare.979190
+
+The documentation for the scripts has undergone an overhaul. The scripts
+now output extensive notes and the formal documentation website is
+generated from the scripts themselves and will never be out of sync.
+
+https://khmer.readthedocs.org/en/latest/scripts.html
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+-  git clone of the khmer repo reqs > 0.5 GiB #223 @mr-c
+-  new khmer/file module #357 @RamRS
+-  Floating point exception in count-overlap.py #282 @qingpeng
+-  add documentation for sample-reads-randomly #192 @mr-c
+-  only build zlib and bzip2 when needed #168 @mr-c
+
+Minor updates
+-------------
+
+-  khmer tools should output intelligent error messages when fed empty
+   files #135 @RamRS
+-  set IParser::ParserState::ParserState:fill\_id to zero at
+   initialization #356 @mr-c
+-  demote nose & sphinx to extra dependencies. #351 @mr-c
+-  CID 1054792 (Medium) Uninitialized scalar field (UNINIT\_CTOR) #179
+   @mr-c
+-  CID 1077117 (Medium): Division or modulo by zero (DIVIDE\_BY\_ZERO)
+   #182 @mr-c
+-  if --savehash is specified then don't continue if there is not enough
+   free disk space #245 @RamRS
+-  finish fixing implicit downcasts #330 @mr-c
+-  Clean up compile warnings in subset.cc #172 @mr-c
+-  all scripts need to output their version #236 @mr-c
+-  environmental variables need documenting #303 @mr-c
+-  C++ code should be consistently formatted #261 @mr-c
+-  Clean up ancillary files #146 @mr-c
+-  squash option not implemented in abundance-dist-single.py #271 @RamRS
+-  Add documentation on how to tie into a particular tagged version #29
+   @mr-c
+-  pip install -e fails with compile error #352 @mr-c
+-  remove the unused KTable object #337 @luizirber
+-  zlib 1.2.3 -> zlib 1.2.8 #336 @mr-c
+-  CID 1173035: Uninitialized scalar field (UNINIT\_CTOR) #311 @mr-c
+-  CID 1153101: Resource leak in object (CTOR\_DTOR\_LEAK) #309 @mr-c
+-  remove khmer::read\_parsers::IParser::ParserState::thread\_id #323
+   @mr-c
+-  several modifications about count-overlap.py script #324 @qingpeng
+-  fixed runscript to handle SystemExit #332 @ctb
+-  CID 1063852: Uninitialized scalar field (UNINIT\_CTOR) #313 @mr-c
+-  [infrastructure] update to new Doxyfile format, make version number
+   autoupdate #315 @mr-c
+-  Removed an extraneous using namespace khmer; in kmer.hh, #276
+   @fishjord
+-  Minimum and recommended python version #94 @mr-c
+-  KmerCount class appears to be unused #302 @mr-c
+-  If loadhash is specified in e.g. normalize-by-median, don't complain
+   about default hashsize parameters #117 @RamRS
+
+Known Issues
+------------
+
+All of these are pre-existing.
+
+Some users have reported that normalize-by-median.py will utilize more
+memory than it was configured for. This is being investigated in
+https://github.com/ged-lab/khmer/issues/266
+
+Some FASTQ files confuse our parser when running with more than one
+thread (for example, while using load-into-counting.py). If you
+experience this then add "--threads=1" to your command line. This issue
+is being tracked in https://github.com/ged-lab/khmer/issues/249
+
+If your k-mer table (hashfile) gets truncated, perhaps from a full
+filesystem, then our tools currently will get stuck. This is being
+tracked in https://github.com/ged-lab/khmer/issues/247 and
+https://github.com/ged-lab/khmer/issues/96 and
+https://github.com/ged-lab/khmer/issues/246
+
+Paired-end reads from Casava 1.8 currently require renaming for use in
+normalize-by-median and abund-filter when used in paired mode. The
+integration of a fix for this is being tracked in
+https://github.com/ged-lab/khmer/issues/23
+
+annotate-partitions.py only outputs FASTA even if given a FASTQ file.
+This issue is being tracked in
+https://github.com/ged-lab/khmer/issues/46
+
+A user reported that abundance-dist-single.py fails with small files and
+many threads. This issue is being tracked in
+https://github.com/ged-lab/khmer/issues/75
+
+Contributors
+------------
+
+@camillescott, @mr-c, @ctb, @luizirber, @RamRS, @qingpeng
+
+[0] http://semver.org/
diff --git a/doc/release-notes/release-1.1.md b/doc/release-notes/release-1.1.md
index 74ffca4..0629f04 100644
--- a/doc/release-notes/release-1.1.md
+++ b/doc/release-notes/release-1.1.md
@@ -1,3 +1,5 @@
+# khmer v1.1 release notes
+
 This is v1.1, a minor version release; this version adds several new scripts.
 
 Docs at: https://khmer.readthedocs.org/en/v1.1/
diff --git a/doc/release-notes/release-1.1.rst b/doc/release-notes/release-1.1.rst
new file mode 100644
index 0000000..edcd85b
--- /dev/null
+++ b/doc/release-notes/release-1.1.rst
@@ -0,0 +1,77 @@
+khmer v1.1 release notes
+========================
+
+This is v1.1, a minor version release; this version adds several new
+scripts.
+
+Docs at: https://khmer.readthedocs.org/en/v1.1/
+
+Release notes w/links:
+https://github.com/ged-lab/khmer/releases/tag/v1.1
+
+New items of note:
+------------------
+
+-  removed unnecessary files from PyPI package; distribution is now
+   under 2 MB (#419) @mr-c
+-  tests are now distributed with package and can be run after 'pip
+   install' (#451) @mr-c
+-  complain properly on file read failures (#333) @ctb
+-  Sequence loading scripts will now report total numbers of k-mers if
+   given --report\_total\_kmers (#491/#429) @mr-c
+-  added metagenome protocol to acceptance testing (#472) @SherineAwad
+   @ctb
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+-  removed sandbox/load-into-hashbits.py (superseded by
+   scripts/load-graph.py --no-tagset) (#480, @wrightmhw)
+-  promoted extract-long-sequences.py to scripts (#461, @wrightmhw)
+-  promoted fastq-to-fasta.py to scripts (#436, @wrightmhw)
+-  remove incorrect filesystem space check from abundance-dist.py (#452,
+   @chuckpr)
+-  when counting hash writes fail, produce error message (#411, @znruss)
+-  removed a number of memory leaks found by Coverity and valgrind
+   (#451, @mr-c)
+-  updated reservoir sampling to produce multiple subsamples with -S
+   (#197, @ctb)
+-  fixed pip2, python2 issues (#428 and #485, @accaldwell @mr-c)
+-  removed untested/unused code and scripts (#438, @mr-c)
+
+Known issues:
+-------------
+
+All of these are pre-existing.
+
+Some users have reported that normalize-by-median.py will utilize more
+memory than it was configured for. This is being investigated in
+https://github.com/ged-lab/khmer/issues/266
+
+Some FASTQ files confuse our parser when running with more than one
+thread (for example, while using load-into-counting.py). If you
+experience this then add "--threads=1" to your command line. This issue
+is being tracked in https://github.com/ged-lab/khmer/issues/249
+
+If your k-mer table is truncated on write, an error may not be reported;
+this is being tracked in https://github.com/ged-lab/khmer/issues/443.
+However, khmer will now (correctly) fail when trying to read a truncated
+file (See #333).
+
+Paired-end reads from Casava 1.8 currently require renaming for use in
+normalize-by-median and abund-filter when used in paired mode. The
+integration of a fix for this is being tracked in
+https://github.com/ged-lab/khmer/issues/23
+
+Some scripts only output FASTA even if given a FASTQ file. This issue is
+being tracked in https://github.com/ged-lab/khmer/issues/46
+
+A user reported that abundance-dist-single.py fails with small files and
+many threads. This issue is being tracked in
+https://github.com/ged-lab/khmer/issues/75
+
+Contributors
+------------
+
+@mr-c, @ctb, @camillescott, @wrightmhw, @chuckpr, @luizirber,
+@accaldwell, @znruss
diff --git a/doc/release-notes/release-1.2.md b/doc/release-notes/release-1.2.md
index edf557f..9f7bd4a 100644
--- a/doc/release-notes/release-1.2.md
+++ b/doc/release-notes/release-1.2.md
@@ -1,3 +1,5 @@
+# khmer v1.2 release notes
+
 This is the v1.2 release of khmer: minor new features and bug fixes. The start
 of this release cycle coincided with the Mozilla Science Lab Global Sprint
 2014. We honor and thank the 19 new contributors (including four Michigan State
@@ -89,10 +91,10 @@ https://github.com/ged-lab/khmer/issues/75
 
 ## Contributors
 
-@mr-c, @ctb, *@bocajnotnef, *@Echelon9, *@jlippi, *@kdmurray91, @qingpeng,
-*@leogargu, *@jiarong, *@brtaylor92, *@iglpdc, @camillescott, *@HLWiencko,
-*@cowguru2000, *@drlabratory, *@jstapleton, *@b-wyss, *@jgluck, @fishjord,
-*@SherineAwad, *@pgarland, *@majoras-masque, @chuckpr, *@RodPic, @luizirber,
-*@jrherr 
+@mr-c, @ctb, \*@bocajnotnef, \*@Echelon9, \*@jlippi, \*@kdmurray91, @qingpeng,
+\*@leogargu, \*@jiarong, \*@brtaylor92, \*@iglpdc, @camillescott, \*@HLWiencko,
+\*@cowguru2000, \*@drlabratory, \*@jstapleton, \*@b-wyss, \*@jgluck, @fishjord,
+\*@SherineAwad, \*@pgarland, \*@majoras-masque, @chuckpr, \*@RodPic, @luizirber,
+\*@jrherr
 
 `*` Denotes new contributor
diff --git a/doc/release-notes/release-1.2.rst b/doc/release-notes/release-1.2.rst
new file mode 100644
index 0000000..9e851ec
--- /dev/null
+++ b/doc/release-notes/release-1.2.rst
@@ -0,0 +1,106 @@
+khmer v1.2 release notes
+========================
+
+This is the v1.2 release of khmer: minor new features and bug fixes. The
+start of this release cycle coincided with the Mozilla Science Lab
+Global Sprint 2014. We honor and thank the 19 new contributors
+(including four Michigan State University undergraduates) who
+volunteered their time to contribute!
+
+Docs at: https://khmer.readthedocs.org/en/v1.2/
+
+New items of note:
+------------------
+
+- @mr-c and @ctb are proud to announce khmer's code of conduct:
+  http://khmer.readthedocs.org/en/v1.2/dev/CODE\_OF\_CONDUCT.html #664
+- All scripts list which files have been created during their execution
+  #477 @bocajnotnef
+- All scripts now only output status messages to STDERR instead of
+  STDOUT #626 @b-wyss
+- docs/: a fairly major re-organization, plus brand new developer docs
+  @ctb @mr-c
+- load-into-counting.py: ``--summary-info``: machine readable summary in
+  JSON or TSV format #649 @kdmurray91 (see the sketch below)
+- scripts/extract-partitions.py: added documentation for .dist columns
+  #516 @chuckpr
+- Makefile: a new target ``make install-dependencies`` is useful for
+  developers #539 @mr-c
+- Sandbox scripts have been cleaned up, or removed (see
+  sandbox/README.rst for details) #589 @ctb
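+
+A quick sketch of the new ``--summary-info`` option (file names here are
+illustrative)::
+
+   load-into-counting.py -k 20 -x 1e8 --summary-info json counts.ct reads.fq.gz
+   cat counts.ct.info.json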
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+- do-partition.py's excessive spawning of threads fixed #637
+  @camillescott
+- Fixed unique k-mer count reporting in load-graph,
+  load-into-counting, and normalize-by-median #562 @mr-c
+- Clarified and tested the requirement for a 64-bit operating system
+  #529 @Echelon9
+- Removed some of the broken multi-threading options #511
+  @majoras-masque
+- Fixed core dump from table.get("wrong\_length\_string") #585
+  @Echelon9
+- filter-abund no longer lists parameters that it doesn't use #524
+  @jstapleton
+- Reduced the memory required to run the test suite #542 @leogargu
+- BibTeX citation included in CITATIONS #541 @HLWiencko
+
+Additional fixes/features
+-------------------------
+
+- Deleted unused ScoringMatrix::assign #502 @RodPic
+- Rooted all of our C++ exceptions to a common base exception #508
+  @iglpdc
+- Deleted KhmerError #503 @drlabratory
+- normalize-by-median now reports output after the main loop exits, in
+  case it hadn't been triggered #586 @ctb
+- Cleaned up many issues discovered by cppcheck #506 @brtaylor92
+- Developers have a new Makefile target to autofix formatting:
+  ``make format`` #612 @brtaylor92
+- normalize-by-median.py test coverage increased #361 @SherineAwad
+- Several unused functions were removed #599 @brtaylor92
+- Developer docs now link to the stdc++ docs as appropriate #629 @mr-c
+- Added tests for non-sequential access to input files #644
+  @bocajnotnef
+- Removed khmer/threading\_args.py #653 @bocajnotnef
+- Improved test for maximum k value #658 @pgarland
+- ReadParser no longer crashes if n\_threads = 0 #86 @jiarong
+
+Known issues:
+-------------
+
+All of these are pre-existing.
+
+Multithreaded reading will drop reads. This major issue has been present
+for several khmer releases and was only found via a much larger test
+case than we had previously been using. Credit to @camillescott.
+Workaround: disable threading. The next release will fix this and the
+other FAST[AQ] parsing issues.
+https://github.com/ged-lab/khmer/issues/681
+
+Some users have reported that normalize-by-median.py will utilize more
+memory than it was configured for. This is being investigated in
+https://github.com/ged-lab/khmer/issues/266
+
+Some FASTQ files confuse our parser when running with more than one
+thread, for example while using load-into-counting.py. If you
+experience this, add "--threads=1" to your command line. This issue
+is being tracked in https://github.com/ged-lab/khmer/issues/249
+
+If your k-mer table is truncated on write, an error may not be reported;
+this is being tracked in https://github.com/ged-lab/khmer/issues/443.
+However, khmer will now (correctly) fail when trying to read a truncated
+file (See #333).
+
+Paired-end reads from Casava 1.8 currently require renaming for use in
+normalize-by-median and filter-abund when used in paired mode. The
+integration of a fix for this is being tracked in
+https://github.com/ged-lab/khmer/issues/23
+
+Some scripts only output FASTA even if given a FASTQ file. This issue is
+being tracked in https://github.com/ged-lab/khmer/issues/46
+
+A user reported that abundance-dist-single.py fails with small files and
+many threads. This issue is being tracked in
+https://github.com/ged-lab/khmer/issues/75
+
+Contributors
+------------
+
+@mr-c, @ctb, \*@bocajnotnef, \*@Echelon9, \*@jlippi, \*@kdmurray91,
+@qingpeng, \*@leogargu, \*@jiarong, \*@brtaylor92, \*@iglpdc,
+@camillescott, \*@HLWiencko, \*@cowguru2000, \*@drlabratory,
+\*@jstapleton, \*@b-wyss, \*@jgluck, @fishjord, \*@SherineAwad,
+\*@pgarland, \*@majoras-masque, @chuckpr, \*@RodPic, @luizirber,
+\*@jrherr
+
+``*`` Denotes new contributor
diff --git a/doc/release-notes/release-1.3.md b/doc/release-notes/release-1.3.md
index b55bf31..95edb2c 100644
--- a/doc/release-notes/release-1.3.md
+++ b/doc/release-notes/release-1.3.md
@@ -1,3 +1,5 @@
+# khmer v1.3 release notes
+
 This is the v1.3 release of khmer featuring a new FAST[AQ] parser from the
 SeqAn project.
 
diff --git a/doc/release-notes/release-1.3.rst b/doc/release-notes/release-1.3.rst
new file mode 100644
index 0000000..68347f3
--- /dev/null
+++ b/doc/release-notes/release-1.3.rst
@@ -0,0 +1,62 @@
+khmer v1.3 release notes
+========================
+
+This is the v1.3 release of khmer featuring a new FAST[AQ] parser from
+the SeqAn project.
+
+Docs at: https://khmer.readthedocs.org/en/v1.3/
+
+New items of note:
+------------------
+
+Fixes two issues with multithreaded reading of sequence files: FASTQ
+parsing and the recently found read-dropping issue. Several khmer
+scripts now support reading from non-seekable plain and gzipped FAST[AQ]
+files (a.k.a. pipe or streaming support). @mr-c #642
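+
+A minimal sketch of the streaming support (file names illustrative; any
+of the supporting scripts reads the same way)::
+
+   gunzip -c reads.fq.gz | load-into-counting.py -k 20 -x 1e8 counts.ct /dev/stdin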
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+restore threading to load-graph.py #699 @mr-c
+
+Additional fixes/features
+-------------------------
+
+- Increased filter\_abund.py coverage #568 @wrightmhw
+- Provided scripts/ testing coverage for check\_space\_for\_hashtable
+  #386 #678 #718 @b-wyss
+- Use absolute URI in CODE\_OF\_CONDUCT #684 @jsspencer
+- Give SeqAn credit #712 @mr-c
+- Added testing to make sure all sandbox scripts are import-able and
+  execfile-able #709 @ctb
+- Reduced memory requirements to run tests #701 @ctb
+- Two minor bug fixes to sandbox scripts #706 @ctb
+- Upgraded trim-low-abund for better, more profitable streaming #601
+  @ctb
+- Add --force or --expert or --ignore flag to all khmer scripts that do
+  sanity checking #399 #647 @jessicamizzi
+- Added XDECREF for returned read tuple in
+  ReadParser.read\_pair\_iterator() #693 @mr-c @camillescott
+
+Known issues:
+-------------
+
+All of these are pre-existing.
+
+Some users have reported that normalize-by-median.py will utilize more
+memory than it was configured for. This is being investigated in #266
+
+If your k-mer table is truncated on write, an error may not be reported;
+this is being tracked in https://github.com/ged-lab/khmer/issues/443.
+However, khmer will now (correctly) fail when trying to read a truncated
+file (See #333).
+
+Paired-end reads from Casava 1.8 currently require renaming for use in
+normalize-by-median and filter-abund when used in paired mode. The
+integration of a fix for this is being tracked in #23
+
+Some scripts only output FASTA even if given a FASTQ file. This issue is
+being tracked in #46
+
+A user reported that abundance-dist-single.py fails with small files and
+many threads. This issue is being tracked in #75
+
+Contributors
+------------
+
+@mr-c, @ctb, @camillescott, @b-wyss, @wrightmhw, @jsspencer
diff --git a/doc/release-notes/release-1.4.md b/doc/release-notes/release-1.4.md
new file mode 100644
index 0000000..7329f96
--- /dev/null
+++ b/doc/release-notes/release-1.4.md
@@ -0,0 +1,237 @@
+# khmer v1.4 release notes
+
+This is the v1.4 release of khmer featuring the results of our March and April
+(PyCon) coding sprints and the 16 new contributors; the use of the new v0.8
+release of screed (the library we use for pure Python reading of nucleotide
+sequence files); and the addition of @luizirber's HyperLogLog counter for quick
+cardinality estimation.
+
+Documentation is at https://khmer.readthedocs.org/en/v1.4/
+
+## New items of note:
+
+Casava 1.8 read naming is now fully supported and in general the scripts no
+longer mangle read names. Side benefits: `split-paired-reads.py` will no longer
+drop reads with 'bad' names; `count-median.py` can generate output in CSV
+format. #759 #818 @ctb #873 @ahaerpfer
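+
+(For reference, Casava 1.8 read names carry the read number in a comment
+field, e.g. `@M00123:21:FC123:1:1101:15589:1332 1:N:0:1`, instead of the
+older `/1` and `/2` suffixes.)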
+
+Most scripts now support a "broken" interleaved paired-read format for FASTA/
+FASTQ nucleotide sequence files.
+[`trim-low-abund.py`](http://khmer.readthedocs.org/en/v1.4/user/scripts.html#trim-low-abund-py)
+has been promoted from the sandbox as well (with streaming support). #759 @ctb
+\#963 @sguermond #933 @standage 
+
+The script to transform an interleaved paired-read nucleotide sequence file
+into two files now allows one to name the output files which can be useful in
+combination with named pipes for streaming processing #762 @ctb 
+
+Streaming everywhere: thanks to screed v0.8 we now support streaming of almost
+all inputs and outputs. #830 @aditi9783 #812 @mr-c #917 @bocajnotnef #882
+@standage
+
+Need a quick way to count the total number of unique k-mers in very low
+memory? The `unique-kmers.py` script in the sandbox uses a HyperLogLog
+counter to quickly (and with little memory) provide an estimate with a
+controllable error rate. #257 #738 #895 #902 @luizirber
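+
+A minimal sketch (the invocation is illustrative; check the script's
+`--help` for the exact interface):
+
+    python sandbox/unique-kmers.py -k 20 reads.fq.gz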
+
+`normalize-by-median.py` can now process both a paired interleaved sequence
+file and a file of unpaired reads in the same invocation, thus removing the
+need to write the counting table to disk as required in the workaround. #957
+@susinmotion
+
+## Notable bugs fixed/issues closed:
+
+Paired-end reads from Casava 1.8 no longer require renaming for use in
+`normalize-by-median.py` and `filter-abund.py` when used in paired mode #818
+@ctb
+
+Python version support clarified. We do not (yet) support Python 3.x #741 @mr-c
+
+If a single output file mode is chosen for normalize-by-median.py, we now
+default to overwriting the output. Appending to the output is available by
+using the append redirection operator from the shell. #843 @drtamermansour
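+
+An illustrative sketch, assuming `-o` selects the single output file; the
+second command appends via the shell instead of overwriting:
+
+    normalize-by-median.py -k 20 -C 20 -o pooled.keep.fq a.fq
+    normalize-by-median.py -k 20 -C 20 -o /dev/stdout b.fq >> pooled.keep.fq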
+
+Scripts that consume sequence data using C++ will now properly throw an error
+on truncated files. #897 @kdmurray91 And while writing to disk we properly
+check for errors #856 #962 @mr-c
+
+`abundance-dist-single.py` no longer fails with small files and many threads.
+\#900 @mr-c
+
+## Additional fixes/features
+
+### Of interest to users:
+
+Many documentation updates #753 @PamelaM, #782 @bocajnotnef, #845 @alameldin,
+\#804 @ctb, #870 @SchwarzEM, #953 #942 @safay, #929 @davelin1, #687 #912 #926
+@mr-c
+
+Installation instructions for Conda, Arch Linux, and Mac Ports have been added
+\#723 @reedacartwright #952 @elmbeech #930 @ahaerpfer
+
+The example script for the STAMPS database has been fixed to run correctly #781
+@drtamermansour
+
+`split-paired-reads.py`: added `-o` option to allow specification of an output
+directory #752 @bede 
+
+Fixed a string formatting and a boundary error in `sample-reads-randomly.py`
+\#773 @qingpeng #995 @ctb 
+
+CSV output added to `abundance-dist.py`, `abundance-dist-single.py`,
+`count-overlap.py`, and `readstats.py` #831 #854 #855 @drtamermansour #959
+@anotherthomas
+
+TSV/JSON output of `load-into-counting.py` enhanced with the total number of
+reads processed #996 @kdmurray91 Output files are now also checked to be
+writable *before* loading the input files #672 @pgarland @bocajnotnef 
+
+`interleave-reads.py` now prints the output filename nicely #827 @kdmurray91 
+
+Cleaned up error for input file not existing #772 @jessicamizzi #851 @ctb 
+
+Fixed error in `find-knots.py` #860 @TheOneHyer 
+
+The help text for `load-into-counting.py` for the `--no-bigcounts`/`-b` flag
+has been clarified #857 @kdmurray91
+
+@lexnederbragt confirmed an old bug has been fixed with his test for whitespace
+in sequence identifiers interacting with the `extract-partitions.py` script
+\#979 
+
+Now safe to copy-and-paste from the user documentation as the smart quotes have
+been turned off. #967 @ahaerpfer
+
+The script `make-coverage.py` has been restored to the sandbox. #920
+@SherineAwad
+
+`normalize-by-median.py` will warn if two of the input files have the same name
+\#932 @elmbeech
+
+### Of interest to developers:
+
+Switched away from using `--user` install for developers #740 @mr-c
+@drtamermansour & #883 @standage
+
+Developers can now see a summary of important Makefile targets via `make help`
+\#783 @standage 
+
+The unused `khmer.load_pe` module has been removed #828 @kdmurray91 
+
+Versioneer bug due to new screed release was squashed #835 @mr-c
+
+A Python 2.6 and 2.7.2 specific bug was worked around #869 @kdmurray91 @ctb 
+
+Added functions hash_find_all_tags_list and hash_get_tags_and_positions to
+CountingHash objects #749 #765 @ctb
+
+The `make diff-cover` and ChangeLog formatting requirements have been added to
+checklist #766 @mr-c 
+
+A useful message is now presented if large tables fail to allocate enough
+memory #704 @mr-c
+
+A checklist for developers adding new CPython types was added #727 @mr-c
+
+The sandbox graduation checklist has been updated to include streaming support
+\#951 @sguermond 
+
+Specific policies for sandbox/ and scripts/ content, and a process for adding
+new command line scripts into scripts/ have been added to the developer
+documentation #799 @ctb
+
+Sandbox scripts update: corrected #! Python invocation #815 @Echelon9;
+executable bits, copyright headers, no underscores in filenames #823 #826
+\#850 @alameldin; several scripts deleted, docs + requirements updated #852
+@ctb
+
+Avoid running big-memory tests on OS X #819 @ctb
+
+Unused callback code was removed #698 @mr-c
+
+The CPython code was updated to use the new checklist and follow additional
+best practices #785 #842 @luizirber 
+
+Added a read-only view of the raw counting tables #671 @camillescott #869
+@kdmurray91
+
+Added a Python method for quickly getting the number of underlying tables in a
+counting or presence table #879 #880 @kdmurray91 
+
+The C++ library can now be built separately for the brave and curious developer
+\#788 @kdmurray91 
+
+The ReadParser object now keeps track of the number of reads processed #877
+@kdmurray91
+
+Documentation is now reproducible #886 @mr-c
+
+Python future proofing: specify floor division #863 @mr-c
+
+Miscellaneous spelling fixes; thanks codespell! #867 @mr-c 
+
+Debian package list update #984 @mr-c
+
+`khmer.kfile.check_file_status()` has been renamed to `check_input_files()`
+\#941 @proteasome `filter-abund.py` now uses it to check the input counting
+table #931 @safay
+
+`normalize-by-median.py` was refactored to not pass the ArgParse object around
+#965 @susinmotion 
+
+Developer communication has been clarified #969 @sguermond 
+
+Tests using the 'fail_okay=true' parameter to `runscript` have been updated
+to confirm the correct error occurred. Three faulty tests were fixed and the
+docs were clarified #968 #971 @susinmotion
+
+FASTA test added for `extract-long-sequences.py` #901 @jessicamizzi 
+
+'added silly test for empty file warning' #557 @wltrimbl @bocajnotnef 
+
+A couple tests were made more resilient and some extra error checking added in
+CPython land #889 @mr-c
+
+Copyright added to pull request checklist #940 @sguermond 
+
+`khmer_exception`s are now based on `std::string`s which plugs a memory leak
+\#938 @anotherthomas 
+
+Python docstrings were made PEP257 compliant #936 @ahaerpfer 
+
+Some C++ comments were converted to be Doxygen compliant #950 @josiahseaman
+
+The counting and presence table warning logic was refactored and centralized
+\#944 @susinmotion 
+
+The release checklist was updated to better run the post-install tests #911
+@mr-c
+
+The unused method `find_all_tags_truncate_on_abundance` was removed from the
+CPython API #924 @anotherthomas 
+
+OS X warnings quieted #887 @mr-c
+
+## Known issues:
+
+All of these are pre-existing.
+
+Some users have reported that normalize-by-median.py will utilize more memory
+than it was configured for. This is being investigated in
+https://github.com/ged-lab/khmer/issues/266
+
+Some scripts only output FASTA even if given a FASTQ file. This issue is being
+tracked in https://github.com/ged-lab/khmer/issues/46
+
+## Contributors
+
+@ctb, @kdmurray91, @mr-c, @drtamermansour, @luizirber, @standage, @bocajnotnef,
+\*@susinmotion, @jessicamizzi, \*@elmbeech, \*@anotherthomas, \*@sguermond,
+\*@ahaerpfer, \*@alameldin, \*@TheOneHyer, \*@aditi9783, \*@proteasome,
+\*@bede, \*@davelin1, @Echelon9, \*@reedacartwright, @qingpeng, \*@SchwarzEM,
+\*@scottsievert, @PamelaM, @SherineAwad, \*@josiahseaman, \*@lexnederbragt
+
+\* Indicates new contributors
+
+## Issue reporters
+
+@moorepants, @teshomem, @macmanes, @lexnederbragt, @r-gaia-cs, @magentashades
diff --git a/doc/release-notes/release-1.4.rst b/doc/release-notes/release-1.4.rst
new file mode 100644
index 0000000..667f7d3
--- /dev/null
+++ b/doc/release-notes/release-1.4.rst
@@ -0,0 +1,257 @@
+khmer v1.4 release notes
+========================
+
+This is the v1.4 release of khmer featuring the results of our March and
+April (PyCon) coding sprints and the 16 new contributors; the use of the
+new v0.8 release of screed (the library we use for pure Python reading
+of nucleotide sequence files); and the addition of @luizirber's
+HyperLogLog counter for quick cardinality estimation.
+
+Documentation is at https://khmer.readthedocs.org/en/v1.4/
+
+New items of note:
+------------------
+
+Casava 1.8 read naming is now fully supported and in general the scripts
+no longer mangle read names. Side benefits: ``split-paired-reads.py``
+will no longer drop reads with 'bad' names; ``count-median.py`` can
+generate output in CSV format. #759 #818 @ctb #873 @ahaerpfer
+
+Most scripts now support a "broken" interleaved paired-read format for
+FASTA/FASTQ nucleotide sequence files.
+`trim-low-abund.py <http://khmer.readthedocs.org/en/v1.4/user/scripts.html#trim-low-abund-py>`__
+has been promoted from the sandbox as well (with streaming support).
+#759 @ctb #963 @sguermond #933 @standage
+
+The script to transform an interleaved paired-read nucleotide sequence
+file into two files now allows one to name the output files which can be
+useful in combination with named pipes for streaming processing #762
+@ctb
+
+Streaming everywhere: thanks to screed v0.8 we now support streaming of
+almost all inputs and outputs. #830 @aditi9783 #812 @mr-c #917
+@bocajnotnef #882 @standage
+
+Need a quick way to count the total number of unique k-mers in very low
+memory? The ``unique-kmers.py`` script in the sandbox uses a HyperLogLog
+counter to quickly (and with little memory) provide an estimate with a
+controllable error rate. #257 #738 #895 #902 @luizirber
+
+``normalize-by-median.py`` can now process both a paired interleaved
+sequence file and a file of unpaired reads in the same invocation, thus
+removing the need to write the counting table to disk as required in the
+workaround. #957 @susinmotion
+
+Notable bugs fixed/issues closed:
+---------------------------------
+
+Paired-end reads from Casava 1.8 no longer require renaming for use in
+``normalize-by-median.py`` and ``filter-abund.py`` when used in paired
+mode #818 @ctb
+
+Python version support clarified. We do not (yet) support Python 3.x
+#741 @mr-c
+
+If a single output file mode is chosen for normalize-by-median.py, we
+now default to overwriting the output. Appending to the output is
+available by using the append redirection operator from the shell. #843
+@drtamermansour
+
+Scripts that consume sequence data using C++ will now properly throw an
+error on truncated files. #897 @kdmurray91 And while writing to disk we
+properly check for errors #856 #962 @mr-c
+
+``abundance-dist-single.py`` no longer fails with small files and many
+threads. #900 @mr-c
+
+Additional fixes/features
+-------------------------
+
+Of interest to users:
+~~~~~~~~~~~~~~~~~~~~~
+
+Many documentation updates #753 @PamelaM, #782 @bocajnotnef, #845
+@alameldin, #804 @ctb, #870 @SchwarzEM, #953 #942 @safay,
+#929 @davelin1, #687 #912 #926 @mr-c
+
+Installation instructions for Conda, Arch Linux, and Mac Ports have been
+added #723 @reedacartwright #952 @elmbeech #930 @ahaerpfer
+
+The example script for the STAMPS database has been fixed to run
+correctly #781 @drtamermansour
+
+``split-paired-reads.py``: added ``-o`` option to allow specification of
+an output directory #752 @bede
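+
+For example (assuming the usual ``.1``/``.2`` output suffixes, now
+written into the named directory)::
+
+   split-paired-reads.py -o splits/ interleaved.fq.gz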
+
+Fixed a string formatting and a boundary error in
+``sample-reads-randomly.py`` #773 @qingpeng #995 @ctb
+
+CSV output added to ``abundance-dist.py``, ``abundance-dist-single.py``,
+``count-overlap.py``, and ``readstats.py`` #831 #854 #855
+@drtamermansour #959 @anotherthomas
+
+TSV/JSON output of ``load-into-counting.py`` enhanced with the total
+number of reads processed #996 @kdmurray91 Output files are now also
+checked to be writable *before* loading the input files #672 @pgarland
+@bocajnotnef
+
+``interleave-reads.py`` now prints the output filename nicely #827
+@kdmurray91
+
+Cleaned up error for input file not existing #772 @jessicamizzi #851
+@ctb
+
+Fixed error in ``find-knots.py`` #860 @TheOneHyer
+
+The help text for ``load-into-counting.py`` for the
+``--no-bigcounts``/``-b`` flag has been clarified #857 @kdmurray91
+
+@lexnederbragt confirmed an old bug has been fixed with his test for
+whitespace in sequence identifiers interacting with the
+``extract-partitions.py`` script #979
+
+Now safe to copy-and-paste from the user documentation as the smart
+quotes have been turned off. #967 @ahaerpfer
+
+The script ``make-coverage.py`` has been restored to the sandbox. #920
+@SherineAwad
+
+``normalize-by-median.py`` will warn if two of the input files have the
+same name #932 @elmbeech
+
+Of interest to developers:
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Switched away from using ``--user`` install for developers #740 @mr-c
+@drtamermansour & #883 @standage
+
+Developers can now see a summary of important Makefile targets via
+``make help`` #783 @standage
+
+The unused ``khmer.load_pe`` module has been removed #828 @kdmurray91
+
+Versioneer bug due to new screed release was squashed #835 @mr-c
+
+A Python 2.6 and 2.7.2 specific bug was worked around #869 @kdmurray91
+@ctb
+
+Added functions hash\_find\_all\_tags\_list and
+hash\_get\_tags\_and\_positions to CountingHash objects #749 #765 @ctb
+
+The ``make diff-cover`` and ChangeLog formatting requirements have been
+added to checklist #766 @mr-c
+
+A useful message is now presented if large tables fail to allocate
+enough memory #704 @mr-c
+
+A checklist for developers adding new CPython types was added #727 @mr-c
+
+The sandbox graduation checklist has been updated to include streaming
+support #951 @sguermond
+
+Specific policies for sandbox/ and scripts/ content, and a process for
+adding new command line scripts into scripts/ have been added to the
+developer documentation #799 @ctb
+
+Sandbox scripts update: corrected #! Python invocation #815 @Echelon9;
+executable bits, copyright headers, no underscores in filenames #823
+#826 #850 @alameldin; several scripts deleted, docs + requirements
+updated #852 @ctb
+
+Avoid running big-memory tests on OS X #819 @ctb
+
+Unused callback code was removed #698 @mr-c
+
+The CPython code was updated to use the new checklist and follow
+additional best practices #785 #842 @luizirber
+
+Added a read-only view of the raw counting tables #671 @camillescott
+#869 @kdmurray91
+
+Added a Python method for quickly getting the number of underlying
+tables in a counting or presence table #879 #880 @kdmurray91
+
+The C++ library can now be built separately for the brave and curious
+developer #788 @kdmurray91
+
+The ReadParser object now keeps track of the number of reads processed
+#877 @kdmurray91
+
+Documentation is now reproducible #886 @mr-c
+
+Python future proofing: specify floor division #863 @mr-c
+
+Miscellaneous spelling fixes; thanks codespell! #867 @mr-c
+
+Debian package list update #984 @mr-c
+
+``khmer.kfile.check_file_status()`` has been renamed to
+``check_input_files()`` #941 @proteasome ``filter-abund.py`` now uses it
+to check the input counting table #931 @safay
+
+``normalize-by-median.py`` was refactored to not pass the ArgParse
+object around #965 @susinmotion
+
+Developer communication has been clarified #969 @sguermond
+
+Tests using the 'fail\_okay=true' parameter to ``runscript`` have been
+updated to confirm the correct error occurred. Three faulty tests were
+fixed and the docs were clarified #968 #971 @susinmotion
+
+FASTA test added for ``extract-long-sequences.py`` #901 @jessicamizzi
+
+'added silly test for empty file warning' #557 @wltrimbl @bocajnotnef
+
+A couple tests were made more resilient and some extra error checking
+added in CPython land #889 @mr-c
+
+Copyright added to pull request checklist #940 @sguermond
+
+``khmer_exception``\ s are now based on ``std::string``\ s which plugs a
+memory leak #938 @anotherthomas
+
+Python docstrings were made PEP257 compliant #936 @ahaerpfer
+
+Some C++ comments were converted to be Doxygen compliant #950
+@josiahseaman
+
+The counting and presence table warning logic was refactored and
+centralized #944 @susinmotion
+
+The release checklist was updated to better run the post-install tests
+#911 @mr-c
+
+The unused method ``find_all_tags_truncate_on_abundance`` was removed
+from the CPython API #924 @anotherthomas
+
+OS X warnings quieted #887 @mr-c
+
+Known issues:
+-------------
+
+All of these are pre-existing.
+
+Some users have reported that normalize-by-median.py will utilize more
+memory than it was configured for. This is being investigated in
+https://github.com/ged-lab/khmer/issues/266
+
+Some scripts only output FASTA even if given a FASTQ file. This issue is
+being tracked in https://github.com/ged-lab/khmer/issues/46
+
+Contributors
+------------
+
+@ctb, @kdmurray91, @mr-c, @drtamermansour, @luizirber, @standage,
+@bocajnotnef, \*@susinmotion, @jessicamizzi, \*@elmbeech,
+\*@anotherthomas, \*@sguermond, \*@ahaerpfer, \*@alameldin,
+\*@TheOneHyer, \*@aditi9783, \*@proteasome, \*@bede, \*@davelin1,
+@Echelon9, \*@reedacartwright, @qingpeng, \*@SchwarzEM, \*@scottsievert,
+@PamelaM, @SherineAwad, \*@josiahseaman, \*@lexnederbragt
+
+\* Indicates new contributors
+
+Issue reporters
+---------------
+
+@moorepants, @teshomem, @macmanes, @lexnederbragt, @r-gaia-cs,
+@magentashades
diff --git a/doc/requirements.txt b/doc/requirements.txt
index 9f974f9..b53f76d 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -1 +1,2 @@
 http://athyra.ged.msu.edu/~mcrusoe/autoprogram/sphinxcontrib-autoprogram-0.1.1-khmerdev-20140331.tar.gz#egg=sphinxcontrib-autoprogram==0.1.1-khmerdev-20140331
+setuptools==3.4.1
diff --git a/doc/roadmap.txt b/doc/roadmap.rst
similarity index 100%
rename from doc/roadmap.txt
rename to doc/roadmap.rst
diff --git a/doc/user/biblio.txt b/doc/user/biblio.rst
similarity index 100%
rename from doc/user/biblio.txt
rename to doc/user/biblio.rst
diff --git a/doc/user/blog-posts.txt b/doc/user/blog-posts.rst
similarity index 85%
rename from doc/user/blog-posts.txt
rename to doc/user/blog-posts.rst
index 9fb1000..ab939c7 100644
--- a/doc/user/blog-posts.txt
+++ b/doc/user/blog-posts.rst
@@ -29,13 +29,13 @@ The `fasta-to-abundance-hist
 and `abundance-hist-by-position
 <http://github.com/ctb/khmer/blob/master/sandbox/abundance-hist-by-position.py>`__
 scripts can be used to generate the k-mer abundance profile data, after
-loading all the k-mer counts into a .kh file::
+loading all the k-mer counts into a .ct file::
 
    # first, load all the k-mer counts:
-   load-into-counting.py -k 20 -x 1e7 25k.kh data/25k.fq.gz
+   load-into-counting.py -k 20 -x 1e7 25k.ct data/25k.fq.gz
 
    # then, build the '.freq' file that contains all of the counts by position
-   python sandbox/fasta-to-abundance-hist.py 25k.kh data/25k.fq.gz
+   python sandbox/fasta-to-abundance-hist.py 25k.ct data/25k.fq.gz
 
    # sum across positions.
    python sandbox/abundance-hist-by-position.py data/25k.fq.gz.freq > out.dist
@@ -46,8 +46,8 @@ high abundance k-mers, but we don't have a script handy to do that yet.
 You can assess high/low abundance k-mer distributions with the
 `hi-lo-abundance-by-position script <http://github.com/ctb/khmer/blob/master/sandbox/hi-lo-abundance-by-position.py>`__::
 
-   load-into-counting.py -k 20 25k.kh data/25k.fq.gz
-   python sandbox/hi-lo-abundance-by-position.py 25k.kh data/25k.fq.gz
+   load-into-counting.py -k 20 25k.ct data/25k.fq.gz
+   python sandbox/hi-lo-abundance-by-position.py 25k.ct data/25k.fq.gz
 
 This will produce two output files, <filename>.pos.abund=1 and
 <filename>.pos.abund=255.
diff --git a/doc/user/choosing-table-sizes.txt b/doc/user/choosing-table-sizes.rst
similarity index 97%
rename from doc/user/choosing-table-sizes.txt
rename to doc/user/choosing-table-sizes.rst
index 166d130..bbc112d 100644
--- a/doc/user/choosing-table-sizes.txt
+++ b/doc/user/choosing-table-sizes.rst
@@ -60,15 +60,15 @@ The real full version
 
 khmer's scripts, at their heart, represent k-mers in a very memory
 efficient way by taking advantage of two data structures, `Bloom
-filters <http://en.wikipedia.org/wiki/Bloom_filter>`__ and `CountMin
-Sketches <https://sites.google.com/site/countminsketch/>`__, that are
+filters <http://en.wikipedia.org/wiki/Bloom_filter>`__ and `Count-Min
+Sketches <http://en.wikipedia.org/wiki/Count%E2%80%93min_sketch>`__, that are
 both *probabilistic* and *constant memory*.  The "probabilistic" part
 means that there are false positives: the less memory you use, the
 more likely it is that khmer will think that k-mers are present when
 they are not, in fact, present.
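+
+A back-of-the-envelope sketch of that trade-off, using the same
+approximation as khmer's own ``calc_expected_collisions`` (pure
+illustration, not an API call): with 5e7 distinct k-mers in 4 tables of
+1e8 entries each, the expected false positive rate is roughly::
+
+   $ python -c 'print (5e7 / 1e8) ** 4'  # occupancy ** number of tables
+   0.0625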
 
 Digital normalization (normalize-by-median and filter-abund) uses
-the CountMin Sketch data structure.
+the Count-Min Sketch data structure.
 
 Graph partitioning (load-graph etc.) uses the Bloom filter data structure.
 
@@ -76,7 +76,7 @@ The practical ramifications of this are pretty cool.  For example,
 your digital normalization is guaranteed not to increase in memory
 utilization, and graph partitioning is estimated to be 10-20x more
 memory efficient than any other de Bruijn graph representation.  And
-hash tables (which is what Bloom filters and CountMin Sketches use)
+hash tables (which is what Bloom filters and Count-Min Sketches use)
 are really fast and efficient.  Moreover, the optimal memory size for
 these primary data structures is dependent on the number of k-mers,
 but not explicitly on the size of k itself, which is very unusual.
diff --git a/doc/user/examples.txt b/doc/user/examples.rst
similarity index 100%
rename from doc/user/examples.txt
rename to doc/user/examples.rst
diff --git a/doc/user/galaxy.txt b/doc/user/galaxy.rst
similarity index 100%
rename from doc/user/galaxy.txt
rename to doc/user/galaxy.rst
diff --git a/doc/user/getting-help.rst b/doc/user/getting-help.rst
new file mode 100644
index 0000000..007b02a
--- /dev/null
+++ b/doc/user/getting-help.rst
@@ -0,0 +1,54 @@
+.. vim: set filetype=rst
+
+===============
+How to get help
+===============
+
+First, be sure that you:
+
+#. Read the documentation (this site)
+
+#. Google search for the error output and/or keywords related to your problem.
+   Here you can search results from the mailing list, where others may
+   have discussed solutions to the same issue.
+
+.. raw:: html
+
+    <form action="http://google.com/search" method="get">
+    <input type="text" name="q" size="28" maxlength="255" value="" />
+    <input type="submit" value="Google Search" />
+    <br/>
+    <input type="checkbox" name="sitesearch"
+    value="http://lists.idyll.org/pipermail/khmer" checked /> only search
+    khmer discussion email archive<br/>
+    </form>
+
+Mailing List
+------------
+
+The primary way to get help is through the khmer discussion list:
+http://lists.idyll.org/listinfo/khmer
+
+Asking a question
+-----------------
+
+#. Include your:
+
+   * OS version (Mac OS X or Linux):  ``uname -mrs``
+   * Python version:  ``python --version``
+   * and khmer version:  ``pip freeze | grep khmer``
+
+#. Precisely describe what you are trying to do.  Reread it from the
+   perspective of someone else trying to reproduce your task.
+
+#. Copy-and-paste the exact command that is causing the problem.  Include the
+   steps you performed leading up to the issue.
+
+#. Include the complete error message; if it is large, include a link to a
+   file.
+
+GitHub
+------
+
+You are also welcome to report an issue you are having using GitHub:
+https://github.com/ged-lab/khmer/issues/new
diff --git a/doc/user/guide.txt b/doc/user/guide.rst
similarity index 97%
rename from doc/user/guide.txt
rename to doc/user/guide.rst
index 18e4cb0..54458b5 100644
--- a/doc/user/guide.txt
+++ b/doc/user/guide.rst
@@ -206,9 +206,9 @@ normalizations on multiple files in multiple steps. For example, break ::
 
 into multiple steps like so::
 
-  normalize-by-median.py [ ... ] --savetable file1.kh file1.fa
-  normalize-by-median.py [ ... ] --loadtable file1.kh --savetable file2.kh file2.fa
-  normalize-by-median.py [ ... ] --loadtable file2.kh --savetable file3.kh file3.fa
+  normalize-by-median.py [ ... ] --savetable file1.ct file1.fa
+  normalize-by-median.py [ ... ] --loadtable file1.ct --savetable file2.ct file2.fa
+  normalize-by-median.py [ ... ] --loadtable file2.ct --savetable file3.ct file3.fa
 
 The results should be identical!
 
diff --git a/doc/user/index.txt b/doc/user/index.rst
similarity index 93%
rename from doc/user/index.txt
rename to doc/user/index.rst
index d98822c..c8f51f4 100644
--- a/doc/user/index.txt
+++ b/doc/user/index.rst
@@ -16,4 +16,5 @@ Contents:
    known-issues
    galaxy
    biblio
+   getting-help
 
diff --git a/doc/user/install.txt b/doc/user/install.rst
similarity index 95%
rename from doc/user/install.txt
rename to doc/user/install.rst
index 0907c76..e43709c 100644
--- a/doc/user/install.txt
+++ b/doc/user/install.rst
@@ -4,9 +4,9 @@
 Installing and running khmer
 ============================
 
-You'll need a 64-bit operating system, Python 2.7+ and internet access.
+You'll need a 64-bit operating system, Python 2.7.x and internet access.
 
-The khmer project currently works with Python 2.6 but we target Python 2.7+. 
+The khmer project currently works with Python 2.6 but we target Python 2.7.x.
 
 Build requirements
 ------------------
diff --git a/doc/user/known-issues.txt b/doc/user/known-issues.rst
similarity index 100%
rename from doc/user/known-issues.txt
rename to doc/user/known-issues.rst
diff --git a/doc/user/partitioning-big-data.txt b/doc/user/partitioning-big-data.rst
similarity index 100%
rename from doc/user/partitioning-big-data.txt
rename to doc/user/partitioning-big-data.rst
diff --git a/doc/user/scripts.txt b/doc/user/scripts.rst
similarity index 96%
rename from doc/user/scripts.txt
rename to doc/user/scripts.rst
index 54c726d..157af7b 100644
--- a/doc/user/scripts.txt
+++ b/doc/user/scripts.rst
@@ -53,6 +53,9 @@ k-mer counting and abundance filtering
 .. autoprogram:: filter-abund-single:get_parser()
         :prog: filter-abund-single.py
 
+.. autoprogram:: trim-low-abund:get_parser()
+        :prog: trim-low-abund.py
+
 .. autoprogram:: count-median:get_parser()
         :prog: count-median.py
 
@@ -126,6 +129,9 @@ Read handling: interleaving, splitting, etc.
 .. autoprogram:: interleave-reads:get_parser()
         :prog: interleave-reads.py
 
+.. autoprogram:: readstats:get_parser()
+        :prog: readstats.py
+
 .. autoprogram:: sample-reads-randomly:get_parser()
         :prog: sample-reads-randomly.py
 
diff --git a/examples/stamps/do.sh b/examples/stamps/do.sh
old mode 100644
new mode 100755
index 3623854..4777142
--- a/examples/stamps/do.sh
+++ b/examples/stamps/do.sh
@@ -1,25 +1,33 @@
+#!/bin/bash
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt. 
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project@idyll.org
 #
-python ../../scripts/load-into-counting.py -x 1e8 -k 20 stamps-reads.kh ../../data/stamps-reads.fa.gz 
-python ../../scripts/abundance-dist.py stamps-reads.kh ../../data/stamps-reads.fa.gz stamps-reads.hist
-python ../../scripts/normalize-by-median.py -k 20 -C 10 -x 1e8 ../../data/stamps-reads.fa.gz --savehash stamps-dn.kh
-python ../../scripts/abundance-dist.py stamps-dn.kh stamps-reads.fa.gz.keep stamps-dn.hist
-python ../../scripts/do-partition.py -k 32 -x 1e8 -s 1e4 -T 8 stamps-part ../../data/stamps-reads.fa.gz 
-python ../../sandbox/error-correct-pass2.py -C 10 stamps-dn.kh ../../data/stamps-reads.fa.gz 
-python ../../scripts/load-into-counting.py -x 1e8 -k 20 stamps-corr.kh stamps-reads.fa.gz.corr
-python ../../scripts/abundance-dist.py stamps-corr.kh stamps-reads.fa.gz.corr stamps-corr.hist
-python ../../scripts/extract-partitions.py stamps-part stamps-reads.fa.gz.part
-python ../../scripts/extract-partitions.py -X 1 stamps-part stamps-reads.fa.gz.part
-python ../../scripts/load-into-counting.py -x 1e8 -k 20 stamps-part.g0.kh stamps-part.group0000.fa 
-python ../../scripts/load-into-counting.py -x 1e8 -k 20 stamps-part.g1.kh stamps-part.group0001.fa 
-python ../../scripts/abundance-dist.py stamps-part.g0.kh stamps-part.group0000.fa stamps-part.g0.hist
-python ../../scripts/abundance-dist.py stamps-part.g1.kh stamps-part.group0001.fa stamps-part.g1.hist
+load-into-counting.py -x 1e8 -k 20 stamps-reads.ct \
+	../../data/stamps-reads.fa.gz
+abundance-dist.py stamps-reads.ct ../../data/stamps-reads.fa.gz \
+	stamps-reads.hist
+normalize-by-median.py -k 20 -C 10 -x 1e8 ../../data/stamps-reads.fa.gz \
+	--savetable stamps-dn.ct
+abundance-dist.py stamps-dn.ct stamps-reads.fa.gz.keep stamps-dn.hist
+do-partition.py -k 32 -x 1e8 -s 1e4 -T 8 stamps-part \
+	../../data/stamps-reads.fa.gz
+../../sandbox/error-correct-pass2.py -C 10 stamps-dn.ct \
+	../../data/stamps-reads.fa.gz
+load-into-counting.py -x 1e8 -k 20 stamps-corr.ct stamps-reads.fa.gz.corr
+abundance-dist.py stamps-corr.ct stamps-reads.fa.gz.corr stamps-corr.hist
+extract-partitions.py stamps-part stamps-reads.fa.gz.part
+extract-partitions.py -X 1 stamps-part stamps-reads.fa.gz.part
+load-into-counting.py -x 1e8 -k 20 stamps-part.g0.ct stamps-part.group0000.fa
+load-into-counting.py -x 1e8 -k 20 stamps-part.g1.ct stamps-part.group0001.fa
+abundance-dist.py stamps-part.g0.ct stamps-part.group0000.fa stamps-part.g0.hist
+abundance-dist.py stamps-part.g1.ct stamps-part.group0001.fa stamps-part.g1.hist
 
-python ../../scripts/filter-abund.py stamps-dn.kh stamps-reads.fa.gz.keep
-python ../../scripts/normalize-by-median.py -x 1e8 -k 20 -C 10 stamps-reads.fa.gz.keep.abundfilt --savehash stamps-dn3.kh
+filter-abund.py stamps-dn.ct stamps-reads.fa.gz.keep
+normalize-by-median.py -x 1e8 -k 20 -C 10 stamps-reads.fa.gz.keep.abundfilt \
+	--savetable stamps-dn3.ct
 
-python ../../scripts/abundance-dist.py stamps-dn3.kh stamps-reads.fa.gz.keep.abundfilt.keep stamps-dn3.hist
+abundance-dist.py stamps-dn3.ct stamps-reads.fa.gz.keep.abundfilt.keep \
+	stamps-dn3.hist
diff --git a/jenkins-build.sh b/jenkins-build.sh
index 20e30f7..96007e9 100755
--- a/jenkins-build.sh
+++ b/jenkins-build.sh
@@ -13,6 +13,7 @@ fi
 virtualenv -p ${PYTHON_EXECUTABLE} .env
 
 . .env/bin/activate
+pip install setuptools==3.4.1
 make install-dependencies
 
 if type ccache >/dev/null 2>&1
@@ -23,7 +24,7 @@ then
 fi
 if [[ "${NODE_LABELS}" == *osx* ]]
 then
-	export ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future
+	export ARCHFLAGS=-Wno-error=unused-command-line-argument
 fi
 
 if type gcov >/dev/null 2>&1 && [[ "${NODE_LABELS}" != *osx* ]]
@@ -53,8 +54,9 @@ if type hg >/dev/null 2>&1
 then
 	rm -Rf sphinx-contrib
 	#hg clone http://bitbucket.org/mcrusoe/sphinx-contrib
-	hg clone http://athyra.ged.msu.edu/~mcrusoe/sphinx-contrib
-	pip install --upgrade sphinx-contrib/autoprogram/
+	#hg clone http://athyra.ged.msu.edu/~mcrusoe/sphinx-contrib
+	#pip install --upgrade sphinx-contrib/autoprogram/
+	pip install -r doc/requirements.txt
 	make doc
 fi
 make pylint 2>&1 > pylint.out
@@ -64,3 +66,9 @@ if type sloccount >/dev/null 2>&1
 then
 	make sloccount.sc
 fi
+
+# takes too long to run on every build
+#bash -ex -c 'cd examples/stamps/; ./do.sh' || { echo examples/stamps/do.sh no longer runs; /bin/false; }
+
+make lib
+make libtest
diff --git a/khmer/__init__.py b/khmer/__init__.py
index 21386ad..b48d763 100644
--- a/khmer/__init__.py
+++ b/khmer/__init__.py
@@ -1,19 +1,16 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2010-2014. It is licensed under
+# Copyright (C) Michigan State University, 2010-2015. It is licensed under
 # the three-clause BSD license; see doc/LICENSE.txt.
 # Contact: khmer-project@idyll.org
 #
-"""
-This is khmer; please see http://khmer.readthedocs.org/.
-"""
+"""This is khmer; please see http://khmer.readthedocs.org/."""
 
-from khmer._khmer import _new_counting_hash
-from khmer._khmer import _new_hashbits
-from khmer._khmer import set_reporting_callback
-from khmer._khmer import _LabelHash
-from khmer._khmer import _Hashbits
-from khmer._khmer import new_readaligner  # sandbox/{ec,error-correct-pass2}.py
+from khmer._khmer import CountingHash
+from khmer._khmer import LabelHash as _LabelHash
+from khmer._khmer import Hashbits as _Hashbits
+from khmer._khmer import HLLCounter as _HLLCounter
+from khmer._khmer import ReadAligner
 
 from khmer._khmer import forward_hash  # figuregen/*.py
 # tests/test_{functions,counting_hash,labelhash,counting_single}.py
@@ -27,6 +24,9 @@ from khmer._khmer import forward_hash_no_rc  # tests/test_functions.py
 from khmer._khmer import reverse_hash  # tests/test_functions.py
 # tests/counting_single.py
 
+from khmer._khmer import hash_murmur3        # tests/test_functions.py
+from khmer._khmer import hash_no_rc_murmur3  # tests/test_functions.py
+
 from khmer._khmer import get_version_cpp as __version_cpp__
 # tests/test_version.py
 
@@ -34,6 +34,7 @@ from khmer._khmer import ReadParser  # sandbox/to-casava-1.8-fastq.py
 # tests/test_read_parsers.py,scripts/{filter-abund-single,load-graph}.py
 # scripts/{abundance-dist-single,load-into-counting}.py
 
+import sys
 
 from struct import pack, unpack
 
@@ -55,7 +56,7 @@ def new_hashbits(k, starting_size, n_tables=2):
     """
     primes = get_n_primes_above_x(n_tables, starting_size)
 
-    return _new_hashbits(k, primes)
+    return _Hashbits(k, primes)
 
 
 def new_counting_hash(k, starting_size, n_tables=2):
@@ -69,7 +70,7 @@ def new_counting_hash(k, starting_size, n_tables=2):
     """
     primes = get_n_primes_above_x(n_tables, starting_size)
 
-    return _new_counting_hash(k, primes)
+    return CountingHash(k, primes)
 
 
 def load_hashbits(filename):
@@ -78,7 +79,7 @@ def load_hashbits(filename):
     Keyword argument:
     filename -- the name of the hashbits file
     """
-    hashtable = _new_hashbits(1, [1])
+    hashtable = _Hashbits(1, [1])
     hashtable.load(filename)
 
     return hashtable
@@ -90,22 +91,12 @@ def load_counting_hash(filename):
     Keyword argument:
     filename -- the name of the counting_hash file
     """
-    hashtable = _new_counting_hash(1, [1])
+    hashtable = CountingHash(1, [1])
     hashtable.load(filename)
 
     return hashtable
 
 
-def _default_reporting_callback(info, n_reads, other):
-    print '...', info, n_reads, other
-
-
-def reset_reporting_callback():
-    set_reporting_callback(_default_reporting_callback)
-
-reset_reporting_callback()
-
-
 def extract_hashbits_info(filename):
     """Open the given hashbits file and return a tuple of information.
 
@@ -125,12 +116,15 @@ def extract_hashbits_info(filename):
     uchar_size = len(pack('B', 0))
     ulonglong_size = len(pack('Q', 0))
 
-    with open(filename, 'rb') as hashbits:
-        version, = unpack('B', hashbits.read(1))
-        ht_type, = unpack('B', hashbits.read(1))
-        ksize, = unpack('I', hashbits.read(uint_size))
-        n_tables, = unpack('B', hashbits.read(uchar_size))
-        table_size, = unpack('Q', hashbits.read(ulonglong_size))
+    try:
+        with open(filename, 'rb') as hashbits:
+            version, = unpack('B', hashbits.read(1))
+            ht_type, = unpack('B', hashbits.read(1))
+            ksize, = unpack('I', hashbits.read(uint_size))
+            n_tables, = unpack('B', hashbits.read(uchar_size))
+            table_size, = unpack('Q', hashbits.read(ulonglong_size))
+    except Exception:
+        raise ValueError("Presence table '{}' is corrupt".format(filename))
 
     return ksize, round(table_size, -2), n_tables, version, ht_type
 
@@ -154,20 +148,24 @@ def extract_countinghash_info(filename):
     uint_size = len(pack('I', 0))
     ulonglong_size = len(pack('Q', 0))
 
-    with open(filename, 'rb') as countinghash:
-        version, = unpack('B', countinghash.read(1))
-        ht_type, = unpack('B', countinghash.read(1))
-        use_bigcount, = unpack('B', countinghash.read(1))
-        ksize, = unpack('I', countinghash.read(uint_size))
-        n_tables, = unpack('B', countinghash.read(1))
-        table_size, = unpack('Q', countinghash.read(ulonglong_size))
+    try:
+        with open(filename, 'rb') as countinghash:
+            version, = unpack('B', countinghash.read(1))
+            ht_type, = unpack('B', countinghash.read(1))
+            use_bigcount, = unpack('B', countinghash.read(1))
+            ksize, = unpack('I', countinghash.read(uint_size))
+            n_tables, = unpack('B', countinghash.read(1))
+            table_size, = unpack('Q', countinghash.read(ulonglong_size))
+    except Exception:
+        raise ValueError("Counting table '{}' is corrupt".format(filename))
 
     return ksize, round(table_size, -2), n_tables, use_bigcount, version, \
         ht_type
 
 
-def calc_expected_collisions(hashtable):
+def calc_expected_collisions(hashtable, force=False, max_false_pos=.2):
     """Do a quick & dirty expected collision rate calculation on a hashtable.
+
+    Check that the collision rate is within the given threshold.
 
     Keyword argument:
     hashtable: the hashtable object to inspect
@@ -180,11 +178,21 @@ def calc_expected_collisions(hashtable):
     fp_one = occupancy / min_size
     fp_all = fp_one ** n_ht
 
+    if fp_all > max_false_pos:
+        print >>sys.stderr, "**"
+        print >>sys.stderr, "** ERROR: the graph structure is too small for "
+        print >>sys.stderr, "this data set.  Increase k-mer presence table "
+        print >>sys.stderr, "size/num of tables."
+        print >>sys.stderr, "** Do not use these results!!"
+        print >>sys.stderr, "**"
+        if not force:
+            sys.exit(1)
+
     return fp_all
 
 
 def is_prime(number):
-    '''Checks if a number is prime.'''
+    """Check if a number is prime."""
     if number < 2:
         return False
     if number == 2:
@@ -198,13 +206,15 @@ def is_prime(number):
 
 
 def get_n_primes_near_x(number, target):
-    ''' Step backwards until a number of primes (other than 2) have been
+    """Backward-find primes smaller than target.
+
+    Step backwards until a number of primes (other than 2) have been
     found that are smaller than the target and return them.
 
     Keyword arguments:
     number -- the number of primes to find
     target -- the number to step backwards from
-    '''
+    """
     primes = []
     i = target - 1
     if i % 2 == 0:
@@ -217,13 +227,15 @@ def get_n_primes_near_x(number, target):
 
 
 def get_n_primes_above_x(number, target):
-    '''Step forwards until a number of primes (other than 2) have been
+    """Forward-find primes smaller than target.
+
+    Step forwards until a number of primes (other than 2) have been
     found that are smaller than the target and return them.
 
     Keyword arguments:
     number -- the number of primes to find
     target -- the number to step forwards from
-    '''
+    """
     primes = []
     i = target + 1
     if i % 2 == 0:
@@ -234,12 +246,11 @@ def get_n_primes_above_x(number, target):
         i += 2
     return primes
 
-'''
-Expose the cpython objects with __new__ implementations.
-These constructors add the functionality provided by the existing
-factory methods to the constructors defined over in cpython land.
-Additional functionality can be added to these classes as appropriate.
-'''
+
+# Expose the cpython objects with __new__ implementations.
+# These constructors add the functionality provided by the existing
+# factory methods to the constructors defined over in cpython land.
+# Additional functionality can be added to these classes as appropriate.
 
 
 class LabelHash(_LabelHash):
@@ -258,3 +269,25 @@ class Hashbits(_Hashbits):
         c = _Hashbits.__new__(cls, k, primes)
         c.primes = primes
         return c
+
+
+class HLLCounter(_HLLCounter):
+
+    """HyperLogLog counter.
+
+    A HyperLogLog counter is a probabilistic data structure specialized on
+    cardinality estimation.
+    There is a precision/memory consumption trade-off: error rate determines
+    how much memory is consumed.
+
+    # Creating a new HLLCounter:
+
+    >>> khmer.HLLCounter(error_rate, ksize)
+
+    where the default values are:
+      - error_rate: 0.01
+      - ksize: 20
+    """
+
+    def __len__(self):
+        return self.estimate_cardinality()
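+
+# Illustrative usage of the wrapper above (consume_string comes from the
+# underlying CPython type; the parameters mirror the docstring defaults):
+#
+#   hll = HLLCounter(0.01, 20)
+#   hll.consume_string("ACGTACGTACGTACGTACGTACGT")
+#   print len(hll)  # estimated number of distinct 20-mers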
diff --git a/khmer/_khmermodule.cc b/khmer/_khmermodule.cc
index b8a7c46..8a90c77 100644
--- a/khmer/_khmermodule.cc
+++ b/khmer/_khmermodule.cc
@@ -1,6 +1,6 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
+// Copyright (C) Michigan State University, 2009-2015. It is licensed under
 // the three-clause BSD license; see doc/LICENSE.txt.
 // Contact: khmer-project@idyll.org
 //
@@ -22,6 +22,26 @@
 #include "read_aligner.hh"
 #include "labelhash.hh"
 #include "khmer_exception.hh"
+#include "hllcounter.hh"
+
+using namespace khmer;
+using namespace read_parsers;
+
+//
+// Python 2/3 compatibility: PyInt and PyLong
+//
+
+#if (PY_MAJOR_VERSION >= 3)
+#define PyInt_Check(arg) PyLong_Check(arg)
+#define PyInt_AsLong(arg) PyLong_AsLong(arg)
+#endif
+
+//
+// Python 2/3 compatibility: PyBytes and PyString
+// https://docs.python.org/2/howto/cporting.html#str-unicode-unification
+//
+
+#include "bytesobject.h"
 
 using namespace khmer;
 
@@ -67,50 +87,6 @@ static void     _trace_logger(
 #endif
 
 
-template < typename OBJECT >
-void
-_common_init_Type(
-    PyTypeObject &tobj, char const * name, char const * doc
-)
-{
-    if (!name) {
-        throw khmer_exception();
-    }
-    if (!doc) {
-        throw khmer_exception();
-    }
-
-    tobj.ob_size        = 0;
-    tobj.ob_type        = &PyType_Type;
-    tobj.tp_name        = name;
-    tobj.tp_basicsize       = sizeof( OBJECT );
-    tobj.tp_alloc       = PyType_GenericAlloc;
-    tobj.tp_free        = PyObject_Free;
-    tobj.tp_getattro        = PyObject_GenericGetAttr;
-    tobj.tp_flags       = Py_TPFLAGS_DEFAULT;
-    tobj.tp_doc         = doc;
-}
-
-
-static inline
-void
-_debug_class_attrs( PyTypeObject &tobj )
-{
-#ifdef WITH_INTERNAL_TRACING
-    PyObject *key, *val;
-    Py_ssize_t pos = 0;
-
-    while (PyDict_Next( tobj.tp_dict, &pos, &key, &val )) {
-        _trace_logger(
-            TraceLogger:: TLVL_DEBUG5,
-            "\ttype '%s' dictionary key %d: '%s'\n",
-            tobj.tp_name, pos, PyString_AsString( key )
-        );
-    }
-#endif // WITH_INTERNAL_TRACING
-}
-
-
 } // namespace python
 
 } // namespace khmer
@@ -136,46 +112,6 @@ public:
 
 typedef pre_partition_info _pre_partition_info;
 
-// default callback obj;
-static PyObject *_callback_obj = NULL;
-
-// callback function to pass into C++ functions
-
-void _report_fn(const char * info, void * data, unsigned long long n_reads,
-                unsigned long long other)
-{
-    // handle signals etc. (like CTRL-C)
-    if (PyErr_CheckSignals() != 0) {
-        throw _khmer_signal("PyErr_CheckSignals received a signal");
-    }
-
-    // set data to default?
-    if (!data && _callback_obj) {
-        data = _callback_obj;
-    }
-
-    // if 'data' is set, it is None, or a Python callable
-    if (data) {
-        PyObject * obj = (PyObject *) data;
-        if (obj != Py_None) {
-            PyObject * args = Py_BuildValue("sKK", info, n_reads, other);
-            if (args != NULL) {
-                PyObject * r = PyObject_Call(obj, args, NULL);
-                Py_XDECREF(r);
-            }
-            Py_XDECREF(args);
-        }
-    }
-
-    if (PyErr_Occurred()) {
-        throw _khmer_signal("PyErr_Occurred is set");
-    }
-
-    // ...allow other Python threads to do stuff...
-    Py_BEGIN_ALLOW_THREADS;
-    Py_END_ALLOW_THREADS;
-}
-
 /***********************************************************************/
 
 //
@@ -188,70 +124,59 @@ namespace khmer
 namespace python
 {
 
-
-static PyTypeObject Read_Type = { PyObject_HEAD_INIT( NULL ) };
-
-
 typedef struct {
     PyObject_HEAD
     //! Pointer to the low-level genomic read object.
     read_parsers:: Read *   read;
-} Read_Object;
+} khmer_Read_Object;
 
 
 static
 void
-_Read_dealloc( PyObject * self )
+khmer_Read_dealloc(khmer_Read_Object * obj)
 {
-    Read_Object * myself = (Read_Object *)self;
-    delete myself->read;
-    myself->read = NULL;
-    Read_Type.tp_free( self );
+    delete obj->read;
+    obj->read = NULL;
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
 }
 
 
-#define KHMER_READ_STRING_GETTER( SELF, ATTR_NAME ) \
-    PyString_FromString( \
-    ((((Read_Object *)(SELF))->read)->ATTR_NAME).c_str( ) \
-    )
-
-
 static
 PyObject *
-Read_get_name( PyObject * self, void * closure )
+Read_get_name(khmer_Read_Object * obj, void * closure )
 {
-    return KHMER_READ_STRING_GETTER( self, name );
+    return PyBytes_FromString(obj->read->name.c_str()) ;
 }
 
 
 static
 PyObject *
-Read_get_sequence( PyObject * self, void * closure )
+Read_get_sequence(khmer_Read_Object * obj, void * closure)
 {
-    return KHMER_READ_STRING_GETTER( self, sequence );
+    return PyBytes_FromString(obj->read->sequence.c_str()) ;
 }
 
 
 static
 PyObject *
-Read_get_accuracy( PyObject * self, void * closure )
+Read_get_quality(khmer_Read_Object * obj, void * closure)
 {
-    return KHMER_READ_STRING_GETTER( self, accuracy );
+    return PyBytes_FromString(obj->read->quality.c_str()) ;
 }
 
 
 static
 PyObject *
-Read_get_annotations( PyObject * self, void * closure )
+Read_get_annotations(khmer_Read_Object * obj, void * closure)
 {
-    return KHMER_READ_STRING_GETTER( self, annotations );
+    return PyBytes_FromString(obj->read->annotations.c_str()) ;
 }
 
 
 // TODO? Implement setters.
 
 
-static PyGetSetDef _Read_accessors [ ] = {
+static PyGetSetDef khmer_Read_accessors [ ] = {
     {
         (char *)"name",
         (getter)Read_get_name, (setter)NULL,
@@ -263,8 +188,8 @@ static PyGetSetDef _Read_accessors [ ] = {
         (char *)"Genomic sequence.", NULL
     },
     {
-        (char *)"accuracy",
-        (getter)Read_get_accuracy, (setter)NULL,
+        (char *)"quality",
+        (getter)Read_get_quality, (setter)NULL,
         (char *)"Quality scores.", NULL
     },
     {
@@ -277,24 +202,38 @@ static PyGetSetDef _Read_accessors [ ] = {
 };
 
 
-static
-void
-_init_Read_Type( )
-{
-    using namespace read_parsers;
-
-    _common_init_Type<Read_Object>(
-        Read_Type, "Read", "A FASTQ record plus some metadata."
-    );
-    Read_Type.tp_dealloc    = (destructor)_Read_dealloc;
-
-    Read_Type.tp_getset     = (PyGetSetDef *)_Read_accessors;
-
-    PyType_Ready( &Read_Type );
-
-    _debug_class_attrs( Read_Type );
-}
-
+static PyTypeObject khmer_Read_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)        /* init & ob_size */
+    "_khmer.Read",                         /* tp_name */
+    sizeof(khmer_Read_Object),            /* tp_basicsize */
+    0,                                    /* tp_itemsize */
+    (destructor)khmer_Read_dealloc,       /* tp_dealloc */
+    0,                                    /* tp_print */
+    0,                                    /* tp_getattr */
+    0,                                    /* tp_setattr */
+    0,                                    /* tp_compare */
+    0,                                    /* tp_repr */
+    0,                                    /* tp_as_number */
+    0,                                    /* tp_as_sequence */
+    0,                                    /* tp_as_mapping */
+    0,                                    /* tp_hash */
+    0,                                    /* tp_call */
+    0,                                    /* tp_str */
+    0,                                    /* tp_getattro */
+    0,                                    /* tp_setattro */
+    0,                                    /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                   /* tp_flags */
+    "A FASTQ record plus some metadata.", /* tp_doc */
+    0,                                    /* tp_traverse */
+    0,                                    /* tp_clear */
+    0,                                    /* tp_richcompare */
+    0,                                    /* tp_weaklistoffset */
+    0,                                    /* tp_iter */
+    0,                                    /* tp_iternext */
+    0,                                    /* tp_methods */
+    0,                                    /* tp_members */
+    (PyGetSetDef *)khmer_Read_accessors,  /* tp_getset */
+};
 
 /***********************************************************************/
 
@@ -304,17 +243,11 @@ _init_Read_Type( )
 //
 
 
-static PyTypeObject ReadParser_Type
-CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("ReadParser_Object")
-    = { PyObject_HEAD_INIT( NULL ) };
-static PyTypeObject ReadPairIterator_Type = { PyObject_HEAD_INIT( NULL ) };
-
-
 typedef struct {
     PyObject_HEAD
     //! Pointer to the low-level parser object.
     read_parsers:: IParser *  parser;
-} ReadParser_Object;
+} khmer_ReadParser_Object;
 
 
 typedef struct {
@@ -323,31 +256,26 @@ typedef struct {
     PyObject *  parent;
     //! Persistent value of pair mode across invocations.
     int pair_mode;
-} ReadPairIterator_Object;
+} khmer_ReadPairIterator_Object;
 
 
 static
 void
-_ReadParser_dealloc( PyObject * self )
+_ReadParser_dealloc(khmer_ReadParser_Object * obj)
 {
-
-    ReadParser_Object * myself = (ReadParser_Object *)self;
-    delete myself->parser;
-    myself->parser = NULL;
-    ReadParser_Type.tp_free( self );
-
+    // parser is a bare C++ IParser, not a PyObject; free it with delete
+    delete obj->parser;
+    obj->parser = NULL;
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
 }
 
 
 static
 void
-_ReadPairIterator_dealloc( PyObject * self )
+khmer_ReadPairIterator_dealloc(khmer_ReadPairIterator_Object * obj)
 {
-    ReadPairIterator_Object * myself = (ReadPairIterator_Object *)self;
-
-    Py_DECREF( myself->parent );
-    myself->parent = NULL;
-    ReadPairIterator_Type.tp_free( self );
+    Py_DECREF(obj->parent);
+    obj->parent = NULL;
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
 }
 
 
@@ -355,8 +283,6 @@ static
 PyObject *
 _ReadParser_new( PyTypeObject * subtype, PyObject * args, PyObject * kwds )
 {
-    using namespace read_parsers;
-
     const char *      ifile_name_CSTR;
 
     if (!PyArg_ParseTuple(args, "s", &ifile_name_CSTR )) {
@@ -368,7 +294,7 @@ _ReadParser_new( PyTypeObject * subtype, PyObject * args, PyObject * kwds )
     if (self == NULL) {
         return NULL;
     }
-    ReadParser_Object * myself  = (ReadParser_Object *)self;
+    khmer_ReadParser_Object * myself  = (khmer_ReadParser_Object *)self;
 
     // Wrap the low-level parser object.
     try {
@@ -386,14 +312,17 @@ static
 PyObject *
 _ReadParser_iternext( PyObject * self )
 {
-    using namespace read_parsers;
-
-    ReadParser_Object * myself  = (ReadParser_Object *)self;
+    khmer_ReadParser_Object * myself  = (khmer_ReadParser_Object *)self;
     IParser *       parser  = myself->parser;
 
     bool    stop_iteration = false;
     char    const * exc = NULL;
-    Read *  the_read_PTR    = new Read( );
+    Read *  the_read_PTR;
+    try {
+        the_read_PTR = new Read( );
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
+    }
 
     Py_BEGIN_ALLOW_THREADS
     stop_iteration = parser->is_complete( );
@@ -404,6 +333,8 @@ _ReadParser_iternext( PyObject * self )
             stop_iteration = true;
         } catch (StreamReadError &e) {
             exc = e.what();
+        } catch (InvalidRead &e) {
+            exc = e.what();
         }
     }
     Py_END_ALLOW_THREADS
@@ -421,40 +352,38 @@ _ReadParser_iternext( PyObject * self )
         return NULL;
     }
 
-    PyObject * the_read_OBJECT = Read_Type.tp_alloc( &Read_Type, 1 );
-    ((Read_Object *)the_read_OBJECT)->read = the_read_PTR;
+    PyObject * the_read_OBJECT = khmer_Read_Type.tp_alloc( &khmer_Read_Type, 1 );
+    if (the_read_OBJECT == NULL) {
+        delete the_read_PTR;
+        return NULL;
+    }
+    ((khmer_Read_Object *)the_read_OBJECT)->read = the_read_PTR;
     return the_read_OBJECT;
 }
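
With the added InvalidRead handler, malformed records now surface as an exception during iteration instead of being lumped in with stream errors. A hedged sketch of the calling pattern (the path is illustrative; the exception type follows the error handling just above this hunk):

    import _khmer

    parser = _khmer.ReadParser("reads.fq")  # hypothetical path
    try:
        for read in parser:
            pass
    except IOError as err:
        # raised for StreamReadError and now for InvalidRead as well
        print("parse failed: %s" % err)
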
 
 
 static
 PyObject *
-_ReadPairIterator_iternext( PyObject * self )
+_ReadPairIterator_iternext(khmer_ReadPairIterator_Object * myself)
 {
-    using namespace read_parsers;
-
-    ReadPairIterator_Object *   myself    = (ReadPairIterator_Object *)self;
-    ReadParser_Object *     parent    =
-        (ReadParser_Object *)(myself->parent);
+    khmer_ReadParser_Object * parent = (khmer_ReadParser_Object*)myself->parent;
     IParser *           parser    = parent->parser;
     uint8_t         pair_mode = myself->pair_mode;
 
     ReadPair    the_read_pair;
     bool    stop_iteration      = false;
-    bool    unknown_pair_reading_mode   = false;
-    bool    invalid_read_pair       = false;
-    bool    stream_read_error = false;
+    const char * value_error_what = NULL;
+    const char * io_error_what = NULL;
+
     Py_BEGIN_ALLOW_THREADS
     stop_iteration = parser->is_complete( );
     if (!stop_iteration)
         try {
             parser->imprint_next_read_pair( the_read_pair, pair_mode );
         } catch (UnknownPairReadingMode &exc) {
-            unknown_pair_reading_mode = true;
+            value_error_what = exc.what();
+        } catch (InvalidRead &exc) {
+            io_error_what = exc.what();
         } catch (InvalidReadPair &exc) {
-            invalid_read_pair = true;
+            io_error_what = exc.what();
         } catch (StreamReadError &exc) {
-            stream_read_error = true;
+            io_error_what = "Input file error.";
         } catch (NoMoreReadsAvailable &exc) {
             stop_iteration = true;
         }
@@ -464,50 +393,86 @@ _ReadPairIterator_iternext( PyObject * self )
     if (stop_iteration) {
         return NULL;
     }
-
-    if (unknown_pair_reading_mode) {
-        PyErr_SetString(
-            PyExc_ValueError, "Unknown pair reading mode supplied."
-        );
-        return NULL;
-    }
-    if (invalid_read_pair) {
-        PyErr_SetString( PyExc_IOError, "Invalid read pair detected." );
+    if (value_error_what != NULL) {
+        PyErr_SetString(PyExc_ValueError, value_error_what);
         return NULL;
     }
-
-    if (stream_read_error) {
-        PyErr_SetString( PyExc_IOError, "Input file error.");
+    if (io_error_what != NULL) {
+        PyErr_SetString( PyExc_IOError, io_error_what);
         return NULL;
     }
 
     // Copy elements of 'ReadPair' object into Python tuple.
     // TODO? Replace dummy reads with 'None' object.
-    PyObject * read_1_OBJECT = Read_Type.tp_alloc( &Read_Type, 1 );
-    ((Read_Object *)read_1_OBJECT)->read = new Read( the_read_pair.first );
-    PyObject * read_2_OBJECT = Read_Type.tp_alloc( &Read_Type, 1 );
-    ((Read_Object *)read_2_OBJECT)->read = new Read( the_read_pair.second );
+    PyObject * read_1_OBJECT = khmer_Read_Type.tp_alloc( &khmer_Read_Type, 1 );
+    if (read_1_OBJECT == NULL) {
+        return NULL;
+    }
+    try {
+        ((khmer_Read_Object *)read_1_OBJECT)->read = new Read( the_read_pair.first );
+    } catch (std::bad_alloc &e) {
+        Py_DECREF(read_1_OBJECT);
+        return PyErr_NoMemory();
+    }
+    PyObject * read_2_OBJECT = khmer_Read_Type.tp_alloc( &khmer_Read_Type, 1 );
+    if (read_2_OBJECT == NULL) {
+        Py_DECREF(read_1_OBJECT);
+        return NULL;
+    }
+    try {
+        ((khmer_Read_Object *)read_2_OBJECT)->read = new Read( the_read_pair.second );
+    } catch (std::bad_alloc &e) {
+        // Py_DECREF on read_1_OBJECT deletes its wrapped Read in the dealloc;
+        // read_2_OBJECT's read member is still NULL from tp_alloc, so this is safe.
+        Py_DECREF(read_1_OBJECT);
+        Py_DECREF(read_2_OBJECT);
+        return PyErr_NoMemory();
+    }
     PyObject * tup = PyTuple_Pack( 2, read_1_OBJECT, read_2_OBJECT );
     Py_XDECREF(read_1_OBJECT);
     Py_XDECREF(read_2_OBJECT);
     return tup;
 }
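
Each successful step of the pair iterator hands back a 2-tuple of Read objects, built with PyTuple_Pack above. Illustrative use against a hypothetical interleaved file:

    import _khmer

    parser = _khmer.ReadParser("paired.fq")  # hypothetical interleaved input
    for read1, read2 in parser.iter_read_pairs():
        # per the TODO above, dummy reads are not yet replaced with None
        print(read1.name, read2.name)
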
 
+static PyTypeObject khmer_ReadPairIterator_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)              /* init & ob_size */
+    "_khmer.ReadPairIterator",                   /* tp_name */
+    sizeof(khmer_ReadPairIterator_Object),      /* tp_basicsize */
+    0,                                          /* tp_itemsize */
+    (destructor)khmer_ReadPairIterator_dealloc, /* tp_dealloc */
+    0,                                          /* tp_print */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_compare */
+    0,                                          /* tp_repr */
+    0,                                          /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,                                          /* tp_as_mapping */
+    0,                                          /* tp_hash */
+    0,                                          /* tp_call */
+    0,                                          /* tp_str */
+    0,                                          /* tp_getattro */
+    0,                                          /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                         /* tp_flags */
+    "Iterates over 'ReadParser' objects and returns read pairs.", /* tp_doc */
+    0,                                          /* tp_traverse */
+    0,                                          /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    PyObject_SelfIter,                          /* tp_iter */
+    (iternextfunc)_ReadPairIterator_iternext,   /* tp_iternext */
+};
+
+
 
 static
 PyObject *
-ReadParser_iter_reads( PyObject * self, PyObject * args )
+ReadParser_iter_reads(PyObject * self, PyObject * args)
 {
     return PyObject_SelfIter( self );
 }
 
-
 static
 PyObject *
-ReadParser_iter_read_pairs( PyObject * self, PyObject * args )
+ReadParser_get_num_reads(khmer_ReadParser_Object * me)
 {
-    using namespace read_parsers;
+    return PyLong_FromLong(me->parser->get_num_reads());
+}
 
+static
+PyObject *
+ReadParser_iter_read_pairs(PyObject * self, PyObject * args)
+{
     int  pair_mode  = IParser:: PAIR_MODE_ERROR_ON_UNPAIRED;
 
     if (!PyArg_ParseTuple( args, "|i", &pair_mode )) {
@@ -515,13 +480,13 @@ ReadParser_iter_read_pairs( PyObject * self, PyObject * args )
     }
 
     // Capture existing read parser.
-    PyObject * obj = ReadPairIterator_Type.tp_alloc(
-                         &ReadPairIterator_Type, 1
+    PyObject * obj = khmer_ReadPairIterator_Type.tp_alloc(
+                         &khmer_ReadPairIterator_Type, 1
                      );
     if (obj == NULL) {
         return NULL;
     }
-    ReadPairIterator_Object * rpi   = (ReadPairIterator_Object *)obj;
+    khmer_ReadPairIterator_Object * rpi   = (khmer_ReadPairIterator_Object *)obj;
     rpi->parent             = self;
     rpi->pair_mode          = pair_mode;
 
@@ -542,31 +507,63 @@ static PyMethodDef _ReadParser_methods [ ] = {
         "iter_read_pairs",  (PyCFunction)ReadParser_iter_read_pairs,
         METH_VARARGS,       "Iterates over paired reads as pairs."
     },
-
     { NULL, NULL, 0, NULL } // sentinel
 };
 
+static PyGetSetDef khmer_ReadParser_accessors[] = {
+    {
+        (char *)"num_reads",
+        (getter)ReadParser_get_num_reads, NULL,
+        (char *)"count of reads processed thus far.",
+        NULL
+    },
+    {NULL, NULL, NULL, NULL, NULL} /* Sentinel */
+};
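
The new read-only num_reads accessor exposes the parser's running read count; a short sketch:

    import _khmer

    parser = _khmer.ReadParser("reads.fq")  # hypothetical path
    for read in parser:
        pass
    print(parser.num_reads)  # count of reads processed thus far
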
 
-static
-void
-_init_ReadParser_Type( )
-{
-    using namespace read_parsers;
-
-    _common_init_Type<ReadParser_Object>(
-        ReadParser_Type,
-        "_khmer.ReadParser",
-        "Parses streams from various file formats, " \
-        "such as FASTA and FASTQ."
-    );
-    ReadParser_Type.tp_new      = (newfunc)_ReadParser_new;
-    ReadParser_Type.tp_dealloc      = (destructor)_ReadParser_dealloc;
-
-    ReadParser_Type.tp_iter     = PyObject_SelfIter;
-    ReadParser_Type.tp_iternext     = (iternextfunc)_ReadParser_iternext;
-
-    ReadParser_Type.tp_methods      = (PyMethodDef *)_ReadParser_methods;
+static PyTypeObject khmer_ReadParser_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)             /* init & ob_size */
+    "_khmer.ReadParser",                        /* tp_name */
+    sizeof(khmer_ReadParser_Object),           /* tp_basicsize */
+    0,                                         /* tp_itemsize */
+    (destructor)_ReadParser_dealloc,           /* tp_dealloc */
+    0,                                         /* tp_print */
+    0,                                         /* tp_getattr */
+    0,                                         /* tp_setattr */
+    0,                                         /* tp_compare */
+    0,                                         /* tp_repr */
+    0,                                         /* tp_as_number */
+    0,                                         /* tp_as_sequence */
+    0,                                         /* tp_as_mapping */
+    0,                                         /* tp_hash */
+    0,                                         /* tp_call */
+    0,                                         /* tp_str */
+    0,                                         /* tp_getattro */
+    0,                                         /* tp_setattro */
+    0,                                         /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                        /* tp_flags */
+    "Parses streams from various file formats, " \
+    "such as FASTA and FASTQ.",                /* tp_doc */
+    0,                                         /* tp_traverse */
+    0,                                         /* tp_clear */
+    0,                                         /* tp_richcompare */
+    0,                                         /* tp_weaklistoffset */
+    PyObject_SelfIter,                         /* tp_iter */
+    (iternextfunc)_ReadParser_iternext,        /* tp_iternext */
+    _ReadParser_methods,                       /* tp_methods */
+    0,                                         /* tp_members */
+    khmer_ReadParser_accessors,                /* tp_getset */
+    0,                                         /* tp_base */
+    0,                                         /* tp_dict */
+    0,                                         /* tp_descr_get */
+    0,                                         /* tp_descr_set */
+    0,                                         /* tp_dictoffset */
+    0,                                         /* tp_init */
+    0,                                         /* tp_alloc */
+    _ReadParser_new,                           /* tp_new */
+};
 
+void _init_ReadParser_Type_constants()
+{
     PyObject * cls_attrs_DICT = PyDict_New( );
     if (cls_attrs_DICT == NULL) {
         return;
@@ -575,7 +572,7 @@ _init_ReadParser_Type( )
     // Place pair mode constants into class dictionary.
     int result;
 
-    PyObject * value = PyInt_FromLong( IParser:: PAIR_MODE_ALLOW_UNPAIRED );
+    PyObject * value = PyLong_FromLong( IParser:: PAIR_MODE_ALLOW_UNPAIRED );
     result = PyDict_SetItemString(cls_attrs_DICT,
                                   "PAIR_MODE_ALLOW_UNPAIRED", value);
     Py_XDECREF(value);
@@ -584,7 +581,7 @@ _init_ReadParser_Type( )
         return;
     }
 
-    value = PyInt_FromLong( IParser:: PAIR_MODE_IGNORE_UNPAIRED );
+    value = PyLong_FromLong( IParser:: PAIR_MODE_IGNORE_UNPAIRED );
     result = PyDict_SetItemString(cls_attrs_DICT,
                                   "PAIR_MODE_IGNORE_UNPAIRED", value );
     Py_XDECREF(value);
@@ -593,7 +590,7 @@ _init_ReadParser_Type( )
         return;
     }
 
-    value = PyInt_FromLong( IParser:: PAIR_MODE_ERROR_ON_UNPAIRED );
+    value = PyLong_FromLong( IParser:: PAIR_MODE_ERROR_ON_UNPAIRED );
     result = PyDict_SetItemString(cls_attrs_DICT,
                                   "PAIR_MODE_ERROR_ON_UNPAIRED", value);
     Py_XDECREF(value);
@@ -602,36 +599,8 @@ _init_ReadParser_Type( )
         return;
     }
 
-    ReadParser_Type.tp_dict     = cls_attrs_DICT;
-    _debug_class_attrs( ReadParser_Type );
-
-} // _init_ReadParser_Type
-
-
-static
-void
-_init_ReadPairIterator_Type( )
-{
-
-    _common_init_Type<ReadPairIterator_Object>(
-        ReadPairIterator_Type,
-        "ReadParser-pair-iterator",
-        "Iterates over 'ReadParser' objects and returns read pairs."
-    );
-    //ReadPairIterator_Type.tp_new  = (newfunc)_ReadPairIterator_new;
-    ReadPairIterator_Type.tp_dealloc    =
-        (destructor)_ReadPairIterator_dealloc;
-
-    ReadPairIterator_Type.tp_iter   = PyObject_SelfIter;
-    ReadPairIterator_Type.tp_iternext   =
-        (iternextfunc)_ReadPairIterator_iternext;
-
-    PyType_Ready( &ReadPairIterator_Type );
-
-    _debug_class_attrs( ReadPairIterator_Type );
-
-} // _init_ReadPairIterator_Type
-
+    khmer_ReadParser_Type.tp_dict     = cls_attrs_DICT;
+}
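
Because the pair-mode constants are written into tp_dict, they are reachable as class attributes from Python. Sketch of selecting a non-default mode (the default, per ReadParser_iter_read_pairs above, is PAIR_MODE_ERROR_ON_UNPAIRED):

    import _khmer

    RP = _khmer.ReadParser
    parser = RP("paired.fq")  # hypothetical interleaved input
    for read1, read2 in parser.iter_read_pairs(RP.PAIR_MODE_IGNORE_UNPAIRED):
        pass
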
 
 } // namespace python
 
@@ -644,7 +613,7 @@ _PyObject_to_khmer_ReadParser( PyObject * py_object )
 {
     // TODO: Add type-checking.
 
-    return ((python:: ReadParser_Object *)py_object)->parser;
+    return ((python:: khmer_ReadParser_Object *)py_object)->parser;
 }
 
 
@@ -669,62 +638,85 @@ void free_subset_partition_info(void * p)
 typedef struct {
     PyObject_HEAD
     CountingHash * counting;
-} khmer_KCountingHashObject;
+} khmer_KCountingHash_Object;
 
 typedef struct {
     PyObject_HEAD
     SubsetPartition * subset;
-} khmer_KSubsetPartitionObject;
+} khmer_KSubsetPartition_Object;
 
 typedef struct {
     PyObject_HEAD
     Hashbits * hashbits;
-} khmer_KHashbitsObject;
-
-static void khmer_subset_dealloc(PyObject *);
-static PyObject * khmer_subset_getattr(PyObject * obj, char * name);
-
-static PyTypeObject khmer_KSubsetPartitionType = {
-    PyObject_HEAD_INIT(NULL)
-    0,
-    "KSubset", sizeof(khmer_KSubsetPartitionObject),
-    0,
-    khmer_subset_dealloc,   /*tp_dealloc*/
-    0,              /*tp_print*/
-    khmer_subset_getattr,   /*tp_getattr*/
-    0,              /*tp_setattr*/
-    0,              /*tp_compare*/
-    0,              /*tp_repr*/
-    0,              /*tp_as_number*/
-    0,              /*tp_as_sequence*/
-    0,              /*tp_as_mapping*/
-    0,              /*tp_hash */
-    0,              /*tp_call*/
-    0,              /*tp_str*/
-    0,              /*tp_getattro*/
-    0,              /*tp_setattro*/
-    0,              /*tp_as_buffer*/
-    Py_TPFLAGS_DEFAULT,     /*tp_flags*/
-    "subset object",           /* tp_doc */
+} khmer_KHashbits_Object;
+
+static void khmer_subset_dealloc(khmer_KSubsetPartition_Object * obj);
+
+static PyTypeObject khmer_KSubsetPartition_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)         /* init & ob_size */
+    "_khmer.KSubsetPartition",              /* tp_name */
+    sizeof(khmer_KSubsetPartition_Object), /* tp_basicsize */
+    0,                                     /* tp_itemsize */
+    (destructor)khmer_subset_dealloc,      /*tp_dealloc*/
+    0,                                     /*tp_print*/
+    0,                                     /*tp_getattr*/
+    0,                                     /*tp_setattr*/
+    0,                                     /*tp_compare*/
+    0,                                     /*tp_repr*/
+    0,                                     /*tp_as_number*/
+    0,                                     /*tp_as_sequence*/
+    0,                                     /*tp_as_mapping*/
+    0,                                     /*tp_hash */
+    0,                                     /*tp_call*/
+    0,                                     /*tp_str*/
+    0,                                     /*tp_getattro*/
+    0,                                     /*tp_setattro*/
+    0,                                     /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT,                    /*tp_flags*/
+    "subset object",                       /* tp_doc */
 };
 
 typedef struct {
     PyObject_HEAD
     ReadAligner * aligner;
-} khmer_ReadAlignerObject;
+} khmer_ReadAligner_Object;
+
+static void khmer_counting_dealloc(khmer_KCountingHash_Object * obj);
 
-static void khmer_counting_dealloc(PyObject *);
+static
+PyObject *
+hash_abundance_distribution(khmer_KCountingHash_Object * me, PyObject * args);
 
-static PyObject * hash_abundance_distribution(PyObject * self,
+static
+PyObject *
+hash_abundance_distribution_with_reads_parser(khmer_KCountingHash_Object * me,
         PyObject * args);
 
-static PyObject * hash_abundance_distribution_with_reads_parser(
-    PyObject * self,
-    PyObject * args);
+static
+PyObject *
+hash_get_raw_tables(khmer_KCountingHash_Object * self, PyObject * args)
+{
+    CountingHash * counting = self->counting;
+
+    khmer::Byte ** table_ptrs = counting->get_raw_tables();
+    std::vector<HashIntoType> sizes = counting->get_tablesizes();
+
+    PyObject * raw_tables = PyList_New(sizes.size());
+    for (unsigned int i=0; i<sizes.size(); ++i) {
+        PyObject * buf = PyBuffer_FromMemory(table_ptrs[i], sizes[i]);
+        if (buf == NULL) {
+            // don't leak the partially-filled list on failure
+            Py_DECREF(raw_tables);
+            return NULL;
+        }
+        PyList_SET_ITEM(raw_tables, i, buf);
+    }
+
+    return raw_tables;
+}
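
get_raw_tables returns one buffer object per table, sized from get_tablesizes(), which makes the counting tables inspectable without copying. A hedged sketch; new_hashtable's module registration sits outside this hunk, and numpy is used here purely for illustration:

    import _khmer
    import numpy  # illustration only; any consumer of the buffer protocol works

    counting = _khmer.new_hashtable(20, 4 ** 10)  # assuming this helper is exported
    counting.consume("ATGGCTGATCGAT" * 5)
    for table in counting.get_raw_tables():
        counts = numpy.frombuffer(table, dtype=numpy.uint8)
        print(len(counts), counts.sum())
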
 
-static PyObject * hash_set_use_bigcount(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_set_use_bigcount(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     PyObject * x;
@@ -740,9 +732,10 @@ static PyObject * hash_set_use_bigcount(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hash_get_use_bigcount(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_use_bigcount(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -754,9 +747,10 @@ static PyObject * hash_get_use_bigcount(PyObject * self, PyObject * args)
     return PyBool_FromLong((int)val);
 }
 
-static PyObject * hash_n_occupied(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_n_occupied(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     HashIntoType start = 0, stop = 0;
@@ -770,9 +764,10 @@ static PyObject * hash_n_occupied(PyObject * self, PyObject * args)
     return PyLong_FromUnsignedLongLong(n);
 }
 
-static PyObject * hash_n_unique_kmers(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_n_unique_kmers(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     HashIntoType n = counting->n_unique_kmers();
@@ -780,9 +775,10 @@ static PyObject * hash_n_unique_kmers(PyObject * self, PyObject * args)
     return PyLong_FromUnsignedLongLong(n);
 }
 
-static PyObject * hash_n_entries(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_n_entries(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -792,9 +788,10 @@ static PyObject * hash_n_entries(PyObject * self, PyObject * args)
     return PyLong_FromUnsignedLongLong(counting->n_entries());
 }
 
-static PyObject * hash_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_count(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * kmer;
@@ -811,13 +808,14 @@ static PyObject * hash_count(PyObject * self, PyObject * args)
 
     counting->count(kmer);
 
-    return PyInt_FromLong(1);
+    return PyLong_FromLong(1);
 }
 
-static PyObject * hash_output_fasta_kmer_pos_freq(PyObject * self,
-        PyObject *args)
+static
+PyObject *
+hash_output_fasta_kmer_pos_freq(khmer_KCountingHash_Object * me,
+                                PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * infile;
@@ -829,20 +827,18 @@ static PyObject * hash_output_fasta_kmer_pos_freq(PyObject * self,
 
     counting->output_fasta_kmer_pos_freq(infile, outfile);
 
-    return PyInt_FromLong(0);
+    return PyLong_FromLong(0);
 }
 
-static PyObject * hash_consume_fasta(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_consume_fasta(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me  = (khmer_KCountingHashObject *) self;
     CountingHash * counting  = me->counting;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(
-                args, "s|O", &filename, &callback_obj
-            )) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
@@ -850,8 +846,7 @@ static PyObject * hash_consume_fasta(PyObject * self, PyObject * args)
     unsigned long long  n_consumed    = 0;
     unsigned int          total_reads   = 0;
     try {
-        counting->consume_fasta(filename, total_reads, n_consumed,
-                                _report_fn, callback_obj);
+        counting->consume_fasta(filename, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
@@ -863,19 +858,16 @@ static PyObject * hash_consume_fasta(PyObject * self, PyObject * args)
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hash_consume_fasta_with_reads_parser(
-    PyObject * self, PyObject * args
-)
+static
+PyObject *
+hash_consume_fasta_with_reads_parser(khmer_KCountingHash_Object * me,
+                                     PyObject * args)
 {
-    khmer_KCountingHashObject * me  = (khmer_KCountingHashObject *) self;
     CountingHash * counting  = me->counting;
 
     PyObject * rparser_obj = NULL;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(
-                args, "O|O", &rparser_obj, &callback_obj
-            )) {
+    if (!PyArg_ParseTuple(args, "O", &rparser_obj)) {
         return NULL;
     }
 
@@ -889,8 +881,7 @@ static PyObject * hash_consume_fasta_with_reads_parser(
     bool        exc_raised  = false;
     Py_BEGIN_ALLOW_THREADS
     try {
-        counting->consume_fasta(rparser, total_reads, n_consumed,
-                                _report_fn, callback_obj);
+        counting->consume_fasta(rparser, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         exc = e.get_message().c_str();
         exc_raised = true;
@@ -904,9 +895,10 @@ static PyObject * hash_consume_fasta_with_reads_parser(
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hash_consume(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_consume(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * long_str;
@@ -924,12 +916,13 @@ static PyObject * hash_consume(PyObject * self, PyObject * args)
     unsigned int n_consumed;
     n_consumed = counting->consume_string(long_str);
 
-    return PyInt_FromLong(n_consumed);
+    return PyLong_FromLong(n_consumed);
 }
 
-static PyObject * hash_get_min_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_min_count(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * long_str;
@@ -947,12 +940,13 @@ static PyObject * hash_get_min_count(PyObject * self, PyObject * args)
     BoundedCounterType c = counting->get_min_count(long_str);
     unsigned int N = c;
 
-    return PyInt_FromLong(N);
+    return PyLong_FromLong(N);
 }
 
-static PyObject * hash_get_max_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_max_count(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * long_str;
@@ -970,12 +964,13 @@ static PyObject * hash_get_max_count(PyObject * self, PyObject * args)
     BoundedCounterType c = counting->get_max_count(long_str);
     unsigned int N = c;
 
-    return PyInt_FromLong(N);
+    return PyLong_FromLong(N);
 }
 
-static PyObject * hash_get_median_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_median_count(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * long_str;
@@ -998,9 +993,10 @@ static PyObject * hash_get_median_count(PyObject * self, PyObject * args)
     return Py_BuildValue("iff", med, average, stddev);
 }
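
get_median_count packs the median, average and standard deviation of the per-k-mer counts into a single tuple (the Py_BuildValue "iff" above). For example:

    import _khmer

    counting = _khmer.new_hashtable(20, 4 ** 10)  # assuming this helper is exported
    seq = "ATGGCTGATCGAT" * 5
    counting.consume(seq)
    med, avg, stddev = counting.get_median_count(seq)
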
 
-static PyObject * hash_get_kadian_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_kadian_count(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * long_str;
@@ -1023,9 +1019,10 @@ static PyObject * hash_get_kadian_count(PyObject * self, PyObject * args)
     return Py_BuildValue("i", kad);
 }
 
-static PyObject * hash_get(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     PyObject * arg;
@@ -1039,24 +1036,25 @@ static PyObject * hash_get(PyObject * self, PyObject * args)
     if (PyInt_Check(arg)) {
         long pos = PyInt_AsLong(arg);
         count = counting->get_count((unsigned int) pos);
-    } else if (PyString_Check(arg)) {
-        std::string s = PyString_AsString(arg);
+    } else if (PyBytes_Check(arg)) {
+        std::string s = PyBytes_AsString(arg);
 
-        if (strlen(s.c_str()) < counting->ksize()) {
+        if (strlen(s.c_str()) != counting->ksize()) {
             PyErr_SetString(PyExc_ValueError,
-                            "string length must >= the counting table k-mer size");
+                            "k-mer size must equal the counting table k-mer size");
             return NULL;
         }
 
         count = counting->get_count(s.c_str());
     }
 
-    return PyInt_FromLong(count);
+    return PyLong_FromLong(count);
 }
 
-static PyObject * count_trim_on_abundance(PyObject * self, PyObject * args)
+static
+PyObject *
+count_trim_on_abundance(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * seq = NULL;
@@ -1075,7 +1073,7 @@ static PyObject * count_trim_on_abundance(PyObject * self, PyObject * args)
 
     Py_END_ALLOW_THREADS;
 
-    PyObject * trim_seq = PyString_FromStringAndSize(seq, trim_at);
+    PyObject * trim_seq = PyBytes_FromStringAndSize(seq, trim_at);
     if (trim_seq == NULL) {
         return NULL;
     }
@@ -1084,9 +1082,11 @@ static PyObject * count_trim_on_abundance(PyObject * self, PyObject * args)
 
     return ret;
 }
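
trim_on_abundance returns the trimmed sequence together with the trim position; the Py_BuildValue for `ret` sits just outside this hunk, so the tuple shape below is an assumption based on the trim_seq/trim_at pair visible above:

    import _khmer

    counting = _khmer.new_hashtable(20, 4 ** 10)  # assuming this helper is exported
    seq = "ATGGCTGATCGAT" * 5
    counting.consume(seq)
    trimmed, trim_at = counting.trim_on_abundance(seq, 2)  # keep prefix with counts >= 2
    assert trimmed == seq[:trim_at]
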
-static PyObject * count_trim_below_abundance(PyObject * self, PyObject * args)
+
+static
+PyObject *
+count_trim_below_abundance(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * seq = NULL;
@@ -1105,7 +1105,7 @@ static PyObject * count_trim_below_abundance(PyObject * self, PyObject * args)
 
     Py_END_ALLOW_THREADS;
 
-    PyObject * trim_seq = PyString_FromStringAndSize(seq, trim_at);
+    PyObject * trim_seq = PyBytes_FromStringAndSize(seq, trim_at);
     if (trim_seq == NULL) {
         return NULL;
     }
@@ -1115,10 +1115,11 @@ static PyObject * count_trim_below_abundance(PyObject * self, PyObject * args)
     return ret;
 }
 
-static PyObject * count_find_spectral_error_positions(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+count_find_spectral_error_positions(khmer_KCountingHash_Object * me,
+                                    PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     khmer::CountingHash * counting = me->counting;
 
     char * seq = NULL;
@@ -1144,26 +1145,26 @@ static PyObject * count_find_spectral_error_positions(PyObject * self,
         return NULL;
     }
     for (Py_ssize_t i = 0; i < posns_size; i++) {
-        PyList_SET_ITEM(x, i, PyInt_FromLong(posns[i]));
+        PyList_SET_ITEM(x, i, PyLong_FromLong(posns[i]));
     }
 
     return x;
 }
 
-static PyObject * hash_fasta_count_kmers_by_position(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hash_fasta_count_kmers_by_position(khmer_KCountingHash_Object * me,
+                                   PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * inputfile;
     unsigned int max_read_len = 0;
     long max_read_len_long;
     int limit_by_count_int;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "sli|O", &inputfile, &max_read_len_long,
-                          &limit_by_count_int, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "sli", &inputfile, &max_read_len_long,
+                          &limit_by_count_int)) {
         return NULL;
     }
     if (max_read_len_long < 0 || max_read_len_long >= pow(2, 32)) {
@@ -1182,7 +1183,7 @@ static PyObject * hash_fasta_count_kmers_by_position(PyObject * self,
 
     unsigned long long * counts;
     counts = counting->fasta_count_kmers_by_position(inputfile, max_read_len,
-             (unsigned short) limit_by_count_int, _report_fn, callback_obj);
+             (unsigned short) limit_by_count_int);
 
     PyObject * x = PyList_New(max_read_len);
     if (x == NULL) {
@@ -1203,32 +1204,30 @@ static PyObject * hash_fasta_count_kmers_by_position(PyObject * self,
     return x;
 }
 
-static PyObject * hash_fasta_dump_kmers_by_abundance(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hash_fasta_dump_kmers_by_abundance(khmer_KCountingHash_Object * me,
+                                   PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * inputfile;
     int limit_by = 0;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "si|O", &inputfile, &limit_by,
-                          &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "si", &inputfile, &limit_by)) {
         return NULL;
     }
 
     counting->fasta_dump_kmers_by_abundance(inputfile,
-                                            limit_by,
-                                            _report_fn, callback_obj);
-
+                                            limit_by);
 
     Py_RETURN_NONE;
 }
 
-static PyObject * hash_load(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_load(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * filename = NULL;
@@ -1247,9 +1246,10 @@ static PyObject * hash_load(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hash_save(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_save(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * filename = NULL;
@@ -1258,14 +1258,20 @@ static PyObject * hash_save(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    counting->save(filename);
+    try {
+        counting->save(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
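
With khmer_file_exception now trapped, a failed save arrives in Python as an ordinary IOError rather than an uncaught C++ exception; load behaves the same way (see hash_load above). Sketch:

    import _khmer

    counting = _khmer.new_hashtable(20, 4 ** 10)  # assuming this helper is exported
    try:
        counting.save("/tmp/table.ct")  # hypothetical output path
    except IOError as err:
        print("could not save counting table: %s" % err)
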
 
-static PyObject * hash_get_ksize(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_ksize(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -1274,12 +1280,13 @@ static PyObject * hash_get_ksize(PyObject * self, PyObject * args)
 
     unsigned int k = counting->ksize();
 
-    return PyInt_FromLong(k);
+    return PyLong_FromLong(k);
 }
 
-static PyObject * hash_get_hashsizes(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_hashsizes(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
 
@@ -1297,12 +1304,15 @@ static PyObject * hash_get_hashsizes(PyObject * self, PyObject * args)
     return x;
 }
 
-static PyObject * hash_collect_high_abundance_kmers(PyObject * self,
-        PyObject * args);
+static
+PyObject *
+hash_collect_high_abundance_kmers(khmer_KCountingHash_Object * me,
+                                  PyObject * args);
 
-static PyObject * hash_consume_and_tag(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_consume_and_tag(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * seq;
@@ -1325,91 +1335,133 @@ static PyObject * hash_consume_and_tag(PyObject * self, PyObject * args)
     return Py_BuildValue("K", n_consumed);
 }
 
-static PyObject * hash_consume_fasta_and_tag(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_get_tags_and_positions(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
-    const char * filename;
-    PyObject * callback_obj = NULL;
+    const char * seq;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &seq)) {
         return NULL;
     }
 
     // call the C++ function, and trap signals => Python
 
-    unsigned long long n_consumed;
-    unsigned int total_reads;
+    std::vector<unsigned int> posns;
+    std::vector<HashIntoType> tags;
 
-    try {
-        counting->consume_fasta_and_tag(filename, total_reads, n_consumed,
-                                        _report_fn, callback_obj);
-    } catch (_khmer_signal &e) {
-        PyErr_SetString(PyExc_IOError, e.get_message().c_str());
-        return NULL;
+    unsigned int pos = 1;
+    KMerIterator kmers(seq, counting->ksize());
+
+    while (!kmers.done()) {
+        HashIntoType kmer = kmers.next();
+        if (set_contains(counting->all_tags, kmer)) {
+            posns.push_back(pos);
+            tags.push_back(kmer);
+        }
+        pos++;
     }
 
-    return Py_BuildValue("IK", total_reads, n_consumed);
+    PyObject * posns_list = PyList_New(posns.size());
+    if (posns_list == NULL) {
+        return NULL;
+    }
+    for (size_t i = 0; i < posns.size(); i++) {
+        PyObject * tup = Py_BuildValue("IK", posns[i], tags[i]);
+        PyList_SET_ITEM(posns_list, i, tup);
+    }
+
+    return posns_list;
 }
 
-static PyObject * hash_find_all_tags_truncate_on_abundance(PyObject * self,
-        PyObject *args)
+static
+PyObject *
+hash_find_all_tags_list(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * kmer_s = NULL;
-    BoundedCounterType min_count, max_count;
 
-    if (!PyArg_ParseTuple(args, "sHH", &kmer_s, &min_count, &max_count)) {
+    if (!PyArg_ParseTuple(args, "s", &kmer_s)) {
         return NULL;
     }
 
-    if (strlen(kmer_s) < counting->ksize()) {
+    if (strlen(kmer_s) != counting->ksize()) {
         PyErr_SetString(PyExc_ValueError,
-                        "kmer_s must be less than the k-mer size of the counting hash");
+                        "k-mer length must equal the counting table k-mer size");
         return NULL;
     }
 
-    _pre_partition_info * ppi = NULL;
+    SeenSet tags;
 
     Py_BEGIN_ALLOW_THREADS
 
-    HashIntoType kmer, kmer_f, kmer_r;
-    kmer = _hash(kmer_s, counting->ksize(), kmer_f, kmer_r);
+    HashIntoType kmer_f, kmer_r;
+    _hash(kmer_s, counting->ksize(), kmer_f, kmer_r);
 
-    ppi = new _pre_partition_info(kmer);
-    counting->partition->find_all_tags_truncate_on_abundance(kmer_f, kmer_r,
-            ppi->tagged_kmers,
-            counting->all_tags,
-            min_count,
-            max_count);
-    counting->add_kmer_to_tags(kmer);
+    counting->partition->find_all_tags(kmer_f, kmer_r, tags,
+                                       counting->all_tags);
 
     Py_END_ALLOW_THREADS
 
-    return PyCObject_FromVoidPtr(ppi, free_pre_partition_info);
+    PyObject * x =  PyList_New(tags.size());
+    if (x == NULL) {
+        return NULL;
+    }
+    SeenSet::iterator si;
+    unsigned long long i = 0;
+    for (si = tags.begin(); si != tags.end(); ++si) {
+        // type K for python unsigned long long
+        PyList_SET_ITEM(x, i, Py_BuildValue("K", *si));
+        i++;
+    }
+
+    return x;
 }
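
find_all_tags_list requires a query k-mer of exactly ksize() characters and returns the reachable tags as a plain list of integers instead of a partition handle. Sketch:

    import _khmer

    counting = _khmer.new_hashtable(20, 4 ** 10)  # assuming this helper is exported
    seq = "ATGGCTGATCGAT" * 5
    counting.consume_and_tag(seq)
    kmer = seq[:counting.ksize()]               # must be exactly ksize() long
    print(counting.find_all_tags_list(kmer))    # list of tag hashes near the k-mer
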
 
-static PyObject * hash_do_subset_partition_with_abundance(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hash_consume_fasta_and_tag(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
-    PyObject * callback_obj = NULL;
-    HashIntoType start_kmer = 0, end_kmer = 0;
-    PyObject * break_on_stop_tags_o = NULL;
-    PyObject * stop_big_traversals_o = NULL;
-    BoundedCounterType min_count, max_count;
+    const char * filename;
+
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
+        return NULL;
+    }
+
+    // call the C++ function, and trap signals => Python
+
+    unsigned long long n_consumed;
+    unsigned int total_reads;
+
+    try {
+        counting->consume_fasta_and_tag(filename, total_reads, n_consumed);
+    } catch (_khmer_signal &e) {
+        PyErr_SetString(PyExc_IOError, e.get_message().c_str());
+        return NULL;
+    }
 
-    if (!PyArg_ParseTuple(args, "HH|KKOOO",
+    return Py_BuildValue("IK", total_reads, n_consumed);
+}
+
+static
+PyObject *
+hash_do_subset_partition_with_abundance(khmer_KCountingHash_Object * me,
+                                        PyObject * args)
+{
+    CountingHash * counting = me->counting;
+
+    HashIntoType start_kmer = 0, end_kmer = 0;
+    PyObject * break_on_stop_tags_o = NULL;
+    PyObject * stop_big_traversals_o = NULL;
+    BoundedCounterType min_count, max_count;
+
+    if (!PyArg_ParseTuple(args, "HH|KKOO",
                           &min_count, &max_count,
                           &start_kmer, &end_kmer,
                           &break_on_stop_tags_o,
-                          &stop_big_traversals_o,
-                          &callback_obj)) {
+                          &stop_big_traversals_o)) {
         return NULL;
     }
 
@@ -1429,15 +1481,16 @@ static PyObject * hash_do_subset_partition_with_abundance(PyObject * self,
         subset_p->do_partition_with_abundance(start_kmer, end_kmer,
                                               min_count, max_count,
                                               break_on_stop_tags,
-                                              stop_big_traversals,
-                                              _report_fn, callback_obj);
+                                              stop_big_traversals);
         Py_END_ALLOW_THREADS
     } catch (_khmer_signal &e) {
         return NULL;
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
     }
 
-    khmer_KSubsetPartitionObject * subset_obj = (khmer_KSubsetPartitionObject *)\
-            PyObject_New(khmer_KSubsetPartitionObject, &khmer_KSubsetPartitionType);
+    khmer_KSubsetPartition_Object * subset_obj = (khmer_KSubsetPartition_Object *)\
+            PyObject_New(khmer_KSubsetPartition_Object, &khmer_KSubsetPartition_Type);
 
     if (subset_obj == NULL) {
         delete subset_p;
@@ -1450,81 +1503,104 @@ static PyObject * hash_do_subset_partition_with_abundance(PyObject * self,
 }
 
 static PyMethodDef khmer_counting_methods[] = {
-    { "ksize", hash_get_ksize, METH_VARARGS, "" },
-    { "hashsizes", hash_get_hashsizes, METH_VARARGS, "" },
-    { "set_use_bigcount", hash_set_use_bigcount, METH_VARARGS, "" },
-    { "get_use_bigcount", hash_get_use_bigcount, METH_VARARGS, "" },
-    { "n_unique_kmers", hash_n_unique_kmers, METH_VARARGS, "Count the number of unique kmers" },
-    { "n_occupied", hash_n_occupied, METH_VARARGS, "Count the number of occupied bins" },
-    { "n_entries", hash_n_entries, METH_VARARGS, "" },
-    { "count", hash_count, METH_VARARGS, "Count the given kmer" },
-    { "consume", hash_consume, METH_VARARGS, "Count all k-mers in the given string" },
-    { "consume_fasta", hash_consume_fasta, METH_VARARGS, "Count all k-mers in a given file" },
     {
-        "consume_fasta_with_reads_parser", hash_consume_fasta_with_reads_parser,
+        "ksize",
+        (PyCFunction)hash_get_ksize,
+        METH_VARARGS,
+        ""
+    },
+    { "hashsizes", (PyCFunction)hash_get_hashsizes, METH_VARARGS, "" },
+    { "set_use_bigcount", (PyCFunction)hash_set_use_bigcount, METH_VARARGS, "" },
+    { "get_use_bigcount", (PyCFunction)hash_get_use_bigcount, METH_VARARGS, "" },
+    { "n_unique_kmers", (PyCFunction)hash_n_unique_kmers, METH_VARARGS, "Count the number of unique kmers" },
+    { "n_occupied", (PyCFunction)hash_n_occupied, METH_VARARGS, "Count the number of occupied bins" },
+    { "n_entries", (PyCFunction)hash_n_entries, METH_VARARGS, "" },
+    { "count", (PyCFunction)hash_count, METH_VARARGS, "Count the given kmer" },
+    { "consume", (PyCFunction)hash_consume, METH_VARARGS, "Count all k-mers in the given string" },
+    { "consume_fasta", (PyCFunction)hash_consume_fasta, METH_VARARGS, "Count all k-mers in a given file" },
+    {
+        "consume_fasta_with_reads_parser", (PyCFunction)hash_consume_fasta_with_reads_parser,
         METH_VARARGS, "Count all k-mers using a given reads parser"
     },
-    { "output_fasta_kmer_pos_freq", hash_output_fasta_kmer_pos_freq, METH_VARARGS, "" },
-    { "get", hash_get, METH_VARARGS, "Get the count for the given k-mer" },
-    { "get_min_count", hash_get_min_count, METH_VARARGS, "Get the smallest count of all the k-mers in the string" },
-    { "get_max_count", hash_get_max_count, METH_VARARGS, "Get the largest count of all the k-mers in the string" },
-    { "get_median_count", hash_get_median_count, METH_VARARGS, "Get the median, average, and stddev of the k-mer counts in the string" },
-    { "get_kadian_count", hash_get_kadian_count, METH_VARARGS, "Get the kadian (abundance of k-th rank-ordered k-mer) of the k-mer counts in the string" },
-    { "trim_on_abundance", count_trim_on_abundance, METH_VARARGS, "Trim on >= abundance" },
-    { "trim_below_abundance", count_trim_below_abundance, METH_VARARGS, "Trim on >= abundance" },
-    { "find_spectral_error_positions", count_find_spectral_error_positions, METH_VARARGS, "Identify positions of low-abundance k-mers" },
-    { "abundance_distribution", hash_abundance_distribution, METH_VARARGS, "" },
-    { "abundance_distribution_with_reads_parser", hash_abundance_distribution_with_reads_parser, METH_VARARGS, "" },
-    { "fasta_count_kmers_by_position", hash_fasta_count_kmers_by_position, METH_VARARGS, "" },
-    { "fasta_dump_kmers_by_abundance", hash_fasta_dump_kmers_by_abundance, METH_VARARGS, "" },
-    { "load", hash_load, METH_VARARGS, "" },
-    { "save", hash_save, METH_VARARGS, "" },
+    { "output_fasta_kmer_pos_freq", (PyCFunction)hash_output_fasta_kmer_pos_freq, METH_VARARGS, "" },
+    { "get", (PyCFunction)hash_get, METH_VARARGS, "Get the count for the given k-mer" },
     {
-        "collect_high_abundance_kmers", hash_collect_high_abundance_kmers,
+        "get_raw_tables", (PyCFunction)hash_get_raw_tables,
+        METH_VARARGS, "Get a list of the raw tables as memoryview objects"
+    },
+    { "get_min_count", (PyCFunction)hash_get_min_count, METH_VARARGS, "Get the smallest count of all the k-mers in the string" },
+    { "get_max_count", (PyCFunction)hash_get_max_count, METH_VARARGS, "Get the largest count of all the k-mers in the string" },
+    { "get_median_count", (PyCFunction)hash_get_median_count, METH_VARARGS, "Get the median, average, and stddev of the k-mer counts in the string" },
+    { "get_kadian_count", (PyCFunction)hash_get_kadian_count, METH_VARARGS, "Get the kadian (abundance of k-th rank-ordered k-mer) of the k-mer counts in the string" },
+    { "trim_on_abundance", (PyCFunction)count_trim_on_abundance, METH_VARARGS, "Trim on >= abundance" },
+    { "trim_below_abundance", (PyCFunction)count_trim_below_abundance, METH_VARARGS, "Trim on >= abundance" },
+    { "find_spectral_error_positions", (PyCFunction)count_find_spectral_error_positions, METH_VARARGS, "Identify positions of low-abundance k-mers" },
+    { "abundance_distribution", (PyCFunction)hash_abundance_distribution, METH_VARARGS, "" },
+    { "abundance_distribution_with_reads_parser", (PyCFunction)hash_abundance_distribution_with_reads_parser, METH_VARARGS, "" },
+    { "fasta_count_kmers_by_position", (PyCFunction)hash_fasta_count_kmers_by_position, METH_VARARGS, "" },
+    { "fasta_dump_kmers_by_abundance", (PyCFunction)hash_fasta_dump_kmers_by_abundance, METH_VARARGS, "" },
+    { "load", (PyCFunction)hash_load, METH_VARARGS, "" },
+    { "save", (PyCFunction)hash_save, METH_VARARGS, "" },
+    {
+        "collect_high_abundance_kmers", (PyCFunction)hash_collect_high_abundance_kmers,
         METH_VARARGS, ""
     },
-    { "consume_and_tag", hash_consume_and_tag, METH_VARARGS, "Consume a sequence and tag it" },
-    { "consume_fasta_and_tag", hash_consume_fasta_and_tag, METH_VARARGS, "Count all k-mers in a given file" },
-    { "do_subset_partition_with_abundance", hash_do_subset_partition_with_abundance, METH_VARARGS, "" },
-    { "find_all_tags_truncate_on_abundance", hash_find_all_tags_truncate_on_abundance, METH_VARARGS, "" },
-
+    { "consume_and_tag", (PyCFunction)hash_consume_and_tag, METH_VARARGS, "Consume a sequence and tag it" },
+    { "get_tags_and_positions", (PyCFunction)hash_get_tags_and_positions, METH_VARARGS, "Retrieve tags and their positions in a sequence." },
+    { "find_all_tags_list", (PyCFunction)hash_find_all_tags_list, METH_VARARGS, "Find all tags within range of the given k-mer, return as list" },
+    { "consume_fasta_and_tag", (PyCFunction)hash_consume_fasta_and_tag, METH_VARARGS, "Count all k-mers in a given file" },
+    { "do_subset_partition_with_abundance", (PyCFunction)hash_do_subset_partition_with_abundance, METH_VARARGS, "" },
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
 
-static PyObject *
-khmer_counting_getattr(PyObject * obj, char * name)
-{
-    return Py_FindMethod(khmer_counting_methods, obj, name);
-}
-
-#define is_counting_obj(v)  ((v)->ob_type == &khmer_KCountingHashType)
+static PyObject* _new_counting_hash(PyTypeObject * type, PyObject * args,
+                                    PyObject * kwds);
 
-static PyTypeObject khmer_KCountingHashType
-CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KCountingHashObject")
+static PyTypeObject khmer_KCountingHash_Type
+CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KCountingHash_Object")
 = {
-    PyObject_HEAD_INIT(NULL)
-    0,
-    "KCountingHash", sizeof(khmer_KCountingHashObject),
-    0,
-    khmer_counting_dealloc, /*tp_dealloc*/
-    0,              /*tp_print*/
-    khmer_counting_getattr, /*tp_getattr*/
-    0,              /*tp_setattr*/
-    0,              /*tp_compare*/
-    0,              /*tp_repr*/
-    0,              /*tp_as_number*/
-    0,              /*tp_as_sequence*/
-    0,              /*tp_as_mapping*/
-    0,              /*tp_hash */
-    0,              /*tp_call*/
-    0,              /*tp_str*/
-    0,              /*tp_getattro*/
-    0,              /*tp_setattro*/
-    0,              /*tp_as_buffer*/
-    Py_TPFLAGS_DEFAULT,     /*tp_flags*/
-    "counting hash object",           /* tp_doc */
+    PyVarObject_HEAD_INIT(NULL, 0)       /* init & ob_size */
+    "_khmer.KCountingHash",              /*tp_name*/
+    sizeof(khmer_KCountingHash_Object),  /*tp_basicsize*/
+    0,                                   /*tp_itemsize*/
+    (destructor)khmer_counting_dealloc,  /*tp_dealloc*/
+    0,                                   /*tp_print*/
+    0,                                   /*tp_getattr*/
+    0,                                   /*tp_setattr*/
+    0,                                   /*tp_compare*/
+    0,                                   /*tp_repr*/
+    0,                                   /*tp_as_number*/
+    0,                                   /*tp_as_sequence*/
+    0,                                   /*tp_as_mapping*/
+    0,                                   /*tp_hash */
+    0,                                   /*tp_call*/
+    0,                                   /*tp_str*/
+    0,                                   /*tp_getattro*/
+    0,                                   /*tp_setattro*/
+    0,                                   /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT,                  /*tp_flags*/
+    "counting hash object",              /* tp_doc */
+    0,                                   /* tp_traverse */
+    0,                                   /* tp_clear */
+    0,                                   /* tp_richcompare */
+    0,                                   /* tp_weaklistoffset */
+    0,                                   /* tp_iter */
+    0,                                   /* tp_iternext */
+    khmer_counting_methods,              /* tp_methods */
+    0,                                   /* tp_members */
+    0,                                   /* tp_getset */
+    0,                                   /* tp_base */
+    0,                                   /* tp_dict */
+    0,                                   /* tp_descr_get */
+    0,                                   /* tp_descr_set */
+    0,                                   /* tp_dictoffset */
+    0,                                   /* tp_init */
+    0,                                   /* tp_alloc */
+    _new_counting_hash,                  /* tp_new */
 };
 
+#define is_counting_obj(v)  (Py_TYPE(v) == &khmer_KCountingHash_Type)
+
 //
 // new_hashtable
 //
@@ -1538,14 +1614,18 @@ static PyObject* new_hashtable(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    khmer_KCountingHashObject * kcounting_obj = (khmer_KCountingHashObject *) \
-            PyObject_New(khmer_KCountingHashObject, &khmer_KCountingHashType);
+    khmer_KCountingHash_Object * kcounting_obj = (khmer_KCountingHash_Object *) \
+            PyObject_New(khmer_KCountingHash_Object, &khmer_KCountingHash_Type);
 
     if (kcounting_obj == NULL) {
         return NULL;
     }
 
-    kcounting_obj->counting = new CountingHash(k, size);
+    try {
+        kcounting_obj->counting = new CountingHash(k, size);
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
+    }
 
     return (PyObject *) kcounting_obj;
 }
@@ -1554,69 +1634,75 @@ static PyObject* new_hashtable(PyObject * self, PyObject * args)
 // new_counting_hash
 //
 
-static PyObject* _new_counting_hash(PyObject * self, PyObject * args)
+static PyObject* _new_counting_hash(PyTypeObject * type, PyObject * args,
+                                    PyObject * kwds)
 {
-    WordLength k = 0;
-    PyListObject * sizes_list_o = NULL;
+    khmer_KCountingHash_Object * self;
 
-    if (!PyArg_ParseTuple(args, "bO!", &k, &PyList_Type, &sizes_list_o)) {
-        return NULL;
-    }
+    self = (khmer_KCountingHash_Object *)type->tp_alloc(type, 0);
 
-    std::vector<HashIntoType> sizes;
-    Py_ssize_t sizes_list_o_length = PyList_GET_SIZE(sizes_list_o);
-    if (sizes_list_o_length == -1) {
-        PyErr_SetString(PyExc_ValueError, "error with hashtable primes!");
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < sizes_list_o_length; i++) {
-        PyObject * size_o = PyList_GET_ITEM(sizes_list_o, i);
-        if (PyInt_Check(size_o)) {
-            sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
-        } else if (PyLong_Check(size_o)) {
-            sizes.push_back((HashIntoType) PyLong_AsUnsignedLongLong(size_o));
-        } else if (PyFloat_Check(size_o)) {
-            sizes.push_back((HashIntoType) PyFloat_AS_DOUBLE(size_o));
-        } else {
-            PyErr_SetString(PyExc_TypeError,
-                            "2nd argument must be a list of ints, longs, or floats");
+    if (self != NULL) {
+        WordLength k = 0;
+        PyListObject * sizes_list_o = NULL;
+
+        if (!PyArg_ParseTuple(args, "bO!", &k, &PyList_Type, &sizes_list_o)) {
+            Py_DECREF(self);
             return NULL;
         }
-    }
 
-    khmer_KCountingHashObject * kcounting_obj = (khmer_KCountingHashObject *) \
-            PyObject_New(khmer_KCountingHashObject, &khmer_KCountingHashType);
+        std::vector<HashIntoType> sizes;
+        Py_ssize_t sizes_list_o_length = PyList_GET_SIZE(sizes_list_o);
+        if (sizes_list_o_length == -1) {
+            Py_DECREF(self);
+            PyErr_SetString(PyExc_ValueError, "error with hashtable primes!");
+            return NULL;
+        }
+        for (Py_ssize_t i = 0; i < sizes_list_o_length; i++) {
+            PyObject * size_o = PyList_GET_ITEM(sizes_list_o, i);
+            if (PyLong_Check(size_o)) {
+                sizes.push_back((HashIntoType) PyLong_AsUnsignedLongLong(size_o));
+            } else if (PyInt_Check(size_o)) {
+                sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
+            } else if (PyFloat_Check(size_o)) {
+                sizes.push_back((HashIntoType) PyFloat_AS_DOUBLE(size_o));
+            } else {
+                Py_DECREF(self);
+                PyErr_SetString(PyExc_TypeError,
+                                "2nd argument must be a list of ints, longs, or floats");
+                return NULL;
+            }
+        }
 
-    if (kcounting_obj == NULL) {
-        return NULL;
+        try {
+            self->counting = new CountingHash(k, sizes);
+        } catch (std::bad_alloc &e) {
+            // release the half-built object, as the error paths above do;
+            // tp_alloc zero-filled it, so deallocation is safe
+            Py_DECREF(self);
+            return PyErr_NoMemory();
+        }
     }
 
-    kcounting_obj->counting = new CountingHash(k, sizes);
-
-    return (PyObject *) kcounting_obj;
+    return (PyObject *) self;
 }
 
 //
 // hashbits stuff
 //
 
-static void khmer_hashbits_dealloc(PyObject * obj);
+static void khmer_hashbits_dealloc(khmer_KHashbits_Object * obj);
 static PyObject* khmer_hashbits_new(PyTypeObject * type, PyObject * args,
                                     PyObject * kwds);
-static int khmer_hashbits_init(khmer_KHashbitsObject * self, PyObject * args,
+static int khmer_hashbits_init(khmer_KHashbits_Object * self, PyObject * args,
                                PyObject * kwds);
-static PyObject * khmer_hashbits_getattr(PyObject * obj, char * name);
 
-static PyTypeObject khmer_KHashbitsType
-CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KHashbitsObject")
+static PyTypeObject khmer_KHashbits_Type
+CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KHashbits_Object")
 = {
-    PyObject_HEAD_INIT(NULL)
-    0,
-    "Hashbits", sizeof(khmer_KHashbitsObject),
-    0,
+    PyVarObject_HEAD_INIT(NULL, 0) /* init & ob_size */
+    "_khmer.Hashbits",             /* tp_name */
+    sizeof(khmer_KHashbits_Object), /* tp_basicsize */
+    0,                             /* tp_itemsize */
     (destructor)khmer_hashbits_dealloc, /*tp_dealloc*/
     0,              /*tp_print*/
-    khmer_hashbits_getattr, /*tp_getattr*/
+    0,              /*tp_getattr*/
     0,              /*tp_setattr*/
     0,              /*tp_compare*/
     0,              /*tp_repr*/
@@ -1647,20 +1733,21 @@ CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KHashbitsObject")
     0,                       /* tp_dictoffset */
     (initproc)khmer_hashbits_init,   /* tp_init */
     0,                       /* tp_alloc */
+    khmer_hashbits_new,                  /* tp_new */
 };
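
Hashbits gets the same treatment: the hand-rolled tp_getattr hook is
removed, tp_new is filled in, and (as the method-table hunk further down
shows) handlers are reached through the standard tp_methods machinery,
which is why each entry there acquires a (PyCFunction) cast: the handlers
now take the concrete object type as their first parameter. A minimal
sketch of that registration pattern, reusing the illustrative Thing_Object
from above:

    static PyObject *
    thing_ping(Thing_Object * me, PyObject * args)
    {
        if (!PyArg_ParseTuple(args, "")) {    // no arguments expected
            return NULL;
        }
        Py_RETURN_NONE;
    }

    static PyMethodDef Thing_methods[] = {
        /* the cast is safe: Python always passes an instance of the
           owning type as the first argument */
        { "ping", (PyCFunction) thing_ping, METH_VARARGS, "no-op demo" },
        { NULL, NULL, 0, NULL }               /* sentinel */
    };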
 
-static PyObject * hash_abundance_distribution_with_reads_parser(
-    PyObject * self,
-    PyObject * args)
+static
+PyObject *
+hash_abundance_distribution_with_reads_parser(khmer_KCountingHash_Object * me,
+        PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
-    khmer :: python :: ReadParser_Object * rparser_obj = NULL;
-    khmer_KHashbitsObject *tracking_obj = NULL;
+    khmer :: python :: khmer_ReadParser_Object * rparser_obj = NULL;
+    khmer_KHashbits_Object *tracking_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "O!O!", &python::ReadParser_Type,
-                          &rparser_obj, &khmer_KHashbitsType, &tracking_obj)) {
+    if (!PyArg_ParseTuple(args, "O!O!", &python::khmer_ReadParser_Type,
+                          &rparser_obj, &khmer_KHashbits_Type, &tracking_obj)) {
         return NULL;
     }
 
@@ -1669,19 +1756,9 @@ static PyObject * hash_abundance_distribution_with_reads_parser(
 
     HashIntoType * dist = NULL;
 
-    const char * exception = NULL;
     Py_BEGIN_ALLOW_THREADS
-    try {
-        dist = counting->abundance_distribution(rparser, hashbits);
-    } catch (khmer::read_parsers::NoMoreReadsAvailable &exc ) {
-        exception = exc.what();
-    }
+    dist = counting->abundance_distribution(rparser, hashbits);
     Py_END_ALLOW_THREADS
-    if (exception != NULL) {
-        delete[] dist;
-        PyErr_SetString(PyExc_IOError, exception);
-        return NULL;
-    }
 
     PyObject * x = PyList_New(MAX_BIGCOUNT + 1);
     if (x == NULL) {
@@ -1696,14 +1773,15 @@ static PyObject * hash_abundance_distribution_with_reads_parser(
     return x;
 }
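
The deleted trap above is also a reminder of the threading rule in play:
between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS the GIL is
released, so no Python C-API call (PyErr_SetString included) is legal in
that window. Where a C++ exception can escape there, the safe shape is to
copy the message, reacquire the GIL, and only then raise. A sketch, with
long_running_work() as an illustrative stand-in:

    #include <Python.h>
    #include <stdexcept>
    #include <string>

    static void long_running_work()       // stand-in for the C++ core call
    {
        throw std::runtime_error("disk full");
    }

    static PyObject *
    guarded_call(PyObject * self, PyObject * args)
    {
        std::string err;
        bool failed = false;

        Py_BEGIN_ALLOW_THREADS            // GIL released: C/C++ only
        try {
            long_running_work();
        } catch (std::exception &e) {
            err = e.what();               // copy while the exception lives
            failed = true;
        }
        Py_END_ALLOW_THREADS              // GIL reacquired

        if (failed) {
            PyErr_SetString(PyExc_IOError, err.c_str());
            return NULL;
        }
        Py_RETURN_NONE;
    }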
 
-static PyObject * hash_abundance_distribution(PyObject * self, PyObject * args)
+static
+PyObject *
+hash_abundance_distribution(khmer_KCountingHash_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
     CountingHash * counting = me->counting;
 
     const char * filename = NULL;
-    khmer_KHashbitsObject * tracking_obj = NULL;
-    if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KHashbitsType,
+    khmer_KHashbits_Object * tracking_obj = NULL;
+    if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KHashbits_Type,
                           &tracking_obj)) {
         return NULL;
     }
@@ -1741,9 +1819,10 @@ static PyObject * hash_abundance_distribution(PyObject * self, PyObject * args)
     return x;
 }
 
-static PyObject * hashbits_n_unique_kmers(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_n_unique_kmers(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     HashIntoType n = hashbits->n_unique_kmers();
@@ -1752,36 +1831,37 @@ static PyObject * hashbits_n_unique_kmers(PyObject * self, PyObject * args)
 }
 
 
-static PyObject * hashbits_count_overlap(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_count_overlap(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
-    khmer_KHashbitsObject * ht2_argu;
+    khmer_KHashbits_Object * ht2_argu;
     const char * filename;
-    PyObject * callback_obj = NULL;
     Hashbits * ht2;
 
-    if (!PyArg_ParseTuple(args, "sO!|O", &filename, &khmer_KHashbitsType,
-                          &ht2_argu,
-                          &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KHashbits_Type,
+                          &ht2_argu)) {
         return NULL;
     }
 
     ht2 = ht2_argu->hashbits;
 
-    // call the C++ function, and trap signals => Python
+// call the C++ function, and trap signals => Python
 
     unsigned long long n_consumed;
     unsigned int total_reads;
     HashIntoType curve[2][100];
 
     try {
-        hashbits->consume_fasta_overlap(filename, curve, *ht2, total_reads, n_consumed,
-                                        _report_fn, callback_obj);
+        hashbits->consume_fasta_overlap(filename, curve, *ht2, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
-    }
+    } catch (InvalidStreamHandle &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     HashIntoType n = hashbits->n_unique_kmers();
     HashIntoType n_overlap = hashbits->n_overlap_kmers();
@@ -1797,9 +1877,10 @@ static PyObject * hashbits_count_overlap(PyObject * self, PyObject * args)
     return Py_BuildValue("KKO", n, n_overlap, x);
 }
 
-static PyObject * hashbits_n_occupied(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_n_occupied(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     HashIntoType start = 0, stop = 0;
@@ -1813,21 +1894,23 @@ static PyObject * hashbits_n_occupied(PyObject * self, PyObject * args)
     return PyLong_FromUnsignedLongLong(n);
 }
 
-static PyObject * hashbits_n_tags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_n_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
         return NULL;
     }
 
-    return PyInt_FromSize_t(hashbits->n_tags());
+    return PyLong_FromSize_t(hashbits->n_tags());
 }
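
From this hunk on, the patch also swaps PyInt_* returns for their PyLong_*
equivalents. The PyLong_* constructors exist on both Python 2.x and 3.x
(on 2.x they build a `long`), so the change is behavior-preserving now and
removes one class of work from any future Python 3 port. In miniature:

    /* portable across 2.x and 3.x; PyInt_FromSize_t is 2.x-only */
    static PyObject *
    count_to_python(size_t n)             /* illustrative helper */
    {
        return PyLong_FromSize_t(n);
    }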
 
-static PyObject * hashbits_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_count(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer;
@@ -1838,18 +1921,19 @@ static PyObject * hashbits_count(PyObject * self, PyObject * args)
 
     if (strlen(kmer) != hashbits->ksize()) {
         PyErr_SetString(PyExc_ValueError,
-                        "k-mer length must be the same as the hashbits k-size");
+                        "k-mer length must equal the presence table k-mer size");
         return NULL;
     }
 
     hashbits->count(kmer);
 
-    return PyInt_FromLong(1);
+    return PyLong_FromLong(1);
 }
 
-static PyObject * hashbits_consume(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_consume(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * long_str;
@@ -1867,12 +1951,13 @@ static PyObject * hashbits_consume(PyObject * self, PyObject * args)
     unsigned int n_consumed;
     n_consumed = hashbits->consume_string(long_str);
 
-    return PyInt_FromLong(n_consumed);
+    return PyLong_FromLong(n_consumed);
 }
 
-static PyObject * hashbits_print_stop_tags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_print_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -1886,9 +1971,10 @@ static PyObject * hashbits_print_stop_tags(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_print_tagset(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_print_tagset(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -1902,9 +1988,10 @@ static PyObject * hashbits_print_tagset(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_load_stop_tags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_load_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -1931,9 +2018,10 @@ static PyObject * hashbits_load_stop_tags(PyObject * self, PyObject * args)
 }
 
 
-static PyObject * hashbits_save_stop_tags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_save_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -1942,20 +2030,26 @@ static PyObject * hashbits_save_stop_tags(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    hashbits->save_stop_tags(filename);
+    try {
+        hashbits->save_stop_tags(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_traverse_from_tags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_traverse_from_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
-    khmer_KCountingHashObject * counting_o = NULL;
+    khmer_KCountingHash_Object * counting_o = NULL;
     unsigned int distance, threshold, frequency;
 
-    if (!PyArg_ParseTuple(args, "O!III", &khmer_KCountingHashType, &counting_o,
+    if (!PyArg_ParseTuple(args, "O!III", &khmer_KCountingHash_Type, &counting_o,
                           &distance, &threshold, &frequency)) {
         return NULL;
     }
@@ -1966,17 +2060,18 @@ static PyObject * hashbits_traverse_from_tags(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_repartition_largest_partition(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_repartition_largest_partition(khmer_KHashbits_Object * me,
+                                       PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
-    khmer_KCountingHashObject * counting_o = NULL;
+    khmer_KCountingHash_Object * counting_o = NULL;
     PyObject * subset_o = NULL;
     unsigned int distance, threshold, frequency;
 
-    if (!PyArg_ParseTuple(args, "OO!III", &subset_o, &khmer_KCountingHashType,
+    if (!PyArg_ParseTuple(args, "OO!III", &subset_o, &khmer_KCountingHash_Type,
                           &counting_o, &distance, &threshold, &frequency)) {
         return NULL;
     }
@@ -1990,15 +2085,22 @@ static PyObject * hashbits_repartition_largest_partition(PyObject * self,
 
     CountingHash * counting = counting_o->counting;
 
-    unsigned long next_largest = subset_p->repartition_largest_partition(distance,
-                                 threshold, frequency, *counting);
+    unsigned long next_largest;
+    try {
+        next_largest = subset_p->repartition_largest_partition(distance,
+                       threshold, frequency, *counting);
+    } catch (khmer_exception &e) {
+        PyErr_SetString(PyExc_RuntimeError, e.what());
+        return NULL;
+    }
 
-    return PyInt_FromLong(next_largest);
+    return PyLong_FromLong(next_largest);
 }
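
Note how the new trap above refines error translation: elsewhere in the
patch, file-level failures (khmer_file_exception) surface as IOError, while
internal errors caught as khmer_exception become RuntimeError here. The
general shape, with do_work() as a hypothetical worker:

    static PyObject *
    translated_call(PyObject * self, PyObject * args)    // illustrative
    {
        try {
            do_work();                                   // may throw
        } catch (khmer_file_exception &e) {
            PyErr_SetString(PyExc_IOError, e.what());    // I/O failures
            return NULL;
        } catch (khmer_exception &e) {
            PyErr_SetString(PyExc_RuntimeError, e.what()); // logic errors
            return NULL;
        }
        Py_RETURN_NONE;
    }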
 
-static PyObject * hashbits_get(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     PyObject * arg;
@@ -2012,12 +2114,12 @@ static PyObject * hashbits_get(PyObject * self, PyObject * args)
     if (PyInt_Check(arg)) {
         long pos = PyInt_AsLong(arg);
         count = hashbits->get_count((unsigned int) pos);
-    } else if (PyString_Check(arg)) {
-        std::string s = PyString_AsString(arg);
+    } else if (PyBytes_Check(arg)) {
+        std::string s = PyBytes_AsString(arg);
 
         if (strlen(s.c_str()) < hashbits->ksize()) {
             PyErr_SetString(PyExc_ValueError,
-                            "string length must >= the presence table k-mer size");
+                            "string length must equal the presence table k-mer size");
             return NULL;
         }
 
@@ -2027,13 +2129,13 @@ static PyObject * hashbits_get(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    return PyInt_FromLong(count);
+    return PyLong_FromLong(count);
 }
 
-static PyObject * hashbits_calc_connected_graph_size(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_calc_connected_graph_size(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * _kmer;
@@ -2059,9 +2161,10 @@ static PyObject * hashbits_calc_connected_graph_size(PyObject * self,
     return PyLong_FromUnsignedLongLong(size);
 }
 
-static PyObject * hashbits_kmer_degree(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_kmer_degree(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer_s = NULL;
@@ -2070,12 +2173,13 @@ static PyObject * hashbits_kmer_degree(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    return PyInt_FromLong(hashbits->kmer_degree(kmer_s));
+    return PyLong_FromLong(hashbits->kmer_degree(kmer_s));
 }
 
-static PyObject * hashbits_trim_on_stoptags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_trim_on_stoptags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * seq = NULL;
@@ -2091,7 +2195,7 @@ static PyObject * hashbits_trim_on_stoptags(PyObject * self, PyObject * args)
 
     Py_END_ALLOW_THREADS;
 
-    PyObject * trim_seq = PyString_FromStringAndSize(seq, trim_at);
+    PyObject * trim_seq = PyBytes_FromStringAndSize(seq, trim_at);
     if (trim_seq == NULL) {
         return NULL;
     }
@@ -2101,10 +2205,11 @@ static PyObject * hashbits_trim_on_stoptags(PyObject * self, PyObject * args)
     return ret;
 }
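
The PyString_* to PyBytes_* renames in this and later hunks are pure
spelling changes on Python 2, where bytesobject.h aliases PyBytes_* to
PyString_*, but they are the names that survive into Python 3. In
miniature:

    /* same object either way on 2.x; the PyBytes spelling also
       compiles against Python 3 headers */
    static PyObject *
    trimmed_copy(const char * seq, Py_ssize_t trim_at)   /* illustrative */
    {
        return PyBytes_FromStringAndSize(seq, trim_at);
    }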
 
-static PyObject * hashbits_identify_stoptags_by_position(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_identify_stoptags_by_position(khmer_KHashbits_Object * me,
+                                       PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * seq = NULL;
@@ -2129,21 +2234,19 @@ static PyObject * hashbits_identify_stoptags_by_position(PyObject * self,
     return x;
 }
 
-static PyObject * hashbits_do_subset_partition(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_do_subset_partition(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
-    PyObject * callback_obj = NULL;
     HashIntoType start_kmer = 0, end_kmer = 0;
     PyObject * break_on_stop_tags_o = NULL;
     PyObject * stop_big_traversals_o = NULL;
 
-    if (!PyArg_ParseTuple(args, "|KKOOO", &start_kmer, &end_kmer,
+    if (!PyArg_ParseTuple(args, "|KKOO", &start_kmer, &end_kmer,
                           &break_on_stop_tags_o,
-                          &stop_big_traversals_o,
-                          &callback_obj)) {
+                          &stop_big_traversals_o)) {
         return NULL;
     }
 
@@ -2161,20 +2264,21 @@ static PyObject * hashbits_do_subset_partition(PyObject * self,
         Py_BEGIN_ALLOW_THREADS
         subset_p = new SubsetPartition(hashbits);
         subset_p->do_partition(start_kmer, end_kmer, break_on_stop_tags,
-                               stop_big_traversals,
-                               _report_fn, callback_obj);
+                               stop_big_traversals);
         Py_END_ALLOW_THREADS
     } catch (_khmer_signal &e) {
         return NULL;
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
     }
 
     return PyCObject_FromVoidPtr(subset_p, free_subset_partition_info);
 }
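
One caveat for later: the subset handles created here still travel through
PyCObject, which was deprecated in Python 2.7 and removed in 3.2. A future
port would presumably swap in PyCapsule, roughly as follows (the capsule
name is hypothetical):

    static void subset_capsule_destructor(PyObject * cap)
    {
        void * p = PyCapsule_GetPointer(cap, "khmer.SubsetPartition");
        // ...free the SubsetPartition here, as free_subset_partition_info
        // does for the PyCObject variant...
        (void) p;
    }

    /* in place of PyCObject_FromVoidPtr(subset_p, free_subset_partition_info): */
    return PyCapsule_New(subset_p, "khmer.SubsetPartition",
                         subset_capsule_destructor);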
 
-static PyObject * hashbits_join_partitions_by_path(PyObject * self,
-        PyObject *args)
+static
+PyObject *
+hashbits_join_partitions_by_path(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * sequence = NULL;
@@ -2187,9 +2291,10 @@ static PyObject * hashbits_join_partitions_by_path(PyObject * self,
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_merge_subset(PyObject * self, PyObject *args)
+static
+PyObject *
+hashbits_merge_subset(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     PyObject * subset_obj;
@@ -2210,9 +2315,10 @@ static PyObject * hashbits_merge_subset(PyObject * self, PyObject *args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_merge_from_disk(PyObject * self, PyObject *args)
+static
+PyObject *
+hashbits_merge_from_disk(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2230,26 +2336,25 @@ static PyObject * hashbits_merge_from_disk(PyObject * self, PyObject *args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_consume_fasta(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_consume_fasta(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
-    // call the C++ function, and trap signals => Python
+// call the C++ function, and trap signals => Python
 
     unsigned long long n_consumed = 0;
     unsigned int total_reads = 0;
 
     try {
-        hashbits->consume_fasta(filename, total_reads, n_consumed,
-                                _report_fn, callback_obj);
+        hashbits->consume_fasta(filename, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
@@ -2261,32 +2366,30 @@ static PyObject * hashbits_consume_fasta(PyObject * self, PyObject * args)
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
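
All of the consume_* entry points lose their trailing optional callback
argument, so the PyArg_ParseTuple format strings shrink accordingly
("s|O" becomes "s"; everything after '|' in a format string is optional).
The return side uses the same mini-language: "IK" packs an unsigned int
and an unsigned long long into a 2-tuple. Both in one illustrative
handler:

    static PyObject *
    parse_and_build_demo(PyObject * self, PyObject * args)
    {
        const char * filename = NULL;
        PyObject * optional = NULL;       /* left NULL if not supplied */

        /* "s|O": required C string, then one optional object */
        if (!PyArg_ParseTuple(args, "s|O", &filename, &optional)) {
            return NULL;
        }

        unsigned int total_reads = 0;
        unsigned long long n_consumed = 0;
        /* "IK": (unsigned int, unsigned long long) -> tuple */
        return Py_BuildValue("IK", total_reads, n_consumed);
    }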
 
-static PyObject * hashbits_consume_fasta_with_reads_parser(
-    PyObject * self, PyObject * args
-)
+static
+PyObject *
+hashbits_consume_fasta_with_reads_parser(khmer_KHashbits_Object * me,
+        PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     PyObject * rparser_obj = NULL;
-    PyObject * callback_obj = NULL;
 
     if (!PyArg_ParseTuple(
-                args, "O|O", &rparser_obj, &callback_obj)) {
+                args, "O", &rparser_obj)) {
         return NULL;
     }
 
     read_parsers:: IParser * rparser =
         _PyObject_to_khmer_ReadParser( rparser_obj );
 
-    // call the C++ function, and trap signals => Python
+// call the C++ function, and trap signals => Python
     unsigned long long  n_consumed  = 0;
     unsigned int          total_reads = 0;
     char const * exc = NULL;
     Py_BEGIN_ALLOW_THREADS
     try {
-        hashbits->consume_fasta(rparser, total_reads, n_consumed,
-                                _report_fn, callback_obj);
+        hashbits->consume_fasta(rparser, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         exc = e.get_message().c_str();
     }
@@ -2300,19 +2403,20 @@ static PyObject * hashbits_consume_fasta_with_reads_parser(
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hashbits_consume_fasta_and_traverse(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_consume_fasta_and_traverse(khmer_KHashbits_Object * me,
+                                    PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename;
     unsigned int radius, big_threshold, transfer_threshold;
-    khmer_KCountingHashObject * counting_o = NULL;
+    khmer_KCountingHash_Object * counting_o = NULL;
 
     if (!PyArg_ParseTuple(args, "sIIIO!", &filename,
                           &radius, &big_threshold, &transfer_threshold,
-                          &khmer_KCountingHashType, &counting_o)) {
+                          &khmer_KCountingHash_Type, &counting_o)) {
         return NULL;
     }
 
@@ -2330,16 +2434,15 @@ void sig(unsigned int total_reads, unsigned int n_consumed)
     std::cout << total_reads << " " << n_consumed << std::endl;
 }
 
-static PyObject * hashbits_consume_fasta_and_tag(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_consume_fasta_and_tag(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
@@ -2349,8 +2452,7 @@ static PyObject * hashbits_consume_fasta_and_tag(PyObject * self,
     unsigned int total_reads;
 
     try {
-        hashbits->consume_fasta_and_tag(filename, total_reads, n_consumed,
-                                        _report_fn, callback_obj);
+        hashbits->consume_fasta_and_tag(filename, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
@@ -2362,18 +2464,17 @@ static PyObject * hashbits_consume_fasta_and_tag(PyObject * self,
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hashbits_consume_fasta_and_tag_with_reads_parser(
-    PyObject * self, PyObject * args
-)
+static
+PyObject *
+hashbits_consume_fasta_and_tag_with_reads_parser(khmer_KHashbits_Object * me,
+        PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
-    python::ReadParser_Object * rparser_obj = NULL;
-    PyObject * callback_obj = NULL;
+    python::khmer_ReadParser_Object * rparser_obj = NULL;
 
-    if (!PyArg_ParseTuple( args, "O!|O", &python::ReadParser_Type,
-                           &rparser_obj, &callback_obj )) {
+    if (!PyArg_ParseTuple( args, "O!", &python::khmer_ReadParser_Type,
+                           &rparser_obj)) {
         return NULL;
     }
 
@@ -2386,7 +2487,7 @@ static PyObject * hashbits_consume_fasta_and_tag_with_reads_parser(
     Py_BEGIN_ALLOW_THREADS
     try {
         hashbits->consume_fasta_and_tag(
-            rparser, total_reads, n_consumed, _report_fn, callback_obj
+            rparser, total_reads, n_consumed
         );
     } catch (_khmer_signal &e) {
         exc = e.get_message().c_str();
@@ -2402,16 +2503,16 @@ static PyObject * hashbits_consume_fasta_and_tag_with_reads_parser(
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hashbits_consume_fasta_and_tag_with_stoptags(PyObject * self,
+static
+PyObject *
+hashbits_consume_fasta_and_tag_with_stoptags(khmer_KHashbits_Object * me,
         PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
@@ -2422,8 +2523,7 @@ static PyObject * hashbits_consume_fasta_and_tag_with_stoptags(PyObject * self,
 
     try {
         hashbits->consume_fasta_and_tag_with_stoptags(filename,
-                total_reads, n_consumed,
-                _report_fn, callback_obj);
+                total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
@@ -2435,16 +2535,15 @@ static PyObject * hashbits_consume_fasta_and_tag_with_stoptags(PyObject * self,
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hashbits_consume_partitioned_fasta(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_consume_partitioned_fasta(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
@@ -2454,8 +2553,7 @@ static PyObject * hashbits_consume_partitioned_fasta(PyObject * self,
     unsigned int total_reads;
 
     try {
-        hashbits->consume_partitioned_fasta(filename, total_reads, n_consumed,
-                                            _report_fn, callback_obj);
+        hashbits->consume_partitioned_fasta(filename, total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
@@ -2467,9 +2565,10 @@ static PyObject * hashbits_consume_partitioned_fasta(PyObject * self,
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * hashbits_find_all_tags(PyObject * self, PyObject *args)
+static
+PyObject *
+hashbits_find_all_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer_s = NULL;
@@ -2478,9 +2577,9 @@ static PyObject * hashbits_find_all_tags(PyObject * self, PyObject *args)
         return NULL;
     }
 
-    if (strlen(kmer_s) < hashbits->ksize()) { // @@
+    if (strlen(kmer_s) != hashbits->ksize()) {
         PyErr_SetString( PyExc_ValueError,
-                         "starting kmer is smaller than the K size of the hashbits");
+                         "k-mer size must equal the k-mer size of the presence table");
         return NULL;
     }
 
@@ -2491,7 +2590,11 @@ static PyObject * hashbits_find_all_tags(PyObject * self, PyObject *args)
     HashIntoType kmer, kmer_f, kmer_r;
     kmer = _hash(kmer_s, hashbits->ksize(), kmer_f, kmer_r);
 
-    ppi = new _pre_partition_info(kmer);
+    try {
+        ppi = new _pre_partition_info(kmer);
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
+    }
     hashbits->partition->find_all_tags(kmer_f, kmer_r, ppi->tagged_kmers,
                                        hashbits->all_tags);
     hashbits->add_kmer_to_tags(kmer);
@@ -2501,9 +2604,10 @@ static PyObject * hashbits_find_all_tags(PyObject * self, PyObject *args)
     return PyCObject_FromVoidPtr(ppi, free_pre_partition_info);
 }
 
-static PyObject * hashbits_assign_partition_id(PyObject * self, PyObject *args)
+static
+PyObject *
+hashbits_assign_partition_id(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     PyObject * ppi_obj;
@@ -2523,12 +2627,13 @@ static PyObject * hashbits_assign_partition_id(PyObject * self, PyObject *args)
     p = hashbits->partition->assign_partition_id(ppi->kmer,
             ppi->tagged_kmers);
 
-    return PyInt_FromLong(p);
+    return PyLong_FromLong(p);
 }
 
-static PyObject * hashbits_add_tag(PyObject * self, PyObject *args)
+static
+PyObject *
+hashbits_add_tag(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer_s = NULL;
@@ -2542,9 +2647,10 @@ static PyObject * hashbits_add_tag(PyObject * self, PyObject *args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_add_stop_tag(PyObject * self, PyObject *args)
+static
+PyObject *
+hashbits_add_stop_tag(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer_s = NULL;
@@ -2558,9 +2664,10 @@ static PyObject * hashbits_add_stop_tag(PyObject * self, PyObject *args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_get_stop_tags(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -2581,9 +2688,10 @@ static PyObject * hashbits_get_stop_tags(PyObject * self, PyObject * args)
     return x;
 }
 
-static PyObject * hashbits_get_tagset(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get_tagset(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -2604,19 +2712,18 @@ static PyObject * hashbits_get_tagset(PyObject * self, PyObject * args)
     return x;
 }
 
-static PyObject * hashbits_output_partitions(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_output_partitions(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
     const char * output = NULL;
-    PyObject * callback_obj = NULL;
     PyObject * output_unassigned_o = NULL;
 
-    if (!PyArg_ParseTuple(args, "ss|OO", &filename, &output,
-                          &output_unassigned_o,
-                          &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "ss|O", &filename, &output,
+                          &output_unassigned_o)) {
         return NULL;
     }
 
@@ -2631,9 +2738,7 @@ static PyObject * hashbits_output_partitions(PyObject * self, PyObject * args)
         SubsetPartition * subset_p = hashbits->partition;
         n_partitions = subset_p->output_partitioned_file(filename,
                        output,
-                       output_unassigned,
-                       _report_fn,
-                       callback_obj);
+                       output_unassigned);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError, e.get_message().c_str());
         return NULL;
@@ -2642,21 +2747,21 @@ static PyObject * hashbits_output_partitions(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    return PyInt_FromLong(n_partitions);
+    return PyLong_FromLong(n_partitions);
 }
 
-static PyObject * hashbits_find_unpart(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_find_unpart(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
     PyObject * traverse_o = NULL;
     PyObject * stop_big_traversals_o = NULL;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "sOO|O", &filename, &traverse_o,
-                          &stop_big_traversals_o, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "sOO", &filename, &traverse_o,
+                          &stop_big_traversals_o)) {
         return NULL;
     }
 
@@ -2667,33 +2772,32 @@ static PyObject * hashbits_find_unpart(PyObject * self, PyObject * args)
     try {
         SubsetPartition * subset_p = hashbits->partition;
         n_singletons = subset_p->find_unpart(filename, traverse,
-                                             stop_big_traversals,
-                                             _report_fn, callback_obj);
+                                             stop_big_traversals);
     } catch (_khmer_signal &e) {
         return NULL;
     }
 
-    return PyInt_FromLong(n_singletons);
+    return PyLong_FromLong(n_singletons);
 
 }
 
-static PyObject * hashbits_filter_if_present(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_filter_if_present(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
     const char * output = NULL;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "ss|O", &filename, &output, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "ss", &filename, &output)) {
         return NULL;
     }
 
     try {
-        hashbits->filter_if_present(filename, output, _report_fn, callback_obj);
+        hashbits->filter_if_present(filename, output);
     } catch (_khmer_signal &e) {
         return NULL;
     }
@@ -2701,9 +2805,10 @@ static PyObject * hashbits_filter_if_present(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_save_partitionmap(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_save_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2712,14 +2817,20 @@ static PyObject * hashbits_save_partitionmap(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    hashbits->partition->save_partitionmap(filename);
+    try {
+        hashbits->partition->save_partitionmap(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_load_partitionmap(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_load_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2733,10 +2844,10 @@ static PyObject * hashbits_load_partitionmap(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits__validate_partitionmap(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits__validate_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -2748,9 +2859,10 @@ static PyObject * hashbits__validate_partitionmap(PyObject * self,
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_count_partitions(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_count_partitions(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -2764,8 +2876,9 @@ static PyObject * hashbits_count_partitions(PyObject * self, PyObject * args)
                          (Py_ssize_t) n_unassigned);
 }
 
-static PyObject * hashbits_subset_count_partitions(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_subset_count_partitions(khmer_KHashbits_Object * me, PyObject * args)
 {
     PyObject * subset_obj = NULL;
 
@@ -2783,7 +2896,9 @@ static PyObject * hashbits_subset_count_partitions(PyObject * self,
                          (Py_ssize_t) n_unassigned);
 }
 
-static PyObject * hashbits_subset_partition_size_distribution(PyObject * self,
+static
+PyObject *
+hashbits_subset_partition_size_distribution(khmer_KHashbits_Object * me,
         PyObject * args)
 {
     PyObject * subset_obj = NULL;
@@ -2826,9 +2941,10 @@ static PyObject * hashbits_subset_partition_size_distribution(PyObject * self,
     return returnValue;
 }
 
-static PyObject * hashbits_load(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_load(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2847,9 +2963,10 @@ static PyObject * hashbits_load(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_save(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_save(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2858,14 +2975,20 @@ static PyObject * hashbits_save(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    hashbits->save(filename);
+    try {
+        hashbits->save(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_load_tagset(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_load_tagset(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2890,9 +3013,10 @@ static PyObject * hashbits_load_tagset(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_save_tagset(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_save_tagset(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2901,13 +3025,19 @@ static PyObject * hashbits_save_tagset(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    hashbits->save_tagset(filename);
+    try {
+        hashbits->save_tagset(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_save_subset_partitionmap(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_save_subset_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
 {
     const char * filename = NULL;
     PyObject * subset_obj = NULL;
@@ -2921,17 +3051,22 @@ static PyObject * hashbits_save_subset_partitionmap(PyObject * self,
 
+    std::string err;
+    bool file_exception = false;
+
     Py_BEGIN_ALLOW_THREADS
 
-    subset_p->save_partitionmap(filename);
+    try {
+        subset_p->save_partitionmap(filename);
+    } catch (khmer_file_exception &e) {
+        // the GIL is released in this block, so only record the failure;
+        // the Python exception may be set only after Py_END_ALLOW_THREADS
+        err = e.what();
+        file_exception = true;
+    }
 
     Py_END_ALLOW_THREADS
 
+    if (file_exception) {
+        PyErr_SetString(PyExc_IOError, err.c_str());
+        return NULL;
+    }
+
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_load_subset_partitionmap(PyObject * self,
-        PyObject * args)
-{
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
+static
+PyObject *
+hashbits_load_subset_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
+{
     Hashbits * hashbits = me->hashbits;
 
     const char * filename = NULL;
@@ -2941,7 +3076,11 @@ static PyObject * hashbits_load_subset_partitionmap(PyObject * self,
     }
 
     SubsetPartition * subset_p;
-    subset_p = new SubsetPartition(hashbits);
+    try {
+        subset_p = new SubsetPartition(hashbits);
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
+    }
 
     bool fail = false;
     std::string err;
@@ -2966,9 +3105,10 @@ static PyObject * hashbits_load_subset_partitionmap(PyObject * self,
     }
 }
 
-static PyObject * hashbits__set_tag_density(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits__set_tag_density(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     unsigned int d;
@@ -2981,9 +3121,10 @@ static PyObject * hashbits__set_tag_density(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits__get_tag_density(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits__get_tag_density(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -2992,11 +3133,13 @@ static PyObject * hashbits__get_tag_density(PyObject * self, PyObject * args)
 
     unsigned int d = hashbits->_get_tag_density();
 
-    return PyInt_FromLong(d);
+    return PyLong_FromLong(d);
 }
 
-static PyObject * hashbits__validate_subset_partitionmap(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits__validate_subset_partitionmap(khmer_KHashbits_Object * me,
+                                       PyObject * args)
 {
     PyObject * subset_obj = NULL;
 
@@ -3011,9 +3154,10 @@ static PyObject * hashbits__validate_subset_partitionmap(PyObject * self,
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_set_partition_id(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_set_partition_id(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer = NULL;
@@ -3028,9 +3172,10 @@ static PyObject * hashbits_set_partition_id(PyObject * self, PyObject * args)
     Py_RETURN_NONE;
 }
 
-static PyObject * hashbits_join_partitions(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_join_partitions(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     PartitionID p1 = 0, p2 = 0;
@@ -3041,12 +3186,13 @@ static PyObject * hashbits_join_partitions(PyObject * self, PyObject * args)
 
     p1 = hashbits->partition->join_partitions(p1, p2);
 
-    return PyInt_FromLong(p1);
+    return PyLong_FromLong(p1);
 }
 
-static PyObject * hashbits_get_partition_id(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get_partition_id(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer = NULL;
@@ -3058,13 +3204,13 @@ static PyObject * hashbits_get_partition_id(PyObject * self, PyObject * args)
     PartitionID partition_id;
     partition_id = hashbits->partition->get_partition_id(kmer);
 
-    return PyInt_FromLong(partition_id);
+    return PyLong_FromLong(partition_id);
 }
 
-static PyObject * hashbits_is_single_partition(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_is_single_partition(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * seq = NULL;
@@ -3086,10 +3232,10 @@ static PyObject * hashbits_is_single_partition(PyObject * self,
     return val;
 }
 
-static PyObject * hashbits_divide_tags_into_subsets(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_divide_tags_into_subsets(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     unsigned int subset_size = 0;
@@ -3111,10 +3257,10 @@ static PyObject * hashbits_divide_tags_into_subsets(PyObject * self,
     return x;
 }
 
-static PyObject * hashbits_count_kmers_within_radius(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_count_kmers_within_radius(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * kmer = NULL;
@@ -3139,9 +3285,10 @@ static PyObject * hashbits_count_kmers_within_radius(PyObject * self,
     return PyLong_FromUnsignedLong(n);
 }
 
-static PyObject * hashbits_get_ksize(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get_ksize(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -3150,13 +3297,14 @@ static PyObject * hashbits_get_ksize(PyObject * self, PyObject * args)
 
     unsigned int k = hashbits->ksize();
 
-    return PyInt_FromLong(k);
+    return PyLong_FromLong(k);
 }
 
 
-static PyObject * hashbits_get_hashsizes(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get_hashsizes(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -3173,10 +3321,10 @@ static PyObject * hashbits_get_hashsizes(PyObject * self, PyObject * args)
     return x;
 }
 
-static PyObject * hashbits_extract_unique_paths(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hashbits_extract_unique_paths(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * sequence = NULL;
@@ -3195,15 +3343,16 @@ static PyObject * hashbits_extract_unique_paths(PyObject * self,
     }
 
     for (unsigned int i = 0; i < results.size(); i++) {
-        PyList_SET_ITEM(x, i, PyString_FromString(results[i].c_str()));
+        PyList_SET_ITEM(x, i, PyBytes_FromString(results[i].c_str()));
     }
 
     return x;
 }
 
-static PyObject * hashbits_get_median_count(PyObject * self, PyObject * args)
+static
+PyObject *
+hashbits_get_median_count(khmer_KHashbits_Object * me, PyObject * args)
 {
-    khmer_KHashbitsObject * me = (khmer_KHashbitsObject *) self;
     Hashbits * hashbits = me->hashbits;
 
     const char * long_str;
@@ -3227,80 +3376,74 @@ static PyObject * hashbits_get_median_count(PyObject * self, PyObject * args)
 }
 
 static PyMethodDef khmer_hashbits_methods[] = {
-    { "extract_unique_paths", hashbits_extract_unique_paths, METH_VARARGS, "" },
-    { "ksize", hashbits_get_ksize, METH_VARARGS, "" },
-    { "hashsizes", hashbits_get_hashsizes, METH_VARARGS, "" },
-    { "n_occupied", hashbits_n_occupied, METH_VARARGS, "Count the number of occupied bins" },
-    { "n_unique_kmers", hashbits_n_unique_kmers,  METH_VARARGS, "Count the number of unique kmers" },
-    { "count", hashbits_count, METH_VARARGS, "Count the given kmer" },
-    { "count_overlap", hashbits_count_overlap, METH_VARARGS, "Count overlap kmers in two datasets" },
-    { "consume", hashbits_consume, METH_VARARGS, "Count all k-mers in the given string" },
-    { "load_stop_tags", hashbits_load_stop_tags, METH_VARARGS, "" },
-    { "save_stop_tags", hashbits_save_stop_tags, METH_VARARGS, "" },
-    { "print_stop_tags", hashbits_print_stop_tags, METH_VARARGS, "" },
-    { "print_tagset", hashbits_print_tagset, METH_VARARGS, "" },
-    { "get", hashbits_get, METH_VARARGS, "Get the count for the given k-mer" },
-    { "calc_connected_graph_size", hashbits_calc_connected_graph_size, METH_VARARGS, "" },
-    { "kmer_degree", hashbits_kmer_degree, METH_VARARGS, "" },
-    { "trim_on_stoptags", hashbits_trim_on_stoptags, METH_VARARGS, "" },
-    { "identify_stoptags_by_position", hashbits_identify_stoptags_by_position, METH_VARARGS, "" },
-    { "do_subset_partition", hashbits_do_subset_partition, METH_VARARGS, "" },
-    { "find_all_tags", hashbits_find_all_tags, METH_VARARGS, "" },
-    { "assign_partition_id", hashbits_assign_partition_id, METH_VARARGS, "" },
-    { "output_partitions", hashbits_output_partitions, METH_VARARGS, "" },
-    { "find_unpart", hashbits_find_unpart, METH_VARARGS, "" },
-    { "filter_if_present", hashbits_filter_if_present, METH_VARARGS, "" },
-    { "add_tag", hashbits_add_tag, METH_VARARGS, "" },
-    { "add_stop_tag", hashbits_add_stop_tag, METH_VARARGS, "" },
-    { "get_stop_tags", hashbits_get_stop_tags, METH_VARARGS, "" },
-    { "get_tagset", hashbits_get_tagset, METH_VARARGS, "" },
-    { "load", hashbits_load, METH_VARARGS, "" },
-    { "save", hashbits_save, METH_VARARGS, "" },
-    { "load_tagset", hashbits_load_tagset, METH_VARARGS, "" },
-    { "save_tagset", hashbits_save_tagset, METH_VARARGS, "" },
-    { "n_tags", hashbits_n_tags, METH_VARARGS, "" },
-    { "divide_tags_into_subsets", hashbits_divide_tags_into_subsets, METH_VARARGS, "" },
-    { "load_partitionmap", hashbits_load_partitionmap, METH_VARARGS, "" },
-    { "save_partitionmap", hashbits_save_partitionmap, METH_VARARGS, "" },
-    { "_validate_partitionmap", hashbits__validate_partitionmap, METH_VARARGS, "" },
-    { "_get_tag_density", hashbits__get_tag_density, METH_VARARGS, "" },
-    { "_set_tag_density", hashbits__set_tag_density, METH_VARARGS, "" },
-    { "consume_fasta", hashbits_consume_fasta, METH_VARARGS, "Count all k-mers in a given file" },
-    { "consume_fasta_with_reads_parser", hashbits_consume_fasta_with_reads_parser, METH_VARARGS, "Count all k-mers in a given file" },
-    { "consume_fasta_and_tag", hashbits_consume_fasta_and_tag, METH_VARARGS, "Count all k-mers in a given file" },
+    { "extract_unique_paths", (PyCFunction)hashbits_extract_unique_paths, METH_VARARGS, "" },
+    { "ksize", (PyCFunction)hashbits_get_ksize, METH_VARARGS, "" },
+    { "hashsizes", (PyCFunction)hashbits_get_hashsizes, METH_VARARGS, "" },
+    { "n_occupied", (PyCFunction)hashbits_n_occupied, METH_VARARGS, "Count the number of occupied bins" },
+    { "n_unique_kmers", (PyCFunction)hashbits_n_unique_kmers,  METH_VARARGS, "Count the number of unique kmers" },
+    { "count", (PyCFunction)hashbits_count, METH_VARARGS, "Count the given kmer" },
+    { "count_overlap", (PyCFunction)hashbits_count_overlap, METH_VARARGS, "Count overlap kmers in two datasets" },
+    { "consume", (PyCFunction)hashbits_consume, METH_VARARGS, "Count all k-mers in the given string" },
+    { "load_stop_tags", (PyCFunction)hashbits_load_stop_tags, METH_VARARGS, "" },
+    { "save_stop_tags", (PyCFunction)hashbits_save_stop_tags, METH_VARARGS, "" },
+    { "print_stop_tags", (PyCFunction)hashbits_print_stop_tags, METH_VARARGS, "" },
+    { "print_tagset", (PyCFunction)hashbits_print_tagset, METH_VARARGS, "" },
+    { "get", (PyCFunction)hashbits_get, METH_VARARGS, "Get the count for the given k-mer" },
+    { "calc_connected_graph_size", (PyCFunction)hashbits_calc_connected_graph_size, METH_VARARGS, "" },
+    { "kmer_degree", (PyCFunction)hashbits_kmer_degree, METH_VARARGS, "" },
+    { "trim_on_stoptags", (PyCFunction)hashbits_trim_on_stoptags, METH_VARARGS, "" },
+    { "identify_stoptags_by_position", (PyCFunction)hashbits_identify_stoptags_by_position, METH_VARARGS, "" },
+    { "do_subset_partition", (PyCFunction)hashbits_do_subset_partition, METH_VARARGS, "" },
+    { "find_all_tags", (PyCFunction)hashbits_find_all_tags, METH_VARARGS, "" },
+    { "assign_partition_id", (PyCFunction)hashbits_assign_partition_id, METH_VARARGS, "" },
+    { "output_partitions", (PyCFunction)hashbits_output_partitions, METH_VARARGS, "" },
+    { "find_unpart", (PyCFunction)hashbits_find_unpart, METH_VARARGS, "" },
+    { "filter_if_present", (PyCFunction)hashbits_filter_if_present, METH_VARARGS, "" },
+    { "add_tag", (PyCFunction)hashbits_add_tag, METH_VARARGS, "" },
+    { "add_stop_tag", (PyCFunction)hashbits_add_stop_tag, METH_VARARGS, "" },
+    { "get_stop_tags", (PyCFunction)hashbits_get_stop_tags, METH_VARARGS, "" },
+    { "get_tagset", (PyCFunction)hashbits_get_tagset, METH_VARARGS, "" },
+    { "load", (PyCFunction)hashbits_load, METH_VARARGS, "" },
+    { "save", (PyCFunction)hashbits_save, METH_VARARGS, "" },
+    { "load_tagset", (PyCFunction)hashbits_load_tagset, METH_VARARGS, "" },
+    { "save_tagset", (PyCFunction)hashbits_save_tagset, METH_VARARGS, "" },
+    { "n_tags", (PyCFunction)hashbits_n_tags, METH_VARARGS, "" },
+    { "divide_tags_into_subsets", (PyCFunction)hashbits_divide_tags_into_subsets, METH_VARARGS, "" },
+    { "load_partitionmap", (PyCFunction)hashbits_load_partitionmap, METH_VARARGS, "" },
+    { "save_partitionmap", (PyCFunction)hashbits_save_partitionmap, METH_VARARGS, "" },
+    { "_validate_partitionmap", (PyCFunction)hashbits__validate_partitionmap, METH_VARARGS, "" },
+    { "_get_tag_density", (PyCFunction)hashbits__get_tag_density, METH_VARARGS, "" },
+    { "_set_tag_density", (PyCFunction)hashbits__set_tag_density, METH_VARARGS, "" },
+    { "consume_fasta", (PyCFunction)hashbits_consume_fasta, METH_VARARGS, "Count all k-mers in a given file" },
+    { "consume_fasta_with_reads_parser", (PyCFunction)hashbits_consume_fasta_with_reads_parser, METH_VARARGS, "Count all k-mers in a given file" },
+    { "consume_fasta_and_tag", (PyCFunction)hashbits_consume_fasta_and_tag, METH_VARARGS, "Count all k-mers in a given file" },
     {
-        "consume_fasta_and_tag_with_reads_parser", hashbits_consume_fasta_and_tag_with_reads_parser,
+        "consume_fasta_and_tag_with_reads_parser", (PyCFunction)hashbits_consume_fasta_and_tag_with_reads_parser,
         METH_VARARGS, "Count all k-mers using a given reads parser"
     },
-    { "consume_fasta_and_traverse", hashbits_consume_fasta_and_traverse, METH_VARARGS, "" },
-    { "consume_fasta_and_tag_with_stoptags", hashbits_consume_fasta_and_tag_with_stoptags, METH_VARARGS, "Count all k-mers in a given file" },
-    { "consume_partitioned_fasta", hashbits_consume_partitioned_fasta, METH_VARARGS, "Count all k-mers in a given file" },
-    { "join_partitions_by_path", hashbits_join_partitions_by_path, METH_VARARGS, "" },
-    { "merge_subset", hashbits_merge_subset, METH_VARARGS, "" },
-    { "merge_subset_from_disk", hashbits_merge_from_disk, METH_VARARGS, "" },
-    { "count_partitions", hashbits_count_partitions, METH_VARARGS, "" },
-    { "subset_count_partitions", hashbits_subset_count_partitions, METH_VARARGS, "" },
-    { "subset_partition_size_distribution", hashbits_subset_partition_size_distribution, METH_VARARGS, "" },
-    { "save_subset_partitionmap", hashbits_save_subset_partitionmap, METH_VARARGS },
-    { "load_subset_partitionmap", hashbits_load_subset_partitionmap, METH_VARARGS },
-    { "_validate_subset_partitionmap", hashbits__validate_subset_partitionmap, METH_VARARGS, "" },
-    { "set_partition_id", hashbits_set_partition_id, METH_VARARGS, "" },
-    { "join_partitions", hashbits_join_partitions, METH_VARARGS, "" },
-    { "get_partition_id", hashbits_get_partition_id, METH_VARARGS, "" },
-    { "is_single_partition", hashbits_is_single_partition, METH_VARARGS, "" },
-    { "count_kmers_within_radius", hashbits_count_kmers_within_radius, METH_VARARGS, "" },
-    { "traverse_from_tags", hashbits_traverse_from_tags, METH_VARARGS, "" },
-    { "repartition_largest_partition", hashbits_repartition_largest_partition, METH_VARARGS, "" },
-    { "get_median_count", hashbits_get_median_count, METH_VARARGS, "Get the median, average, and stddev of the k-mer counts in the string" },
+    { "consume_fasta_and_traverse", (PyCFunction)hashbits_consume_fasta_and_traverse, METH_VARARGS, "" },
+    { "consume_fasta_and_tag_with_stoptags", (PyCFunction)hashbits_consume_fasta_and_tag_with_stoptags, METH_VARARGS, "Count all k-mers in a given file" },
+    { "consume_partitioned_fasta", (PyCFunction)hashbits_consume_partitioned_fasta, METH_VARARGS, "Count all k-mers in a given file" },
+    { "join_partitions_by_path", (PyCFunction)hashbits_join_partitions_by_path, METH_VARARGS, "" },
+    { "merge_subset", (PyCFunction)hashbits_merge_subset, METH_VARARGS, "" },
+    { "merge_subset_from_disk", (PyCFunction)hashbits_merge_from_disk, METH_VARARGS, "" },
+    { "count_partitions", (PyCFunction)hashbits_count_partitions, METH_VARARGS, "" },
+    { "subset_count_partitions", (PyCFunction)hashbits_subset_count_partitions, METH_VARARGS, "" },
+    { "subset_partition_size_distribution", (PyCFunction)hashbits_subset_partition_size_distribution, METH_VARARGS, "" },
+    { "save_subset_partitionmap", (PyCFunction)hashbits_save_subset_partitionmap, METH_VARARGS },
+    { "load_subset_partitionmap", (PyCFunction)hashbits_load_subset_partitionmap, METH_VARARGS },
+    { "_validate_subset_partitionmap", (PyCFunction)hashbits__validate_subset_partitionmap, METH_VARARGS, "" },
+    { "set_partition_id", (PyCFunction)hashbits_set_partition_id, METH_VARARGS, "" },
+    { "join_partitions", (PyCFunction)hashbits_join_partitions, METH_VARARGS, "" },
+    { "get_partition_id", (PyCFunction)hashbits_get_partition_id, METH_VARARGS, "" },
+    { "is_single_partition", (PyCFunction)hashbits_is_single_partition, METH_VARARGS, "" },
+    { "count_kmers_within_radius", (PyCFunction)hashbits_count_kmers_within_radius, METH_VARARGS, "" },
+    { "traverse_from_tags", (PyCFunction)hashbits_traverse_from_tags, METH_VARARGS, "" },
+    { "repartition_largest_partition", (PyCFunction)hashbits_repartition_largest_partition, METH_VARARGS, "" },
+    { "get_median_count", (PyCFunction)hashbits_get_median_count, METH_VARARGS, "Get the median, average, and stddev of the k-mer counts in the string" },
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
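
For orientation, a minimal sketch of how this method table is exercised from
Python once the type is registered (the module name _khmer follows the tp_name
prefixes used below; the k-mer and table-size values are illustrative):

    from _khmer import Hashbits

    hb = Hashbits(20, [1e6, 1e6 + 1])      # ksize, then a list of table sizes
    hb.consume("ACGTACGTACGTACGTACGTA")    # count every 20-mer in the string
    print(hb.get("ACGTACGTACGTACGTACGT"))  # presence/absence of one k-mer
    print(hb.n_occupied())                 # number of occupied bins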
 
-static PyObject *
-khmer_hashbits_getattr(PyObject * obj, char * name)
-{
-    return Py_FindMethod(khmer_hashbits_methods, obj, name);
-}
-
 // __new__ for hashbits; necessary for proper subclassing
 // This will essentially do what the old factory function did. Unlike many __new__
 // methods, we take our arguments here, because there's no "uninitialized" hashbits
@@ -3308,8 +3451,8 @@ khmer_hashbits_getattr(PyObject * obj, char * name)
 static PyObject* khmer_hashbits_new(PyTypeObject * type, PyObject * args,
                                     PyObject * kwds)
 {
-    khmer_KHashbitsObject * self;
-    self = (khmer_KHashbitsObject *)type->tp_alloc(type, 0);
+    khmer_KHashbits_Object * self;
+    self = (khmer_KHashbits_Object *)type->tp_alloc(type, 0);
 
     if (self != NULL) {
         WordLength k = 0;
@@ -3324,10 +3467,10 @@ static PyObject* khmer_hashbits_new(PyTypeObject * type, PyObject * args,
         Py_ssize_t sizes_list_o_length = PyList_GET_SIZE(sizes_list_o);
         for (Py_ssize_t i = 0; i < sizes_list_o_length; i++) {
             PyObject * size_o = PyList_GET_ITEM(sizes_list_o, i);
-            if (PyInt_Check(size_o)) {
-                sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
-            } else if (PyLong_Check(size_o)) {
+            if (PyLong_Check(size_o)) {
                 sizes.push_back((HashIntoType) PyLong_AsUnsignedLongLong(size_o));
+            } else if (PyInt_Check(size_o)) {
+                sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
             } else if (PyFloat_Check(size_o)) {
                 sizes.push_back((HashIntoType) PyFloat_AS_DOUBLE(size_o));
             } else {
@@ -3338,26 +3481,30 @@ static PyObject* khmer_hashbits_new(PyTypeObject * type, PyObject * args,
             }
         }
 
-        self->hashbits = new Hashbits(k, sizes);
+        try {
+            self->hashbits = new Hashbits(k, sizes);
+        } catch (std::bad_alloc &e) {
+            Py_DECREF(self);  // free the half-built object, as labelhash_new does
+            return PyErr_NoMemory();
+        }
     }
     return (PyObject *) self;
 }
 
 // there are no attributes that we need at this time, so we'll just return 0
-static int khmer_hashbits_init(khmer_KHashbitsObject * self, PyObject * args,
+static int khmer_hashbits_init(khmer_KHashbits_Object * self, PyObject * args,
                                PyObject * kwds)
 {
     return 0;
 }
 
-#define is_hashbits_obj(v)  ((v)->ob_type == &khmer_KHashbitsType)
+#define is_hashbits_obj(v)  (Py_TYPE(v) == &khmer_KHashbits_Type)
 
 ////////////////////////////////////////////////////////////////////////////
 
-static PyObject * subset_count_partitions(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+subset_count_partitions(khmer_KSubsetPartition_Object * me, PyObject * args)
 {
-    khmer_KSubsetPartitionObject * me = (khmer_KSubsetPartitionObject *) self;
     SubsetPartition * subset_p = me->subset;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -3371,10 +3518,10 @@ static PyObject * subset_count_partitions(PyObject * self,
                          (Py_ssize_t) n_unassigned);
 }
 
-static PyObject * subset_report_on_partitions(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+subset_report_on_partitions(khmer_KSubsetPartition_Object * me, PyObject * args)
 {
-    khmer_KSubsetPartitionObject * me = (khmer_KSubsetPartitionObject *) self;
     SubsetPartition * subset_p = me->subset;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -3386,10 +3533,10 @@ static PyObject * subset_report_on_partitions(PyObject * self,
     Py_RETURN_NONE;
 }
 
-static PyObject * subset_compare_partitions(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+subset_compare_partitions(khmer_KSubsetPartition_Object * me, PyObject * args)
 {
-    khmer_KSubsetPartitionObject * me = (khmer_KSubsetPartitionObject *) self;
     SubsetPartition * subset1_p = me->subset;
 
     PyObject * subset2_obj = NULL;
@@ -3400,8 +3547,8 @@ static PyObject * subset_compare_partitions(PyObject * self,
         return NULL;
     }
 
-    khmer_KSubsetPartitionObject *other = (khmer_KSubsetPartitionObject *)
-                                          subset2_obj;
+    khmer_KSubsetPartition_Object *other = (khmer_KSubsetPartition_Object *)
+                                           subset2_obj;
     SubsetPartition * subset2_p = other->subset;
 
     unsigned int n_only1 = 0, n_only2 = 0, n_shared = 0;
@@ -3411,10 +3558,11 @@ static PyObject * subset_compare_partitions(PyObject * self,
     return Py_BuildValue("III", n_only1, n_only2, n_shared);
 }
 
-static PyObject * subset_partition_size_distribution(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+subset_partition_size_distribution(khmer_KSubsetPartition_Object * me,
+                                   PyObject * args)
 {
-    khmer_KSubsetPartitionObject * me = (khmer_KSubsetPartitionObject *) self;
     SubsetPartition * subset_p = me->subset;
 
     if (!PyArg_ParseTuple(args, "")) {
@@ -3449,10 +3597,10 @@ static PyObject * subset_partition_size_distribution(PyObject * self,
     return ret;
 }
 
-static PyObject * subset_partition_sizes(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+subset_partition_sizes(khmer_KSubsetPartition_Object * me, PyObject * args)
 {
-    khmer_KSubsetPartitionObject * me = (khmer_KSubsetPartitionObject *) self;
     SubsetPartition * subset_p = me->subset;
 
     unsigned int min_size = 0;
@@ -3495,15 +3643,16 @@ static PyObject * subset_partition_sizes(PyObject * self,
     return ret;
 }
 
-static PyObject * subset_partition_average_coverages(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+subset_partition_average_coverages(khmer_KSubsetPartition_Object * me,
+                                   PyObject * args)
 {
-    khmer_KSubsetPartitionObject * me = (khmer_KSubsetPartitionObject *) self;
     SubsetPartition * subset_p = me->subset;
 
-    khmer_KCountingHashObject * counting_o;
+    khmer_KCountingHash_Object * counting_o;
 
-    if (!PyArg_ParseTuple(args, "O!", &khmer_KCountingHashType, &counting_o)) {
+    if (!PyArg_ParseTuple(args, "O!", &khmer_KCountingHash_Type, &counting_o)) {
         return NULL;
     }
 
@@ -3530,21 +3679,45 @@ static PyObject * subset_partition_average_coverages(PyObject * self,
 }
 
 static PyMethodDef khmer_subset_methods[] = {
-    { "count_partitions", subset_count_partitions, METH_VARARGS, "" },
-    { "report_on_partitions", subset_report_on_partitions, METH_VARARGS, "" },
-    { "compare_partitions", subset_compare_partitions, METH_VARARGS, "" },
-    { "partition_size_distribution", subset_partition_size_distribution, METH_VARARGS, "" },
-    { "partition_sizes", subset_partition_sizes, METH_VARARGS, "" },
-    { "partition_average_coverages", subset_partition_average_coverages, METH_VARARGS, "" },
+    {
+        "count_partitions",
+        (PyCFunction)subset_count_partitions,
+        METH_VARARGS,
+        ""
+    },
+    {
+        "report_on_partitions",
+        (PyCFunction)subset_report_on_partitions,
+        METH_VARARGS,
+        ""
+    },
+    {
+        "compare_partitions",
+        (PyCFunction)subset_compare_partitions,
+        METH_VARARGS,
+        ""
+    },
+    {
+        "partition_size_distribution",
+        (PyCFunction)subset_partition_size_distribution,
+        METH_VARARGS,
+        ""
+    },
+    {
+        "partition_sizes",
+        (PyCFunction)subset_partition_sizes,
+        METH_VARARGS,
+        ""
+    },
+    {
+        "partition_average_coverages",
+        (PyCFunction)subset_partition_average_coverages,
+        METH_VARARGS,
+        ""
+    },
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
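
These methods hang off subset objects returned by Hashbits.do_subset_partition;
a sketch of the intended call chain (the partition-boundary arguments are
hypothetical):

    subset = hb.do_subset_partition(0, 0)   # hb as in the Hashbits sketch above
    n_partitions, n_unassigned = subset.count_partitions()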
 
-static PyObject *
-khmer_subset_getattr(PyObject * obj, char * name)
-{
-    return Py_FindMethod(khmer_subset_methods, obj, name);
-}
-
 /////////////////
 // LabelHash
 /////////////////
@@ -3552,31 +3725,27 @@ khmer_subset_getattr(PyObject * obj, char * name)
 // LabelHash addition
 typedef struct {
     //PyObject_HEAD
-    khmer_KHashbitsObject khashbits;
+    khmer_KHashbits_Object khashbits;
     LabelHash * labelhash;
-} khmer_KLabelHashObject;
+} khmer_KLabelHash_Object;
 
-static void khmer_labelhash_dealloc(PyObject *);
-static int khmer_labelhash_init(khmer_KLabelHashObject * self, PyObject *args,
+static int khmer_labelhash_init(khmer_KLabelHash_Object * self, PyObject *args,
                                 PyObject *kwds);
 static PyObject * khmer_labelhash_new(PyTypeObject * type, PyObject *args,
                                       PyObject *kwds);
 
-#define is_labelhash_obj(v)  ((v)->ob_type == &khmer_KLabelHashType)
+#define is_labelhash_obj(v)  (Py_TYPE(v) == &khmer_KLabelHash_Type)
 
 //
 // khmer_labelhash_dealloc -- clean up a labelhash object.
 //
 
-static void khmer_labelhash_dealloc(PyObject* obj)
+static void khmer_labelhash_dealloc(khmer_KLabelHash_Object * obj)
 {
-    khmer_KLabelHashObject * self = (khmer_KLabelHashObject *) obj;
-
-    delete self->labelhash;
-    self->labelhash = NULL;
+    delete obj->labelhash;
+    obj->labelhash = NULL;
 
-    obj->ob_type->tp_free((PyObject*)self);
-    //PyObject_Del((PyObject *) obj);
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
 }
 
 // a little weird; we don't actually want to call Hashbits' new method. Rather, we
@@ -3585,8 +3754,8 @@ static void khmer_labelhash_dealloc(PyObject* obj)
 static PyObject * khmer_labelhash_new(PyTypeObject *type, PyObject *args,
                                       PyObject *kwds)
 {
-    khmer_KLabelHashObject *self;
-    self = (khmer_KLabelHashObject*)type->tp_alloc(type, 0);
+    khmer_KLabelHash_Object *self;
+    self = (khmer_KLabelHash_Object*)type->tp_alloc(type, 0);
 
     if (self != NULL) {
         WordLength k = 0;
@@ -3601,10 +3770,10 @@ static PyObject * khmer_labelhash_new(PyTypeObject *type, PyObject *args,
         Py_ssize_t sizes_list_o_length = PyList_GET_SIZE(sizes_list_o);
         for (Py_ssize_t i = 0; i < sizes_list_o_length; i++) {
             PyObject * size_o = PyList_GET_ITEM(sizes_list_o, i);
-            if (PyInt_Check(size_o)) {
-                sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
-            } else if (PyLong_Check(size_o)) {
+            if (PyLong_Check(size_o)) {
                 sizes.push_back((HashIntoType) PyLong_AsUnsignedLongLong(size_o));
+            } else if (PyInt_Check(size_o)) {
+                sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
             } else if (PyFloat_Check(size_o)) {
                 sizes.push_back((HashIntoType) PyFloat_AS_DOUBLE(size_o));
             } else {
@@ -3618,17 +3787,22 @@ static PyObject * khmer_labelhash_new(PyTypeObject *type, PyObject *args,
 
         // We want the hashbits pointer in the base class to point to our labelhash,
         // so that the KHashbits methods are called on the correct object (a LabelHash)
-        self->labelhash = new LabelHash(k, sizes);
+        try {
+            self->labelhash = new LabelHash(k, sizes);
+        } catch (std::bad_alloc &e) {
+            Py_DECREF(self);
+            return PyErr_NoMemory();
+        }
         self->khashbits.hashbits = (Hashbits *)self->labelhash;
     }
 
     return (PyObject *) self;
 }
 
-static int khmer_labelhash_init(khmer_KLabelHashObject * self, PyObject *args,
+static int khmer_labelhash_init(khmer_KLabelHash_Object * self, PyObject *args,
                                 PyObject *kwds)
 {
-    if (khmer_KHashbitsType.tp_init((PyObject *)self, args, kwds) < 0) {
+    if (khmer_KHashbits_Type.tp_init((PyObject *)self, args, kwds) < 0) {
         return -1;
     }
     //std::cout << "testing my pointer ref to hashbits: " << self->khashbits.hashbits->n_tags() << std::endl;
@@ -3637,9 +3811,10 @@ static int khmer_labelhash_init(khmer_KLabelHashObject * self, PyObject *args,
     return 0;
 }
 
-static PyObject * labelhash_get_label_dict(PyObject * self, PyObject * args)
+static
+PyObject *
+labelhash_get_label_dict(khmer_KLabelHash_Object * me, PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * hb = me->labelhash;
 
     PyObject * d = PyDict_New();
@@ -3661,18 +3836,18 @@ static PyObject * labelhash_get_label_dict(PyObject * self, PyObject * args)
     return d;
 }
 
-static PyObject * labelhash_consume_fasta_and_tag_with_labels(
-    PyObject * self, PyObject * args)
+static
+PyObject *
+labelhash_consume_fasta_and_tag_with_labels(khmer_KLabelHash_Object * me,
+        PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * hb = me->labelhash;
 
     std::ofstream outfile;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
@@ -3682,7 +3857,7 @@ static PyObject * labelhash_consume_fasta_and_tag_with_labels(
     //Py_BEGIN_ALLOW_THREADS
     try {
         hb->consume_fasta_and_tag_with_labels(filename, total_reads,
-                                              n_consumed, _report_fn, callback_obj);
+                                              n_consumed);
     } catch (_khmer_signal &e) {
         exc = e.get_message().c_str();
     } catch (khmer_file_exception &e) {
@@ -3698,16 +3873,16 @@ static PyObject * labelhash_consume_fasta_and_tag_with_labels(
 
 }
 
-static PyObject * labelhash_consume_partitioned_fasta_and_tag_with_labels(
-    PyObject * self, PyObject * args)
+static
+PyObject *
+labelhash_consume_partitioned_fasta_and_tag_with_labels(
+    khmer_KLabelHash_Object * me, PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * labelhash = me->labelhash;
 
     const char * filename;
-    PyObject * callback_obj = NULL;
 
-    if (!PyArg_ParseTuple(args, "s|O", &filename, &callback_obj)) {
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
@@ -3718,7 +3893,7 @@ static PyObject * labelhash_consume_partitioned_fasta_and_tag_with_labels(
 
     try {
         labelhash->consume_partitioned_fasta_and_tag_with_labels(filename,
-                total_reads, n_consumed, _report_fn, callback_obj);
+                total_reads, n_consumed);
     } catch (_khmer_signal &e) {
         PyErr_SetString(PyExc_IOError,
                         "error parsing in consume_partitioned_fasta_and_tag_with_labels");
@@ -3730,10 +3905,11 @@ static PyObject * labelhash_consume_partitioned_fasta_and_tag_with_labels(
     return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-static PyObject * labelhash_consume_sequence_and_tag_with_labels(
-    PyObject * self, PyObject * args)
+static
+PyObject *
+labelhash_consume_sequence_and_tag_with_labels(khmer_KLabelHash_Object * me,
+        PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * hb = me->labelhash;
     const char * seq = NULL;
     unsigned long long c = 0;
@@ -3751,10 +3927,11 @@ static PyObject * labelhash_consume_sequence_and_tag_with_labels(
     return Py_BuildValue("K", n_consumed);
 }
 
-static PyObject * labelhash_sweep_label_neighborhood(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+labelhash_sweep_label_neighborhood(khmer_KLabelHash_Object * me,
+                                   PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * hb = me->labelhash;
 
     const char * seq = NULL;
@@ -3822,10 +3999,11 @@ static PyObject * labelhash_sweep_label_neighborhood(PyObject * self,
 // Similar to find_all_tags, but returns tags in a way actually usable by Python.
 // We need a tags_in_sequence iterator or function in C++ land for reuse in all
 // these functions.
-static PyObject * labelhash_sweep_tag_neighborhood(PyObject * self,
-        PyObject *args)
+
+static
+PyObject *
+labelhash_sweep_tag_neighborhood(khmer_KLabelHash_Object * me, PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * labelhash = me->labelhash;
 
     const char * seq = NULL;
@@ -3884,11 +4062,10 @@ static PyObject * labelhash_sweep_tag_neighborhood(PyObject * self,
     return x;
 }
 
-
-static PyObject * labelhash_get_tag_labels(PyObject * self, PyObject * args)
+static
+PyObject *
+labelhash_get_tag_labels(khmer_KLabelHash_Object * me, PyObject * args)
 {
-
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * labelhash = me->labelhash;
 
     HashIntoType tag;
@@ -3913,40 +4090,39 @@ static PyObject * labelhash_get_tag_labels(PyObject * self, PyObject * args)
     return x;
 }
 
-static PyObject * labelhash_n_labels(PyObject * self, PyObject * args)
+static
+PyObject *
+labelhash_n_labels(khmer_KLabelHash_Object * me, PyObject * args)
 {
-    khmer_KLabelHashObject * me = (khmer_KLabelHashObject *) self;
     LabelHash * labelhash = me->labelhash;
 
     if (!PyArg_ParseTuple(args, "")) {
         return NULL;
     }
 
-    return PyInt_FromSize_t(labelhash->n_labels());
+    return PyLong_FromSize_t(labelhash->n_labels());
 }
 
 static PyMethodDef khmer_labelhash_methods[] = {
-    { "consume_fasta_and_tag_with_labels", labelhash_consume_fasta_and_tag_with_labels, METH_VARARGS, "" },
-    { "sweep_label_neighborhood", labelhash_sweep_label_neighborhood, METH_VARARGS, "" },
-    {"consume_partitioned_fasta_and_tag_with_labels", labelhash_consume_partitioned_fasta_and_tag_with_labels, METH_VARARGS, "" },
-    {"sweep_tag_neighborhood", labelhash_sweep_tag_neighborhood, METH_VARARGS, "" },
-    {"get_tag_labels", labelhash_get_tag_labels, METH_VARARGS, ""},
-    {"consume_sequence_and_tag_with_labels", labelhash_consume_sequence_and_tag_with_labels, METH_VARARGS, "" },
-    {"n_labels", labelhash_n_labels, METH_VARARGS, ""},
-    {"get_label_dict", labelhash_get_label_dict, METH_VARARGS, "" },
-
+    { "consume_fasta_and_tag_with_labels", (PyCFunction)labelhash_consume_fasta_and_tag_with_labels, METH_VARARGS, "" },
+    { "sweep_label_neighborhood", (PyCFunction)labelhash_sweep_label_neighborhood, METH_VARARGS, "" },
+    {"consume_partitioned_fasta_and_tag_with_labels", (PyCFunction)labelhash_consume_partitioned_fasta_and_tag_with_labels, METH_VARARGS, "" },
+    {"sweep_tag_neighborhood", (PyCFunction)labelhash_sweep_tag_neighborhood, METH_VARARGS, "" },
+    {"get_tag_labels", (PyCFunction)labelhash_get_tag_labels, METH_VARARGS, ""},
+    {"consume_sequence_and_tag_with_labels", (PyCFunction)labelhash_consume_sequence_and_tag_with_labels, METH_VARARGS, "" },
+    {"n_labels", (PyCFunction)labelhash_n_labels, METH_VARARGS, ""},
+    {"get_label_dict", (PyCFunction)labelhash_get_label_dict, METH_VARARGS, "" },
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
 
-static PyTypeObject khmer_KLabelHashType = {
-    PyObject_HEAD_INIT(NULL)
-    0,                       /* ob_size */
-    "_LabelHash",            /* tp_name */
-    sizeof(khmer_KLabelHashObject), /* tp_basicsize */
+static PyTypeObject khmer_KLabelHash_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)  /* init & ob_size */
+    "_khmer.LabelHash",            /* tp_name */
+    sizeof(khmer_KLabelHash_Object), /* tp_basicsize */
     0,                       /* tp_itemsize */
     (destructor)khmer_labelhash_dealloc, /* tp_dealloc */
     0,                       /* tp_print */
-    0,  /* khmer_labelhash_getattr, tp_getattr */
+    0,                       /* tp_getattr */
     0,                       /* tp_setattr */
     0,                       /* tp_compare */
     0,                       /* tp_repr */
@@ -3977,13 +4153,12 @@ static PyTypeObject khmer_KLabelHashType = {
     0,                       /* tp_dictoffset */
     (initproc)khmer_labelhash_init,   /* tp_init */
     0,                       /* tp_alloc */
+    khmer_labelhash_new,      /* tp_new */
 };
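
A sketch of driving the LabelHash type from Python once init_khmer (below)
registers it (the FASTA filename is hypothetical):

    from _khmer import LabelHash

    lh = LabelHash(20, [1e6, 1e6 + 1])                # (ksize, sizes), as for Hashbits
    lh.consume_fasta_and_tag_with_labels("reads.fa")  # tag and label every read
    print(lh.n_labels())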
 
-static PyObject * readaligner_align(PyObject * self, PyObject * args)
+static PyObject * readaligner_align(khmer_ReadAligner_Object * me,
+                                    PyObject * args)
 {
-    khmer_ReadAlignerObject * me = (khmer_ReadAlignerObject *) self;
-    ReadAligner * aligner = me->aligner;
-
     const char * read;
 
     if (!PyArg_ParseTuple(args, "s", &read)) {
@@ -3996,8 +4171,7 @@ static PyObject * readaligner_align(PyObject * self, PyObject * args)
         return NULL;
     }*/
 
-    Alignment * aln;
-    aln = aligner->Align(read);
+    Alignment * aln = me->aligner->Align(read);
 
     const char* alignment = aln->graph_alignment.c_str();
     const char* readAlignment = aln->read_alignment.c_str();
@@ -4009,36 +4183,57 @@ static PyObject * readaligner_align(PyObject * self, PyObject * args)
 }
 
 static PyMethodDef khmer_ReadAligner_methods[] = {
-    {"align", readaligner_align, METH_VARARGS, ""},
-    {NULL, NULL, 0, NULL}
+    {"align", (PyCFunction)readaligner_align, METH_VARARGS, ""},
+    {NULL} /* Sentinel */
 };
 
-static PyObject *
-khmer_readaligner_getattr(PyObject * obj, char * name)
-{
-    return Py_FindMethod(khmer_ReadAligner_methods, obj, name);
-}
-
 //
 // khmer_readaligner_dealloc -- clean up readaligner object
 // GRAPHALIGN addition
 //
-static void khmer_readaligner_dealloc(PyObject* self)
+static void khmer_readaligner_dealloc(khmer_ReadAligner_Object* obj)
 {
-    khmer_ReadAlignerObject * obj = (khmer_ReadAlignerObject *) self;
     delete obj->aligner;
     obj->aligner = NULL;
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
 }
 
+//
+// new_readaligner
+//
+static PyObject* khmer_ReadAligner_new(PyTypeObject *type, PyObject * args,
+                                       PyObject *kwds)
+{
+    khmer_ReadAligner_Object * self;
+
+    self = (khmer_ReadAligner_Object *)type->tp_alloc(type, 0);
+
+    if (self != NULL) {
+        khmer_KCountingHash_Object * ch = NULL;
+        unsigned short int trusted_cov_cutoff = 2;
+        double bits_theta = 1;
+
+        if(!PyArg_ParseTuple(args, "O!Hd", &khmer_KCountingHash_Type, &ch,
+                             &trusted_cov_cutoff, &bits_theta)) {
+            Py_DECREF(self);
+            return NULL;
+        }
+
+        try {
+            self->aligner = new ReadAligner(ch->counting, trusted_cov_cutoff,
+                                            bits_theta);
+        } catch (std::bad_alloc &e) {
+            // mirror the other constructors: free the half-built object
+            Py_DECREF(self);
+            return PyErr_NoMemory();
+        }
+    }
+
+    return (PyObject *) self;
+}
 
 static PyTypeObject khmer_ReadAlignerType = {
-    PyObject_HEAD_INIT(NULL)
-    0,
-    "ReadAligner", sizeof(khmer_ReadAlignerObject),
-    0,
-    khmer_readaligner_dealloc,     /*tp_dealloc*/
+    PyVarObject_HEAD_INIT(NULL, 0) /* init & ob_size */
+    "_khmer.ReadAligner",		    /*tp_name*/
+    sizeof(khmer_ReadAligner_Object),	    /*tp_basicsize*/
+    0,					    /*tp_itemsize*/
+    (destructor)khmer_readaligner_dealloc,  /*tp_dealloc*/
     0,                          /*tp_print*/
-    khmer_readaligner_getattr,     /*tp_getattr*/
+    0,                          /*tp_getattr*/
     0,                          /*tp_setattr*/
     0,                          /*tp_compare*/
     0,                          /*tp_repr*/
@@ -4053,154 +4248,427 @@ static PyTypeObject khmer_ReadAlignerType = {
     0,                          /*tp_as_buffer*/
     Py_TPFLAGS_DEFAULT,         /*tp_flags*/
     "ReadAligner object",           /* tp_doc */
+    0,                         /* tp_traverse */
+    0,                         /* tp_clear */
+    0,                         /* tp_richcompare */
+    0,                         /* tp_weaklistoffset */
+    0,                         /* tp_iter */
+    0,                         /* tp_iternext */
+    khmer_ReadAligner_methods, /* tp_methods */
+    0,                         /* tp_members */
+    0,                         /* tp_getset */
+    0,                         /* tp_base */
+    0,                         /* tp_dict */
+    0,                         /* tp_descr_get */
+    0,                         /* tp_descr_set */
+    0,                         /* tp_dictoffset */
+    0,                         /* tp_init */
+    0,                         /* tp_alloc */
+    khmer_ReadAligner_new,     /* tp_new */
 };
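
Per the "O!Hd" format parsed in khmer_ReadAligner_new above, the aligner is
built from a counting hash, a trusted-coverage cutoff, and bits_theta; a
sketch (the CountingHash constructor arguments are hypothetical):

    from _khmer import CountingHash, ReadAligner

    ch = CountingHash(20, [1e6, 1e6 + 1])  # hypothetical table parameters
    aligner = ReadAligner(ch, 2, 1.0)      # cutoff=2, bits_theta=1.0 (the defaults)
    result = aligner.align("ACGTACGTACGTACGTACGT")  # graph and read alignment strings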
 
-
-//
-// new_readaligner
-//
-static PyObject* new_readaligner(PyObject * self, PyObject * args)
+static PyObject * hash_collect_high_abundance_kmers(khmer_KCountingHash_Object *
+        me, PyObject * args)
 {
-    khmer_KCountingHashObject * ch = NULL;
-    unsigned short int trusted_cov_cutoff = 2;
-    double bits_theta = 1;
+    CountingHash * counting = me->counting;
+
+    const char * filename = NULL;
+    unsigned int lower_count, upper_count;
 
-    if(!PyArg_ParseTuple(args, "O!Hd", &khmer_KCountingHashType, &ch,
-                         &trusted_cov_cutoff, &bits_theta)) {
+    if (!PyArg_ParseTuple(args, "sII", &filename, &lower_count, &upper_count)) {
         return NULL;
     }
 
-    khmer_ReadAlignerObject * readaligner_obj = (khmer_ReadAlignerObject *) \
-            PyObject_New(khmer_ReadAlignerObject, &khmer_ReadAlignerType);
+    SeenSet found_kmers;
+    counting->collect_high_abundance_kmers(filename, lower_count, upper_count,
+                                           found_kmers);
+
+    // create a new hashbits object...
+    std::vector<HashIntoType> sizes;
+    sizes.push_back(1);
 
-    if (readaligner_obj == NULL) {
+    khmer_KHashbits_Object * khashbits_obj = (khmer_KHashbits_Object *) \
+            PyObject_New(khmer_KHashbits_Object, &khmer_KHashbits_Type);
+    if (khashbits_obj == NULL) {
         return NULL;
     }
 
-    readaligner_obj->aligner = new ReadAligner(ch->counting, trusted_cov_cutoff,
-            bits_theta);
+    // ...and set the collected kmers as the stoptags.
+    try {
+        khashbits_obj->hashbits = new Hashbits(counting->ksize(), sizes);
+    } catch (std::bad_alloc &e) {
+        return PyErr_NoMemory();
+    }
+    khashbits_obj->hashbits->stop_tags.swap(found_kmers);
 
-    return (PyObject *) readaligner_obj;
+    return (PyObject *) khashbits_obj;
 }
 
 //
-// new_hashbits
+// khmer_counting_dealloc -- clean up a counting hash object.
 //
 
-static PyObject* _new_hashbits(PyObject * self, PyObject * args)
+static void khmer_counting_dealloc(khmer_KCountingHash_Object * obj)
 {
-    WordLength k = 0;
-    PyListObject * sizes_list_o = NULL;
+    delete obj->counting;
+    obj->counting = NULL;
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
+}
 
-    if (!PyArg_ParseTuple(args, "bO!", &k, &PyList_Type, &sizes_list_o)) {
-        return NULL;
-    }
+//
+// khmer_hashbits_dealloc -- clean up a hashbits object.
+//
+static void khmer_hashbits_dealloc(khmer_KHashbits_Object * obj)
+{
+    delete obj->hashbits;
+    obj->hashbits = NULL;
 
-    std::vector<HashIntoType> sizes;
-    Py_ssize_t sizes_list_o_length = PyList_GET_SIZE(sizes_list_o);
-    for (Py_ssize_t i = 0; i < sizes_list_o_length; i++) {
-        PyObject * size_o = PyList_GET_ITEM(sizes_list_o, i);
-        if (PyInt_Check(size_o)) {
-            sizes.push_back((HashIntoType) PyInt_AsLong(size_o));
-        } else if (PyLong_Check(size_o)) {
-            sizes.push_back((HashIntoType) PyLong_AsUnsignedLongLong(size_o));
-        } else if (PyFloat_Check(size_o)) {
-            sizes.push_back((HashIntoType) PyFloat_AS_DOUBLE(size_o));
-        } else {
-            PyErr_SetString(PyExc_TypeError,
-                            "2nd argument must be a list of ints, longs, or floats");
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
+}
+
+
+//
+// khmer_subset_dealloc -- clean up a subset partition object.
+//
+
+static void khmer_subset_dealloc(khmer_KSubsetPartition_Object * obj)
+{
+    delete obj->subset;
+    obj->subset = NULL;
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
+}
+
+
+/***********************************************************************/
+
+//
+// KHLLCounter object
+//
+
+typedef struct {
+    PyObject_HEAD
+    khmer::HLLCounter * hllcounter;
+} khmer_KHLLCounter_Object;
+
+static PyObject* khmer_hllcounter_new(PyTypeObject * type, PyObject * args,
+                                      PyObject * kwds)
+{
+    khmer_KHLLCounter_Object * self;
+    self = (khmer_KHLLCounter_Object *)type->tp_alloc(type, 0);
+
+    if (self != NULL) {
+        double error_rate = 0.01;
+        WordLength ksize = 20;
+
+        if (!PyArg_ParseTuple(args, "|db", &error_rate, &ksize)) {
+            Py_DECREF(self);
+            return NULL;
+        }
+
+        try {
+            self->hllcounter = new HLLCounter(error_rate, ksize);
+        } catch (InvalidValue &e) {
+            Py_DECREF(self);
+            PyErr_SetString(PyExc_ValueError, e.what());
             return NULL;
         }
     }
 
+    return (PyObject *) self;
+}
 
-    khmer_KHashbitsObject * khashbits_obj = (khmer_KHashbitsObject *) \
-                                            PyObject_New(khmer_KHashbitsObject, &khmer_KHashbitsType);
+//
+// khmer_hllcounter_dealloc -- clean up a hllcounter object.
+//
 
-    if (khashbits_obj == NULL) {
+static void khmer_hllcounter_dealloc(khmer_KHLLCounter_Object * obj)
+{
+    delete obj->hllcounter;
+    obj->hllcounter = NULL;
+
+    Py_TYPE(obj)->tp_free((PyObject*)obj);
+}
+
+static
+PyObject *
+hllcounter_add(khmer_KHLLCounter_Object * me, PyObject * args)
+{
+    const char * kmer_str;
+
+    if (!PyArg_ParseTuple(args, "s", &kmer_str)) {
         return NULL;
     }
 
-    khashbits_obj->hashbits = new Hashbits(k, sizes);
+    try {
+        me->hllcounter->add(kmer_str);
+    } catch (khmer_exception &e) {
+        PyErr_SetString(PyExc_ValueError, e.what());
+        return NULL;
+    }
 
-    return (PyObject *) khashbits_obj;
+    Py_RETURN_NONE;
 }
 
-static PyObject * hash_collect_high_abundance_kmers(PyObject * self,
-        PyObject * args)
+static
+PyObject *
+hllcounter_estimate_cardinality(khmer_KHLLCounter_Object * me, PyObject * args)
 {
-    khmer_KCountingHashObject * me = (khmer_KCountingHashObject *) self;
-    CountingHash * counting = me->counting;
+    if (!PyArg_ParseTuple( args, "" )) {
+        return NULL;
+    }
 
-    const char * filename = NULL;
-    unsigned int lower_count, upper_count;
+    return PyLong_FromLong(me->hllcounter->estimate_cardinality());
+}
 
-    if (!PyArg_ParseTuple(args, "sII", &filename, &lower_count, &upper_count)) {
+static
+PyObject *
+hllcounter_consume_string(khmer_KHLLCounter_Object * me, PyObject * args)
+{
+    const char * kmer_str;
+    unsigned long long n_consumed;
+
+    if (!PyArg_ParseTuple(args, "s", &kmer_str)) {
         return NULL;
     }
 
-    SeenSet found_kmers;
-    counting->collect_high_abundance_kmers(filename, lower_count, upper_count,
-                                           found_kmers);
+    try {
+        n_consumed = me->hllcounter->consume_string(kmer_str);
+    } catch (khmer_exception &e) {
+        PyErr_SetString(PyExc_ValueError, e.what());
+        return NULL;
+    }
 
-    // create a new hashbits object...
-    std::vector<HashIntoType> sizes;
-    sizes.push_back(1);
+    return PyLong_FromUnsignedLongLong(n_consumed);
+}
 
-    khmer_KHashbitsObject * khashbits_obj = (khmer_KHashbitsObject *) \
-                                            PyObject_New(khmer_KHashbitsObject, &khmer_KHashbitsType);
-    if (khashbits_obj == NULL) {
+static PyObject * hllcounter_consume_fasta(khmer_KHLLCounter_Object * me,
+        PyObject * args)
+{
+    const char * filename;
+
+    if (!PyArg_ParseTuple(args, "s", &filename)) {
         return NULL;
     }
 
-    // ...and set the collected kmers as the stoptags.
-    khashbits_obj->hashbits = new Hashbits(counting->ksize(), sizes);
-    khashbits_obj->hashbits->stop_tags.swap(found_kmers);
+    // call the C++ function, and trap signals => Python
+    unsigned long long  n_consumed    = 0;
+    unsigned int        total_reads   = 0;
+    try {
+        me->hllcounter->consume_fasta(filename, total_reads, n_consumed);
+    } catch (_khmer_signal &e) {
+        PyErr_SetString(PyExc_IOError, e.get_message().c_str());
+        return NULL;
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
-    return (PyObject *) khashbits_obj;
+    return Py_BuildValue("IK", total_reads, n_consumed);
 }
 
-//
-// khmer_counting_dealloc -- clean up a counting hash object.
-//
-
-static void khmer_counting_dealloc(PyObject* self)
+static
+PyObject *
+hllcounter_get_erate(khmer_KHLLCounter_Object * me)
 {
-    khmer_KCountingHashObject * obj = (khmer_KCountingHashObject *) self;
-    delete obj->counting;
-    obj->counting = NULL;
+    return PyFloat_FromDouble(me->hllcounter->get_erate());
+}
 
-    PyObject_Del((PyObject *) obj);
+static
+PyObject *
+hllcounter_get_ksize(khmer_KHLLCounter_Object * me)
+{
+    return PyLong_FromLong(me->hllcounter->get_ksize());
 }
 
-//
-// khmer_hashbits_dealloc -- clean up a hashbits object.
-//
-static void khmer_hashbits_dealloc(PyObject* obj)
+static
+int
+hllcounter_set_ksize(khmer_KHLLCounter_Object * me, PyObject *value,
+                     void *closure)
 {
-    khmer_KHashbitsObject * self = (khmer_KHashbitsObject *) obj;
+    if (value == NULL) {
+        PyErr_SetString(PyExc_TypeError, "Cannot delete attribute");
+        return -1;
+    }
+
+    long ksize = 0;
+    if (PyLong_Check(value)) {
+        ksize = PyLong_AsLong(value);
+    } else if (PyInt_Check(value)) {
+        ksize = PyInt_AsLong(value);
+    } else {
+        PyErr_SetString(PyExc_TypeError,
+                        "Please use an integer value for k-mer size");
+        return -1;
+    }
+
+    if (ksize <= 0) {
+        PyErr_SetString(PyExc_ValueError, "Please set k-mer size to a value "
+                        "greater than zero");
+        return -1;
+    }
 
-    delete self->hashbits;
-    self->hashbits = NULL;
+    try {
+        me->hllcounter->set_ksize(ksize);
+    } catch (ReadOnlyAttribute &e) {
+        PyErr_SetString(PyExc_AttributeError, e.what());
+        return -1;
+    }
 
-    self->ob_type->tp_free((PyObject*)obj);
-    //PyObject_Del((PyObject *) obj);
+    return 0;
 }
 
+static
+int
+hllcounter_set_erate(khmer_KHLLCounter_Object * me, PyObject *value,
+                     void *closure)
+{
+    if (value == NULL) {
+        PyErr_SetString(PyExc_TypeError, "Cannot delete attribute");
+        return -1;
+    }
 
-//
-// khmer_subset_dealloc -- clean up a hashbits object.
-//
+    if (!PyFloat_Check(value)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Please use a float value for k-mer size");
+        return -1;
+    }
 
-static void khmer_subset_dealloc(PyObject* self)
+    double erate = PyFloat_AsDouble(value);
+    try {
+        me->hllcounter->set_erate(erate);
+    } catch (InvalidValue &e) {
+        PyErr_SetString(PyExc_ValueError, e.what());
+        return -1;
+    } catch (ReadOnlyAttribute &e) {
+        PyErr_SetString(PyExc_AttributeError, e.what());
+        return -1;
+    }
+
+    return 0;
+}
+
+static
+PyObject *
+hllcounter_getalpha(khmer_KHLLCounter_Object * me)
 {
-    khmer_KSubsetPartitionObject * obj = (khmer_KSubsetPartitionObject *) self;
-    delete obj->subset;
-    obj->subset = NULL;
+    return PyFloat_FromDouble(me->hllcounter->get_alpha());
+}
+
+static
+PyObject *
+hllcounter_getcounters(khmer_KHLLCounter_Object * me)
+{
+    std::vector<int> counters = me->hllcounter->get_M();
+
+    PyObject * x = PyList_New(counters.size());
+    for (size_t i = 0; i < counters.size(); i++) {
+        PyList_SET_ITEM(x, i, PyLong_FromLong(counters[i]));
+    }
 
-    PyObject_Del((PyObject *) obj);
+    return x;
 }
 
+static PyMethodDef khmer_hllcounter_methods[] = {
+    {
+        "add", (PyCFunction)hllcounter_add,
+        METH_VARARGS,
+        "Add a k-mer to the counter."
+    },
+    {
+        "estimate_cardinality", (PyCFunction)hllcounter_estimate_cardinality,
+        METH_VARARGS,
+        "Return the current estimation."
+    },
+    {
+        "consume_string", (PyCFunction)hllcounter_consume_string,
+        METH_VARARGS,
+        "Break a sequence into k-mers and add each k-mer to the counter."
+    },
+    {
+        "consume_fasta", (PyCFunction)hllcounter_consume_fasta,
+        METH_VARARGS,
+        "Read sequences from file, break into k-mers, "
+        "and add each k-mer to the counter."
+    },
+    {NULL} /* Sentinel */
+};
+
+static PyGetSetDef khmer_hllcounter_getseters[] = {
+    {
+        (char *)"alpha",
+        (getter)hllcounter_getalpha, NULL,
+        (char *)"alpha constant for this HLL counter.",
+        NULL
+    },
+    {
+        (char *)"error_rate",
+        (getter)hllcounter_get_erate, (setter)hllcounter_set_erate,
+        (char *)"Error rate for this HLL counter. "
+        "Can be changed prior to first counting, but becomes read-only after "
+        "that (raising AttributeError)",
+        NULL
+    },
+    {
+        (char *)"ksize",
+        (getter)hllcounter_get_ksize, (setter)hllcounter_set_ksize,
+        (char *)"k-mer size for this HLL counter."
+        "Can be changed prior to first counting, but becomes read-only after "
+        "that (raising AttributeError)",
+        NULL
+    },
+    {
+        (char *)"counters",
+        (getter)hllcounter_getcounters, NULL,
+        (char *)"Read-only internal counters.",
+        NULL
+    },
+    {NULL} /* Sentinel */
+};
+
+static PyTypeObject khmer_KHLLCounter_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "_khmer.KHLLCounter",                       /* tp_name */
+    sizeof(khmer_KHLLCounter_Object),          /* tp_basicsize */
+    0,                                         /* tp_itemsize */
+    (destructor)khmer_hllcounter_dealloc,      /* tp_dealloc */
+    0,                                         /* tp_print */
+    0,                                         /* tp_getattr */
+    0,                                         /* tp_setattr */
+    0,                                         /* tp_compare */
+    0,                                         /* tp_repr */
+    0,                                         /* tp_as_number */
+    0,                                         /* tp_as_sequence */
+    0,                                         /* tp_as_mapping */
+    0,                                         /* tp_hash */
+    0,                                         /* tp_call */
+    0,                                         /* tp_str */
+    0,                                         /* tp_getattro */
+    0,                                         /* tp_setattro */
+    0,                                         /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
+    "HyperLogLog counter",                     /* tp_doc */
+    0,                                         /* tp_traverse */
+    0,                                         /* tp_clear */
+    0,                                         /* tp_richcompare */
+    0,                                         /* tp_weaklistoffset */
+    0,                                         /* tp_iter */
+    0,                                         /* tp_iternext */
+    khmer_hllcounter_methods,                  /* tp_methods */
+    0,                                         /* tp_members */
+    khmer_hllcounter_getseters,                /* tp_getset */
+    0,                                         /* tp_base */
+    0,                                         /* tp_dict */
+    0,                                         /* tp_descr_get */
+    0,                                         /* tp_descr_set */
+    0,                                         /* tp_dictoffset */
+    0,                                         /* tp_init */
+    0,                                         /* tp_alloc */
+    khmer_hllcounter_new,                      /* tp_new */
+};
+
+#define is_hllcounter_obj(v)  (Py_TYPE(v) == &khmer_KHLLCounter_Type)
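
Putting the pieces together, a minimal sketch of the intended Python-side use
of this type (module path assumed to be _khmer; the sequence is illustrative):

    from _khmer import HLLCounter

    hll = HLLCounter(0.01, 20)         # error_rate and ksize (the defaults above)
    hll.consume_string("ACGTACGTACGTACGTACGTACGT")
    print(hll.estimate_cardinality())  # approximate number of distinct 20-mers
    print(hll.error_rate, hll.ksize)   # getsetters; read-only once counting starts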
+
+
 //////////////////////////////
 // standalone functions
 
@@ -4218,7 +4686,15 @@ static PyObject * forward_hash(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    return PyLong_FromUnsignedLongLong(_hash(kmer, ksize));
+    PyObject * hash;
+    try {
+        hash = PyLong_FromUnsignedLongLong(_hash(kmer, ksize));
+        return hash;
+    } catch (khmer_exception &e) {
+        PyErr_SetString(PyExc_RuntimeError, e.what());
+        return NULL;
+    }
+
 }
 
 static PyObject * forward_hash_no_rc(PyObject * self, PyObject * args)
@@ -4237,7 +4713,7 @@ static PyObject * forward_hash_no_rc(PyObject * self, PyObject * args)
 
     if (strlen(kmer) != ksize) {
         PyErr_SetString(PyExc_ValueError,
-                        "k-mer length must be the same as the hashtable k-size");
+                        "k-mer length must equal the k-size");
         return NULL;
     }
 
@@ -4258,22 +4734,29 @@ static PyObject * reverse_hash(PyObject * self, PyObject * args)
         return NULL;
     }
 
-    return PyString_FromString(_revhash(val, ksize).c_str());
+    return PyBytes_FromString(_revhash(val, ksize).c_str());
 }
 
-static PyObject * set_reporting_callback(PyObject * self, PyObject * args)
+static PyObject * murmur3_forward_hash(PyObject * self, PyObject * args)
 {
-    PyObject * o;
+    const char * kmer;
 
-    if (!PyArg_ParseTuple(args, "O", &o)) {
+    if (!PyArg_ParseTuple(args, "s", &kmer)) {
         return NULL;
     }
 
-    Py_XDECREF(_callback_obj);
-    Py_INCREF(o);
-    _callback_obj = o;
+    return PyLong_FromUnsignedLongLong(_hash_murmur(kmer));
+}
 
-    Py_RETURN_NONE;
+static PyObject * murmur3_forward_hash_no_rc(PyObject * self, PyObject * args)
+{
+    const char * kmer;
+
+    if (!PyArg_ParseTuple(args, "s", &kmer)) {
+        return NULL;
+    }
+
+    return PyLong_FromUnsignedLongLong(_hash_murmur_forward(kmer));
 }
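
These standalone helpers are registered in the KhmerMethods table below; a
sketch of their use (import path assumed; hash values depend on ksize and the
hash function compiled in):

    from _khmer import forward_hash, reverse_hash, hash_murmur3

    h = forward_hash("ACGTACGTACGTACGTACGT", 20)  # canonical (rc-aware) hash
    print(reverse_hash(h, 20))                    # k-mer recovered from the hash
    print(hash_murmur3("ACGTACGTACGTACGTACGT"))   # MurmurHash3, with reverse complement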
 
 //
@@ -4288,7 +4771,7 @@ get_version_cpp( PyObject * self, PyObject * args )
 #define xstr(s) str(s)
 #define str(s) #s
     std::string dVersion = xstr(VERSION);
-    return PyString_FromString(dVersion.c_str());
+    return PyBytes_FromString(dVersion.c_str());
 }
 
 
@@ -4314,18 +4797,6 @@ static PyMethodDef KhmerMethods[] = {
         METH_VARARGS,       "Create an empty single-table counting hash"
     },
     {
-        "_new_counting_hash",   _new_counting_hash,
-        METH_VARARGS,       "Create an empty counting hash"
-    },
-    {
-        "_new_hashbits",        _new_hashbits,
-        METH_VARARGS,       "Create an empty hashbits table"
-    },
-    {
-        "new_readaligner",        new_readaligner,
-        METH_VARARGS,             "Create a read aligner object"
-    },
-    {
         "forward_hash",     forward_hash,
         METH_VARARGS,       "",
     },
@@ -4338,8 +4809,18 @@ static PyMethodDef KhmerMethods[] = {
         METH_VARARGS,       "",
     },
     {
-        "set_reporting_callback",   set_reporting_callback,
-        METH_VARARGS,       "",
+        "hash_murmur3",
+        murmur3_forward_hash,
+        METH_VARARGS,
+        "Calculate the hash value of a k-mer using MurmurHash3 "
+        "(with reverse complement)",
+    },
+    {
+        "hash_no_rc_murmur3",
+        murmur3_forward_hash_no_rc,
+        METH_VARARGS,
+        "Calculate the hash value of a k-mer using MurmurHash3 "
+        "(no reverse complement)",
     },
     {
         "get_version_cpp", get_version_cpp,
@@ -4353,20 +4834,47 @@ init_khmer(void)
 {
     using namespace python;
 
-    khmer_KCountingHashType.ob_type   = &PyType_Type;
+    if (PyType_Ready(&khmer_KCountingHash_Type) < 0) {
+        return;
+    }
 
-    // implemented __new__ for Hashbits; keeping factory func around as well
-    // for backwards compat with old scripts
-    khmer_KHashbitsType.tp_new = khmer_hashbits_new;
-    khmer_KHashbitsType.tp_methods = khmer_hashbits_methods;
-    if (PyType_Ready(&khmer_KHashbitsType) < 0) {
+    khmer_KSubsetPartition_Type.tp_methods = khmer_subset_methods;
+    if (PyType_Ready(&khmer_KSubsetPartition_Type) < 0) {
+        return;
+    }
+
+    khmer_KHashbits_Type.tp_methods = khmer_hashbits_methods;
+    if (PyType_Ready(&khmer_KHashbits_Type) < 0) {
         return;
     }
     // add LabelHash
 
-    khmer_KLabelHashType.tp_base = &khmer_KHashbitsType;
-    khmer_KLabelHashType.tp_new = khmer_labelhash_new;
-    if (PyType_Ready(&khmer_KLabelHashType) < 0) {
+    khmer_KLabelHash_Type.tp_base = &khmer_KHashbits_Type;
+    if (PyType_Ready(&khmer_KLabelHash_Type) < 0) {
+        return;
+    }
+
+    if (PyType_Ready(&khmer_ReadAlignerType) < 0) {
+        return;
+    }
+
+    if (PyType_Ready(&khmer_KHLLCounter_Type) < 0) {
+        return;
+    }
+
+    _init_ReadParser_Type_constants();
+    if (PyType_Ready( &khmer_ReadParser_Type ) < 0) {
+        return;
+    }
+
+    if (PyType_Ready(&khmer_Read_Type ) < 0) {
+        return;
+    }
+
+    if (PyType_Ready(&khmer_ReadPairIterator_Type ) < 0) {
         return;
     }
 
@@ -4376,26 +4884,34 @@ init_khmer(void)
     if (m == NULL) {
         return;
     }
-    _init_Read_Type( );
-    _init_ReadParser_Type( );
-    if (PyType_Ready( &ReadParser_Type ) < 0) {
+
+    Py_INCREF(&khmer_ReadParser_Type);
+    if (PyModule_AddObject( m, "ReadParser",
+                            (PyObject *)&khmer_ReadParser_Type ) < 0) {
+        return;
+    }
+
+    Py_INCREF(&khmer_KCountingHash_Type);
+    if (PyModule_AddObject( m, "CountingHash",
+                            (PyObject *)&khmer_KCountingHash_Type ) < 0) {
         return;
     }
-    _init_ReadPairIterator_Type( );
-    // TODO: Finish initialization of other types.
 
-    if (PyModule_AddObject( m, "ReadParser", (PyObject *)&ReadParser_Type ) < 0) {
+    Py_INCREF(&khmer_KHashbits_Type);
+    if (PyModule_AddObject(m, "Hashbits", (PyObject *)&khmer_KHashbits_Type) < 0) {
         return;
     }
-    Py_INCREF(&ReadParser_Type);
-    // TODO: Add other types here as their 'new' methods are implemented.
-    //       Then, remove the corresponding factory functions.
 
-    Py_INCREF(&khmer_KHashbitsType);
-    PyModule_AddObject(m, "_Hashbits", (PyObject *)&khmer_KHashbitsType);
+    Py_INCREF(&khmer_KLabelHash_Type);
+    if (PyModule_AddObject(m, "LabelHash",
+                           (PyObject *)&khmer_KLabelHash_Type) < 0) {
+        return;
+    }
 
-    Py_INCREF(&khmer_KLabelHashType);
-    PyModule_AddObject(m, "_LabelHash", (PyObject *)&khmer_KLabelHashType);
+    Py_INCREF(&khmer_KHLLCounter_Type);
+    PyModule_AddObject(m, "HLLCounter", (PyObject *)&khmer_KHLLCounter_Type);
+    Py_INCREF(&khmer_ReadAlignerType);
+    PyModule_AddObject(m, "ReadAligner", (PyObject *)&khmer_ReadAlignerType);
 }
 
 // vim: set ft=cpp sts=4 sw=4 tw=79:
diff --git a/khmer/_version.py b/khmer/_version.py
index 1e1df70..acac511 100644
--- a/khmer/_version.py
+++ b/khmer/_version.py
@@ -6,23 +6,23 @@
 # that just contains the computed version number.
 
 # This file is released into the public domain. Generated by
-# versioneer-0.12 (https://github.com/warner/python-versioneer)
+# versioneer-0.14 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
 
 # these strings will be replaced by git during git-archive
-git_refnames = " (tag: v1.3)"
-git_full = "9ee0ad3b49eec425c5a45f0d96e6e67895596ff7"
+git_refnames = " (tag: v1.4)"
+git_full = "ffb8865d28ccf584e5de3362f9150a6b020eaa11"
 
 # these strings are filled in when 'setup.py versioneer' creates _version.py
 tag_prefix = "v"
 parentdir_prefix = "."
 versionfile_source = "khmer/_version.py"
 
-import os
-import sys
-import re
-import subprocess
-import errno
-
 
 def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
     assert isinstance(commands, list)
@@ -47,7 +47,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
             print("unable to find command, tried %s" % (commands,))
         return None
     stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
@@ -62,8 +62,8 @@ def versions_from_parentdir(parentdir_prefix, root, verbose=False):
     dirname = os.path.basename(root)
     if not dirname.startswith(parentdir_prefix):
         if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
-                  (root, dirname, parentdir_prefix))
+            print("guessing rootdir is '%s', but '%s' doesn't start with "
+                  "prefix '%s'" % (root, dirname, parentdir_prefix))
         return None
     return {"version": dirname[len(parentdir_prefix):], "full": ""}
 
@@ -114,7 +114,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
         # "stabilization", as well as "HEAD" and "master".
         tags = set([r for r in refs if re.search(r'\d', r)])
         if verbose:
-            print("discarding '%s', no digits" % ",".join(refs - tags))
+            print("discarding '%s', no digits" % ",".join(refs-tags))
     if verbose:
         print("likely tags: %s" % ",".join(sorted(tags)))
     for ref in sorted(tags):
@@ -125,13 +125,59 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
                 print("picking %s" % r)
             return {"version": r,
                     "full": keywords["full"].strip()}
-    # no suitable tags, so we use the full revision id
+    # no suitable tags, so version is "0+unknown", but full hex is still there
     if verbose:
-        print("no suitable tags, using full revision id")
-    return {"version": keywords["full"].strip(),
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
             "full": keywords["full"].strip()}
 
 
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty]. TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%s' doesn't start with prefix '%s'"
+            print(fmt % (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%d.g%s" % (distance, commit) + dirty_suffix
+
+    return version, dirty
+
+
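Concretely, git_parse_vcs_describe maps 'git describe --tags --dirty --always
--long' output to version strings like so (hashes illustrative):

    # "v1.4-0-gffb8865"        -> "1.4"                  (exactly on a tag)
    # "v1.4-3-gffb8865"        -> "1.4+3.gffb8865"
    # "v1.4-3-gffb8865-dirty"  -> "1.4+3.gffb8865.dirty"
    # "ffb8865"                -> "0+untagged.gffb8865"  (no tag reachable)
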
 def git_versions_from_vcs(tag_prefix, root, verbose=False):
     # this runs 'git' from the root of the source tree. This only gets called
     # if the git-archive 'subst' keywords were *not* expanded, and
@@ -141,31 +187,33 @@ def git_versions_from_vcs(tag_prefix, root, verbose=False):
     if not os.path.exists(os.path.join(root, ".git")):
         if verbose:
             print("no .git in %s" % root)
-        return {}
+        return {}  # get_versions() will try next method
 
     GITS = ["git"]
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"],
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
                          cwd=root)
+    # --long was added in git-1.5.5
     if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            print("tag '%s' doesn't start with prefix '%s'" %
-                  (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
     stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
     if stdout is None:
         return {}
     full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+    if dirty:
+        full += ".dirty"
+
+    return {"version": version, "full": full}
 
 
-def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
+def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
     # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
@@ -177,11 +225,11 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
         return ver
 
     try:
-        root = os.path.abspath(__file__)
+        root = os.path.realpath(__file__)
         # versionfile_source is the relative path from the top of the source
         # tree (where the .git directory might live) to this file. Invert
         # this to find the root from __file__.
-        for i in range(len(versionfile_source.split(os.sep))):
+        for i in versionfile_source.split('/'):
             root = os.path.dirname(root)
     except NameError:
         return default
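
An aside for readers tracking the versioneer upgrade: the new
git_parse_vcs_describe() turns `git describe --long` output into PEP 440
style versions. A minimal sketch of that mapping (tag names below are
invented; the 'v' tag prefix matches the versioneer configuration used in
this import):

    import re

    def parse_describe(git_describe, tag_prefix='v'):
        """Sketch of git_parse_vcs_describe(); returns just the version."""
        dirty = git_describe.endswith('-dirty')
        if dirty:
            git_describe = git_describe[:git_describe.rindex('-dirty')]
        suffix = '.dirty' if dirty else ''
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:                     # bare HEX: no tag reachable at all
            return '0+untagged.g' + git_describe + suffix
        tag, distance, commit = mo.group(1), int(mo.group(2)), mo.group(3)
        version = tag[len(tag_prefix):]
        if distance or dirty:          # fold into a PEP 440 local identifier
            version += '+%d.g%s%s' % (distance, commit, suffix)
        return version

    print parse_describe('v1.4-0-g1a2b3c4')         # -> 1.4
    print parse_describe('v1.4-3-g1a2b3c4-dirty')   # -> 1.4+3.g1a2b3c4.dirty

A clean build exactly on a tag collapses to the bare tag; any distance from
the tag, or a dirty tree, is folded into a "+NUM.gHEX[.dirty]" local version
identifier, so version.endswith(".dirty") is always a valid test.
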
diff --git a/khmer/file.py b/khmer/kfile.py
similarity index 70%
rename from khmer/file.py
rename to khmer/kfile.py
index 27b51e4..a5ff6b4 100644
--- a/khmer/file.py
+++ b/khmer/kfile.py
@@ -1,28 +1,41 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2014-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 
-'''
-File handling/checking utilities for command-line scripts.
-'''
+"""File handling/checking utilities for command-line scripts."""
 
 import os
 import sys
+import errno
 from stat import S_ISBLK, S_ISFIFO
 
 
-def check_file_status(file_path, force):
-    """Check the status of the file; if the file is empty or doesn't exist
-    AND if the file is NOT a fifo/block/named pipe then a warning is printed
-    and sys.exit(1) is called
+def check_input_files(file_path, force):
+    """Check the status of the file.
+
+    If the file is empty or doesn't exist AND the file is NOT a
+    fifo/block/named pipe, then a warning is printed and sys.exit(1)
+    is called.
     """
+    mode = None
 
     if file_path is '-':
         return
-    mode = os.stat(file_path).st_mode
+    try:
+        mode = os.stat(file_path).st_mode
+    except OSError:
+        print >>sys.stderr, "ERROR: Input file %s does not exist" % \
+                            file_path
+
+        if not force:
+            print >>sys.stderr, "Exiting"
+            sys.exit(1)
+        else:
+            return
+
     # block devices will be nonzero
     if S_ISBLK(mode) or S_ISFIFO(mode):
         return
@@ -40,12 +53,28 @@ def check_file_status(file_path, force):
                 sys.exit(1)
 
 
+def check_file_writable(file_path):
+    """Return if file_path is writable, exit out if it's not."""
+    try:
+        file_obj = open(file_path, "a")
+    except IOError as error:
+        if error.errno == errno.EACCES:
+            print >>sys.stderr, "ERROR: File %s does not have write " \
+                % file_path + "permission; exiting"
+            sys.exit(1)
+        else:
+            print >>sys.stderr, "ERROR: " + error.strerror
+    else:
+        file_obj.close()
+        return
+
+
 def check_space(in_files, force, _testhook_free_space=None):
-    """
-    Estimate size of input files passed, then calculate
-    disk space available. Exit if insufficient disk space,
-    """
+    """Check for available disk space.
 
+    Estimate size of input files passed, then calculate disk space
+    available and exit if disk space is insufficient.
+    """
     # Get disk free space in Bytes assuming non superuser
     # and assuming all inFiles are in same disk
     in_file = in_files[0]
@@ -81,9 +110,7 @@ def check_space(in_files, force, _testhook_free_space=None):
 
 
 def check_space_for_hashtable(hash_size, force, _testhook_free_space=None):
-    """
-    Check we have enough size to write a hash table
-    """
+    """Check we have enough size to write a hash table."""
     cwd = os.getcwd()
     dir_path = os.path.dirname(os.path.realpath(cwd))
     target = os.statvfs(dir_path)
@@ -107,11 +134,12 @@ def check_space_for_hashtable(hash_size, force, _testhook_free_space=None):
 
 
 def check_valid_file_exists(in_files):
-    """
+    """Warn if input files are empty or missing.
+
     In a scenario where we expect multiple input files and
     are OK with some of them being empty or non-existent,
     this check warns to stderr if any input file is empty
-    or non-existent
+    or non-existent.
     """
     for in_file in in_files:
         if os.path.exists(in_file):
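
A note on check_space(): the free-space estimate it performs reduces to
os.statvfs() arithmetic on the filesystem of the first input, assuming all
inputs share one disk. A minimal sketch of that calculation (the helper name
and boolean interface are illustrative, not khmer's API):

    import os

    def enough_space(in_files):
        """Rough check: do the inputs fit in the free space on their disk?"""
        total = sum(os.path.getsize(f) for f in in_files
                    if os.path.exists(f))
        stats = os.statvfs(os.path.dirname(os.path.abspath(in_files[0])))
        free = stats.f_frsize * stats.f_bavail  # bytes free for non-root users
        return free > total

Using f_bavail rather than f_bfree matches the "assuming non superuser"
comment in the code above: it counts only blocks available to unprivileged
processes.
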
diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py
index e0a861e..c33d501 100644
--- a/khmer/khmer_args.py
+++ b/khmer/khmer_args.py
@@ -2,7 +2,7 @@
 # vim: set encoding=utf-8
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 
@@ -25,10 +25,7 @@ class ComboFormatter(argparse.ArgumentDefaultsHelpFormatter,
 
 
 def build_hash_args(descr=None, epilog=None):
-    """Build an argparse.ArgumentParser with arguments for hash* based
-    scripts and return it.
-    """
-
+    """Build an ArgumentParser with args for bloom filter based scripts."""
     parser = argparse.ArgumentParser(
         description=descr, epilog=epilog,
         formatter_class=ComboFormatter)
@@ -56,10 +53,7 @@ def build_hash_args(descr=None, epilog=None):
 
 
 def build_counting_args(descr=None, epilog=None):
-    """Build an argparse.ArgumentParser with arguments for counting_hash
-    based scripts and return it.
-    """
-
+    """Build an ArgumentParser with args for counting_hash based scripts."""
     parser = build_hash_args(descr=descr, epilog=epilog)
     parser.hashtype = 'counting'
 
@@ -67,10 +61,7 @@ def build_counting_args(descr=None, epilog=None):
 
 
 def build_hashbits_args(descr=None, epilog=None):
-    """Build an argparse.ArgumentParser with arguments for hashbits based
-    scripts and return it.
-    """
-
+    """Build an ArgumentParser with args for hashbits based scripts."""
     parser = build_hash_args(descr=descr, epilog=epilog)
     parser.hashtype = 'hashbits'
 
@@ -125,11 +116,11 @@ will be ignored.'''.format(hashfile=values))
 
 
 def report_on_config(args, hashtype='counting'):
-    """
-        Summarizes the configuration produced by the command-line arguments
-        made available by this module.
-    """
+    """Print out configuration.
 
+    Summarize the configuration produced by the command-line arguments
+    made available by this module.
+    """
     from khmer.utils import print_error
 
     if args.quiet:
@@ -166,6 +157,7 @@ def report_on_config(args, hashtype='counting'):
 
 
 def add_threading_args(parser):
+    """Add option for threading to options parser."""
     parser.add_argument('--threads', '-T', default=DEFAULT_N_THREADS, type=int,
                         help='Number of simultaneous threads to execute')
 
@@ -173,15 +165,18 @@ _algorithms = {
     'software': 'MR Crusoe et al., '
     '2014. http://dx.doi.org/10.6084/m9.figshare.979190',
     'diginorm': 'CT Brown et al., arXiv:1203.4802 [q-bio.GN]',
+    'streaming': 'Q Zhang, S Awad, CT Brown, unpublished',
     'graph': 'J Pell et al., http://dx.doi.org/10.1073/pnas.1121464109',
     'counting': 'Q Zhang et al., '
     'http://dx.doi.org/10.1371/journal.pone.0101271',
     'sweep': 'C Scott, MR Crusoe, and CT Brown, unpublished',
-    'SeqAn': 'A. Döring et al. http://dx.doi.org:80/10.1186/1471-2105-9-11'
+    'SeqAn': 'A. Döring et al. http://dx.doi.org:80/10.1186/1471-2105-9-11',
+    'hll': 'Irber and Brown, unpublished'
 }
 
 
 def info(scriptname, algorithm_list=None):
+    """Print version and project info to stderr."""
     import khmer
 
     sys.stderr.write("\n")
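
For orientation, the builder functions above compose as in this hypothetical
script (the script name and arguments are invented; assumes khmer is
importable):

    import sys
    from khmer.khmer_args import (build_counting_args, add_threading_args,
                                  info)

    # build a parser pre-loaded with the standard counting-hash options,
    # then layer script-specific arguments on top
    parser = build_counting_args(descr='Count k-mers in a FASTA/FASTQ file.')
    add_threading_args(parser)
    parser.add_argument('input_filename')
    args = parser.parse_args()

    info('count-example.py', algorithm_list=['counting'])
    print >>sys.stderr, 'using %d thread(s)' % args.threads
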
diff --git a/khmer/load_pe.py b/khmer/load_pe.py
deleted file mode 100644
index faabd1a..0000000
--- a/khmer/load_pe.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-
-
-def _is_1(name):
-    return name.endswith('/1')
-
-
-def _is_2(name):
-    return name.endswith('/2')
-
-
-def is_pe(name1, name2):
-    return name1[:-1] == name2[:-1]
-
-
-def load_pe(screed_handle):
-    last_record = None
-
-    screed_iter = iter(screed_handle)
-
-    while True:
-        try:
-            this_record = screed_iter.next()
-        except StopIteration:
-            if last_record:
-                yield last_record, None
-
-            raise StopIteration
-
-        if _is_2(this_record.name):
-            # PE!
-            if last_record:
-                if is_pe(last_record.name, this_record.name):
-                    yield last_record, this_record
-                    last_record = None
-                else:
-                    # both records exist but they do not match as PEs
-                    yield last_record, None
-                    yield this_record, None
-                    last_record = None
-
-            # first sequence (/1) is missing?
-            else:
-                yield this_record, None
-        else:
-            if last_record:
-                yield last_record, None
-            last_record = this_record
diff --git a/khmer/thread_utils.py b/khmer/thread_utils.py
index 124a83d..a604fe2 100644
--- a/khmer/thread_utils.py
+++ b/khmer/thread_utils.py
@@ -1,24 +1,24 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
-"""
-Utilities for dealing with multithreaded processing of short reads.
-"""
+"""Utilities for dealing with multithreaded processing of short reads."""
+
 import threading
 import Queue
 import sys
 import screed
-
+from khmer import utils
 DEFAULT_WORKER_THREADS = 8
 DEFAULT_GROUPSIZE = 100
 
 
 def verbose_loader(filename):
-    it = screed.open(filename)
-    for n, record in enumerate(it):
+    """Screed iterator that additionally prints progress info to stderr."""
+    screed_iter = screed.open(filename, parse_description=False)
+    for n, record in enumerate(screed_iter):
         if n % 100000 == 0:
             print >>sys.stderr, '... filtering', n
         yield record
@@ -140,11 +140,11 @@ class ThreadedSequenceProcessor(object):
                 name, sequence = self.process_fn(record)
                 bp_processed += len(record['sequence'])
                 if name:
-                    accuracy = record.get('accuracy')
-                    if accuracy:
-                        accuracy = accuracy[:len(sequence)]
+                    quality = record.get('quality')
+                    if quality:
+                        quality = quality[:len(sequence)]
                     bp_written += len(sequence)
-                    keep.append((name, sequence, accuracy))
+                    keep.append((name, sequence, quality))
 
             self.outqueue.put(SequenceGroup(0, keep))
 
@@ -181,9 +181,9 @@ class ThreadedSequenceProcessor(object):
             except Queue.Empty:
                 continue
 
-            for name, seq, accuracy in g.seqlist:
-                if accuracy:  # write FASTQ; CTB hack.
-                    outfp.write('@%s\n%s\n+\n%s\n' % (name, seq, accuracy))
+            for name, seq, quality in g.seqlist:
+                if quality:  # write FASTQ; CTB hack.
+                    outfp.write('@%s\n%s\n+\n%s\n' % (name, seq, quality))
                 else:
                     outfp.write('>%s\n%s\n' % (name, seq,))
 
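
ThreadedSequenceProcessor itself is built on a standard Queue fan-out: a
reader enqueues groups of records, workers transform them, and a writer
drains the results. A stripped-down sketch of that pattern only (the worker
count, sentinel handling, and stand-in process function are illustrative,
not the class's actual wiring):

    import threading
    import Queue

    def worker(inq, outq):
        while True:
            group = inq.get()
            if group is None:                 # sentinel: no more work
                outq.put(None)
                return
            outq.put([seq.upper() for seq in group])  # stand-in process_fn

    inq, outq = Queue.Queue(), Queue.Queue()
    workers = [threading.Thread(target=worker, args=(inq, outq))
               for _ in range(2)]
    for w in workers:
        w.start()
    for group in (['acgta', 'ttagc'], ['ggcat']):
        inq.put(group)
    for _ in workers:
        inq.put(None)                         # one sentinel per worker
    finished = 0
    while finished < len(workers):
        result = outq.get()
        if result is None:
            finished += 1
        else:
            print result
    for w in workers:
        w.join()
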
diff --git a/khmer/utils.py b/khmer/utils.py
index d5d787a..1495fa2 100644
--- a/khmer/utils.py
+++ b/khmer/utils.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # Convenience functions for performing common argument-checking tasks in
@@ -9,13 +9,158 @@
 
 
 def print_error(msg):
+    """Print the given message to 'stderr'."""
+    import sys
+
+    print >>sys.stderr, msg
+
+
+def _split_left_right(name):
+    """Split record name at the first whitespace and return both parts.
+
+    RHS is set to an empty string if not present.
     """
-        Prints the given message to 'stderr'.
+    parts = name.split(None, 1)
+    lhs, rhs = [parts[0], parts[1] if len(parts) > 1 else '']
+    return lhs, rhs
+
+
+def check_is_pair(record1, record2):
+    """Check if the two sequence records belong to the same fragment.
+
+    In a matching pair the records are the left and right reads of the
+    same fragment, respectively.  Returns True or False as appropriate.
+
+    Handles both Casava formats: 'seq/1' and 'seq/2', and 'seq 1:...'
+    and 'seq 2:...'.
     """
+    if hasattr(record1, 'quality') or hasattr(record2, 'quality'):
+        if not (hasattr(record1, 'quality') and hasattr(record2, 'quality')):
+            raise ValueError("both records must be same type (FASTA or FASTQ)")
 
-    import sys
+    lhs1, rhs1 = _split_left_right(record1.name)
+    lhs2, rhs2 = _split_left_right(record2.name)
 
-    print >>sys.stderr, msg
+    # handle 'name/1'
+    if lhs1.endswith('/1') and lhs2.endswith('/2'):
+        subpart1 = lhs1.split('/', 1)[0]
+        subpart2 = lhs2.split('/', 1)[0]
+
+        assert subpart1
+        if subpart1 == subpart2:
+            return True
+
+    # handle '@name 1:rst'
+    elif lhs1 == lhs2 and rhs1.startswith('1:') and rhs2.startswith('2:'):
+        return True
+
+    return False
+
+
+def check_is_left(name):
+    """Check if the name belongs to a 'left' sequence (/1).
+
+    Returns True or False.
+
+    Handles both Casava formats: 'seq/1' and 'seq 1:...'
+    """
+    lhs, rhs = _split_left_right(name)
+    if lhs.endswith('/1'):              # handle 'name/1'
+        return True
+    elif rhs.startswith('1:'):          # handle '@name 1:rst'
+        return True
+
+    return False
+
+
+def check_is_right(name):
+    """Check if the name belongs to a 'right' sequence (/2).
+
+    Returns True or False.
+
+    Handles both Casava formats: 'seq/2' and 'seq 2:...'
+    """
+    lhs, rhs = _split_left_right(name)
+    if lhs.endswith('/2'):              # handle 'name/2'
+        return True
+    elif rhs.startswith('2:'):          # handle '@name 2:rst'
+        return True
+
+    return False
+
+
+def broken_paired_reader(screed_iter, min_length=None, force_single=False):
+    """Read pairs from a stream.
+
+    A generator that yields singletons and pairs from a stream of FASTA/FASTQ
+    records (yielded by 'screed_iter').  Yields (n, is_pair, r1, r2) where
+    'r2' is None if is_pair is False.
+
+    The input stream may consist entirely of single-ended reads, interleaved
+    paired-end reads, or paired-end reads with orphans, a.k.a. "broken paired".
+
+    Usage::
+
+       for n, is_pair, read1, read2 in broken_paired_reader(...):
+          ...
+
+    Note that 'n' behaves like enumerate() and starts at 0, but tracks
+    the number of records read from the input stream, so it is
+    incremented by 2 for a pair of reads.
+
+    If 'min_length' is set, all reads under this length are ignored (even
+    if they are pairs).
+
+    If 'force_single' is True, all reads are returned as singletons.
+    """
+    record = None
+    prev_record = None
+    n = 0
+
+    # handle the majority of the stream.
+    for record in screed_iter:
+        # ignore short reads
+        if min_length and len(record.sequence) < min_length:
+            record = None
+            continue
+
+        if prev_record:
+            if check_is_pair(prev_record, record) and not force_single:
+                yield n, True, prev_record, record  # it's a pair!
+                n += 2
+                record = None
+            else:                                   # orphan.
+                yield n, False, prev_record, None
+                n += 1
+
+        prev_record = record
+        record = None
+
+    # handle the last record, if it exists (i.e. last two records not a pair)
+    if prev_record:
+        yield n, False, prev_record, None
+
+
+def write_record(record, fileobj):
+    """Write sequence record to 'fileobj' in FASTA/FASTQ format."""
+    if hasattr(record, 'quality'):
+        fileobj.write(
+            '@{name}\n{seq}\n'
+            '+\n{qual}\n'.format(name=record.name,
+                                 seq=record.sequence,
+                                 qual=record.quality))
+    else:
+        fileobj.write(
+            '>{name}\n{seq}\n'.format(name=record.name,
+                                      seq=record.sequence))
+
+
+def write_record_pair(read1, read2, fileobj):
+    """Write a pair of sequence records to 'fileobj' in FASTA/FASTQ format."""
+    if hasattr(read1, 'quality'):
+        assert hasattr(read2, 'quality')
+    write_record(read1, fileobj)
+    write_record(read2, fileobj)
 
 
 # vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
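
The new pairing helpers are easiest to see in action on the two naming
conventions they recognize; the Read tuple below is a minimal stand-in for
screed records, not screed's own type:

    from collections import namedtuple
    from khmer.utils import check_is_pair, broken_paired_reader

    Read = namedtuple('Read', ['name', 'sequence'])

    # old-style suffixes: same stem, /1 and /2
    a = Read('frag1/1', 'ACGTACGT')
    b = Read('frag1/2', 'TTGGACCA')
    assert check_is_pair(a, b)

    # Casava 1.8 style: identical LHS, '1:...' vs '2:...' after the space
    c = Read('frag2 1:N:0:ACGT', 'ACGTACGT')
    d = Read('frag2 2:N:0:ACGT', 'TTGGACCA')
    assert check_is_pair(c, d)

    orphan = Read('frag3/1', 'ACGT')
    for n, is_pair, read1, read2 in broken_paired_reader(iter([a, b, orphan])):
        print n, is_pair, read1.name, read2.name if read2 else '-'
    # -> 0 True frag1/1 frag1/2
    # -> 2 False frag3/1 -
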
diff --git a/lib/.check_openmp.cc b/lib/.check_openmp.cc
new file mode 100644
index 0000000..d4375ca
--- /dev/null
+++ b/lib/.check_openmp.cc
@@ -0,0 +1,17 @@
+//
+// This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+// Copyright (C) Michigan State University, 2015. It is licensed under
+// the three-clause BSD license; see doc/LICENSE.txt.
+// Contact: khmer-project at idyll.org
+//
+
+// File to be used to check for openmp
+#include <omp.h>
+#include <stdio.h>
+
+int main()
+{
+	#pragma omp parallel
+	printf("Hello from thread %d, nthreads %d\n",
+	       omp_get_thread_num(), omp_get_num_threads());
+}
diff --git a/lib/.gitignore b/lib/.gitignore
index 11c2f27..ca5f8e1 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -1,9 +1,10 @@
-test-StreamReader
-test-CacheManager
 test-Parser
 test-HashTables
-smpFiltering
-bittest
-ktable_test
+test-read-aligner
 ht-diff
+chkomp
+*.pc
 *.swp
+*.so
+*.so.*
+*.a
diff --git a/lib/Makefile b/lib/Makefile
index 6012291..c100a0f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -3,88 +3,63 @@
 WANT_PROFILING=false
 
 # Which profiling tool to use?
-# Assuming you have TAU installed and setup properly, 
+# Assuming you have TAU installed and setup properly,
 # you can instrument codes with it to get detailed multi-threaded profiling.
 # Otherwise, gprof is able to give you some information without threading info.
 # Choose one of: gprof, TAU
 PROFILER_OF_CHOICE=gprof
 
 # Perform extra sanity checking?
-# Set this variable to true 
-# if you wish the codes to perform extra sanity checking 
+# Set this variable to true
+# if you wish the codes to perform extra sanity checking
 # (to the possible detriment of performance).
 WANT_EXTRA_SANITY_CHECKING=false
 
 # Compile with debugging symbols?
-# Set this variable to true 
-# if you wish the codes to be built with debugging symbols 
-# (increases code size 
-# and does not always produce accurate stepping in a debugger 
+# Set this variable to true
+# if you wish the codes to be built with debugging symbols
+# (increases code size
+# and does not always produce accurate stepping in a debugger
 # when optimization is turned on).
 WANT_DEBUGGING=false
 
-# Compile with tracing logic turned on?
-# Set this variable to true if you want to use instrumentation provided 
-# in the sources for debugging purposes 
-# and are willing to accept the overhead such instrumentation introduces.
-WITH_INTERNAL_TRACING=false
-
-# Trace state transitions?
-# Set this variable to true if you want to use instrumentation which reports
-# on transitions which occur between the states of the various elements of the 
-# processing stack.
-# 'WITH_INTERNAL_TRACING' must be true for this to have effect.
-TRACE_STATE_CHANGES=true
-
-# Trace busywaits?
-# Set this variable to true if you want to use instrumentation which reports
-# on various busywaits, such as synchronization barriers, spinlock trials, and 
-# polling loops.
-# Spinlock trials will only be reported if 'TRACE_SPINLOCKS' is also true.
-# 'WITH_INTERNAL_TRACING' must be true for this to have effect.
-TRACE_BUSYWAITS=true
-
-# Trace spinlocks?
-# Set this variable to true if you want to use instrumentation which reports 
-# on entries into and exits from spinlocks and spinlock trials.
-# Spinlock trials will only be reported if 'TRACE_BUSYWAITS' is also true.
-# 'WITH_INTERNAL_TRACING' must be true for this to have effect.
-TRACE_SPINLOCKS=true
-
-# Trace memory copies?
-# Set this variable to true if you want to use instrumentation which reports
-# on the sizes of memory copies between various caches and buffers.
-# 'WITH_INTERNAL_TRACING' must be true for this to have effect.
-TRACE_MEMCOPIES=true
-
-# Trace data?
-# Set this variable to true if you want to use instrumentation which reports 
-# on the pieces of data handled by various levels of the processing stack.
-# WARNING! This can generate *very* large trace logs - use with caution or 
-# lots of free storage.
-# 'WITH_INTERNAL_TRACING' must be true for this to have effect.
-TRACE_DATA=false
-
 # Compile with performance metrics turned on?
-# Set this variable to true if you want to use instrumentation provided 
-# in the sources for performance measurement purposes 
+# Set this variable to true if you want to use instrumentation provided
+# in the sources for performance measurement purposes
 # and are willing to accept the overhead such instrumentation introduces.
 WITH_INTERNAL_METRICS=false
 
 
-### NOTE: No user-servicable parts below this line! ###
+PREFIX=/usr/local
+
+### NOTE: No user-serviceable parts below this line! ###
+
+INCLUDES= -I ../third-party/seqan/core/include/ 	\
+	  -I ../third-party/zlib/ 			\
+	  -I ../third-party/bzip2/ 			\
+	  -I ../third-party/smhasher/
 
-CXXFLAGS=
+CXXFLAGS=$(INCLUDES)
 CXX_WARNING_FLAGS=-Wall
 CXX_OPTIMIZATION_FLAGS=-O3
 CXX_SHARED_LIB_FLAGS=-fPIC
-CXXFLAGS+= $(CXX_WARNING_FLAGS) $(CXX_OPTIMIZATION_FLAGS) $(CXX_SHARED_LIB_FLAGS)
+CXXFLAGS+= 				\
+	   $(CXX_WARNING_FLAGS) 	\
+	   $(CXX_OPTIMIZATION_FLAGS)	\
+	   $(CXX_SHARED_LIB_FLAGS)
+
+CFLAGS=$(INCLUDES)
+C_WARNING_FLAGS=-Wall
+C_OPTIMIZATION_FLAGS=-O3
+C_SHARED_LIB_FLAGS=-fPIC
+CFLAGS+= $(C_WARNING_FLAGS) $(C_OPTIMIZATION_FLAGS) $(C_SHARED_LIB_FLAGS)
 
 LIBS=
 
 ifeq ($(WANT_DEBUGGING), true)
 CXX_DEBUG_FLAGS=-g
 CXXFLAGS+= $(CXX_DEBUG_FLAGS)
+CFLAGS+= $(CXX_DEBUG_FLAGS)
 else
 CXX_DEBUG_FLAGS=
 endif
@@ -92,6 +67,7 @@ endif
 ifeq ($(WANT_EXTRA_SANITY_CHECKING), true)
 DEFINE_KHMER_EXTRA_SANITY_CHECKS=-DKHMER_EXTRA_SANITY_CHECKS
 CXXFLAGS+= $(DEFINE_KHMER_EXTRA_SANITY_CHECKS)
+CFLAGS+= $(DEFINE_KHMER_EXTRA_SANITY_CHECKS)
 else
 DEFINE_KHMER_EXTRA_SANITY_CHECKS=
 endif
@@ -107,25 +83,6 @@ LIBS+= $(PROFILING_LIBS)
 endif
 endif
 
-ifeq ($(WITH_INTERNAL_TRACING), true)
-CXXFLAGS+= -DWITH_INTERNAL_TRACING
-ifeq ($(TRACE_STATE_CHANGES), true)
-CXXFLAGS+= -DTRACE_STATE_CHANGES
-endif
-ifeq ($(TRACE_BUSYWAITS), true)
-CXXFLAGS+= -DTRACE_BUSYWAITS
-endif
-ifeq ($(TRACE_SPINLOCKS), true)
-CXXFLAGS+= -DTRACE_SPINLOCKS
-endif
-ifeq ($(TRACE_MEMCOPIES), true)
-CXXFLAGS+= -DTRACE_MEMCOPIES
-endif
-ifeq ($(TRACE_DATA), true)
-CXXFLAGS+= -DTRACE_DATA
-endif
-endif
-
 ifeq ($(WITH_INTERNAL_METRICS), true)
 CXXFLAGS+= -DWITH_INTERNAL_METRICS
 endif
@@ -135,139 +92,170 @@ ifneq ($(shell uname), Linux)
 LIBS+= -pthread
 endif
 
-export CXX
-export CXXFLAGS
-export LIBS
 
-VERSION = $(shell ./get_version.py)
+HAVE_OPENMP=$(shell 						\
+	      $(CXX) -fopenmp -o chkomp .check_openmp.cc 	\
+	      2>/dev/null && echo true || echo false; 		\
+	      rm -f chkomp)
 
-export VERSION
+ifeq ($(HAVE_OPENMP), true)
+	CFLAGS += -fopenmp
+	CXXFLAGS += -fopenmp
+endif
+
+VERSION = $(shell python get_version.py)
+
+LIBVERSION = $(shell python get_version.py | sed -e 's/^\([^-]*\)-.*/\1/')
+LIBKHMERSO=libkhmer.so.$(LIBVERSION)
 
 CXXFLAGS+= -DVERSION=$(VERSION)
 
 NO_UNIQUE_RC=0
-
 CXXFLAGS+= -DNO_UNIQUE_RC=$(NO_UNIQUE_RC)
 
+export CXX
+export CFLAGS
+export CXXFLAGS
+export LIBS
+export VERSION
+
+
+#### Third party dependencies ####
+# ZLIB
 ZLIB_DIR=../third-party/zlib
 ZLIB_OBJS_BASE=\
-	adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \
-	zutil.o inflate.o infback.o inftrees.o inffast.o
+	adler32.o \
+	crc32.o \
+	deflate.o \
+	infback.o \
+	inffast.o \
+	inflate.o \
+	inftrees.o \
+	trees.o \
+	zutil.o \
+	compress.o \
+	uncompr.o \
+	gzclose.o \
+	gzlib.o \
+	gzread.o \
+	gzwrite.o
+ZLIB_PIC_OBJS_BASE=\
+	adler32.lo \
+	crc32.lo \
+	deflate.lo \
+	infback.lo \
+	inffast.lo \
+	inflate.lo \
+	inftrees.lo \
+	trees.lo \
+	zutil.lo \
+	compress.lo \
+	uncompr.lo \
+	gzclose.lo \
+	gzlib.lo \
+	gzread.lo \
+	gzwrite.lo
+
 ZLIB_OBJS=$(addprefix $(ZLIB_DIR)/, $(ZLIB_OBJS_BASE))
+ZLIB_PIC_OBJS=$(addprefix $(ZLIB_DIR)/, $(ZLIB_PIC_OBJS_BASE))
 
+# BZ2
 BZIP2_DIR=../third-party/bzip2
 BZIP2_OBJS_BASE= \
-	blocksort.o huffman.o crctable.o randtable.o compress.o \
-	decompress.o bzlib.o
-BZIP2_OBJS=$(addprefix $(BZIP2_DIR)/, $(BZIP2_OBJS_BASE))
+	blocksort.o \
+	huffman.o \
+	crctable.o \
+	randtable.o \
+	compress.o \
+	decompress.o \
+	bzlib.o
 
-#DRV_PROGS=bittest # test-StreamReader test-CacheManager test-Parser test-HashTables
-DRV_PROGS+=#graphtest #consume_prof
-AUX_PROGS=#ht-diff
+BZIP2_OBJS=$(addprefix $(BZIP2_DIR)/, $(BZIP2_OBJS_BASE))
 
-CORE_OBJS= error.o khmer_config.o thread_id_map.o trace_logger.o perf_metrics.o kmer_hash.o
-PARSERS_OBJS= read_parsers.o
 
-all: $(ZLIB_OBJS) $(BZIP2_OBJS) $(CORE_OBJS) $(PARSERS_OBJS) hashtable.o hashbits.o subset.o counting.o test aligner.o scoringmatrix.o node.o kmer.o
+#### khmer proper below here ####
+
+LIBKHMER_OBJS= \
+	counting.o \
+	hashbits.o \
+	hashtable.o \
+	hllcounter.o \
+	kmer_hash.o \
+	labelhash.o \
+	perf_metrics.o \
+	read_aligner.o \
+	read_parsers.o \
+	subset.o \
+	trace_logger.o \
+	murmur3.o \
+	$(BZIP2_OBJS) \
+	$(ZLIB_PIC_OBJS)
+
+KHMER_HEADERS= \
+	counting.hh \
+	hashbits.hh \
+	hashtable.hh \
+	khmer_exception.hh \
+	khmer.hh \
+	kmer_hash.hh \
+	labelhash.hh \
+	perf_metrics.hh \
+	primes.hh \
+	read_aligner.hh \
+	read_parsers.hh \
+	subset.hh \
+	trace_logger.hh
+
+TEST_PROGS = test-Colors test-read-aligner test-compile
+
+# START OF RULES #
+
+# The all rule comes first!
+all: $(LIBKHMERSO) libkhmer.a khmer.pc
 
 clean:
-	-(cd $(ZLIB_DIR) && make distclean)
+	rm -f *.o *.a *.so khmer.pc $(LIBKHMERSO) $(TEST_PROGS)
+	(cd $(ZLIB_DIR) && make distclean)
 	(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so clean)
-	rm -f *.o $(DRV_PROGS) $(AUX_PROGS) zlib/Makefile
-
-test: $(DRV_PROGS) $(AUX_PROGS)
-
-DRV_TEST_STREAM_READER_OBJS=test-StreamReader.o read_parsers.o $(CORE_OBJS) $(ZLIB_OBJS) $(BZIP2_OBJS)
-DRV_TEST_CACHE_MANAGER_OBJS=test-CacheManager.o read_parsers.o $(CORE_OBJS) $(ZLIB_OBJS) $(BZIP2_OBJS)
-DRV_TEST_PARSER_OBJS=test-Parser.o read_parsers.o $(CORE_OBJS) $(ZLIB_OBJS) $(BZIP2_OBJS)
-DRV_TEST_HASHTABLES_OBJS= \
-	test-HashTables.o counting.o hashbits.o hashtable.o subset.o \
-	$(PARSERS_OBJS) $(CORE_OBJS) $(ZLIB_OBJS) $(BZIP2_OBJS)
-HT_DIFF_OBJS=ht-diff.o counting.o hashtable.o $(PARSERS_OBJS) $(CORE_OBJS) $(ZLIB_OBJS) $(BZIP2_OBJS)
-DRV_TEST_COLORS_OBJS= \
-	counting.o hashbits.o hashtable.o subset.o \
-	$(PARSERS_OBJS) $(CORE_OBJS) $(ZLIB_OBJS) $(BZIP2_OBJS)
-test-StreamReader: $(DRV_TEST_STREAM_READER_OBJS)
-	$(CXX) -o $@ $(DRV_TEST_STREAM_READER_OBJS) $(LIBS)
-
-test-CacheManager: $(DRV_TEST_CACHE_MANAGER_OBJS)
-	$(CXX) -o $@ $(DRV_TEST_CACHE_MANAGER_OBJS) $(LIBS) -fopenmp
-
-test-Parser: $(DRV_TEST_PARSER_OBJS)
-	$(CXX) -o $@ $(DRV_TEST_PARSER_OBJS) $(LIBS) -fopenmp
-
-test-HashTables: $(DRV_TEST_HASHTABLES_OBJS)
-	$(CXX) -o $@ $(DRV_TEST_HASHTABLES_OBJS) $(LIBS) -fopenmp
-
 
-ht-diff: $(HT_DIFF_OBJS)
-	$(CXX) -o $@ $(HT_DIFF_OBJS) $(LIBS)
+test: $(TEST_PROGS)
 
-bittest: bittest.o kmer_hash.o
-	$(CXX) $(CXXFLAGS) -o $@ bittest.o kmer_hash.o
+install: $(LIBKHMERSO) libkhmer.a khmer.pc $(KHMER_HEADERS)
+	mkdir -p $(PREFIX)/lib $(PREFIX)/lib/pkgconfig $(PREFIX)/include/
+	cp -r $(KHMER_HEADERS) \
+		../third-party/smhasher/MurmurHash3.h \
+		$(PREFIX)/include/
+	cp khmer.pc $(PREFIX)/lib/pkgconfig/
+	cp $(LIBKHMERSO) libkhmer.a $(PREFIX)/lib
+	ln -sf $(PREFIX)/lib/$(LIBKHMERSO) $(PREFIX)/lib/libkhmer.so
 
-# NOTE: Disabled due to broken constructor call.
-#graphtest: graphtest.o kmer_hash.o hashtable.o
-#	$(CXX) -o $@ graphtest.o kmer_hash.o hashtable.o
-
-# NOTE: Disabled due to broken constructor call.
-#consume_prof: consume_prof.o hashtable.o kmer_hash.o $(PARSERS_OBJS)
-#	$(CXX) -o $@ consume_prof.o hashtable.o kmer_hash.o $(PARSERS_OBJS) $(LIBS)
+khmer.pc: khmer.pc.in
+	sed -e 's,@prefix@,$(PREFIX),'  -e 's,@VERSION@,$(VERSION),' $< >$@
 
 $(ZLIB_OBJS):
-	(cd $(ZLIB_DIR) && ./configure --shared && make libz.a)
-
-$(BZIP2_OBJS):
-	(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so all)
-
-error.o: error.cc error.hh
-
-khmer_config.o: khmer_config.cc khmer_config.hh
-
-thread_id_map.o: thread_id_map.cc thread_id_map.hh
-
-trace_logger.o: trace_logger.cc trace_logger.hh
+	(cd $(ZLIB_DIR) && ./configure && make $(ZLIB_OBJS_BASE))
 
-perf_metrics.o: perf_metrics.cc perf_metrics.hh
+$(ZLIB_PIC_OBJS):
+	(cd $(ZLIB_DIR) && ./configure && make $(ZLIB_PIC_OBJS_BASE))
 
-read_parsers.o: read_parsers.cc read_parsers.hh
-
-aligntest: aligntest.o zlib parsers.o kmer_hash.o hashtable.o hashbits.o subset.o counting.o aligner.o scoringmatrix.o node.o kmer.o
-	$(CXX) -pg -o aligntest aligntest.o aligner.o scoringmatrix.o node.o kmer.o $(Z_LIB_FILES) parsers.o kmer_hash.o hashtable.o hashbits.o subset.o counting.o
-
-bugtest: bugtest.o zlib parsers.o kmer_hash.o hashtable.o hashbits.o subset.o counting.o aligner.o scoringmatrix.o node.o kmer.o
-	$(CXX) -pg -o bugtest bugtest.o aligner.o scoringmatrix.o node.o kmer.o $(Z_LIB_FILES) parsers.o kmer_hash.o hashtable.o hashbits.o subset.o counting.o
-
-kmer.o: kmer.cc kmer.hh
-
-node.o: node.cc node.hh
-
-aligner.o: aligner.cc aligner.hh
-
-scoringmatrix.o: scoringmatrix.cc scoringmatrix.hh
-
-parsers.o: parsers.cc parsers.hh
-
-kmer_hash.o: kmer_hash.cc kmer_hash.hh
-
-hashtable.o: hashtable.cc hashtable.hh kmer_hash.hh khmer.hh
-
-hashbits.o: hashbits.cc hashbits.hh subset.hh hashtable.hh kmer_hash.hh khmer.hh counting.hh
-
-subset.o: subset.cc subset.hh hashbits.hh kmer_hash.hh khmer.hh
-
-counting.o: counting.cc counting.hh hashtable.hh kmer_hash.hh khmer.hh
-
-test-StreamReader.o: test-StreamReader.cc read_parsers.hh
+$(BZIP2_OBJS):
+	(cd $(BZIP2_DIR) && make -f Makefile-libbz2_so $(BZIP2_OBJS_BASE))
 
-test-CacheManager.o: test-CacheManager.cc read_parsers.hh
-	$(CXX) $(CXXFLAGS) -c -o $@ test-CacheManager.cc -fopenmp
+# MurMur3
+murmur3.o: ../third-party/smhasher/MurmurHash3.cc
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
 
-test-Parser.o: test-Parser.cc read_parsers.hh
-	$(CXX) $(CXXFLAGS) -c -o $@ test-Parser.cc -fopenmp
+%.o: %.cc $(ZLIB_OBJS) $(ZLIB_PIC_OBJS) $(BZIP2_OBJS) $(KHMER_HEADERS)
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
 
-test-HashTables.o: test-HashTables.cc read_parsers.hh primes.hh
-	$(CXX) $(CXXFLAGS) -c -o $@ test-HashTables.cc -fopenmp
+$(LIBKHMERSO): $(LIBKHMER_OBJS)
+	$(CXX) $(CXXFLAGS) -shared -o $@ $(LIBKHMER_OBJS)
+	ln -sf $(LIBKHMERSO) libkhmer.so
 
-ht-diff.o: counting.hh hashtable.hh kmer_hash.hh khmer.hh
+libkhmer.a: $(LIBKHMER_OBJS)
+	ar rcs $@ $(LIBKHMER_OBJS)
+	ranlib $@
 
+# catch-all rule for test drivers
+test-%: test-%.cc libkhmer.a
+	$(CXX) $(CXXFLAGS) -I . -o $@ $< libkhmer.a
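
The HAVE_OPENMP block above is a compile-time feature test: build
.check_openmp.cc with -fopenmp and see whether the compiler accepts it. A
rough Python rendering of the same probe, for readers porting the idea
elsewhere (compiler name and file paths are assumptions):

    import os
    import subprocess

    def have_openmp(cxx='c++', source='.check_openmp.cc'):
        """Return True if `cxx -fopenmp` can compile the probe file."""
        devnull = open(os.devnull, 'w')
        try:
            rc = subprocess.call([cxx, '-fopenmp', '-o', 'chkomp', source],
                                 stdout=devnull, stderr=devnull)
        except OSError:                      # compiler itself not found
            return False
        finally:
            devnull.close()
            if os.path.exists('chkomp'):     # clean up, as the Makefile does
                os.remove('chkomp')
        return rc == 0
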
diff --git a/lib/counting.cc b/lib/counting.cc
index b0b044d..b201bdc 100644
--- a/lib/counting.cc
+++ b/lib/counting.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// Copyright (C) Michigan State University, 2009-2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -14,6 +14,7 @@
 #include <math.h>
 #include <algorithm>
 #include <sstream>
+#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -47,6 +48,9 @@ void CountingHash::output_fasta_kmer_pos_freq(
     }
 
     delete parser;
+    if (outfile.fail()) {
+        throw khmer_file_exception(strerror(errno));
+    }
 
     outfile.close();
 }
@@ -115,28 +119,31 @@ CountingHash::abundance_distribution(
         throw khmer_exception();
     }
 
-    while(!parser->is_complete()) {
-        read = parser->get_next_read();
-        seq = read.sequence;
+    try {
+        while(!parser->is_complete()) {
+            read = parser->get_next_read();
+            seq = read.sequence;
 
-        if (check_and_normalize_read(seq)) {
-            KMerIterator kmers(seq.c_str(), _ksize);
+            if (check_and_normalize_read(seq)) {
+                KMerIterator kmers(seq.c_str(), _ksize);
 
-            while(!kmers.done()) {
-                HashIntoType kmer = kmers.next();
+                while(!kmers.done()) {
+                    HashIntoType kmer = kmers.next();
 
-                if (!tracking->get_count(kmer)) {
-                    tracking->count(kmer);
+                    if (!tracking->get_count(kmer)) {
+                        tracking->count(kmer);
 
-                    BoundedCounterType n = get_count(kmer);
-                    dist[n]++;
+                        BoundedCounterType n = get_count(kmer);
+                        dist[n]++;
+                    }
                 }
+
+                name.clear();
+                seq.clear();
             }
 
-            name.clear();
-            seq.clear();
         }
-
+    } catch (NoMoreReadsAvailable) {
     }
     return dist;
 }
@@ -487,7 +494,7 @@ CountingHashFileReader::CountingHashFileReader(
         } else {
             err = "Unknown error in opening file: " + infilename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err + " " + strerror(errno));
     }
 
     if (ht._counts) {
@@ -513,12 +520,12 @@ CountingHashFileReader::CountingHashFileReader(
             err << "Incorrect file format version " << (int) version
                 << " while reading k-mer count file from " << infilename
                 << "; should be " << (int) SAVED_FORMAT_VERSION;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         } else if (!(ht_type == SAVED_COUNTING_HT)) {
             std::ostringstream err;
             err << "Incorrect file format type " << (int) ht_type
                 << " while reading k-mer count file from " << infilename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
 
         infile.read((char *) &use_bigcount, 1);
@@ -575,9 +582,10 @@ CountingHashFileReader::CountingHashFileReader(
         if (infile.eof()) {
             err = "Unexpected end of k-mer count file: " + infilename;
         } else {
-            err = "Error reading from k-mer count file: " + infilename;
+            err = "Error reading from k-mer count file: " + infilename + " "
+                  + strerror(errno);
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 }
 
@@ -588,7 +596,7 @@ CountingHashGzFileReader::CountingHashGzFileReader(
     gzFile infile = gzopen(infilename.c_str(), "rb");
     if (infile == Z_NULL) {
         std::string err = "Cannot open k-mer count file: " + infilename;
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     if (ht._counts) {
@@ -610,9 +618,10 @@ CountingHashGzFileReader::CountingHashGzFileReader(
     int read_t = gzread(infile, (char *) &ht_type, 1);
 
     if (read_v <= 0 || read_t <= 0) {
-        std::string err = "K-mer count file read error: " + infilename;
+        std::string err = "K-mer count file read error: " + infilename + " "
+                          + strerror(errno);
         gzclose(infile);
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     } else if (!(version == SAVED_FORMAT_VERSION)
                || !(ht_type == SAVED_COUNTING_HT)) {
         if (!(version == SAVED_FORMAT_VERSION)) {
@@ -621,13 +630,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                 << " while reading k-mer count file from " << infilename
                 << "; should be " << (int) SAVED_FORMAT_VERSION;
             gzclose(infile);
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         } else if (!(ht_type == SAVED_COUNTING_HT)) {
             std::ostringstream err;
             err << "Incorrect file format type " << (int) ht_type
                 << " while reading k-mer count file from " << infilename;
             gzclose(infile);
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
     }
 
@@ -637,9 +646,10 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                          sizeof(save_n_tables));
 
     if (read_b <= 0 || read_k <= 0 || read_nt <= 0) {
-        std::string err = "K-mer count file header read error: " + infilename;
+        std::string err = "K-mer count file header read error: " + infilename
+                          + " " + strerror(errno);
         gzclose(infile);
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     ht._ksize = (WordLength) save_ksize;
@@ -656,10 +666,16 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                         sizeof(save_tablesize));
 
         if (read_b <= 0) {
-            std::string err = "K-mer count file header read error: " \
+            std::string gzerr = gzerror(infile, &read_b);
+            std::string err = "K-mer count file header read error: "
                               + infilename;
+            if (read_b == Z_ERRNO) {
+                err = err + " " + strerror(errno);
+            } else {
+                err = err + " " + gzerr;
+            }
             gzclose(infile);
-            throw khmer_file_exception(err.c_str());
+            throw khmer_file_exception(err);
         }
 
         tablesize = (HashIntoType) save_tablesize;
@@ -673,9 +689,15 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                             (unsigned) (tablesize - loaded));
 
             if (read_b <= 0) {
+                std::string gzerr = gzerror(infile, &read_b);
                 std::string err = "K-mer count file read error: " + infilename;
+                if (read_b == Z_ERRNO) {
+                    err = err + " " + strerror(errno);
+                } else {
+                    err = err + " " + gzerr;
+                }
                 gzclose(infile);
-                throw khmer_file_exception(err.c_str());
+                throw khmer_file_exception(err);
             }
 
             loaded += read_b;
@@ -685,9 +707,15 @@ CountingHashGzFileReader::CountingHashGzFileReader(
     HashIntoType n_counts = 0;
     read_b = gzread(infile, (char *) &n_counts, sizeof(n_counts));
     if (read_b <= 0) {
+        std::string gzerr = gzerror(infile, &read_b);
         std::string err = "K-mer count header read error: " + infilename;
+        if (read_b == Z_ERRNO) {
+            err = err + " " + strerror(errno);
+        } else {
+            err = err + " " + gzerr;
+        }
         gzclose(infile);
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     if (n_counts) {
@@ -701,9 +729,15 @@ CountingHashGzFileReader::CountingHashGzFileReader(
             int read_c = gzread(infile, (char *) &count, sizeof(count));
 
             if (read_k <= 0 || read_c <= 0) {
+                std::string gzerr = gzerror(infile, &read_b);
                 std::string err = "K-mer count read error: " + infilename;
+                if (read_b == Z_ERRNO) {
+                    err = err + " " + strerror(errno);
+                } else {
+                    err = err + " " + gzerr;
+                }
                 gzclose(infile);
-                throw khmer_file_exception(err.c_str());
+                throw khmer_file_exception(err);
             }
 
             ht._bigcounts[kmer] = count;
@@ -761,7 +795,7 @@ CountingHashFileWriter::CountingHashFileWriter(
         }
     }
     if (outfile.fail()) {
-        perror("Hash writing file access failure:");
+        throw khmer_file_exception(strerror(errno));
     }
     outfile.close();
 }
@@ -774,11 +808,20 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
         throw khmer_exception();
     }
 
+    int errnum = 0;
     unsigned int save_ksize = ht._ksize;
     unsigned char save_n_tables = ht._n_tables;
     unsigned long long save_tablesize;
 
     gzFile outfile = gzopen(outfilename.c_str(), "wb");
+    if (outfile == NULL) {
+        const char * error = gzerror(outfile, &errnum);
+        if (errnum == Z_ERRNO) {
+            throw khmer_file_exception(strerror(errno));
+        } else {
+            throw khmer_file_exception(error);
+        }
+    }
 
     unsigned char version = SAVED_FORMAT_VERSION;
     gzwrite(outfile, (const char *) &version, 1);
@@ -818,7 +861,12 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
             gzwrite(outfile, (const char *) &it->second, sizeof(it->second));
         }
     }
-
+    const char * error = gzerror(outfile, &errnum);
+    if (errnum == Z_ERRNO) {
+        throw khmer_file_exception(strerror(errno));
+    } else if (errnum != Z_OK) {
+        throw khmer_file_exception(error);
+    }
     gzclose(outfile);
 }
 
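
The gzerror()/strerror() handling added above separates OS-level failures
(Z_ERRNO, consult errno) from zlib stream errors. A loose Python analogue of
that reporting split (the function and file name are hypothetical, and no
khmer table format is parsed):

    import gzip

    def load_gz_table(filename):
        """Read a gzipped file, reporting OS and stream errors separately."""
        try:
            with gzip.open(filename, 'rb') as infile:
                return infile.read()
        except IOError as error:
            if error.errno is not None:      # OS-level: report strerror
                raise RuntimeError('K-mer count file read error: %s %s'
                                   % (filename, error.strerror))
            raise RuntimeError('K-mer count file read error: %s (corrupt '
                               'gzip stream?)' % filename)
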
diff --git a/lib/counting.hh b/lib/counting.hh
index aae5b87..31d0f0f 100644
--- a/lib/counting.hh
+++ b/lib/counting.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// Copyright (C) Michigan State University, 2009-2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -87,6 +87,13 @@ public:
         }
     }
 
+    // Writing to the tables outside of defined methods has undefined behavior!
+    // As such, this should only be used to return read-only interfaces
+    Byte ** get_raw_tables()
+    {
+        return _counts;
+    }
+
     virtual BoundedCounterType test_and_set_bits(const char * kmer)
     {
         BoundedCounterType x = get_count(kmer); // @CTB just hash it, yo.
@@ -126,6 +133,11 @@ public:
         return _tablesizes[0];
     }
 
+    const size_t n_tables() const
+    {
+        return _n_tables;
+    }
+
     // count number of occupied bins
     virtual const HashIntoType n_occupied(HashIntoType start=0,
                                           HashIntoType stop=0) const
diff --git a/lib/get_version.py b/lib/get_version.py
old mode 100755
new mode 100644
index f9b0a1f..6ad89b8
--- a/lib/get_version.py
+++ b/lib/get_version.py
@@ -1,16 +1,10 @@
-#!/usr/bin/env python
-""" Extracts the version of the khmer project. """
-
 import sys
-import pkg_resources
+sys.path.insert(0, '../')
+import versioneer
+versioneer.VCS = 'git'
+versioneer.versionfile_source = '../khmer/_version.py'
+versioneer.versionfile_build = '../khmer/_version.py'
+versioneer.tag_prefix = 'v'  # tags are like v1.2.0
+versioneer.parentdir_prefix = '..'
 
-try:
-    print pkg_resources.get_distribution(  # pylint: disable=E1103
-        'khmer').version
-except pkg_resources.DistributionNotFound:
-    print 'To build the khmer library, the distribution information'
-    print 'has to be available.  Either install the package into your'
-    print 'development environment or run "setup.py develop" to setup the'
-    print 'metadata.  A virtualenv is recommended!'
-    sys.exit(1)
-del pkg_resources
+print versioneer.get_version()
diff --git a/lib/graphtest.cc b/lib/graphtest.cc
index c19898c..438971c 100644
--- a/lib/graphtest.cc
+++ b/lib/graphtest.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/hashbits.cc b/lib/hashbits.cc
index ab46942..270f6d1 100644
--- a/lib/hashbits.cc
+++ b/lib/hashbits.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -11,6 +11,7 @@
 #include "read_parsers.hh"
 
 #include <sstream>
+#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -45,9 +46,16 @@ void Hashbits::save(std::string outfilename)
 
         outfile.write((const char *) _counts[i], tablebytes);
     }
+    if (outfile.fail()) {
+        throw khmer_file_exception(strerror(errno));
+    }
     outfile.close();
 }
 
+/**
+ * Loads @param infilename into Hashbits, with error checking on
+ * file type and file version.  Populates _counts internally.
+ */
 void Hashbits::load(std::string infilename)
 {
     ifstream infile;
@@ -65,7 +73,7 @@ void Hashbits::load(std::string infilename)
         } else {
             err = "Unknown error in opening file: " + infilename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     if (_counts) {
@@ -91,12 +99,12 @@ void Hashbits::load(std::string infilename)
             err << "Incorrect file format version " << (int) version
                 << " while reading k-mer graph from " << infilename
                 << "; should be " << (int) SAVED_FORMAT_VERSION;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         } else if (!(ht_type == SAVED_HASHBITS)) {
             std::ostringstream err;
             err << "Incorrect file format type " << (int) ht_type
                 << " while reading k-mer graph from " << infilename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
 
         infile.read((char *) &save_ksize, sizeof(save_ksize));
@@ -133,16 +141,14 @@ void Hashbits::load(std::string infilename)
         } else {
             err = "Error reading from k-mer graph file: " + infilename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 }
 
-// for counting overlap k-mers specifically!!
-
-//
-// check_and_process_read: checks for non-ACGT characters before consuming
-//
-
+/**
+ * Checks for non-ACGT characters before consuming read.
+ * This is specifically for counting overlap k-mers.
+ */
 unsigned int Hashbits::check_and_process_read_overlap(std::string &read,
         bool &is_valid,
         Hashbits &ht2)
@@ -156,16 +162,13 @@ unsigned int Hashbits::check_and_process_read_overlap(std::string &read,
     return consume_string_overlap(read, ht2);
 }
 
-//
-// consume_fasta: consume a FASTA file of reads
-//
-
+/**
+ * Consume a FASTA file of reads.
+ */
 void Hashbits::consume_fasta_overlap(const std::string &filename,
                                      HashIntoType curve[2][100],Hashbits &ht2,
                                      unsigned int &total_reads,
-                                     unsigned long long &n_consumed,
-                                     CallbackFn callback,
-                                     void * callback_data)
+                                     unsigned long long &n_consumed)
 {
     total_reads = 0;
     n_consumed = 0;
@@ -224,24 +227,14 @@ void Hashbits::consume_fasta_overlap(const std::string &filename,
             curve[0][total_reads/block_size-1] = n_overlap_kmers();
             curve[1][total_reads/block_size-1] = n_unique_kmers();
         }
-        // run callback, if specified
-        if (total_reads % CALLBACK_PERIOD == 0 && callback) {
-            try {
-                callback("consume_fasta", callback_data, total_reads, n_consumed);
-            } catch (...) {
-                throw;
-            }
-        }
-
     } // while
 
     delete parser;
 }
 
-//
-// consume_string: run through every k-mer in the given string, & hash it.
-//
-
+/**
+ * Run through every k-mer in the given string, & hash it.
+ */
 unsigned int Hashbits::consume_string_overlap(const std::string &s,
         Hashbits &ht2)
 {
diff --git a/lib/hashbits.hh b/lib/hashbits.hh
index 74a7d14..894c526 100644
--- a/lib/hashbits.hh
+++ b/lib/hashbits.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -68,11 +68,17 @@ public:
 
     }
 
+    // Accessors for protected/private table info members
     std::vector<HashIntoType> get_tablesizes() const
     {
         return _tablesizes;
     }
 
+    const size_t n_tables() const
+    {
+        return _n_tables;
+    }
+
     virtual void save(std::string);
     virtual void load(std::string);
 
@@ -81,11 +87,7 @@ public:
                                HashIntoType curve[2][100],
                                khmer::Hashbits &ht2,
                                unsigned int &total_reads,
-                               unsigned long long &n_consumed,
-                               CallbackFn callback,
-                               void * callback_data);
-
-
+                               unsigned long long &n_consumed);
 
     // just for overlap k-mer counting!
     unsigned int check_and_process_read_overlap(std::string &read,
diff --git a/lib/hashtable.cc b/lib/hashtable.cc
index 1a7bc99..726484e 100644
--- a/lib/hashtable.cc
+++ b/lib/hashtable.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -12,6 +12,7 @@
 
 #include <algorithm>
 #include <sstream>
+#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -120,8 +121,7 @@ void
 Hashtable::
 consume_fasta(
     std:: string const  &filename,
-    unsigned int	      &total_reads, unsigned long long	&n_consumed,
-    CallbackFn	      callback,	    void *		callback_data
+    unsigned int	      &total_reads, unsigned long long	&n_consumed
 )
 {
     IParser *	  parser =
@@ -129,8 +129,7 @@ consume_fasta(
 
     consume_fasta(
         parser,
-        total_reads, n_consumed,
-        callback, callback_data
+        total_reads, n_consumed
     );
 
     delete parser;
@@ -140,13 +139,9 @@ void
 Hashtable::
 consume_fasta(
     read_parsers:: IParser *  parser,
-    unsigned int		    &total_reads, unsigned long long  &n_consumed,
-    CallbackFn		    callback,	  void *	      callback_data
+    unsigned int		    &total_reads, unsigned long long  &n_consumed
 )
 {
-#if (0) // Note: Used with callback - currently disabled.
-    unsigned long long int  n_consumed_LOCAL	= 0;
-#endif
     Read			  read;
 
     // Iterate through the reads and consume their k-mers.
@@ -159,28 +154,8 @@ consume_fasta(
             unsigned int this_n_consumed =
                 check_and_process_read(read.sequence, is_valid);
 
-#if (0) // Note: Used with callback - currently disabled.
-            n_consumed_LOCAL  = __sync_add_and_fetch( &n_consumed, this_n_consumed );
-#else
             __sync_add_and_fetch( &n_consumed, this_n_consumed );
-#endif
             __sync_add_and_fetch( &total_reads, 1 );
-
-            // TODO: Figure out alternative to callback into Python VM
-            //       Cannot use in multi-threaded operation.
-#if (0)
-            // run callback, if specified
-            if (callback && (0 == (total_reads_LOCAL % CALLBACK_PERIOD))) {
-                try {
-                    callback(
-                        "consume_fasta", callback_data,
-                        total_reads_LOCAL, n_consumed_LOCAL
-                    );
-                } catch (...) {
-                    throw;
-                }
-            }
-#endif // 0
         } catch (read_parsers::NoMoreReadsAvailable) {
         }
 
@@ -282,6 +257,10 @@ void Hashtable::save_tagset(std::string outfilename)
     }
 
     outfile.write((const char *) buf, sizeof(HashIntoType) * tagset_size);
+    if (outfile.fail()) {
+        delete[] buf;
+        throw khmer_file_exception(strerror(errno));
+    }
     outfile.close();
 
     delete[] buf;
@@ -304,7 +283,7 @@ void Hashtable::load_tagset(std::string infilename, bool clear_tags)
         } else {
             err = "Unknown error in opening file: " + infilename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     if (clear_tags) {
@@ -325,12 +304,12 @@ void Hashtable::load_tagset(std::string infilename, bool clear_tags)
             err << "Incorrect file format version " << (int) version
                 << " while reading tagset from " << infilename
                 << "; should be " << (int) SAVED_FORMAT_VERSION;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         } else if (!(ht_type == SAVED_TAGS)) {
             std::ostringstream err;
             err << "Incorrect file format type " << (int) ht_type
                 << " while reading tagset from " << infilename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
 
         infile.read((char *) &save_ksize, sizeof(save_ksize));
@@ -338,7 +317,7 @@ void Hashtable::load_tagset(std::string infilename, bool clear_tags)
             std::ostringstream err;
             err << "Incorrect k-mer size " << save_ksize
                 << " while reading tagset from " << infilename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
 
         infile.read((char *) &tagset_size, sizeof(tagset_size));
@@ -358,7 +337,7 @@ void Hashtable::load_tagset(std::string infilename, bool clear_tags)
         if (buf != NULL) {
             delete[] buf;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 }
 
@@ -445,8 +424,7 @@ void
 Hashtable::
 consume_fasta_and_tag(
     std:: string const  &filename,
-    unsigned int	      &total_reads, unsigned long long	&n_consumed,
-    CallbackFn	      callback,	    void *		callback_data
+    unsigned int	      &total_reads, unsigned long long	&n_consumed
 )
 {
     IParser *	  parser =
@@ -454,8 +432,7 @@ consume_fasta_and_tag(
 
     consume_fasta_and_tag(
         parser,
-        total_reads, n_consumed,
-        callback, callback_data
+        total_reads, n_consumed
     );
 
     delete parser;
@@ -465,13 +442,9 @@ void
 Hashtable::
 consume_fasta_and_tag(
     read_parsers:: IParser *  parser,
-    unsigned int		    &total_reads,   unsigned long long	&n_consumed,
-    CallbackFn		    callback,	    void *		callback_data
+    unsigned int		    &total_reads,   unsigned long long	&n_consumed
 )
 {
-#if (0) // Note: Used with callback - currently disabled.
-    unsigned long long int  n_consumed_LOCAL	= 0;
-#endif
     Read			  read;
 
     // TODO? Delete the following assignments.
@@ -487,30 +460,9 @@ consume_fasta_and_tag(
             unsigned long long this_n_consumed = 0;
             consume_sequence_and_tag( read.sequence, this_n_consumed );
 
-#if (0) // Note: Used with callback - currently disabled.
-            n_consumed_LOCAL  = __sync_add_and_fetch( &n_consumed, this_n_consumed );
-#else
             __sync_add_and_fetch( &n_consumed, this_n_consumed );
-#endif
             __sync_add_and_fetch( &total_reads, 1 );
         }
-
-        // TODO: Figure out alternative to callback into Python VM
-        //       Cannot use in multi-threaded operation.
-#if (0)
-        // run callback, if specified
-        if (total_reads_TL % CALLBACK_PERIOD == 0 && callback) {
-            std::cout << "n tags: " << all_tags.size() << "\n";
-            try {
-                callback("consume_fasta_and_tag", callback_data, total_reads_TL,
-                         n_consumed);
-            } catch (...) {
-                delete parser;
-                throw;
-            }
-        }
-#endif // 0
-
     } // while reads left for parser
 
 }
@@ -523,9 +475,7 @@ consume_fasta_and_tag(
 
 void Hashtable::consume_fasta_and_tag_with_stoptags(const std::string &filename,
         unsigned int &total_reads,
-        unsigned long long &n_consumed,
-        CallbackFn callback,
-        void * callback_data)
+        unsigned long long &n_consumed)
 {
     total_reads = 0;
     n_consumed = 0;
@@ -614,17 +564,6 @@ void Hashtable::consume_fasta_and_tag_with_stoptags(const std::string &filename,
         // reset the sequence info, increment read number
         total_reads++;
 
-        // run callback, if specified
-        if (total_reads % CALLBACK_PERIOD == 0 && callback) {
-            std::cout << "n tags: " << all_tags.size() << "\n";
-            try {
-                callback("consume_fasta_and_tag", callback_data, total_reads,
-                         n_consumed);
-            } catch (...) {
-                delete parser;
-                throw;
-            }
-        }
     }
     delete parser;
 }
@@ -655,9 +594,7 @@ void Hashtable::divide_tags_into_subsets(unsigned int subset_size,
 
 void Hashtable::consume_partitioned_fasta(const std::string &filename,
         unsigned int &total_reads,
-        unsigned long long &n_consumed,
-        CallbackFn callback,
-        void * callback_data)
+        unsigned long long &n_consumed)
 {
     total_reads = 0;
     n_consumed = 0;
@@ -696,17 +633,6 @@ void Hashtable::consume_partitioned_fasta(const std::string &filename,
 
         // reset the sequence info, increment read number
         total_reads++;
-
-        // run callback, if specified
-        if (total_reads % CALLBACK_PERIOD == 0 && callback) {
-            try {
-                callback("consume_partitioned_fasta", callback_data, total_reads,
-                         n_consumed);
-            } catch (...) {
-                delete parser;
-                throw;
-            }
-        }
     }
 
     delete parser;
@@ -926,9 +852,7 @@ const
 }
 
 void Hashtable::filter_if_present(const std::string &infilename,
-                                  const std::string &outputfile,
-                                  CallbackFn callback,
-                                  void * callback_data)
+                                  const std::string &outputfile)
 {
     IParser* parser = IParser::get_parser(infilename);
     ofstream outfile(outputfile.c_str());
@@ -964,18 +888,6 @@ void Hashtable::filter_if_present(const std::string &infilename,
             }
 
             total_reads++;
-
-            // run callback, if specified
-            if (total_reads % CALLBACK_PERIOD == 0 && callback) {
-                try {
-                    callback("filter_if_present", callback_data,total_reads, reads_kept);
-                } catch (...) {
-                    delete parser;
-                    parser = NULL;
-                    outfile.close();
-                    throw;
-                }
-            }
         }
     }
 
@@ -1345,7 +1257,7 @@ void Hashtable::load_stop_tags(std::string infilename, bool clear_tags)
         } else {
             err = "Unknown error in opening file: " + infilename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     if (clear_tags) {
@@ -1365,12 +1277,12 @@ void Hashtable::load_stop_tags(std::string infilename, bool clear_tags)
             err << "Incorrect file format version " << (int) version
                 << " while reading stoptags from " << infilename
                 << "; should be " << (int) SAVED_FORMAT_VERSION;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         } else if (!(ht_type == SAVED_STOPTAGS)) {
             std::ostringstream err;
             err << "Incorrect file format type " << (int) ht_type
                 << " while reading stoptags from " << infilename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
 
         infile.read((char *) &save_ksize, sizeof(save_ksize));
@@ -1378,7 +1290,7 @@ void Hashtable::load_stop_tags(std::string infilename, bool clear_tags)
             std::ostringstream err;
             err << "Incorrect k-mer size " << save_ksize
                 << " while reading stoptags from " << infilename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
         infile.read((char *) &tagset_size, sizeof(tagset_size));
 
@@ -1392,7 +1304,7 @@ void Hashtable::load_stop_tags(std::string infilename, bool clear_tags)
         delete[] buf;
     } catch (std::ifstream::failure &e) {
         std::string err = "Error reading stoptags from: " + infilename;
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 }
 
diff --git a/lib/hashtable.hh b/lib/hashtable.hh
index 859e182..dd0f521 100644
--- a/lib/hashtable.hh
+++ b/lib/hashtable.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -239,6 +239,9 @@ protected:
     }
 
     uint32_t _all_tags_spin_lock;
+
+    NONCOPYABLE(Hashtable);
+
 public:
     SubsetPartition * partition;
     SeenSet all_tags;
@@ -278,18 +281,14 @@ public:
     void consume_fasta(
         std::string const   &filename,
         unsigned int	    &total_reads,
-        unsigned long long  &n_consumed,
-        CallbackFn	    callback	    = NULL,
-        void *		    callback_data   = NULL
+        unsigned long long  &n_consumed
     );
     // Count every k-mer from a stream of FASTA or FASTQ reads,
     // using the supplied parser.
     void consume_fasta(
         read_parsers:: IParser *	    parser,
         unsigned int	    &total_reads,
-        unsigned long long  &n_consumed,
-        CallbackFn	    callback	    = NULL,
-        void *		    callback_data   = NULL
+        unsigned long long  &n_consumed
     );
 
     void get_median_count(const std::string &s,
@@ -356,9 +355,7 @@ public:
     void consume_fasta_and_tag(
         std::string const	  &filename,
         unsigned int	  &total_reads,
-        unsigned long long  &n_consumed,
-        CallbackFn	  callback	  = NULL,
-        void *		  callback_data	  = NULL
+        unsigned long long  &n_consumed
     );
 
     // Count every k-mer from a stream of FASTA or FASTQ reads,
@@ -367,9 +364,7 @@ public:
     void consume_fasta_and_tag(
         read_parsers:: IParser *	    parser,
         unsigned int	    &total_reads,
-        unsigned long long  &n_consumed,
-        CallbackFn	    callback	    = NULL,
-        void *		    callback_data   = NULL
+        unsigned long long  &n_consumed
     );
 
     void consume_sequence_and_tag(const std::string& seq,
@@ -379,9 +374,7 @@ public:
 
     void consume_fasta_and_tag_with_stoptags(const std::string &filename,
             unsigned int &total_reads,
-            unsigned long long &n_consumed,
-            CallbackFn callback = 0,
-            void * callback_data = 0);
+            unsigned long long &n_consumed);
     void consume_fasta_and_traverse(const std::string &filename,
                                     unsigned int distance,
                                     unsigned int big_threshold,
@@ -390,17 +383,13 @@ public:
 
     void consume_partitioned_fasta(const std::string &filename,
                                    unsigned int &total_reads,
-                                   unsigned long long &n_consumed,
-                                   CallbackFn callback = 0,
-                                   void * callback_data = 0);
+                                   unsigned long long &n_consumed);
 
     virtual BoundedCounterType test_and_set_bits(const char * kmer) = 0;
     virtual BoundedCounterType test_and_set_bits(HashIntoType khash) = 0;
 
     void filter_if_present(const std::string &infilename,
-                           const std::string &outputfilename,
-                           CallbackFn callback=0,
-                           void * callback_data=0);
+                           const std::string &outputfilename);
 
     unsigned int count_kmers_within_radius(HashIntoType kmer_f,
                                            HashIntoType kmer_r,
diff --git a/lib/hllcounter.cc b/lib/hllcounter.cc
new file mode 100644
index 0000000..299e70e
--- /dev/null
+++ b/lib/hllcounter.cc
@@ -0,0 +1,461 @@
+//
+// This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+// Copyright (C) Michigan State University, 2014-2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
+// Contact: khmer-project at idyll.org
+//
+
+#include "hllcounter.hh"
+
+#include <math.h>
+#include <algorithm>
+#include <numeric>
+#include <inttypes.h>
+#include <sstream>
+
+#include "khmer.hh"
+#include "kmer_hash.hh"
+#include "read_parsers.hh"
+
+#ifdef _OPENMP
+#include <omp.h>
+#else
+#define omp_get_thread_num() 0
+#define omp_get_num_threads() 1
+#endif
+
+// arr_len(a) yields a pointer one past the end of array a -- an end
+// iterator for the std::vector::assign calls below.
+#define arr_len(a) (a + sizeof a / sizeof a[0])
+
+using namespace khmer;
+
+std::map<int, std::vector<double> > rawEstimateData;
+std::map<int, std::vector<double> > biasData;
+
+
+double calc_alpha(const int p)
+{
+    if (p < 4) {
+        // ceil(log2((1.04 / x) ^ 2)) = 4, solve for x
+        throw InvalidValue("Please set error rate to a value "
+                           "smaller than 0.367696");
+    } else if (p > 16) {
+        // ceil(log2((1.04 / x) ^ 2)) = 16, solve for x
+        throw InvalidValue("Please set error rate to a value "
+                           "greater than 0.0040624");
+    }
+
+    /*
+       For a description of following constants see
+       HyperLogLog in Practice: Algorithmic Engineering of a State of The Art
+          Cardinality Estimation Algorithm
+       Stefan Heule, Marc Nunkesser and Alex Hall
+       dx.doi.org/10.1145/2452376.2452456
+    */
+    switch (p) {
+    case 4:
+        return 0.673;
+    case 5:
+        return 0.697;
+    case 6:
+        return 0.709;
+    default:
+        return 0.7213 / (1.0 + 1.079 / (1 << p));
+    }
+}
+
+void init_raw_estimate_data()
+{
+    if (rawEstimateData.empty()) {
+        for(int i=4; i <= 18; i++) {
+            std::vector<double> v;
+            switch(i) {
+            case 4:
+                v.assign(RAW_ESTIMATE_DATA_4, arr_len(RAW_ESTIMATE_DATA_4));
+                break;
+            case 5:
+                v.assign(RAW_ESTIMATE_DATA_5, arr_len(RAW_ESTIMATE_DATA_5));
+                break;
+            case 6:
+                v.assign(RAW_ESTIMATE_DATA_6, arr_len(RAW_ESTIMATE_DATA_6));
+                break;
+            case 7:
+                v.assign(RAW_ESTIMATE_DATA_7, arr_len(RAW_ESTIMATE_DATA_7));
+                break;
+            case 8:
+                v.assign(RAW_ESTIMATE_DATA_8, arr_len(RAW_ESTIMATE_DATA_8));
+                break;
+            case 9:
+                v.assign(RAW_ESTIMATE_DATA_9, arr_len(RAW_ESTIMATE_DATA_9));
+                break;
+            case 10:
+                v.assign(RAW_ESTIMATE_DATA_10, arr_len(RAW_ESTIMATE_DATA_10));
+                break;
+            case 11:
+                v.assign(RAW_ESTIMATE_DATA_11, arr_len(RAW_ESTIMATE_DATA_11));
+                break;
+            case 12:
+                v.assign(RAW_ESTIMATE_DATA_12, arr_len(RAW_ESTIMATE_DATA_12));
+                break;
+            case 13:
+                v.assign(RAW_ESTIMATE_DATA_13, arr_len(RAW_ESTIMATE_DATA_13));
+                break;
+            case 14:
+                v.assign(RAW_ESTIMATE_DATA_14, arr_len(RAW_ESTIMATE_DATA_14));
+                break;
+            case 15:
+                v.assign(RAW_ESTIMATE_DATA_15, arr_len(RAW_ESTIMATE_DATA_15));
+                break;
+            case 16:
+                v.assign(RAW_ESTIMATE_DATA_16, arr_len(RAW_ESTIMATE_DATA_16));
+                break;
+            case 17:
+                v.assign(RAW_ESTIMATE_DATA_17, arr_len(RAW_ESTIMATE_DATA_17));
+                break;
+            case 18:
+                v.assign(RAW_ESTIMATE_DATA_18, arr_len(RAW_ESTIMATE_DATA_18));
+                break;
+            }
+            rawEstimateData[i] = v;
+        }
+    }
+}
+
+void init_bias_data()
+{
+    if (biasData.empty()) {
+        for(int i=4; i <= 18; i++) {
+            std::vector<double> v;
+            switch(i) {
+            case 4:
+                v.assign(RAW_BIAS_DATA_4, arr_len(RAW_BIAS_DATA_4));
+                break;
+            case 5:
+                v.assign(RAW_BIAS_DATA_5, arr_len(RAW_BIAS_DATA_5));
+                break;
+            case 6:
+                v.assign(RAW_BIAS_DATA_6, arr_len(RAW_BIAS_DATA_6));
+                break;
+            case 7:
+                v.assign(RAW_BIAS_DATA_7, arr_len(RAW_BIAS_DATA_7));
+                break;
+            case 8:
+                v.assign(RAW_BIAS_DATA_8, arr_len(RAW_BIAS_DATA_8));
+                break;
+            case 9:
+                v.assign(RAW_BIAS_DATA_9, arr_len(RAW_BIAS_DATA_9));
+                break;
+            case 10:
+                v.assign(RAW_BIAS_DATA_10, arr_len(RAW_BIAS_DATA_10));
+                break;
+            case 11:
+                v.assign(RAW_BIAS_DATA_11, arr_len(RAW_BIAS_DATA_11));
+                break;
+            case 12:
+                v.assign(RAW_BIAS_DATA_12, arr_len(RAW_BIAS_DATA_12));
+                break;
+            case 13:
+                v.assign(RAW_BIAS_DATA_13, arr_len(RAW_BIAS_DATA_13));
+                break;
+            case 14:
+                v.assign(RAW_BIAS_DATA_14, arr_len(RAW_BIAS_DATA_14));
+                break;
+            case 15:
+                v.assign(RAW_BIAS_DATA_15, arr_len(RAW_BIAS_DATA_15));
+                break;
+            case 16:
+                v.assign(RAW_BIAS_DATA_16, arr_len(RAW_BIAS_DATA_16));
+                break;
+            case 17:
+                v.assign(RAW_BIAS_DATA_17, arr_len(RAW_BIAS_DATA_17));
+                break;
+            case 18:
+                v.assign(RAW_BIAS_DATA_18, arr_len(RAW_BIAS_DATA_18));
+                break;
+            }
+            biasData[i] = v;
+        }
+    }
+}
+
+double get_threshold(int p)
+{
+    return THRESHOLD_DATA[p - 4];
+}
+
+std::vector<int> get_nearest_neighbors(double E, std::vector<double> estimate)
+{
+    std::vector< std::pair<double,int> > distance_map;
+    std::vector<int> nearest;
+
+    int i = 0;
+    for (std::vector<double>::iterator it = estimate.begin();
+            it != estimate.end();
+            ++it) {
+        std::pair<double, int> p(pow(E - *it, 2.0), i);
+        distance_map.push_back(p);
+        i++;
+    }
+
+    sort(distance_map.begin(), distance_map.end());
+
+    for(int k=0; k < 6; k++) {
+        nearest.push_back(distance_map[k].second);
+    }
+
+    return nearest;
+}
+
+double estimate_bias(double E, int p)
+{
+    std::vector<double> bias = biasData[p];
+    std::vector<double> raw_estimate = rawEstimateData[p];
+
+    std::vector<int> nearest = get_nearest_neighbors(E, raw_estimate);
+    double estimate = 0.0;
+
+    for (std::vector<int>::iterator it = nearest.begin();
+            it != nearest.end();
+            ++it) {
+        estimate += bias[*it];
+    }
+    return estimate / nearest.size();
+}
+
+double ep_sum(double acc, int b)
+{
+    return acc += pow(2.0, float(-b));
+}
+
+int get_rho(HashIntoType w, int max_width)
+{
+    return max_width - floor(log2(w));
+}
+
+HLLCounter::HLLCounter(double error_rate, WordLength ksize)
+{
+    if (error_rate < 0) {
+        throw InvalidValue("Please set error rate to a value "
+                           "greater than zero");
+    }
+    int p = ceil(log2(pow(1.04 / error_rate, 2)));
+    this->init(p, ksize);
+}
+
+HLLCounter::HLLCounter(int p, WordLength ksize)
+{
+    this->init(p, ksize);
+}
+
+void HLLCounter::init(int p, WordLength ksize)
+{
+    this->alpha = calc_alpha(p);
+    this->p = p;
+    this->_ksize = ksize;
+    this->m = 1 << p;
+    std::vector<int> M(this->m, 0);
+    this->M = M;
+
+    init_raw_estimate_data();
+    init_bias_data();
+}
+
+double HLLCounter::get_erate()
+{
+    return 1.04 / sqrt(this->m);
+}
+
+void HLLCounter::set_erate(double error_rate)
+{
+    if (count(this->M.begin(), this->M.end(), 0) != this->m) {
+        throw ReadOnlyAttribute("You can only change error rate prior to "
+                                "first counting");
+    }
+
+    if (error_rate < 0) {
+        throw InvalidValue("Please set error rate to a value "
+                           "greater than zero");
+    }
+    int p = ceil(log2(pow(1.04 / error_rate, 2)));
+    this->init(p, this->_ksize);
+}
+
+void HLLCounter::set_ksize(WordLength new_ksize)
+{
+    if (count(this->M.begin(), this->M.end(), 0) != this->m) {
+        throw ReadOnlyAttribute("You can only change k-mer size prior to "
+                                "first counting");
+    }
+
+    this->init(this->p, new_ksize);
+}
+
+double HLLCounter::_Ep()
+{
+    double sum = accumulate(this->M.begin(), this->M.end(), 0.0, ep_sum);
+    double E = this->alpha * pow(this->m, 2.0) / sum;
+
+    if (E <= (5 * (double)this->m)) {
+        return E - estimate_bias(E, this->p);
+    }
+
+    return E;
+}
+
+HashIntoType HLLCounter::estimate_cardinality()
+{
+    long V = count(this->M.begin(), this->M.end(), 0);
+
+    if (V > 0) {
+        double H = this->m * log((double)this->m / V);
+        if (H <= get_threshold(this->p)) {
+            return H;
+        }
+    }
+    return this->_Ep();
+}
+
+void HLLCounter::add(const std::string &value)
+{
+    HashIntoType x = khmer::_hash_murmur(value);
+    HashIntoType j = x & (this->m - 1);
+    this->M[j] = std::max(this->M[j], get_rho(x >> this->p, 64 - this->p));
+}
+
+unsigned int HLLCounter::consume_string(const std::string &s)
+{
+    unsigned int n_consumed = 0;
+    std::string kmer = "";
+
+    for(std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
+        kmer.push_back(*it);
+        if (kmer.size() < _ksize) {
+            continue;
+        }
+        this->add(kmer);
+
+        kmer.erase(0, 1);
+        n_consumed++;
+    }
+    return n_consumed;
+}
+
+void HLLCounter::consume_fasta(
+    std::string const &filename,
+    unsigned int &total_reads,
+    unsigned long long &n_consumed)
+{
+    read_parsers::IParser * parser = read_parsers::IParser::get_parser(filename);
+
+    consume_fasta(parser, total_reads, n_consumed);
+
+    delete parser;
+}
+
+void HLLCounter::consume_fasta(
+    read_parsers::IParser *parser,
+    unsigned int &      total_reads,
+    unsigned long long &    n_consumed)
+{
+
+    read_parsers::Read read;
+    HLLCounter** counters;
+    unsigned int *n_consumed_partial;
+    unsigned int *total_reads_partial;
+
+    n_consumed = 0;
+
+    #pragma omp parallel
+    {
+        #pragma omp single
+        {
+            counters = (HLLCounter**)calloc(omp_get_num_threads(),
+                                            sizeof(HLLCounter*));
+            n_consumed_partial = (unsigned int*)calloc(omp_get_num_threads(),
+                                                       sizeof(unsigned int));
+            total_reads_partial = (unsigned int*)calloc(omp_get_num_threads(),
+                                                        sizeof(unsigned int));
+
+            for (int i=0; i < omp_get_num_threads(); i++)
+            {
+                HLLCounter *newc = new HLLCounter(this->p, this->_ksize);
+                counters[i] = newc;
+            }
+
+            while (!parser->is_complete())
+            {
+                // Iterate through the reads and consume their k-mers.
+                try {
+                    read = parser->get_next_read();
+
+                    #pragma omp task default(none) firstprivate(read) \
+                    shared(counters, n_consumed_partial, total_reads_partial)
+                    {
+                        bool is_valid;
+                        int n, t = omp_get_thread_num();
+                        n = counters[t]->check_and_process_read(read.sequence,
+                                                                is_valid);
+                        n_consumed_partial[t] += n;
+                        if (is_valid) {
+                            total_reads_partial[t] += 1;
+                        }
+                    }
+                } catch (read_parsers::NoMoreReadsAvailable) {
+                }
+
+            } // while reads left for parser
+        }
+        #pragma omp taskwait
+
+        #pragma omp single
+        {
+            for (int i=0; i < omp_get_num_threads(); ++i)
+            {
+                this->merge(*counters[i]);
+                delete counters[i];
+                n_consumed += n_consumed_partial[i];
+                total_reads += total_reads_partial[i];
+            }
+            free(counters);
+            free(n_consumed_partial);
+            free(total_reads_partial);
+        }
+    }
+}
+
+unsigned int HLLCounter::check_and_process_read(std::string &read,
+        bool &is_valid)
+{
+    is_valid = check_and_normalize_read(read);
+
+    if (!is_valid) {
+        return 0;
+    }
+
+    return consume_string(read);
+}
+
+bool HLLCounter::check_and_normalize_read(std::string &read) const
+{
+    bool is_valid = true;
+
+    if (read.length() < this->_ksize) {
+        return false;
+    }
+
+    for (unsigned int i = 0; i < read.length(); i++) {
+        read[ i ] &= 0xdf; // toupper - knock out the "lowercase bit"
+        if (!is_valid_dna( read[ i ] )) {
+            is_valid = false;
+            break;
+        }
+    }
+
+    return is_valid;
+}
+
+void HLLCounter::merge(HLLCounter &other)
+{
+    for(unsigned int i=0; i < this->M.size(); ++i) {
+        this->M[i] = std::max(other.M[i], this->M[i]);
+    }
+}
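
The new HLLCounter is a HyperLogLog cardinality estimator (per the Heule et
al. reference in the source): add() MurmurHash3-hashes each k-mer, the low p
bits of the hash select one of m = 2^p registers, and the register keeps the
largest rho -- the position of the leftmost set bit -- observed in the
remaining bits. A hypothetical usage sketch (error rate and k chosen
arbitrarily; not part of the patch):

    // With an input this small the linear-counting branch of
    // estimate_cardinality() answers, approximating the number of
    // distinct 20-mers consumed.
    #include <iostream>
    #include "hllcounter.hh"

    int main()
    {
        khmer::HLLCounter hll(0.01, 20);  // ~1% target error, k = 20
        hll.consume_string("ACGTACGTACGTACGTACGTACGT");
        std::cout << hll.estimate_cardinality() << std::endl;
        return 0;
    }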
diff --git a/lib/hllcounter.hh b/lib/hllcounter.hh
new file mode 100644
index 0000000..f314107
--- /dev/null
+++ b/lib/hllcounter.hh
@@ -0,0 +1,221 @@
+//
+// This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+// Copyright (C) Michigan State University, 2014-2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
+// Contact: khmer-project at idyll.org
+//
+
+#ifndef HLLCOUNTER_HH
+#define HLLCOUNTER_HH
+
+#include <vector>
+#include <string>
+
+#include "read_parsers.hh"
+
+
+namespace khmer
+{
+
+class HLLCounter
+{
+public:
+    HLLCounter(double error_rate, WordLength ksize);
+    HLLCounter(int p, WordLength ksize);
+
+    void add(const std::string &);
+    unsigned int consume_string(const std::string &);
+    void consume_fasta(std::string const &,
+                       unsigned int &,
+                       unsigned long long &);
+    void consume_fasta(read_parsers::IParser *,
+                       unsigned int &,
+                       unsigned long long &);
+    unsigned int check_and_process_read(std::string &,
+                                        bool &);
+    bool check_and_normalize_read(std::string &) const;
+    HashIntoType estimate_cardinality();
+    void merge(HLLCounter &);
+    virtual ~HLLCounter() {}
+
+    double get_alpha()
+    {
+        return alpha;
+    }
+    int get_p()
+    {
+        return p;
+    }
+    int get_m()
+    {
+        return m;
+    }
+    void set_ksize(WordLength new_ksize);
+    int get_ksize()
+    {
+        return _ksize;
+    }
+    std::vector<int> get_M()
+    {
+        return M;
+    }
+    double get_erate();
+    void set_erate(double new_erate);
+private:
+    double _Ep();
+    double alpha;
+    int p;
+    int m;
+    WordLength _ksize;
+    std::vector<int> M;
+
+    void init(int p, WordLength ksize);
+};
+
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+   For a description of following constants see
+   HyperLogLog in Practice: Algorithmic Engineering of a State of The Art
+      Cardinality Estimation Algorithm
+   Stefan Heule, Marc Nunkesser and Alex Hall
+   dx.doi.org/10.1145/2452376.2452456
+*/
+const int THRESHOLD_DATA[] = {
+    10, 20, 40, 80, 220, 400, 900, 1800, 3100,
+    6500, 11500, 20000, 50000, 120000, 350000
+};
+
+
+const double RAW_ESTIMATE_DATA_4[] = {
+    11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5 [...]
+};
+
+const double RAW_ESTIMATE_DATA_5[] = {
+    23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.7 [...]
+};
+
+const double RAW_ESTIMATE_DATA_6[] = {
+    46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1 [...]
+};
+
+const double RAW_ESTIMATE_DATA_7[] = {
+    92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191. [...]
+};
+
+const double RAW_ESTIMATE_DATA_8[] = {
+    184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378 [...]
+};
+
+const double RAW_ESTIMATE_DATA_9[] = {
+    369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.55 [...]
+};
+
+const double RAW_ESTIMATE_DATA_10[] = {
+    738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956 [...]
+};
+
+const double RAW_ESTIMATE_DATA_11[] = {
+    1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408 [...]
+};
+
+const double RAW_ESTIMATE_DATA_12[] = {
+    2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757,  [...]
+};
+
+const double RAW_ESTIMATE_DATA_13[] = {
+    5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358. [...]
+};
+
+const double RAW_ESTIMATE_DATA_14[] = {
+    11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6 [...]
+};
+
+const double RAW_ESTIMATE_DATA_15[] = {
+    23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683. [...]
+};
+
+const double RAW_ESTIMATE_DATA_16[] = {
+    47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424,  [...]
+};
+
+const double RAW_ESTIMATE_DATA_17[] = {
+    94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 17 [...]
+};
+
+const double RAW_ESTIMATE_DATA_18[] = {
+    189084, 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 34946 [...]
+};
+
+
+const double RAW_BIAS_DATA_4[] = {
+    10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480 [...]
+};
+
+const double RAW_BIAS_DATA_5[] = {
+    22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2 [...]
+};
+
+const double RAW_BIAS_DATA_6[] = {
+    45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.104 [...]
+};
+
+const double RAW_BIAS_DATA_7[] = {
+    91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.1 [...]
+};
+
+const double RAW_BIAS_DATA_8[] = {
+    183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60. [...]
+};
+
+const double RAW_BIAS_DATA_9[] = {
+    368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.55 [...]
+};
+
+const double RAW_BIAS_DATA_10[] = {
+    737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62 [...]
+};
+
+const double RAW_BIAS_DATA_11[] = {
+    1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531 [...]
+};
+
+const double RAW_BIAS_DATA_12[] = {
+    2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757,  [...]
+};
+
+const double RAW_BIAS_DATA_13[] = {
+    5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292 [...]
+};
+
+const double RAW_BIAS_DATA_14[] = {
+    11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.12 [...]
+};
+
+const double RAW_BIAS_DATA_15[] = {
+    23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095. [...]
+};
+
+const double RAW_BIAS_DATA_16[] = {
+    47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424,  [...]
+};
+
+const double RAW_BIAS_DATA_17[] = {
+    94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 3850 [...]
+};
+
+const double RAW_BIAS_DATA_18[] = {
+    189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 788 [...]
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif // HLLCOUNTER_HH
+
+// vim: set sts=2 sw=2:
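
For reference, these tables feed the estimator implemented in hllcounter.cc:
writing V for the number of zero-valued registers, estimate_cardinality()
first tries linear counting and otherwise falls back to the bias-corrected
raw estimate, per the Heule et al. paper cited above:

    H  = m * ln(m / V)                     returned if V > 0 and
                                           H <= THRESHOLD_DATA[p - 4]
    E  = alpha_m * m^2 / sum_j 2^(-M[j])   raw HyperLogLog estimate (_Ep)
    E' = E - bias(E, p)                    applied only when E <= 5m, with
                                           bias() interpolated from the six
                                           nearest RAW_ESTIMATE/RAW_BIAS
                                           entries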
diff --git a/lib/ht-diff.cc b/lib/ht-diff.cc
index ccdc973..85f342d 100644
--- a/lib/ht-diff.cc
+++ b/lib/ht-diff.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/khmer.hh b/lib/khmer.hh
index 53cad36..661b144 100644
--- a/lib/khmer.hh
+++ b/lib/khmer.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -34,6 +34,11 @@ __attribute__((cpychecker_type_object_for_typedef(typename)))
 #define CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF(typename)
 #endif
 
+#define NONCOPYABLE(className)\
+private:\
+    className(const className&);\
+    const className& operator=(const className&)
+
 #include <set>
 #include <map>
 #include <queue>
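
NONCOPYABLE is the classic pre-C++11 idiom for forbidding copies: declaring
(but never defining) a private copy constructor and copy assignment makes
any accidental copy of a Hashtable a compile-time error. A sketch of what
the macro expands to, using a hypothetical class name:

    class Foo
    {
    private:
        Foo(const Foo&);                   // declared, never defined
        const Foo& operator=(const Foo&);  // declared, never defined
    public:
        Foo() {}
    };

    int main()
    {
        Foo a;
        // Foo b = a;  // error: copy constructor of 'Foo' is private
        return 0;
    }

In C++11 the same intent would be spelled Foo(const Foo&) = delete.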
diff --git a/lib/khmer.pc.in b/lib/khmer.pc.in
new file mode 100644
index 0000000..4c97777
--- /dev/null
+++ b/lib/khmer.pc.in
@@ -0,0 +1,14 @@
+prefix=@prefix@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/lib
+sharedlibdir=${libdir}
+includedir=${prefix}/include
+
+Name: khmer
+Description: The unsupported core C++ library from the khmer project
+URL: http://khmer.readthedocs.org/
+Version: @VERSION@
+
+Requires:
+Libs: -L${libdir} -L${sharedlibdir} -lkhmer
+Cflags: -I${includedir}
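
Once @prefix@ and @VERSION@ are substituted at install time, this template
lets dependent builds locate the (explicitly unsupported) C++ library via
pkg-config -- for example, something like
`c++ prog.cc $(pkg-config --cflags --libs khmer)`.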
diff --git a/lib/khmer_exception.hh b/lib/khmer_exception.hh
index 6447052..e278335 100644
--- a/lib/khmer_exception.hh
+++ b/lib/khmer_exception.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -22,18 +22,17 @@ namespace khmer
 class khmer_exception : public std::exception
 {
 public:
-    explicit khmer_exception(const char * msg) : _msg(msg) { }
     explicit khmer_exception(const std::string& msg = "Generic khmer exception")
-        : _msg(msg.c_str()) { }
+        : _msg(msg) { }
 
     virtual ~khmer_exception() throw() { }
     virtual const char* what() const throw ()
     {
-        return _msg;
+        return _msg.c_str();
     }
 
 protected:
-    const char * _msg;
+    const std::string _msg;
 };
 
 ///
@@ -42,7 +41,6 @@ protected:
 class khmer_file_exception : public khmer_exception
 {
 public:
-    explicit khmer_file_exception(const char * msg) : khmer_exception(msg) { }
     explicit khmer_file_exception(const std::string& msg)
         : khmer_exception(msg) { }
 };
@@ -55,7 +53,7 @@ class InvalidStreamHandle : public khmer_file_exception
 public:
     InvalidStreamHandle()
         : khmer_file_exception("Generic InvalidStreamHandle error") {}
-    InvalidStreamHandle(const char * msg) : khmer_file_exception(msg) {}
+    InvalidStreamHandle(const std::string& msg) : khmer_file_exception(msg) {}
 };
 
 class StreamReadError : public khmer_file_exception
@@ -63,7 +61,30 @@ class StreamReadError : public khmer_file_exception
 public:
     StreamReadError()
         : khmer_file_exception("Generic StreamReadError error") {}
-    StreamReadError(const char * msg) : khmer_file_exception(msg) {}
+    StreamReadError(const std::string& msg) : khmer_file_exception(msg) {}
+};
+
+
+///
+// An exception for invalid arguments to functions
+//
+
+class InvalidValue : public khmer_exception
+{
+public:
+    explicit InvalidValue(const std::string& msg)
+        : khmer_exception(msg) { }
+};
+
+///
+// An exception for trying to change a read-only attribute
+//
+
+class ReadOnlyAttribute : public khmer_exception
+{
+public:
+    explicit ReadOnlyAttribute(const std::string& msg)
+        : khmer_exception(msg) { }
 };
 
 }
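
The substance of this hunk is a lifetime fix: the old khmer_exception held
only a `const char *`, and its std::string constructor stored `msg.c_str()`
-- a pointer into a caller-side (often temporary) string that may be
destroyed before what() is ever called. Keeping a std::string copy removes
the dangling pointer, and is why the throw sites patched earlier in
hashtable.cc drop their now-redundant `.c_str()` calls. A sketch of the
hazard, using hypothetical names:

    #include <string>

    struct dangling_exception {
        const char *_msg;
        explicit dangling_exception(const std::string &msg)
            : _msg(msg.c_str()) { }                 // points into a temporary
        const char *what() const { return _msg; }   // may already dangle
    };

    struct owning_exception {
        const std::string _msg;                     // owns its own copy
        explicit owning_exception(const std::string &msg) : _msg(msg) { }
        const char *what() const { return _msg.c_str(); }
    };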
diff --git a/lib/kmer_hash.cc b/lib/kmer_hash.cc
index c0ed2c6..deacbf1 100644
--- a/lib/kmer_hash.cc
+++ b/lib/kmer_hash.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// Copyright (C) Michigan State University, 2009-2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -12,6 +12,7 @@
 
 #include "khmer.hh"
 #include "kmer_hash.hh"
+#include "MurmurHash3.h"
 
 using namespace std;
 
@@ -93,5 +94,66 @@ std::string _revhash(HashIntoType hash, WordLength k)
     return s;
 }
 
+std::string _revcomp(const std::string& kmer)
+{
+    std::string out = kmer;
+    size_t ksize = out.size();
+
+    for (size_t i=0; i < ksize; ++i) {
+        char complement;
+
+        switch(kmer[i]) {
+        case 'A':
+            complement = 'T';
+            break;
+        case 'C':
+            complement = 'G';
+            break;
+        case 'G':
+            complement = 'C';
+            break;
+        case 'T':
+            complement = 'A';
+            break;
+        default:
+            throw khmer::khmer_exception("Invalid base in read");
+            break;
+        }
+        out[ksize - i - 1] = complement;
+    }
+    return out;
+}
+
+HashIntoType _hash_murmur(const std::string& kmer)
+{
+    HashIntoType h = 0;
+    HashIntoType r = 0;
+
+    return khmer::_hash_murmur(kmer, h, r);
+}
+
+HashIntoType _hash_murmur(const std::string& kmer,
+                          HashIntoType& h, HashIntoType& r)
+{
+    HashIntoType out[2];
+    uint32_t seed = 0;
+    MurmurHash3_x64_128((void *)kmer.c_str(), kmer.size(), seed, &out);
+    h = out[0];
+
+    std::string rev = khmer::_revcomp(kmer);
+    MurmurHash3_x64_128((void *)rev.c_str(), rev.size(), seed, &out);
+    r = out[0];
+
+    return h ^ r;
+}
+
+HashIntoType _hash_murmur_forward(const std::string& kmer)
+{
+    HashIntoType h = 0;
+    HashIntoType r = 0;
+
+    khmer::_hash_murmur(kmer, h, r);
+    return h;
+}
 
 };
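
Because _hash_murmur XORs the MurmurHash3 of the k-mer with that of its
reverse complement, a sequence and its reverse complement always hash to the
same value -- the property that makes the hash usable for strand-independent
k-mer counting. A small sketch of that invariant (assumes the khmer headers;
not part of the patch):

    #include <cassert>
    #include "kmer_hash.hh"

    int main()
    {
        // _revcomp("ACGGT") == "ACCGT", so the canonical hashes agree.
        assert(khmer::_hash_murmur("ACGGT") == khmer::_hash_murmur("ACCGT"));
        return 0;
    }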
diff --git a/lib/kmer_hash.hh b/lib/kmer_hash.hh
index b2a4984..f9033a3 100644
--- a/lib/kmer_hash.hh
+++ b/lib/kmer_hash.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// Copyright (C) Michigan State University, 2009-2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -73,6 +73,12 @@ HashIntoType _hash(const char * kmer, const WordLength k,
 HashIntoType _hash_forward(const char * kmer, WordLength k);
 
 std::string _revhash(HashIntoType hash, WordLength k);
+
+// two-way hash functions, MurmurHash3.
+HashIntoType _hash_murmur(const std::string& kmer);
+HashIntoType _hash_murmur(const std::string& kmer,
+                          HashIntoType& h, HashIntoType& r);
+HashIntoType _hash_murmur_forward(const std::string& kmer);
 };
 
 #endif // KMER_HASH_HH
diff --git a/lib/labelhash.cc b/lib/labelhash.cc
index b5b62e1..1ba2414 100644
--- a/lib/labelhash.cc
+++ b/lib/labelhash.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/labelhash.hh b/lib/labelhash.hh
index d3c07ba..3f06631 100644
--- a/lib/labelhash.hh
+++ b/lib/labelhash.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/perf_metrics.cc b/lib/perf_metrics.cc
index 2e85ce3..374be0f 100644
--- a/lib/perf_metrics.cc
+++ b/lib/perf_metrics.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/perf_metrics.hh b/lib/perf_metrics.hh
index d5b10f4..b45c052 100644
--- a/lib/perf_metrics.hh
+++ b/lib/perf_metrics.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/primes.hh b/lib/primes.hh
index 68c427d..938f4fa 100644
--- a/lib/primes.hh
+++ b/lib/primes.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/read_aligner.cc b/lib/read_aligner.cc
index c425373..9bf4343 100644
--- a/lib/read_aligner.cc
+++ b/lib/read_aligner.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+// the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 //
 #include "read_aligner.hh"
 #include "khmer_exception.hh"
@@ -34,7 +34,7 @@ double GetNull(size_t length)
 
 /*
   Turn two states in to a transition, or disallowed if the
-  transition isn't modeled
+  transition isn't modelled
  */
 Transition get_trans(State s1, State s2)
 {
diff --git a/lib/read_aligner.hh b/lib/read_aligner.hh
index 81a0f0a..3d7e7a5 100644
--- a/lib/read_aligner.hh
+++ b/lib/read_aligner.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+// the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 //
 
 #ifndef READ_ALIGNER_HH
diff --git a/lib/read_parsers.cc b/lib/read_parsers.cc
index c4d4f83..6bb4b35 100644
--- a/lib/read_parsers.cc
+++ b/lib/read_parsers.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -9,6 +9,10 @@
 
 #include <cstring>
 #include "khmer_exception.hh"
+#include <seqan/sequence.h>
+#include <seqan/seq_io.h>
+#include <seqan/stream.h>
+#include <pthread.h>
 
 namespace khmer
 {
@@ -17,42 +21,73 @@ namespace khmer
 namespace read_parsers
 {
 
+struct SeqAnParser::Handle {
+    seqan::SequenceStream stream;
+    uint32_t seqan_spin_lock;
+};
+
 SeqAnParser::SeqAnParser( char const * filename ) : IParser( )
 {
-    seqan::open(_stream, filename);
-    if (!seqan::isGood(_stream)) {
+    _private = new SeqAnParser::Handle();
+    seqan::open(_private->stream, filename);
+    if (!seqan::isGood(_private->stream)) {
         std::string message = "Could not open ";
         message = message + filename + " for reading.";
         throw InvalidStreamHandle(message.c_str());
-    } else if (seqan::atEnd(_stream)) {
+    } else if (seqan::atEnd(_private->stream)) {
         std::string message = "File ";
         message = message + filename + " does not contain any sequences!";
         throw InvalidStreamHandle(message.c_str());
     }
     __asm__ __volatile__ ("" ::: "memory");
-    _seqan_spin_lock = 0;
+    _private->seqan_spin_lock = 0;
 }
 
 bool SeqAnParser::is_complete()
 {
-    return !seqan::isGood(_stream) || seqan::atEnd(_stream);
+    return !seqan::isGood(_private->stream) || seqan::atEnd(_private->stream);
 }
 
 void SeqAnParser::imprint_next_read(Read &the_read)
 {
     the_read.reset();
     int ret = -1;
-    while (!__sync_bool_compare_and_swap(& _seqan_spin_lock, 0, 1));
-    bool atEnd = seqan::atEnd(_stream);
+    const char *invalid_read_exc = NULL;
+    while (!__sync_bool_compare_and_swap(& _private->seqan_spin_lock, 0, 1));
+    bool atEnd = seqan::atEnd(_private->stream);
     if (!atEnd) {
         ret = seqan::readRecord(the_read.name, the_read.sequence,
-                                the_read.accuracy, _stream);
+                                the_read.quality, _private->stream);
+        if (ret == 0) {
+            // Detect if we're parsing something w/ qualities on the first read
+            // only
+            if (_num_reads == 0 && the_read.quality.length() != 0) {
+                _have_qualities = true;
+            }
+
+            // Handle error cases, or increment number of reads on success
+            if (the_read.sequence.length() == 0) {
+                invalid_read_exc = "Sequence is empty";
+            } else if (_have_qualities && (the_read.sequence.length() != \
+                                           the_read.quality.length())) {
+                invalid_read_exc = "Sequence and quality lengths differ";
+            } else {
+                _num_reads++;
+            }
+        }
     }
     __asm__ __volatile__ ("" ::: "memory");
-    _seqan_spin_lock = 0;
+    _private->seqan_spin_lock = 0;
+    // Throw any error in the read, even if we're at the end
+    if (invalid_read_exc != NULL) {
+        throw InvalidRead(invalid_read_exc);
+    }
+    // Throw NoMoreReadsAvailable if none of the above errors were raised, even
+    // if ret == 0
     if (atEnd) {
         throw NoMoreReadsAvailable();
     }
+    // Catch-all error in readRecord that isn't one of the above
     if (ret != 0) {
         throw StreamReadError();
     }
@@ -60,7 +95,8 @@ void SeqAnParser::imprint_next_read(Read &the_read)
 
 SeqAnParser::~SeqAnParser()
 {
-    seqan::close(_stream);
+    seqan::close(_private->stream);
+    delete _private;
 }
 
 IParser * const
@@ -104,6 +140,8 @@ IParser(
     if (regex_rc) {
         throw khmer_exception();
     }
+    _num_reads = 0;
+    _have_qualities = false;
 }
 
 IParser::
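
The hunk above serializes access to the shared SequenceStream with a
hand-rolled spin lock: each thread busy-waits on GCC's
__sync_bool_compare_and_swap until it flips the lock word from 0 to 1,
performs a single readRecord() call, and releases the lock by storing 0 after
a compiler-only barrier. A minimal sketch of the same idiom, assuming a
GCC/Clang toolchain (the function and variable names below are illustrative,
not khmer's):

    #include <stdint.h>

    static uint32_t spin_lock = 0;   // 0 = free, 1 = held

    void locked_work()
    {
        // Busy-wait until we atomically flip the lock from 0 to 1.
        while (!__sync_bool_compare_and_swap(&spin_lock, 0, 1));

        // ... critical section, e.g. one readRecord() call ...

        // Compiler-only barrier: keeps the compiler from hoisting the
        // release store above the critical section. It emits no CPU
        // fence; the code above relies on the same property.
        __asm__ __volatile__ ("" ::: "memory");
        spin_lock = 0;               // release
    }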
diff --git a/lib/read_parsers.hh b/lib/read_parsers.hh
index dc20b0e..d030a0b 100644
--- a/lib/read_parsers.hh
+++ b/lib/read_parsers.hh
@@ -1,20 +1,16 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
 #ifndef READ_PARSERS_HH
 #define READ_PARSERS_HH
 
-#include <seqan/sequence.h>
-#include <seqan/seq_io.h>
-#include <seqan/stream.h>
-#include <pthread.h>
 #include <regex.h>
-
-
+#include <iostream>
+#include <cstdlib>
 #include "khmer.hh"
 
 namespace khmer
@@ -26,19 +22,38 @@ namespace read_parsers
 {
 
 struct NoMoreReadsAvailable : public  khmer_exception {
+    explicit NoMoreReadsAvailable(const char *msg) :
+        khmer_exception(msg) {}
+    NoMoreReadsAvailable() :
+        khmer_exception("No more reads available in this stream.") {}
+};
+
+struct InvalidRead : public  khmer_exception {
+    explicit InvalidRead(const char *msg) :
+        khmer_exception(msg) {}
+    InvalidRead() :
+        khmer_exception("Invalid read") {}
 };
 
 struct UnknownPairReadingMode : public  khmer_exception {
+    explicit UnknownPairReadingMode(const char *msg) :
+        khmer_exception(msg) {}
+    UnknownPairReadingMode() :
+        khmer_exception("Unknown pair reading mode supplied.") {}
 };
 
 struct InvalidReadPair : public  khmer_exception {
+    explicit InvalidReadPair(const char *msg) :
+        khmer_exception(msg) {}
+    InvalidReadPair() :
+        khmer_exception("Invalid read pair detected.") {}
 };
 
 struct Read {
     std:: string    name;
     std:: string    annotations;
     std:: string    sequence;
-    std:: string    accuracy;
+    std:: string    quality;
     // TODO? Add description field.
 
     inline void reset ( )
@@ -46,7 +61,7 @@ struct Read {
         name.clear( );
         annotations.clear( );
         sequence.clear( );
-        accuracy.clear( );
+        quality.clear( );
     }
 };
 
@@ -88,8 +103,15 @@ struct IParser {
         uint8_t mode = PAIR_MODE_ERROR_ON_UNPAIRED
     );
 
+    size_t		    get_num_reads()
+    {
+        return _num_reads;
+    }
+
 protected:
 
+    size_t		_num_reads;
+    bool        _have_qualities;
     regex_t		_re_read_2_nosub;
     regex_t		_re_read_1;
     regex_t		_re_read_2;
@@ -123,8 +145,8 @@ public:
     void imprint_next_read(Read &the_read);
 
 private:
-    seqan::SequenceStream _stream;
-    uint32_t _seqan_spin_lock;
+    struct Handle;
+    Handle* _private;
 
 };
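
This header rewrite is the pimpl ("pointer to implementation") idiom: the
SeqAn-typed members move behind a forward-declared Handle, so the seqan/
headers and pthread.h are pulled in only by read_parsers.cc rather than by
every file that includes read_parsers.hh. A generic sketch of the idiom, with
hypothetical names:

    // widget.hh -- public header; no heavy includes required.
    class Widget {
    public:
        Widget();
        ~Widget();
        void frob();
    private:
        struct Impl;    // defined only in widget.cc
        Impl* _impl;    // opaque pointer; layout hidden from clients
    };

    // widget.cc -- only the implementation sees the heavy dependency.
    #include <vector>   // stands in for e.g. <seqan/seq_io.h>

    struct Widget::Impl {
        std::vector<int> data;
    };

    Widget::Widget() : _impl(new Impl()) { }
    Widget::~Widget() { delete _impl; }
    void Widget::frob() { _impl->data.push_back(1); }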
 
diff --git a/lib/subset.cc b/lib/subset.cc
index 25f1886..339a2c9 100644
--- a/lib/subset.cc
+++ b/lib/subset.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2014. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
@@ -10,6 +10,7 @@
 #include "read_parsers.hh"
 
 #include <sstream>
+#include <errno.h>
 
 #define IO_BUF_SIZE 250*1000*1000
 #define BIG_TRAVERSALS_ARE 200
@@ -131,11 +132,11 @@ size_t SubsetPartition::output_partitioned_file(
             }
 
             if (partition_id > 0 || output_unassigned) {
-                if (read.accuracy.length()) { // FASTQ
+                if (read.quality.length()) { // FASTQ
                     outfile << "@" << read.name << "\t" << partition_id
                             << "\n";
                     outfile << seq << "\n+\n";
-                    outfile << read.accuracy << "\n";
+                    outfile << read.quality << "\n";
                 } else {		// FASTA
                     outfile << ">" << read.name << "\t" << partition_id;
                     outfile << "\n" << seq << "\n";
@@ -1267,7 +1268,7 @@ void SubsetPartition::merge_from_disk(string other_filename)
         } else {
             err = "Unknown error in opening file: " + other_filename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     try {
@@ -1280,12 +1281,12 @@ void SubsetPartition::merge_from_disk(string other_filename)
             std::ostringstream err;
             err << "Incorrect file format version " << (int) version
                 << " while reading subset pmap from " << other_filename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         } else if (!(ht_type == SAVED_SUBSET)) {
             std::ostringstream err;
             err << "Incorrect file format type " << (int) ht_type
                 << " while reading subset pmap from " << other_filename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
 
         infile.read((char *) &save_ksize, sizeof(save_ksize));
@@ -1293,12 +1294,12 @@ void SubsetPartition::merge_from_disk(string other_filename)
             std::ostringstream err;
             err << "Incorrect k-mer size " << save_ksize
                 << " while reading subset pmap from " << other_filename;
-            throw khmer_file_exception(err.str().c_str());
+            throw khmer_file_exception(err.str());
         }
     } catch (std::ifstream::failure &e) {
         std::string err;
         err = "Unknown error reading header info from: " + other_filename;
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 
     char * buf = new char[IO_BUF_SIZE];
@@ -1333,7 +1334,7 @@ void SubsetPartition::merge_from_disk(string other_filename)
             if (infile.gcount() == 0) {
                 std::string err;
                 err = "Unknown error reading data from: " + other_filename;
-                throw khmer_file_exception(err.c_str());
+                throw khmer_file_exception(err);
             }
         }
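
The khmer_file_exception changes through this file are more than cosmetic.
The old code passed err.str().c_str(): err.str() returns a temporary
std::string that is destroyed at the end of the full expression, so an
exception type that keeps the raw pointer (as khmer_exception did at the
time) ends up holding dangling memory. Passing the std::string lets the
exception own its own copy. In miniature, with a hypothetical exception type:

    #include <sstream>
    #include <stdexcept>
    #include <string>

    struct file_exception : public std::runtime_error {
        explicit file_exception(const std::string& msg)
            : std::runtime_error(msg) { }  // stores its own copy of msg
    };

    void report_bad_version(int version)
    {
        std::ostringstream err;
        err << "Incorrect file format version " << version;
        throw file_exception(err.str());   // safe: the string is copied
        // Hazardous variant: err.str().c_str() points into a temporary
        // that dies at the end of the statement.
    }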
 
@@ -1418,6 +1419,10 @@ void SubsetPartition::save_partitionmap(string pmap_filename)
     if (n_bytes) {
         outfile.write(buf, n_bytes);
     }
+    if (outfile.fail()) {
+        delete[] buf;
+        throw khmer_file_exception(strerror(errno));
+    }
     outfile.close();
 
     delete[] buf;
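
The save_partitionmap() hunk adds a simple but easy-to-miss pattern: after
the final write(), test the stream's failbit and surface strerror(errno) in
the exception, freeing the I/O buffer before throwing so the error path does
not leak. Condensed, with std::runtime_error standing in for
khmer_file_exception:

    #include <cerrno>
    #include <cstddef>
    #include <cstring>
    #include <fstream>
    #include <stdexcept>

    void save_buffer(const char* path, const char* buf, size_t n_bytes)
    {
        std::ofstream outfile(path, std::ios::binary);
        outfile.write(buf, n_bytes);
        if (outfile.fail()) {
            // On POSIX systems errno reflects the failed underlying
            // write; the C++ standard does not guarantee this, but the
            // code above relies on it via strerror().
            throw std::runtime_error(strerror(errno));
        }
        outfile.close();
    }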
diff --git a/lib/subset.hh b/lib/subset.hh
index 0ecc3f5..b1c06df 100644
--- a/lib/subset.hh
+++ b/lib/subset.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2014. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/test-CacheManager.cc b/lib/test-CacheManager.cc
deleted file mode 100644
index e5716cd..0000000
--- a/lib/test-CacheManager.cc
+++ /dev/null
@@ -1,189 +0,0 @@
-//
-// This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
-// Contact: khmer-project at idyll.org
-//
-
-// Test driver for the CacheManager class.
-// Author: Eric McDonald
-
-
-#include <cerrno>
-#include <climits>
-extern "C"
-{
-#include <stdint.h>
-}
-#include <cstring>
-#include <cstdio>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <cstdlib>
-#include <getopt.h>
-
-#include <omp.h>
-
-#include "error.hh"
-#include "read_parsers.hh"
-
-
-using namespace khmer;
-using namespace khmer:: read_parsers;
-
-
-// t: Stream Type
-// s: Cache Size
-static char const *	    SHORT_OPTS	    = "t:s:";
-
-
-int main( int argc, char * argv[ ] )
-{
-    int		    rc		    = 0;
-    char *	    ifile_type	    = (char *)"raw";
-    uint64_t	    cache_size	    = 64U * 1024 * 1024;
-    char *	    ifile_name	    = NULL;
-    int		    ifd		    = -1;
-    IStreamReader * sr		    = NULL;
-
-    int		    opt		    = -1;
-    char *	    conv_residue    = NULL;
-    while (-1 != (opt = getopt( argc, argv, SHORT_OPTS ))) {
-
-        switch (opt) {
-
-        case 't':
-            if (    strcmp( optarg, "raw" )
-                    &&  strcmp( optarg, "gz" )
-                    &&  strcmp( optarg, "bz2" )
-               ) {
-                error( EINVAL, EINVAL, "Invalid file type" );
-            }
-            ifile_type = new char[ strlen( optarg ) ];
-            strcpy( ifile_type, optarg );
-            break;
-
-        case 's':
-            cache_size = strtoull( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid cache size" );
-            }
-            break;
-
-        default:
-            error( 0, 0, "Skipping unknown arg, '%c'", optopt );
-
-        } // option switch
-
-    } // getopt loop
-
-    if (optind < argc) {
-        ifile_name = argv[ optind++ ];
-    } else {
-        error( EINVAL, 0, "Input file name required" );
-    }
-
-    // TODO: Handle stdin.
-    // TODO: Play with O_DIRECT.
-    if (-1 == (ifd = open( ifile_name, O_RDONLY ))) {
-        error( errno, errno, "Failed to open input file" );
-    }
-
-    try {
-        if	(!strcmp( "raw", ifile_type )) {
-            sr = new RawStreamReader( ifd );
-        } else if (!strcmp( "gz", ifile_type )) {
-            sr = new GzStreamReader( ifd );
-        } else if (!strcmp( "bz2", ifile_type )) {
-            sr = new Bz2StreamReader( ifd );
-        }
-    } catch (InvalidStreamBuffer & exc) {
-        error( EBADF, EBADF, "Failed to initialize stream reader" );
-    }
-
-    uint32_t	    number_of_threads	    = omp_get_max_threads( );
-    CacheManager *  cmgr		    = new CacheManager(
-        *sr, number_of_threads, cache_size, 3
-    );
-
-    srand( getpid( ) );
-
-    #pragma omp parallel default( shared )
-    {
-        uint32_t	thread_id	    = (uint32_t)omp_get_thread_num( );
-        // drand48_data    rng_state;
-        long int	randnum		    = 0;
-        uint8_t		buffer[ 127 ];
-        uint64_t	nbread		    = 0;
-        uint64_t	nbread_total	    = 0;
-        timespec	sleep_duration;
-        timespec	sleep_duration_rem;
-
-        fprintf(
-            stderr,
-            "OMP thread %lu reporting for duty.\n",
-            (unsigned long int)thread_id
-        );
-
-        // srand48_r( (long int)thread_id, &rng_state );
-        for (uint64_t i = 1; cmgr->has_more_data( ); ++i) {
-
-            if (0 == i % 1000000)
-                fprintf(
-                    stderr,
-                    "OMP thread %lu is on data processing iteration %llu.\n",
-                    (unsigned long int)thread_id,
-                    (unsigned long long int)i
-                );
-
-            // lrand48_r( &rng_state, &randnum );
-            #pragma omp critical (rand_read_len)
-            randnum = rand( );
-            randnum %= 128;
-            nbread  =
-                cmgr->get_bytes( (uint8_t * const)buffer, (uint64_t)randnum );
-            nbread_total += nbread;
-
-            // Pretend to work for a random duration. (The code, not me!)
-            // lrand48_r( &rng_state, &randnum );
-            #pragma omp critical (rand_sleep_time)
-            randnum = rand( );
-            sleep_duration_rem.tv_sec	= 0;
-            sleep_duration_rem.tv_nsec	= randnum % 1000000;
-            while ( sleep_duration_rem.tv_sec && sleep_duration_rem.tv_nsec ) {
-                sleep_duration.tv_sec	= sleep_duration_rem.tv_sec;
-                sleep_duration.tv_nsec	= sleep_duration_rem.tv_nsec;
-                nanosleep( &sleep_duration, &sleep_duration_rem );
-            }
-
-            // Occasionally create copyaside buffer,
-            // when opportunity exists.
-            // lrand48_r( &rng_state, &randnum );
-            #pragma omp critical (rand_split_choice)
-            randnum = rand( );
-            if (    (0 == randnum % 1024)
-                    &&  (!sr->is_at_end_of_stream( ))) {
-                // lrand48_r( &rng_state, &randnum );
-                #pragma omp critical (rand_split_offset)
-                randnum = rand( );
-                randnum %= 1024;
-                cmgr->split_at( (uint64_t)randnum );
-            }
-
-        } // work simulator loop
-
-        fprintf(
-            stderr,
-            "OMP thread %lu finished work.\n",
-            (unsigned long int)thread_id
-        );
-
-    } // parallel block
-
-    delete cmgr;
-    delete sr;
-    return rc;
-}
-
-
-// vim: set ft=cpp sts=4 sw=4 tw=80:
diff --git a/lib/test-Colors.cc b/lib/test-Colors.cc
index 7410f67..7e70309 100644
--- a/lib/test-Colors.cc
+++ b/lib/test-Colors.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/test-HashTables.cc b/lib/test-HashTables.cc
index 9304b32..dccf1ef 100644
--- a/lib/test-HashTables.cc
+++ b/lib/test-HashTables.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/test-Parser.cc b/lib/test-Parser.cc
index 75f6337..5d068cd 100644
--- a/lib/test-Parser.cc
+++ b/lib/test-Parser.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/test-StreamReader.cc b/lib/test-StreamReader.cc
deleted file mode 100644
index afe0f83..0000000
--- a/lib/test-StreamReader.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-//
-// This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-// Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
-// Contact: khmer-project at idyll.org
-//
-
-// Test driver for the StreamReader classes.
-// Author: Eric McDonald
-
-
-#include <cerrno>
-#include <climits>
-extern "C"
-{
-#include <stdint.h>
-}
-//#define SSIZE_MAX	(SIZE_MAX / 2)
-#include <cstring>
-#include <cstdio>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <cstdlib>
-#include <getopt.h>
-
-#include "error.hh"
-#include "read_parsers.hh"
-
-
-using namespace khmer;
-using namespace khmer:: read_parsers;
-
-
-// t: Stream Type
-// s: Cache Size
-static char const *	    SHORT_OPTS	    = "t:s:";
-
-
-int main( int argc, char * argv[ ] )
-{
-    int		    rc		    = 0;
-    char *	    ifile_type	    = (char *)"raw";
-    uint64_t	    cache_size	    = 4L * 1024 * 1024 * 1024;
-    uint8_t *	    cache	    = NULL;
-    char *	    ifile_name	    = NULL;
-    char *	    ofile_name	    = NULL;
-    int		    ifd		    = -1;
-    int		    ofd		    = -1;
-    IStreamReader * sr		    = NULL;
-
-    int		    opt		    = -1;
-    char *	    conv_residue    = NULL;
-    while (-1 != (opt = getopt( argc, argv, SHORT_OPTS ))) {
-
-        switch (opt) {
-
-        case 't':
-            if (    strcmp( optarg, "raw" )
-                    &&  strcmp( optarg, "gz" )
-                    &&  strcmp( optarg, "bz2" )
-               ) {
-                error( EINVAL, EINVAL, "Invalid file type" );
-            }
-            ifile_type = new char[ strlen( optarg ) ];
-            strcpy( ifile_type, optarg );
-            break;
-
-        case 's':
-            cache_size = strtoull( optarg, &conv_residue, 10 );
-            if (!strcmp( optarg, conv_residue )) {
-                error( EINVAL, EINVAL, "Invalid cache size" );
-            }
-            break;
-
-        default:
-            error( 0, 0, "Skipping unknown arg, '%c'", optopt );
-
-        } // option switch
-
-    } // getopt loop
-
-    if (optind < argc) {
-        ifile_name = argv[ optind++ ];
-    } else {
-        error( EINVAL, 0, "Input file name required" );
-    }
-
-    if (optind < argc) {
-        ofile_name = argv[ optind++ ];
-    } else {
-        error( EINVAL, 0, "Output file name required" );
-    }
-
-    // TODO: Handle stdin.
-    // TODO: Play with O_DIRECT.
-    if (-1 == (ifd = open( ifile_name, O_RDONLY ))) {
-        error( errno, errno, "Failed to open input file" );
-    }
-
-    // TODO: Handle stdout.
-    if (-1 == (ofd = creat( ofile_name, 0644 ))) {
-        error( errno, errno, "Failed to open output file" );
-    }
-
-    try {
-        if	(!strcmp( "raw", ifile_type )) {
-            sr = new RawStreamReader( ifd );
-        } else if (!strcmp( "gz", ifile_type )) {
-            sr = new GzStreamReader( ifd );
-        } else if (!strcmp( "bz2", ifile_type )) {
-            sr = new Bz2StreamReader( ifd );
-        }
-    } catch (InvalidStreamBuffer & exc) {
-        error( EBADF, EBADF, "Failed to initialize stream reader" );
-    }
-
-    try {
-        cache = new uint8_t[ cache_size ];
-    } catch (std:: bad_alloc & exc) {
-        error( ENOMEM, ENOMEM, "Failed to allocate cache" );
-    }
-
-    uint64_t	    nbread	    = 0;
-    ssize_t	    nbwrote	    = 0;
-    uint64_t	    nbread_total    = 0;
-    uint64_t	    nbwrote_total   = 0;
-    while (!sr->is_at_end_of_stream( )) {
-        uint64_t    nbwrote_subtotal	= 0;
-        try {
-            nbread = sr->read_into_cache( cache, cache_size );
-            nbread_total += nbread;
-            for ( uint64_t nbrem = nbread; 0 < nbrem; nbrem -= nbwrote ) {
-                nbwrote =
-                    write( ofd,
-                           cache + nbwrote_subtotal,
-                           (nbrem > SSIZE_MAX ? SSIZE_MAX : nbrem)
-                         );
-                if (-1 == nbwrote) {
-                    error( EIO, EIO, "Error during write of output stream" );
-                }
-                nbwrote_subtotal += nbwrote;
-            }
-            nbwrote_total += nbwrote_subtotal;
-        } catch (StreamReadError & exc) {
-            error( EIO, EIO, "Error during read of input stream" );
-        } catch (...) {
-            throw;
-        }
-        fprintf( stdout,
-                 "Read %llu bytes from disk.\n",
-                 (long long unsigned int)nbread
-               );
-        fprintf( stdout,
-                 "Wrote %llu bytes to disk.\n",
-                 (long long unsigned int)nbwrote_subtotal
-               );
-    } // stream reader read loop
-    fprintf( stdout,
-             "Read %llu bytes in total from disk.\n",
-             (long long unsigned int)nbread_total
-           );
-    fprintf( stdout,
-             "Wrote %llu bytes in total to disk.\n",
-             (long long unsigned int)nbwrote_total
-           );
-
-    close( ofd );
-
-    return rc;
-}
-
-
-// vim: set ft=cpp sts=4 sw=4 tw=80:
diff --git a/lib/test-compile.cc b/lib/test-compile.cc
new file mode 100644
index 0000000..cedfe4c
--- /dev/null
+++ b/lib/test-compile.cc
@@ -0,0 +1,17 @@
+//
+// This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+// Copyright (C) Michigan State University, 2015. It is licensed under
+// the three-clause BSD license; see LICENSE.
+// Contact: khmer-project at idyll.org
+//
+
+// Author:  Kevin Murray, spam at kdmurray.id.au
+// This file is used to test compilation with libkhmer.a/libkhmer.so
+
+#include  <counting.hh>
+
+int main()
+{
+    khmer::CountingHash test(1,1);
+    return 0;
+}
diff --git a/lib/test_read_aligner.cc b/lib/test-read-aligner.cc
similarity index 92%
rename from lib/test_read_aligner.cc
rename to lib/test-read-aligner.cc
index ba284e8..9d2b6a8 100644
--- a/lib/test_read_aligner.cc
+++ b/lib/test-read-aligner.cc
@@ -16,7 +16,7 @@ const unsigned int ht_count = 5;
 const WordLength ksize = 30;
 
 const unsigned int num_test_seqs = 1;
-const std::string test_seqs[] { "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTTAACAACCTCTTTAC" };
+const std::string test_seqs[] = { "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTTAACAACCTCTTTAC" };
 
 //const std::string toalign = "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAATTTTGCCGCTTTAACTGGGTCTGTTTCTACTGCAAACTTTCCACCAACAAGTTTTTCTGCATCCTGTGTTGCAATCTTAACAACCTCTTTAC"; //perfect alignment
 //const std::string toalign = "TTAAATGCCCAATTTTTCCCTCTTTTCTTCTATATGTTTGATTATCAA"; //short perfect alignment
@@ -35,7 +35,7 @@ int main(void)
     for ( unsigned int i = 0; i < ht_count; ++i ) {
         ht_sizes.push_back( primetab.get_next_prime( ) );
     }
-    CountingHash ht = CountingHash(ksize, ht_sizes);
+    CountingHash ht(ksize, ht_sizes);
 
     for(unsigned int index = 0; index < num_test_seqs; index++) {
         std::cout << "Loading test sequence " << index << ": " << test_seqs[index] <<
diff --git a/lib/trace_logger.cc b/lib/trace_logger.cc
index dadc75e..06dd34a 100644
--- a/lib/trace_logger.cc
+++ b/lib/trace_logger.cc
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/lib/trace_logger.hh b/lib/trace_logger.hh
index 2af6ddf..54d57e1 100644
--- a/lib/trace_logger.hh
+++ b/lib/trace_logger.hh
@@ -1,7 +1,7 @@
 //
 // This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 // Copyright (C) Michigan State University, 2009-2013. It is licensed under
-// the three-clause BSD license; see doc/LICENSE.txt.
+// the three-clause BSD license; see LICENSE.
 // Contact: khmer-project at idyll.org
 //
 
diff --git a/sandbox/README.rst b/sandbox/README.rst
index aa393a6..f50b1a7 100644
--- a/sandbox/README.rst
+++ b/sandbox/README.rst
@@ -6,10 +6,16 @@ scripts that we have not fully tested.  They are also not under
 semantic versioning, so their functionality and command line arguments
 may change without notice.
 
-We are still in the middle of triaging and documenting the various scripts.
+We are still triaging and documenting the various scripts.
 
 ----
 
+Awaiting promotion to scripts:
+
+* calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference. (Used/tested in `2014 paper on semi-streaming algorithms <https://github.com/ged-lab/2014-streaming/blob/master/>`__)
+* correct-errors.py - streaming error correction.
+* unique-kmers.py - estimate the number of k-mers present in a file with the HyperLogLog low-memory probabilistic cardinality estimation algorithm.
+
 Scripts with recipes:
 
 * calc-median-distribution.py - plot coverage distribution; see `khmer-recipes #1 <https://github.com/ged-lab/khmer-recipes/tree/master/001-extract-reads-by-coverage>`__
@@ -19,15 +25,12 @@ Scripts with recipes:
 
 To keep, document, and build recipes for:
 
+* make-coverage.py - RPKM calculation script
 * abundance-hist-by-position.py - look at abundance of k-mers by position within read; use with fasta-to-abundance-hist.py
 * assemstats3.py - print out assembly statistics
+* build-sparse-graph.py - code for building a sparse graph (by Camille Scott)
 * calc-best-assembly.py - calculate the "best assembly" - used in metagenome protocol
-* calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference.
-* calc-median-distribution.py - plot coverage distribution; see `khmer-recipes #1 <https://github.com/ged-lab/khmer-recipes/tree/master/001-extract-reads-by-coverage>`__
-* collect-variants.py
-* combine-pe.py - combine partitions based on shared PE reads.
-* compare-partitions.py
-* dn-identify-errors.py - prototype script to identify errors in reads based on diginorm principles
+* collect-variants.py - used in a `gist <https://gist.github.com/ctb/6eaef7971ea429ab348d>`__
 * extract-single-partition.py - extract all the sequences that belong to a specific partition, from a file with multiple partitions
 * fasta-to-abundance-hist.py - generate abundance of k-mers by position within reads; use with abundance-hist-by-position.py
 * filter-below-abund.py - like filter-abund, but trim off high-abundance k-mers
@@ -41,9 +44,7 @@ To keep, document, and build recipes for:
 * normalize-by-median-pct.py - see blog post on Trinity in silico norm (http://ivory.idyll.org/blog/trinity-in-silico-normalize.html)
 * print-stoptags.py - print out the stoptag k-mers
 * print-tagset.py - print out the tagset k-mers
-* readstats.py - print out read statistics
 * renumber-partitions.py - systematically renumber partitions
-* shuffle-fasta.py - FASTA file shuffler for small FASTA files
 * shuffle-reverse-rotary.py - FASTA file shuffler for larger FASTA files
 * split-fasta.py - break a FASTA file up into smaller chunks
 * stoptag-abundance-hist.py - print out abundance histogram of stoptags
@@ -55,32 +56,34 @@ To keep, document, and build recipes for:
 * sweep-reads.py - various ways to extract reads based on k-mer overlap
 * sweep-reads2.py - various ways to extract reads based on k-mer overlap
 * sweep-reads3.py - various ways to extract reads based on k-mer overlap
-* to-casava-1.8-fastq.py - convert reads to different Casava format
-* trim-low-abund.py - streaming k-mer abundance trimming; see filter-abund for non-streaming, and look to `khmer-recipes #6 <https://github.com/ged-lab/khmer-recipes/blob/master/006-streaming-sequence-trimming/index.rst>`__ for usage.
-* write-trimmomatic.py
+* write-trimmomatic.py - used to build Trimmomatic command lines in `khmer-protocols <http://khmer-protocols.readthedocs.org/en/latest/>`__
 
 Good ideas to rewrite using newer tools/approaches:
 
 * assembly-diff.py - find sequences that differ between two assemblies
 * assembly-diff-2.py - find subsequences that differ between two assemblies
-* bloom_count.py - count # of unique k-mers; should be reimplemented with HyperLogLog
-* bloom_count_intersection.py - look at unique and disjoint #s of k-mers
+* bloom-count.py - count # of unique k-mers; should be reimplemented with HyperLogLog. Renamed from bloom_count.py in commit 4788c31.
+* bloom-count-intersection.py - look at unique and disjoint #s of k-mers. Renamed from bloom_count_intersection.py in commit 4788c31.
 * split-sequences-by-length.py - break up short reads by length
 
-To examine:
+----
 
-* build-sparse-graph.py - code for building a sparse graph (by Camille Scott)
-* count-within-radius.py - calculating graph density by position with seq
-* degree-by-position.py - calculating graph degree by position in seq
-* ec.py - new error correction foo
-* error-correct-pass2.py - new error correction foo
-* find-unpart.py - something to do with finding unpartitioned sequences
-* normalize-by-align.py  - new error correction foo
-* read_aligner.py - new error correction foo
-* uniqify-sequences.py - print out paths that are unique in the graph
-* write-interleave.py - is this used by any protocol etc?
+Present in commit d295bc847 but removed thereafter:
 
-----
+* `combine-pe.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/combine-pe.py>`__ - combine partitions based on shared PE reads.
+* `compare-partitions.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/compare-partitions.py>`__ - compare read membership in partitions.
+* `count-within-radius.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/count-within-radius.py>`__ - calculating graph density by position with seq
+* `degree-by-position.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/degree-by-position.py>`__ - calculating graph degree by position in seq
+* `dn-identify-errors.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/dn-identify-errors.py>`__ - prototype script to identify errors in reads based on diginorm principles
+* `ec.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/ec.py>`__ - new error correction foo
+* `error-correct-pass2.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/error-correct-pass2.py>`__ - new error correction foo
+* `find-unpart.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/find-unpart.py>`__ - something to do with finding unpartitioned sequences
+* `normalize-by-align.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/normalize-by-align.py>`__  - new error correction foo
+* `read_aligner.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/read_aligner.py>`__ - new error correction foo
+* `shuffle-fasta.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/shuffle-fasta.py>`__ - FASTA file shuffler for small FASTA files
+* `to-casava-1.8-fastq.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/to-casava-1.8-fastq.py>`__ - convert reads to different Casava format
+* `uniqify-sequences.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/uniqify-sequences.py>`__ - print out paths that are unique in the graph
+* `write-interleave.py <https://github.com/ged-lab/khmer/blob/d295bc8477022e8c34649f131a2abe333a891d3d/sandbox/write-interleave.py>`__ - is this used by any protocol etc?
 
 Present in commit 691b0b3ae but removed thereafter:
 
@@ -146,7 +149,6 @@ Present in commit 691b0b3ae but removed thereafter:
 * `multi-abyss.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/multi-abyss.py>`__ - better done with parallel
 * `make-coverage-by-position-for-node.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/make-coverage-by-position-for-node.py>`__ - deprecated coverage calculation
 * `make-coverage-histogram.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/make-coverage-histogram.py>`__ - build coverage histograms
-* `make-coverage.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/make-coverage.py>`__ - RPKM calculation script
 * `make-random.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/make-random.py>`__ - make random DNA; see dbg-graph-null project.
 * `make-read-stats.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/make-read-stats.py>`__ - see readstats.py
 * `multi-stats.py <https://github.com/ged-lab/khmer/tree/691b0b3aefe83e9e8f5f2b80f5f9516664a4654a/sandbox/multi-stats.py>`__ - see readstats.py
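
unique-kmers.py (listed above as awaiting promotion) and the proposed
bloom-count.py rewrite both point at HyperLogLog, which estimates the number
of distinct k-mers in a few kilobytes: hash each k-mer, use the first p bits
of the hash to select a register, record the largest count of leading zero
bits seen in the remaining bits, and combine the registers with a harmonic
mean. A toy version of the core estimator, omitting HLL's small- and
large-range corrections and using std::hash (and a 64-bit size_t) purely for
illustration:

    #include <cmath>
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    class ToyHyperLogLog {
        static const int P = 12;            // 2^12 = 4096 registers
        std::vector<uint8_t> reg;
    public:
        ToyHyperLogLog() : reg(1 << P, 0) { }

        void add(const std::string& kmer) {
            uint64_t h = std::hash<std::string>()(kmer);
            uint64_t idx = h >> (64 - P);   // top P bits pick a register
            uint64_t rest = h << P;         // remaining 64 - P bits
            uint8_t rank = 1;               // 1 + number of leading zeros
            while (rest && !(rest & (1ULL << 63))) { rest <<= 1; ++rank; }
            if (!rest) rank = 64 - P + 1;   // all remaining bits were zero
            if (rank > reg[idx]) reg[idx] = rank;
        }

        double estimate() const {
            double m = double(reg.size()), sum = 0.0;
            for (size_t i = 0; i < reg.size(); ++i)
                sum += std::pow(2.0, -double(reg[i]));
            double alpha = 0.7213 / (1.0 + 1.079 / m);  // bias, m >= 128
            return alpha * m * m / sum;                 // raw HLL estimate
        }
    };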
diff --git a/sandbox/abundance-hist-by-position.py b/sandbox/abundance-hist-by-position.py
index 473b807..4ba6c3b 100755
--- a/sandbox/abundance-hist-by-position.py
+++ b/sandbox/abundance-hist-by-position.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/assembly-diff-2.py b/sandbox/assembly-diff-2.py
index 0562592..c81bea7 100755
--- a/sandbox/assembly-diff-2.py
+++ b/sandbox/assembly-diff-2.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/assembly-diff.py b/sandbox/assembly-diff.py
index 7a78a0a..189187c 100755
--- a/sandbox/assembly-diff.py
+++ b/sandbox/assembly-diff.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/assemstats3.py b/sandbox/assemstats3.py
index 281de48..a085128 100755
--- a/sandbox/assemstats3.py
+++ b/sandbox/assemstats3.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 '''
diff --git a/sandbox/bloom_count_intersection.py b/sandbox/bloom-count-intersection.py
similarity index 97%
rename from sandbox/bloom_count_intersection.py
rename to sandbox/bloom-count-intersection.py
index 85115cb..d6dbc36 100755
--- a/sandbox/bloom_count_intersection.py
+++ b/sandbox/bloom-count-intersection.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # using bloom filter to count intersection
diff --git a/sandbox/bloom_count.py b/sandbox/bloom-count.py
similarity index 94%
rename from sandbox/bloom_count.py
rename to sandbox/bloom-count.py
index 67a7027..19a3c80 100755
--- a/sandbox/bloom_count.py
+++ b/sandbox/bloom-count.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # using bloom filter to count unique kmers
diff --git a/sandbox/build-sparse-graph.py b/sandbox/build-sparse-graph.py
index 9ec09cf..4941603 100755
--- a/sandbox/build-sparse-graph.py
+++ b/sandbox/build-sparse-graph.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/sandbox/calc-best-assembly.py b/sandbox/calc-best-assembly.py
old mode 100644
new mode 100755
index 5ca1895..9582e44
--- a/sandbox/calc-best-assembly.py
+++ b/sandbox/calc-best-assembly.py
@@ -1,8 +1,8 @@
-#! /usr/bin/env python
+#! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 #
 import screed
 import argparse
diff --git a/sandbox/calc-error-profile.py b/sandbox/calc-error-profile.py
index 6e8922a..f2ffddf 100755
--- a/sandbox/calc-error-profile.py
+++ b/sandbox/calc-error-profile.py
@@ -1,8 +1,8 @@
-#! /usr/bin/env python
+#! /usr/bin/env python2
 #
 # This script is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 #
 """
 Calculate the mismatch error profile for shotgun data, using a subset of
diff --git a/sandbox/calc-median-distribution.py b/sandbox/calc-median-distribution.py
index 4c3946c..c6ef1d0 100755
--- a/sandbox/calc-median-distribution.py
+++ b/sandbox/calc-median-distribution.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py
index 5bcdc56..ca16608 100755
--- a/sandbox/collect-reads.py
+++ b/sandbox/collect-reads.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2014-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 # pylint: disable=missing-docstring,invalid-name
 """
@@ -19,15 +19,15 @@ import sys
 import textwrap
 import khmer
 from khmer.khmer_args import build_counting_args, report_on_config, info
-from khmer.file import check_file_status, check_space
-from khmer.file import check_space_for_hashtable
+from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_space_for_hashtable
 import argparse
 import screed
 
 
 def output_single(read):
-    if hasattr(read, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.accuracy)
+    if hasattr(read, 'quality'):
+        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.quality)
     else:
         return ">%s\n%s\n" % (read.name, read.sequence)
 
@@ -39,7 +39,7 @@ def get_parser():
 
     Example::
 
-        collect-reads.py -k 20 -x 5e7 out.kh data/100k-filtered.fa
+        collect-reads.py -k 20 -x 5e7 out.ct data/100k-filtered.fa
     """
 
     parser = build_counting_args("Collect reads until a given avg coverage.",
@@ -71,10 +71,10 @@ def main():
     filenames = args.input_sequence_filename
 
     for name in args.input_sequence_filename:
-        check_file_status(name)
+        check_input_files(name, False)
 
-    check_space(args.input_sequence_filename)
-    check_space_for_hashtable(args.n_tables * args.min_tablesize)
+    check_space(args.input_sequence_filename, False)
+    check_space_for_hashtable(args.n_tables * args.min_tablesize, False)
 
     print 'Saving k-mer counting table to %s' % base
     print 'Loading sequences from %s' % repr(filenames)
@@ -130,17 +130,10 @@ def main():
     info_fp.write('through end: %s\n' % filenames[-1])
 
     # Change 0.2 only if you really grok it.  HINT: You don't.
-    fp_rate = khmer.calc_expected_collisions(htable)
+    fp_rate = khmer.calc_expected_collisions(htable, args.force, max_false_pos=.2)
     print 'fp rate estimated to be %1.3f' % fp_rate
     print >> info_fp, 'fp rate estimated to be %1.3f' % fp_rate
 
-    if fp_rate > 0.20:
-        print >> sys.stderr, "**"
-        print >> sys.stderr, ("** ERROR: the k-mer counting table is too small"
-                              " this data set.  Increase tablesize/# tables.")
-        print >> sys.stderr, "**"
-        sys.exit(1)
-
     print 'DONE.'
 
 if __name__ == '__main__':
diff --git a/sandbox/collect-variants.py b/sandbox/collect-variants.py
old mode 100644
new mode 100755
index 2d0fc0f..3a3669b
--- a/sandbox/collect-variants.py
+++ b/sandbox/collect-variants.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2013-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
@@ -65,7 +65,7 @@ def main():
         print 'making hashtable'
         ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
 
-    aligner = khmer.new_readaligner(ht, args.trusted_cutoff, args.bits_theta)
+    aligner = khmer.ReadAligner(ht, args.trusted_cutoff, args.bits_theta)
 
     if args.details_out is not None:
         details_out = open(args.details_out, "w")
@@ -144,16 +144,9 @@ def main():
         ht.save(args.savehash)
 
     # Change 0.2 only if you really grok it.  HINT: You don't.
-    fp_rate = khmer.calc_expected_collisions(ht)
+    fp_rate = khmer.calc_expected_collisions(ht, args.force, max_false_pos=.2)
     print 'fp rate estimated to be %1.3f' % fp_rate
 
-    if fp_rate > 0.20:
-        print >>sys.stderr, "**"
-        print >>sys.stderr, "** ERROR: the counting hash is too small for"
-        print >>sys.stderr, "** this data set.  Increase hashsize/num ht."
-        print >>sys.stderr, "**"
-        print >>sys.stderr, "** Do not use these results!!"
-        sys.exit(-1)
 
 if __name__ == '__main__':
     main()
diff --git a/sandbox/combine-pe.py b/sandbox/combine-pe.py
deleted file mode 100755
index 9558a58..0000000
--- a/sandbox/combine-pe.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import sys
-import khmer
-from screed.fasta import fasta_iter
-
-K = 32
-
-###
-
-
-def get_partition(record):
-    pid = record['name'].rsplit('\t', 1)[1]
-    return int(pid)
-
-###
-
-
-def main():
-    ht = khmer.new_hashbits(K, 1, 1)
-
-    ht.consume_partitioned_fasta(sys.argv[1])
-    before = ht.count_partitions()
-
-    last_name = None
-    last_record = None
-    for n, record in enumerate(
-            fasta_iter(open(sys.argv[1]), parse_description=False)):
-        if n % 10000 == 0:
-            print '...', n
-
-        name = record['name'].split()[0]
-        name = name.split('/', 1)[0]
-
-        if name == last_name:
-            if 1:
-                pid_1 = ht.get_partition_id(last_record['sequence'][:K])
-                pid_2 = ht.get_partition_id(record['sequence'][:K])
-
-                ht.join_partitions(pid_1, pid_2)
-            else:                           # TEST
-                pid_1 = get_partition(last_record)
-                pid_2 = get_partition(record)
-                assert pid_1 == pid_2, (last_record, record, pid_1, pid_2)
-
-        last_name = name
-        last_record = record
-
-    ht.output_partitions(sys.argv[1], sys.argv[1] + '.paired')
-    print 'before:', before
-    after = ht.count_partitions()
-    print 'after:', after
-
-    n_combined = before[0] - after[0]
-    print 'combined:', n_combined
-
-    # vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/compare-partitions.py b/sandbox/compare-partitions.py
deleted file mode 100755
index 4ea764b..0000000
--- a/sandbox/compare-partitions.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-# only works for small files...
-
-import sys
-from screed.fasta import fasta_iter
-
-
-def read_partition_file(fp):
-    for n, line in enumerate(fp):
-        if n % 2 == 0:
-            surrendered = False
-            name, partition_id = line[1:].strip().rsplit('\t', 1)
-
-            if '*' in partition_id:
-                partition_id = int(partition_id[:-1])
-                surrendered = True
-            else:
-                partition_id = int(partition_id)
-        else:
-            sequence = line.strip()
-
-            yield name, partition_id, surrendered, sequence
-
-
-def main():
-    (filename1, filename2) = sys.argv[1:]
-
-    p1 = {}
-    s1 = {}
-    for name, pid, _, _ in read_partition_file(open(filename1)):
-        name = name.split('\t')[0]
-        x = p1.get(pid, set())
-        x.add(name)
-        p1[pid] = x
-
-        s1[name] = pid
-
-    p2 = {}
-    s2 = {}
-    for name, pid, _, _ in read_partition_file(open(filename2)):
-        name = name.split('\t')[0]
-        x = p2.get(pid, set())
-        x.add(name)
-        p2[pid] = x
-
-        s2[name] = pid
-
-    found = set()
-    for name in s1:
-        pid = s1[name]
-        pid2 = s2[name]
-
-        x1 = p1[pid]
-        x2 = p2[pid2]
-
-        if x1 != x2 and pid not in found:
-            print pid, pid2, len(x1), len(x2)
-            found.add(pid)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/correct-errors.py b/sandbox/correct-errors.py
index a1cc2a7..83b6ec6 100755
--- a/sandbox/correct-errors.py
+++ b/sandbox/correct-errors.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
@@ -38,12 +38,17 @@ def output_single(read, new_sequence):
     name = read.name
     sequence = new_sequence
 
-    accuracy = None
-    if hasattr(read, 'accuracy'):
-        accuracy = read.accuracy[:len(sequence)]
+    quality = None
+    if hasattr(read, 'quality'):
+        quality = read.quality[:len(sequence)]
 
-    if accuracy:
-        return "@%s\n%s\n+\n%s\n" % (name, sequence, accuracy)
+        # in cases where sequence _lengthened_, need to truncate it to
+        # match the quality score length.
+        sequence = sequence[:len(quality)]
+
+    if quality:
+        assert len(sequence) == len(quality), (sequence, quality)
+        return "@%s\n%s\n+\n%s\n" % (name, sequence, quality)
     else:
         return ">%s\n%s\n" % (name, sequence)
 
@@ -66,7 +71,7 @@ def main():
                         help='lower bound on hashsize to use')
 
     parser.add_argument("--trusted-cov", dest="trusted_cov", type=int,
-                        default=2)
+                        default=DEFAULT_CUTOFF)
     parser.add_argument("--theta", dest="bits_theta", type=float, default=1.0)
 
     parser.add_argument('--normalize-to', '-Z', type=int, dest='normalize_to',
@@ -88,7 +93,7 @@ def main():
     print 'making hashtable'
     ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
 
-    aligner = khmer.new_readaligner(ht, args.trusted_cov, args.bits_theta)
+    aligner = khmer.ReadAligner(ht, args.trusted_cov, args.bits_theta)
 
     tempdir = tempfile.mkdtemp('khmer', 'tmp', args.tempdir)
     print 'created temporary directory %s; use -T to change location' % tempdir
diff --git a/sandbox/count-within-radius.py b/sandbox/count-within-radius.py
deleted file mode 100755
index 2849bc2..0000000
--- a/sandbox/count-within-radius.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import sys
-import screed.fasta
-import os
-import khmer
-
-K = 32
-HASHTABLE_SIZE = int(8e9)
-N_HT = 4
-RADIUS = 100
-
-###
-
-MAX_DENSITY = 2000
-
-
-def main():
-    infile = sys.argv[1]
-    outfile = sys.argv[2]
-    if len(sys.argv) > 3:
-        RADIUS = int(sys.argv[3])
-
-    print 'saving to:', outfile
-
-    print 'making hashtable'
-    ht = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
-
-    print 'eating', infile
-    ht.consume_fasta(infile)
-
-    print 'loading'
-    ht.save(outfile + '.ht')
-
-    outfp = open(outfile, 'w')
-    for n, record in enumerate(screed.open(infile)):
-        if n % 10000 == 0:
-            print '... saving', n
-        seq = record['sequence']
-
-        for pos in range(0, len(seq), 200):
-            subseq = seq[pos:pos + 200]
-
-            middle = (len(subseq) - K + 1) / 2
-
-            density = ht.count_kmers_within_radius(
-                subseq[middle:middle + K], RADIUS,
-                MAX_DENSITY)
-            density /= float(RADIUS)
-
-            print >>outfp, '>%s d=%.3f\n%s' % (record['name'], density, subseq)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/degree-by-position.py b/sandbox/degree-by-position.py
deleted file mode 100755
index 7649a1e..0000000
--- a/sandbox/degree-by-position.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import khmer
-import sys
-from screed.fasta import fasta_iter
-
-K = 32
-HASHTABLE_SIZE = int(8e9)
-N_HT = 4
-
-
-def main():
-    outfp = open(sys.argv[2], 'w')
-
-    ht = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
-    ht.consume_fasta(sys.argv[1])
-
-    hist = [0] * 200
-    histcount = [0] * 200
-    for n, record in enumerate(fasta_iter(open(sys.argv[1]))):
-        if n % 10000 == 0:
-            print '...', n
-
-        seq = record['sequence']
-        for pos in range(0, len(seq) - K + 1):
-            kmer = seq[pos:pos + K]
-            count = ht.kmer_degree(kmer)
-
-            hist[pos] += count
-            histcount[pos] += 1
-
-    for i in range(len(hist)):
-        total = hist[i]
-        count = histcount[i]
-        if not count:
-            continue
-
-        print >>outfp, i, total, count, total / float(count)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/dn-identify-errors.py b/sandbox/dn-identify-errors.py
deleted file mode 100755
index 769a505..0000000
--- a/sandbox/dn-identify-errors.py
+++ /dev/null
@@ -1,147 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-"""
-Streaming error trimming based on digital normalization.
-
-% python sandbox/trim-low-abund.py [ <data1> [ <data2> [ ... ] ] ]
-
-Use -h for parameter help.
-"""
-import sys
-import screed
-import os
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-import argparse
-
-DEFAULT_NORMALIZE_LIMIT = 20
-DEFAULT_CUTOFF = 2
-
-DEFAULT_K = 32
-DEFAULT_N_HT = 4
-DEFAULT_MIN_HASHSIZE = 1e6
-
-
-def main():
-    parser = argparse.ArgumentParser(description='XXX')
-
-    env_ksize = os.environ.get('KHMER_KSIZE', DEFAULT_K)
-    env_n_hashes = os.environ.get('KHMER_N_HASHES', DEFAULT_N_HT)
-    env_hashsize = os.environ.get('KHMER_MIN_HASHSIZE', DEFAULT_MIN_HASHSIZE)
-
-    parser.add_argument('--ksize', '-k', type=int, dest='ksize',
-                        default=env_ksize,
-                        help='k-mer size to use')
-    parser.add_argument('--n_hashes', '-N', type=int, dest='n_hashes',
-                        default=env_n_hashes,
-                        help='number of hash tables to use')
-    parser.add_argument('--hashsize', '-x', type=float, dest='min_hashsize',
-                        default=env_hashsize,
-                        help='lower bound on hashsize to use')
-
-    parser.add_argument('--cutoff', '-C', type=int, dest='abund_cutoff',
-                        help='remove k-mers below this abundance',
-                        default=DEFAULT_CUTOFF)
-
-    parser.add_argument('--normalize-to', '-Z', type=int, dest='normalize_to',
-                        help='base cutoff on median k-mer abundance of this',
-                        default=DEFAULT_NORMALIZE_LIMIT)
-
-    parser.add_argument('--mrna', '-m', dest='is_mrna',
-                        help='treat as mRNAseq data',
-                        default=True, action='store_true')
-
-    parser.add_argument('--genome', '-g', dest='is_genome',
-                        help='treat as genomic data (uniform coverage)',
-                        default=False, action='store_true')
-
-    parser.add_argument('--metagenomic', '-M',
-                        dest='is_metagenomic',
-                        help='treat as metagenomic data',
-                        default=True, action='store_true')
-
-    parser.add_argument('input_filenames', nargs='+')
-    args = parser.parse_args()
-
-    K = args.ksize
-    HT_SIZE = args.min_hashsize
-    N_HT = args.n_hashes
-
-    CUTOFF = args.abund_cutoff
-    NORMALIZE_LIMIT = args.normalize_to
-
-    is_variable_abundance = True        # conservative
-    if args.is_genome:
-        is_variable_abundance = False
-
-    errors = [0] * 1000
-
-    print 'making hashtable'
-    ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
-
-    save_pass2 = 0
-
-    pass2list = []
-    for filename in args.input_filenames:
-        pass2filename = os.path.basename(filename) + '.pass2'
-        trimfilename = os.path.basename(filename) + '.abundtrim'
-
-        pass2list.append((pass2filename, trimfilename))
-
-        pass2fp = open(pass2filename, 'w')
-        trimfp = open(trimfilename, 'w')
-
-        for n, read in enumerate(screed.open(filename)):
-            if n % 10000 == 0:
-                print '...', n, filename, save_pass2
-            seq = read.sequence.replace('N', 'A')
-            med, _, _ = ht.get_median_count(seq)
-
-            if med < NORMALIZE_LIMIT:
-                ht.consume(seq)
-                pass2fp.write('>%s\n%s\n' % (read.name, read.sequence))
-                save_pass2 += 1
-            else:
-                trim_seq, trim_at = ht.trim_on_abundance(seq, CUTOFF)
-                if trim_at < len(seq):
-                    errors[trim_at] += 1
-                if trim_at >= K:
-                    trimfp.write('>%s\n%s\n' % (read.name, trim_seq))
-
-        pass2fp.close()
-        trimfp.close()
-
-        print 'saved %d of %d to pass2fp' % (save_pass2, n,)
-
-    for pass2filename, trimfilename in pass2list:
-        for n, read in enumerate(screed.open(pass2filename)):
-            if n % 10000 == 0:
-                print '... x 2', n, filename
-
-            trimfp = open(trimfilename, 'a')
-
-            seq = read.sequence.replace('N', 'A')
-            med, _, _ = ht.get_median_count(seq)
-
-            if med >= NORMALIZE_LIMIT or not is_variable_abundance:
-                trim_seq, trim_at = ht.trim_on_abundance(seq, CUTOFF)
-                if trim_at < len(seq):
-                    errors[trim_at] += 1
-                if trim_at >= K:
-                    trimfp.write('>%s\n%s\n' % (read.name, trim_seq))
-            else:
-                trimfp.write('>%s\n%s\n' % (read.name, read.sequence))
-
-    os.unlink(pass2filename)
-
-    fp = open('err-profile.out', 'w')
-    for pos, count in enumerate(errors):
-        print >>fp, pos, count
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/ec.py b/sandbox/ec.py
deleted file mode 100755
index cf36e58..0000000
--- a/sandbox/ec.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import screed
-import khmer
-import sys
-
-import math
-
-
-def main():
-    hash_filename = sys.argv[1]
-    input_filename = sys.argv[2]
-    output_filename = sys.argv[3]
-    max_error_region = int(sys.argv[4])
-
-    C = 20  # 20
-
-    corrected = 0
-    uncorrected = 0
-
-    outfp = open(output_filename, 'w')
-
-    ht = khmer.load_counting_hash(hash_filename)
-    aligner = khmer.new_readaligner(ht, 1, C, max_error_region)
-
-    K = ht.ksize()
-
-    for n, record in enumerate(screed.open(input_filename)):
-        if n % 1000 == 0:
-            print n
-
-        seq = record.sequence
-        seq_name = record.name
-
-        seq = seq.replace('N', 'A')
-
-        grXreAlign, reXgrAlign = aligner.align(seq)
-
-        if len(reXgrAlign) > 0:
-            graph_seq = grXreAlign.replace('-', '')
-            corrected += 1
-            outfp.write('>%s\n%s\n' % (seq_name, graph_seq))
-        else:
-            uncorrected += 1
-            outfp.write('>%s\n%s\n' % (seq_name, seq))
-
-
-    print 'corrected', corrected
-    print 'uncorrected', uncorrected
-
-    outfp.close()
-
-
-if __name__ == '__main__':
-    main()
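
ec.py, deleted above, corrects a read by aligning it against the counting-hash graph; when the aligner returns a non-empty read alignment, the gap-stripped graph sequence replaces the original. A sketch of that step, assuming the same two-value align() API the script uses ('counts.kh' is a hypothetical table saved by an earlier counting run; the read is a placeholder):

    import khmer

    ht = khmer.load_counting_hash('counts.kh')      # hypothetical saved table
    aligner = khmer.new_readaligner(ht, 1, 20, 40)  # trusted cutoff, C, max error region

    seq = 'ACGTACGTACGTACGTACGTACGT'                # placeholder read
    grXreAlign, reXgrAlign = aligner.align(seq)

    if len(reXgrAlign) > 0:
        # corrected: take the graph path and drop alignment gaps
        seq = grXreAlign.replace('-', '')
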
diff --git a/sandbox/error-correct-pass2.py b/sandbox/error-correct-pass2.py
deleted file mode 100755
index 7b6d9e7..0000000
--- a/sandbox/error-correct-pass2.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-"""
-Error correct reads based on a counting hash from a diginorm step.
-Output sequences will be put in @@@.
-
-% python scripts/error-correct-pass2 <counting.kh> <data1> [ <data2> <...> ]
-
-Use '-h' for parameter help.
-"""
-import sys
-import screed.fasta
-import os
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-
-from khmer.khmer_args import build_counting_args
-
-###
-
-DEFAULT_COVERAGE = 20
-DEFAULT_MAX_ERROR_REGION = 40
-
-
-def main():
-    parser = build_counting_multifile_args()
-    parser.add_argument('--cutoff', '-C', dest='coverage',
-                        default=DEFAULT_COVERAGE, type=int,
-                        help="Diginorm coverage.")
-    parser.add_argument('--max-error-region', '-M', dest='max_error_region',
-                        default=DEFAULT_MAX_ERROR_REGION, type=int,
-                        help="Max length of error region allowed")
-    args = parser.parse_args()
-
-    counting_ht = args.input_table
-    infiles = args.input_filenames
-
-    print 'file with ht: %s' % counting_ht
-
-    print 'loading hashtable'
-    ht = khmer.load_counting_hash(counting_ht)
-    K = ht.ksize()
-    C = args.coverage
-    max_error_region = args.max_error_region
-
-    print "K:", K
-    print "C:", C
-    print "max error region:", max_error_region
-
-    # the filtering function.
-    def process_fn(record):
-        # read_aligner is probably not threadsafe?
-        aligner = khmer.new_readaligner(ht, 1, C, max_error_region)
-
-        name = record['name']
-        seq = record['sequence']
-
-        seq = seq.replace('N', 'A')
-
-        grXreAlign, reXgrAlign = aligner.align(seq)
-
-        if len(reXgrAlign) > 0:
-            graph_seq = grXreAlign.replace('-', '')
-            seq = graph_seq
-
-        return name, seq
-
-    # the filtering loop
-    for infile in infiles:
-        print 'filtering', infile
-        outfile = os.path.basename(infile) + '.corr'
-        outfp = open(outfile, 'w')
-
-        tsp = ThreadedSequenceProcessor(process_fn)
-        tsp.start(verbose_loader(infile), outfp)
-
-        print 'output in', outfile
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/extract-single-partition.py b/sandbox/extract-single-partition.py
index 7244959..0e890da 100755
--- a/sandbox/extract-single-partition.py
+++ b/sandbox/extract-single-partition.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/fasta-to-abundance-hist.py b/sandbox/fasta-to-abundance-hist.py
index 828871b..d101f16 100755
--- a/sandbox/fasta-to-abundance-hist.py
+++ b/sandbox/fasta-to-abundance-hist.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/filter-below-abund.py b/sandbox/filter-below-abund.py
index bed9dea..9b4ef76 100755
--- a/sandbox/filter-below-abund.py
+++ b/sandbox/filter-below-abund.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/filter-median-and-pct.py b/sandbox/filter-median-and-pct.py
index dd3fd3e..ed9e251 100755
--- a/sandbox/filter-median-and-pct.py
+++ b/sandbox/filter-median-and-pct.py
@@ -2,14 +2,14 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
 Accept or discard sequences XXX, based on the given counting
 hash table.  Output sequences will be placed in 'infile.medpctfilt'.
 
-% python sandbox/filter-median-and-pct.py <counting.kh> <data1> [ <data2> <...> ]
+% python sandbox/filter-median-and-pct.py <counting.ct> <data1> [ <data2> <...> ]
 
 Use '-h' for parameter help.
 """
diff --git a/sandbox/filter-median.py b/sandbox/filter-median.py
index ef40936..c1a126f 100755
--- a/sandbox/filter-median.py
+++ b/sandbox/filter-median.py
@@ -2,14 +2,14 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
 Accept or discard sequences XXX, based on the given counting
 hash table.  Output sequences will be placed in 'infile.medfilt'.
 
-% python sandbox/filter-median.py <counting.kh> <data1> [ <data2> <...> ]
+% python sandbox/filter-median.py <counting.ct> <data1> [ <data2> <...> ]
 
 Use '-h' for parameter help.
 """
diff --git a/sandbox/find-high-abund-kmers.py b/sandbox/find-high-abund-kmers.py
index 8b32e2f..687ab41 100755
--- a/sandbox/find-high-abund-kmers.py
+++ b/sandbox/find-high-abund-kmers.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
diff --git a/sandbox/find-unpart.py b/sandbox/find-unpart.py
deleted file mode 100755
index b7864df..0000000
--- a/sandbox/find-unpart.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import khmer
-import sys
-import os
-import gc
-import glob
-
-TRAVERSE_ON_UNPART = True
-STOP_BIG_TRAVERSALS = True
-
-
-def main():
-    already_part = sys.argv[1]
-    new_to_part = sys.argv[2]
-    basename = os.path.basename(new_to_part)
-    pmap_filename = sys.argv[3]
-
-    # if not os.path.exists(already_part):
-    #    print '%s doesn\'t exist! dying.' % already_part
-    #    sys.exit(0)
-
-    # create a fake-ish ht; K matters, but not hashtable size.
-    ht = khmer.load_hashbits(already_part + '.ht')
-    ht.load_tagset(already_part + '.tagset')
-    ht.merge_subset_from_disk(pmap_filename)
-
-    # find singletons
-    n_singletons = ht.find_unpart(
-        new_to_part, TRAVERSE_ON_UNPART, STOP_BIG_TRAVERSALS)
-    print 'found:', n_singletons
-
-    print 'saving', basename + '.unpart'
-    n_partitions = ht.output_partitions(new_to_part, basename + '.unpart')
-    print 'saving', basename + '.pmap'
-    ht.save_partitionmap(basename + '.pmap')
-
-    ###
-
-    (n_partitions, n_singletons) = ht.count_partitions()
-
-    print 'output partitions:', n_partitions
-    print 'pmap partitions:', n_partitions
-    print 'singletons:', n_singletons
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/graph-size.py b/sandbox/graph-size.py
old mode 100644
new mode 100755
index 6249348..00891b5
--- a/sandbox/graph-size.py
+++ b/sandbox/graph-size.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/sandbox/hi-lo-abundance-by-position.py b/sandbox/hi-lo-abundance-by-position.py
index 985f90f..eca923f 100755
--- a/sandbox/hi-lo-abundance-by-position.py
+++ b/sandbox/hi-lo-abundance-by-position.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/make-coverage.py b/sandbox/make-coverage.py
new file mode 100755
index 0000000..4828539
--- /dev/null
+++ b/sandbox/make-coverage.py
@@ -0,0 +1,47 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.txt.
+# Contact: khmer-project at idyll.org
+#
+import screed
+
+import sys
+
+def main():
+    dbfile = sys.argv[1]
+    mapfile = sys.argv[2]
+
+    lengths = {}
+    for n, record in enumerate(screed.open(dbfile)):
+        if n % 100000 == 0:
+            print '...', n
+        lengths[record.name] = len(record.sequence)
+
+    sums = {}
+    for n, line in enumerate(open(mapfile)):
+        if n % 100000 == 0:
+            print '... 2x', n
+        x = line.split('\t')
+        name = x[2]
+        readlen = len(x[4])
+        sums[name] = sums.get(name, 0) + 1
+
+    mapped_reads = n
+
+    rpkms = {}
+    for k in sums:
+        rpkms[k] = sums[k] * (1000. / float(lengths[k])) * \
+            float(mapped_reads) / 1e6
+
+    outfp = open(dbfile + '.cov', 'w')
+    for n, record in enumerate(screed.open(dbfile)):
+        if n % 100000 == 0:
+            print '...', n
+
+        print >>outfp, ">%s[cov=%d]\n%s" % (
+            record.name, rpkms.get(record.name, 0), record.sequence)
+
+if __name__ == '__main__':
+    main()
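
The new make-coverage.py annotates each reference sequence with a coverage figure derived from the mapping counts. For comparison, the conventional RPKM definition normalizes the raw count by transcript length in kilobases and by mapped reads in millions, i.e. it divides (rather than multiplies) by mapped_reads/1e6. A worked sketch of that conventional definition in plain Python (all numbers made up; this is the textbook formula, not necessarily what the script above intends):

    # conventional RPKM: count, normalized by transcript length in kb
    # and by library size in millions of mapped reads
    count = 500            # reads mapped to one reference sequence
    length = 2000          # reference length in bp
    mapped_reads = 10e6    # total mapped reads in the library

    rpkm = count / ((length / 1000.0) * (mapped_reads / 1e6))
    print 'RPKM = %.1f' % rpkm   # 500 / (2.0 * 10.0) = 25.0
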
diff --git a/sandbox/multi-rename.py b/sandbox/multi-rename.py
index 4216cab..4c03f08 100755
--- a/sandbox/multi-rename.py
+++ b/sandbox/multi-rename.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import screed
diff --git a/sandbox/normalize-by-align.py b/sandbox/normalize-by-align.py
deleted file mode 100755
index b0a4027..0000000
--- a/sandbox/normalize-by-align.py
+++ /dev/null
@@ -1,150 +0,0 @@
-#! /usr/bin/env python
-"""
-XXX
-
-Eliminate reads with minimum k-mer abundance higher than
-DESIRED_COVERAGE.  Output sequences will be placed in 'infile.keep'.
-
-% python scripts/normalize-by-min.py [ -C <cutoff> ] <data1> <data2> ...
-
-Use '-h' for parameter help.
-"""
-
-import sys
-import screed
-import os
-import khmer
-from khmer.khmer_args import build_counting_args
-
-DEFAULT_MINIMUM_COVERAGE = 5
-
-
-def main():
-    parser = build_counting_args()
-    parser.add_argument("-t", "--trusted-cutoff", dest="trusted_cutoff", type=int, default=3)
-    parser.add_argument("--bits-theta", help="Tuning parameter controlling trade off of speed vs alignment sensitivity", default=1.0, type=float, dest="bits_theta")
-    parser.add_argument('-C', '--cutoff', type=int, dest='cutoff',
-                        default=DEFAULT_MINIMUM_COVERAGE)
-    parser.add_argument('-s', '--savehash', dest='savehash', default='')
-    parser.add_argument('-l', '--loadhash', dest='loadhash',
-                        default='')
-    parser.add_argument('--details-out', dest="details_out")
-    parser.add_argument('input_filenames', nargs='+')
-
-    args = parser.parse_args()
-
-    if not args.quiet:
-        print >>sys.stderr, '\nPARAMETERS:'
-        print >>sys.stderr, ' - kmer size =    %d \t\t(-k)' % args.ksize
-        print >>sys.stderr, ' - n hashes =     %d \t\t(-N)' % args.n_tables
-        print >>sys.stderr, ' - min hashsize = %-5.2g \t(-x)' % \
-            args.min_tablesize
-        print >>sys.stderr, ''
-        print >>sys.stderr, 'Estimated memory usage is %.2g bytes ' \
-            '(n_hashes x min_hashsize)' % (
-            args.n_tables * args.min_tablesize)
-        print >>sys.stderr, '-' * 8
-
-    K = args.ksize
-    HT_SIZE = args.min_tablesize
-    N_HT = args.n_tables
-    DESIRED_COVERAGE = args.cutoff
-
-    filenames = args.input_filenames
-
-    if args.loadhash:
-        print 'loading hashtable from', args.loadhash
-        ht = khmer.load_counting_hash(args.loadhash)
-    else:
-        print 'making hashtable'
-        ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
-
-    aligner = khmer.new_readaligner(ht, args.trusted_cutoff, args.bits_theta)
-            
-    if args.details_out != None:
-        details_out = open(args.details_out, "w")
-    else:
-        details_out = None
-
-    total = 0
-    discarded = 0
-    for input_filename in filenames:
-        output_name = os.path.basename(input_filename) + '.keepalign'
-        outfp = open(output_name, 'w')
-
-        for n, record in enumerate(screed.open(input_filename)):
-            if n > 0 and n % 10000 == 0:
-                print '... kept', total - discarded, 'of', total, ', or', \
-                    int(100. - discarded / float(total) * 100.), '%'
-                print '... in file', input_filename
-
-            total += 1
-
-            if len(record.sequence) < K:
-                continue
-
-            seq = record.sequence.upper().replace('N', 'A')
-
-            ##
-            score, graph_alignment, read_alignment, truncated = aligner.align(record.sequence)
-
-            keep = False
-            if truncated:
-                keep = True
-            else:
-                if False:
-                    graph_seq = graph_alignment.replace("-", "")
-                else:
-                    graph_seq = ""
-                    for i in range(len(graph_alignment)):
-                        if graph_alignment[i] == "-":
-                            graph_seq += read_alignment[i]
-                        else:
-                            graph_seq += graph_alignment[i]
-
-                mincount = ht.get_min_count(graph_seq)
-                keep = True
-                seq = graph_seq
-
-                #if mincount < DESIRED_COVERAGE:
-                #    keep = True
-                #    seq = graph_seq
-                #else:
-                #    assert not keep
-
-            if details_out != None:
-                details_out.write("+{7}\t{0:0.2f}\t{3}\t{4}\nread:      {6}\ngraph_aln: {1}\nread_aln:  {2}\nstored_seq:{5}\n".format(score, graph_alignment, read_alignment, truncated, keep, seq, record.sequence, record.name))
-
-
-            if keep:
-                ht.consume(seq)
-                outfp.write('>%s\n%s\n' % (record.name, seq))
-            else:
-                discarded += 1
-
-        if total:
-            print 'DONE with', input_filename, '; kept', total - discarded, 'of',\
-            total, 'or', int(100. - discarded / float(total) * 100.), '%'
-        print 'output in', output_name
-
-    if args.savehash:
-        print 'Saving hashfile through', input_filename
-        print '...saving to', args.savehash
-        ht.save(args.savehash)
-
-    # Change 0.2 only if you really grok it.  HINT: You don't.
-    fp_rate = khmer.calc_expected_collisions(ht)
-    print 'fp rate estimated to be %1.3f' % fp_rate
-
-    if fp_rate > 0.20:
-        print >>sys.stderr, "**"
-        print >>sys.stderr, "** ERROR: the counting hash is too small for"
-        print >>sys.stderr, "** this data set.  Increase hashsize/num ht."
-        print >>sys.stderr, "**"
-        print >>sys.stderr, "** Do not use these results!!"
-        sys.exit(-1)
-
-if __name__ == '__main__':
-    main()
-
-# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
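
The deleted normalize-by-align.py rebuilds a corrected sequence by walking the two alignment strings in parallel: wherever the graph alignment shows a gap, the read base is kept; otherwise the graph base wins. The overlay step in isolation (pure string handling, no khmer required; the alignment strings are made up):

    # gap columns in the graph alignment fall back to the read base
    graph_alignment = 'ACG-TAC'
    read_alignment  = 'ACGTTAC'

    graph_seq = ''.join(r if g == '-' else g
                        for g, r in zip(graph_alignment, read_alignment))
    assert graph_seq == 'ACGTTAC'
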
diff --git a/sandbox/normalize-by-median-pct.py b/sandbox/normalize-by-median-pct.py
index 26594b5..b23d6d8 100755
--- a/sandbox/normalize-by-median-pct.py
+++ b/sandbox/normalize-by-median-pct.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
@@ -139,10 +139,10 @@ def main():
             # Emit records if any passed
             if passed_length and passed_filter:
                 for record in batch:
-                    if hasattr(record, 'accuracy'):
+                    if hasattr(record, 'quality'):
                         outfp.write('@%s\n%s\n+\n%s\n' % (record.name,
                                                           record.sequence,
-                                                          record.accuracy))
+                                                          record.quality))
                     else:
                         outfp.write('>%s\n%s\n' %
                                     (record.name, record.sequence))
diff --git a/sandbox/print-stoptags.py b/sandbox/print-stoptags.py
index 483a489..6fc327c 100755
--- a/sandbox/print-stoptags.py
+++ b/sandbox/print-stoptags.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/sandbox/print-tagset.py b/sandbox/print-tagset.py
index 52a3c71..dbe9c77 100755
--- a/sandbox/print-tagset.py
+++ b/sandbox/print-tagset.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/sandbox/read_aligner.py b/sandbox/read_aligner.py
deleted file mode 100755
index c976651..0000000
--- a/sandbox/read_aligner.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#! /usr/bin/env python
-"""
-Error correct reads based on a counting hash from a diginorm step.
-Output sequences will be put in @@@.
-
-% python scripts/error-correct-pass2 <counting.kh> <data1> [ <data2> <...> ]
-
-Use '-h' for parameter help.
-"""
-import sys
-import screed
-import os
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-
-from khmer.khmer_args import build_counting_args
-from khmer.khmer_args import add_loadhash_args
-
-###
-
-DEFAULT_COVERAGE = 20
-DEFAULT_MAX_ERROR_REGION = 40
-
-
-def main():
-    parser = build_counting_args()
-    parser.add_argument("--trusted-cov", dest="trusted_cov", type=int, default=2)
-    parser.add_argument("--theta", type=float, default=1.0)
-    parser.add_argument("input_table")
-    parser.add_argument("input_filenames", nargs="+")
-    add_loadhash_args(parser)
-
-    args = parser.parse_args()
-
-    counting_ht = args.input_table
-    infiles = args.input_filenames
-
-    print >>sys.stderr, 'file with ht: %s' % counting_ht
-
-    print >>sys.stderr, 'loading hashtable'
-    ht = khmer.load_counting_hash(counting_ht)
-    K = ht.ksize()
-
-    aligner = khmer.new_readaligner(ht, args.trusted_cov, args.theta) # counting hash, trusted kmer coverage cutoff, bits theta (threshold value for terminating unproductive alignments)
-    
-    ### the filtering loop
-    for infile in infiles:
-        print >>sys.stderr, 'aligning', infile
-        for n, record in enumerate(screed.open(infile)):
-
-            name = record['name']
-            seq = record['sequence'].upper()
-            print >>sys.stderr, name
-            print >>sys.stderr, seq
-
-            score, graph_alignment, read_alignment, truncated = aligner.align(seq)
-            print >>sys.stderr, score
-            print >>sys.stderr, graph_alignment
-            print >>sys.stderr, read_alignment
-            print >>sys.stderr, truncated
-            print ">{0}\n{1}".format(name, graph_alignment)
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/readstats.py b/sandbox/readstats.py
deleted file mode 100755
index d37a630..0000000
--- a/sandbox/readstats.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import sys
-import screed
-
-
-def main():
-    total_bp = 0
-    total_seqs = 0
-
-    output = []
-    for filename in sys.argv[1:]:
-        bp = 0
-        seqs = 0
-        for record in screed.open(filename):
-            if seqs % 100000 == 0:
-                print >>sys.stderr, '...', filename, seqs
-            bp += len(record.sequence)
-            seqs += 1
-
-        if seqs == 0:
-            print >>sys.stderr, 'No sequences found in %s' % filename
-        else:
-            avg_len = bp / float(seqs)
-            s = '%d bp / %d seqs; %.1f average length -- %s' % (bp,
-                                                                seqs,
-                                                                avg_len,
-                                                                filename)
-            print >>sys.stderr, '... found', s
-            output.append(s)
-
-            total_bp += bp
-            total_seqs += seqs
-
-    if total_seqs == 0:
-        print 'No sequences found in %d files' % len(sys.argv[1:])
-    else:
-        print '---------------'
-        print "\n".join(output)
-        print '---------------'
-        print '%d bp / %d seqs; %.1f average length -- total' % (
-            total_bp, total_seqs, total_bp / float(total_seqs))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/renumber-partitions.py b/sandbox/renumber-partitions.py
index 0dde092..7a3df7b 100755
--- a/sandbox/renumber-partitions.py
+++ b/sandbox/renumber-partitions.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/saturate-by-median.py b/sandbox/saturate-by-median.py
index 9a603b9..8d5eba4 100755
--- a/sandbox/saturate-by-median.py
+++ b/sandbox/saturate-by-median.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -21,12 +21,10 @@ from itertools import izip
 from khmer.khmer_args import (build_counting_args, add_loadhash_args,
                               report_on_config, info)
 import argparse
-from khmer.file import (check_space, check_space_for_hashtable,
-                        check_valid_file_exists)
+from khmer.kfile import (check_space, check_space_for_hashtable,
+                         check_valid_file_exists)
 DEFAULT_DESIRED_COVERAGE = 1
 
-MAX_FALSE_POSITIVE_RATE = 0.8             # see Zhang et al.,
-# http://arxiv.org/abs/1309.2975
 
 # Iterate a collection in arbitrary batches
 # from: http://stackoverflow.com/questions/4628290/pairs-from-single-list
@@ -185,9 +183,9 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     report_frequency = args.report_frequency
 
     check_valid_file_exists(args.input_filenames)
-    check_space(args.input_filenames)
+    check_space(args.input_filenames, False)
     if args.savetable:
-        check_space_for_hashtable(args.n_tables * args.min_tablesize)
+        check_space_for_hashtable(args.n_tables * args.min_tablesize, False)
 
     # list to save error files along with throwing exceptions
     if args.force:
@@ -237,7 +235,9 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         print '...saving to', args.savetable
         htable.save(args.savetable)
 
-    fp_rate = khmer.calc_expected_collisions(htable)
+    # re: threshold, see Zhang et al.,
+    # http://arxiv.org/abs/1309.2975
+    fp_rate = khmer.calc_expected_collisions(htable, args.force, max_false_pos=.8)
     print 'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
 
     if args.force and len(corrupt_files) > 0:
@@ -245,15 +245,6 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         print >> sys.stderr, "** IOErrors occurred in the following files:"
         print >> sys.stderr, "\t", " ".join(corrupt_files)
 
-    if fp_rate > MAX_FALSE_POSITIVE_RATE:
-        print >> sys.stderr, "**"
-        print >> sys.stderr, ("** ERROR: the k-mer counting table is too small"
-                              " for this data set.  Increase tablesize/# "
-                              "tables.")
-        print >> sys.stderr, "**"
-        print >> sys.stderr, "** Do not use these results!!"
-        sys.exit(1)
-
 if __name__ == '__main__':
     main()
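
The change above replaces saturate-by-median.py's hand-rolled false-positive check with the 1.4-style call, which takes the threshold directly. A sketch, assuming the positional signature shown in the diff (the 0.8 threshold follows Zhang et al., http://arxiv.org/abs/1309.2975; the table here is a placeholder):

    import khmer

    # placeholder table; the real scripts build this from command-line args
    htable = khmer.new_counting_hash(20, int(1e6), 4)

    # force=False: abort rather than continue when the fp rate exceeds 0.8
    fp_rate = khmer.calc_expected_collisions(htable, False, max_false_pos=.8)
    print 'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
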
 
diff --git a/sandbox/shuffle-fasta.py b/sandbox/shuffle-fasta.py
deleted file mode 100755
index 707e5fe..0000000
--- a/sandbox/shuffle-fasta.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-# this only works for small files! it loads everything into mem.
-import sys
-from screed.fasta import fasta_iter
-import random
-
-
-def main():
-    d = dict([(r['name'], r['sequence']) for r in fasta_iter(open(sys.argv[1]))])
-
-    ks = d.keys()
-    random.shuffle(ks)
-
-    for k in ks:
-        s = d[k]
-
-        print '>%s\n%s' % (k, s)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/shuffle-reverse-rotary.py b/sandbox/shuffle-reverse-rotary.py
index a00637a..9a289bf 100755
--- a/sandbox/shuffle-reverse-rotary.py
+++ b/sandbox/shuffle-reverse-rotary.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/slice-reads-by-coverage.py b/sandbox/slice-reads-by-coverage.py
index 8c0a130..6b4896b 100755
--- a/sandbox/slice-reads-by-coverage.py
+++ b/sandbox/slice-reads-by-coverage.py
@@ -1,4 +1,9 @@
-#! /usr/bin/env python
+#! /usr/bin/env python2
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2014. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project at idyll.org
+
 import argparse
 import screed
 import sys
@@ -6,8 +11,8 @@ import khmer
 
 
 def output_single(read):
-    if hasattr(read, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.accuracy)
+    if hasattr(read, 'quality'):
+        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.quality)
     else:
         return ">%s\n%s\n" % (read.name, read.sequence)
 
diff --git a/sandbox/split-fasta.py b/sandbox/split-fasta.py
index 7cc5a5f..0339693 100755
--- a/sandbox/split-fasta.py
+++ b/sandbox/split-fasta.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/split-sequences-by-length.py b/sandbox/split-sequences-by-length.py
index f6fd548..e83c0fe 100755
--- a/sandbox/split-sequences-by-length.py
+++ b/sandbox/split-sequences-by-length.py
@@ -2,14 +2,14 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
 Trim sequences at k-mers of the given abundance, based on the given counting
 hash table.  Output sequences will be placed in 'infile.abundfilt'.
 
-% python scripts/filter-abund.py <counting.kh> <data1> [ <data2> <...> ]
+% python scripts/filter-abund.py <counting.ct> <data1> [ <data2> <...> ]
 
 Use '-h' for parameter help.
 """
diff --git a/sandbox/stoptag-abundance-hist.py b/sandbox/stoptag-abundance-hist.py
index 3818ecf..b1c3d8a 100755
--- a/sandbox/stoptag-abundance-hist.py
+++ b/sandbox/stoptag-abundance-hist.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/stoptags-by-position.py b/sandbox/stoptags-by-position.py
index bc21bdd..653b441 100755
--- a/sandbox/stoptags-by-position.py
+++ b/sandbox/stoptags-by-position.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/sandbox/strip-partition.py b/sandbox/strip-partition.py
index 3dab22f..5dbd82d 100755
--- a/sandbox/strip-partition.py
+++ b/sandbox/strip-partition.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import screed
diff --git a/sandbox/subset-report.py b/sandbox/subset-report.py
index 4292fa7..dd580d8 100755
--- a/sandbox/subset-report.py
+++ b/sandbox/subset-report.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/sandbox/sweep-files.py b/sandbox/sweep-files.py
index 7d78691..74f0675 100755
--- a/sandbox/sweep-files.py
+++ b/sandbox/sweep-files.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 #
 # pylint: disable=invalid-name,missing-docstring,no-member
 
@@ -36,8 +36,6 @@ import os
 import time
 import khmer
 from khmer.khmer_args import (build_hashbits_args, report_on_config, info)
-from khmer.file import (check_file_status, check_valid_file_exists,
-                        check_space)
 
 DEFAULT_OUT_PREF = 'reads'
 DEFAULT_RANGE = -1
@@ -71,8 +69,8 @@ def get_parser():
 
 
 def output_single(r):
-    if hasattr(r, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (r.name, r.sequence, r.accuracy)
+    if hasattr(r, 'quality'):
+        return "@%s\n%s\n+\n%s\n" % (r.name, r.sequence, r.quality)
     else:
         return ">%s\n%s\n" % (r.name, r.sequence)
 
diff --git a/sandbox/sweep-out-reads-with-contigs.py b/sandbox/sweep-out-reads-with-contigs.py
index 99b1883..29b3e3b 100755
--- a/sandbox/sweep-out-reads-with-contigs.py
+++ b/sandbox/sweep-out-reads-with-contigs.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import sys
diff --git a/sandbox/sweep-reads.py b/sandbox/sweep-reads.py
index 81770fb..a344938 100755
--- a/sandbox/sweep-reads.py
+++ b/sandbox/sweep-reads.py
@@ -1,11 +1,13 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 #
 # pylint: disable=invalid-name,missing-docstring,no-member
 
+from khmer import utils
+
 """
 Find all reads connected to the given contigs on a per-partition basis.
 
@@ -34,8 +36,10 @@ import os
 import time
 import khmer
 from khmer.khmer_args import (build_hashbits_args, report_on_config, info)
-from khmer.file import (check_file_status, check_valid_file_exists,
-                        check_space)
+from khmer.kfile import (check_input_files, check_valid_file_exists,
+                         check_space)
+
+from khmer.utils import write_record
 
 DEFAULT_NUM_BUFFERS = 50000
 DEFAULT_MAX_READS = 1000000
@@ -52,10 +56,10 @@ def fmt_fasta(name, seq, labels=[]):
         name=name, labels='\t'.join([str(l) for l in labels]), seq=seq)
 
 
-def fmt_fastq(name, seq, accuracy, labels=[]):
+def fmt_fastq(name, seq, quality, labels=[]):
     return '@{name}\t{labels}\n{seq}\n+\n{acc}\n'.format(
         name=name, labels='\t'.join([str(l) for l in labels]), seq=seq,
-        acc=accuracy)
+        acc=quality)
 
 
 class ReadBuffer(object):
@@ -181,7 +185,7 @@ def get_parser():
     labeling = parser.add_mutually_exclusive_group(required=True)
     labeling.add_argument('--label-by-pid', dest='label_by_pid',
                           action='store_true', help='separate reads by\
-                        referece partition id')
+                        reference partition id')
     labeling.add_argument('--label-by-seq', dest='label_by_seq',
                           action='store_true', help='separate reads by\
                         reference sequence')
@@ -225,7 +229,7 @@ def main():
     buf_size = args.buffer_size
     max_reads = args.max_reads
 
-    check_file_status(args.input_fastp, args.force)
+    check_input_files(args.input_fastp, args.force)
     check_valid_file_exists(args.input_files)
     all_input_files = [input_fastp]
     all_input_files.extend(args.input_files)
@@ -239,7 +243,7 @@ def main():
     del ix
 
     extension = 'fa'
-    if hasattr(record, 'accuracy'):      # fastq!
+    if hasattr(record, 'quality'):      # fastq!
         extension = 'fq'
 
     output_buffer = ReadBufferManager(
@@ -284,16 +288,8 @@ def main():
                     ht.consume_sequence_and_tag_with_labels(record.sequence,
                                                             label)
 
-                    if hasattr(record, 'accuracy'):
-                        outfp.write('@{name}\n{seq}+{accuracy}\n'.format(
-                            name=record.name,
-                            seq=record.sequence,
-                            accuracy=record.accuracy))
-                    else:
-                        outfp.write('>{name}\n{seq}\n'.format(
-                            name=record.name,
-                            seq=record.sequence))
-
+                    write_record(record, outfp)
+
             except IOError as e:
                 print >>sys.stderr, '!! ERROR !!', e
                 print >>sys.stderr, '...error splitting input. exiting...'
@@ -346,8 +342,8 @@ def main():
                 except ValueError as e:
                     pass
                 else:
-                    if hasattr(record, 'accuracy'):
-                        seq_str = fmt_fastq(name, seq, record.accuracy, labels)
+                    if hasattr(record, 'quality'):
+                        seq_str = fmt_fastq(name, seq, record.quality, labels)
                     else:
                         seq_str = fmt_fasta(name, seq, labels)
                     label_number_dist.append(len(labels))
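
Several scripts in this import, sweep-reads.py above included, drop their hand-written FASTA/FASTQ emitters in favor of khmer.utils.write_record, which inspects the record for quality data and picks the output format accordingly. A minimal usage sketch ('reads.fq' is a hypothetical input file):

    import sys
    import screed
    from khmer.utils import write_record

    # write_record emits FASTQ when the record carries quality scores,
    # FASTA otherwise (see the sweep-reads.py change above)
    for record in screed.open('reads.fq'):
        write_record(record, sys.stdout)
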
diff --git a/sandbox/sweep-reads2.py b/sandbox/sweep-reads2.py
index 0597166..4ccea30 100755
--- a/sandbox/sweep-reads2.py
+++ b/sandbox/sweep-reads2.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
diff --git a/sandbox/sweep-reads3.py b/sandbox/sweep-reads3.py
index c56d640..9e66e1f 100755
--- a/sandbox/sweep-reads3.py
+++ b/sandbox/sweep-reads3.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
@@ -23,8 +23,8 @@ from khmer.khmer_args import (build_hashbits_args, DEFAULT_MIN_TABLESIZE)
 
 
 def output_single(r):
-    if hasattr(r, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (r.name, r.sequence, r.accuracy)
+    if hasattr(r, 'quality'):
+        return "@%s\n%s\n+\n%s\n" % (r.name, r.sequence, r.quality)
     else:
         return ">%s\n%s\n" % (r.name, r.sequence)
 
diff --git a/sandbox/to-casava-1.8-fastq.py b/sandbox/to-casava-1.8-fastq.py
deleted file mode 100755
index d7b0750..0000000
--- a/sandbox/to-casava-1.8-fastq.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-
-
-import functools
-import re
-import argparse
-
-from khmer import ReadParser
-
-
-resub_read_1 = functools.partial(re.sub, r"^(.*)/1$", r"\1 1:N:0:NNNNN")
-resub_read_2 = functools.partial(re.sub, r"^(.*)/2$", r"\1 2:N:0:NNNNN")
-
-
-def setup_cl_parser():
-
-    parser = \
-        argparse.ArgumentParser(
-            description=
-            "Convert the older FASTQ format to the Casava >= 1.8 FASTQ format."
-        )
-    parser.add_argument("input_filename")
-    parser.add_argument("output_filename")
-
-    return parser
-
-
-def main():
-
-    cl_parser = setup_cl_parser()
-    cl_args = cl_parser.parse_args()
-
-    # Note: Only use 1 thread to ensure same ordering of reads.
-    rparser = ReadParser(cl_args.input_filename, 1)
-
-    with open(cl_args.output_filename, "w") as output_file:
-
-        for read in rparser:
-
-            new_name = resub_read_1(read.name)
-            new_name = resub_read_2(new_name)
-
-            output_file.write(
-                "@{name}\n{sequence}\n+\n{accuracy}\n".format(
-                    name=new_name,
-                    sequence=read.sequence,
-                    accuracy=read.accuracy,
-                )
-            )
-
-
-if "__main__" == __name__:
-    main()
-
-# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
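
The whole transformation in the deleted to-casava-1.8-fastq.py is the pair of anchored substitutions at the top of the file: the pre-1.8 '/1' and '/2' read-name suffixes become Casava 1.8 style description fields. The rule in isolation (pure re; the read name is made up):

    import functools
    import re

    resub_read_1 = functools.partial(re.sub, r"^(.*)/1$", r"\1 1:N:0:NNNNN")
    resub_read_2 = functools.partial(re.sub, r"^(.*)/2$", r"\1 2:N:0:NNNNN")

    # hypothetical pre-1.8 read name
    name = resub_read_2(resub_read_1('HWI-EAS_4:2:1:492:573/2'))
    assert name == 'HWI-EAS_4:2:1:492:573 2:N:0:NNNNN'
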
diff --git a/sandbox/trim-low-abund.py b/sandbox/trim-low-abund.py
deleted file mode 100755
index f318a10..0000000
--- a/sandbox/trim-low-abund.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-"""
-Trim sequences at k-mers of the given abundance, using a streaming algorithm.
-Output sequences will be placed in 'infile.abundtrim'.
-
-% python sandbox/trim-low-abund.py [ <data1> [ <data2> [ ... ] ] ]
-
-Use -h for parameter help.
-
-TODO: paired support: paired reads should be kept together.
-TODO: load/save counting table.
-TODO: move output_single elsewhere
-"""
-import sys
-import screed
-import os
-import khmer
-import argparse
-import tempfile
-import shutil
-
-DEFAULT_NORMALIZE_LIMIT = 20
-DEFAULT_CUTOFF = 2
-
-DEFAULT_K = 32
-DEFAULT_N_HT = 4
-DEFAULT_MIN_HASHSIZE = 1e6
-
-# see Zhang et al., http://arxiv.org/abs/1309.2975
-MAX_FALSE_POSITIVE_RATE = 0.8
-
-
-def output_single(read, trim_at=None):
-    name = read.name
-    sequence = read.sequence
-
-    accuracy = None
-    if hasattr(read, 'accuracy'):
-        accuracy = read.accuracy
-
-    if trim_at is not None:
-        sequence = sequence[:trim_at]
-        if accuracy:
-            accuracy = accuracy[:trim_at]
-
-    if accuracy:
-        return "@%s\n%s\n+\n%s\n" % (name, sequence, accuracy)
-    else:
-        return ">%s\n%s\n" % (name, sequence)
-
-
-def main():
-    parser = argparse.ArgumentParser(description='XXX')
-
-    env_ksize = os.environ.get('KHMER_KSIZE', DEFAULT_K)
-    env_n_hashes = os.environ.get('KHMER_N_HASHES', DEFAULT_N_HT)
-    env_hashsize = os.environ.get('KHMER_MIN_HASHSIZE', DEFAULT_MIN_HASHSIZE)
-
-    parser.add_argument('--ksize', '-k', type=int, dest='ksize',
-                        default=env_ksize,
-                        help='k-mer size to use')
-    parser.add_argument('--n_hashes', '-N', type=int, dest='n_hashes',
-                        default=env_n_hashes,
-                        help='number of hash tables to use')
-    parser.add_argument('--hashsize', '-x', type=float, dest='min_hashsize',
-                        default=env_hashsize,
-                        help='lower bound on hashsize to use')
-
-    parser.add_argument('--cutoff', '-C', type=int, dest='abund_cutoff',
-                        help='remove k-mers below this abundance',
-                        default=DEFAULT_CUTOFF)
-
-    parser.add_argument('--normalize-to', '-Z', type=int, dest='normalize_to',
-                        help='base cutoff on median k-mer abundance of this',
-                        default=DEFAULT_NORMALIZE_LIMIT)
-
-    parser.add_argument('--variable-coverage', '-V', action='store_true',
-                        dest='variable_coverage', default=False,
-                        help='Only trim low-abundance k-mers from sequences '
-                        'that have high coverage.')
-    parser.add_argument('--tempdir', '-T', type=str, dest='tempdir',
-                        default='./')
-
-    parser.add_argument('input_filenames', nargs='+')
-    args = parser.parse_args()
-
-    ###
-
-    if len(set(args.input_filenames)) != len(args.input_filenames):
-        print >>sys.stderr, \
-            "Error: Cannot input the same filename multiple times."
-        sys.exit(1)
-
-    ###
-
-    K = args.ksize
-    HT_SIZE = args.min_hashsize
-    N_HT = args.n_hashes
-
-    CUTOFF = args.abund_cutoff
-    NORMALIZE_LIMIT = args.normalize_to
-
-    print 'making hashtable'
-    ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
-
-    tempdir = tempfile.mkdtemp('khmer', 'tmp', args.tempdir)
-    print 'created temporary directory %s; use -T to change location' % tempdir
-
-    ###
-
-    save_pass2_total = 0
-
-    read_bp = 0
-    read_reads = 0
-    wrote_bp = 0
-    wrote_reads = 0
-    trimmed_reads = 0
-
-    pass2list = []
-    for filename in args.input_filenames:
-        pass2filename = os.path.basename(filename) + '.pass2'
-        pass2filename = os.path.join(tempdir, pass2filename)
-        trimfilename = os.path.basename(filename) + '.abundtrim'
-
-        pass2list.append((filename, pass2filename, trimfilename))
-
-        pass2fp = open(pass2filename, 'w')
-        trimfp = open(trimfilename, 'w')
-
-        save_pass2 = 0
-        for n, read in enumerate(screed.open(filename)):
-            if n % 10000 == 0:
-                print '...', n, filename, save_pass2, read_reads, read_bp, \
-                    wrote_reads, wrote_bp
-
-            read_reads += 1
-            read_bp += len(read.sequence)
-
-            seq = read.sequence.replace('N', 'A')
-            med, _, _ = ht.get_median_count(seq)
-
-            # has this portion of the graph saturated? if not,
-            # consume & save => pass2.
-            if med < NORMALIZE_LIMIT:
-                ht.consume(seq)
-                pass2fp.write(output_single(read))
-                save_pass2 += 1
-            else:                       # trim!!
-                trim_seq, trim_at = ht.trim_on_abundance(seq, CUTOFF)
-                if trim_at >= K:
-                    trimfp.write(output_single(read, trim_at))
-                    wrote_reads += 1
-                    wrote_bp += trim_at
-                    if trim_at != len(read.sequence):
-                        trimmed_reads += 1
-
-        pass2fp.close()
-        trimfp.close()
-
-        print '%s: kept aside %d of %d from first pass, in %s' % \
-              (filename, save_pass2, n, filename)
-        save_pass2_total += save_pass2
-
-    skipped_n = 0
-    skipped_bp = 0
-    for orig_filename, pass2filename, trimfilename in pass2list:
-        print 'second pass: looking at sequences kept aside in %s' % \
-              pass2filename
-        for n, read in enumerate(screed.open(pass2filename)):
-            if n % 10000 == 0:
-                print '... x 2', n, pass2filename, read_reads, read_bp, \
-                      wrote_reads, wrote_bp
-
-            trimfp = open(trimfilename, 'a')
-
-            seq = read.sequence.replace('N', 'A')
-            med, _, _ = ht.get_median_count(seq)
-
-            # do we retain low-abundance components unchanged?
-            if med < NORMALIZE_LIMIT and args.variable_coverage:
-                trimfp.write(output_single(read))
-                wrote_reads += 1
-                wrote_bp += len(read.sequence)
-                skipped_n += 1
-                skipped_bp += len(read.sequence)
-
-            # otherwise, examine/trim/truncate.
-            else:    # med >= NORMALIZE LIMIT or not args.variable_coverage
-                trim_seq, trim_at = ht.trim_on_abundance(seq, CUTOFF)
-                if trim_at >= K:
-                    trimfp.write(output_single(read, trim_at))
-                    wrote_reads += 1
-                    wrote_bp += trim_at
-                    if trim_at != len(read.sequence):
-                        trimmed_reads += 1
-
-        print 'removing %s' % pass2filename
-        os.unlink(pass2filename)
-
-    print 'removing temp directory & contents (%s)' % tempdir
-    shutil.rmtree(tempdir)
-
-    print 'read %d reads, %d bp' % (read_reads, read_bp,)
-    print 'wrote %d reads, %d bp' % (wrote_reads, wrote_bp,)
-    print 'removed %d reads and trimmed %d reads' % (read_reads - wrote_reads,
-                                                     trimmed_reads,)
-    print 'looked at %d reads twice' % (save_pass2_total,)
-    print 'trimmed or removed %.2f%% of bases (%d total)' % \
-        ((1 - (wrote_bp / float(read_bp))) * 100., read_bp - wrote_bp)
-    if args.variable_coverage:
-        print 'skipped %d reads/%d bases because of low coverage' % \
-              (skipped_n, skipped_bp)
-        print 'output in *.abundtrim'
-
-    fp_rate = khmer.calc_expected_collisions(ht)
-    print >>sys.stderr, \
-        'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
-
-    if fp_rate > MAX_FALSE_POSITIVE_RATE:
-        print >> sys.stderr, "**"
-        print >> sys.stderr, ("** ERROR: the k-mer counting table is too small"
-                              " for this data set. Increase tablesize/# "
-                              "tables.")
-        print >> sys.stderr, "**"
-        print >> sys.stderr, "** Do not use these results!!"
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
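
One detail in the deleted trim-low-abund.py worth noting is output_single(): when a read is truncated, the quality string must be truncated in step with the sequence, or downstream FASTQ parsers will reject the record. A standalone sketch of that logic (not the script's exact function; it uses the 'quality' attribute name the rest of this release adopts):

    # trim sequence and quality together; emit FASTQ only when quality
    # data exists
    def output_single(name, sequence, quality=None, trim_at=None):
        if trim_at is not None:
            sequence = sequence[:trim_at]
            if quality:
                quality = quality[:trim_at]
        if quality:
            return '@%s\n%s\n+\n%s\n' % (name, sequence, quality)
        return '>%s\n%s\n' % (name, sequence)

    print output_single('read1', 'ACGTACGT', 'IIIIIIII', trim_at=4),
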
diff --git a/sandbox/uniqify-sequences.py b/sandbox/uniqify-sequences.py
deleted file mode 100755
index c909008..0000000
--- a/sandbox/uniqify-sequences.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import khmer
-import sys
-import screed
-
-K = 20
-HASHTABLE_SIZE = int(2.5e8)
-N_HT = 4
-
-UNIQUE_LEN = 100
-UNIQUE_F = 0.9
-
-OUTPUT_WINDOW = 100
-OUTPUT_OVERLAP = 10
-
-def main():
-
-    kh = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
-
-    discarded = 0
-    kept_kmers = 0
-    total_kmers = 0
-
-    total_out = 0
-    for filename in sys.argv[1:]:
-        n_out = 0
-        for n, record in enumerate(screed.open(filename)):
-            if n > 0 and n % 10000 == 0:
-                print >>sys.stderr, '...', n, discarded
-                print >>sys.stderr, '==>', total_kmers, kept_kmers, int(
-                    float(kept_kmers) / float(total_kmers) * 100.)
-            seq = record.sequence
-            seq = seq.replace('N', 'G')
-
-            paths = kh.extract_unique_paths(seq, UNIQUE_LEN, UNIQUE_F)
-
-            kh.consume(seq)
-            total_kmers += len(seq) - K + 1
-
-            if not len(paths):
-                discarded += 1
-                continue
-
-            for i, path in enumerate(paths):
-                n_out += 1
-
-                if len(path) < OUTPUT_WINDOW:
-                    total_out += 1
-                    print '>%d\n%s' % (total_out, path)
-                    continue
-
-                for start in range(0, len(path) - OUTPUT_WINDOW + 1,
-                                   OUTPUT_OVERLAP):
-                    total_out += 1
-                    subpath = path[start:start + OUTPUT_WINDOW]
-                    print '>%d\n%s' % (total_out, subpath)
-
-        print >>sys.stderr, '%d for %s' % (n_out, filename)
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/unique-kmers.py b/sandbox/unique-kmers.py
new file mode 100755
index 0000000..6aa5b6a
--- /dev/null
+++ b/sandbox/unique-kmers.py
@@ -0,0 +1,96 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project at idyll.org
+#
+# pylint: disable=invalid-name,missing-docstring
+"""
+Estimate number of unique k-mers, with precision <= ERROR_RATE.
+
+% python sandbox/unique-kmers.py [ -k <k size> ] [ -e <ERROR_RATE> ] <data1> <data2> ...
+
+Use '-h' for parameter help.
+"""
+
+
+import argparse
+import os
+import sys
+import textwrap
+
+import khmer
+from khmer.khmer_args import DEFAULT_K, info, ComboFormatter
+from khmer import __version__
+
+
+def get_parser():
+    descr = "Estimate number of unique k-mers, with precision <= ERROR_RATE."
+    epilog = ("""
+    A HyperLogLog counter is used to do cardinality estimation. Since this counter
+    is based on a tradeoff between precision and memory consumption,
+    :option:`-e`/:option:`--error-rate` can be used to control how much
+    memory will be used. In practice the memory footprint is small even
+    at low error rates (< 0.01).
+
+    :option:`-k`/:option:`--ksize` should be set to the desired k-mer size.
+
+    Output is sent to STDOUT, but a report file can be generated with
+    :option:`-R`/:option:`--report`.
+
+    Example::
+
+        unique-kmers.py -k 17 tests/test-data/test-abund-read{,-2,-3}.fa
+
+    Example::
+
+""" "        unique-kmers.py -R unique_count -k 30 tests/test-data/test-abund-read-paired.fa")  # noqa
+    parser = argparse.ArgumentParser(
+        description=descr, epilog=textwrap.dedent(epilog),
+        formatter_class=ComboFormatter)
+
+    env_ksize = os.environ.get('KHMER_KSIZE', DEFAULT_K)
+
+    parser.add_argument('--version', action='version',
+                        version='khmer {v}'.format(v=__version__))
+    parser.add_argument('-q', '--quiet', dest='quiet', default=False,
+                        action='store_true')
+
+    parser.add_argument('--ksize', '-k', type=int, default=env_ksize,
+                        help='k-mer size to use')
+
+    parser.add_argument('--error-rate', '-e', type=float, default=0.01,
+                        help='Acceptable error rate')
+
+    parser.add_argument('-R', '--report',
+                        metavar='filename', type=argparse.FileType('w'))
+
+    parser.add_argument('input_filenames', metavar='input_sequence_filename',
+                        help='Input FAST[AQ] sequence filename.', nargs='+')
+
+
+    return parser
+
+
+def main():
+    info('unique-kmers.py', ['SeqAn', 'hll'])
+    args = get_parser().parse_args()
+
+    hllcpp = khmer.HLLCounter(args.error_rate, args.ksize)
+
+    report_fp = args.report
+    input_filename = None
+    for index, input_filename in enumerate(args.input_filenames):
+        hllcpp.consume_fasta(input_filename)
+
+    cardinality = hllcpp.estimate_cardinality()
+    print >> sys.stdout, 'Estimated number of unique k-mers: {0}'.format(
+        cardinality)
+
+    if report_fp:
+        print >> report_fp, cardinality
+        report_fp.flush()
+
+if __name__ == "__main__":
+    main()
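
The new unique-kmers.py wraps khmer's HyperLogLog counter; a lower error rate costs more memory but, as the epilog notes, the footprint stays small even below 0.01. Minimal usage with the same three calls the script makes ('reads.fa' is a hypothetical input file):

    import khmer

    hll = khmer.HLLCounter(0.01, 20)   # acceptable error rate, k-mer size
    hll.consume_fasta('reads.fa')      # hypothetical FASTA/FASTQ file
    print 'Estimated number of unique k-mers: %d' % hll.estimate_cardinality()
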
diff --git a/sandbox/write-interleave.py b/sandbox/write-interleave.py
deleted file mode 100755
index 54e7e78..0000000
--- a/sandbox/write-interleave.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import sys
-import screed
-import os.path
-
-def main():
-    for filename in sys.argv[1:]:
-        assert 'R1' in filename
-        filename2 = filename.replace('R1', 'R2')
-
-        r1 = iter(screed.open(filename)).next()
-        r2 = iter(screed.open(filename2)).next()
-
-        assert r1.name == r2.name, (r1.name, r2.name)
-
-        final = filename.replace('R1', '')
-        print 'python /root/khmer/sandbox/interleave.py %s %s | gzip -9c > %s' % (
-            filename, filename2, final)
-
-if __name__ == '__main__':
-    main()
-
-# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
diff --git a/sandbox/write-trimmomatic.py b/sandbox/write-trimmomatic.py
index fc1fa0e..49c9638 100755
--- a/sandbox/write-trimmomatic.py
+++ b/sandbox/write-trimmomatic.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import glob
diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py
index db56ca3..f719a5a 100755
--- a/scripts/abundance-dist-single.py
+++ b/scripts/abundance-dist-single.py
@@ -1,28 +1,30 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2010-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2010-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
-Produce the k-mer abundance distribution for the given file, without
-loading a prebuilt k-mer counting table.
+Produce the k-mer abundance distribution for the given file.
 
 % python scripts/abundance-dist-single.py <data> <histout>
 
+The script does not load a prebuilt k-mer counting table.
+
 Use '-h' for parameter help.
 """
 import os
 import sys
+import csv
 import khmer
 import threading
 import textwrap
 from khmer.khmer_args import (build_counting_args, add_threading_args,
                               report_on_config, info)
-from khmer.file import (check_file_status, check_space,
-                        check_space_for_hashtable)
+from khmer.kfile import (check_input_files, check_space,
+                         check_space_for_hashtable)
 
 
 def get_parser():
@@ -55,6 +57,9 @@ def get_parser():
     parser.add_argument('-s', '--squash', dest='squash_output', default=False,
                         action='store_true',
                         help='Overwrite output file if it exists')
+    parser.add_argument('--csv', default=False, action='store_true',
+                        help='Use the CSV format for the histogram. '
+                        'Includes column headers.')
     parser.add_argument('--savetable', default='', metavar="filename",
                         help="Save the k-mer counting table to the specified "
                         "filename.")
@@ -70,7 +75,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     args = get_parser().parse_args()
     report_on_config(args)
 
-    check_file_status(args.input_sequence_filename, args.force)
+    check_input_files(args.input_sequence_filename, args.force)
     check_space([args.input_sequence_filename], args.force)
     if args.savetable:
         check_space_for_hashtable(args.n_tables * args.min_tablesize,
@@ -83,6 +88,11 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
         sys.exit(1)
     else:
         hist_fp = open(args.output_histogram_filename, 'w')
+        if args.csv:
+            hist_fp_csv = csv.writer(hist_fp)
+            # write headers:
+            hist_fp_csv.writerow(['abundance', 'count', 'cumulative',
+                                  'cumulative_fraction'])
 
     print >>sys.stderr, 'making k-mer counting table'
     counting_hash = khmer.new_counting_hash(args.ksize, args.min_tablesize,
@@ -167,7 +177,10 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
         sofar += i
         frac = sofar / float(total)
 
-        print >> hist_fp, _, i, sofar, round(frac, 3)
+        if args.csv:
+            hist_fp_csv.writerow([_, i, sofar, round(frac, 3)])
+        else:
+            print >> hist_fp, _, i, sofar, round(frac, 3)
 
         if sofar == total:
             break
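
The --csv branch added above emits the same four columns as the
space-separated histogram, plus one header row; a minimal sketch of the
csv.writer pattern it uses, with a hypothetical filename and row values::

    import csv

    with open('dist.csv', 'w') as hist_fp:
        writer = csv.writer(hist_fp)
        writer.writerow(['abundance', 'count', 'cumulative',
                         'cumulative_fraction'])
        writer.writerow([1, 500, 500, 0.5])    # one histogram row
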
diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py
index 36edf17..966bfe8 100755
--- a/scripts/abundance-dist.py
+++ b/scripts/abundance-dist.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2010-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2010-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,invalid-name
@@ -16,11 +16,13 @@ Use '-h' for parameter help.
 from __future__ import print_function
 
 import sys
+import csv
 import khmer
 import argparse
 import os
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files
 from khmer.khmer_args import info
+from khmer.utils import write_record
 
 
 def get_parser():
@@ -41,11 +43,15 @@ def get_parser():
                         help='Do not output 0-count bins')
     parser.add_argument('-s', '--squash', dest='squash_output', default=False,
                         action='store_true',
-                        help='Overwrite output file if it exists')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+                        help='Overwrite existing output_histogram_filename')
+    parser.add_argument('--csv', default=False, action='store_true',
+                        help='Use the CSV format for the histogram. '
+                        'Includes column headers.')
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
-                        help='Overwrite output file if it exists')
+                        help='Continue even if specified input files '
+                        'do not exist or are empty.')
     return parser
 
 
@@ -55,7 +61,7 @@ def main():
     infiles = [args.input_counting_table_filename,
                args.input_sequence_filename]
     for infile in infiles:
-        check_file_status(infile, args.force)
+        check_input_files(infile, args.force)
 
     print ('hashtable from', args.input_counting_table_filename,
            file=sys.stderr)
@@ -64,7 +70,7 @@ def main():
 
     kmer_size = counting_hash.ksize()
     hashsizes = counting_hash.hashsizes()
-    tracking = khmer._new_hashbits(  # pylint: disable=protected-access
+    tracking = khmer._Hashbits(  # pylint: disable=protected-access
         kmer_size, hashsizes)
 
     print ('K:', kmer_size, file=sys.stderr)
@@ -92,7 +98,13 @@ def main():
         print("\tPlease verify that the input files are valid.",
               file=sys.stderr)
         sys.exit(1)
+
     hash_fp = open(args.output_histogram_filename, 'w')
+    if args.csv:
+        hash_fp_csv = csv.writer(hash_fp)
+        # write headers:
+        hash_fp_csv.writerow(['abundance', 'count', 'cumulative',
+                              'cumulative_fraction'])
 
     sofar = 0
     for _, i in enumerate(abundances):
@@ -102,7 +114,10 @@ def main():
         sofar += i
         frac = sofar / float(total)
 
-        print(_, i, sofar, round(frac, 3), file=hash_fp)
+        if args.csv:
+            hash_fp_csv.writerow([_, i, sofar, round(frac, 3)])
+        else:
+            print(_, i, sofar, round(frac, 3), file=hash_fp)
 
         if sofar == total:
             break
diff --git a/scripts/annotate-partitions.py b/scripts/annotate-partitions.py
index 91faa24..aed932e 100755
--- a/scripts/annotate-partitions.py
+++ b/scripts/annotate-partitions.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -21,7 +21,7 @@ import argparse
 import textwrap
 import khmer
 import sys
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
 
 DEFAULT_K = 32
@@ -53,8 +53,8 @@ def get_parser():
     parser.add_argument('input_filenames', metavar='input_sequence_filename',
                         nargs='+', help='input FAST[AQ] sequences to '
                         'annotate.')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -70,9 +70,9 @@ def main():
 
     partitionmap_file = args.graphbase + '.pmap.merged'
 
-    check_file_status(partitionmap_file, args.force)
+    check_input_files(partitionmap_file, args.force)
     for _ in filenames:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(filenames, args.force)
 
diff --git a/scripts/count-median.py b/scripts/count-median.py
index c912eba..58b36a6 100755
--- a/scripts/count-median.py
+++ b/scripts/count-median.py
@@ -1,15 +1,17 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,invalid-name
 """
-Count the median/avg k-mer abundance for each sequence in the input file,
-based on the k-mer counts in the given k-mer counting table.  Can be used to
-estimate expression levels (mRNAseq) or coverage (genomic/metagenomic).
+Count the median/avg k-mer abundance for each sequence in the input file.
+
+The abundance is based on the k-mer counts in the given k-mer counting
+table.  Can be used to estimate expression levels (mRNAseq) or coverage
+(genomic/metagenomic).
 
 % scripts/count-median.py <htname> <input seqs> <output counts>
 
@@ -21,12 +23,14 @@ NOTE: All 'N's in the input sequences are converted to 'G's.
 """
 import screed
 import argparse
-import khmer
 import sys
-from khmer.file import check_file_status, check_space
-from khmer.khmer_args import info
+import csv
 import textwrap
 
+import khmer
+from khmer.kfile import check_input_files, check_space
+from khmer.khmer_args import info
+
 
 def get_parser():
     epilog = """
@@ -34,8 +38,16 @@ def get_parser():
     based on the k-mer counts in the given k-mer counting table.  Can be used
     to estimate expression levels (mRNAseq) or coverage (genomic/metagenomic).
 
-    The output file contains sequence id, median, average, stddev, and seq
-    length.
+    The output file contains sequence id, median, average, stddev, and
+    seq length; fields are separated by spaces. In khmer 1.x,
+    count-median.py splits sequence names at the first space, which
+    means that some sequence formats (e.g. paired FASTQ in Casava 1.8
+    format) will yield uninformative names.  Use :option:`--csv` to
+    fix this behavior.
+
+    Example::
+
+       count-median.py counts.ct tests/test-data/test-reads.fq.gz medians.txt
 
     NOTE: All 'N's in the input sequences are converted to 'G's.
     """
@@ -49,10 +61,13 @@ def get_parser():
                         help='input FAST[AQ] sequence filename')
     parser.add_argument('output', metavar='output_summary_filename',
                         help='output summary filename')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
+    parser.add_argument('--csv', default=False, action='store_true',
+                        help="Use the CSV format for the per-sequence "
+                        "output. Includes column headers.")
     return parser
 
 
@@ -66,7 +81,7 @@ def main():
 
     infiles = [htfile, input_filename]
     for infile in infiles:
-        check_file_status(infile, args.force)
+        check_input_files(infile, args.force)
 
     check_space(infiles, args.force)
 
@@ -77,14 +92,27 @@ def main():
     print >>sys.stderr, 'writing to', output_filename
     output = open(output_filename, 'w')
 
-    for record in screed.open(input_filename):
+    if args.csv:
+        output = csv.writer(output)
+        # write headers:
+        output.writerow(['name', 'median', 'average', 'stddev', 'seqlen'])
+
+    parse_description = True            # legacy behavior: split seq headers
+    if args.csv:
+        parse_description = False       # preserve full names in CSV output
+
+    for record in screed.open(input_filename,
+                              parse_description=parse_description):
         seq = record.sequence.upper()
         if 'N' in seq:
             seq = seq.replace('N', 'G')
 
         if ksize <= len(seq):
             medn, ave, stdev = htable.get_median_count(seq)
-            print >> output, record.name, medn, ave, stdev, len(seq)
+            if args.csv:
+                output.writerow([record.name, medn, ave, stdev, len(seq)])
+            else:
+                print >> output, record.name, medn, ave, stdev, len(seq)
 
 if __name__ == '__main__':
     main()
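
The parse_description flag used above controls whether screed truncates
record names at the first space; a short sketch of the difference,
assuming a hypothetical FASTQ file with Casava 1.8-style headers::

    import screed

    # header line in 'seqs.fq': @read1 1:N:0:ACGT
    for record in screed.open('seqs.fq', parse_description=True):
        print record.name     # 'read1' -- pair information lost
    for record in screed.open('seqs.fq', parse_description=False):
        print record.name     # 'read1 1:N:0:ACGT' -- full name kept
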
diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py
index aabb895..a5d7f95 100755
--- a/scripts/count-overlap.py
+++ b/scripts/count-overlap.py
@@ -1,14 +1,15 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2012-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2012-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,invalid-name
 """
-Count the overlap k-mers, which are the k-mers appearing in two sequence
-datasets.
+Count the overlap k-mers.
+
+Overlap k-mers are those appearing in two sequence datasets.
 
 usage: count-overlap_cpp.py [-h] [-q] [--ksize KSIZE] [--n_tables N_HASHES]
         [--tablesize HASHSIZE]
@@ -16,12 +17,12 @@ usage: count-overlap_cpp.py [-h] [-q] [--ksize KSIZE] [--n_tables N_HASHES]
         result
 
 Use '-h' for parameter help.
-
 """
 import sys
+import csv
 import khmer
 import textwrap
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import (build_hashbits_args, report_on_config, info)
 
 DEFAULT_K = 32
@@ -44,6 +45,10 @@ def get_parser():
                         help="input sequence filename")
     parser.add_argument('report_filename', metavar='output_report_filename',
                         help='output report filename')
+    parser.add_argument('--csv', default=False, action='store_true',
+                        help='Use the CSV format for the curve output '
+                        'in ${output_report_filename}.curve, '
+                        'including column headers.')
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -55,7 +60,7 @@ def main():
     report_on_config(args, hashtype='hashbits')
 
     for infile in [args.ptfile, args.fafile]:
-        check_file_status(infile, args.force)
+        check_input_files(infile, args.force)
 
     check_space([args.ptfile, args.fafile], args.force)
 
@@ -65,6 +70,10 @@ def main():
 
     output = open(args.report_filename, 'w')
     f_curve_obj = open(args.report_filename + '.curve', 'w')
+    if args.csv:
+        f_curve_obj_csv = csv.writer(f_curve_obj)
+        # write headers:
+        f_curve_obj_csv.writerow(['input_seq', 'overlap_kmer'])
 
     ht2 = khmer.new_hashbits(kmer_size, args.min_tablesize, args.n_tables)
 
@@ -81,8 +90,10 @@ dataset2: %s
     output.write(printout1)
 
     for i in range(100):
-        to_print = str(list_curve[100 + i]) + ' ' + str(list_curve[i]) + '\n'
-        f_curve_obj.write(to_print)
+        if args.csv:
+            f_curve_obj_csv.writerow([list_curve[100 + i], list_curve[i]])
+        else:
+            print >> f_curve_obj, list_curve[100 + i], list_curve[i]
 
     print >> sys.stderr, 'wrote to: ' + args.report_filename
 
diff --git a/scripts/do-partition.py b/scripts/do-partition.py
index 9c49971..0fcf64f 100755
--- a/scripts/do-partition.py
+++ b/scripts/do-partition.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,invalid-name
@@ -25,15 +25,15 @@ import textwrap
 from khmer.khmer_args import (build_hashbits_args, report_on_config, info,
                               add_threading_args)
 import glob
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
+import re
+import platform
 
 DEFAULT_SUBSET_SIZE = int(1e5)
 DEFAULT_N_THREADS = 4
 DEFAULT_K = 32
 
 # Debugging Support
-import re
-import platform
 if "Linux" == platform.system():
     def __debug_vm_usage(msg):
         print >>sys.stderr, "===> DEBUG: " + msg
@@ -110,7 +110,7 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
     report_on_config(args, hashtype='hashbits')
 
     for infile in args.input_filenames:
-        check_file_status(infile, args.force)
+        check_input_files(infile, args.force)
 
     check_space(args.input_filenames, args.force)
 
@@ -131,16 +131,10 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
         print >>sys.stderr, 'consuming input', filename
         htable.consume_fasta_and_tag(filename)
 
-    fp_rate = khmer.calc_expected_collisions(htable)
+    # 0.18 is ACTUAL MAX. Do not change.
+    fp_rate = \
+        khmer.calc_expected_collisions(htable, args.force, max_false_pos=.15)
     print >>sys.stderr, 'fp rate estimated to be %1.3f' % fp_rate
-    if fp_rate > 0.15:          # 0.18 is ACTUAL MAX. Do not change.
-        print >> sys.stderr, "**"
-        print >> sys.stderr, ("** ERROR: the graph structure is too small for"
-                              " this data set.  Increase k-mer presence table "
-                              "size/num of tables.")
-        print >> sys.stderr, "**"
-        if not args.force:
-            sys.exit(1)
 
     # partition-graph
 
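
The inline false-positive check deleted above now lives inside
calc_expected_collisions itself; a sketch of the new call pattern, using
the signature shown in this diff (table parameters hypothetical)::

    import khmer

    htable = khmer.new_hashbits(32, int(1e6), 4)   # k, table size, n tables
    # expected to print an error and exit (unless forced) when the
    # estimated false-positive rate exceeds max_false_pos
    fp_rate = khmer.calc_expected_collisions(htable, False,
                                             max_false_pos=.15)
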
diff --git a/scripts/extract-long-sequences.py b/scripts/extract-long-sequences.py
index 6dd3f17..dcaf481 100755
--- a/scripts/extract-long-sequences.py
+++ b/scripts/extract-long-sequences.py
@@ -1,13 +1,15 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 
 """
+Extract long sequences.
+
 Write out lines of FASTQ and FASTA files that exceed an argument-specified
 length.
 
@@ -19,6 +21,7 @@ Use '-h' for parameter help.
 import argparse
 import screed
 import sys
+from khmer.utils import write_record
 
 
 def get_parser():
@@ -41,22 +44,9 @@ def main():
     args = get_parser().parse_args()
     outfp = open(args.output, 'w')
     for filename in args.input_filenames:
-        for record in screed.open(filename):
+        for record in screed.open(filename, parse_description=False):
             if len(record['sequence']) >= args.length:
-                # FASTQ
-                if hasattr(record, 'accuracy'):
-                    outfp.write(
-                        '@{name}\n{seq}\n'
-                        '+\n{acc}\n'.format(name=record.name,
-                                            seq=record.sequence,
-                                            acc=record.accuracy))
-
-                # FASTA
-                else:
-                    outfp.write(
-                        '>{name}\n{seq}\n'.format(name=record.name,
-                                                  seq=record.sequence))
-
+                write_record(record, outfp)
     print >> sys.stderr, 'wrote to: ' + args.output
 
 if __name__ == '__main__':
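
write_record replaces the FASTA/FASTQ branching deleted above; a minimal
re-statement of its apparent contract (not khmer's actual implementation),
keyed on the 'quality' attribute that FASTQ records carry::

    def write_record(record, fileobj):
        # FASTQ records have a quality string; FASTA records do not
        if hasattr(record, 'quality'):
            fileobj.write('@%s\n%s\n+\n%s\n' % (
                record.name, record.sequence, record.quality))
        else:
            fileobj.write('>%s\n%s\n' % (record.name, record.sequence))
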
diff --git a/scripts/extract-paired-reads.py b/scripts/extract-paired-reads.py
index f589dee..ef224cb 100755
--- a/scripts/extract-paired-reads.py
+++ b/scripts/extract-paired-reads.py
@@ -1,12 +1,14 @@
 #! /usr/bin/env python2
 #
 # This script is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
+Split up pairs and singletons.
+
 Take a file containing a mixture of interleaved and orphaned reads, and
 extract them into separate files (.pe and .se).
 
@@ -20,36 +22,10 @@ import os.path
 import textwrap
 import argparse
 import khmer
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
 
-
-def is_pair(name1, name2):
-    if name1.endswith('/1') and name2.endswith('/2'):
-        subpart1 = name1.split('/')[0]
-        subpart2 = name2.split('/')[0]
-        if subpart1 == subpart2:
-            assert subpart1
-            return True
-
-    return False
-
-
-def output_pair(read1, read2):
-    if hasattr(read1, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n" % \
-            (read1.name, read1.sequence, read1.accuracy,
-             read2.name, read2.sequence, read2.accuracy)
-    else:
-        return ">%s\n%s\n>%s\n%s\n" % (read1.name, read1.sequence, read2.name,
-                                       read2.sequence)
-
-
-def output_single(read):
-    if hasattr(read, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.accuracy)
-    else:
-        return ">%s\n%s\n" % (read.name, read.sequence)
+from khmer.utils import broken_paired_reader, write_record, write_record_pair
 
 
 def get_parser():
@@ -72,8 +48,8 @@ def get_parser():
         description='Take a mixture of reads and split into pairs and '
         'orphans.', epilog=textwrap.dedent(epilog))
     parser.add_argument('infile')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -83,7 +59,7 @@ def main():
     info('extract-paired-reads.py')
     args = get_parser().parse_args()
 
-    check_file_status(args.infile, args.force)
+    check_input_files(args.infile, args.force)
     infiles = [args.infile]
     check_space(infiles, args.force)
 
@@ -98,45 +74,21 @@ def main():
     print >>sys.stderr, 'outputting interleaved pairs to "%s.pe"' % outfile
     print >>sys.stderr, 'outputting orphans to "%s.se"' % outfile
 
-    last_record = None
-    last_name = None
-
     n_pe = 0
     n_se = 0
 
-    record = None
-    index = 0
-    for index, record in enumerate(screed.open(sys.argv[1])):
+    screed_iter = screed.open(args.infile, parse_description=False)
+    for index, is_pair, read1, read2 in broken_paired_reader(screed_iter):
         if index % 100000 == 0 and index > 0:
-            print '...', index
-        name = record['name'].split()[0]
-
-        if last_record:
-            if is_pair(last_name, name):
-                paired_fp.write(output_pair(last_record, record))
-                name, record = None, None
-                n_pe += 1
-            else:
-                single_fp.write(output_single(last_record))
-                n_se += 1
-
-        last_name = name
-        last_record = record
-
-    if last_record:
-        if is_pair(last_name, name):
-            paired_fp.write(output_pair(last_record, record))
-            name, record = None, None
+            print >>sys.stderr, '...', index
+
+        if is_pair:
+            write_record_pair(read1, read2, paired_fp)
             n_pe += 1
         else:
-            single_fp.write(output_single(last_record))
-            name, record = None, None
+            write_record(read1, single_fp)
             n_se += 1
 
-    if record:
-        single_fp.write(output_single(record))
-        n_se += 1
-
     single_fp.close()
     paired_fp.close()
 
@@ -145,10 +97,11 @@ def main():
 
     print >>sys.stderr, 'DONE; read %d sequences,' \
         ' %d pairs and %d singletons' % \
-        (index + 1, n_pe, n_se)
+        (n_pe * 2 + n_se, n_pe, n_se)
 
     print >> sys.stderr, 'wrote to: ' + outfile \
         + '.se' + ' and ' + outfile + '.pe'
 
+
 if __name__ == '__main__':
     main()
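
broken_paired_reader centralizes the pairing bookkeeping that the deleted
loop did by hand; a sketch of consuming its four-tuples, with a
hypothetical input file, assuming (per the orphan branch above) that
read2 is None for singletons::

    import screed
    from khmer.utils import broken_paired_reader

    reads = screed.open('mixed.fq', parse_description=False)
    for index, is_pair, read1, read2 in broken_paired_reader(reads):
        if is_pair:
            pass    # read1 and read2 form an interleaved pair
        else:
            pass    # read1 is an orphan; read2 is None
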
diff --git a/scripts/extract-partitions.py b/scripts/extract-partitions.py
index 410dd2d..6cbd37d 100755
--- a/scripts/extract-partitions.py
+++ b/scripts/extract-partitions.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -24,8 +24,9 @@ import screed
 import argparse
 import textwrap
 import khmer
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
+from khmer.utils import write_record
 
 DEFAULT_MAX_SIZE = int(1e6)
 DEFAULT_THRESHOLD = 5
@@ -38,13 +39,6 @@ def read_partition_file(filename):
         yield record_index, record, int(partition_id)
 
 
-def output_single(read):
-    if hasattr(read, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.accuracy)
-    else:
-        return ">%s\n%s\n" % (read.name, read.sequence)
-
-
 def get_parser():
     epilog = """
     Example (results will be in ``example.group0000.fa``)::
@@ -79,8 +73,8 @@ def get_parser():
     parser.add_argument('--output-unassigned', '-U', default=False,
                         action='store_true',
                         help='Output unassigned sequences, too')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -96,7 +90,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     n_unassigned = 0
 
     for infile in args.part_filenames:
-        check_file_status(infile, args.force)
+        check_input_files(infile, args.force)
 
     check_space(args.part_filenames, args.force)
 
@@ -125,7 +119,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     is_fastq = False
 
     for index, read, pid in read_partition_file(args.part_filenames[0]):
-        if hasattr(read, 'accuracy'):
+        if hasattr(read, 'quality'):
             suffix = 'fq'
             is_fastq = True
         break
@@ -133,10 +127,10 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     for filename in args.part_filenames:
         for index, read, pid in read_partition_file(filename):
             if is_fastq:
-                assert hasattr(read, 'accuracy'), \
+                assert hasattr(read, 'quality'), \
                     "all input files must be FASTQ if the first one is"
             else:
-                assert not hasattr(read, 'accuracy'), \
+                assert not hasattr(read, 'quality'), \
                     "all input files must be FASTA if the first one is"
 
             break
@@ -155,7 +149,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
             if pid == 0:
                 n_unassigned += 1
                 if args.output_unassigned:
-                    print >>unassigned_fp, output_single(read)
+                    write_record(read, unassigned_fp)
 
     if args.output_unassigned:
         unassigned_fp.close()
@@ -245,7 +239,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
 
             outfp = group_fps[group_n]
 
-            outfp.write(output_single(read))
+            write_record(read, outfp)
             part_seqs += 1
 
     print >>sys.stderr, '---'
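
The accuracy-to-quality rename above tracks screed's record attribute
name; a sketch of the FASTQ sniffing it enables (filename hypothetical;
the script itself does this on the first record of the first partition
file)::

    import screed

    first = iter(screed.open('example.part')).next()
    is_fastq = hasattr(first, 'quality')    # only FASTQ records carry it
    suffix = 'fq' if is_fastq else 'fa'
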
diff --git a/scripts/fastq-to-fasta.py b/scripts/fastq-to-fasta.py
index c917e48..76393ea 100755
--- a/scripts/fastq-to-fasta.py
+++ b/scripts/fastq-to-fasta.py
@@ -2,7 +2,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -42,7 +42,8 @@ def main():
     print >> sys.stderr, ('fastq from ', args.input_sequence)
 
     n_count = 0
-    for n, record in enumerate(screed.open(args.input_sequence)):
+    for n, record in enumerate(screed.open(args.input_sequence,
+                                           parse_description=False)):
         if n % 10000 == 0:
             print>>sys.stderr, '...', n
 
diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py
index d12fdee..5300b5b 100755
--- a/scripts/filter-abund-single.py
+++ b/scripts/filter-abund-single.py
@@ -1,12 +1,14 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,invalid-name
 """
+Sequence trimming by abundance w/o counting table.
+
 Trim sequences at k-mers of the given abundance for the given file,
 without loading a prebuilt counting table.  Output sequences will be
 placed in 'infile.abundfilt'.
@@ -23,8 +25,8 @@ import textwrap
 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
 from khmer.khmer_args import (build_counting_args, report_on_config,
                               add_threading_args, info)
-from khmer.file import (check_file_status, check_space,
-                        check_space_for_hashtable)
+from khmer.kfile import (check_input_files, check_space,
+                         check_space_for_hashtable)
 #
 DEFAULT_CUTOFF = 2
 
@@ -64,7 +66,7 @@ def get_parser():
 def main():
     info('filter-abund-single.py', ['counting', 'SeqAn'])
     args = get_parser().parse_args()
-    check_file_status(args.datafile, args.force)
+    check_input_files(args.datafile, args.force)
     check_space([args.datafile], args.force)
     if args.savetable:
         check_space_for_hashtable(
@@ -95,7 +97,7 @@ def main():
         print >> sys.stderr, 'Total number of unique k-mers: {0}'.format(
             htable.n_unique_kmers())
 
-    fp_rate = khmer.calc_expected_collisions(htable)
+    fp_rate = khmer.calc_expected_collisions(htable, args.force)
     print >>sys.stderr, 'fp rate estimated to be %1.3f' % fp_rate
 
     # now, trim.
diff --git a/scripts/filter-abund.py b/scripts/filter-abund.py
index d9dab60..4701c42 100755
--- a/scripts/filter-abund.py
+++ b/scripts/filter-abund.py
@@ -1,16 +1,18 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,invalid-name
 """
+Sequence trimming by abundance using counting table.
+
 Trim sequences at k-mers of the given abundance, based on the given counting
 hash table.  Output sequences will be placed in 'infile.abundfilt'.
 
-% python scripts/filter-abund.py <counting.kh> <data1> [ <data2> <...> ]
+% python scripts/filter-abund.py <counting.ct> <data1> [ <data2> <...> ]
 
 Use '-h' for parameter help.
 """
@@ -21,7 +23,7 @@ import argparse
 import sys
 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
 from khmer.khmer_args import (ComboFormatter, add_threading_args, info)
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer import __version__
 #
 
@@ -37,8 +39,8 @@ def get_parser():
 
     Example::
 
-        load-into-counting.py -k 20 -x 5e7 table.kh data/100k-filtered.fa
-        filter-abund.py -C 2 table.kh data/100k-filtered.fa
+        load-into-counting.py -k 20 -x 5e7 table.ct data/100k-filtered.fa
+        filter-abund.py -C 2 table.ct data/100k-filtered.fa
     """
     parser = argparse.ArgumentParser(
         description='Trim sequences at a minimum k-mer abundance.',
@@ -76,16 +78,15 @@ def main():
     info('filter-abund.py', ['counting'])
     args = get_parser().parse_args()
 
-    counting_ht = args.input_table
+    check_input_files(args.input_table, args.force)
     infiles = args.input_filename
-
     for _ in infiles:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(infiles, args.force)
 
     print >>sys.stderr, 'loading hashtable'
-    htable = khmer.load_counting_hash(counting_ht)
+    htable = khmer.load_counting_hash(args.input_table)
     ksize = htable.ksize()
 
     print >>sys.stderr, "K:", ksize
diff --git a/scripts/filter-stoptags.py b/scripts/filter-stoptags.py
index dde8fb5..24175ff 100755
--- a/scripts/filter-stoptags.py
+++ b/scripts/filter-stoptags.py
@@ -1,12 +1,14 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
+Sequence trimming using stoptags.
+
 Trim sequences at k-mers in the given stoptags file.  Output sequences
 will be placed in 'infile.stopfilt'.
 
@@ -21,7 +23,7 @@ import argparse
 import textwrap
 import sys
 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
 
 # @CTB K should be loaded from file...
@@ -43,8 +45,8 @@ def get_parser():
     parser.add_argument('stoptags_file', metavar='input_stoptags_filename')
     parser.add_argument('input_filenames', metavar='input_sequence_filename',
                         nargs='+')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -57,7 +59,7 @@ def main():
     infiles = args.input_filenames
 
     for _ in infiles:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(infiles, args.force)
 
diff --git a/scripts/find-knots.py b/scripts/find-knots.py
index 8583742..bd41bfb 100755
--- a/scripts/find-knots.py
+++ b/scripts/find-knots.py
@@ -1,14 +1,15 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
-Find highly-connected k-mers and output them in a .stoptags file, for use
-in partitioning.
+Find highly-connected k-mers.
+
+k-mers are output into a .stoptags file, for later use in partitioning.
 
 % python scripts/find-knots.py <base>
 """
@@ -19,7 +20,7 @@ import os
 import textwrap
 import khmer
 import sys
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
 
 # counting hash parameters.
@@ -73,8 +74,8 @@ def get_parser():
                         ' the size of the k-mer counting table(s)')
     parser.add_argument('graphbase', help='Basename for the input and output '
                         'files.')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     return parser
 
 
@@ -89,7 +90,7 @@ def main():
     if os.path.exists(graphbase + '.stoptags'):
         infiles.append(graphbase + '.stoptags')
     for _ in infiles:
-        check_file_status(_)
+        check_input_files(_, False)
 
     check_space(infiles)
 
diff --git a/scripts/galaxy/gedlab.py b/scripts/galaxy/gedlab.py
index da65711..70b9683 100644
--- a/scripts/galaxy/gedlab.py
+++ b/scripts/galaxy/gedlab.py
@@ -1,6 +1,4 @@
-"""
-k-mer count and presence
-"""
+"""k-mer count and presence."""
 
 from galaxy.datatypes.binary import Binary
 
diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py
index ca33e8c..4f2696d 100755
--- a/scripts/interleave-reads.py
+++ b/scripts/interleave-reads.py
@@ -1,12 +1,14 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
+Interleave left and right reads.
+
 Take two files containing left & right reads from a paired-end sequencing run,
 and interleave them.
 
@@ -25,18 +27,10 @@ import os
 import textwrap
 import argparse
 import khmer
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
-
-
-def output_pair(read1, read2):
-    if hasattr(read1, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n" % \
-            (read1.name, read1.sequence, read1.accuracy,
-             read2.name, read2.sequence, read2.accuracy)
-    else:
-        return ">%s\n%s\n>%s\n%s\n" % (read1.name, read1.sequence, read2.name,
-                                       read2.sequence)
+from khmer.utils import (write_record_pair, check_is_left, check_is_right,
+                         check_is_pair)
 
 
 def get_parser():
@@ -45,9 +39,9 @@ def get_parser():
     with a read in <R2>. By default, the output goes to stdout unless
     :option:`-o`/:option:`--output` is specified.
 
-    As a "bonus", this file ensures that read names are formatted in a
-    consistent way, such that they look like the pre-1.8 Casava format
-    (@name/1, @name/2).
+    As a "bonus", this script ensures that if read names are not already
+    formatted properly, they are reformatted consistently, such that
+    they look like the pre-1.8 Casava format (@name/1, @name/2).
 
     Example::
 
@@ -61,8 +55,8 @@ def get_parser():
     parser.add_argument('-o', '--output', metavar="filename",
                         type=argparse.FileType('w'),
                         default=sys.stdout)
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -73,7 +67,7 @@ def main():
     args = get_parser().parse_args()
 
     for _ in args.infiles:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(args.infiles, args.force)
 
@@ -82,6 +76,11 @@ def main():
         s2_file = args.infiles[1]
     else:
         s2_file = s1_file.replace('_R1_', '_R2_')
+        if s1_file == s2_file:
+            print >>sys.stderr, ("ERROR: given only one filename, that "
+                                 "doesn't contain _R1_. Exiting.")
+            sys.exit(1)
+
         print >> sys.stderr, ("given only one file; "
                               "guessing that R2 file is %s" % s2_file)
 
@@ -100,29 +99,37 @@ def main():
     print >> sys.stderr, "Interleaving:\n\t%s\n\t%s" % (s1_file, s2_file)
 
     counter = 0
-    for read1, read2 in itertools.izip(screed.open(s1_file),
-                                       screed.open(s2_file)):
+    screed_iter_1 = screed.open(s1_file, parse_description=False)
+    screed_iter_2 = screed.open(s2_file, parse_description=False)
+    for read1, read2 in itertools.izip_longest(screed_iter_1, screed_iter_2):
+        if read1 is None or read2 is None:
+            print >>sys.stderr, ("ERROR: Input files contain different number"
+                                 " of records.")
+            sys.exit(1)
+
         if counter % 100000 == 0:
             print >> sys.stderr, '...', counter, 'pairs'
         counter += 1
 
         name1 = read1.name
-        if not name1.endswith('/1'):
+        if not check_is_left(name1):
             name1 += '/1'
         name2 = read2.name
-        if not name2.endswith('/2'):
+        if not check_is_right(name2):
             name2 += '/2'
 
-        assert name1[:-2] == name2[:-2], \
-            "This doesn't look like paired data! %s %s" % (name1, name2)
-
         read1.name = name1
         read2.name = name2
-        args.output.write(output_pair(read1, read2))
 
-    print >> sys.stderr, 'final: interleaved %d pairs' % counter
+        if not check_is_pair(read1, read2):
+            print >>sys.stderr, "ERROR: This doesn't look like paired data! " \
+                "%s %s" % (read1.name, read2.name)
+            sys.exit(1)
 
-    print >> sys.stderr, 'output written to', args.output
+        write_record_pair(read1, read2, args.output)
+
+    print >> sys.stderr, 'final: interleaved %d pairs' % counter
+    print >> sys.stderr, 'output written to', args.output.name
 
 if __name__ == '__main__':
     main()
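
izip_longest is what lets the loop above notice when the two inputs run
out at different points, since the exhausted side is padded with None; a
self-contained sketch of that behavior::

    import itertools

    left = ['r1/1', 'r2/1']
    right = ['r1/2']
    for name1, name2 in itertools.izip_longest(left, right):
        if name1 is None or name2 is None:
            print 'ERROR: inputs have different numbers of records'
            break
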
diff --git a/scripts/load-graph.py b/scripts/load-graph.py
index afbbc71..9fbc7a3 100755
--- a/scripts/load-graph.py
+++ b/scripts/load-graph.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -20,8 +20,8 @@ import threading
 import khmer
 from khmer.khmer_args import build_hashbits_args
 from khmer.khmer_args import (report_on_config, info, add_threading_args)
-from khmer.file import check_file_status, check_space
-from khmer.file import check_space_for_hashtable
+from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_space_for_hashtable
 
 
 def get_parser():
@@ -54,7 +54,7 @@ def main():
     filenames = args.input_filenames
 
     for _ in args.input_filenames:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(args.input_filenames, args.force)
     check_space_for_hashtable(
@@ -103,20 +103,15 @@ def main():
     info_fp = open(base + '.info', 'w')
     info_fp.write('%d unique k-mers' % htable.n_unique_kmers())
 
-    fp_rate = khmer.calc_expected_collisions(htable)
+    fp_rate = \
+        khmer.calc_expected_collisions(htable, args.force, max_false_pos=.15)
+    # 0.18 is ACTUAL MAX. Do not change.
+
     print >>sys.stderr, 'fp rate estimated to be %1.3f' % fp_rate
     if args.write_fp_rate:
         print >> info_fp, \
             '\nfalse positive rate estimated to be %1.3f' % fp_rate
 
-    if fp_rate > 0.15:          # 0.18 is ACTUAL MAX. Do not change.
-        print >> sys.stderr, "**"
-        print >> sys.stderr, ("** ERROR: the graph structure is too small for "
-                              "this data set. Increase table size/# tables.")
-        print >> sys.stderr, "**"
-        if not args.force:
-            sys.exit(1)
-
     print >> sys.stderr, 'wrote to', base + '.info and', base + '.pt'
     if not args.no_build_tagset:
         print >> sys.stderr, 'and ' + base + '.tagset'
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index dfd790d..b81e767 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 # pylint: disable=missing-docstring,invalid-name
 """
@@ -21,8 +21,9 @@ import textwrap
 import khmer
 from khmer.khmer_args import build_counting_args, report_on_config, info,\
     add_threading_args
-from khmer.file import check_file_status, check_space
-from khmer.file import check_space_for_hashtable
+from khmer.kfile import check_file_writable
+from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_space_for_hashtable
 
 
 def get_parser():
@@ -35,14 +36,14 @@ def get_parser():
 
     Example::
 
-        load-into-counting.py -k 20 -x 5e7 out.kh data/100k-filtered.fa
+        load-into-counting.py -k 20 -x 5e7 out.ct data/100k-filtered.fa
 
     Multiple threads can be used to accelerate the process, if you have extra
     cores to spare.
 
     Example::
 
-        load-into-counting.py -k 20 -x 5e7 -T 4 out.kh data/100k-filtered.fa
+        load-into-counting.py -k 20 -x 5e7 -T 4 out.ct data/100k-filtered.fa
     """
 
     parser = build_counting_args("Build a k-mer counting table from the given"
@@ -54,8 +55,9 @@ def get_parser():
                         help="The names of one or more FAST[AQ] input "
                         "sequence files.")
     parser.add_argument('-b', '--no-bigcount', dest='bigcount', default=True,
-                        action='store_false',
-                        help='Do not count k-mers past 255')
+                        action='store_false', help="The default behaviour is "
+                        "to count past 255 using bigcount. This flag turns "
+                        "bigcount off, limiting counts to 255.")
     parser.add_argument('--summary-info', '-s', default=None, metavar="FORMAT",
                         choices=['json', 'tsv'],
                         help="What format should the machine readable run "
@@ -78,11 +80,14 @@ def main():
     filenames = args.input_sequence_filename
 
     for name in args.input_sequence_filename:
-        check_file_status(name, args.force)
+        check_input_files(name, args.force)
 
     check_space(args.input_sequence_filename, args.force)
     check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force)
 
+    check_file_writable(base)
+    check_file_writable(base + ".info")
+
     print >>sys.stderr, 'Saving k-mer counting table to %s' % base
     print >>sys.stderr, 'Loading kmers from sequences in %s' % repr(filenames)
 
@@ -97,6 +102,8 @@ def main():
 
     filename = None
 
+    total_num_reads = 0
+
     for index, filename in enumerate(filenames):
 
         rparser = khmer.ReadParser(filename)
@@ -121,6 +128,7 @@ def main():
             htable.save(base)
         with open(base + '.info', 'a') as info_fh:
             print >> info_fh, 'through', filename
+        total_num_reads += rparser.num_reads
 
     n_kmers = htable.n_unique_kmers()
     if args.report_total_kmers:
@@ -131,7 +139,9 @@ def main():
     print >>sys.stderr, 'saving', base
     htable.save(base)
 
-    fp_rate = khmer.calc_expected_collisions(htable)
+    # Change max_false_pos=0.2 only if you really grok it. HINT: You don't
+    fp_rate = \
+        khmer.calc_expected_collisions(htable, args.force, max_false_pos=.2)
 
     with open(base + '.info', 'a') as info_fp:
         print >> info_fp, 'fp rate estimated to be %1.3f\n' % fp_rate
@@ -147,26 +157,24 @@ def main():
                     "fpr": fp_rate,
                     "num_kmers": n_kmers,
                     "files": filenames,
-                    "mrinfo_version": "0.1.0",
+                    "mrinfo_version": "0.2.0",
+                    "num_reads": total_num_reads,
                 }
                 json.dump(mr_data, mr_fh)
                 mr_fh.write('\n')
             elif mr_fmt == 'tsv':
-                mr_fh.write("ht_name\tfpr\tnum_kmers\tfiles\n")
-                mr_fh.write("{b:s}\t{fpr:1.3f}\t{k:d}\t{fls:s}\n".format(
-                    b=os.path.basename(base), fpr=fp_rate, k=n_kmers,
-                    fls=";".join(filenames)))
+                mr_fh.write("ht_name\tfpr\tnum_kmers\tnum_reads\tfiles\n")
+                vals = [
+                    os.path.basename(base),
+                    "{:1.3f}".format(fp_rate),
+                    str(n_kmers),
+                    str(total_num_reads),
+                    ";".join(filenames),
+                ]
+                mr_fh.write("\t".join(vals) + "\n")
 
     print >> sys.stderr, 'fp rate estimated to be %1.3f' % fp_rate
 
-    # Change 0.2 only if you really grok it.  HINT: You don't.
-    if fp_rate > 0.20:
-        print >> sys.stderr, "**"
-        print >> sys.stderr, "** ERROR: the k-mer counting table is too small",
-        print >> sys.stderr, "for this data set. Increase tablesize/# tables."
-        print >> sys.stderr, "**"
-        sys.exit(1)
-
     print >>sys.stderr, 'DONE.'
     print >>sys.stderr, 'wrote to:', base + '.info'
 
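
With mrinfo_version 0.2.0 the machine-readable summary gains a num_reads
field alongside the existing ones; a sketch of the JSON variant's shape
as a Python dict, with hypothetical values::

    mr_data = {
        "ht_name": "out.ct",                  # os.path.basename(base)
        "fpr": 0.008,
        "num_kmers": 98765,
        "files": ["data/100k-filtered.fa"],
        "mrinfo_version": "0.2.0",
        "num_reads": 100000,
    }
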
diff --git a/scripts/make-initial-stoptags.py b/scripts/make-initial-stoptags.py
index 35f8524..48a2741 100755
--- a/scripts/make-initial-stoptags.py
+++ b/scripts/make-initial-stoptags.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -16,7 +16,7 @@ import sys
 import textwrap
 import khmer
 from khmer.khmer_args import (build_counting_args, info)
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 
 DEFAULT_SUBSET_SIZE = int(1e4)
 DEFAULT_COUNTING_HT_SIZE = 3e6                # number of bytes
@@ -79,7 +79,7 @@ def main():
     if args.stoptags:
         infiles.append(args.stoptags)
     for _ in infiles:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(infiles, args.force)
 
diff --git a/scripts/merge-partitions.py b/scripts/merge-partitions.py
index 18aae51..99f6a84 100755
--- a/scripts/merge-partitions.py
+++ b/scripts/merge-partitions.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
@@ -21,7 +21,7 @@ import os
 import textwrap
 import khmer
 import sys
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
 
 DEFAULT_K = 32
@@ -42,8 +42,8 @@ def get_parser():
                         help='Keep individual subsets (default: False)')
     parser.add_argument('graphbase', help='basename for input and output '
                         'files')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -63,7 +63,7 @@ def main():
     htable = khmer.new_hashbits(ksize, 1, 1)
 
     for _ in pmap_files:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(pmap_files, args.force)
 
diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py
index 956336a..22c84dc 100755
--- a/scripts/normalize-by-median.py
+++ b/scripts/normalize-by-median.py
@@ -1,14 +1,17 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
+Eliminate surplus reads.
+
 Eliminate reads with median k-mer abundance higher than
-DESIRED_COVERAGE.  Output sequences will be placed in 'infile.keep'.
+DESIRED_COVERAGE.  Output sequences will be placed in 'infile.keep', with
+the option to send output to STDOUT instead.
 
 % python scripts/normalize-by-median.py [ -C <cutoff> ] <data1> <data2> ...
 
@@ -24,13 +27,11 @@ from itertools import izip
 from khmer.khmer_args import (build_counting_args, add_loadhash_args,
                               report_on_config, info)
 import argparse
-from khmer.file import (check_space, check_space_for_hashtable,
-                        check_valid_file_exists)
+from khmer.kfile import (check_space, check_space_for_hashtable,
+                         check_valid_file_exists)
+from khmer.utils import write_record, check_is_pair
 DEFAULT_DESIRED_COVERAGE = 10
 
-MAX_FALSE_POSITIVE_RATE = 0.8             # see Zhang et al.,
-# http://arxiv.org/abs/1309.2975
-
 # Iterate a collection in arbitrary batches
 # from: http://stackoverflow.com/questions/4628290/pairs-from-single-list
 
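
The batchwise helper referenced here groups an iterable into fixed-size
tuples; the sketch below is the standard idiom from the linked Stack
Overflow answer and may differ from khmer's exact body::

    from itertools import izip

    def batchwise(coll, size):
        # zip `size` references to a single iterator: consecutive items
        # land in consecutive slots of each tuple
        iters = [iter(coll)] * size
        return izip(*iters)

    # batchwise('ABCDEF', 2) -> ('A', 'B'), ('C', 'D'), ('E', 'F')
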
@@ -42,28 +43,23 @@ def batchwise(coll, size):
 # Returns true if the two records are properly paired
 
 
-def validpair(read0, read1):
-    return read0.name[-1] == "1" and \
-        read1.name[-1] == "2" and \
-        read0.name[0:-1] == read1.name[0:-1]
-
-
 # pylint: disable=too-many-locals,too-many-branches
-def normalize_by_median(input_filename, outfp, htable, args, report_fp=None):
+def normalize_by_median(input_filename, outfp, htable, paired, cutoff,
+                        report_fp=None):
 
-    desired_coverage = args.cutoff
+    desired_coverage = cutoff
     ksize = htable.ksize()
 
     # In paired mode we read two records at a time
     batch_size = 1
-    if args.paired:
+    if paired:
         batch_size = 2
 
     index = -1
     total = 0
     discarded = 0
     for index, batch in enumerate(batchwise(screed.open(
-            input_filename), batch_size)):
+            input_filename, parse_description=False), batch_size)):
         if index > 0 and index % 100000 == 0:
             print >>sys.stderr, '... kept {kept} of {total} or'\
                 ' {perc:2}%'.format(kept=total - discarded, total=total,
@@ -79,8 +75,9 @@ def normalize_by_median(input_filename, outfp, htable, args, report_fp=None):
         total += batch_size
 
         # If in paired mode, check that the reads are properly interleaved
-        if args.paired:
-            if not validpair(batch[0], batch[1]):
+
+        if paired:
+            if not check_is_pair(batch[0], batch[1]):
                 raise IOError('Error: Improperly interleaved pairs \
                     {b0} {b1}'.format(b0=batch[0].name, b1=batch[1].name))
 
@@ -103,16 +100,7 @@ def normalize_by_median(input_filename, outfp, htable, args, report_fp=None):
         # Emit records if any passed
         if passed_length and passed_filter:
             for record in batch:
-                if hasattr(record, 'accuracy'):
-                    outfp.write(
-                        '@{name}\n{seq}\n'
-                        '+\n{acc}\n'.format(name=record.name,
-                                            seq=record.sequence,
-                                            acc=record.accuracy))
-                else:
-                    outfp.write(
-                        '>{name}\n{seq}\n'.format(name=record.name,
-                                                  seq=record.sequence))
+                write_record(record, outfp)
         else:
             discarded += batch_size
 
@@ -138,6 +126,55 @@ def handle_error(error, output_name, input_name, fail_save, htable):
         print >> sys.stderr, '** ERROR: problem removing corrupt filtered file'
 
 
+def normalize_by_median_and_check(input_filename, htable, single_output_file,
+                                  fail_save, paired, cutoff, force,
+                                  corrupt_files, report_fp=None):
+    total = 0
+    discarded = 0
+
+    total_acc = None
+    discarded_acc = None
+
+    if single_output_file:
+        if single_output_file is sys.stdout:
+            output_name = '/dev/stdout'
+        else:
+            output_name = single_output_file.name
+        outfp = single_output_file
+
+    else:
+        output_name = os.path.basename(input_filename) + '.keep'
+        outfp = open(output_name, 'w')
+
+    try:
+        total_acc, discarded_acc = normalize_by_median(
+            input_filename, outfp, htable, paired, cutoff,
+            report_fp=report_fp)
+    except IOError as err:
+        handle_error(err, output_name, input_filename, fail_save,
+                     htable)
+        if not force:
+            print >> sys.stderr, '** Exiting!'
+
+            sys.exit(1)
+        else:
+            print >> sys.stderr, '*** Skipping error file, moving on...'
+            corrupt_files.append(input_filename)
+    else:
+        if total_acc == 0 and discarded_acc == 0:
+            print >> sys.stderr, 'SKIPPED empty file', input_filename
+        else:
+            total += total_acc
+            discarded += discarded_acc
+            print >> sys.stderr, \
+                'DONE with {inp}; kept {kept} of {total} or {perc:2}%'\
+                .format(inp=input_filename, kept=total - discarded,
+                        total=total, perc=int(100. - discarded /
+                                              float(total) * 100.))
+            print >> sys.stderr, 'output in', output_name
+
+    return total_acc, discarded_acc, corrupt_files
+
+
 def get_parser():
     epilog = ("""
     Discard sequences based on whether or not their median k-mer abundance lies
@@ -146,7 +183,9 @@ def get_parser():
     Paired end reads will be considered together if :option:`-p` is set. If
     either read will be kept, then both will be kept. This should result in
     keeping (or discarding) each sequencing fragment. This helps with retention
-    of repeats, especially.
+    of repeats, especially. With :option:`-u`/:option:`--unpaired-reads`,
+    unpaired reads from the specified file will be read after the paired data
+    is read.
 
     With :option:`-s`/:option:`--savetable`, the k-mer counting table
     will be saved to the specified file after all sequences have been
@@ -164,6 +203,10 @@ def get_parser():
     table up to that point will be dumped, and processing will continue on the
     next file.
 
+    To append reads to an output file (rather than overwriting it), send output
+    to STDOUT with `--out -` and use UNIX file redirection syntax (`>>`) to
+    append to the file.
+
     Example::
 
         normalize-by-median.py -k 17 tests/test-data/test-abund-read-2.fa
@@ -175,6 +218,11 @@ def get_parser():
 
     Example::
 
+""" "        normalize-by-median.py -p -k 17 -o - tests/test-data/paired.fq >> appended-output.fq"  # noqa
+    """
+
+    Example::
+
 """ "        normalize-by-median.py -k 17 -f tests/test-data/test-error-reads.fq tests/test-data/test-fastq-reads.fq"  # noqa
     """
 
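
For reference, the keep/discard rule described in this epilog reduces to a
median k-mer abundance test against the counting table. A minimal sketch,
assuming the khmer 1.4 calls used elsewhere in this patch
(new_counting_hash, get_median_count, consume); the filename, k, and table
sizes are illustrative:

    import khmer
    import screed

    K = 17
    CUTOFF = 10  # DESIRED_COVERAGE
    htable = khmer.new_counting_hash(K, int(1e6), 4)

    for record in screed.open('reads.fa'):
        seq = record.sequence.replace('N', 'A')
        if len(seq) < K:
            continue  # too short to contain any k-mers
        med, _, _ = htable.get_median_count(seq)
        if med < CUTOFF:
            # coverage not yet reached here: keep the read and count it
            htable.consume(seq)
            print record.name
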
@@ -187,7 +235,12 @@ def get_parser():
     parser.add_argument('-C', '--cutoff', type=int,
                         default=DEFAULT_DESIRED_COVERAGE)
     parser.add_argument('-p', '--paired', action='store_true')
-    parser.add_argument('-s', '--savetable', metavar="filename", default='')
+    parser.add_argument('-u', '--unpaired-reads',
+                        metavar="unpaired_reads_filename",
+                        help='with paired data only, include an unpaired '
+                        'file')
+    parser.add_argument('-s', '--savetable', metavar="filename", default='',
+                        help='save the k-mer counting table to disk after '
+                        'all reads are loaded.')
     parser.add_argument('-R', '--report',
                         metavar='filename', type=argparse.FileType('w'))
     parser.add_argument('-f', '--fault-tolerant', dest='force',
@@ -200,9 +253,12 @@ def get_parser():
                         type=int, help='dump k-mer counting table every d '
                         'files', default=-1)
     parser.add_argument('-o', '--out', metavar="filename",
-                        dest='single_output_filename',
-                        default='', help='only output a single'
-                        ' file with the specified filename')
+                        dest='single_output_file',
+                        type=argparse.FileType('w'),
+                        default=None, help='only output a single file with '
+                        'the specified filename; use a single dash "-" to '
+                        'specify that output should go to STDOUT (the '
+                        'terminal)')
     parser.add_argument('input_filenames', metavar='input_sequence_filename',
                         help='Input FAST[AQ] sequence filename.', nargs='+')
     parser.add_argument('--report-total-kmers', '-t', action='store_true',
@@ -222,6 +278,18 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
 
     report_fp = args.report
 
+    # check for similar filenames
+    filenames = []
+    for pathfilename in args.input_filenames:
+        filename = pathfilename.split('/')[-1]
+        if (filename in filenames):
+            print >>sys.stderr, "WARNING: At least two input files are named \
+%s . (The script normalize-by-median.py can not handle this, only one .keep \
+file for one of the input files will be generated.)" % filename
+        else:
+            filenames.append(filename)
+
+    # check for other input file problems
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
@@ -229,57 +297,24 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
             args.n_tables * args.min_tablesize, args.force)
 
     # list to save error files along with throwing exceptions
-    if args.force:
-        corrupt_files = []
+    corrupt_files = []
 
     if args.loadtable:
         print 'loading k-mer counting table from', args.loadtable
         htable = khmer.load_counting_hash(args.loadtable)
     else:
-        print 'making k-mer counting table'
+        print >> sys.stderr, 'making k-mer counting table'
         htable = khmer.new_counting_hash(args.ksize, args.min_tablesize,
                                          args.n_tables)
 
-    total = 0
-    discarded = 0
     input_filename = None
 
     for index, input_filename in enumerate(args.input_filenames):
-        if args.single_output_filename != '':
-            output_name = args.single_output_filename
-            outfp = open(args.single_output_filename, 'a')
-        else:
-            output_name = os.path.basename(input_filename) + '.keep'
-            outfp = open(output_name, 'w')
-
-        total_acc = 0
-        discarded_acc = 0
-
-        try:
-            total_acc, discarded_acc = normalize_by_median(input_filename,
-                                                           outfp, htable, args,
-                                                           report_fp)
-        except IOError as err:
-            handle_error(err, output_name, input_filename, args.fail_save,
-                         htable)
-            if not args.force:
-                print >> sys.stderr, '** Exiting!'
-
-                sys.exit(1)
-            else:
-                print >> sys.stderr, '*** Skipping error file, moving on...'
-                corrupt_files.append(input_filename)
-        else:
-            if total_acc == 0 and discarded_acc == 0:
-                print 'SKIPPED empty file', input_filename
-            else:
-                total += total_acc
-                discarded += discarded_acc
-                print 'DONE with {inp}; kept {kept} of {total} or {perc:2}%'\
-                      .format(inp=input_filename, kept=total - discarded,
-                              total=total, perc=int(100. - discarded /
-                                                    float(total) * 100.))
-                print 'output in', output_name
+        total_acc, discarded_acc, corrupt_files = \
+            normalize_by_median_and_check(
+                input_filename, htable, args.single_output_file,
+                args.fail_save, args.paired, args.cutoff, args.force,
+                corrupt_files, report_fp)
 
         if (args.dump_frequency > 0 and
                 index > 0 and index % args.dump_frequency == 0):
@@ -292,6 +327,18 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
                 print 'Nothing given for savetable, saving to', hashname
             htable.save(hashname)
 
+    if args.paired and args.unpaired_reads:
+        args.paired = False
+        total_acc, discarded_acc, corrupt_files = \
+            normalize_by_median_and_check(
+                args.unpaired_reads, htable, args.single_output_file,
+                args.fail_save, args.paired, args.cutoff, args.force,
+                corrupt_files, report_fp)
+
     if args.report_total_kmers:
         print >> sys.stderr, 'Total number of unique k-mers: {0}'.format(
             htable.n_unique_kmers())
@@ -301,24 +348,18 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         print '...saving to', args.savetable
         htable.save(args.savetable)
 
-    fp_rate = khmer.calc_expected_collisions(htable)
-    print 'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
+    fp_rate = \
+        khmer.calc_expected_collisions(htable, args.force, max_false_pos=.8)
+    # for max_false_pos see Zhang et al., http://arxiv.org/abs/1309.2975
+
+    print >> sys.stderr, \
+        'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
 
     if args.force and len(corrupt_files) > 0:
         print >> sys.stderr, "** WARNING: Finished with errors!"
         print >> sys.stderr, "** IOErrors occurred in the following files:"
         print >> sys.stderr, "\t", " ".join(corrupt_files)
 
-    if fp_rate > MAX_FALSE_POSITIVE_RATE:
-        print >> sys.stderr, "**"
-        print >> sys.stderr, ("** ERROR: the k-mer counting table is too small"
-                              " for this data set. Increase tablesize/# "
-                              "tables.")
-        print >> sys.stderr, "**"
-        print >> sys.stderr, "** Do not use these results!!"
-        if not args.force:
-            sys.exit(1)
-
 if __name__ == '__main__':
     main()
 
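
The validpair helper removed above hard-coded the old-style naming
convention ('/1' and '/2' with a shared prefix); the patch replaces it with
khmer.utils.check_is_pair, and the new casava_18-pe.fq test data further
below suggests the Casava 1.8 'name 1:N:0:...' convention is also in play.
A hand-rolled sketch of both conventions, for illustration only (not the
khmer implementation):

    import re

    def looks_paired(name1, name2):
        # old style: 'prefix/1' pairs with 'prefix/2'
        if name1.endswith('/1') and name2.endswith('/2'):
            return name1[:-2] == name2[:-2]
        # Casava 1.8: 'name 1:N:0:IDX' pairs with 'name 2:N:0:IDX'
        mat1 = re.match(r'^(\S+)\s+1:\S+$', name1)
        mat2 = re.match(r'^(\S+)\s+2:\S+$', name2)
        return bool(mat1 and mat2 and mat1.group(1) == mat2.group(1))
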
diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py
index 250e404..63f1e1a 100755
--- a/scripts/partition-graph.py
+++ b/scripts/partition-graph.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python2
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name, missing-docstring
@@ -24,7 +24,7 @@ import argparse
 import khmer
 import sys
 from khmer.khmer_args import (add_threading_args, info)
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 
 # Debugging Support
 import re
@@ -43,7 +43,7 @@ DEFAULT_N_THREADS = 4
 
 
 def worker(queue, basename, stop_big_traversals):
-    while 1:
+    while True:
         try:
             (htable, index, start, stop) = queue.get(False)
         except Queue.Empty:
@@ -88,8 +88,8 @@ def get_parser():
     parser.add_argument('--no-big-traverse', action='store_true',
                         default=False, help='Truncate graph joins at big '
                         'traversals')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     add_threading_args(parser)
@@ -103,7 +103,7 @@ def main():
 
     filenames = [basename + '.pt', basename + '.tagset']
     for _ in filenames:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(filenames, args.force)
 
diff --git a/scripts/readstats.py b/scripts/readstats.py
new file mode 100755
index 0000000..d3c5cd5
--- /dev/null
+++ b/scripts/readstats.py
@@ -0,0 +1,184 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project at idyll.org
+#
+"""
+Display summary statistics for one or more FASTA/FASTQ files.
+
+% scripts/readstats.py [ -o output.txt ] <file1> <file2>
+
+Use '-h' for parameter help.
+"""
+
+import sys
+import csv
+import screed
+import argparse
+import textwrap
+
+
+def get_parser():
+    descr = "Display summary statistics for one or more FASTA/FASTQ files."
+    epilog = ("""
+    Report number of bases, number of sequences, and average sequence length
+    for one or more FASTA/FASTQ files; and report aggregate statistics at end.
+
+    With :option:`-o`/:option:`--output`, the output will be saved to the
+    specified file.
+
+    Example::
+
+        readstats.py tests/test-data/test-abund-read-2.fa
+    """)
+
+    parser = argparse.ArgumentParser(description=descr,
+                                     epilog=textwrap.dedent(epilog))
+    parser.add_argument('filenames', nargs='+')
+    parser.add_argument('-o', '--output', dest='outfp', metavar="filename",
+                        help="output file for statistics; defaults to stdout.",
+                        type=argparse.FileType('w'), default=sys.stdout)
+    parser.add_argument('--csv', default=False, action='store_true',
+                        help='Use the CSV format for the statistics, '
+                        'including column headers.')
+    return parser
+
+
+class StatisticsOutput(object):
+    #  pylint: disable=too-few-public-methods
+    """Output statistics for several data files.
+
+    The format of the output is determined by the formatter used.
+    All statistics are aggregated and a summary is added to the data.
+    """
+
+    def __init__(self, formatter):
+        self.formatter = formatter
+
+    def __enter__(self):
+        self.formatter.write_header()
+        return self
+
+    def append(self, basepairs, seqs, filename):
+        """Append a new line for the given basepair number, sequences and file.
+        """
+        self.formatter.append(
+            basepairs, seqs, basepairs / float(seqs), filename)
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if exc_type is None:
+            self.formatter.finalize()
+
+
+class CsvFormatter(object):
+    """Format the statistis information as CSV."""
+    headers = ['bp', 'seqs', 'avg_len', 'filename']
+
+    def __init__(self, underlying_file):
+        self.file = csv.writer(underlying_file)
+
+    def write_header(self):
+        """Add headers for the csv columns."""
+        self.file.writerow(self.headers)
+
+    def append(self, basepairs, seqs, avg_len, filename):
+        """Append the data separated by comma."""
+        self.file.writerow([basepairs, seqs, "%.1f" % avg_len, filename])
+
+    def finalize(self):
+        """No statistics since the CSV data is supposed to be processed further.
+        """
+        pass
+
+
+class StdFormatter(object):
+    """Format the statistics in a human readable string."""
+
+    def __init__(self, underlying_file):
+        self.file = underlying_file
+        self.bp_total = 0
+        self.seqs_total = 0
+
+    def write_header(self):
+        """Write a header line."""
+        self.file.write('---------------\n')
+
+    def append(self, basepairs, seqs, avg_len, filename):
+        """Append the data human readable."""
+        self.bp_total += basepairs
+        self.seqs_total += seqs
+        self.file.write('%d bp / %d seqs; %.1f average length -- %s\n' %
+                        (basepairs,
+                         seqs,
+                         avg_len,
+                         filename))
+
+    def finalize(self):
+        """Add a summary with the accumulated data."""
+        self.file.write('---------------\n')
+        avg = self.bp_total / float(self.seqs_total)
+        self.file.write('%d bp / %d seqs; %.1f average length -- total\n' %
+                        (self.bp_total, self.seqs_total, avg))
+
+
+def analyze_file(filename):
+    """Run over the given file and count base pairs and sequences."""
+    bps = 0
+    seqs = 0
+    input_iter = screed.open(filename, parse_description=False)
+    for record in input_iter:
+        if seqs % 100000 == 0:
+            print >>sys.stderr, '...', filename, seqs
+        bps += len(record.sequence)
+        seqs += 1
+    return bps, seqs
+
+
+def main():
+    """Main function - run when executed as a script."""
+    parser = get_parser()
+    args = parser.parse_args()
+
+    total_bp = 0
+    total_seqs = 0
+
+    statistics = []
+
+    for filename in args.filenames:
+        try:
+            bps, seqs = analyze_file(filename)
+        except (IOError, OSError, EOFError) as exc:
+            print >>sys.stderr, 'ERROR in opening %s:' % filename
+            print >>sys.stderr, '     ', str(exc)
+            continue
+
+        if seqs:
+            statistics.append((bps, seqs, filename))
+            avg = bps / float(seqs)
+            msg = '%d bp / %d seqs; %.1f average length -- %s' % (
+                bps, seqs, avg, filename)
+
+            print >>sys.stderr, '... found', msg
+
+        else:
+            print >>sys.stderr, 'No sequences found in %s' % filename
+
+    if statistics:
+        if args.csv:
+            formatter = CsvFormatter(args.outfp)
+        else:
+            formatter = StdFormatter(args.outfp)
+        with StatisticsOutput(formatter) as out:
+            for stat in statistics:
+                out.append(*stat)
+    else:
+        print >>args.outfp, \
+            'No sequences found in %d files' % len(args.filenames)
+
+
+if __name__ == '__main__':
+    main()
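
The formatter split in readstats.py above lets the CSV and human-readable
paths share one aggregation flow. A quick sketch of driving the CSV path
directly, assuming the classes are importable (readstats.py itself is a
script, so copy them into a module first):

    from cStringIO import StringIO

    buf = StringIO()
    with StatisticsOutput(CsvFormatter(buf)) as out:
        out.append(1200, 10, 'example-1.fa')  # 1200 bp across 10 seqs
        out.append(500, 4, 'example-2.fa')
    print buf.getvalue()
    # rows are \r\n-terminated by the csv module:
    # bp,seqs,avg_len,filename
    # 1200,10,120.0,example-1.fa
    # 500,4,125.0,example-2.fa
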
diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py
index 755c8a6..64323e4 100755
--- a/scripts/sample-reads-randomly.py
+++ b/scripts/sample-reads-randomly.py
@@ -1,12 +1,14 @@
 #! /usr/bin/env python2
 #
 # This script is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
+Subsample sequences from multiple files.
+
 Take a list of files containing sequences, and subsample 100,000 sequences (-N)
 uniformly, using reservoir sampling.  Stop after first 100m sequences (-M).
 By default take one subsample, but take -S samples if specified.
@@ -24,8 +26,9 @@ import textwrap
 import sys
 
 import khmer
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
+from khmer.utils import write_record, broken_paired_reader
 
 DEFAULT_NUM_READS = int(1e5)
 DEFAULT_MAX_READS = int(1e8)
@@ -34,13 +37,13 @@ DEBUG = True
 
 def get_parser():
     epilog = ("""
-    
+
     Take a list of files containing sequences, and subsample 100,000
     sequences (:option:`-N`/:option:`--num_reads`) uniformly, using
     reservoir sampling.  Stop after first 100m sequences
     (:option:`-M`/:option:`--max_reads`). By default take one subsample,
     but take :option:`-S`/:option:`--samples` samples if specified.
-    
+
     The output is placed in :option:`-o`/:option:`--output` <file>
     (for a single sample) or in <file>.subset.0 to <file>.subset.S-1
     (for more than one sample).
@@ -62,29 +65,24 @@ def get_parser():
     parser.add_argument('-S', '--samples', type=int, dest='num_samples',
                         default=1)
     parser.add_argument('-R', '--random-seed', type=int, dest='random_seed')
+    parser.add_argument('--force_single', default=False, action='store_true',
+                        help='Ignore read pair information if present')
     parser.add_argument('-o', '--output', dest='output_file',
                         metavar='output_file',
                         type=argparse.FileType('w'), default=None)
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
 
 
-def output_single(read):
-    if hasattr(read, 'accuracy'):
-        return "@%s\n%s\n+\n%s\n" % (read.name, read.sequence, read.accuracy)
-    else:
-        return ">%s\n%s\n" % (read.name, read.sequence)
-
-
 def main():
     info('sample-reads-randomly.py')
     args = get_parser().parse_args()
 
     for _ in args.filenames:
-        check_file_status(_, args.force)
+        check_input_files(_, args.force)
 
     check_space(args.filenames, args.force)
 
@@ -112,7 +110,7 @@ def main():
         output_filename = os.path.basename(filename) + '.subset'
 
     if num_samples == 1:
-        print >>sys.stderr, 'Subsampling %d reads using reservoir sampling.' % \
+        print >>sys.stderr, 'Subsampling %d reads using reservoir sampling.' %\
             args.num_reads
         print >>sys.stderr, 'Subsampled reads will be placed in %s' % \
             output_filename
@@ -128,34 +126,35 @@ def main():
     for n in range(num_samples):
         reads.append([])
 
-    total = 0
-
     # read through all the sequences and load/resample the reservoir
     for filename in args.filenames:
         print >>sys.stderr, 'opening', filename, 'for reading'
-        for record in screed.open(filename):
-            total += 1
-
-            if total % 10000 == 0:
-                print >>sys.stderr, '...', total, 'reads scanned'
-                if total >= args.max_reads:
-                    print >>sys.stderr, 'reached upper limit of %d reads',\
-                        ' (see -M); exiting' \
-                        % args.max_reads
+        screed_iter = screed.open(filename, parse_description=False)
+
+        for count, (_, ispair, rcrd1, rcrd2) in enumerate(broken_paired_reader(
+                screed_iter,
+                force_single=args.force_single)):
+            if count % 10000 == 0:
+                print >>sys.stderr, '...', count, 'reads scanned'
+                if count >= args.max_reads:
+                    print >>sys.stderr, 'reached upper limit of %d reads' % \
+                        args.max_reads, '(see -M); exiting'
                     break
 
             # collect first N reads
-            if total <= args.num_reads:
+            if count < args.num_reads:
                 for n in range(num_samples):
-                    reads[n].append(record)
+                    reads[n].append((rcrd1, rcrd2))
             else:
+                assert len(reads[n]) <= count
+
                 # use reservoir sampling to replace reads at random
                 # see http://en.wikipedia.org/wiki/Reservoir_sampling
 
                 for n in range(num_samples):
-                    guess = random.randint(1, total)
+                    guess = random.randint(1, count)
                     if guess <= args.num_reads:
-                        reads[n][guess - 1] = record
+                        reads[n][guess - 1] = (rcrd1, rcrd2)
 
     # output all the subsampled reads:
     if len(reads) == 1:
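
The loop above is reservoir sampling (algorithm R), run once per requested
sample over (read1, read2) tuples. In textbook form, for a single reservoir
of size N (a sketch; note the script enumerates records from zero, so its
randint bounds differ slightly from the 1-based count used here):

    import random

    def reservoir_sample(stream, N):
        """Uniformly sample N items from a stream of unknown length,
        in one pass and O(N) memory."""
        reservoir = []
        for t, item in enumerate(stream, 1):  # t counts items from 1
            if t <= N:
                reservoir.append(item)
            else:
                # keep the new item with probability N/t
                guess = random.randint(1, t)
                if guess <= N:
                    reservoir[guess - 1] = item
        return reservoir
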
@@ -164,16 +163,20 @@ def main():
         if not output_file:
             output_file = open(output_filename, 'w')
 
-        for record in reads[0]:
-            output_file.write(output_single(record))
+        for records in reads[0]:
+            write_record(records[0], output_file)
+            if records[1] is not None:
+                write_record(records[1], output_file)
     else:
         for n in range(num_samples):
             n_filename = output_filename + '.%d' % n
             print >>sys.stderr, 'Writing %d sequences to %s' % \
                 (len(reads[n]), n_filename)
             output_file = open(n_filename, 'w')
-            for record in reads[n]:
-                output_file.write(output_single(record))
+            for records in reads[n]:
+                write_record(records[0], output_file)
+                if records[1] is not None:
+                    write_record(records[1], output_file)
 
 if __name__ == '__main__':
     main()
diff --git a/scripts/split-paired-reads.py b/scripts/split-paired-reads.py
index fbb4ead..f80cf65 100755
--- a/scripts/split-paired-reads.py
+++ b/scripts/split-paired-reads.py
@@ -1,12 +1,14 @@
 #! /usr/bin/env python2
 #
 # This script is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=invalid-name,missing-docstring
 """
+De-interleave a file.
+
 Take an interleaved set of reads (/1 and /2), and extract them into separate
 files (.1 and .2).
 
@@ -16,12 +18,14 @@ Reads FASTQ and FASTA input, retains format for output.
 """
 import screed
 import sys
-import os.path
+import os
 import textwrap
 import argparse
 import khmer
-from khmer.file import check_file_status, check_space
+from khmer.kfile import check_input_files, check_space
 from khmer.khmer_args import info
+from khmer.utils import (write_record, check_is_left, check_is_right,
+                         broken_paired_reader)
 
 
 def get_parser():
@@ -30,9 +34,29 @@ def get_parser():
     interleaved; other programs want input in the Insanely Bad Format, with
     left- and right- reads separated. This reformats the former to the latter.
 
+    The directory into which the left- and right- reads are output may be
+    specified using :option:`-o`/:option:`--output-dir`. This directory will be
+    created if it does not already exist.
+
+    Alternatively, you can specify the filenames directly with
+    :option:`-1`/:option:`--output-first` and
+    :option:`-2`/:option:`--output-second`, which will override the
+    :option:`-o`/:option:`--output-dir` setting on a file-specific basis.
+
+    :option:`-p`/:option:`--force-paired` will require the input file to
+    be properly interleaved; by default, this is not required.
+
     Example::
 
         split-paired-reads.py tests/test-data/paired.fq
+
+    Example::
+
+        split-paired-reads.py -o ~/reads-go-here tests/test-data/paired.fq
+
+    Example::
+
+        split-paired-reads.py -1 reads.1 -2 reads.2 tests/test-data/paired.fq
     """
     parser = argparse.ArgumentParser(
         description='Split interleaved reads into two files, left and right.',
@@ -40,8 +64,23 @@ def get_parser():
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
     parser.add_argument('infile')
-    parser.add_argument('--version', action='version', version='%(prog)s '
-                        + khmer.__version__)
+
+    parser.add_argument('-o', '--output-dir', metavar="output_directory",
+                        dest='output_directory', default='', help='Output '
+                        'split reads to specified directory. Creates '
+                        'directory if necessary')
+
+    parser.add_argument('-1', '--output-first', metavar='output_first',
+                        default=None, help='Output "left" reads to this '
+                        'file')
+    parser.add_argument('-2', '--output-second', metavar='output_second',
+                        default=None, help='Output "right" reads to this '
+                        'file')
+    parser.add_argument('-p', '--force-paired', action='store_true',
+                        help='Require that reads be interleaved')
+
+    parser.add_argument('--version', action='version', version='%(prog)s ' +
+                        khmer.__version__)
     parser.add_argument('-f', '--force', default=False, action='store_true',
                         help='Overwrite output file if it exists')
     return parser
@@ -53,49 +92,67 @@ def main():
 
     infile = args.infile
 
-    check_file_status(infile, args.force)
+    check_input_files(infile, args.force)
     filenames = [infile]
     check_space(filenames, args.force)
 
-    out1 = os.path.basename(infile) + '.1'
-    out2 = os.path.basename(infile) + '.2'
+    # decide where to put output files - specific directory? or just default?
+    if args.output_directory:
+        if not os.path.exists(args.output_directory):
+            os.makedirs(args.output_directory)
+        out1 = args.output_directory + '/' + os.path.basename(infile) + '.1'
+        out2 = args.output_directory + '/' + os.path.basename(infile) + '.2'
+    else:
+        out1 = os.path.basename(infile) + '.1'
+        out2 = os.path.basename(infile) + '.2'
+
+    # OVERRIDE output file locations with -1, -2
+    if args.output_first:
+        out1 = args.output_first
+    if args.output_second:
+        out2 = args.output_second
+
     fp_out1 = open(out1, 'w')
     fp_out2 = open(out2, 'w')
 
-    # is input file FASTQ or FASTA? Determine.
-    is_fastq = False
-    record = iter(screed.open(infile)).next()
-
-    if hasattr(record, 'accuracy'):
-        is_fastq = True
-
     counter1 = 0
     counter2 = 0
     index = None
-    for index, record in enumerate(screed.open(infile)):
-        if index % 100000 == 0:
+
+    screed_iter = screed.open(infile, parse_description=False)
+
+    # walk through all the reads in broken-paired mode.
+    for index, is_pair, record1, record2 in broken_paired_reader(screed_iter):
+        if index % 100000 == 0 and index:
             print >> sys.stderr, '...', index
 
-        name = record.name
-        if name.endswith('/1'):
-            if is_fastq:
-                print >> fp_out1, '@%s\n%s\n+\n%s' % (record.name,
-                                                      record.sequence,
-                                                      record.accuracy)
-            else:
-                print >> fp_out1, '>%s\n%s' % (record.name, record.sequence,)
+        # are we requiring pairs?
+        if args.force_paired and not is_pair:
+            print >>sys.stderr, 'ERROR, %s is not part of a pair' % \
+                record1.name
+            sys.exit(1)
+
+        if is_pair:
+            write_record(record1, fp_out1)
             counter1 += 1
-        elif name.endswith('/2'):
-            if is_fastq:
-                print >> fp_out2, '@%s\n%s\n+\n%s' % (record.name,
-                                                      record.sequence,
-                                                      record.accuracy)
-            else:
-                print >> fp_out2, '>%s\n%s' % (record.name, record.sequence,)
+            write_record(record2, fp_out2)
             counter2 += 1
+        else:
+            name = record1.name
+            if check_is_left(name):
+                write_record(record1, fp_out1)
+                counter1 += 1
+            elif check_is_right(name):
+                write_record(record1, fp_out2)
+                counter2 += 1
+            else:
+                print >>sys.stderr, \
+                    "Unrecognized format for read pair information: %s" % name
+                print >>sys.stderr, "Exiting."
+                sys.exit(1)
 
     print >> sys.stderr, "DONE; split %d sequences (%d left, %d right)" % \
-        (index + 1, counter1, counter2)
+        (counter1 + counter2, counter1, counter2)
     print >> sys.stderr, "/1 reads in %s" % out1
     print >> sys.stderr, "/2 reads in %s" % out2
 
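
Both this script and sample-reads-randomly.py above now walk their input
with khmer.utils.broken_paired_reader, which yields (index, is_pair,
record1, record2) tuples and sets record2 to None for orphaned reads. A
minimal consumer sketch under that assumption:

    import sys
    import screed
    from khmer.utils import broken_paired_reader

    pairs = orphans = 0
    screed_iter = screed.open('reads.fq', parse_description=False)
    for index, is_pair, read1, read2 in broken_paired_reader(screed_iter):
        if is_pair:
            pairs += 1    # read1 and read2 are mates
        else:
            orphans += 1  # read2 is None
    print >>sys.stderr, '%d pairs, %d orphans' % (pairs, orphans)
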
diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py
new file mode 100755
index 0000000..564e24e
--- /dev/null
+++ b/scripts/trim-low-abund.py
@@ -0,0 +1,331 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project at idyll.org
+#
+"""
+Trim sequences at k-mers of the given abundance, using a streaming algorithm.
+
+Output sequences will be placed in 'infile.abundtrim'.
+
+% python scripts/trim-low-abund.py [ <data1> [ <data2> [ ... ] ] ]
+
+Use -h for parameter help.
+"""
+import sys
+import screed
+import os
+import khmer
+import tempfile
+import shutil
+import textwrap
+import argparse
+
+from screed.screedRecord import _screed_record_dict
+from khmer.khmer_args import (build_counting_args, info, add_loadhash_args,
+                              report_on_config)
+from khmer.utils import write_record, write_record_pair, broken_paired_reader
+from khmer.kfile import (check_space, check_space_for_hashtable,
+                         check_valid_file_exists)
+
+DEFAULT_NORMALIZE_LIMIT = 20
+DEFAULT_CUTOFF = 2
+
+
+def trim_record(read, trim_at):
+    new_read = _screed_record_dict()
+    new_read.name = read.name
+    new_read.sequence = read.sequence[:trim_at]
+    if hasattr(read, 'quality'):
+        new_read.quality = read.quality[:trim_at]
+
+    return new_read
+
+
+def get_parser():
+    epilog = """
+    The output is one file for each input file, <input file>.abundtrim, placed
+    in the current directory.  This output contains the input sequences
+    trimmed at low-abundance k-mers.
+
+    The ``-V/--variable-coverage`` parameter will, if specified,
+    prevent elimination of low-abundance reads by only trimming
+    low-abundance k-mers from high-abundance reads; use this for
+    non-genomic data sets that may have variable coverage.
+
+    Note that the output reads will not necessarily be in the same order
+    as the reads in the input files; if this is an important consideration,
+    use ``load-into-counting.py`` and ``filter-abund.py``.  However, read
+    pairs will be kept together, in "broken-paired" format; you can use
+    ``extract-paired-reads.py`` to extract read pairs and orphans.
+
+    Example::
+
+        trim-low-abund.py -x 5e7 -k 20 -C 2 data/100k-filtered.fa
+    """
+
+    parser = build_counting_args(
+        descr='Trim low-abundance k-mers using a streaming algorithm.',
+        epilog=textwrap.dedent(epilog))
+
+    parser.add_argument('input_filenames', nargs='+')
+
+    parser.add_argument('--cutoff', '-C', type=int,
+                        help='remove k-mers below this abundance',
+                        default=DEFAULT_CUTOFF)
+
+    parser.add_argument('--normalize-to', '-Z', type=int,
+                        help='base cutoff on this median k-mer abundance',
+                        default=DEFAULT_NORMALIZE_LIMIT)
+
+    parser.add_argument('-o', '--out', metavar="filename",
+                        type=argparse.FileType('w'),
+                        default=None, help='only output a single file with '
+                        'the specified filename; use a single dash "-" to '
+                        'specify that output should go to STDOUT (the '
+                        'terminal)')
+
+    parser.add_argument('--variable-coverage', '-V', action='store_true',
+                        default=False,
+                        help='Only trim low-abundance k-mers from sequences '
+                        'that have high coverage.')
+
+    add_loadhash_args(parser)
+    parser.add_argument('-s', '--savetable', metavar="filename", default='',
+                        help='save the k-mer counting table to disk after '
+                        'all reads are loaded.')
+
+    # expert options
+    parser.add_argument('--force', default=False, action='store_true')
+    parser.add_argument('--ignore-pairs', default=False, action='store_true')
+    parser.add_argument('--tempdir', '-T', type=str, default='./')
+
+    return parser
+
+
+def main():
+    info('trim-low-abund.py', ['streaming'])
+    parser = get_parser()
+    args = parser.parse_args()
+
+    ###
+
+    if len(set(args.input_filenames)) != len(args.input_filenames):
+        print >>sys.stderr, \
+            "Error: Cannot input the same filename multiple times."
+        sys.exit(1)
+
+    ###
+
+    report_on_config(args)
+    check_valid_file_exists(args.input_filenames)
+    check_space(args.input_filenames, args.force)
+    if args.savetable:
+        check_space_for_hashtable(
+            args.n_tables * args.min_tablesize, args.force)
+
+    K = args.ksize
+
+    CUTOFF = args.cutoff
+    NORMALIZE_LIMIT = args.normalize_to
+
+    if args.loadtable:
+        print >>sys.stderr, 'loading k-mer counting table from', args.loadtable
+        ct = khmer.load_counting_hash(args.loadtable)
+    else:
+        print >>sys.stderr, 'making k-mer counting table'
+        ct = khmer.new_counting_hash(K, args.min_tablesize, args.n_tables)
+
+    tempdir = tempfile.mkdtemp('khmer', 'tmp', args.tempdir)
+    print >>sys.stderr, 'created temporary directory %s; ' \
+                        'use -T to change location' % tempdir
+
+    # ### FIRST PASS ###
+
+    save_pass2_total = 0
+
+    n_bp = 0
+    n_reads = 0
+    written_bp = 0
+    written_reads = 0
+    trimmed_reads = 0
+
+    pass2list = []
+    for filename in args.input_filenames:
+        pass2filename = os.path.basename(filename) + '.pass2'
+        pass2filename = os.path.join(tempdir, pass2filename)
+        if args.out is None:
+            trimfp = open(os.path.basename(filename) + '.abundtrim', 'w')
+        else:
+            trimfp = args.out
+
+        pass2list.append((filename, pass2filename, trimfp))
+
+        screed_iter = screed.open(filename, parse_description=False)
+        pass2fp = open(pass2filename, 'w')
+
+        save_pass2 = 0
+        n = 0
+
+        paired_iter = broken_paired_reader(screed_iter, min_length=K,
+                                           force_single=args.ignore_pairs)
+        for n, is_pair, read1, read2 in paired_iter:
+            if n % 10000 == 0:
+                print >>sys.stderr, '...', n, filename, save_pass2, \
+                    n_reads, n_bp, written_reads, written_bp
+
+            # we want to track paired reads here, to make sure that pairs
+            # are not split between first pass and second pass.
+
+            if is_pair:
+                n_reads += 2
+                n_bp += len(read1.sequence) + len(read2.sequence)
+
+                seq1 = read1.sequence.replace('N', 'A')
+                seq2 = read2.sequence.replace('N', 'A')
+
+                med1, _, _ = ct.get_median_count(seq1)
+                med2, _, _ = ct.get_median_count(seq2)
+
+                if med1 < NORMALIZE_LIMIT or med2 < NORMALIZE_LIMIT:
+                    ct.consume(seq1)
+                    ct.consume(seq2)
+                    write_record_pair(read1, read2, pass2fp)
+                    save_pass2 += 2
+                else:
+                    _, trim_at1 = ct.trim_on_abundance(seq1, CUTOFF)
+                    _, trim_at2 = ct.trim_on_abundance(seq2, CUTOFF)
+
+                    if trim_at1 >= K:
+                        read1 = trim_record(read1, trim_at1)
+
+                    if trim_at2 >= K:
+                        read2 = trim_record(read2, trim_at2)
+
+                    if trim_at1 != len(seq1):
+                        trimmed_reads += 1
+                    if trim_at2 != len(seq2):
+                        trimmed_reads += 1
+
+                    write_record_pair(read1, read2, trimfp)
+                    written_reads += 2
+                    written_bp += trim_at1 + trim_at2
+            else:
+                n_reads += 1
+                n_bp += len(read1.sequence)
+
+                seq = read1.sequence.replace('N', 'A')
+
+                med, _, _ = ct.get_median_count(seq)
+
+                # has this portion of the graph saturated? if not,
+                # consume & save => pass2.
+                if med < NORMALIZE_LIMIT:
+                    ct.consume(seq)
+                    write_record(read1, pass2fp)
+                    save_pass2 += 1
+                else:                       # trim!!
+                    _, trim_at = ct.trim_on_abundance(seq, CUTOFF)
+                    if trim_at >= K:
+                        new_read = trim_record(read1, trim_at)
+                        write_record(new_read, trimfp)
+
+                        written_reads += 1
+                        written_bp += trim_at
+
+                        if trim_at != len(read1.sequence):
+                            trimmed_reads += 1
+
+        pass2fp.close()
+
+        print >>sys.stderr, '%s: kept aside %d of %d reads from first pass' \
+            % (filename, save_pass2, n)
+        save_pass2_total += save_pass2
+
+    # ### SECOND PASS. ###
+
+    skipped_n = 0
+    skipped_bp = 0
+    for _, pass2filename, trimfp in pass2list:
+        print >>sys.stderr, ('second pass: looking at sequences kept aside '
+                             'in %s') % pass2filename
+
+        # note that for this second pass, we don't care about paired
+        # reads - they will be output in the same order they're read in,
+        # so pairs will stay together if not orphaned.  This is in contrast
+        # to the first loop.
+
+        for n, read in enumerate(screed.open(pass2filename,
+                                             parse_description=False)):
+            if n % 10000 == 0:
+                print >>sys.stderr, '... x 2', n, pass2filename, \
+                    written_reads, written_bp
+
+            seq = read.sequence.replace('N', 'A')
+            med, _, _ = ct.get_median_count(seq)
+
+            # do we retain low-abundance components unchanged?
+            if med < NORMALIZE_LIMIT and args.variable_coverage:
+                write_record(read, trimfp)
+
+                written_reads += 1
+                written_bp += len(read.sequence)
+                skipped_n += 1
+                skipped_bp += len(read.sequence)
+
+            # otherwise, examine/trim/truncate.
+            else:    # med >= NORMALIZE_LIMIT or not args.variable_coverage
+                _, trim_at = ct.trim_on_abundance(seq, CUTOFF)
+                if trim_at >= K:
+                    new_read = trim_record(read, trim_at)
+                    write_record(new_read, trimfp)
+
+                    written_reads += 1
+                    written_bp += trim_at
+
+                    if trim_at != len(read.sequence):
+                        trimmed_reads += 1
+
+        print >>sys.stderr, 'removing %s' % pass2filename
+        os.unlink(pass2filename)
+
+    print >>sys.stderr, 'removing temp directory & contents (%s)' % tempdir
+    shutil.rmtree(tempdir)
+
+    n_passes = 1.0 + (float(save_pass2_total) / n_reads)
+    percent_reads_trimmed = float(trimmed_reads + (n_reads - written_reads)) /\
+        n_reads * 100.0
+
+    print >>sys.stderr, 'read %d reads, %d bp' % (n_reads, n_bp,)
+    print >>sys.stderr, 'wrote %d reads, %d bp' % (written_reads, written_bp,)
+    print >>sys.stderr, 'looked at %d reads twice (%.2f passes)' % \
+        (save_pass2_total, n_passes)
+    print >>sys.stderr, 'removed %d reads and trimmed %d reads (%.2f%%)' % \
+        (n_reads - written_reads, trimmed_reads, percent_reads_trimmed)
+    print >>sys.stderr, 'trimmed or removed %.2f%% of bases (%d total)' % \
+        ((1 - (written_bp / float(n_bp))) * 100.0, n_bp - written_bp)
+
+    if args.variable_coverage:
+        percent_reads_hicov = 100.0 * float(n_reads - skipped_n) / n_reads
+        print >>sys.stderr, '%d reads were high coverage (%.2f%%);' % \
+            (n_reads - skipped_n, percent_reads_hicov)
+        print >>sys.stderr, ('skipped %d reads/%d bases because of low '
+                             'coverage') % (skipped_n, skipped_bp)
+
+    fp_rate = \
+        khmer.calc_expected_collisions(ct, args.force, max_false_pos=.8)
+    # for max_false_pos see Zhang et al., http://arxiv.org/abs/1309.2975
+    print >>sys.stderr, \
+        'fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate)
+
+    print >>sys.stderr, 'output in *.abundtrim'
+
+    if args.savetable:
+        print >>sys.stderr, "Saving k-mer counting table to", args.savetable
+        ct.save(args.savetable)
+
+
+if __name__ == '__main__':
+    main()
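
The core of the two-pass scheme in trim-low-abund.py is a single first-pass
decision per read: if the local graph is not yet saturated (median k-mer
count below -Z/--normalize-to), consume the read and set it aside for the
second pass; otherwise trim it immediately at the -C/--cutoff abundance.
Stripped of pairing and bookkeeping, and using the trim_record and
write_record helpers shown above, the rule is roughly:

    def first_pass_rule(read, ct, K, CUTOFF, NORMALIZE_LIMIT,
                        pass2fp, trimfp):
        seq = read.sequence.replace('N', 'A')
        med, _, _ = ct.get_median_count(seq)
        if med < NORMALIZE_LIMIT:
            # not saturated yet: learn from the read, decide on pass two
            ct.consume(seq)
            write_record(read, pass2fp)
        else:
            # saturated: trim now at the abundance cutoff
            _, trim_at = ct.trim_on_abundance(seq, CUTOFF)
            if trim_at >= K:
                write_record(trim_record(read, trim_at), trimfp)
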
diff --git a/setup.cfg b/setup.cfg
index fc5c009..d5e5068 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,7 +1,7 @@
 [nosetests]
 verbosity = 2
 stop = TRUE
-attr = !known_failing,!jenkins
+attr = !known_failing,!jenkins,!linux
 #processes = -1 # breaks xunit output
 
 [build_ext]
@@ -9,7 +9,7 @@ define = SEQAN_HAS_BZIP2,SEQAN_HAS_ZLIB
 undef = NO_UNIQUE_RC
 # libraries = z,bz2
 ## if using system libraries
-include-dirs = lib:third-party/zlib:third-party/bzip2:third-party/seqan/core/include
+include-dirs = lib:third-party/zlib:third-party/bzip2:third-party/seqan/core/include:third-party/smhasher
 # include-dirs = lib
 ## if using system libraries (broken)
 
diff --git a/setup.py b/setup.py
index 8c37e5c..5a21bdb 100755
--- a/setup.py
+++ b/setup.py
@@ -1,17 +1,19 @@
 #! /usr/bin/env python2
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
 # the three-clause BSD license; see doc/LICENSE.txt.
 # Contact: khmer-project at idyll.org
-""" Setup for khmer project. """
+"""Setup for khmer project."""
 
 import ez_setup
-ez_setup.use_setuptools(version="3.4.1")
 
 import os
 import sys
 from os import listdir as os_listdir
 from os.path import join as path_join
+import shutil
+import subprocess
+import tempfile
 
 from setuptools import setup
 from setuptools import Extension
@@ -22,6 +24,8 @@ from distutils.dist import Distribution
 from distutils.errors import DistutilsPlatformError
 
 import versioneer
+ez_setup.use_setuptools(version="3.4.1")
+
 versioneer.VCS = 'git'
 versioneer.versionfile_source = 'khmer/_version.py'
 versioneer.versionfile_build = 'khmer/_version.py'
@@ -39,6 +43,55 @@ os.environ['OPT'] = " ".join(
     flag for flag in OPT.split() if flag != '-Wstrict-prototypes'
 )
 
+# Checking for OpenMP support. Currently clang doesn't work with OpenMP,
+# so it needs to be disabled for now.
+# This function comes from the yt project:
+# https://bitbucket.org/yt_analysis/yt/src/f7c75759e0395861b52d16921d8ce3ad6e36f89f/yt/utilities/lib/setup.py?at=yt
+
+
+def check_for_openmp():
+    """Check for OpenMP support."""
+    if os.name == 'nt':
+        return False
+
+    # Create a temporary directory
+    tmpdir = tempfile.mkdtemp()
+    curdir = os.getcwd()
+    exit_code = 1
+
+    try:
+        os.chdir(tmpdir)
+
+        # Get compiler invocation
+        compiler = os.getenv('CC', 'cc')
+
+        # Attempt to compile a test script.
+        # See http://openmp.org/wp/openmp-compilers/
+        filename = r'test.c'
+        file = open(filename, 'wt', 1)
+        file.write(
+            """
+            #include <omp.h>
+            #include <stdio.h>
+            int main() {
+            #pragma omp parallel
+            printf("Hello from thread %d, nthreads %d",
+                    omp_get_thread_num(), omp_get_num_threads());
+            }
+            """
+        )
+        with open(os.devnull, 'w') as fnull:
+            exit_code = subprocess.call([compiler, '-fopenmp', filename],
+                                        stdout=fnull, stderr=fnull)
+
+        # Clean up
+        file.close()
+    finally:
+        os.chdir(curdir)
+        shutil.rmtree(tmpdir)
+
+    return exit_code == 0
+
 # We bundle tested versions of zlib & bzip2. To use the system zlib and bzip2
 # change setup.cfg or use the `--libraries z,bz2` parameter which will make our
 # custom build_ext command strip out the bundled versions.
@@ -48,22 +101,34 @@ BZIP2DIR = 'third-party/bzip2'
 
 BUILD_DEPENDS = []
 BUILD_DEPENDS.extend(path_join("lib", bn + ".hh") for bn in [
-    "khmer", "kmer_hash", "hashtable", "counting", "hashbits", "labelhash"])
+    "khmer", "kmer_hash", "hashtable", "counting", "hashbits", "labelhash",
+    "hllcounter", "khmer_exception", "read_aligner", "subset", "read_parsers"])
 
 SOURCES = ["khmer/_khmermodule.cc"]
 SOURCES.extend(path_join("lib", bn + ".cc") for bn in [
     "trace_logger", "perf_metrics", "read_parsers", "kmer_hash", "hashtable",
-    "hashbits", "labelhash", "counting", "subset", "read_aligner"])
+    "hashbits", "labelhash", "counting", "subset", "read_aligner",
+    "hllcounter"])
+
+SOURCES.extend(path_join("third-party", "smhasher", bn + ".cc") for bn in [
+    "MurmurHash3"])
 
 EXTRA_COMPILE_ARGS = ['-O3', ]
+EXTRA_LINK_ARGS = []
 
 if sys.platform == 'darwin':
     # force 64bit only builds
     EXTRA_COMPILE_ARGS.extend(['-arch', 'x86_64'])
+
+if check_for_openmp():
+    EXTRA_COMPILE_ARGS.extend(['-fopenmp'])
+    EXTRA_LINK_ARGS.extend(['-fopenmp'])
+
 EXTENSION_MOD_DICT = \
     {
         "sources": SOURCES,
         "extra_compile_args": EXTRA_COMPILE_ARGS,
+        "extra_link_args": EXTRA_LINK_ARGS,
         "depends": BUILD_DEPENDS,
         "language": "c++",
         "define_macros": [("VERSION", versioneer.get_version()), ],
@@ -85,6 +150,7 @@ CLASSIFIERS = [
     "Operating System :: POSIX :: Linux",
     "Operating System :: MacOS :: MacOS X",
     "Programming Language :: C++",
+    "Programming Language :: Python :: 2 :: Only",
     "Programming Language :: Python :: 2.7",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
 ]
@@ -111,7 +177,9 @@ SETUP_METADATA = \
         "url": 'http://ged.msu.edu/',
         "packages": ['khmer', 'khmer.tests'],
         "package_dir": {'khmer.tests': 'tests'},
-        "install_requires": ['screed >= 0.7.1'],
+        "install_requires": ['screed >= 0.8'],
+        # testing screed download link
+
         "extras_require": {':python_version=="2.6"': ['argparse>=1.2.1'],
                            'docs': ['sphinx', 'sphinxcontrib-autoprogram'],
                            'tests': ['nose >= 1.0']},
@@ -136,6 +204,7 @@ class KhmerBuildExt(_build_ext):  # pylint: disable=R0904
     """
 
     def run(self):
+        """Run extension builder."""
         if "%x" % sys.maxsize != '7fffffffffffffff':
             raise DistutilsPlatformError("%s require 64-bit operating system" %
                                          SETUP_METADATA["packages"])
@@ -166,13 +235,13 @@ _DISTUTILS_REINIT = Distribution.reinitialize_command
 
 
 def reinitialize_command(self, command, reinit_subcommands):
-    '''
-    Monkeypatch distutils.Distribution.reinitialize_command() to match behavior
-    of Distribution.get_command_obj()
+    """Monkeypatch the original version from distutils.
 
+    It's supposed to match the behavior of Distribution.get_command_obj().
     This fixes issues with 'pip install -e' and './setup.py nosetests' not
-    respecting the setup.cfg configuration directives for the build_ext command
-    '''
+    respecting the setup.cfg configuration directives for the build_ext
+    command.
+    """
     cmd_obj = _DISTUTILS_REINIT(self, command, reinit_subcommands)
     options = self.command_options.get(command)
     if options:
@@ -181,6 +250,7 @@ def reinitialize_command(self, command, reinit_subcommands):
     return cmd_obj
 Distribution.reinitialize_command = reinitialize_command
 
+
 # pylint: disable=W0142
 setup(cmdclass=CMDCLASS,
       **SETUP_METADATA)
diff --git a/tests/khmer_tst_utils.py b/tests/khmer_tst_utils.py
index beb071b..d52303f 100644
--- a/tests/khmer_tst_utils.py
+++ b/tests/khmer_tst_utils.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import tempfile
@@ -11,7 +11,6 @@ from pkg_resources import Requirement, resource_filename, ResolutionError
 from cStringIO import StringIO
 import nose
 import sys
-import khmer.file
 import traceback
 import subprocess
 
@@ -76,9 +75,12 @@ def _runscript(scriptname, sandbox=False):
 
 def runscript(scriptname, args, in_directory=None,
               fail_ok=False, sandbox=False):
-    """
+    """Run a Python script using exec().
+
     Run the given Python script, with the given args, in the given directory,
     using 'execfile'.
+
+    When using :attr:`fail_ok`=False in tests, specify the expected error.
     """
     sysargs = [scriptname]
     sysargs.extend(args)
@@ -125,11 +127,11 @@ def runscript(scriptname, args, in_directory=None,
 
 def runscriptredirect(scriptname, args, stdinfilename, in_directory=None,
                       fail_ok=False, sandbox=False):
-    """
+    """Run a Python script using subprocess().
+
     Run the given Python script, with the given args, in the given directory,
     using 'subprocess'.
     """
-
     cwd = os.getcwd()
 
     status = -1
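
The two docstrings above draw the key distinction in this module:
runscript() executes a script in-process while runscriptredirect() shells
out. A self-contained sketch of the in-process pattern (run_inline is a
hypothetical name, not khmer's actual helper):

    import sys
    from cStringIO import StringIO

    def run_inline(scriptname, args):
        """Run a script in-process; return (status, stdout, stderr)."""
        oldargs, oldout, olderr = sys.argv, sys.stdout, sys.stderr
        sys.argv = [scriptname] + list(args)
        sys.stdout, sys.stderr = StringIO(), StringIO()
        status = 0
        try:
            execfile(scriptname, {'__name__': '__main__'})
        except SystemExit as err:
            status = err.code or 0            # scripts exit via sys.exit()
        finally:
            out, errout = sys.stdout.getvalue(), sys.stderr.getvalue()
            sys.argv, sys.stdout, sys.stderr = oldargs, oldout, olderr
        return status, out, errout
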
diff --git a/tests/test-data/badversion-k12.kh b/tests/test-data/badversion-k12.ct
similarity index 100%
rename from tests/test-data/badversion-k12.kh
rename to tests/test-data/badversion-k12.ct
diff --git a/tests/test-data/casava_18-pe.fq b/tests/test-data/casava_18-pe.fq
new file mode 100644
index 0000000..9af8adc
--- /dev/null
+++ b/tests/test-data/casava_18-pe.fq
@@ -0,0 +1,16 @@
+@HWI-ST412:261:d15khacxx:8:1101:3149:2157 1:N:0:ATCACG
+GTACACTATAGTGAGATTCCTTATACCTCGCAATATGATCTCCTGTCGTTACGTCCATTGCTCTTAACAAAGAACACTCATAGAGTAGAATCGAAAAAAC
++
+@@CDAFFAHFACC?DBHIAIIICGC?AHFEEHGGIIGG>CEDHBDHDGBGGBB8B<FGHG4@GEFDA@.7CC3?)7@?CDECCAA663;;;(;=??B?>5
+@HWI-ST412:261:d15khacxx:8:1101:3149:2157 2:N:0:ATCACG
+TCGTTTTCTTGTAGATTAGATTTGACATAAATGTCAATTTTTTTGTATCTACCGTCACTTGAATACTCAGTTGCAATATAAT
++
+;+=D7D?DHGHFDCAE@BCCACHHGFBHICC<EHE?BFBDHGGIIADDB*?D8@B@FG>FCE;@D:A>EECEFF3?BD@(.6
+@HWI-ST412:261:d15khacxx:8:1101:3426:2112 1:N:0:ATCACG
+CTAAACACAGCAAATTTGTCAGAGATTTAATCCGTGAAGTAACTGGTCATGCTCCTTATGAAAAACGTGCTATGGAATTGCTCAAAGTATCACAGGATA
++
+1=DDFFAFFFHFIGBHGGDHIH>GC9?FEHGFGIIIIEEBGGBFHGGEIGHICHBFGCHEGIIIIGGCEGEHCHA?@@B?@D@C@>@>CCA>(;5;AA>
+@HWI-ST412:261:d15khacxx:8:1101:3426:2112 2:N:0:ATCACG
+GCCCAGCCTTCTTTTCAAAAACTTCAATGCACGCTTATCCTTTGATACTTTGAGCAATTCCATAGCACGTTTTTCATAAGGAGCATGACCAGTTACTTCA
++
+@?@A=BDD;DF3CFDHGEEHI:AFAHGII>FBCFE):B<B<DDHA<FGIHHFG=GGIGCGEIGGF@A=B2=7?1=;3?;7.;;=AC@CCCDDA>;:;@C@
diff --git a/tests/test-data/normC20k20.kh b/tests/test-data/normC20k20.ct
similarity index 100%
rename from tests/test-data/normC20k20.kh
rename to tests/test-data/normC20k20.ct
diff --git a/tests/test-data/paired-mixed.fq.pe b/tests/test-data/old-style-format-w-comments.fq
similarity index 84%
copy from tests/test-data/paired-mixed.fq.pe
copy to tests/test-data/old-style-format-w-comments.fq
index c169b70..5363db6 100644
--- a/tests/test-data/paired-mixed.fq.pe
+++ b/tests/test-data/old-style-format-w-comments.fq
@@ -1,24 +1,24 @@
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954/1 ec:Z:0_0:1_0_1:0_0
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954/2 ec:Z:0_0:2_0_0:0_0
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954/1   ec:Z:0_0:1_0_1:0_0
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954/2   ec:Z:0_0:1_0_0:0_0
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954/1		ec:Z:4
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954/2		ec:Z:0_0:3_0_1:0_0
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
diff --git a/tests/test-data/overlap.curve b/tests/test-data/overlap.curve
deleted file mode 100644
index e951b1d..0000000
--- a/tests/test-data/overlap.curve
+++ /dev/null
@@ -1,200 +0,0 @@
-6021 0
-12283 0
-18155 0
-24050 0
-29649 40
-35878 63
-42077 63
-48233 63
-54423 63
-60530 121
-66015 154
-71548 154
-77733 154
-83781 154
-89819 154
-95916 154
-101765 168
-107569 168
-112958 168
-119116 168
-125245 168
-131415 168
-137387 293
-142771 293
-148585 293
-154968 293
-161049 295
-166997 295
-173045 295
-178307 295
-184486 295
-190476 295
-196578 295
-202648 338
-208367 351
-213865 420
-220200 420
-226356 449
-232545 449
-238563 463
-244631 463
-250477 463
-255751 463
-261733 463
-267940 468
-274028 468
-280168 468
-286339 468
-292468 468
-297959 468
-304094 468
-310195 468
-316381 468
-322511 468
-328748 468
-334296 468
-340307 468
-346479 468
-352467 468
-358167 468
-364100 468
-369953 500
-376021 500
-382062 545
-388117 545
-393750 545
-398447 1430
-401544 4527
-404607 7590
-408026 11009
-413425 16408
-419262 22245
-424872 27855
-430628 33611
-436424 39407
-442091 45074
-447704 50687
-453532 56515
-459256 62239
-465319 68302
-471277 74260
-477238 80221
-483201 86184
-488559 91542
-494427 97410
-500543 103526
-506488 109471
-512256 115239
-518250 121233
-524122 127105
-529993 132976
-535602 138585
-541291 144274
-547407 150390
-553356 156339
-559303 162286
-565185 168168
-570992 173975
-576607 179590
-581866 184849
-6021 0
-12283 0
-18155 0
-24050 0
-29649 40
-35878 63
-42077 63
-48233 63
-54423 63
-60530 121
-66015 154
-71548 154
-77733 154
-83781 154
-89819 154
-95916 154
-101765 168
-107569 168
-112958 168
-119116 168
-125245 168
-131415 168
-137387 293
-142771 293
-148585 293
-154968 293
-161049 295
-166997 295
-173045 295
-178307 295
-184486 295
-190476 295
-196578 295
-202648 338
-208367 351
-213865 420
-220200 420
-226356 449
-232545 449
-238563 463
-244631 463
-250477 463
-255751 463
-261733 463
-267940 468
-274028 468
-280168 468
-286339 468
-292468 468
-297959 468
-304094 468
-310195 468
-316381 468
-322511 468
-328748 468
-334296 468
-340307 468
-346479 468
-352467 468
-358167 468
-364100 468
-369953 500
-376021 500
-382062 545
-388117 545
-393750 545
-398447 1430
-401544 4527
-404607 7590
-408026 11009
-413425 16408
-419262 22245
-424872 27855
-430628 33611
-436424 39407
-442091 45074
-447704 50687
-453532 56515
-459256 62239
-465319 68302
-471277 74260
-477238 80221
-483201 86184
-488559 91542
-494427 97410
-500543 103526
-506488 109471
-512256 115239
-518250 121233
-524122 127105
-529993 132976
-535602 138585
-541291 144274
-547407 150390
-553356 156339
-559303 162286
-565185 168168
-570992 173975
-576607 179590
-581866 184849
diff --git a/tests/test-data/paired-broken.fq.1 b/tests/test-data/paired-broken.fq.1
new file mode 100644
index 0000000..5efe1fa
--- /dev/null
+++ b/tests/test-data/paired-broken.fq.1
@@ -0,0 +1,12 @@
+@895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@896:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@897:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
diff --git a/tests/test-data/paired-broken.fq.2 b/tests/test-data/paired-broken.fq.2
new file mode 100644
index 0000000..c913526
--- /dev/null
+++ b/tests/test-data/paired-broken.fq.2
@@ -0,0 +1,9 @@
+@895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@896:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+
diff --git a/tests/test-data/paired-broken2.fq.1 b/tests/test-data/paired-broken2.fq.1
new file mode 100644
index 0000000..05cd98a
--- /dev/null
+++ b/tests/test-data/paired-broken2.fq.1
@@ -0,0 +1,8 @@
+@895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@897:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
diff --git a/tests/test-data/paired-broken2.fq.2 b/tests/test-data/paired-broken2.fq.2
new file mode 100644
index 0000000..7c76349
--- /dev/null
+++ b/tests/test-data/paired-broken2.fq.2
@@ -0,0 +1,13 @@
+@895:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@896:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@897:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+
diff --git a/tests/test-data/paired-broken3.fq.1 b/tests/test-data/paired-broken3.fq.1
new file mode 100644
index 0000000..5efe1fa
--- /dev/null
+++ b/tests/test-data/paired-broken3.fq.1
@@ -0,0 +1,12 @@
+@895:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@896:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@897:1:37:17593:9954/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
diff --git a/tests/test-data/paired-broken3.fq.2 b/tests/test-data/paired-broken3.fq.2
new file mode 100644
index 0000000..9c110ba
--- /dev/null
+++ b/tests/test-data/paired-broken3.fq.2
@@ -0,0 +1,9 @@
+@896:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+@897:1:37:17593:9954/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
+
diff --git a/tests/test-data/paired-mixed.fq b/tests/test-data/paired-mixed-2.fq
similarity index 96%
copy from tests/test-data/paired-mixed.fq
copy to tests/test-data/paired-mixed-2.fq
index 9fd58e9..e986962 100644
--- a/tests/test-data/paired-mixed.fq
+++ b/tests/test-data/paired-mixed-2.fq
@@ -10,15 +10,15 @@ GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCC
 TTAATTTTGGAAACCCTGCAATAAAGTCACAACATTGC
 +
 ######################################
-@850:2:1:1123:19958/1
+@850:2:1:1123:19958/2
 GCGATAAAAAGTCGTTGAGATAATCCGCGATTTCTCGCA
 +
 #######################################
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954 1::foo
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954 2::foo
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
diff --git a/tests/test-data/paired-mixed-broken.fq b/tests/test-data/paired-mixed-broken.fq
new file mode 100644
index 0000000..9470b41
--- /dev/null
+++ b/tests/test-data/paired-mixed-broken.fq
@@ -0,0 +1,4 @@
+@895:1:37:17593:9954
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
++
+##################################################################################################################
diff --git a/tests/test-data/paired-mixed.fq b/tests/test-data/paired-mixed.fq
index 9fd58e9..3fd873f 100644
--- a/tests/test-data/paired-mixed.fq
+++ b/tests/test-data/paired-mixed.fq
@@ -14,11 +14,11 @@ TTAATTTTGGAAACCCTGCAATAAAGTCACAACATTGC
 GCGATAAAAAGTCGTTGAGATAATCCGCGATTTCTCGCA
 +
 #######################################
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954 1::foo
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954 2::foo
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
diff --git a/tests/test-data/paired-mixed.fq.pe b/tests/test-data/paired-mixed.fq.pe
index c169b70..b1ce5db 100644
--- a/tests/test-data/paired-mixed.fq.pe
+++ b/tests/test-data/paired-mixed.fq.pe
@@ -6,11 +6,11 @@ GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCC
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954 1::foo
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954 2::foo
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
diff --git a/tests/test-data/random-20-a.fq b/tests/test-data/random-20-a.fq
index c45ece8..9823fd4 100644
--- a/tests/test-data/random-20-a.fq
+++ b/tests/test-data/random-20-a.fq
@@ -1,4 +1,4 @@
-@35
+@35 1::FOO
 CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
 +
 ###########################################################
@@ -6,7 +6,7 @@ CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
 CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
 +
 ###########################################################
-@46
+@46 1::FIZ
 GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
 +
 ###########################################################
diff --git a/tests/test-data/simple_1.fa b/tests/test-data/simple_1.fa
deleted file mode 100644
index 5724698..0000000
--- a/tests/test-data/simple_1.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->1
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
->2
-tttttttttttttttttttttttttttttttttttttttt
->3
-gggggggggg
diff --git a/tests/test-data/simple_2.fa b/tests/test-data/simple_2.fa
deleted file mode 100644
index 2d81af7..0000000
--- a/tests/test-data/simple_2.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
->2
-tttttttttttttttttttttttttttttttttttttttt
->3
-gggggggggg
->4
-ggggnggggg
diff --git a/tests/test-data/simple_3.fa b/tests/test-data/simple_3.fa
deleted file mode 100644
index 5bec6ee..0000000
--- a/tests/test-data/simple_3.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-AAAAAAAAAA
->2
-AAAAAAAGGG
diff --git a/tests/test-data/test-abund-read-2.fq b/tests/test-data/test-abund-read-2.fq
index 301208f..3518f14 100644
--- a/tests/test-data/test-abund-read-2.fq
+++ b/tests/test-data/test-abund-read-2.fq
@@ -1,8 +1,8 @@
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954 1::FOO
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@seq
+@seq 1::BAR
 GGTTGACGGGGCTCAGGG
 +
 ##################
diff --git a/tests/test-data/paired-mixed.fq.pe b/tests/test-data/test-abund-read-2.paired.fq
similarity index 62%
copy from tests/test-data/paired-mixed.fq.pe
copy to tests/test-data/test-abund-read-2.paired.fq
index c169b70..99bee16 100644
--- a/tests/test-data/paired-mixed.fq.pe
+++ b/tests/test-data/test-abund-read-2.paired.fq
@@ -2,23 +2,55 @@
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
-+
-##################################################################################################################
 @895:1:37:17593:9954/1
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+@seqtrim/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCcCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/1
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+@seqtrim/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCcGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+@seqtrim2/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAtCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@shortseq
+GAGGG
++
+#####
+@pair2/1
+GGTTGACGGGGCTCAcGG
++
+##################
+@pair2/2
+GGTTGACGGGGCTCAGGc
++
+##################
+@pair/1
+GGTTGACGGGGCTCAGGG
++
+##################
+@pair/2
+ATATGACATATATAGACA
++
+##################
diff --git a/tests/test-data/paired-mixed.fq.pe b/tests/test-data/test-abund-read-2.paired2.fq
similarity index 60%
copy from tests/test-data/paired-mixed.fq.pe
copy to tests/test-data/test-abund-read-2.paired2.fq
index c169b70..be8c27c 100644
--- a/tests/test-data/paired-mixed.fq.pe
+++ b/tests/test-data/test-abund-read-2.paired2.fq
@@ -2,23 +2,55 @@
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
-+
-##################################################################################################################
 @895:1:37:17593:9954/1
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+@seqtrim/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCcCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/1
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+@seqtrim/2
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCcGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+@seqtrim2/1
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAtCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@seq
+GGTTGACGGGGCTCAGGG
++
+##################
+@shortseq
+GAGGG
++
+#####
+@pair2:fiz 1::N
+GGTTGACGGGGCTCAcGG
++
+##################
+@pair2:fiz 2::N
+GGTTGACGGGGCTCAGGc
++
+##################
+@pair:foo 1::N
+GGTTGACGGGGCTCAGGG
++
+##################
+@pair:foo 2::N
+ATATGACATATATAGACA
++
+##################
diff --git a/tests/test-data/test-abund-read-paired.fa b/tests/test-data/test-abund-read-paired.fa
index 71bd8d7..08055b9 100644
--- a/tests/test-data/test-abund-read-paired.fa
+++ b/tests/test-data/test-abund-read-paired.fa
@@ -2,9 +2,9 @@
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 >895:1:37:17593:9954/2
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
->895:1:37:17593:9954/1
+>895:1:37:17593:9954 1::FOO
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
->895:1:37:17593:9954/2
+>895:1:37:17593:9954 2::FOO
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 >895:1:37:17593:9954/1
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
diff --git a/tests/test-data/paired-mixed.fq.pe b/tests/test-data/test-abund-read-paired.fq
similarity index 96%
copy from tests/test-data/paired-mixed.fq.pe
copy to tests/test-data/test-abund-read-paired.fq
index c169b70..283b110 100644
--- a/tests/test-data/paired-mixed.fq.pe
+++ b/tests/test-data/test-abund-read-paired.fq
@@ -6,11 +6,11 @@ GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCC
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/1
+@895:1:37:17593:9954 1::FOO
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
-@895:1:37:17593:9954/2
+@895:1:37:17593:9954 2::FOO
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 +
 ##################################################################################################################
diff --git a/tests/test-data/test-colors.fa b/tests/test-data/test-colors.fa
deleted file mode 100644
index b93d7c3..0000000
--- a/tests/test-data/test-colors.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->read_A
-ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG
->read_B_overlap_A
-GCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAGCTCTGCCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA
->read_C_overlap_B
-TGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGAGTTGGGGATAGATAGATAGATGAGTGTAGATCCAACAACACATACA
->read_D
-TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC
diff --git a/tests/test-data/test-est.fa b/tests/test-data/test-est.fa
deleted file mode 100644
index 7b0d9db..0000000
--- a/tests/test-data/test-est.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->est
-AACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT
diff --git a/tests/test-data/test-graph3.fa b/tests/test-data/test-graph3.fa
deleted file mode 100644
index 1c2b231..0000000
--- a/tests/test-data/test-graph3.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->a
-GACTGGGACTCTGGGAGCACTTATCATGGAGAT
->b
-GGAGCACTTTAACCCTGCAGAGTGGCCAAGGCT
->c
-GGAGCACTTATCATGGAGATATATCCCGTGCTTAAACATCGCACTTTAACCCTGCAGAGT
->d
-GTGAACCGCTTTCTGGCAGGGGAGGTTAAGGATCTTTCTTATACCGCATGGTACCGCAAGAGCCTTTAGGCCTTACAAG
diff --git a/tests/test-data/test-graph4.fa b/tests/test-data/test-graph4.fa
deleted file mode 100644
index 807625f..0000000
--- a/tests/test-data/test-graph4.fa
+++ /dev/null
@@ -1,12 +0,0 @@
->a
-GACTGGGACTCTGGGAGCACTTATCATGGAGAT
->b
-GAGCACTTTAACCCTGCAGAGTGGCCAAGGCT
->c
-GGAGCACTTATCATGGAGATATATCCCGTGCTTAAACATCGCACTTTAACCCTGCAGAGT
->d
-GTGAACCGCTTTCTGGCAGGGGAGGTTAAGGATCTTTCTTATACCGCA
->f
-CTTATACCGCATGGTACCGCAAGAGCCTTTAGGCCTTACAAGGGATATCATTCGCAGA
->e
-GGGGAGGTTAAGGATCTTTCTTATACCGCATGGTACCGCAAGAGCCTTTAGG
diff --git a/tests/test-data/test-graph6.fa b/tests/test-data/test-graph6.fa
deleted file mode 100644
index 0b05b38..0000000
--- a/tests/test-data/test-graph6.fa
+++ /dev/null
@@ -1,464 +0,0 @@
->a
-AACTCGATACTTACATCGCCCGATTCGACTTCAATTTAAACTCTCAGC
->a
-TGCAACGCCTCGAGATTGCGCGCGGTGTCCTTGAGCGTTTCGCCTTTAGTCAGGCTCGAAGTCGTTGC
->a
-GATTGTCTGGTAACGTTCATCGGTGTTTTTGCGCAAGGCCTTTTGCAGAATCGATTCGAGTTTGGCTGGA
->a
-CTTGAAAACCTGATCGACGAAATGTTGGAAGGCCAGATTCTTCTAGATGAGGCCATGGCTGAATT
->a
-TTTGTGAGGTGTTGCTTATGTCGGGCGAGGGCTTTCTTTATATACAGCTTTTCAAATTCAGCCATGGCCTCATCTA
->a
-ACCGACGGAGCGTTAGAGTCCGAAATTATCCGGAGATTTTTGGGCGCGTTTCAAGCGATCACGCCACTGGATATCG
->a
-AATTCAAAGTAGGCATTTTTGTTTTTGTAGGGTTGGCGATGTTAGGCGCGCTGGTCGTGCAATTC
->a
-ATCGACAAATCATTCCGCCCATGCTGTGACCGATCAGGACGATGCGCTTGTGATTTGGGAATGCGCG
->a
-ACCGACGGGCCTCTGGACGAGCGATATTTGGACCAGGTCGCCGAAATCG
->a
-CACGCCTGCGTTTGCGACCTATCCTCATGACTTCCTTCGCCTTCATCTTCGGCCTTCTGCCGCTTTGGTTTGCGAG
->a
-CCACTATGGAAATCGATTGAGCGGTTCAATCAGGACCCGCAAACCATCGTAGTCGTGCCGTCCATGTCCATCGACG
->a
-GCCGGATGTATTCGCCGAGCCCGGGAAAATATTCTCTGCGCTGGCGC
->a
-CGTATAAGCGGCATTCTTACCTCCATAGTTAGGTCAGAAGTGTAACTGACGCTCGTTCCCAAGTGAGGGCCAATC
->a
-GATATGAAGTAATGATTGGCCCTCACTTGGGAACGAGCGTCAGTTACACTTCTGACCTAACTATGGAGGTAAG
->a
-GATGAATGCGGTTGGACCGTCGGGAAAGTAACCGTCG
->a
-AAAGGTTCCAGTTCGCGGAGCGCCTGTCGCCAACTCTCCTGGGCGGCGGCATGATCGCCGGCGAC
->a
-GGTGTCTCTTCTCAAGCATTACTTTCAGTGCGTGTGCTAAGTCCTTAATTTAGAACGATTTTTGCCGGGCTTTCG
->a
-GATATTGAAGCCGAAAGCCCGGCAAAAATCGTTCTAAATTAAGGACTTAGCACACGCACTGAAAGTAATGCTTGAG
->a
-ACGATCTCGGTGCGCTCTCGATGATATCCTCGGACTCTCAGGCTATGGGACGGTTGGGTGAAGTCATCATCCGCAC
->a
-AAATCGAAAATTCCATCGCCGGGTTCCCCCTGGCGAGCGCGATCTCACATCCAGCGACGGAGACCCAGGGACCAAC
->a
-ACGATCCTGCGTCCTGTCATCCGTTTGACTCTGTTTTAAGCCAGAGGGGCCCATGAAGATTTGTCTGGTA
->a
-ACTCGTTCAGCCCCCGCCGTGAGGGCGGAAAGGCACTTACCAGACAAATCTTCATGGGCCCCTCTGGCTTAAAACA
->a
-ATCTCGTAAATGGGGCTGTCATGTTGAGCGAAGCGAAACATCTCTGTCTATCATTGTAGTTAGATTGGTCCATGAA
->a
-AACTGCTTCGCTGTTTCGACGTAGCTCCACGCCCAACGCGCAATATCCTTCTGCTCGTCGAG
->a
-ATGGACAACAAGAAGGTGAAGGCGCAGTTCGGCGGCAAGCCGAGATCGGAAGAGCGTCG
->a
-TGCAAATATCTCCCATCGATCAGAAGCATCGCCCGATTCGACTTCACTTGCTAGCCGG
->a
-GATGGAGGGCGATGTCTCTGTCGAGCTTCTCGAAGAACGGTATCCCATCGCCAATCAAGACCGGC
->a
-GGAAATCCGGCAGTGGTGGTCAAGAAATTCAAAGGGTGAATCCTTTCAACGCCCAACGCTCAACGCCCAACGCTCA
->a
-AAACAAATCCTCGACCGCGAAATAGAGACGCTCACCGAATTTATCGC
->a
-AGTGGGGCGCCGAACAACCTGGCATGGATTCTGGCGACTTCTTCCGACGCTTCAATGCGCGATGGCGCACGAGCAG
->a
-GAAGATTGGGTTGCTATGTCAAATAAAACCCTCAACGTCTCATTAGCATACCTGACAGGCCGCTTAAAGATTCAAG
->a
-ACAACGTTAAAAAGGTTACAAAAGTTAAATCGCCGGGCGGCATGCTCAATTCTTCAACGGGCGGCGTTGTACGCAT
->a
-ATGGTCACCGAAACCGACGGAGCGTTGGAGTCCGAAATTATCCGGAGATTTTTGGGCGCGTTTCAAGCGATCACGC
->a
-GATCTTAAGATTGAGAGCCAAATTCCATCCAAAGTTCGGGGCCCTCAAAACAATAAGACGTCCACCCGGC
->a
-CGGAAAGACTTCGGTGAACTTGCCACCACACCTGCGTTGGCTTATCTCTTTCTCGTTAGCCCGCTGAT
->a
-GATATTGAAAAAACTTCTCTTGGTTGACTTGTATTGCGTGACTTCAGTGCGCCGCAGCAATTGTCGTCCGGTCAGA
->a
-TGGAACCGCCTGGTGCGCCCAACACTAGAAATAGCTTGTCATCTTTCAAGAGCATGGTTGGCGTCATCGACGAAAG
->a
-TGGTGCAGGGCGAAAAGAATGCGATCGCACCCGGCAAACGTTCACTTTCGTCGATGACGCCAACCATGCTCTTGAA
->a
-AATATTGTTAGACCTTCACTGGTCTCTGTCGCCCTCCTTGTTCTTTTCATCCTCTGCTCTGAGATCTACGACGTC
->a
-ATCGCGGTCGCCCCAGGTCCAAGACGAGCAAAATGGGCGAAAGAATTGCGCCGGCGGCC
->a
-TTCGCGGCGTGCTCCGGTCCCATCGTGTACAAGTCGCCGATTGGAAGGCACGCGATGCGCGGGCGATAGAGATCCG
->a
-CGTGGCACGCGCAATATTTCATTCCGCGCTGCCAATCGTCTCAGCAGTCGGCCACGAAATCGATTTCACCA
->a
-AGAATTAGACTGCATCTTGCAATCTATTTCCTGCCAACTTTGACGGAAATCGCCGCCGGACGCAGCG
->a
-CGACAGACAACCCTGAAGCTTACGATGCCTACCTGCGTGGCCTGGCTTATATACTGAAAGCTGGCAACACACCC
->a
-TCGCACGGCACGGCGCAGCGCATCGTCGAACGCGCCGCC
->a
-ACTATCGTCGATTGCTCGGTACAATTCCCGAAGGCGTAACTTTTTCCGATCGCTTGAAGTCCGATTCGAG
->a
-TTACGAGGACAAAGGGCGCATGGTATTTCTCCTGTACCTCCCAACTGGAGTCGACCCGACGCGTTT
->a
-TCTTCAATCGATGATGCATTTCTACCACTTCGAGGTCGAATTCAGGGCCGAGAATCTCGACGGCCTTGCGCGTGAG
->a
-TGAATAAGTTCAGCGAGTTTGCCACGACACCCTGCCGTGGCTTATCTCCTTCTCGTTAGACCCATGCGTGC
->a
-ACCAAAAAAGCTTGCAACTGACTTCTGCAACAAAATCGGCACTTCTCGGACCTCGGCAGATGTCCACTTGAAGTCC
->a
-CTGCTCCTCACCAGAAATGCAGATTTGATTGCCGGACGCCCGCCGTAGGAAAGGCAGATCGGAAGAGCGTCGTG
->a
-TCGAGGCTGGCCTCTGGTTCCCCAATGACACGAGTCTCGATTTCGATTTCGGCGACCTGGTCG
->a
-AAACAGCCGGATGTATTCGTCGATTCCCGGAAAATATTCTCTGCGCTGGCGTTCATGCTGAATGCG
->a
-TTCCCCGAAGCGCTGCGAAAGTTTGATCAGGTTCTCAACATCATACCGGACG
->a
-TTCCGTGTTGGGTTTTATCCACTGTTTTATCCACTCGTGGAAAATTTTCGGCATTATTTGAGCGTACGTCAGATAA
->a
-AATCTGTGATTCTTCACCGGGGTCTTAGCTCAGTTGGTAGAGCGCCTCAATGGCATCAATGCCCGGCTGATTTTCG
->a
-CTCCGCTTTCGTCCCATCATGATGACCACCATGGCCGCGCTACTCGGTGGCCTCCCACTCGCTCTCGGCGCCGGCG
->a
-TCTCGATGCTGGGCGCCTGGCGCGCGCACGAACTCGACGATGCGCTGCGCCGCGCCT
->a
-GGAATCATTTTTGCGGGCTGATCTGCTTTGCTTCCAGGCGCAATGGTTTTTCGGCGAACATGACCAACGCCGAGCG
->a
-TTTCCTGACGAGTGTTTTGATTCCATCGCTGTTCGTGAAGCGATCCGGCGAATATGTGCCGCCGCAATTTCCAAAG
->a
-CTCTCATCTGGACAGCGGGTGTGAAACCAAGTCCGGTAATTGCATCGTTACCTTGCGAGA
->a
-CCTTCTGCCGCTTTGGTTTGCGAGCGGATCGGGCGCCGTCGCCCGCC
->a
-TCTCGGGCTCGCCGACGAAGTTCGTTATTCGATCCTGCCGATCTTGATTGGCGATGGGATACCGTTCTTCGAGAAG
->a
-AATACGTCGAGTGTGTTTGCCACGACACCCTGCCGTGGCTTATCCCTTTCTCGTTAAGTCTAATAGTTCGCTATGA
->a
-GGAATCAAGAAAAACTTCCGAGTGTTCCCACGGTTGCCCGTACCGCAATCCGACACAGGTGGGTGAGTAGAATATA
->a
-GAGCGGAAACAAGATAGCTGGATAAGGAGGGGTACCACGC
->a
-AGCGCATCTCTATGTTCCAAAGCGTGGTACCCCTCCTTATCCAGCTATCTTGTTTCCGCTCAGATCGGAAGAGCGT
->a
-CGCCGATGGCCAGCGGCGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
->a
-GCACCGCGTTCTCAGCCTCAAGCCGCAACTTCGACTTGAATGGCGAGGCCAGGAAGGCCAGCACGAAACAGAACAG
->a
-ACGCCTATGTAGGCGTAGATCGGAAGAGCGTCGTGTAGG
->a
-GTAACAGCGAATCGCTAATTGAAGTTCCATAAATGCAGTTAACTTTTCATCCGCACGCACAACGAAACGCTTTCCG
->a
-AGTACGGTCATCCTGAGCGAAGCGATGCGGAGTCGAAGGAT
->a
-TTTAACTACGGGTTTCTGCTGCACAAGCCGGAAGCGTTTTCCATTTTGAGTTTTCTCTCTGATTCGCGTCCGCTTT
->a
-ATGCAATTGTCATCCTGAGCGAAGCGAAGGATCTCTGGGATTTCGCAAATAGAACCTGAGA
->a
-AAAGTCATCTACTGTTCCTGGCATGGTATCTTTCTCCGGTTAGGGTTTTTAACGGTTGAAGGTATACAAATTGTTC
->a
-GAGACAGATAGTCAACTTAGCAAGACCAGTTGAAATCAAAGAATATCTCCCTGAGATCACAATAACCGGAG
->a
-TGTGATCTCAGGGAGATATTCTTTGATTTCAACTGGTCTTGCTAAGCTGACTTATCTGTCTCTTTCACACAGTACG
->a
-CTCATATTCGCCTGAGTTATGCAGGTTGACATAAACTCTGGCGCGAGCCG
->a
-TCCGGCTGATCACTTTCAGATCGGCCACTTTCGCGAGGTCAGTCAGGATCTCGTCCTGCACGCCATCGGTGAAAAA
->a
-CGAAAATTCGCCGCGAGCTGACGCTCGACGAGCAGAAGGATATTGCGCGTTGGGCG
->a
-CGGCGGTCTACCGCTATCAAATGCCACGATAAATATGACAACAACCACACCAAAGACACACCCGGGCGCTCCGTAT
->a
-GCATTAATCGACAAATCATTCCGCCCATACTGTGACCGATCAGGACGATGCGCTTGTGATTTGGGAATGCGCGCTT
->a
-CTACGAAGCGTGCTTGCTGCGCTTCCGTCCGATCATGATGACCACCATGGCCGCG
->a
-CTTCTCGCGGCCTTCGCGCCGCTCGGCGATCGAGCCGCAGATCGGAAGAGCGTCGTG
->a
-TTACCTTCTTTGTCTCAACTTTTATAGAAAGCTCGATGAGAAAAAATTCCGCCTATTGCTGGGACGAGTAAAACCT
->a
-ACAATCTAAATTCGAAAATCTTAAATCGAAATTCCTTCTCTTGGCCGGGTTCCCCCTGGCGAGT
->a
-GATATTTTCCTTCAGCAAAAGGAGAATACGGCT
->a
-CGAACTCCGACGCCCCCGCTGGTAGTGTCTTCTCCTCGCTGGGGCTGTAGCTCAGTTGGTAGAGCGCTTCCATGGC
->a
-CAGTGGAATAATCCCCAGCCTGGCAGCGCCTATCACCTGACGGTCAACCTGTTTAATCCTAGCTTCCAAGGCGTAC
->a
-GACGTGATCAATATCGCGGCAACGACTTCGAGCCTGACTAAAGGCGAAACCCTCAAGGACACCGCGCGTAACCTGG
->a
-ATAACCTACCGGAATCGCCACCAGCGGAACAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG
->a
-GCCTGCAGCAGATCGGAAGAGCGTCGTGTAG
->a
-CTTGAACCCCGTAGAAGAGATCAAGTAAAAGCGCGTCAATGCAAGCCCGGTTGGC
->a
-ATCGTCTGGTAACGTTCATCACTGTTTTTGCGCAAAGCCTTGTGCAGAATCGATTCGAGTTTGAGCGGAACATCCG
->a
-CGTAACTGACTTCGCGAGATTGCGCGGCTGATCGACATCGGTGCCACGCTCCG
->a
-CCACTCAGGCAACATTTGCAGTGGCCGGCGGCGGCGGTGAGCTCGTCATGGCCAACGGCAACGG
->a
-GCGCCGACAAACTATCGTCGATTGCTAGGTACAATTCCCGAAGGCGTAACTTTTTCCGATCGCTTGAAGCCCGATT
->a
-TACCGCTATCAAATGCCACGATAAATATGACAACAACCACACCAAAGACACACCCGGGCGCTCCGTATGCAAATGC
->a
-CAAGATGTCGCTGGCGATCATGCCGCCGCCCAGGAAAGCTGGCG
->a
-GCAACGAGATTGCCGAATTGGCGCCTACCGGATTCACCGAAGCTTTCAAAGGCAAACTACGGAATCCGTGCATCGC
->a
-CATCGACCCAGAAGAGTGCGATGCACGGATTCCGTAGTTTGCCTTTGAAAGCTTC
->a
-TTTGCTGCAAAAGGTTTTACTCGTTCCAGCAATAGGCGGAATTTTTTCTCATCGAGCTTCCGATAGAAATTCAGGC
->a
-CTGCCGTCGCTCAGCCGGGTGGACGTCTTATT
->a
-GAGCGCATCAGGATGGGCCTCGACGAGCTCCGCCAGGTCGACGTCGATCGGGGGCGACGCCTCCG
->a
-AAATGTCTCGATATGAAGAGAAAGCGGTGCGGGACGGGGAGCTTAGGGAAGCGCCGAGCTATAATAGGGA
->a
-GCTCTCTTTCTCATCCTCTTCCTCAGCATCGCAGCATGTGCGTTCCCTATTATAGCTCGGCGCTTCCCTAAGCTCC
->a
-TCGTTTTGTTCACGATCATGCGCTTTGTGTTTTTCGTGCCCATCGAGGGC
->a
-CCTTCGATCAGTTCACTGGCGATGTAATGGATTGAGTGATCCTCACCGATCTCGTAAACCGTCAGGATATTCGGAT
->a
-TGGTGTTGCGGTAACTGTTGCAGTAGGCGTCGGCGTAGATCGGAAGAGCGGTTCAGCAGGAATG
->a
-ACGCCGACGCCTACTGCAACAGTTACCGCAACACCAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTC
->a
-ATGAAATATTGGGAAATCATCGCTGACAATCTCAGCAAAGCCGGCTGGAGTTGGGGCTGCCTCTCAGCCG
->a
-TGCGCTTTGTGTTTTTCGTTCCCATCGAGGGCAGTGTGTTCGCCATGATGTTTTCTACGCTGGTGTATGTGTTTGC
->a
-TGCCGATGCACGTCCAAACAAAAGGATCGACACAATAGTATCGACATCGCTTTT
->a
-ACACCGCAGGCAGGCGGTGTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGG
->a
-TCACGAGTGTAGGTTTTGGGCAGAGCGGTGAGTTGTTCAATCACGACTTCCGGCTGGGCGCGACG
->a
-TAGCCCCGGCTCAGGCACTGGTTCTGATTACGCTACGATTAAGTACGACGCATCGGGCAACCAACTGTGG
->a
-ATCTCCACCTCCCGCACTGGAGATAACTGGAGCTCGGCCAGGCGCGCTCTG
->a
-CGCCGAAGCGTTGCATAAAGTCATCTACTGTTCCTGGCATGGTATCTTTCTCCGGTTAG
->a
-CATCTTACTAGTTCCTTTCATTGGTTTTGGCCTTCCGTGAGTTCTCGTCATTGAGAACTCATTTCTGGCCGTTC
->a
-CCACGCTGCCCTCCTGATAATCGCGCGAGACGATATATCCCTGCACCTGCGCATTGATCTTAGCGTTGAT
->a
-CATCCAAATATCCTAACGGTTTACGAGATCGGCGAGGATCACTCAACTCACTACATCGCCAGTGAACTGATCGAAG
->a
-TCAATCTTCGCGCAGGACATTTCGAAGAAAAGTTACAACGTTAAAAACGTTACAAAAGTTAAATCGCTGCGCATCG
->a
-TGGAAGTATTTTCCAACTAACTTCTCGGTTTTGGTTCCCGTTTCGACTTCCATCGAACGAGAATTCGAATCTAATC
->a
-CCGAGGCGATCTACCAGCGTTGGGTTTTATCCACTGTTTTATCCACCGATGGAAAAATTTCAGCGTATTTCGGCAT
->a
-TGTCATTCCGAGCGAAGTCGAGGAATCCTGGAATTAATGACGAATGACAAATGTCGAATGACGAAGGAATGACG
->a
-GGCTGCCTTGAATCTTTAAGCGGCCAGTCAAGTATGCCCACGGGACTTTAAGGGTTTTATTTGACATAGAAACCCA
->a
-ACCACAATCGTTTGCGGGTCCTGATTGAATCGCTCGATCGATTTCCACAAAGGGACCAGCTTGTTCTGCAACCGAT
->a
-CGGAACGCGCGCTGCTTCAACGTGATGAAGAACTTATCGAAGCGCAACGA
->a
-GGCTGATCAGCTCTGCAATTCCGGTTCCCGCCGAGCCAGCGCCGAGAAACAGGAACCG
->a
-TTCCGAAGGCGATGAATCTCTCCTTAGAAAGGAGGTGATCCAGCCGCAG
->a
-ATTCCATTCCCTGGGTCTGCGTCCATTACATGGCAGCAGCATTCTCCTTTTGCTGAAGGAAAATATCTTCCAAAA
->a
-AGGGCGATGAAAGCTGAAATGTCGGGGCAGCCTGAAGAGCTAGTTGGAAAATACTTCCACAGCGCAAACGAGACCA
->a
-GGGCAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTC
->a
-CGCTTCGCTCAACATGACAGTCCAAAAAAAAGCAGGGCACCGC
->a
-GAATACATCTTACAGTATAAGTACGACGTACTTCTAGCAGTAGAACTCCAGAATCAATATTGTTAGACCTTCACTG
->a
-TATTGATAGAGTCTTGTTGTTGAACAACCCGGCGCAGTTGAATACCCGTGCCGGGTTGCGCTTCAGCCCTACGCGA
->a
-GCATGGCGATGTGCCGTCACGTGTAGTCCTCGACTTCGCGTAGGGCTGAAGCGCAACCCGGCACG
->a
-ACCCTCGCAGCTTTACCTTCTATGTCGCTCCTCCCGGTCGC
->a
-TCAAGGTCGAGATCAGAGCGCGCCTGGCCGAGCTCCAGTTATCTCCAGTG
->a
-AGGAAGATGGAAGAGTGGCGTCGAGACTATTGTGTCGATCCTTTTGTTCGGCTTAACATCGGCA
->a
-TCCGGTCCCATCGTGTACAAGTCGCCGATTG
->a
-CGTCTAGGATGCGCATGGATGCGCGTCGTCTCCAGAAGGTTGACGAGCTTCTCACGCGACGCATCGGGCAACCAA
->a
-GGTCAAAACCGGTTGCATCGATAACGCGAAAAAAATCTGGTGGGACATCAGGCTGCACCCATTCTTCGATACCTTG
->a
-TCGAAGGTAAAGCTGCGAGGGTAGCGGCGAACGAC
->a
-CCCGGAGTTAGGGTCGTGTGTCAGCGATTGATTCCAACGGGCGAACGATCTGGACTGCTGACGCACAAAGGCGCTA
->a
-AAGGACGATGGGCGGTTCATTGTGCGCGCCGATGAAATTCTGACTGCGTTTCTGGAACTGGAATCGGCACTCACGC
->a
-CACCATTTTCGGCCAGCTCATCGCACATGCTGCGGAGCCAATCAAGATCGGAAGAGCGTCGTG
->a
-AAATCGCAGATGGTGAAATCGATTTCGTGGCCGACGGCTGAGACAATCGGGACAGAAGAATTAAAAATGGCGCGGG
->a
-CTAATAGGCTTGGACTGCAACATGCAATCTATTCCGGGCTCCATGCAGACAGTTTGCCGGAAAACACTGCAGAACG
->a
-ACGATCCCAACTCGCAAGTCGCAGATCTTTTCGACCTACAGCGATAACCAGCCGGGCGTGGAGATCAAAGTGCTTC
->a
-CTGCGCACTTCTGTCGCGAAAGGCATTGAGATCATCGAAGGCCTGCGTGGCCACACCACCGGTTATGCCGTGCCGC
->a
-GTAAAATTTCGCGCAAAAATGGGTAAGACCTAACGAGAAAGAGATAAGCCACGGCAGAGTATGGCG
->a
-ATGATGAAGAGCATCTCATCGTGCTCCGGCG
->a
-CGCTTCTTAATGAGCGTACCGACAGCTTCGAAGATTGCGCCCAGAACGCAGCTTCCACTTTCTGGC
->a
-AGTATGGCCGCCAGCTCCGGGGCTATCGATTCTACAGCGCCTTGAGAAAGCACCCATTCTTCGAC
->a
-GAGTCTTGCTCCAACACCTCGTTATTCTGACCAGGCGGAACTTTCCAAAGTCGAAGAATGGGTGCTTTCTCAAGGC
->a
-ACATTGTTTCTGGGTCAGATGACAACACTGTGTGCATCTGGGATGTATCAACAGGACAGCTAGTTGGGTCTCCCCT
->a
-AATGCAACAGATGTCACCCAGTCTGTATGTCCTTGCAGGGGAGACCCAACTAGCTGTCCTGTTG
->a
-AACGCCAACATCAATGCGCAGGTGCAGGGTTACATCGTTTCGC
->a
-CGGCCAGAGCGAGGGTTTACGTCAACCTGCATAACTCAGGCGAATACGAGTTACCTATTGGTTATGTTACCATTAA
->a
-GGACAATCCTGAGGCTTACGATGCCTACCTGCGTGGGCTGGCTTATACACTTA
->a
-AGAAGGGATCAAGTAAAAGCGCGTCAATGTGCGCAAGTAACTCGAGAAGCACGGATTGTTTCCGCTTTCCTC
->a
-CGTCCGGCGTGATATTGAGAACCTGATCAAGCTTGCGTAGCGCTTCGGAGAAACGCCGAAGGTCGATATAGGAAAT
->a
-TAATGCGTTCGAGCATCGAACGCAGCGCACGCGCGCCGGTGCCTTTGGTGACAGCCTGCGCGGCGAGGG
->a
-GCTGGATCACCTCCTTTCTAAGGAGCACTGGCCCGCAGCTCTTCG
->a
-AATCGGCGACTTCTGCAACAAAATCGGCACAAAGCCGACG
->a
-ACCCCGTCAGCCGTCACGTAGTCCAGGTTGTAGATGGTGTGCTGTGAACCGCCGTAGATCGGAAGAGCGTCG
->a
-CCAGATAACGCTTATTTGGCGGACGGCATTCAAGAGGAGATCCTGACGCGGCTAG
->a
-TCAATGATCTCAATGCCTTTCGCGACCGCAGTACGCAGATGCGACGAGCC
->a
-ACTCTTGTATGGCATTACACTCGTCCGGCTGATCACTTTCAGATCGGCCACTTTC
->a
-CGCCGCATCCGGTCGGCGAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTG
->a
-ACGTCAACAGCATCACCTGCATCAACACGAGCTACAACGCGCGCTGCTTCAACGACGGCTATCACATCGGCCACCA
->a
-TCCAACCGGCTTTGCTGAGATTGTCAGCGCGTTCAATCGTGAGCTTTGATTGGCCCTTCG
->a
-AGCATCCTTCTTCAATCGATGATGCATTTCTACCACT
->a
-TCAGGATCTCCTCTTGAATACCGTCGGCGAAGTAGGCGTTATCTGGATCGCCACTCAGATTTTCAAAGGGCAGCAC
->a
-AGTCCTCTCATAAAGGTCATTCCGAGCGAAGTCGAGAAATCCTAGTGCGAAACCCAGCGTCAACTGCGCGGATCTC
->a
-CCTCGGCCCGCTCGCCAACGCCCTGGTGCAGATGGAGTTCGGCGAAAGCTCGCTCGGTGCGCTCTCGATGAACACG
->a
-CCATTTTCCGAATCTCCTGTTCGCCGTGGCAACCGTGAATGACCGAGCCAGCGCCGAGAAACAACAGCGCTTTAAA
->a
-TCGACGTTCACCAGCGCCAGGTCGACGTCGATCGACG
->a
-TGGCAAAAAGCGAACTCTTCATTGAAAAAGAATTCAGGAGCCAGGAGACAGAGGCGAGCAGTCACTGCTG
->a
-ATGAGAGGGCTACCCAATTTTCCGGGTGGAAACAACGGTTTTGCCACCGGAGCCAACAACTTGGGTCAGGTAGTCG
->a
-GGTCCCTCCTGCCAGAAAGTGGAGGCTGCGTTCTGTGCGCAATCTTCGAAGCTGTCGGCACGCTCGTTCAGGAGCG
->a
-CAGTGTAATACGTCTTAAAACCTTCACCCAGCCTGCCGAATTGCACGACCAGCGCGCCTAACATCACCAGCCCTAC
->a
-GCCCGTACCGGAATCCGACACAGGTGGGTGGGCATAATTGTGCCAAGGCGCGTGAGTGAAACCTCGTTAAGGAACT
->a
-CTCTTGGTTGACTTGTTTTTCGTAACTTCAGTGCGCCGCAGCAATTGTCGCCCCGTTAGAAATCGCGGTT
->a
-AGTGTGTCGAAGAATGGGTGCAGCCGGATGTCCCACCAGATTTTTTTGGCGTTATCGACGCAACCGGTTT
->a
-TCGTGCCGCATCATGGTAGGAATTCAGCGTTGGACGTTGGGCGTTGAGCGTTGG
->a
-CGCTTCAGAAACCGCGCTCAAGTCCACCAGAGCACGATGAGATGCTCTTCATTCTCATTCATCAGACGTACGAGCT
->a
-CGAGCTGACGGGTTTTAACTACGGATTTCTGTTGCACAAACCGGAAGCGTTTTCCATTTTGAGTTTTCTCTCCGAT
->a
-AGGTGAACCGCGGTTCACCTAGATCGGAAGAGCGTCGTGTAGGG
->a
-AAGCTGCTTGGTCAGGTGATAGGCGCTGCCGATCCCGGAGATGCCTGCACCGACGATCAGAACATCGAAGTGCTCG
->a
-CAATGTGTCGGGCTGAAAGCGTTTTCGTTATGCGTGCGGATGAAAAGCTGACTGCGTTTCTGGAACTGCAAAGAGC
->a
-AATGGATGATGTCTAGACGAAATTTGGGAAAAGAATTCAGGAGCCAGAATTCAGAATTCAGGAGTTTCTGAATGCG
->a
-AGTGTTTTGATTCCATCGCTGTTCGTGAAGCGATCCGGCGAATATTTGCCGCCGCAATTTCCAAAGATCCGCGAAG
->a
-CACGGTCCCAACGCGCAAGTCGCAGATCTTTTCAACTTACAGCGACAATCAACCCGGCGTGGAAATCAAAGTGCTC
->a
-CGACGCCGACCTCGTCGAGATGGGTGCTCCAACCGATGTCGGTGGCGCGA
->a
-ATAAGTGGCCCCCGGAAATGAGTTCTCAATGACGAGAAGTCGCGGAAGGCCAAACAACTCCCCCACTTC
->a
-AGTTAGTGGTGTCGATCACCAGGCCTCCCTGGAAATCCGATGCCCCTC
->a
-TGTCAGCGATGATTTCCCAGCAATTCAACCGACGGCGAAAGGAATCTCAACTTCGATTTTTCGCATGTAAGGCT
->a
-TGAATATAGTGCCAGAAGACAGTGTGGCGTTGAACAACCTGGCATGGATTCTGGCGACTTCTTCCGACG
->a
-ACAAACGCAGTCAGCTTTTCATCAGCGTGCACAACGAAACGCTTTCCGTCGTCGCGATGTGTGTCTGCGATCC
->a
-CCTATCATACCGCTGTGGTGAAGGGGACGGACGTCGATCAGCCGCGCAATCTGGCAAAAT
->a
-GTGGTCAACAATCCCTAGAGCCAACGGGCGAACGATCTGGATTGCTGCCGCACACCCGACAGCCGCATATAGGACC
->a
-GCGGTGAGTTGTTCAATCACGACTTCAGGCTGAGCACGACTGACA
->a
-GCACGGCATTTTCAGCCTCCAGCCGGATCTTCGACTTGAATGGCGAGGTCAAGGCGGCCATCACGAAACACAGAAG
->a
-CCCTTCCCAAGACGAGCAAAATGGGCGAAAGAATTGCGCCGGC
->a
-GCAAGCTGCAAAGCTCCTCACTTTACGAGGACAAAGGACGCATGGTATTTCTCCTGTTCCTCCC
->a
-CTCGGCGTTGGTCATGTTCGCCGAAAAACCATTGCGCCTGGAAGCAAAGCAGATCAGCCCGCAAAAATGCTTCCAG
->a
-CGGTATGACGGTTACTTTCCCGACGGTCCCGCCGCGTTCATCGCGCAACTACAGAAATTGCGG
->a
-ACAATTAGACTGCACCTTGCAATCTATTTCTGCCTCCATACAGGCAGCTTGCCGGAAAACACTGCAGGACGTCAAC
->a
-CGGATTTCCAGGGTGGCCTGGTGATCGACACCACTAACTCGTTCAGCTTCAACCCTGCGAGTAACACTATCGGTTC
->a
-TGAAACCAAGTCCGGTAATTGCATCGTTACCTTGCGAGAAGGA
->a
-GGAACGCGACCTTCGGCATGCATCGCGCCGACCTCGTCGAGATGCTGGCGCGCG
->a
-CCACGTCGTCCGGTGTGATATTGAGAACCTGATCAAACTTCCGCAACGCTTCGGGGAAAC
->a
-CCTAAGGCCGATGAGATATGTGGTTGCGCATTGACACCAGATCGGAAGAG
->a
-GGTGTCAATGCGCAACCACATATCTCATCGGCCTTATGAGATCGGAAGAGCGTCGTGTAG
->a
-ATTTTCTGCGTCGATATTTGGGACGCAGAAAATAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGT
->a
-GCCCACCGAACGGATTGAGTCCGATCCACTACAAGACCACAACGCAAATGGCAACTTGAATATTTGATTATAAAT
->a
-TACTTGAATCCGCTAGTGAAATCATTCGCCGGAAATTGATTTATAATCAAATATTCAAGTTGCCATTTGCGTTGTG
->a
-CAACGGTGAGATCGGAAGAGCGTCGTGTAGGG
->a
-ACAGATGTGGATCAGCCGCGCAATCTGGCCAAGTCAGTCACGGTTGAGTAAAAACAGCCGGTGAGCCTGCCTTCCG
->a
-CATGTAACGAGAATTAGACTGCATCTTGCAATCTATTTCTGGCTCCATAGAGGGAAGTCTGCGTTAAAAGGCGGCA
->a
-CCGGTAACTTCCTCGATGCGACGTTGATCGGCGCCGCGGGAAAGATGTATCGCCAGGGCGACGGCTTCGCCCTAG
->a
-CCCACCCAACCACCTGACCCAAGTTGTTGGCTCCGGTTGCAAAGCCGTTATTGC
->a
-CGCGTTCAAGTACCCGTACGCGTTCGACGTTCCGCTCGACTCCTGGACCGAGCTCGTCATGGCCAACGC
->a
-CGCGAGGCTGAAATTGAGCGGGGTGAAGTTGGACGATCTTTCGGAGCAATAAGATCGGAAGAGCGTCGTG
->a
-TTCCAAGAAGATGATGCATATTGTCATTCCGAGCGAAGTCGAAGAATCTCTCGACAGCGACTCTGCAATGAGTTCG
->a
-CCGGCACAGTTGGGCTGTCGGAATCGATCAGACACACCGAATCAAAACCGCACTTGAACAAATCCTCGACCGCGAA
->a
-CAACAGAAACGGGGTTGATCGGCGCCGCGGGAC
->a
-TATTGGGAAATCATCGCAGACAATCTCAGCAAAGCCGGTTGGAGTTGGGGCTGCGTCTCAGCGATTGATTCCAAAG
->a
-CATCGAACGCAGCGCACGCGCTCCTGTACCTTTGGTGACGGCCTGCCCGGCGAG
->a
-GAGCTCCTTAACGAGGTTTCACTCACGCGCCTGAGTATATTCTACTCACCCACC
diff --git a/tests/test-data/test-reads.fq.gz b/tests/test-data/test-reads.fq.gz
new file mode 100644
index 0000000..d96b0a9
Binary files /dev/null and b/tests/test-data/test-reads.fq.gz differ
diff --git a/tests/test-data/truncated.fq b/tests/test-data/truncated.fq
new file mode 100644
index 0000000..41de3b8
--- /dev/null
+++ b/tests/test-data/truncated.fq
@@ -0,0 +1,5 @@
+@1:1:1:1:1 1:N:0:NNNNN
+CAGGCGCCCA
++
+][aaX__aa[
+@
diff --git a/tests/test_c_wrapper.py b/tests/test_c_wrapper.py
deleted file mode 100644
index 58c1f96..0000000
--- a/tests/test_c_wrapper.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project at idyll.org
-#
-import os
-import khmer
-
-import khmer_tst_utils as utils
-
-reads_filename = utils.get_test_data('test-reads.fa')
-
-N_READS = 25000
-
-
-def teardown():
-    utils.cleanup()
-
-
-class GoodException(Exception):
-    pass
-
-
-def callback_raise(info, n_reads, other):
-    raise GoodException
-
-
-def setup():
-    khmer.set_reporting_callback(None)
-
-
-def teardown():
-    khmer.reset_reporting_callback()
-
-
-def test_raise_in_consume_fasta():
-    return  # @CTB
-    kh = khmer.new_hashtable(4, 4 ** 4)
-
-    try:
-        n, _ = kh.consume_fasta(reads_filename, 0, 0, callback_raise)
-        print n
-        assert 0
-    except GoodException:
-        pass
-    except:
-        raise
diff --git a/tests/test_counting_hash.py b/tests/test_counting_hash.py
index a0b654e..849eb21 100644
--- a/tests/test_counting_hash.py
+++ b/tests/test_counting_hash.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,protected-access
@@ -10,7 +10,9 @@ import gzip
 import khmer
 import khmer_tst_utils as utils
 from khmer import ReadParser
+import screed
 
+import nose
 from nose.plugins.attrib import attr
 
 MAX_COUNT = 255
@@ -36,7 +38,7 @@ def teardown():
 class Test_CountingHash(object):
 
     def setup(self):
-        self.hi = khmer._new_counting_hash(12, PRIMES_1m)
+        self.hi = khmer.CountingHash(12, PRIMES_1m)
 
     def test_collision_1(self):
 
@@ -100,11 +102,48 @@ class Test_CountingHash(object):
         assert hi.get(GG) == 2
 
 
+def test_get_raw_tables():
+    ht = khmer.new_counting_hash(20, 1e5, 4)
+    tables = ht.get_raw_tables()
+
+    for size, table in zip(ht.hashsizes(), tables):
+        assert isinstance(table, buffer)
+        assert size == len(table)
+
+
+def test_get_raw_tables_view():
+    try:
+        memoryview
+    except NameError:
+        raise nose.SkipTest("This test requires memoryview")
+    ht = khmer.new_counting_hash(20, 1e5, 4)
+    tables = ht.get_raw_tables()
+    for tab in tables:
+        try:
+            memv = memoryview(tab)
+        except TypeError:
+            raise nose.SkipTest("This test needs a higher version of Python.")
+        assert sum(memv.tolist()) == 0
+    ht.consume('AAAATTTTCCCCGGGGAAAA')
+    for tab in tables:
+        memv = memoryview(tab)
+        assert sum(memv.tolist()) == 1
+
+
+@attr('linux')
+def test_toobig():
+    try:
+        ct = khmer.new_counting_hash(30, 1e13, 1)
+        assert 0, "this should fail"
+    except MemoryError as err:
+        print str(err)
+
+
 def test_3_tables():
     x = list(PRIMES_1m)
     x.append(1000005)
 
-    hi = khmer._new_counting_hash(12, x)
+    hi = khmer.CountingHash(12, x)
 
     GG = 'G' * 12                   # forward_hash: 11184810
     assert khmer.forward_hash(GG, 12) == 11184810
@@ -274,17 +313,17 @@ def test_save_load():
     sizes = list(PRIMES_1m)
     sizes.append(1000005)
 
-    hi = khmer._new_counting_hash(12, sizes)
+    hi = khmer.CountingHash(12, sizes)
     hi.consume_fasta(inpath)
     hi.save(savepath)
 
-    ht = khmer._new_counting_hash(12, sizes)
+    ht = khmer.CountingHash(12, sizes)
     ht.load(savepath)
 
-    tracking = khmer._new_hashbits(12, sizes)
+    tracking = khmer._Hashbits(12, sizes)
     x = hi.abundance_distribution(inpath, tracking)
 
-    tracking = khmer._new_hashbits(12, sizes)
+    tracking = khmer._Hashbits(12, sizes)
     y = ht.abundance_distribution(inpath, tracking)
 
     assert sum(x) == 3966, sum(x)
@@ -301,7 +340,7 @@ def test_load_gz():
     sizes.append(1000005)
 
     # save uncompressed hashtable.
-    hi = khmer._new_counting_hash(12, sizes)
+    hi = khmer.CountingHash(12, sizes)
     hi.consume_fasta(inpath)
     hi.save(savepath)
 
@@ -313,13 +352,13 @@ def test_load_gz():
     in_file.close()
 
     # load compressed hashtable.
-    ht = khmer._new_counting_hash(12, sizes)
+    ht = khmer.CountingHash(12, sizes)
     ht.load(loadpath)
 
-    tracking = khmer._new_hashbits(12, sizes)
+    tracking = khmer._Hashbits(12, sizes)
     x = hi.abundance_distribution(inpath, tracking)
 
-    tracking = khmer._new_hashbits(12, sizes)
+    tracking = khmer._Hashbits(12, sizes)
     y = ht.abundance_distribution(inpath, tracking)
 
     assert sum(x) == 3966, sum(x)
@@ -333,17 +372,17 @@ def test_save_load_gz():
     sizes = list(PRIMES_1m)
     sizes.append(1000005)
 
-    hi = khmer._new_counting_hash(12, sizes)
+    hi = khmer.CountingHash(12, sizes)
     hi.consume_fasta(inpath)
     hi.save(savepath)
 
-    ht = khmer._new_counting_hash(12, sizes)
+    ht = khmer.CountingHash(12, sizes)
     ht.load(savepath)
 
-    tracking = khmer._new_hashbits(12, sizes)
+    tracking = khmer._Hashbits(12, sizes)
     x = hi.abundance_distribution(inpath, tracking)
 
-    tracking = khmer._new_hashbits(12, sizes)
+    tracking = khmer._Hashbits(12, sizes)
     y = ht.abundance_distribution(inpath, tracking)
 
     assert sum(x) == 3966, sum(x)
@@ -693,7 +732,7 @@ def test_load_gz_truncated_should_fail():
 def test_counting_file_version_check():
     ht = khmer.new_counting_hash(12, 1, 1)
 
-    inpath = utils.get_test_data('badversion-k12.kh')
+    inpath = utils.get_test_data('badversion-k12.ct')
 
     try:
         ht.load(inpath)
@@ -705,7 +744,7 @@ def test_counting_file_version_check():
 def test_counting_gz_file_version_check():
     ht = khmer.new_counting_hash(12, 1, 1)
 
-    inpath = utils.get_test_data('badversion-k12.kh.gz')
+    inpath = utils.get_test_data('badversion-k12.ct.gz')
 
     try:
         ht.load(inpath)
@@ -742,7 +781,7 @@ def test_counting_gz_file_type_check():
 
 def test_counting_bad_primes_list():
     try:
-        ht = khmer._new_counting_hash(12, ["a", "b", "c"], 1)
+        ht = khmer.CountingHash(12, ["a", "b", "c"], 1)
         assert 0, "bad list of primes should fail"
     except TypeError as e:
         print str(e)
@@ -781,7 +820,7 @@ def test_consume_absentfasta_with_reads_parser():
         assert 0, "this should fail"
     except IOError as err:
         print str(err)
-    except ValueError, err:
+    except ValueError as err:
         print str(err)
 
 
@@ -965,3 +1004,70 @@ def test_consume_fasta_and_tag():
     except TypeError as err:
         print str(err)
     countingtable.consume_fasta_and_tag(utils.get_test_data("test-graph2.fa"))
+
+
+def test_consume_and_retrieve_tags_1():
+    ct = khmer.new_counting_hash(4, 4 ** 4, 4)
+
+    # first, for each sequence, build tags.
+    for record in screed.open(utils.get_test_data('test-graph2.fa')):
+        ct.consume_and_tag(record.sequence)
+
+    # check that all the tags in sequences are retrieved by iterating
+    # across the sequence and retrieving by neighborhood.
+
+    ss = set()
+    tt = set()
+    for record in screed.open(utils.get_test_data('test-graph2.fa')):
+        for p, tag in ct.get_tags_and_positions(record.sequence):
+            ss.add(tag)
+
+        for start in range(len(record.sequence) - 3):
+            kmer = record.sequence[start:start + 4]
+            tt.update(ct.find_all_tags_list(kmer))
+
+    assert ss == tt
+
+
+def test_consume_and_retrieve_tags_empty():
+    ct = khmer.new_counting_hash(4, 4 ** 4, 4)
+
+    # load each sequence but do not build tags - everything should be empty.
+    for record in screed.open(utils.get_test_data('test-graph2.fa')):
+        ct.consume(record.sequence)
+
+    # check that all the tags in sequences are retrieved by iterating
+    # across the sequence and retrieving by neighborhood.
+
+    ss = set()
+    tt = set()
+    for record in screed.open(utils.get_test_data('test-graph2.fa')):
+        for p, tag in ct.get_tags_and_positions(record.sequence):
+            ss.add(tag)
+
+        for start in range(len(record.sequence) - 3):
+            kmer = record.sequence[start:start + 4]
+            tt.update(ct.find_all_tags_list(kmer))
+
+    assert not ss
+    assert not tt
+
+
+def test_find_all_tags_list_error():
+    ct = khmer.new_counting_hash(4, 4 ** 4, 4)
+
+    # load each sequence but do not build tags - everything should be empty.
+    for record in screed.open(utils.get_test_data('test-graph2.fa')):
+        ct.consume(record.sequence)
+
+    try:
+        ct.find_all_tags_list("ATA")
+        assert False, "a ValueError should be raised for incorrect k-mer size"
+    except ValueError:
+        pass
+
+    try:
+        ct.find_all_tags_list("ATAGA")
+        assert False, "a ValueError should be raised for incorrect k-mer size"
+    except ValueError:
+        pass
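
The new get_raw_tables() tests above reduce to the following usage pattern
(a sketch using the same calls as the tests; it assumes, as the tests do,
that a single 20-mer lands in exactly one slot of each table):

    import khmer

    ht = khmer.new_counting_hash(20, 1e5, 4)  # k=20, four ~1e5-entry tables
    ht.consume('AAAATTTTCCCCGGGGAAAA')        # exactly one 20-mer

    for table in ht.get_raw_tables():         # one buffer per table
        counts = memoryview(table).tolist()   # zero-copy view of the bytes
        print sum(counts)                     # -> 1 for each table
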
diff --git a/tests/test_counting_single.py b/tests/test_counting_single.py
index 83824e3..7fd348a 100644
--- a/tests/test_counting_single.py
+++ b/tests/test_counting_single.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 
@@ -24,6 +24,15 @@ def test_no_collision():
     assert kh.get('TTTT') == 2
 
 
+@attr('linux')
+def test_toobig():
+    try:
+        ct = khmer.new_hashtable(4, 1000000000000)
+        assert 0, "this should fail"
+    except MemoryError as err:
+        print str(err)
+
+
 def test_collision():
     kh = khmer.new_hashtable(4, 4)
 
@@ -79,7 +88,7 @@ def test_complete_no_collision():
 
     assert n_rc_filled == kh.n_entries(), n_rc_filled
     assert n_palindromes == 16, n_palindromes  # @CTB check this
-    assert n_fwd_filled == kh.n_entries() / 2 + n_palindromes / 2, \
+    assert n_fwd_filled == kh.n_entries() // 2 + n_palindromes // 2, \
         n_fwd_filled
 
 
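The / to // change above is the standard Python 2/3 portability fix: // is
floor division under both interpreters, while / becomes true division on
Python 3 or under the division future import. A two-line illustration:

    from __future__ import division

    print(7 / 2)    # 3.5 -- true division
    print(7 // 2)   # 3   -- floor division, the old int/int behavior
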
diff --git a/tests/test_filter.py b/tests/test_filter.py
index efbc21a..28b87ae 100644
--- a/tests/test_filter.py
+++ b/tests/test_filter.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/tests/test_functions.py b/tests/test_functions.py
index 747b042..edf0c8d 100644
--- a/tests/test_functions.py
+++ b/tests/test_functions.py
@@ -1,13 +1,17 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
 from nose.plugins.attrib import attr
 import os
 import khmer_tst_utils as utils
+import collections
+from khmer.utils import (check_is_pair, broken_paired_reader, check_is_left,
+                         check_is_right)
+from khmer.kfile import check_input_files
 
 
 def test_forward_hash():
@@ -45,6 +49,27 @@ def test_reverse_hash():
     assert s == "GGGG"
 
 
+def test_hash_murmur3():
+    assert khmer.hash_murmur3('AAAA') == 526240128537019279
+    assert khmer.hash_murmur3('TTTT') == 526240128537019279
+    assert khmer.hash_murmur3('CCCC') == 14391997331386449225
+    assert khmer.hash_murmur3('GGGG') == 14391997331386449225
+
+
+def test_hash_no_rc_murmur3():
+    h = khmer.hash_no_rc_murmur3('AAAA')
+    assert h == 5231866503566620412, h
+
+    h = khmer.hash_no_rc_murmur3('TTTT')
+    assert h == 5753003579327329651, h
+
+    h = khmer.hash_no_rc_murmur3('CCCC')
+    assert h == 3789793362494378039, h
+
+    h = khmer.hash_no_rc_murmur3('GGGG')
+    assert h == 17519752047064575358, h
+
+
 def test_get_primes():
     primes = khmer.get_n_primes_near_x(7, 20)
 
@@ -89,3 +114,207 @@ def test_extract_hashbits_info():
             os.remove(fn)
         except OSError as e:
             print >>sys.stderr, '...failed to remove {fn}'.format(fn=fn)
+
+
+def test_check_file_status_kfile():
+    fn = utils.get_temp_filename('thisfiledoesnotexist')
+    check_file_status_exited = False
+    try:
+        check_input_files(fn, False)
+    except SystemExit:
+        check_file_status_exited = True
+    assert check_file_status_exited
+
+
+def test_check_file_status_kfile_force():
+    fn = utils.get_temp_filename('thisfiledoesnotexist')
+    try:
+        check_input_files(fn, True)
+    except OSError as e:
+        assert False
+
+
+FakeFQRead = collections.namedtuple('Read', ['name', 'quality', 'sequence'])
+FakeFastaRead = collections.namedtuple('Read', ['name', 'sequence'])
+
+
+def test_check_is_pair_1():
+    read1 = FakeFQRead(name='seq', quality='###', sequence='AAA')
+    read2 = FakeFQRead(name='seq2', quality='###', sequence='AAA')
+
+    assert not check_is_pair(read1, read2)
+
+
+def test_check_is_pair_2():
+    read1 = FakeFQRead(name='seq/1', quality='###', sequence='AAA')
+    read2 = FakeFQRead(name='seq/2', quality='###', sequence='AAA')
+
+    assert check_is_pair(read1, read2)
+
+
+def test_check_is_pair_3_fq():
+    read1 = FakeFQRead(name='seq 1::', quality='###', sequence='AAA')
+    read2 = FakeFQRead(name='seq 2::', quality='###', sequence='AAA')
+
+    assert check_is_pair(read1, read2)
+
+
+def test_check_is_pair_3_broken_fq_1():
+    read1 = FakeFQRead(name='seq', quality='###', sequence='AAA')
+    read2 = FakeFQRead(name='seq 2::', quality='###', sequence='AAA')
+
+    assert not check_is_pair(read1, read2)
+
+
+def test_check_is_pair_3_broken_fq_2():
+    read1 = FakeFQRead(name='seq 1::', quality='###', sequence='AAA')
+    read2 = FakeFQRead(name='seq', quality='###', sequence='AAA')
+
+    assert not check_is_pair(read1, read2)
+
+
+def test_check_is_pair_3_fa():
+    read1 = FakeFastaRead(name='seq 1::', sequence='AAA')
+    read2 = FakeFastaRead(name='seq 2::', sequence='AAA')
+
+    assert check_is_pair(read1, read2)
+
+
+def test_check_is_pair_4():
+    read1 = FakeFQRead(name='seq/1', quality='###', sequence='AAA')
+    read2 = FakeFastaRead(name='seq/2', sequence='AAA')
+
+    try:
+        check_is_pair(read1, read2)
+        assert False                    # check_is_pair should fail here.
+    except ValueError:
+        pass
+
+
+def test_check_is_pair_4b():
+    read1 = FakeFastaRead(name='seq/1', sequence='AAA')
+    read2 = FakeFQRead(name='seq/2', quality='###', sequence='AAA')
+
+    try:
+        check_is_pair(read1, read2)
+        assert False                    # check_is_pair should fail here.
+    except ValueError:
+        pass
+
+
+def test_check_is_pair_5():
+    read1 = FakeFastaRead(name='seq/1', sequence='AAA')
+    read2 = FakeFastaRead(name='seq/2', sequence='AAA')
+
+    assert check_is_pair(read1, read2)
+
+
+def test_check_is_pair_6():
+    read1 = FakeFastaRead(name='seq1', sequence='AAA')
+    read2 = FakeFastaRead(name='seq2', sequence='AAA')
+
+    assert not check_is_pair(read1, read2)
+
+
+def test_check_is_pair_7():
+    read1 = FakeFastaRead(name='seq/2', sequence='AAA')
+    read2 = FakeFastaRead(name='seq/1', sequence='AAA')
+
+    assert not check_is_pair(read1, read2)
+
+
+def test_check_is_right():
+    assert not check_is_right('seq1/1')
+    assert not check_is_right('seq1 1::N')
+    assert check_is_right('seq1/2')
+    assert check_is_right('seq1 2::N')
+
+    assert not check_is_right('seq')
+    assert not check_is_right('seq 2')
+
+
+def test_check_is_left():
+    assert check_is_left('seq1/1')
+    assert check_is_left('seq1 1::N')
+    assert not check_is_left('seq1/2')
+    assert not check_is_left('seq1 2::N')
+
+    assert not check_is_left('seq')
+    assert not check_is_left('seq 1')
+
+    assert check_is_left(
+        '@HWI-ST412:261:d15khacxx:8:1101:3149:2157 1:N:0:ATCACG')
+
+
+class Test_BrokenPairedReader(object):
+    stream = [FakeFastaRead(name='seq1/1', sequence='A' * 5),
+              FakeFastaRead(name='seq1/2', sequence='A' * 4),
+              FakeFastaRead(name='seq2/1', sequence='A' * 5),
+              FakeFastaRead(name='seq3/1', sequence='A' * 3),
+              FakeFastaRead(name='seq3/2', sequence='A' * 5)]
+
+    def gather(self, **kw):
+        iter = broken_paired_reader(self.stream, **kw)
+
+        x = []
+        m = 0
+        for n, is_pair, read1, read2 in iter:
+            if is_pair:
+                x.append((read1.name, read2.name))
+            else:
+                x.append((read1.name, None))
+            m += 1
+
+        return x, n, m
+
+    def testDefault(self):
+        x, n, m = self.gather(min_length=1)
+
+        expected = [('seq1/1', 'seq1/2'),
+                    ('seq2/1', None),
+                    ('seq3/1', 'seq3/2')]
+        assert x == expected, x
+        assert m == 3
+        assert n == 3, n
+
+    def testMinLength(self):
+        x, n, m = self.gather(min_length=3)
+
+        expected = [('seq1/1', 'seq1/2'),
+                    ('seq2/1', None),
+                    ('seq3/1', 'seq3/2')]
+        assert x == expected, x
+        assert m == 3
+        assert n == 3, n
+
+    def testMinLength_2(self):
+        x, n, m = self.gather(min_length=4)
+
+        expected = [('seq1/1', 'seq1/2'),
+                    ('seq2/1', None),
+                    ('seq3/2', None)]
+        assert x == expected, x
+        assert m == 3
+        assert n == 3, n
+
+    def testForceSingle(self):
+        x, n, m = self.gather(force_single=True)
+
+        expected = [('seq1/1', None),
+                    ('seq1/2', None),
+                    ('seq2/1', None),
+                    ('seq3/1', None),
+                    ('seq3/2', None)]
+        assert x == expected, x
+        assert m == 5
+        assert n == 4, n
+
+    def testForceSingleAndMinLength(self):
+        x, n, m = self.gather(min_length=5, force_single=True)
+
+        expected = [('seq1/1', None),
+                    ('seq2/1', None),
+                    ('seq3/2', None)]
+        assert x == expected, x
+        assert m == 3, m
+        assert n == 2, n
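
The hash_murmur3 tests above show that a k-mer and its reverse complement map
to the same value, while hash_no_rc_murmur3 is strand-specific. One common way
to get the strand-neutral behavior, sketched here with Python's builtin hash
as a stand-in for MurmurHash3 and not taken from khmer's implementation, is to
hash a canonical form of the k-mer:

    _RC = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def revcomp(kmer):
        return ''.join(_RC[b] for b in reversed(kmer))

    def hash_canonical(kmer):
        # Hash the lexicographically smaller of the k-mer and its reverse
        # complement, so 'AAAA' and 'TTTT' collapse to one value.
        return hash(min(kmer, revcomp(kmer)))

    assert hash_canonical('AAAA') == hash_canonical('TTTT')
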
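Taken together, the check_is_pair/check_is_left/check_is_right tests pin down
two read-naming conventions: the older 'name/1' and 'name/2' suffixes, and the
Casava 1.8 style 'name 1:...' / 'name 2:...' fields. A minimal sketch of the
left/right detection those tests imply (an illustration of the convention, not
khmer's implementation):

    def _pair_number(name):
        # 'seq/1' -> '1'; 'seq 1:N:0:...' -> '1'; otherwise None.
        if '/' in name:
            return name.rpartition('/')[2]
        base, _, rest = name.partition(' ')
        if rest and rest[0] in '12' and len(rest) > 1 and rest[1] == ':':
            return rest[0]
        return None

    def check_is_left(name):
        return _pair_number(name) == '1'

    def check_is_right(name):
        return _pair_number(name) == '2'

    assert check_is_left('seq1/1') and check_is_right('seq1 2::N')
    assert not check_is_left('seq 1')   # bare '1' without ':' is no pair tag
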
diff --git a/tests/test_graph.py b/tests/test_graph.py
index 2163434..9b05c48 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
@@ -239,7 +239,7 @@ class Test_Partitioning(object):
 
         print open(output_file).read()
 
-        x = set([r.accuracy for r in screed.open(output_file)])
+        x = set([r.quality for r in screed.open(output_file)])
         assert x, x
 
     def test_disconnected_20_a(self):
@@ -307,6 +307,26 @@ class Test_Partitioning(object):
 
 class Test_PythonAPI(object):
 
+    def test_find_all_tags_kmersize(self):
+        ht = khmer.new_hashbits(20, 4 ** 4 + 1)
+
+        a = "ATTGGGACTCTGGGAGCACTTATCATGGAGAT"
+        b = "GAGCACTTTAACCCTGCAGAGTGGCCAAGGCT"
+        c = "GGAGCACTTATCATGGAGATATATCCCGTGCTTAAACATCGCACTTTAACCCTGCAGAGT"
+
+        print ht.consume(a)
+        try:
+            ppi = ht.find_all_tags(c[:19])
+            assert False, "should raise a ValueError for wrong k-mer size"
+        except ValueError:
+            pass
+
+        try:
+            ppi = ht.find_all_tags(c[:21])
+            assert False, "should raise a ValueError for wrong k-mer size"
+        except ValueError:
+            pass
+
     def test_ordered_connect(self):
         ht = khmer.new_hashbits(20, 4 ** 4 + 1)
 
diff --git a/tests/test_hashbits.py b/tests/test_hashbits.py
index 1088232..40b8d71 100644
--- a/tests/test_hashbits.py
+++ b/tests/test_hashbits.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,protected-access
@@ -543,7 +543,7 @@ def test_load_notexist_should_fail():
 
 def test_load_truncated_should_fail():
     inpath = utils.get_test_data('random-20-a.fa')
-    savepath = utils.get_temp_filename('temphashbitssave0.kh')
+    savepath = utils.get_temp_filename('temphashbitssave0.ct')
 
     hi = khmer.new_counting_hash(12, 1000)
     hi.consume_fasta(inpath)
@@ -584,7 +584,6 @@ def test_save_load_tagset_trunc():
     ht.add_tag('A' * 32)
     ht.add_tag('G' * 32)
     ht.save_tagset(outfile)
-    ht.save_tagset('/tmp/goodversion-k32.tagset')
 
     # truncate tagset file...
     fp = open(outfile, 'rb')
@@ -660,7 +659,7 @@ def test_hashbits_file_version_check():
 
 def test_hashbits_file_type_check():
     kh = khmer.new_counting_hash(12, 1, 1)
-    savepath = utils.get_temp_filename('tempcountingsave0.kh')
+    savepath = utils.get_temp_filename('tempcountingsave0.ct')
     kh.save(savepath)
 
     ht = khmer.new_hashbits(12, 1, 1)
@@ -742,7 +741,7 @@ def test_tagset_filetype_check():
 
 def test_bad_primes_list():
     try:
-        coutingtable = khmer._new_hashbits(31, ["a", "b", "c"], 1)
+        countingtable = khmer._Hashbits(31, ["a", "b", "c"], 1)
         assert 0, "Bad primes list should fail"
     except TypeError as e:
         print str(e)
@@ -761,5 +760,5 @@ def test_consume_absentfasta_with_reads_parser():
         assert 0, "this should fail"
     except IOError as err:
         print str(err)
-    except ValueError, err:
+    except ValueError as err:
         print str(err)
diff --git a/tests/test_hashbits_obj.py b/tests/test_hashbits_obj.py
index 4d5636a..2d3ebd1 100644
--- a/tests/test_hashbits_obj.py
+++ b/tests/test_hashbits_obj.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,protected-access
@@ -25,6 +25,15 @@ def teardown():
     utils.cleanup()
 
 
+@attr('linux')
+def test_toobig():
+    try:
+        pt = khmer.Hashbits(32, 1e13, 1)
+        assert 0, "This should fail"
+    except MemoryError as err:
+        print str(err)
+
+
 def test__get_set_tag_density():
     ht = khmer.Hashbits(32, 1, 1)
 
@@ -550,5 +559,5 @@ def test_consume_fasta_and_tag_with_badreads_parser():
         assert 0, "this should fail"
     except IOError as e:
         print str(e)
-    except ValueError, e:
+    except ValueError as e:
         print str(e)
diff --git a/tests/test_hll.py b/tests/test_hll.py
new file mode 100644
index 0000000..266ce5d
--- /dev/null
+++ b/tests/test_hll.py
@@ -0,0 +1,250 @@
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2014-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
+# Contact: khmer-project at idyll.org
+#
+# pylint: disable=missing-docstring,protected-access
+
+import math
+import string
+
+import khmer
+
+from screed.fasta import fasta_iter
+
+import khmer_tst_utils as utils
+from nose.tools import assert_raises
+
+
+TT = string.maketrans('ACGT', 'TGCA')
+K = 20  # size of kmer
+ERR_RATE = 0.01
+N_UNIQUE = 3960
+
+
+def teardown():
+    utils.cleanup()
+
+
+def test_hll_add_python():
+    # test python code to count unique k-mers using HyperLogLog.
+    # use the lower-level add() method, which accepts any string,
+    # and compare to an exact count of canonical k-mers kept in a set.
+
+    filename = utils.get_test_data('random-20-a.fa')
+    hllcpp = khmer.HLLCounter(ERR_RATE, K)
+    counter = set()
+
+    for n, record in enumerate(fasta_iter(open(filename))):
+        sequence = record['sequence']
+        seq_len = len(sequence)
+        for n in range(0, seq_len + 1 - K):
+            kmer = sequence[n:n + K]
+            rc = kmer[::-1].translate(TT)
+
+            hllcpp.add(kmer)
+
+            if rc in counter:
+                kmer = rc
+            counter.update([kmer])
+
+    n_unique = len(counter)
+
+    assert n_unique == N_UNIQUE
+    assert abs(1 - float(hllcpp.estimate_cardinality()) / N_UNIQUE) < ERR_RATE
+
+
+def test_hll_consume_string():
+    # test c++ code to count unique kmers using HyperLogLog,
+    # using screed to feed each read to the counter.
+
+    filename = utils.get_test_data('random-20-a.fa')
+    hllcpp = khmer.HLLCounter(ERR_RATE, K)
+    for n, record in enumerate(fasta_iter(open(filename))):
+        hllcpp.consume_string(record['sequence'])
+
+    assert abs(1 - float(hllcpp.estimate_cardinality()) / N_UNIQUE) < ERR_RATE
+
+
+def test_hll_empty_fasta():
+    filename = utils.get_test_data('test-empty.fa')
+    hll = khmer.HLLCounter(ERR_RATE, K)
+    with assert_raises(IOError):
+        hll.consume_fasta(filename)
+
+
+def test_hll_consume_fasta():
+    # test c++ code to count unique kmers using HyperLogLog
+
+    filename = utils.get_test_data('random-20-a.fa')
+    hllcpp = khmer.HLLCounter(ERR_RATE, K)
+    hllcpp.consume_fasta(filename)
+
+    assert abs(1 - float(hllcpp.estimate_cardinality()) / N_UNIQUE) < ERR_RATE
+
+
+def test_hll_consume_fasta_ep():
+    # During estimation, trigger the _Ep() method; for that to happen,
+    # all internal counter values must be nonzero.
+
+    filename = utils.get_test_data('paired-mixed.fa')
+    hll = khmer.HLLCounter(0.36, 32)
+    hll.consume_fasta(filename)
+
+    assert all(c != 0 for c in hll.counters)
+    assert len(hll) == 236
+
+
+def test_hll_consume_fasta_estimate_bias():
+    # During estimation, trigger the estimate_bias method; for that to
+    # happen, all internal counter values must be nonzero and the
+    # cardinality must be small (large cardinalities fall through to the
+    # default case).
+
+    filename = utils.get_test_data("test-abund-read-3.fa")
+    hll = khmer.HLLCounter(0.36, K)
+    hll.consume_fasta(filename)
+
+    assert all(c != 0 for c in hll.counters)
+    assert len(hll) == 79
+
+
+def test_hll_len():
+    filename = utils.get_test_data('random-20-a.fa')
+    hllcpp = khmer.HLLCounter(ERR_RATE, K)
+    hllcpp.consume_fasta(filename)
+
+    assert hllcpp.estimate_cardinality() == len(hllcpp)
+
+
+def test_hll_empty():
+    hllcpp = khmer.HLLCounter(ERR_RATE, K)
+
+    assert len(hllcpp) == 0
+
+
+def test_hll_readonly_alpha():
+    hllcpp = khmer.HLLCounter(ERR_RATE, K)
+    with assert_raises(AttributeError):
+        hllcpp.alpha = 5
+
+
+def test_hll_cover_calc_alpha():
+    hllcpp = khmer.HLLCounter(0.36, K)
+    counters = hllcpp.counters
+    assert hllcpp.alpha == 0.673
+    assert len(counters) == 2 ** 4
+
+    hllcpp = khmer.HLLCounter(0.21, K)
+    counters = hllcpp.counters
+    assert hllcpp.alpha == 0.697
+    assert len(counters) == 2 ** 5
+
+    hllcpp = khmer.HLLCounter(0.16, K)
+    counters = hllcpp.counters
+    assert hllcpp.alpha == 0.709
+    assert len(counters) == 2 ** 6
+
+
+def test_hll_invalid_base():
+    # this test should raise a ValueError,
+    # since there are invalid bases in the read.
+
+    hllcpp = khmer.HLLCounter(ERR_RATE, 5)
+    with assert_raises(ValueError):
+        hllcpp.consume_string("ACGTTTCGNAATNNNNN")
+
+
+def test_hll_invalid_error_rate():
+    # negative error rates should be rejected
+
+    with assert_raises(ValueError):
+        hllcpp = khmer.HLLCounter(-0.01, K)
+
+
+def test_hll_invalid_error_rate_max():
+    # error rates above the supported maximum should be rejected
+
+    with assert_raises(ValueError):
+        hllcpp = khmer.HLLCounter(0.367696, K)
+
+
+def test_hll_error_rate_max():
+    # the largest supported error rate should be accepted
+
+    hllcpp = khmer.HLLCounter(0.367695, K)
+    assert len(hllcpp.counters) == 2 ** 4
+
+
+def test_hll_invalid_error_rate_min():
+    # error rates below the supported minimum should be rejected
+
+    with assert_raises(ValueError):
+        hllcpp = khmer.HLLCounter(0.0040624, K)
+
+
+def test_hll_error_rate_min():
+    # the smallest supported error rate should be accepted
+
+    hllcpp = khmer.HLLCounter(0.0040625, K)
+    assert len(hllcpp.counters) == 2 ** 16
+
+
+def test_hll_change_error_rate():
+    hllcpp = khmer.HLLCounter(0.0040625, K)
+    assert hllcpp.error_rate == 0.0040625
+
+    # the error rate is discrete; check that a requested 1% error rate
+    # rounds to the appropriate representable value
+    hllcpp.error_rate = 0.01
+    assert hllcpp.error_rate == 0.008125
+
+    with assert_raises(TypeError):
+        del hllcpp.error_rate
+
+    with assert_raises(TypeError):
+        hllcpp.error_rate = 5
+
+    with assert_raises(ValueError):
+        hllcpp.error_rate = 2.5
+
+    with assert_raises(ValueError):
+        hllcpp.error_rate = -10.
+
+    # error rate can only be changed prior to first counting.
+    hllcpp.consume_string('AAACCACTTGTGCATGTCAGTGCAGTCAGT')
+    with assert_raises(AttributeError):
+        hllcpp.error_rate = 0.3
+
+
+def test_hll_change_ksize():
+    hllcpp = khmer.HLLCounter(0.0040625, K)
+    assert hllcpp.ksize == K
+
+    hllcpp.ksize = 24
+    assert hllcpp.ksize == 24
+
+    hllcpp.ksize = 12L
+    assert hllcpp.ksize == 12
+
+    with assert_raises(ValueError):
+        hllcpp.ksize = -20
+
+    with assert_raises(TypeError):
+        del hllcpp.ksize
+
+    with assert_raises(TypeError):
+        hllcpp.ksize = 33.4
+
+    # ksize can only be changed prior to first counting.
+    hllcpp.consume_string('AAACCACTTGTGCATGTCAGTGCAGTCAGT')
+    with assert_raises(AttributeError):
+        hllcpp.ksize = 30
+
+
+def test_hll_get_counters():
+    hll = khmer.HLLCounter(0.36, K)
+    counters = hll.counters
+    assert len(counters) == 2 ** 4
+    assert all(c == 0 for c in counters)
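
The alpha values and counter sizes asserted above are the standard
HyperLogLog constants (alpha_16 = 0.673, alpha_32 = 0.697, alpha_64 = 0.709),
and the table sizes follow the usual relationship err ~ 1.04 / sqrt(m) for
m = 2**p registers. A sketch of how a requested error rate maps to the
discrete rate and table size, inferred from the boundary values these tests
assert rather than taken from khmer's source:

    import math

    def discrete_error_rate(requested):
        # Pick the smallest table meeting the request; the tests imply
        # p is limited to the range [4, 16].
        for p in range(4, 17):
            err = 1.04 / math.sqrt(2 ** p)
            if err <= requested:
                return err, 2 ** p
        raise ValueError("requested error rate too small")

    err, m = discrete_error_rate(0.01)
    assert err == 0.008125 and m == 2 ** 14   # cf. test_hll_change_error_rate
    assert discrete_error_rate(0.36)[1] == 2 ** 4
    assert discrete_error_rate(0.16)[1] == 2 ** 6
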
diff --git a/tests/test_labelhash.py b/tests/test_labelhash.py
index 587f52c..36c9e7b 100644
--- a/tests/test_labelhash.py
+++ b/tests/test_labelhash.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring,protected-access
@@ -22,6 +22,15 @@ def teardown():
 #  * thread-safety
 
 
+@attr('linux')
+def test_toobig():
+    try:
+        lh = LabelHash(20, 1e13, 1)
+        assert 0, "This should fail."
+    except MemoryError as err:
+        print str(err)
+
+
 def test_n_labels():
     lh = LabelHash(20, 1e7, 4)
     filename = utils.get_test_data('test-labels.fa')
diff --git a/tests/test_lump.py b/tests/test_lump.py
index 2e597d6..3d82471 100644
--- a/tests/test_lump.py
+++ b/tests/test_lump.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring
diff --git a/tests/test_read_aligner.py b/tests/test_read_aligner.py
index 6ef105f..b5ce9b5 100644
--- a/tests/test_read_aligner.py
+++ b/tests/test_read_aligner.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb at msu.edu
+# the three-clause BSD license; see LICENSE. Contact: ctb at msu.edu
 #
 import khmer
 from nose.tools import assert_almost_equals
@@ -15,7 +15,7 @@ def eq_(v1, v2):
 def test_alignnocov():
     ch = khmer.new_counting_hash(10, 1048576, 1)
     read = "ACCTAGGTTCGACATGTACC"
-    aligner = khmer.new_readaligner(ch, 0, 0)
+    aligner = khmer.ReadAligner(ch, 0, 0)
     for i in range(20):
         ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
     ch.consume("ACCTAGGTTCGACATGTACC")
@@ -28,7 +28,7 @@ def test_alignnocov():
 
 def test_simple_readalign():
     ch = khmer.new_counting_hash(10, 1048576, 1)
-    aligner = khmer.new_readaligner(ch, 2, 0)
+    aligner = khmer.ReadAligner(ch, 2, 0)
     for i in range(20):
         ch.consume("AGAGGGAAAGCTAGGTTCGACATGTCCTTGACAGAT")
     read = "ACCTAGGTTCGACAAGTACC"
@@ -47,7 +47,7 @@ def test_simple_readalign():
 
 def test_readalign():
     ch = khmer.new_counting_hash(10, 1048576, 1)
-    aligner = khmer.new_readaligner(ch, 1, 0)
+    aligner = khmer.ReadAligner(ch, 1, 0)
     for i in range(20):
         ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")
     read = "ACCTAGGTTCGACATGTACC"
@@ -208,7 +208,7 @@ queries = [
 
 def test_readalign_new():
     ch = khmer.new_counting_hash(32, 1048576, 1)
-    aligner = khmer.new_readaligner(ch, 1, 0)
+    aligner = khmer.ReadAligner(ch, 1, 0)
     for seq in ht_seqs:
         ch.consume(seq)
 
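
These hunks track a public API change: the khmer.new_readaligner() factory
calls are replaced by direct khmer.ReadAligner() construction with the same
arguments. A minimal usage sketch; the parameter meanings (a counting table,
a trusted-coverage cutoff, then a bits_theta scoring parameter) are inferred
from how these tests call it:

    import khmer

    ch = khmer.new_counting_hash(10, 1048576, 1)
    for _ in range(20):
        ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT")

    aligner = khmer.ReadAligner(ch, 1, 0)    # (table, cutoff, bits_theta)
    result = aligner.align("ACCTAGGTTCGACATGTACC")
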
diff --git a/tests/test_read_parsers.py b/tests/test_read_parsers.py
index f1b0ad1..f2ae329 100644
--- a/tests/test_read_parsers.py
+++ b/tests/test_read_parsers.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 
@@ -25,7 +25,7 @@ def test_read_properties():
         assert read.name == "895:1:1:1246:14654 1:N:0:NNNNN"
         assert read.sequence == "CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT"
         assert read.annotations == ""
-        assert read.accuracy == """][aaX__aa[`ZUZ[NONNFNNNNNO_____^RQ_"""
+        assert read.quality == """][aaX__aa[`ZUZ[NONNFNNNNNO_____^RQ_"""
 
 
 def test_with_default_arguments():
@@ -45,8 +45,53 @@ def test_with_default_arguments():
         assert m == n
 
 
-def test_gzip_decompression():
+def test_num_reads():
+    """Test ReadParser.num_reads"""
+    reads_count = 0
+    rparser = ReadParser(utils.get_test_data("100-reads.fq.gz"))
+    for _ in rparser:
+        reads_count += 1
+
+    assert reads_count == 100
+    assert rparser.num_reads == 100
 
+
+@attr('multithread')
+def test_num_reads_threads():
+    """Test threadsaftey of ReadParser's read counting"""
+    import threading
+
+    def count_reads(rparser):
+        for _ in rparser:
+            pass
+
+    n_threads = 4
+    threads = []
+    rparser = ReadParser(utils.get_test_data("100-reads.fq.gz"))
+    for _ in xrange(n_threads):
+        thr = threading.Thread(target=count_reads, args=[rparser, ])
+        threads.append(thr)
+        thr.start()
+    for thr in threads:
+        thr.join()
+
+    assert rparser.num_reads == 100
+
+
+def test_num_reads_truncated():
+
+    n_reads = 0
+    rparser = ReadParser(utils.get_test_data("truncated.fq"))
+    try:
+        for read in rparser:
+            n_reads += 1
+    except IOError as err:
+        assert "Sequence is empty" in str(err), str(err)
+    assert rparser.num_reads == 1, "%d valid reads in file, got %d" % (
+        n_reads, rparser.num_reads)
+
+
+def test_gzip_decompression():
     reads_count = 0
     rparser = ReadParser(utils.get_test_data("100-reads.fq.gz"))
     for read in rparser:
@@ -62,7 +107,7 @@ def test_gzip_decompression_truncated():
         for read in rparser:
             pass
         assert 0, "this should fail"
-    except IOError, err:
+    except IOError as err:
         print str(err)
 
 
@@ -73,7 +118,7 @@ def test_gzip_decompression_truncated_pairiter():
         for read in rparser.iter_read_pairs():
             pass
         assert 0, "this should fail"
-    except IOError, err:
+    except IOError as err:
         print str(err)
 
 
@@ -94,7 +139,7 @@ def test_bzip2_decompression_truncated():
         for read in rparser:
             pass
         assert 0, "this should fail"
-    except IOError, err:
+    except IOError as err:
         print str(err)
 
 
@@ -105,7 +150,7 @@ def test_bzip2_decompression_truncated_pairiter():
         for read in rparser.iter_read_pairs():
             pass
         assert 0, "this should fail"
-    except IOError, err:
+    except IOError as err:
         print str(err)
 
 
@@ -117,7 +162,7 @@ def test_badbzip2():
         assert 0, "this should fail"
     except IOError as err:
         print str(err)
-    except ValueError, err:
+    except ValueError as err:
         print str(err)
 
 
@@ -164,9 +209,6 @@ def test_old_illumina_pair_mating():
 
     import threading
 
-    # Note: This file, when used in conjunction with a 65600 byte per-thread
-    #       prefetch buffer, tests the paired read mating logic with the
-    #       old Illumina read name format.
     rparser = ReadParser(utils.get_test_data("test-reads.fa"))
 
     def thread_1_runtime(rparser):
@@ -177,7 +219,6 @@ def test_old_illumina_pair_mating():
         for readnum, read in enumerate(rparser):
             if 0 == readnum:
                 pass
-                # assert "850:2:1:1198:16820/1" == read.name, read.name
 
     t1 = threading.Thread(target=thread_1_runtime, args=[rparser])
     t2 = threading.Thread(target=thread_2_runtime, args=[rparser])
@@ -219,6 +260,17 @@ def test_casava_1_8_pair_mating():
     t2.join()
 
 
+def test_read_truncated():
+
+    rparser = ReadParser(utils.get_test_data("truncated.fq"))
+    try:
+        for read in rparser:
+            pass
+        assert 0, "No exception raised on a truncated file"
+    except IOError as err:
+        assert "Sequence is empty" in str(err), str(err)
+
+
 def test_iterator_identities():
 
     rparser = \
@@ -318,6 +370,6 @@ def test_iternext():
         assert 0, "Shouldn't be able to iterate over non FASTA file"
     except IOError as err:
         print str(err)
-    except ValueError, err:
+    except ValueError as err:
         print str(err)
 # vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
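
test_num_reads_threads drives a single ReadParser from four threads and still
expects num_reads == 100, so the parser's internal read counter must be
updated atomically. A minimal sketch of the invariant under test, using a
lock-guarded Python counter (an illustration, not khmer's C++
implementation):

    import threading

    class CountingParser(object):
        # Stand-in for a shared parser: hands out 100 items and counts them.
        def __init__(self, n_items=100):
            self._remaining = n_items
            self.num_reads = 0
            self._lock = threading.Lock()

        def next_read(self):
            with self._lock:              # atomic check-and-count
                if self._remaining == 0:
                    return None
                self._remaining -= 1
                self.num_reads += 1
                return "read"

    parser = CountingParser()

    def consume(p):
        while p.next_read() is not None:
            pass

    threads = [threading.Thread(target=consume, args=(parser,))
               for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert parser.num_reads == 100        # no reads lost or double-counted
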
diff --git a/tests/test_sandbox_scripts.py b/tests/test_sandbox_scripts.py
index 3e52fc8..c2ab6fc 100644
--- a/tests/test_sandbox_scripts.py
+++ b/tests/test_sandbox_scripts.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 
@@ -19,7 +19,6 @@ import imp
 
 import khmer_tst_utils as utils
 import khmer
-import khmer.file
 import screed
 
 
@@ -44,6 +43,7 @@ def test_import_all():
 
 
 class _checkImportSucceeds(object):
+
     def __init__(self, tag, filename):
         self.tag = tag
         self.filename = filename
@@ -57,7 +57,7 @@ class _checkImportSucceeds(object):
             print traceback.format_exc()
             raise AssertionError("%s cannot be imported" % (self.filename,))
 
-        ###
+        #
 
         oldargs = sys.argv
         sys.argv = [self.filename]
@@ -69,7 +69,9 @@ class _checkImportSucceeds(object):
         try:
             try:
                 global_dict = {'__name__': '__main__'}
-                execfile(self.filename, global_dict)
+                exec(
+                    compile(open(self.filename).read(), self.filename, 'exec'),
+                    global_dict)
             except (ImportError, SyntaxError):
                 print traceback.format_exc()
                 raise AssertionError("%s cannot be exec'd" % (self.filename,))
@@ -107,6 +109,10 @@ def test_sweep_reads():
 
     print os.listdir(in_dir)
 
+    assert os.path.exists(out1)
+    assert os.path.exists(out2)
+    assert os.path.exists(mout)
+    assert os.path.exists(oout)
     seqs1 = set([r.name for r in screed.open(out1)])
     seqs2 = set([r.name for r in screed.open(out2)])
     seqsm = set([r.name for r in screed.open(mout)])
@@ -147,6 +153,10 @@ def test_sweep_reads_fq():
     mout = os.path.join(in_dir, 'test_multi.fq')
     oout = os.path.join(in_dir, 'test_orphaned.fq')
 
+    assert os.path.exists(out1)
+    assert os.path.exists(out2)
+    assert os.path.exists(mout)
+    assert os.path.exists(oout)
     print open(out1).read()
 
     print os.listdir(in_dir)
@@ -166,10 +176,10 @@ def test_sweep_reads_fq():
             seqsm == set(['read4_multi\t1\t0']))
     assert seqso == set(['read5_orphan'])
 
-    seqs1 = set([r.accuracy for r in screed.open(out1)])
-    seqs2 = set([r.accuracy for r in screed.open(out2)])
-    seqsm = set([r.accuracy for r in screed.open(mout)])
-    seqso = set([r.accuracy for r in screed.open(oout)])
+    seqs1 = set([r.quality for r in screed.open(out1)])
+    seqs2 = set([r.quality for r in screed.open(out2)])
+    seqsm = set([r.quality for r in screed.open(mout)])
+    seqso = set([r.quality for r in screed.open(oout)])
 
 
 def test_sweep_reads_2():
@@ -219,144 +229,3 @@ def test_sweep_reads_3():
     assert os.path.exists(counts_fn)
     assert os.path.exists(os.path.join(wdir, 'test.dist.txt'))
     assert not os.path.exists(os.path.join(wdir, 'test_multi.fa'))
-
-
-def test_trim_low_abund_1():
-    infile = utils.get_temp_filename('test.fa')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", infile]
-    utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-
-    outfile = infile + '.abundtrim'
-    assert os.path.exists(outfile), outfile
-
-    seqs = set([r.sequence for r in screed.open(outfile)])
-    assert len(seqs) == 1, seqs
-    assert 'GGTTGACGGGGCTCAGGG' in seqs
-
-
-def test_trim_low_abund_1_duplicate_filename_err():
-    infile = utils.get_temp_filename('test.fa')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile]
-    try:
-        utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-        raise Exception("should not reach this")
-    except AssertionError:
-        # an error should be raised by passing 'infile' twice.
-        pass
-
-
-def test_trim_low_abund_2():
-    infile = utils.get_temp_filename('test.fa')
-    infile2 = utils.get_temp_filename('test2.fa')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile2)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile2]
-    utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-
-    outfile = infile + '.abundtrim'
-    assert os.path.exists(outfile), outfile
-
-    seqs = set([r.sequence for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-    assert 'GGTTGACGGGGCTCAGGG' in seqs
-
-# make sure that FASTQ records are retained.
-
-
-def test_trim_low_abund_3_fq_retained():
-    infile = utils.get_temp_filename('test.fq')
-    infile2 = utils.get_temp_filename('test2.fq')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile2)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile2]
-    utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-
-    outfile = infile + '.abundtrim'
-    assert os.path.exists(outfile), outfile
-
-    seqs = set([r.sequence for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-    assert 'GGTTGACGGGGCTCAGGG' in seqs
-
-    # check for 'accuracy' string.
-    seqs = set([r.accuracy for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-    assert '##################' in seqs
-
-
-# test that the -V option does not trim sequences that are low abundance
-
-
-def test_trim_low_abund_4_retain_low_abund():
-    infile = utils.get_temp_filename('test.fa')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", '-V', infile]
-    utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-
-    outfile = infile + '.abundtrim'
-    assert os.path.exists(outfile), outfile
-
-    seqs = set([r.sequence for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-    assert 'GGTTGACGGGGCTCAGGG' in seqs
-
-# test that the -V option *does* trim sequences that are low abundance
-
-
-def test_trim_low_abund_5_trim_high_abund():
-    infile = utils.get_temp_filename('test.fa')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), infile)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", '-V', infile]
-    utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-
-    outfile = infile + '.abundtrim'
-    assert os.path.exists(outfile), outfile
-
-    seqs = set([r.sequence for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-
-    # trimmed sequence @ error
-    assert 'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGC' in seqs
-
-# test that -V/-Z setting - should not trip if -Z is set high enough.
-
-
-def test_trim_low_abund_6_trim_high_abund_Z():
-    infile = utils.get_temp_filename('test.fa')
-    in_dir = os.path.dirname(infile)
-
-    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), infile)
-
-    args = ["-k", "17", "-x", "1e7", "-N", "2", '-V', '-Z', '25', infile]
-    utils.runscript('trim-low-abund.py', args, in_dir, sandbox=True)
-
-    outfile = infile + '.abundtrim'
-    assert os.path.exists(outfile), outfile
-
-    seqs = set([r.sequence for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-
-    # untrimmed seq.
-    badseq = 'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCgtgCCGCAGCTGTCGTCAGGG' \
-             'GATTTCCGGGCGG'
-    assert badseq in seqs       # should be there, untrimmed
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py
index 9af1a75..bc0e132 100644
--- a/tests/test_script_arguments.py
+++ b/tests/test_script_arguments.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2014-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 """
@@ -12,7 +12,7 @@ import sys
 import cStringIO
 import khmer_tst_utils as utils
 
-import khmer.file
+import khmer.kfile
 
 
 def test_check_space():
@@ -20,7 +20,7 @@ def test_check_space():
 
     save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO()
     try:
-        khmer.file.check_space(
+        khmer.kfile.check_space(
             [fakelump_fa], force=False, _testhook_free_space=0)
         assert 0, "this should fail"
     except SystemExit as e:
@@ -32,7 +32,7 @@ def test_check_space():
 def test_check_tablespace():
     save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO()
     try:
-        khmer.file.check_space_for_hashtable(
+        khmer.kfile.check_space_for_hashtable(
             1e9, force=False, _testhook_free_space=0)
         assert 0, "this should fail"
     except SystemExit as e:
@@ -46,7 +46,7 @@ def test_check_space_force():
 
     save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO()
     try:
-        khmer.file.check_space(
+        khmer.kfile.check_space(
             [fakelump_fa], force=True, _testhook_free_space=0)
         assert True, "this should pass"
     except SystemExit as e:
@@ -58,10 +58,22 @@ def test_check_space_force():
 def test_check_tablespace_force():
     save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO()
     try:
-        khmer.file.check_space_for_hashtable(
+        khmer.kfile.check_space_for_hashtable(
             1e9, force=True, _testhook_free_space=0)
         assert True, "this should pass"
     except SystemExit as e:
         print str(e)
     finally:
         sys.stderr = save_stderr
+
+
+def test_invalid_file_warn():
+    save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO()
+    try:
+        khmer.kfile.check_valid_file_exists(["nonexistent", "nonexistent2"])
+        assert sys.stderr.getvalue().count("\n") == 2, \
+            "Should produce two warning lines"
+    except SystemExit as e:
+        print str(e)
+    finally:
+        sys.stderr = save_stderr
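
Each test above repeats the same save/swap/restore dance around sys.stderr;
the idiom generalizes to a small context manager. A possible refactor sketch
of the same pattern (not part of khmer):

    import sys
    import cStringIO
    from contextlib import contextmanager

    import khmer.kfile

    @contextmanager
    def captured_stderr():
        # Swap in a StringIO for sys.stderr; always restore it afterwards.
        save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO()
        try:
            yield sys.stderr
        finally:
            sys.stderr = save_stderr

    with captured_stderr() as err:
        try:
            khmer.kfile.check_valid_file_exists(["nonexistent",
                                                 "nonexistent2"])
        except SystemExit as e:
            print str(e)
    assert err.getvalue().count("\n") == 2
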
diff --git a/tests/test_scripts.py b/tests/test_scripts.py
index d20aa18..2f4a75d 100644
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 
@@ -10,6 +10,7 @@
 import json
 import sys
 import os
+import stat
 import shutil
 from cStringIO import StringIO
 import traceback
@@ -21,7 +22,7 @@ import io
 
 import khmer_tst_utils as utils
 import khmer
-import khmer.file
+import khmer.kfile
 import screed
 
 
@@ -36,7 +37,7 @@ def teardown():
 def test_check_space():
     # @CTB this probably belongs in a new test file, along with other
     # tests of the file.py module.
-    khmer.file.check_space(
+    khmer.kfile.check_space(
         ['', utils.get_test_data('test-abund-read-2.fa')], False)
 
 
@@ -44,7 +45,7 @@ def test_load_into_counting():
     script = scriptpath('load-into-counting.py')
     args = ['-x', '1e3', '-N', '2', '-k', '20', '-t']
 
-    outfile = utils.get_temp_filename('out.kh')
+    outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
 
     args.extend([outfile, infile])
@@ -54,18 +55,52 @@ def test_load_into_counting():
     assert os.path.exists(outfile)
 
 
+def test_load_into_counting_nonwritable():
+    script = scriptpath('load-into-counting.py')
+    args = ['-x', '1e3', '-N', '2', '-k', '20', '-t']
+
+    outfile = utils.get_temp_filename('test-nonwritable')
+    with open(outfile, 'w') as fout:
+        fout.write("This file is non-writable (after this)")
+
+    os.chmod(outfile, stat.S_IWOTH | stat.S_IRUSR)
+    infile = utils.get_test_data('test-abund-read-2.fa')
+
+    args.extend([outfile, infile])
+
+    (status, out, err) = utils.runscript(script, args, fail_ok=True)
+    assert 'does not have write permission; exiting' in err, err
+    assert status == 1, status
+
+
+@attr('linux')
+def test_load_into_counting_toobig():
+    script = scriptpath('load-into-counting.py')
+    args = ['-x', '1e12', '-N', '2', '-k', '20', '-t', '--force']
+
+    outfile = utils.get_temp_filename('out.kh')
+    infile = utils.get_test_data('test-abund-read-2.fa')
+
+    args.extend([outfile, infile])
+
+    (status, out, err) = utils.runscript(script, args, fail_ok=True)
+    assert status == -1, status
+    assert "MemoryError" in err, err
+
+
 def test_load_into_counting_fail():
     script = scriptpath('load-into-counting.py')
     args = ['-x', '1e2', '-N', '2', '-k', '20']  # use small HT
 
-    outfile = utils.get_temp_filename('out.kh')
+    outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
 
     args.extend([outfile, infile])
 
     (status, out, err) = utils.runscript(script, args, fail_ok=True)
     assert status == 1, status
-    assert "ERROR:" in err
+    print err
+    assert "** ERROR: the graph structure is too small" in err
 
 
 def test_load_into_counting_multifile():
@@ -87,7 +122,7 @@ def test_load_into_counting_tsv():
     script = scriptpath('load-into-counting.py')
     args = ['-x', '1e7', '-N', '2', '-k', '20', '-t', '-s', 'tsv']
 
-    outfile = utils.get_temp_filename('out.kh')
+    outfile = utils.get_temp_filename('out.ct')
     tabfile = outfile + '.info.tsv'
     infile = utils.get_test_data('test-abund-read-2.fa')
 
@@ -101,7 +136,8 @@ def test_load_into_counting_tsv():
         tabfile_lines = tabfh.readlines()
     assert len(tabfile_lines) == 2
     outbase = os.path.basename(outfile)
-    expected_tsv_line = '\t'.join([outbase, '0.000', '95', infile]) + '\n'
+    tsv = [outbase, '0.000', '95', '1001', infile]
+    expected_tsv_line = '\t'.join(tsv) + '\n'
     assert tabfile_lines[1] == expected_tsv_line, tabfile_lines
 
 
@@ -109,7 +145,7 @@ def test_load_into_counting_json():
     script = scriptpath('load-into-counting.py')
     args = ['-x', '1e7', '-N', '2', '-k', '20', '-t', '-s', 'json']
 
-    outfile = utils.get_temp_filename('out.kh')
+    outfile = utils.get_temp_filename('out.ct')
     jsonfile = outfile + '.info.json'
     infile = utils.get_test_data('test-abund-read-2.fa')
 
@@ -127,8 +163,9 @@ def test_load_into_counting_json():
         "files": [infile],
         "ht_name": outbase,
         "num_kmers": 95,
+        "num_reads": 1001,
         "fpr": 9.024965705097741e-11,
-        "mrinfo_version": "0.1.0",
+        "mrinfo_version": "0.2.0",
     }
 
     assert got_json == expected_json, got_json
@@ -138,7 +175,7 @@ def test_load_into_counting_bad_summary_fmt():
     script = scriptpath('load-into-counting.py')
     args = ['-x', '1e7', '-N', '2', '-k', '20', '-s', 'badfmt']
 
-    outfile = utils.get_temp_filename('out.kh')
+    outfile = utils.get_temp_filename('out.ct')
     infile = utils.get_test_data('test-abund-read-2.fa')
 
     args.extend([outfile, infile])
@@ -155,7 +192,7 @@ def _make_counting(infilename, SIZE=1e7, N=2, K=20, BIGCOUNT=True):
     if not BIGCOUNT:
         args.append('-b')
 
-    outfile = utils.get_temp_filename('out.kh')
+    outfile = utils.get_temp_filename('out.ct')
 
     args.extend([outfile, infilename])
 
@@ -244,10 +281,32 @@ def test_filter_abund_3_fq_retained():
     assert len(seqs) == 2, seqs
     assert 'GGTTGACGGGGCTCAGGG' in seqs
 
-    # check for 'accuracy' string.
-    seqs = set([r.accuracy for r in screed.open(outfile)])
-    assert len(seqs) == 2, seqs
-    assert '##################' in seqs
+    # check for 'quality' string.
+    quals = set([r.quality for r in screed.open(outfile)])
+    assert len(quals) == 2, quals
+    assert '##################' in quals
+
+
+# make sure that FASTQ names are properly parsed, both formats.
+
+
+def test_filter_abund_4_fq_casava_18():
+    infile = utils.get_temp_filename('test.fq')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired2.fq'),
+                    infile)
+    counting_ht = _make_counting(infile, K=17)
+
+    script = scriptpath('filter-abund.py')
+    args = [counting_ht, infile, infile]
+    utils.runscript(script, args, in_dir)
+
+    outfile = infile + '.abundfilt'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile, parse_description=False)])
+    assert 'pair:foo 1::N' in seqs, seqs
 
 
 def test_filter_abund_1_singlefile():
@@ -292,6 +351,24 @@ def test_filter_abund_2_singlefile():
     assert 'GGTTGACGGGGCTCAGGG' in seqs
 
 
+def test_filter_abund_2_singlefile_fq_casava_18():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired2.fq'),
+                    infile)
+
+    script = scriptpath('filter-abund-single.py')
+    args = ['-x', '1e7', '-N', '2', '-k', '17', infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    outfile = infile + '.abundfilt'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile, parse_description=False)])
+    assert 'pair:foo 1::N' in seqs, seqs
+
+
 def test_filter_abund_4_retain_low_abund():
     # test that the -V option does not trim sequences that are low abundance
     infile = utils.get_temp_filename('test.fa')
@@ -391,9 +468,45 @@ def test_filter_stoptags():
     assert 'GGTTGACGGGGCTCAGGG' in seqs, seqs
 
 
+def test_filter_stoptags_fq():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+    stopfile = utils.get_temp_filename('stoptags', in_dir)
+
+    # first, copy test-abund-read-2.fq to 'test.fa' in the temp dir.
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
+
+    # now, create a file with some stop tags in it --
+    K = 18
+    kh = khmer.new_hashbits(K, 1, 1)
+    kh.add_stop_tag('GTTGACGGGGCTCAGGGG')
+    kh.save_stop_tags(stopfile)
+    del kh
+
+    # finally, run filter-stoptags.
+    script = scriptpath('filter-stoptags.py')
+    args = ['-k', str(K), stopfile, infile, infile]
+    utils.runscript(script, args, in_dir)
+
+    # verify that the basic output file exists
+    outfile = infile + '.stopfilt'
+    assert os.path.exists(outfile), outfile
+
+    # it should contain only one unique sequence, because we've trimmed
+    # off everything after the beginning of the only long sequence in there.
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 1, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs, seqs
+
+    # make sure that record names are carried through unparsed
+    names = [r.name for r in screed.open(outfile, parse_description=False)]
+    names = set(names)
+    assert 'seq 1::BAR' in names
+
+
 def test_normalize_by_median_indent():
     infile = utils.get_test_data('paired-mixed.fa.pe')
-    hashfile = utils.get_test_data('normC20k20.kh')
+    hashfile = utils.get_test_data('normC20k20.ct')
     outfile = utils.get_temp_filename('paired-mixed.fa.pe.keep')
     script = scriptpath('normalize-by-median.py')
     args = ['--loadtable', hashfile, '-o', outfile, infile]
@@ -424,6 +537,58 @@ def test_normalize_by_median():
     assert seqs[0].startswith('GGTTGACGGGGCTCAGGGGG'), seqs
 
 
+def test_normalize_by_median_unpaired_and_paired():
+    CUTOFF = '1'
+
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-paired.fa'), infile)
+
+    unpairedfile = utils.get_temp_filename('test1.fa', tempdir=in_dir)
+    shutil.copyfile(utils.get_test_data('random-20-a.fa'), unpairedfile)
+
+    script = scriptpath('normalize-by-median.py')
+    args = ['-C', CUTOFF, '-k', '17', '-t', '-u', unpairedfile, '-p', infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert 'Total number of unique k-mers: 4029' in err, err
+
+    outfile = infile + '.keep'
+    assert os.path.exists(outfile), outfile
+
+
+def test_normalize_by_median_double_file_name():
+    infile = utils.get_temp_filename('test-abund-read-2.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    script = scriptpath('normalize-by-median.py')
+    args = [utils.get_test_data('test-abund-read-2.fa'), infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert "WARNING: At least two input files are named" in err, err
+
+
+def test_normalize_by_median_overwrite():
+    outfile = utils.get_temp_filename('test.fa.keep')
+    shutil.copyfile(utils.get_test_data('test-abund-read.fa'), outfile)
+    in_dir = os.path.dirname(outfile)
+
+    CUTOFF = '1'
+    infile = utils.get_temp_filename('test.fa', in_dir)
+    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), infile)
+    script = scriptpath('normalize-by-median.py')
+
+    args = ['-C', CUTOFF, '-k', '17', '-t', '-o', outfile, infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+    assert os.path.exists(outfile), outfile
+    seqs = [r.sequence for r in screed.open(outfile)]
+    assert len(seqs) == 1, seqs
+    assert 'GACAGCgtgCCGCA' in seqs[0], seqs
+
+
 def test_normalize_by_median_version():
     script = scriptpath('normalize-by-median.py')
     args = ['--version']
@@ -484,6 +649,34 @@ def test_normalize_by_median_paired():
     assert seqs[1].startswith('GGTTGACGGGGCTCAGGG'), seqs
 
 
+def test_normalize_by_median_paired_fq():
+    CUTOFF = '20'
+
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-paired.fq'), infile)
+
+    script = scriptpath('normalize-by-median.py')
+    args = ['-C', CUTOFF, '-p', '-k', '17', infile]
+    _, out, err = utils.runscript(script, args, in_dir)
+    print out
+    print err
+
+    outfile = infile + '.keep'
+    assert os.path.exists(outfile), outfile
+
+    seqs = [r.sequence for r in screed.open(outfile)]
+    assert len(seqs) == 6, len(seqs)
+    assert seqs[0].startswith('GGTTGACGGGGCTCAGGGGG'), seqs
+    assert seqs[1].startswith('GGTTGACGGGGCTCAGGG'), seqs
+
+    names = [r.name for r in screed.open(outfile, parse_description=False)]
+    assert len(names) == 6, names
+    assert '895:1:37:17593:9954 1::FOO' in names, names
+    assert '895:1:37:17593:9954 2::FOO' in names, names
+
+
 def test_normalize_by_median_impaired():
     CUTOFF = '1'
 
@@ -494,7 +687,8 @@ def test_normalize_by_median_impaired():
 
     script = scriptpath('normalize-by-median.py')
     args = ['-C', CUTOFF, '-p', '-k', '17', infile]
-    utils.runscript(script, args, in_dir, fail_ok=True)
+    _, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert '** ERROR: Error: Improperly interleaved pairs ' in err
 
 
 def test_normalize_by_median_force():
@@ -527,7 +721,7 @@ def test_normalize_by_median_force():
 
 def test_normalize_by_median_no_bigcount():
     infile = utils.get_temp_filename('test.fa')
-    hashfile = utils.get_temp_filename('test-out.kh')
+    hashfile = utils.get_temp_filename('test-out.ct')
     outfile = infile + '.keep'
     in_dir = os.path.dirname(infile)
 
@@ -592,6 +786,20 @@ def test_normalize_by_median_empty():
     assert os.path.exists(outfile), outfile
 
 
+def test_normalize_by_median_emptycountingtable():
+    CUTOFF = '1'
+
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-empty.fa'), infile)
+
+    script = scriptpath('normalize-by-median.py')
+    args = ['-C', CUTOFF, '--loadtable', infile, infile]
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert 'ValueError' in err, (status, out, err)
+
+
 def test_normalize_by_median_fpr():
     MIN_TABLESIZE_PARAM = 1
 
@@ -605,8 +813,45 @@ def test_normalize_by_median_fpr():
     (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
 
     assert os.path.exists(infile + '.keep')
-    assert 'fp rate estimated to be' in out, out
-    assert '** ERROR: the k-mer counting table is too small' in err, err
+    assert '** ERROR: the graph structure is too small' in err, err
+
+
+def write_by_chunks(infile, outfile, CHUNKSIZE=8192):
+    ifile = io.open(infile, 'rb')
+    ofile = io.open(outfile, 'wb')
+    chunk = ifile.read(CHUNKSIZE)
+    while len(chunk) > 0:
+        ofile.write(chunk)
+        chunk = ifile.read(CHUNKSIZE)
+    ifile.close()
+    ofile.close()
+
+
+def test_normalize_by_median_stdout():
+    CUTOFF = '20'
+
+    infile = utils.get_test_data('100-reads.fq.gz')
+    in_dir = os.path.dirname(infile)
+    fifo = utils.get_temp_filename('fifo')
+    outfile = utils.get_temp_filename('outfile')
+
+    # Use a fifo to copy stdout to a file for checking
+    os.mkfifo(fifo)
+    thread = threading.Thread(target=write_by_chunks, args=(fifo, outfile))
+    thread.start()
+
+    # Execute diginorm
+    script = scriptpath('normalize-by-median.py')
+    args = ['-C', CUTOFF, '-k', '17', '-o', fifo, infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    # Merge the thread
+    thread.join()
+
+    assert os.path.exists(outfile), outfile
+    with open(outfile) as fp:
+        linecount = sum(1 for _ in fp)
+    assert linecount == 400
 
 
 def test_count_median():
@@ -628,6 +873,50 @@ def test_count_median():
     assert 'seq 1001 1001.0 0.0 18' in data
     assert '895:1:37:17593:9954/1 1 103.803741455 303.702941895 114' in data
 
+
+def test_count_median_fq():
+    infile = utils.get_temp_filename('test.fa')
+    outfile = infile + '.counts'
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
+    counting_ht = _make_counting(infile, K=8)
+
+    script = scriptpath('count-median.py')
+    args = [counting_ht, infile, outfile]
+    utils.runscript(script, args)
+
+    assert os.path.exists(outfile), outfile
+
+    data = [x.strip() for x in open(outfile)]
+    data = set(data)
+    assert len(data) == 2, data
+    assert 'seq 1001 1001.0 0.0 18' in data
+    assert '895:1:37:17593:9954 1 103.803741455 303.702941895 114' in data
+
+
+def test_count_median_fq_csv():
+    infile = utils.get_temp_filename('test.fa')
+    outfile = infile + '.counts'
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
+    counting_ht = _make_counting(infile, K=8)
+
+    script = scriptpath('count-median.py')
+    args = ['--csv', counting_ht, infile, outfile]
+    utils.runscript(script, args)
+
+    assert os.path.exists(outfile), outfile
+
+    data = [x.strip() for x in open(outfile)]
+    data = set(data)
+    assert len(data) == 4, data
+    assert 'name,median,average,stddev,seqlen' in data
+    assert 'seq,1001,1001.0,0.0,18' in data
+
+    # verify that sequence names remain unparsed with '--csv'
+    names = set([line.split(',')[0] for line in data])
+    assert '895:1:37:17593:9954 1::FOO' in names, names
+
 #
 
 
@@ -695,7 +984,7 @@ def test_load_graph_fail():
 
     (status, out, err) = utils.runscript(script, args, fail_ok=True)
     assert status == 1, status
-    assert "ERROR:" in err
+    assert "** ERROR: the graph structure is too small" in err
 
 
 def test_load_graph_write_fp():
@@ -1011,6 +1300,36 @@ def test_extract_partitions():
     assert len(parts) == 1, len(parts)
 
 
+def test_extract_partitions_header_whitespace():
+    seqfile = utils.get_test_data('test-overlap2.fa')
+    graphbase = _make_graph(
+        seqfile, do_partition=True, annotate_partitions=True)
+    in_dir = os.path.dirname(graphbase)
+
+    # get the final part file
+    partfile = os.path.join(in_dir, 'test-overlap2.fa.part')
+
+    # ok, now run extract-partitions.
+    script = scriptpath('extract-partitions.py')
+    args = ['extracted', partfile]
+
+    utils.runscript(script, args, in_dir)
+
+    distfile = os.path.join(in_dir, 'extracted.dist')
+    groupfile = os.path.join(in_dir, 'extracted.group0000.fa')
+    assert os.path.exists(distfile)
+    assert os.path.exists(groupfile)
+
+    dist = open(distfile).readline()
+    assert dist.strip() == '1 11957 11957 11957'
+
+    parts = [r.name.split('\t')[1]
+             for r in screed.open(partfile, parse_description=False)]
+    assert len(parts) == 13538, len(parts)
+    parts = set(parts)
+    assert len(parts) == 12601, len(parts)
+
+
 def test_extract_partitions_fq():
     seqfile = utils.get_test_data('random-20-a.fq')
     graphbase = _make_graph(
@@ -1034,12 +1353,19 @@ def test_extract_partitions_fq():
     dist = open(distfile).readline()
     assert dist.strip() == '99 1 1 99'
 
-    parts = [r.name.split('\t')[1] for r in screed.open(partfile)]
+    screed_iter = screed.open(partfile, parse_description=False)
+    names = [r.name.split('\t')[0] for r in screed_iter]
+    assert '35 1::FOO' in names
+    assert '46 1::FIZ' in names
+
+    screed_iter = screed.open(partfile, parse_description=False)
+    parts = [r.name.split('\t')[1] for r in screed_iter]
+
     assert len(parts) == 99, len(parts)
     parts = set(parts)
     assert len(parts) == 1, len(parts)
 
-    quals = set([r.accuracy for r in screed.open(partfile)])
+    quals = set([r.quality for r in screed.open(partfile)])
     quals = list(quals)
     assert quals[0], quals
 
@@ -1082,15 +1408,15 @@ def test_extract_partitions_no_output_groups():
     in_dir = os.path.dirname(graphbase)
 
     # get the final part file
-    partfile = os.path.join(in_dir, 'random-20-a.fa.part')
+    partfile = os.path.join(in_dir, 'random-20-a.fq.part')
 
     # ok, now run extract-partitions.
     script = scriptpath('extract-partitions.py')
     args = ['-n', 'extracted', partfile]
 
     # We expect a sys.exit -> we need the test to be tolerant
-    utils.runscript(script, args, in_dir, fail_ok=True)
-
+    _, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert "NOT outputting groups! Beware!" in err
     # Group files are created after output_groups is
     # checked. They should not exist in this scenario
     groupfile = os.path.join(in_dir, 'extracted.group0000.fa')
@@ -1154,8 +1480,8 @@ def test_extract_partitions_no_groups():
     script = scriptpath('extract-partitions.py')
     args = ['extracted', empty_file]
 
-    utils.runscript(script, args, in_dir, fail_ok=True)
-
+    _, _, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert "ERROR: Input file", "is empty; Exiting." in err
     # No group files should be created
     groupfile = os.path.join(in_dir, 'extracted.group0000.fa')
 
@@ -1181,6 +1507,18 @@ def test_abundance_dist():
     line = fp.next().strip()
     assert line == '1001 2 98 1.0', line
 
+    os.remove(outfile)
+    args = ['-z', '--csv', htfile, infile, outfile]
+    utils.runscript(script, args, in_dir)
+
+    fp = iter(open(outfile))
+    line = fp.next().strip()
+    assert (line == 'abundance,count,cumulative,cumulative_fraction'), line
+    line = fp.next().strip()
+    assert line == '1,96,96,0.98', line
+    line = fp.next().strip()
+    assert line == '1001,2,98,1.0', line
+
 
 def test_abundance_dist_nobigcount():
     infile = utils.get_temp_filename('test.fa')
@@ -1223,7 +1561,7 @@ def test_abundance_dist_single():
     assert line == '1001 2 98 1.0', line
 
 
-def test_abundance_dist_single_nobigcount():
+def test_abundance_dist_threaded():
     infile = utils.get_temp_filename('test.fa')
     outfile = utils.get_temp_filename('test.dist')
     in_dir = os.path.dirname(infile)
@@ -1231,27 +1569,69 @@ def test_abundance_dist_single_nobigcount():
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = scriptpath('abundance-dist-single.py')
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-b', infile, outfile]
-    utils.runscript(script, args, in_dir)
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', '--threads', '18',
+            infile, outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    assert 'Total number of unique k-mers: 98' in err, err
 
     fp = iter(open(outfile))
     line = fp.next().strip()
     assert line == '1 96 96 0.98', line
     line = fp.next().strip()
-    assert line == '255 2 98 1.0', line
+    assert line == '1001 2 98 1.0', line
 
 
-def test_abundance_dist_single_nosquash():
+def test_abundance_dist_single_csv():
     infile = utils.get_temp_filename('test.fa')
-    outfile = utils.get_temp_filename('test-abund-read-2.fa')
+    outfile = utils.get_temp_filename('test.dist')
     in_dir = os.path.dirname(infile)
 
     shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
 
     script = scriptpath('abundance-dist-single.py')
-    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', infile, outfile]
-    utils.runscript(script, args, in_dir)
-
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '--csv', infile,
+            outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+
+    fp = iter(open(outfile))
+    line = fp.next().strip()
+    assert (line == 'abundance,count,cumulative,cumulative_fraction'), line
+    line = fp.next().strip()
+    assert line == '1,96,96,0.98', line
+    line = fp.next().strip()
+    assert line == '1001,2,98,1.0', line
+
+
+def test_abundance_dist_single_nobigcount():
+    infile = utils.get_temp_filename('test.fa')
+    outfile = utils.get_temp_filename('test.dist')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    script = scriptpath('abundance-dist-single.py')
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-b', infile, outfile]
+    utils.runscript(script, args, in_dir)
+
+    fp = iter(open(outfile))
+    line = fp.next().strip()
+    assert line == '1 96 96 0.98', line
+    line = fp.next().strip()
+    assert line == '255 2 98 1.0', line
+
+
+def test_abundance_dist_single_nosquash():
+    infile = utils.get_temp_filename('test.fa')
+    outfile = utils.get_temp_filename('test-abund-read-2.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    script = scriptpath('abundance-dist-single.py')
+    args = ['-x', '1e7', '-N', '2', '-k', '17', '-z', '-t', infile, outfile]
+    utils.runscript(script, args, in_dir)
+
     fp = iter(open(outfile))
     line = fp.next().strip()
     assert line == '1 96 96 0.98', line
@@ -1315,7 +1695,24 @@ def test_do_partition_2():
 
     assert len(parts) == 99, len(parts)
 
-#
+
+def test_do_partition_2_fq():
+    # test with K=21 (no joining of sequences)
+    seqfile = utils.get_test_data('random-20-a.fq')
+    graphbase = utils.get_temp_filename('out')
+    in_dir = os.path.dirname(graphbase)
+
+    script = scriptpath('do-partition.py')
+    args = ["-k", "21", graphbase, seqfile]
+
+    utils.runscript(script, args, in_dir)
+
+    partfile = os.path.join(in_dir, 'random-20-a.fq.part')
+
+    screed_iter = screed.open(partfile, parse_description=False)
+    names = [r.name.split('\t')[0] for r in screed_iter]
+    assert '35 1::FOO' in names
+    assert '46 1::FIZ' in names
 
 
 def test_interleave_reads_1_fq():
@@ -1340,6 +1737,69 @@ def test_interleave_reads_1_fq():
     assert r == q, (r, q)
 
 
+def test_interleave_reads_broken_fq():
+    # test input files
+    infile1 = utils.get_test_data('paired-broken.fq.1')
+    infile2 = utils.get_test_data('paired-broken.fq.2')
+
+    # actual output file
+    outfile = utils.get_temp_filename('out.fq')
+
+    script = scriptpath('interleave-reads.py')
+    args = [infile1, infile2, '-o', outfile]
+
+    status, out, err = utils.runscript(script, args, fail_ok=True)
+    assert status == 1
+    assert 'ERROR: Input files contain different number of records.' in err
+
+
+def test_interleave_reads_broken_fq_2():
+    # test input files
+    infile1 = utils.get_test_data('paired-broken2.fq.1')
+    infile2 = utils.get_test_data('paired-broken2.fq.2')
+
+    # actual output file
+    outfile = utils.get_temp_filename('out.fq')
+
+    script = scriptpath('interleave-reads.py')
+    args = [infile1, infile2, '-o', outfile]
+
+    status, out, err = utils.runscript(script, args, fail_ok=True)
+    assert status == 1
+    assert "ERROR: This doesn't look like paired data!" in err
+
+
+def test_interleave_reads_broken_fq_3():
+    # test input files
+    infile1 = utils.get_test_data('paired-broken3.fq.1')
+    infile2 = utils.get_test_data('paired-broken3.fq.2')
+
+    # actual output file
+    outfile = utils.get_temp_filename('out.fq')
+
+    script = scriptpath('interleave-reads.py')
+    args = [infile1, infile2, '-o', outfile]
+
+    status, out, err = utils.runscript(script, args, fail_ok=True)
+    assert status == 1
+    assert "ERROR: This doesn't look like paired data!" in err
+
+
+def test_interleave_reads_broken_fq_4():
+    # test input files
+    infile1 = utils.get_test_data('paired-mixed-broken.fq')
+
+    # actual output file
+    outfile = utils.get_temp_filename('out.fq')
+
+    script = scriptpath('interleave-reads.py')
+    args = [infile1, '-o', outfile]
+
+    status, out, err = utils.runscript(script, args, fail_ok=True)
+    assert status == 1
+    assert "ERROR: given only one filename, that doesn't contain _R1_" in err
+
+
 def test_interleave_reads_2_fa():
     # test input files
     infile1 = utils.get_test_data('paired.fa.1')
@@ -1452,19 +1912,21 @@ def test_extract_paired_reads_2_fq():
     assert os.path.exists(outfile2), outfile2
 
     n = 0
-    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
+    for r, q in zip(screed.open(ex_outfile1, parse_description=False),
+                    screed.open(outfile1, parse_description=False)):
         n += 1
-        assert r.name == q.name
+        assert r.name == q.name, (r.name, q.name, n)
         assert r.sequence == q.sequence
-        assert r.accuracy == q.accuracy
+        assert r.quality == q.quality
     assert n > 0
 
     n = 0
-    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
+    for r, q in zip(screed.open(ex_outfile2, parse_description=False),
+                    screed.open(outfile2, parse_description=False)):
         n += 1
         assert r.name == q.name
         assert r.sequence == q.sequence
-        assert r.accuracy == q.accuracy
+        assert r.quality == q.quality
     assert n > 0
 
 
@@ -1528,7 +1990,7 @@ def test_split_paired_reads_2_fq():
         n += 1
         assert r.name == q.name
         assert r.sequence == q.sequence
-        assert r.accuracy == q.accuracy
+        assert r.quality == q.quality
     assert n > 0
 
     n = 0
@@ -1536,164 +1998,458 @@ def test_split_paired_reads_2_fq():
         n += 1
         assert r.name == q.name
         assert r.sequence == q.sequence
-        assert r.accuracy == q.accuracy
+        assert r.quality == q.quality
     assert n > 0
 
 
-def test_sample_reads_randomly():
+def test_split_paired_reads_2_mixed_fq_require_pair():
+    # test input file
     infile = utils.get_temp_filename('test.fq')
+    shutil.copyfile(utils.get_test_data('paired-mixed.fq'), infile)
     in_dir = os.path.dirname(infile)
 
-    shutil.copyfile(utils.get_test_data('test-fastq-reads.fq'), infile)
+    script = scriptpath('split-paired-reads.py')
+    args = ['-p', infile]
 
-    script = scriptpath('sample-reads-randomly.py')
-    # fix random number seed for reproducibility
-    args = ['-N', '10', '-R', '1']
-    args.append(infile)
-    utils.runscript(script, args, in_dir)
+    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status == 1
+    assert "is not part of a pair" in err
 
-    outfile = infile + '.subset'
-    assert os.path.exists(outfile), outfile
 
-    seqs = set([r.name for r in screed.open(outfile)])
-    assert seqs == set(['895:1:1:1326:7273', '895:1:1:1373:4848',
-                        '895:1:1:1264:15854', '895:1:1:1338:15407',
-                        '895:1:1:1327:15301', '895:1:1:1265:2265',
-                        '895:1:1:1327:13028', '895:1:1:1368:4434',
-                        '895:1:1:1335:19932', '895:1:1:1340:19387'])
+def test_split_paired_reads_2_mixed_fq():
+    # test input file
+    infile = utils.get_temp_filename('test.fq')
+    shutil.copyfile(utils.get_test_data('paired-mixed-2.fq'), infile)
+    in_dir = os.path.dirname(infile)
 
+    script = scriptpath('split-paired-reads.py')
+    args = [infile]
 
-def test_fastq_to_fasta():
+    status, out, err = utils.runscript(script, args, in_dir)
+    assert status == 0
+    assert "split 11 sequences (7 left, 4 right)" in err, err
 
-    script = scriptpath('fastq-to-fasta.py')
-    clean_infile = utils.get_temp_filename('test-clean.fq')
-    n_infile = utils.get_temp_filename('test-n.fq')
 
-    shutil.copyfile(utils.get_test_data('test-fastq-reads.fq'), clean_infile)
-    shutil.copyfile(utils.get_test_data('test-fastq-n-reads.fq'), n_infile)
+def test_split_paired_reads_2_mixed_fq_broken_pairing_format():
+    # test input file
+    infile = utils.get_temp_filename('test.fq')
+    shutil.copyfile(utils.get_test_data('paired-mixed-broken.fq'), infile)
+    in_dir = os.path.dirname(infile)
 
-    clean_outfile = clean_infile + '.keep.fa'
-    n_outfile = n_infile + '.keep.fa'
+    script = scriptpath('split-paired-reads.py')
+    args = [infile]
 
-    in_dir = os.path.dirname(clean_infile)
-    in_dir_n = os.path.dirname(n_infile)
+    status, out, err = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert status == 1
+    assert "Unrecognized format" in err
 
-    args = [clean_infile, '-n', '-o', clean_outfile]
-    (status, out, err) = utils.runscript(script, args, in_dir)
-    assert len(out.splitlines()) == 2, len(out.splitlines())
-    assert "No lines dropped" in err
 
-    args = [n_infile, '-n', '-o', n_outfile]
-    (status, out, err) = utils.runscript(script, args, in_dir_n)
-    assert len(out.splitlines()) == 2
-    assert "No lines dropped" in err
+def test_split_paired_reads_3_output_dir():
+    # test input file
+    infile = utils.get_test_data('paired.fq')
 
-    args = [clean_infile, '-o', clean_outfile]
-    (status, out, err) = utils.runscript(script, args, in_dir)
-    assert len(out.splitlines()) == 2
-    assert "0 lines dropped" in err
+    ex_outfile1 = utils.get_test_data('paired.fq.1')
+    ex_outfile2 = utils.get_test_data('paired.fq.2')
 
-    args = [n_infile, '-o', n_outfile]
-    (status, out, err) = utils.runscript(script, args, in_dir_n)
-    assert len(out.splitlines()) == 2, out
-    assert "4 lines dropped" in err, err
+    # actual output files...
+    outfile1 = utils.get_temp_filename('paired.fq.1')
+    output_dir = os.path.dirname(outfile1)
+    outfile2 = utils.get_temp_filename('paired.fq.2', output_dir)
 
-    args = [clean_infile]
-    (status, out, err) = utils.runscript(script, args, in_dir)
-    assert len(out.splitlines()) > 2
-    assert "0 lines dropped" in err
+    script = scriptpath('split-paired-reads.py')
+    args = ['--output-dir', output_dir, infile]
 
-    args = [n_infile]
-    (status, out, err) = utils.runscript(script, args, in_dir_n)
-    assert len(out.splitlines()) > 2
-    assert "4 lines dropped" in err
+    utils.runscript(script, args)
 
+    assert os.path.exists(outfile1), outfile1
+    assert os.path.exists(outfile2), outfile2
 
-def test_extract_long_sequences():
+    n = 0
+    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
 
-    script = scriptpath('extract-long-sequences.py')
-    fq_infile = utils.get_temp_filename('test.fq')
-    fa_infile = utils.get_temp_filename('test.fa')
+    n = 0
+    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
 
-    shutil.copyfile(utils.get_test_data('paired-mixed.fq'), fq_infile)
-    shutil.copyfile(utils.get_test_data('paired-mixed.fa'), fa_infile)
 
-    fq_outfile = fq_infile + '.keep.fq'
-    fa_outfile = fa_infile + '.keep.fa'
+def test_split_paired_reads_3_output_files():
+    # test input file
+    infile = utils.get_test_data('paired.fq')
 
-    in_dir_fq = os.path.dirname(fq_infile)
-    in_dir_fa = os.path.dirname(fa_infile)
+    ex_outfile1 = utils.get_test_data('paired.fq.1')
+    ex_outfile2 = utils.get_test_data('paired.fq.2')
 
-    args = [fq_infile, '-l', '10', '-o', 'fq_outfile']
-    (status, out, err) = utils.runscript(script, args, in_dir_fa)
+    # actual output files...
+    outfile1 = utils.get_temp_filename('xxx')
+    output_dir = os.path.dirname(outfile1)
+    outfile2 = utils.get_temp_filename('yyy', output_dir)
 
-    countlines = sum(1 for line in open(fq_infile))
-    assert countlines == 44, countlines
+    script = scriptpath('split-paired-reads.py')
+    args = ['-1', outfile1, '-2', outfile2, infile]
 
-    args = [fa_infile, '-l', '10', '-o', 'fa_outfile']
-    (status, out, err) = utils.runscript(script, args, in_dir_fa)
+    utils.runscript(script, args)
 
-    countlines = sum(1 for line in open(fa_infile))
-    assert countlines == 22, countlines
+    assert os.path.exists(outfile1), outfile1
+    assert os.path.exists(outfile2), outfile2
 
+    n = 0
+    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
 
-def test_sample_reads_randomly_S():
-    infile = utils.get_temp_filename('test.fq')
-    in_dir = os.path.dirname(infile)
+    n = 0
+    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
 
-    shutil.copyfile(utils.get_test_data('test-fastq-reads.fq'), infile)
 
-    script = scriptpath('sample-reads-randomly.py')
+def test_split_paired_reads_3_output_files_left():
+    # test input file
+    infile = utils.get_test_data('paired.fq')
 
-    # fix random number seed for reproducibility
-    args = ['-N', '10', '-R', '1', '-S', '3']
+    ex_outfile1 = utils.get_test_data('paired.fq.1')
+    ex_outfile2 = utils.get_test_data('paired.fq.2')
 
-    badargs = list(args)
-    badargs.extend(['-o', 'test', 'test.fq', 'test.fq'])
-    (status, out, err) = utils.runscript(script, badargs, in_dir, fail_ok=True)
-    assert status == 1, (status, out, err)
+    # actual output files...
+    outfile1 = utils.get_temp_filename('xxx')
+    output_dir = os.path.dirname(outfile1)
+    outfile2 = utils.get_temp_filename('paired.fq.2', output_dir)
 
-    args.append('test.fq')
+    script = scriptpath('split-paired-reads.py')
+    args = ['-o', output_dir, '-1', outfile1, infile]
 
-    utils.runscript(script, args, in_dir)
+    utils.runscript(script, args)
 
-    outfile = infile + '.subset.0'
-    assert os.path.exists(outfile), outfile
+    assert os.path.exists(outfile1), outfile1
+    assert os.path.exists(outfile2), outfile2
 
-    seqs = set([r.name for r in screed.open(outfile)])
-    print seqs
-    assert seqs == set(['895:1:1:1298:13380', '895:1:1:1347:3237',
-                        '895:1:1:1295:6189', '895:1:1:1342:11001',
-                        '895:1:1:1252:19493', '895:1:1:1318:10532',
-                        '895:1:1:1314:10430', '895:1:1:1347:8723',
-                        '895:1:1:1381:4958', '895:1:1:1338:6614'])
+    n = 0
+    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
 
-    outfile = infile + '.subset.1'
-    assert os.path.exists(outfile), outfile
+    n = 0
+    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
 
-    seqs = set([r.name for r in screed.open(outfile)])
-    print seqs
-    assert seqs == set(['895:1:1:1384:20217', '895:1:1:1347:3237',
-                        '895:1:1:1348:18672', '895:1:1:1290:11501',
-                        '895:1:1:1386:7536', '895:1:1:1373:13994',
-                        '895:1:1:1355:13535', '895:1:1:1303:6251',
-                        '895:1:1:1381:4958', '895:1:1:1338:6614'])
 
-    outfile = infile + '.subset.2'
-    assert os.path.exists(outfile), outfile
+def test_split_paired_reads_3_output_files_right():
+    # test input file
+    infile = utils.get_test_data('paired.fq')
 
-    seqs = set([r.name for r in screed.open(outfile)])
-    print seqs
-    assert seqs == set(['895:1:1:1326:7273', '895:1:1:1384:20217',
-                        '895:1:1:1347:3237', '895:1:1:1353:6642',
-                        '895:1:1:1340:19387', '895:1:1:1252:19493',
-                        '895:1:1:1381:7062', '895:1:1:1383:3089',
-                        '895:1:1:1342:20695', '895:1:1:1303:6251'])
+    ex_outfile1 = utils.get_test_data('paired.fq.1')
+    ex_outfile2 = utils.get_test_data('paired.fq.2')
 
+    # actual output files...
+    outfile1 = utils.get_temp_filename('paired.fq.1')
+    output_dir = os.path.dirname(outfile1)
+    outfile2 = utils.get_temp_filename('yyy', output_dir)
 
-def test_count_overlap():
-    seqfile1 = utils.get_temp_filename('test-overlap1.fa')
-    in_dir = os.path.dirname(seqfile1)
+    script = scriptpath('split-paired-reads.py')
+    args = ['-2', outfile2, '-o', output_dir, infile]
+
+    utils.runscript(script, args)
+
+    assert os.path.exists(outfile1), outfile1
+    assert os.path.exists(outfile2), outfile2
+
+    n = 0
+    for r, q in zip(screed.open(ex_outfile1), screed.open(outfile1)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
+
+    n = 0
+    for r, q in zip(screed.open(ex_outfile2), screed.open(outfile2)):
+        n += 1
+        assert r.name == q.name
+        assert r.sequence == q.sequence
+        assert r.quality == q.quality
+    assert n > 0
+
+
+def test_sample_reads_randomly():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-reads.fa'), infile)
+
+    script = scriptpath('sample-reads-randomly.py')
+    # fix random number seed for reproducibility
+    args = ['-N', '10', '-M', '12000', '-R', '1']
+    args.append(infile)
+    utils.runscript(script, args, in_dir)
+
+    outfile = infile + '.subset'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile)])
+    print list(sorted(seqs))
+
+    assert seqs == set(['850:2:1:1859:11742/1', '850:2:1:1859:11742/2',
+                        '850:2:1:2131:17360/1', '850:2:1:2131:17360/2',
+                        '850:2:1:2416:7565/1', '850:2:1:2416:7565/2',
+                        '850:2:1:2490:13491/1', '850:2:1:2490:13491/2',
+                        '850:2:1:2962:3999/1', '850:2:1:2962:3999/2',
+                        '850:2:1:3096:20321/1', '850:2:1:3096:20321/2',
+                        '850:2:1:3164:6414/1', '850:2:1:3164:6414/2',
+                        '850:2:1:3206:13876/1', '850:2:1:3206:13876/2',
+                        '850:2:1:3631:20919/1', '850:2:1:3631:20919/2',
+                        '850:2:1:3655:15581/1', '850:2:1:3655:15581/2'])
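+    # /1 and /2 mates appear together above because the sampler keeps read
+    # pairs intact by default; --force_single (next test) samples singly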
+
+
+def test_sample_reads_randomly_force_single():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-reads.fa'), infile)
+
+    script = scriptpath('sample-reads-randomly.py')
+    # fix random number seed for reproducibility
+    args = ['-N', '10', '-M', '12000', '-R', '1', '--force_single']
+    args.append(infile)
+    utils.runscript(script, args, in_dir)
+
+    outfile = infile + '.subset'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile)])
+    print list(sorted(seqs))
+    assert seqs == set(['850:2:1:2399:20086/2',
+                        '850:2:1:2273:13309/1',
+                        '850:2:1:2065:16816/1',
+                        '850:2:1:1984:7162/2',
+                        '850:2:1:2691:14602/1',
+                        '850:2:1:1762:5439/1',
+                        '850:2:1:2503:4494/2',
+                        '850:2:1:2263:11143/2',
+                        '850:2:1:1792:15774/2',
+                        '850:2:1:2084:17145/1'])
+
+
+def test_sample_reads_randomly_fq():
+    infile = utils.get_temp_filename('test.fq.gz')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-reads.fq.gz'), infile)
+
+    script = scriptpath('sample-reads-randomly.py')
+    # fix random number seed for reproducibility
+    args = ['-N', '10', '-M', '12000', '-R', '1']
+    args.append(infile)
+    utils.runscript(script, args, in_dir)
+
+    outfile = infile + '.subset'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile,
+                                            parse_description=False)])
+
+    print list(sorted(seqs))
+    assert seqs == set(['850:2:1:2399:20086/2',
+                        '850:2:1:1762:5439 1::FOO',
+                        '850:2:1:2065:16816/1',
+                        '850:2:1:2263:11143/2',
+                        '850:2:1:1792:15774/2',
+                        '850:2:1:2691:14602/1',
+                        '850:2:1:2503:4494 1::FOO',
+                        '850:2:1:2084:17145/1',
+                        '850:2:1:1984:7162 1::FOO',
+                        '850:2:1:2273:13309 1::FOO'])
+
+
+def test_fastq_to_fasta():
+
+    script = scriptpath('fastq-to-fasta.py')
+    clean_infile = utils.get_temp_filename('test-clean.fq')
+    n_infile = utils.get_temp_filename('test-n.fq')
+
+    shutil.copyfile(utils.get_test_data('test-fastq-reads.fq'), clean_infile)
+    shutil.copyfile(utils.get_test_data('test-fastq-n-reads.fq'), n_infile)
+
+    clean_outfile = clean_infile + '.keep.fa'
+    n_outfile = n_infile + '.keep.fa'
+
+    in_dir = os.path.dirname(clean_infile)
+    in_dir_n = os.path.dirname(n_infile)
+
+    args = [clean_infile, '-n', '-o', clean_outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+    assert len(out.splitlines()) == 2, len(out.splitlines())
+    assert "No lines dropped" in err
+
+    names = [r.name for r in screed.open(clean_outfile,
+                                         parse_description=False)]
+    assert '895:1:1:1246:14654 1:N:0:NNNNN' in names, names
+
+    args = [n_infile, '-n', '-o', n_outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir_n)
+    assert len(out.splitlines()) == 2
+    assert "No lines dropped" in err
+
+    args = [clean_infile, '-o', clean_outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+    assert len(out.splitlines()) == 2
+    assert "0 lines dropped" in err
+
+    args = [n_infile, '-o', n_outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir_n)
+    assert len(out.splitlines()) == 2, out
+    assert "4 lines dropped" in err, err
+
+    args = [clean_infile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+    assert len(out.splitlines()) > 2
+    assert "0 lines dropped" in err
+
+    args = [n_infile]
+    (status, out, err) = utils.runscript(script, args, in_dir_n)
+    assert len(out.splitlines()) > 2
+    assert "4 lines dropped" in err
+
+
+def test_extract_long_sequences_fa():
+
+    script = scriptpath('extract-long-sequences.py')
+    fa_infile = utils.get_temp_filename('test.fa')
+
+    shutil.copyfile(utils.get_test_data('paired-mixed.fa'), fa_infile)
+
+    fa_outfile = fa_infile + '.keep.fa'
+
+    in_dir_fa = os.path.dirname(fa_infile)
+
+    args = [fa_infile, '-l', '10', '-o', fa_outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir_fa)
+
+    countlines = sum(1 for line in open(fa_outfile))
+    assert countlines == 22, countlines
+
+    names = [r.name for r in screed.open(fa_outfile, parse_description=False)]
+    assert "895:1:37:17593:9954/1" in names
+    assert "895:1:37:17593:9954/2" in names
+
+
+def test_extract_long_sequences_fq():
+
+    script = scriptpath('extract-long-sequences.py')
+    fq_infile = utils.get_temp_filename('test.fq')
+
+    shutil.copyfile(utils.get_test_data('paired-mixed.fq'), fq_infile)
+
+    fq_outfile = fq_infile + '.keep.fq'
+
+    in_dir_fq = os.path.dirname(fq_infile)
+
+    args = [fq_infile, '-l', '10', '-o', fq_outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir_fq)
+
+    countlines = sum(1 for line in open(fq_outfile))
+    assert countlines == 44, countlines
+
+    names = [r.name for r in screed.open(fq_outfile, parse_description=False)]
+    assert "895:1:37:17593:9954 1::foo" in names
+    assert "895:1:37:17593:9954 2::foo" in names
+
+
+def test_sample_reads_randomly_S():
+    infile = utils.get_temp_filename('test.fq')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-fastq-reads.fq'), infile)
+
+    script = scriptpath('sample-reads-randomly.py')
+
+    # fix random number seed for reproducibility
+    args = ['-N', '10', '-R', '1', '-S', '3']
+
+    badargs = list(args)
+    badargs.extend(['-o', 'test', infile, infile])
+    (status, out, err) = utils.runscript(script, badargs, in_dir, fail_ok=True)
+    assert status == 1, (status, out, err)
+    assert "Error: cannot specify -o with more than one sample" in err
+
+    args.append(infile)
+
+    utils.runscript(script, args, in_dir)
+
+    outfile = infile + '.subset.0'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile)])
+    print list(sorted(seqs))
+
+    assert seqs == set(['895:1:1:1303:14389', '895:1:1:1347:3237',
+                        '895:1:1:1295:6189', '895:1:1:1308:20421',
+                        '895:1:1:1320:11648', '895:1:1:1352:5369',
+                        '895:1:1:1318:10532', '895:1:1:1363:11839',
+                        '895:1:1:1355:13535', '895:1:1:1349:15165'])
+
+    outfile = infile + '.subset.1'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile)])
+    print list(sorted(seqs))
+
+    assert seqs == set(['895:1:1:1303:14389', '895:1:1:1373:4848',
+                        '895:1:1:1357:19736', '895:1:1:1347:3237',
+                        '895:1:1:1338:7557', '895:1:1:1388:11093',
+                        '895:1:1:1296:1784', '895:1:1:1290:11501',
+                        '895:1:1:1355:13535', '895:1:1:1303:6251'])
+
+    outfile = infile + '.subset.2'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.name for r in screed.open(outfile)])
+    print list(sorted(seqs))
+
+    assert seqs == set(['895:1:1:1298:13380', '895:1:1:1348:18672',
+                        '895:1:1:1309:4153', '895:1:1:1252:19493',
+                        '895:1:1:1368:4434', '895:1:1:1348:1257',
+                        '895:1:1:1383:3089', '895:1:1:1355:13535',
+                        '895:1:1:1303:6251', '895:1:1:1349:15165'])
+
+
+def test_count_overlap_invalid_datafile():
+    seqfile1 = utils.get_temp_filename('test-overlap1.fa')
+    in_dir = os.path.dirname(seqfile1)
+    shutil.copy(utils.get_test_data('test-overlap1.fa'), seqfile1)
+    htfile = _make_graph(seqfile1, ksize=20)
+    outfile = utils.get_temp_filename('overlap.out', in_dir)
+    script = scriptpath('count-overlap.py')
+    args = ['--ksize', '20', '--n_tables', '2', '--min-tablesize', '10000000',
+            htfile + '.pt', htfile + '.pt', outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert "IOError" in err
+
+
+def test_count_overlap():
+    seqfile1 = utils.get_temp_filename('test-overlap1.fa')
+    in_dir = os.path.dirname(seqfile1)
     seqfile2 = utils.get_temp_filename('test-overlap2.fa', in_dir)
     outfile = utils.get_temp_filename('overlap.out', in_dir)
     curvefile = utils.get_temp_filename('overlap.out.curve', in_dir)
@@ -1718,6 +2474,33 @@ def test_count_overlap():
     assert '752053 238627' in data
 
 
+def test_count_overlap_csv():
+    seqfile1 = utils.get_temp_filename('test-overlap1.fa')
+    in_dir = os.path.dirname(seqfile1)
+    seqfile2 = utils.get_temp_filename('test-overlap2.fa', in_dir)
+    outfile = utils.get_temp_filename('overlap.out', in_dir)
+    curvefile = utils.get_temp_filename('overlap.out.curve', in_dir)
+    shutil.copy(utils.get_test_data('test-overlap1.fa'), seqfile1)
+    shutil.copy(utils.get_test_data('test-overlap2.fa'), seqfile2)
+    htfile = _make_graph(seqfile1, ksize=20)
+    script = scriptpath('count-overlap.py')
+    args = ['--ksize', '20', '--n_tables', '2', '--min-tablesize',
+            '10000000', '--csv', htfile + '.pt', seqfile2, outfile]
+    (status, out, err) = utils.runscript(script, args, in_dir)
+    assert status == 0
+    assert os.path.exists(outfile), outfile
+    data = [x.strip() for x in open(outfile)]
+    data = set(data)
+    assert '# of unique k-mers in dataset2: 759047' in data
+    assert '# of overlap unique k-mers: 245621' in data
+    assert os.path.exists(curvefile), curvefile
+    data = [x.strip() for x in open(curvefile)]
+    data = set(data)
+    assert '178633,1155' in data
+    assert '496285,2970' in data
+    assert '752053,238627' in data
+
+
 def execute_streaming_diginorm(ifilename):
     '''Helper function for the matrix of streaming tests for read_parser
     using diginorm, i.e. uncompressed fasta, gzip fasta, bz2 fasta,
@@ -1883,3 +2666,430 @@ def test_read_parser_streaming_bzfa():
 def test_read_parser_streaming_gzfa():
     # gzip compressed FASTA
     execute_load_graph_streaming(utils.get_test_data('random-20-a.fa.gz'))
+
+
+def test_readstats():
+    readstats_output = ("358 bp / 5 seqs; 71.6 average length",
+                        "916 bp / 11 seqs; 83.3 average length")
+
+    args = [utils.get_test_data("test-sweep-reads.fq"),
+            utils.get_test_data("paired-mixed.fq")]
+    status, out, err = utils.runscript('readstats.py', args)
+    assert status == 0
+
+    for k in readstats_output:
+        assert k in out, (k, out)
+
+
+def test_readstats_csv():
+    readstats_output = ("358,5,71.6," +
+                        utils.get_test_data("test-sweep-reads.fq"),
+                        "916,11,83.3," +
+                        utils.get_test_data("paired-mixed.fq"))
+
+    args = [utils.get_test_data("test-sweep-reads.fq"),
+            utils.get_test_data("paired-mixed.fq"),
+            '--csv']
+    status, out, err = utils.runscript('readstats.py', args)
+    assert status == 0
+
+    for k in readstats_output:
+        assert k in out, (k, out)
+
+
+def test_readstats_output():
+    readstats_output = ("358 bp / 5 seqs; 71.6 average length",
+                        "916 bp / 11 seqs; 83.3 average length")
+
+    outfile = utils.get_temp_filename('output.txt')
+    args = ["-o", outfile,
+            utils.get_test_data("test-sweep-reads.fq"),
+            utils.get_test_data("paired-mixed.fq")]
+
+    status, _, _ = utils.runscript('readstats.py', args)
+    assert status == 0
+
+    out = open(outfile).read()
+
+    for k in readstats_output:
+        assert k in out, (k, out)
+
+
+def test_readstats_empty():
+    expected_output = "No sequences found in 2 files"
+
+    args = [utils.get_test_data("test-empty.fa"),
+            utils.get_test_data("test-empty.fa.bz2")]
+
+    status, out, err = utils.runscript('readstats.py', args)
+    assert status == 0
+
+    assert expected_output in out
+
+
+def test_trim_low_abund_1():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 1, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs
+
+
+def test_trim_low_abund_1_duplicate_filename_err():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile]
+    try:
+        utils.runscript('trim-low-abund.py', args, in_dir)
+        raise Exception("should not reach this")
+    except AssertionError:
+        # an error should be raised by passing 'infile' twice.
+        pass
+
+
+def test_trim_low_abund_2():
+    infile = utils.get_temp_filename('test.fa')
+    infile2 = utils.get_temp_filename('test2.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile2)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile2]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 2, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs
+
+# make sure that FASTQ records are retained.
+
+
+def test_trim_low_abund_3_fq_retained():
+    infile = utils.get_temp_filename('test.fq')
+    infile2 = utils.get_temp_filename('test2.fq')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile)
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fq'), infile2)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", '-C', '1', infile, infile2]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 2, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs
+
+    # check for 'quality' string.
+    seqs = set([r.quality for r in screed.open(outfile)])
+    assert len(seqs) == 2, seqs
+    assert '##################' in seqs
+
+
+# test that the -V option does not trim sequences that are low abundance
+
+
+def test_trim_low_abund_4_retain_low_abund():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", '-V', infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 2, seqs
+    assert 'GGTTGACGGGGCTCAGGG' in seqs
+
+# test that the -V option *does* trim sequences that are high abundance
+
+
+def test_trim_low_abund_5_trim_high_abund():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", '-V', infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 2, seqs
+
+    # trimmed sequence @ error
+    assert 'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGC' in seqs
+
+# test -V with -Z: no trimming should occur if -Z is set high enough.
+
+
+def test_trim_low_abund_6_trim_high_abund_Z():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", '-V', '-Z', '25', infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = set([r.sequence for r in screed.open(outfile)])
+    assert len(seqs) == 2, seqs
+
+    # untrimmed seq.
+    badseq = 'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCgtgCCGCAGCTGTCGTCAGGG' \
+             'GATTTCCGGGCGG'
+    assert badseq in seqs       # should be there, untrimmed
+
+
+def test_trim_low_abund_keep_paired():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired.fq'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", "-V", infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = [r.name for r in screed.open(outfile)]
+    assert seqs[-2:] == ['pair/1', 'pair/2'], seqs
+
+
+def test_trim_low_abund_keep_paired_casava18():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired2.fq'),
+                    infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", "-V", infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    seqs = [r.name for r in screed.open(outfile, parse_description=False)]
+    assert seqs[-2:] == ['pair:foo 1::N', 'pair:foo 2::N'], seqs
+
+
+def test_trim_low_abund_highfpr():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired.fq'), infile)
+
+    args = ["-k", "17", "-x", "1", "-N", "1", "-V", infile]
+    code, out, err = utils.runscript('trim-low-abund.py', args, in_dir,
+                                     fail_ok=True)
+
+    assert code == 1
+    assert '** ERROR: the graph structure is too small' in err, err
+
+
+def test_trim_low_abund_trimtest():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired.fq'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", "-Z", "2", "-C", "1",
+            "-V", infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    for record in screed.open(outfile):
+        if record.name == 'seqtrim/1':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCC'
+        elif record.name == 'seqtrim/2':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGC'
+        elif record.name == 'seqtrim2/1':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCA'
+
+
+def test_trim_low_abund_trimtest_after_load():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    saved_table = utils.get_temp_filename('save.ct')
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired.fq'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", saved_table, infile]
+    utils.runscript('load-into-counting.py', args, in_dir)
+
+    args = ["-Z", "2", "-C", "2", "-V", '--loadtable', saved_table, infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+
+    for record in screed.open(outfile):
+        if record.name == 'seqtrim/1':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCC'
+        elif record.name == 'seqtrim/2':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGC'
+        elif record.name == 'seqtrim2/1':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCA'
+
+
+def test_trim_low_abund_trimtest_savetable():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    saved_table = utils.get_temp_filename('save.ct')
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.paired.fq'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2",
+            "-Z", "2", "-C", "2", "-V", '--savetable', saved_table, infile]
+    utils.runscript('trim-low-abund.py', args, in_dir)
+
+    outfile = infile + '.abundtrim'
+    assert os.path.exists(outfile), outfile
+    assert os.path.exists(saved_table)
+
+    for record in screed.open(outfile):
+        if record.name == 'seqtrim/1':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCC'
+        elif record.name == 'seqtrim/2':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGC'
+        elif record.name == 'seqtrim2/1':
+            print record.name, record.sequence
+            assert record.sequence == \
+                'GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCA'
+
+# test that -o/--out option outputs to STDOUT
+
+
+def test_trim_low_abund_stdout():
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile)
+
+    args = ["-k", "17", "-x", "1e7", "-N", "2", infile, "-o", "-"]
+    _, out, err = utils.runscript('trim-low-abund.py', args, in_dir)
+
+    assert 'GGTTGACGGGGCTCAGGG' in out
+
+
+def test_roundtrip_casava_format_1():
+    # check to make sure that extract-paired-reads produces a file identical
+    # to the input file when only paired data is given.
+
+    infile = utils.get_temp_filename('test.fq')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('casava_18-pe.fq'), infile)
+
+    _, out, err = utils.runscript('extract-paired-reads.py', [infile], in_dir)
+
+    r = open(infile).read()
+
+    outfile = infile + '.pe'
+    r2 = open(outfile).read()
+    assert r == r2, (r, r2)
+
+
+def test_roundtrip_casava_format_2():
+    # check that split-paired-reads -> interleave-reads produces a file
+    # identical to input, when only paired reads are given.
+
+    infile = utils.get_temp_filename('test.fq')
+    outfile = utils.get_temp_filename('test2.fq')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('casava_18-pe.fq'), infile)
+
+    _, out, err = utils.runscript('split-paired-reads.py', [infile], in_dir)
+
+    utils.runscript('interleave-reads.py', [infile + '.1',
+                                            infile + '.2',
+                                            '-o', outfile], in_dir)
+
+    r = open(infile).read()
+    r2 = open(outfile).read()
+    assert r == r2, (r, r2)
+
+
+def test_existance_failure():
+    expected_output = 'ERROR: Input file'
+
+    args = [utils.get_temp_filename('thisfiledoesnotexistatall')]
+
+    status, out, err = utils.runscript(
+        'extract-paired-reads.py', args, fail_ok=True)
+    assert status == 1
+
+    assert expected_output in err
+
+
+def test_roundtrip_commented_format():
+    """Split/interleave roundtrip for old style format with comments (#873).
+
+    This should produce a file identical to the input when only paired
+    reads are given.
+    """
+    infile = utils.get_temp_filename('test.fq')
+    outfile = utils.get_temp_filename('test2.fq')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('old-style-format-w-comments.fq'),
+                    infile)
+
+    _, out, err = utils.runscript('split-paired-reads.py', [infile], in_dir)
+
+    utils.runscript('interleave-reads.py', [infile + '.1',
+                                            infile + '.2',
+                                            '-o', outfile], in_dir)
+
+    r = open(infile).read()
+    r2 = open(outfile).read()
+    assert r == r2, (r, r2)
diff --git a/tests/test_subset_graph.py b/tests/test_subset_graph.py
index bfcb956..97f9ba6 100644
--- a/tests/test_subset_graph.py
+++ b/tests/test_subset_graph.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 # pylint: disable=missing-docstring
@@ -9,7 +9,7 @@ import khmer
 import screed
 
 import khmer_tst_utils as utils
-from nose.plugins.attrib import attr
+import os
 
 
 def teardown():
@@ -26,7 +26,9 @@ class Test_RandomData(object):
         (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename)
         assert total_reads == 3, total_reads
 
-        (a, b, c) = ht.divide_tags_into_subsets(1)
+        divvy = ht.divide_tags_into_subsets(1)
+        assert len(divvy) == 3
+        (a, b, c) = divvy
 
         x = ht.do_subset_partition(a, a)
         ht.merge_subset(x)
@@ -45,7 +47,9 @@ class Test_RandomData(object):
         (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename)
         assert total_reads == 3, total_reads
 
-        (a, b, c) = ht.divide_tags_into_subsets(1)
+        divvy = ht.divide_tags_into_subsets(1)
+        assert len(divvy) == 3
+        (a, b, c) = divvy
 
         x = ht.do_subset_partition(b, c)
         ht.merge_subset(x)
@@ -83,7 +87,7 @@ class Test_RandomData(object):
 
         total_reads, _ = ht.consume_fasta_and_tag(filename)
 
-        subset_size = total_reads / 2 + total_reads % 2
+        subset_size = total_reads // 2 + total_reads % 2
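+        # floor division plus remainder == ceil(total_reads / 2)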
         divvy = ht.divide_tags_into_subsets(subset_size)
         assert len(divvy) == 4
 
@@ -102,7 +106,7 @@ class Test_RandomData(object):
 
         total_reads, _ = ht.consume_fasta_and_tag(filename)
 
-        subset_size = total_reads / 2 + total_reads % 2
+        subset_size = total_reads // 2 + total_reads % 2
         divvy = ht.divide_tags_into_subsets(subset_size)
         assert len(divvy) == 4
 
@@ -121,7 +125,7 @@ class Test_RandomData(object):
 
         total_reads, _ = ht.consume_fasta_and_tag(filename)
 
-        subset_size = total_reads / 2 + total_reads % 2
+        subset_size = total_reads // 2 + total_reads % 2
         divvy = ht.divide_tags_into_subsets(subset_size)
         assert len(divvy) == 4, len(divvy)
 
@@ -202,6 +206,7 @@ class Test_SaveLoadPmap(object):
 
         divvy = ht.divide_tags_into_subsets(1)
         print divvy
+        assert len(divvy) == 3
         (a, b, c) = divvy
 
         outfile1 = utils.get_temp_filename('x.pmap')
@@ -231,7 +236,7 @@ class Test_SaveLoadPmap(object):
 
         (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename)
 
-        subset_size = total_reads / 2 + total_reads % 2
+        subset_size = total_reads // 2 + total_reads % 2
         divvy = ht.divide_tags_into_subsets(subset_size)
 
         outfile1 = utils.get_temp_filename('x.pmap')
@@ -245,6 +250,8 @@ class Test_SaveLoadPmap(object):
         ht.save_subset_partitionmap(y, outfile2)
         del y
 
+        assert os.path.exists(outfile1)
+        assert os.path.exists(outfile2)
         a = ht.load_subset_partitionmap(outfile1)
         b = ht.load_subset_partitionmap(outfile2)
 
@@ -272,6 +279,7 @@ class Test_SaveLoadPmap(object):
 
         divvy = ht.divide_tags_into_subsets(1)
         print divvy
+        assert len(divvy) == 3
         (a, b, c) = divvy
 
         outfile1 = utils.get_temp_filename('x.pmap')
@@ -298,7 +306,7 @@ class Test_SaveLoadPmap(object):
 
         (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename)
 
-        subset_size = total_reads / 2 + total_reads % 2
+        subset_size = total_reads // 2 + total_reads % 2
         divvy = ht.divide_tags_into_subsets(subset_size)
 
         outfile1 = utils.get_temp_filename('x.pmap')
@@ -312,6 +320,8 @@ class Test_SaveLoadPmap(object):
         ht.save_subset_partitionmap(y, outfile2)
         del y
 
+        assert os.path.exists(outfile1)
+        assert os.path.exists(outfile2)
         ht.merge_subset_from_disk(outfile1)
         ht.merge_subset_from_disk(outfile2)
 
@@ -328,6 +338,7 @@ class Test_SaveLoadPmap(object):
 
         divvy = ht.divide_tags_into_subsets(1)
         print divvy
+        assert len(divvy) == 3
         (a, b, c) = divvy
 
         outfile1 = utils.get_temp_filename('x.pmap')
@@ -369,6 +380,7 @@ class Test_SaveLoadPmap(object):
 
         divvy = ht.divide_tags_into_subsets(1)
         print divvy
+        assert len(divvy) == 3
         (a, b, c) = divvy
 
         outfile1 = utils.get_temp_filename('x.pmap')
diff --git a/tests/test_threaded_sequence_processor.py b/tests/test_threaded_sequence_processor.py
index 6f20384..5aac0f4 100644
--- a/tests/test_threaded_sequence_processor.py
+++ b/tests/test_threaded_sequence_processor.py
@@ -60,15 +60,15 @@ def test_basic():
 def test_basic_fastq_like():
     tsp = ThreadedSequenceProcessor(idem, 1, 1, verbose=False)
 
-    input = [dict(name='a', sequence='AAA', accuracy='###'),
-             dict(name='b', sequence='TTT', accuracy='###'), ]
+    input = [dict(name='a', sequence='AAA', quality='###'),
+             dict(name='b', sequence='TTT', quality='###'), ]
     outfp = StringIO()
 
     tsp.start(input, outfp)
 
     x = load_records_fastq(outfp)
     for i in x:
-        assert i['accuracy'] == '###'
+        assert i['quality'] == '###'
 
 
 def test_odd():
diff --git a/tests/test_version.py b/tests/test_version.py
index 55f0c99..775edbe 100644
--- a/tests/test_version.py
+++ b/tests/test_version.py
@@ -1,7 +1,7 @@
 #
 # This file is part of khmer, http://github.com/ged-lab/khmer/, and is
 # Copyright (C) Michigan State University, 2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project at idyll.org
 #
 import khmer
diff --git a/third-party/.gitignore b/third-party/.gitignore
new file mode 100644
index 0000000..2ced0cd
--- /dev/null
+++ b/third-party/.gitignore
@@ -0,0 +1,6 @@
+zlib/example
+zlib/example64
+zlib/examplesh
+zlib/minigzip
+zlib/minigzip64
+zlib/minigzipsh
diff --git a/third-party/smhasher/MurmurHash3.cc b/third-party/smhasher/MurmurHash3.cc
new file mode 100644
index 0000000..6804a66
--- /dev/null
+++ b/third-party/smhasher/MurmurHash3.cc
@@ -0,0 +1,147 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE inline __attribute__((always_inline))
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
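+// (the two multiplicative constants below are the standard fmix64 mixers
+// from the reference implementation)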
+
+FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+void MurmurHash3_x64_128 ( const void * key, size_t len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const size_t nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(size_t i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock64(blocks,i*2+0);
+    uint64_t k2 = getblock64(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= ((uint64_t)tail[14]) << 48;
+  case 14: k2 ^= ((uint64_t)tail[13]) << 40;
+  case 13: k2 ^= ((uint64_t)tail[12]) << 32;
+  case 12: k2 ^= ((uint64_t)tail[11]) << 24;
+  case 11: k2 ^= ((uint64_t)tail[10]) << 16;
+  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
+  case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
+  case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
+  case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
+  case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
+  case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
+  case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
+  case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
+  case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
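For readers reviewing the patch, the finalization mix above ("force all bits of a hash block to avalanche") is easy to sanity-check outside of C++. A minimal pure-Python sketch of fmix64 (an illustration only, not part of the patch; 64-bit wrap-around is emulated by masking):

```python
MASK64 = (1 << 64) - 1  # emulate uint64_t wrap-around

def fmix64(k):
    # Mirrors the C++ finalizer: alternating xor-shift and multiply
    # rounds spread every input bit across the whole 64-bit word.
    k ^= k >> 33
    k = (k * 0xff51afd7ed558ccd) & MASK64
    k ^= k >> 33
    k = (k * 0xc4ceb9fe1a85ec53) & MASK64
    k ^= k >> 33
    return k

# Flipping one input bit should change roughly half the output bits:
print(bin(fmix64(1) ^ fmix64(2)).count("1"))
```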
diff --git a/third-party/smhasher/MurmurHash3.h b/third-party/smhasher/MurmurHash3.h
new file mode 100644
index 0000000..780ff90
--- /dev/null
+++ b/third-party/smhasher/MurmurHash3.h
@@ -0,0 +1,34 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+#include <cstddef>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/versioneer.py b/versioneer.py
index 481180d..c00770f 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -1,5 +1,5 @@
 
-# Version: 0.12
+# Version: 0.14
 
 """
 The Versioneer
@@ -10,8 +10,12 @@ The Versioneer
 * Brian Warner
 * License: Public Domain
 * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy
-
-[![Build Status](https://travis-ci.org/warner/python-versioneer.png?branch=master)](https://travis-ci.org/warner/python-versioneer)
+* [![Latest Version]
+(https://pypip.in/version/versioneer/badge.svg?style=flat)
+](https://pypi.python.org/pypi/versioneer/)
+* [![Build Status]
+(https://travis-ci.org/warner/python-versioneer.png?branch=master)
+](https://travis-ci.org/warner/python-versioneer)
 
 This is a tool for managing a recorded version number in distutils-based
 python projects. The goal is to remove the tedious and error-prone "update
@@ -103,7 +107,7 @@ First, decide on values for the following configuration variables:
   append some `__version__`-setting assignments, if they aren't already
   present.
 
-*  `versionfile_build`:
+* `versionfile_build`:
 
   Like `versionfile_source`, but relative to the build directory instead of
   the source directory. These will differ when your setup.py uses
@@ -142,12 +146,14 @@ To versioneer-enable your project:
 * 2: add the following lines to the top of your `setup.py`, with the
   configuration values you decided earlier:
 
-        import versioneer
-        versioneer.VCS = 'git'
-        versioneer.versionfile_source = 'src/myproject/_version.py'
-        versioneer.versionfile_build = 'myproject/_version.py'
-        versioneer.tag_prefix = '' # tags are like 1.2.0
-        versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0'
+  ````
+  import versioneer
+  versioneer.VCS = 'git'
+  versioneer.versionfile_source = 'src/myproject/_version.py'
+  versioneer.versionfile_build = 'myproject/_version.py'
+  versioneer.tag_prefix = '' # tags are like 1.2.0
+  versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0'
+  ````
 
 * 3: add the following arguments to the setup() call in your setup.py:
 
@@ -195,17 +201,19 @@ import the top-level `versioneer.py` and run `get_versions()`.
 Both functions return a dictionary with different keys for different flavors
 of the version string:
 
-* `['version']`: condensed tag+distance+shortid+dirty identifier. For git,
-  this uses the output of `git describe --tags --dirty --always` but strips
-  the tag_prefix. For example "0.11-2-g1076c97-dirty" indicates that the tree
-  is like the "1076c97" commit but has uncommitted changes ("-dirty"), and
-  that this commit is two revisions ("-2-") beyond the "0.11" tag. For
-  released software (exactly equal to a known tag), the identifier will only
-  contain the stripped tag, e.g. "0.11".
+* `['version']`: A condensed PEP440-compliant string, equal to the
+  un-prefixed tag name for actual releases, and containing an additional
+  "local version" section with more detail for in-between builds. For Git,
+  this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe
+  --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates
+  that the tree is like the "1076c97" commit but has uncommitted changes
+  (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11"
+  tag. For released software (exactly equal to a known tag), the identifier
+  will only contain the stripped tag, e.g. "0.11".
 
 * `['full']`: detailed revision identifier. For Git, this is the full SHA1
-  commit id, followed by "-dirty" if the tree contains uncommitted changes,
-  e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac-dirty".
+  commit id, followed by ".dirty" if the tree contains uncommitted changes,
+  e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty".
 
 Some variants are more useful than others. Including `full` in a bug report
 should allow developers to reconstruct the exact code being tested (or
@@ -214,13 +222,6 @@ developers). `version` is suitable for display in an "about" box or a CLI
 `--version` output: it can be easily compared against release notes and lists
 of bugs fixed in various releases.
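To make the two flavors concrete, here is a sketch of what a call might return in a dirty checkout two commits past a "0.11" tag (values are illustrative, assuming the versioneer.* configuration shown earlier has been applied):

```python
>>> import versioneer
>>> versioneer.get_versions()  # illustrative values
{'version': '0.11+2.g1076c97.dirty',
 'full': '1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty'}
```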
 
-In the future, this will also include a
-[PEP-0440](http://legacy.python.org/dev/peps/pep-0440/) -compatible flavor
-(e.g. `1.2.post0.dev123`). This loses a lot of information (and has no room
-for a hash-based revision id), but is safe to use in a `setup.py`
-"`version=`" argument. It also enables tools like *pip* to compare version
-strings and evaluate compatibility constraint declarations.
-
 The `setup.py versioneer` command adds the following text to your
 `__init__.py` to place a basic version in `YOURPROJECT.__version__`:
 
@@ -250,6 +251,14 @@ systems (SVN, etc) in the future.
 
 Nothing special.
 
+## Upgrading to 0.14
+
+0.14 changes the format of the version string. 0.13 and earlier used
+hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a
+plus-separated "local version" section with dot-separated
+components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old
+format, but should be OK with the new one.
+
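A quick way to see why the new format matters: PEP440-aware tools can parse and order it. A minimal sketch using setuptools' pkg_resources (assuming a PEP440-capable setuptools is installed):

```python
from pkg_resources import parse_version

# The 0.14-style string is a valid PEP440 "local version" and sorts
# just after the release it was built from:
assert parse_version("0.11+2.g1076c97") > parse_version("0.11")
assert parse_version("0.11+2.g1076c97") < parse_version("0.12")
```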
 ## Future Directions
 
 This tool is designed to make it easy to extend to other version-control
@@ -272,10 +281,14 @@ domain.
 
 """
 
-import os, sys, re, subprocess, errno
-from distutils.core import Command
-from distutils.command.sdist import sdist as _sdist
+import errno
+import os
+import re
+import subprocess
+import sys
 from distutils.command.build import build as _build
+from distutils.command.sdist import sdist as _sdist
+from distutils.core import Command
 
 # these configuration settings will be overridden by setup.py after it
 # imports us
@@ -288,6 +301,7 @@ VCS = None
 # these dictionaries contain VCS-specific tools
 LONG_VERSION_PY = {}
 
+
 def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
     assert isinstance(commands, list)
     p = None
@@ -311,14 +325,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
             print("unable to find command, tried %s" % (commands,))
         return None
     stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
             print("unable to run %s (error)" % args[0])
         return None
     return stdout
-
 LONG_VERSION_PY['git'] = '''
 # This file helps to compute a version number in source trees obtained from
 # git-archive tarball (such as those provided by github's download-from-tag
@@ -327,7 +340,13 @@ LONG_VERSION_PY['git'] = '''
 # that just contains the computed version number.
 
 # This file is released into the public domain. Generated by
-# versioneer-0.12 (https://github.com/warner/python-versioneer)
+# versioneer-0.14 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
 
 # these strings will be replaced by git during git-archive
 git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
@@ -338,7 +357,6 @@ tag_prefix = "%(TAG_PREFIX)s"
 parentdir_prefix = "%(PARENTDIR_PREFIX)s"
 versionfile_source = "%(VERSIONFILE_SOURCE)s"
 
-import os, sys, re, subprocess, errno
 
 def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
     assert isinstance(commands, list)
@@ -363,7 +381,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
             print("unable to find command, tried %%s" %% (commands,))
         return None
     stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
@@ -378,11 +396,12 @@ def versions_from_parentdir(parentdir_prefix, root, verbose=False):
     dirname = os.path.basename(root)
     if not dirname.startswith(parentdir_prefix):
         if verbose:
-            print("guessing rootdir is '%%s', but '%%s' doesn't start with prefix '%%s'" %%
-                  (root, dirname, parentdir_prefix))
+            print("guessing rootdir is '%%s', but '%%s' doesn't start with "
+                  "prefix '%%s'" %% (root, dirname, parentdir_prefix))
         return None
     return {"version": dirname[len(parentdir_prefix):], "full": ""}
 
+
 def git_get_keywords(versionfile_abs):
     # the code embedded in _version.py can just fetch the value of these
     # keywords. When used from setup.py, we don't want to import _version.py,
@@ -390,7 +409,7 @@ def git_get_keywords(versionfile_abs):
     # _version.py.
     keywords = {}
     try:
-        f = open(versionfile_abs,"r")
+        f = open(versionfile_abs, "r")
         for line in f.readlines():
             if line.strip().startswith("git_refnames ="):
                 mo = re.search(r'=\s*"(.*)"', line)
@@ -405,14 +424,15 @@ def git_get_keywords(versionfile_abs):
         pass
     return keywords
 
+
 def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
     if not keywords:
-        return {} # keyword-finding function failed to find keywords
+        return {}  # keyword-finding function failed to find keywords
     refnames = keywords["refnames"].strip()
     if refnames.startswith("$Format"):
         if verbose:
             print("keywords are unexpanded, not using")
-        return {} # unexpanded, so not in an unpacked git-archive tarball
+        return {}  # unexpanded, so not in an unpacked git-archive tarball
     refs = set([r.strip() for r in refnames.strip("()").split(",")])
     # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
     # just "foo-1.0". If we see a "tag: " prefix, prefer those.
@@ -437,13 +457,59 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
             r = ref[len(tag_prefix):]
             if verbose:
                 print("picking %%s" %% r)
-            return { "version": r,
-                     "full": keywords["full"].strip() }
-    # no suitable tags, so we use the full revision id
+            return {"version": r,
+                    "full": keywords["full"].strip()}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
     if verbose:
-        print("no suitable tags, using full revision id")
-    return { "version": keywords["full"].strip(),
-             "full": keywords["full"].strip() }
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full": keywords["full"].strip()}
+
+
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%%s' doesn't start with prefix '%%s'"
+            print(fmt %% (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%%d.g%%s" %% (distance, commit) + dirty_suffix
+
+    return version, dirty
 
 
 def git_versions_from_vcs(tag_prefix, root, verbose=False):
@@ -455,46 +521,49 @@ def git_versions_from_vcs(tag_prefix, root, verbose=False):
     if not os.path.exists(os.path.join(root, ".git")):
         if verbose:
             print("no .git in %%s" %% root)
-        return {}
+        return {}  # get_versions() will try next method
 
     GITS = ["git"]
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"],
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
                          cwd=root)
+    # --long was added in git-1.5.5
     if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            print("tag '%%s' doesn't start with prefix '%%s'" %% (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
     stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
     if stdout is None:
         return {}
     full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+    if dirty:
+        full += ".dirty"
 
+    return {"version": version, "full": full}
 
-def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
+
+def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
     # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
     # case we can only use expanded keywords.
 
-    keywords = { "refnames": git_refnames, "full": git_full }
+    keywords = {"refnames": git_refnames, "full": git_full}
     ver = git_versions_from_keywords(keywords, tag_prefix, verbose)
     if ver:
         return ver
 
     try:
-        root = os.path.abspath(__file__)
+        root = os.path.realpath(__file__)
         # versionfile_source is the relative path from the top of the source
         # tree (where the .git directory might live) to this file. Invert
         # this to find the root from __file__.
-        for i in range(len(versionfile_source.split(os.sep))):
+        for i in versionfile_source.split('/'):
             root = os.path.dirname(root)
     except NameError:
         return default
@@ -504,6 +573,7 @@ def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
             or default)
 '''
 
+
 def git_get_keywords(versionfile_abs):
     # the code embedded in _version.py can just fetch the value of these
     # keywords. When used from setup.py, we don't want to import _version.py,
@@ -511,7 +581,7 @@ def git_get_keywords(versionfile_abs):
     # _version.py.
     keywords = {}
     try:
-        f = open(versionfile_abs,"r")
+        f = open(versionfile_abs, "r")
         for line in f.readlines():
             if line.strip().startswith("git_refnames ="):
                 mo = re.search(r'=\s*"(.*)"', line)
@@ -526,14 +596,15 @@ def git_get_keywords(versionfile_abs):
         pass
     return keywords
 
+
 def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
     if not keywords:
-        return {} # keyword-finding function failed to find keywords
+        return {}  # keyword-finding function failed to find keywords
     refnames = keywords["refnames"].strip()
     if refnames.startswith("$Format"):
         if verbose:
             print("keywords are unexpanded, not using")
-        return {} # unexpanded, so not in an unpacked git-archive tarball
+        return {}  # unexpanded, so not in an unpacked git-archive tarball
     refs = set([r.strip() for r in refnames.strip("()").split(",")])
     # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
     # just "foo-1.0". If we see a "tag: " prefix, prefer those.
@@ -558,13 +629,59 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
             r = ref[len(tag_prefix):]
             if verbose:
                 print("picking %s" % r)
-            return { "version": r,
-                     "full": keywords["full"].strip() }
-    # no suitable tags, so we use the full revision id
+            return {"version": r,
+                    "full": keywords["full"].strip()}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
     if verbose:
-        print("no suitable tags, using full revision id")
-    return { "version": keywords["full"].strip(),
-             "full": keywords["full"].strip() }
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full": keywords["full"].strip()}
+
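As a sketch of the keyword path above (the refnames value is an assumed `$Format:%d$` expansion in the post-git-1.8.3 "tag: " style):

```python
keywords = {
    "refnames": " (HEAD, tag: v1.4, master)",  # assumed expansion
    "full": "1076c978a8d3cfc70f408fe5974aa6c092c949ac",
}
# With tag_prefix "v", this should pick the tag and strip the prefix:
git_versions_from_keywords(keywords, "v")
# -> {"version": "1.4", "full": "1076c978a8d3cfc70f408fe5974aa6c092c949ac"}
```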
+
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%s' doesn't start with prefix '%s'"
+            print(fmt % (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%d.g%s" % (distance, commit) + dirty_suffix
+
+    return version, dirty
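The parser above is pure string manipulation, so its behavior is easy to pin down with examples (a sketch of expected outputs; the tag_prefix values are assumptions):

```python
git_parse_vcs_describe("v1.4-2-g1076c97-dirty", "v")
# -> ("1.4+2.g1076c97.dirty", True)

git_parse_vcs_describe("v1.4-0-g1076c97", "v")
# -> ("1.4", False)                   # exactly on a clean tag: just the tag

git_parse_vcs_describe("1076c97", "")
# -> ("0+untagged.g1076c97", False)   # no tags anywhere in history
```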
 
 
 def git_versions_from_vcs(tag_prefix, root, verbose=False):
@@ -576,27 +693,30 @@ def git_versions_from_vcs(tag_prefix, root, verbose=False):
     if not os.path.exists(os.path.join(root, ".git")):
         if verbose:
             print("no .git in %s" % root)
-        return {}
+        return {}  # get_versions() will try next method
 
     GITS = ["git"]
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always"],
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
                          cwd=root)
+    # --long was added in git-1.5.5
     if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
     stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
     if stdout is None:
         return {}
     full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+    if dirty:
+        full += ".dirty"
+
+    return {"version": version, "full": full}
 
 
 def do_vcs_install(manifest_in, versionfile_source, ipy):
@@ -631,19 +751,20 @@ def do_vcs_install(manifest_in, versionfile_source, ipy):
         files.append(".gitattributes")
     run_command(GITS, ["add", "--"] + files)
 
+
 def versions_from_parentdir(parentdir_prefix, root, verbose=False):
     # Source tarballs conventionally unpack into a directory that includes
     # both the project name and a version string.
     dirname = os.path.basename(root)
     if not dirname.startswith(parentdir_prefix):
         if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
-                  (root, dirname, parentdir_prefix))
+            print("guessing rootdir is '%s', but '%s' doesn't start with "
+                  "prefix '%s'" % (root, dirname, parentdir_prefix))
         return None
     return {"version": dirname[len(parentdir_prefix):], "full": ""}
 
 SHORT_VERSION_PY = """
-# This file was generated by 'versioneer.py' (0.12) from
+# This file was generated by 'versioneer.py' (0.14) from
 # revision-control system data, or from the parent directory name of an
 # unpacked source archive. Distribution tarballs contain a pre-generated copy
 # of this file.
@@ -655,7 +776,8 @@ def get_versions(default={}, verbose=False):
 
 """
 
-DEFAULT = {"version": "unknown", "full": "unknown"}
+DEFAULT = {"version": "0+unknown", "full": "unknown"}
+
 
 def versions_from_file(filename):
     versions = {}
@@ -673,6 +795,7 @@ def versions_from_file(filename):
 
     return versions
 
+
 def write_to_version_file(filename, versions):
     with open(filename, "w") as f:
         f.write(SHORT_VERSION_PY % versions)
@@ -686,14 +809,18 @@ def get_root():
     except NameError:
         return os.path.dirname(os.path.abspath(sys.argv[0]))
 
+
 def vcs_function(vcs, suffix):
     return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None)
 
+
 def get_versions(default=DEFAULT, verbose=False):
     # returns dict with two keys: 'version' and 'full'
-    assert versionfile_source is not None, "please set versioneer.versionfile_source"
+    assert versionfile_source is not None, \
+        "please set versioneer.versionfile_source"
     assert tag_prefix is not None, "please set versioneer.tag_prefix"
-    assert parentdir_prefix is not None, "please set versioneer.parentdir_prefix"
+    assert parentdir_prefix is not None, \
+        "please set versioneer.parentdir_prefix"
     assert VCS is not None, "please set versioneer.VCS"
 
     # I am in versioneer.py, which must live at the top of the source tree,
@@ -716,40 +843,50 @@ def get_versions(default=DEFAULT, verbose=False):
         vcs_keywords = get_keywords_f(versionfile_abs)
         ver = versions_from_keywords_f(vcs_keywords, tag_prefix)
         if ver:
-            if verbose: print("got version from expanded keyword %s" % ver)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
             return ver
 
     ver = versions_from_file(versionfile_abs)
     if ver:
-        if verbose: print("got version from file %s %s" % (versionfile_abs,ver))
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
         return ver
 
     versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs")
     if versions_from_vcs_f:
         ver = versions_from_vcs_f(tag_prefix, root, verbose)
         if ver:
-            if verbose: print("got version from VCS %s" % ver)
+            if verbose:
+                print("got version from VCS %s" % ver)
             return ver
 
     ver = versions_from_parentdir(parentdir_prefix, root, verbose)
     if ver:
-        if verbose: print("got version from parentdir %s" % ver)
+        if verbose:
+            print("got version from parentdir %s" % ver)
         return ver
 
-    if verbose: print("got version from default %s" % default)
+    if verbose:
+        print("got version from default %s" % default)
     return default
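Taken together, get_versions() tries expanded keywords, then a previously written version file, then a live VCS query, and then the parent directory name before falling back to the default. A hedged end-to-end usage sketch (the myproject paths are assumptions, mirroring the setup.py snippet in the README above):

```python
import versioneer

versioneer.VCS = 'git'
versioneer.versionfile_source = 'src/myproject/_version.py'  # assumed layout
versioneer.versionfile_build = 'myproject/_version.py'
versioneer.tag_prefix = ''
versioneer.parentdir_prefix = 'myproject-'

# verbose=True prints which strategy supplied the answer:
print(versioneer.get_versions(verbose=True))
```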
 
+
 def get_version(verbose=False):
     return get_versions(verbose=verbose)["version"]
 
+
 class cmd_version(Command):
     description = "report generated version string"
     user_options = []
     boolean_options = []
+
     def initialize_options(self):
         pass
+
     def finalize_options(self):
         pass
+
     def run(self):
         ver = get_version(verbose=True)
         print("Version is currently: %s" % ver)
@@ -762,7 +899,8 @@ class cmd_build(_build):
         # now locate _version.py in the new build/ directory and replace it
         # with an updated value
         if versionfile_build:
-            target_versionfile = os.path.join(self.build_lib, versionfile_build)
+            target_versionfile = os.path.join(self.build_lib,
+                                              versionfile_build)
             print("UPDATING %s" % target_versionfile)
             os.unlink(target_versionfile)
             with open(target_versionfile, "w") as f:
@@ -791,6 +929,7 @@ if 'cx_Freeze' in sys.modules:  # cx_freeze enabled?
                                 "VERSIONFILE_SOURCE": versionfile_source,
                                 })
 
+
 class cmd_sdist(_sdist):
     def run(self):
         versions = get_versions(verbose=True)
@@ -815,14 +954,19 @@ __version__ = get_versions()['version']
 del get_versions
 """
 
+
 class cmd_update_files(Command):
-    description = "install/upgrade Versioneer files: __init__.py SRC/_version.py"
+    description = ("install/upgrade Versioneer files: "
+                   "__init__.py SRC/_version.py")
     user_options = []
     boolean_options = []
+
     def initialize_options(self):
         pass
+
     def finalize_options(self):
         pass
+
     def run(self):
         print(" creating %s" % versionfile_source)
         with open(versionfile_source, "w") as f:
@@ -888,6 +1032,7 @@ class cmd_update_files(Command):
         # substitution.
         do_vcs_install(manifest_in, versionfile_source, ipy)
 
+
 def get_cmdclass():
     cmds = {'version': cmd_version,
             'versioneer': cmd_update_files,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/khmer.git


