[med-svn] [freebayes] 01/05: Imported Upstream version 1.0.2

Andreas Tille tille at debian.org
Wed Jun 22 12:26:24 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository freebayes.

commit 492c86dd52d2e53f32f7dda452b47e5efc54ac4b
Author: Andreas Tille <tille at debian.org>
Date:   Wed Jun 22 14:24:36 2016 +0200

    Imported Upstream version 1.0.2
---
 .gitmodules                           |   6 +
 .travis.yml                           |  14 +
 Makefile                              |   7 +-
 README.md                             |  16 +-
 paper/main.blg                        |   2 +-
 paper/main.pdf                        | Bin 846894 -> 0 bytes
 scripts/freebayes-parallel            |   4 +-
 src/AlleleParser.cpp                  | 752 +++++++++++++++-------------------
 src/AlleleParser.h                    |  23 +-
 src/DataLikelihood.cpp                | 196 ++++++---
 src/DataLikelihood.h                  |  18 +
 src/Genotype.cpp                      |  18 +-
 src/Genotype.h                        |   4 +-
 src/IndelAllele.cpp                   |   6 +-
 src/IndelAllele.h                     |   5 +-
 src/LeftAlign.cpp                     |  34 +-
 src/Makefile                          |   6 +-
 src/Multinomial.cpp                   |  10 +
 src/Multinomial.h                     |   2 +
 src/NonCall.cpp                       |  86 ++++
 src/NonCall.h                         |  48 +++
 src/Parameters.cpp                    | 105 +++--
 src/Parameters.h                      |   7 +-
 src/ResultData.cpp                    |  68 +++
 src/ResultData.h                      |   8 +-
 src/Sample.cpp                        |   2 +-
 src/Sample.h                          |   2 -
 src/freebayes.cpp                     | 385 ++++++++---------
 src/version_release.txt               |   6 +-
 test/Makefile                         |  15 +
 test/region-and-target-handling.t     | 104 +++++
 test/splice/1:883884-887618.bam       | Bin 0 -> 22589 bytes
 test/splice/1:883884-887618.bam.bai   | Bin 0 -> 288 bytes
 test/splice/1:883884-887618.fa        |  64 +++
 test/splice/1:883884-887618.fa.fai    |   1 +
 test/t/01_call_variants.t             | 110 +++++
 test/tiny/1read.bam                   | Bin 0 -> 636 bytes
 test/tiny/1read.bam.bai               | Bin 0 -> 96 bytes
 test/tiny/NA12878.chr22.tiny.bam      | Bin 0 -> 287213 bytes
 test/tiny/NA12878.chr22.tiny.bam.bai  | Bin 0 -> 96 bytes
 test/tiny/NA12878.chr22.tiny.giab.vcf |  89 ++++
 test/tiny/q with spaces.fa            | 207 ++++++++++
 test/tiny/q with spaces.fa.fai        |   1 +
 test/tiny/q with spaces.regions       |   3 +
 test/tiny/q.fa                        | 207 ++++++++++
 test/tiny/q.fa.fai                    |   1 +
 test/tiny/q.regions                   |   3 +
 test/tiny/q.vcf.gz                    | Bin 0 -> 4305 bytes
 test/tiny/q.vcf.gz.tbi                | Bin 0 -> 102 bytes
 test/tiny/q_spiked.vcf.gz             | Bin 0 -> 2378 bytes
 test/tiny/q_spiked.vcf.gz.tbi         | Bin 0 -> 91 bytes
 51 files changed, 1864 insertions(+), 781 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 89dcd4f..6341f4d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,9 @@
 [submodule "intervaltree"]
 	path = intervaltree
 	url = https://github.com/ekg/intervaltree.git
+[submodule "test/test-simple-bash"]
+	path = test/test-simple-bash
+	url = https://github.com/ingydotnet/test-simple-bash.git
+[submodule "bash-tap"]
+	path = test/bash-tap
+	url = https://github.com/illusori/bash-tap.git
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..8acf75c
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,14 @@
+# Control file for continuous integration testing at http://travis-ci.org/
+
+language: cpp
+compiler: gcc
+before_install:
+  - git submodule update --init --recursive
+  - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
+  - sudo apt-get update -qq
+  - sudo apt-get install -qq gcc-4.8 g++-4.8
+  - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 60 --slave /usr/bin/g++ g++ /usr/bin/g++-4.8
+  - gcc --version && g++ --version
+  - sudo apt-get install -qq bc samtools parallel
+#install: make get-deps
+script: make && make test
diff --git a/Makefile b/Makefile
index 7e6339e..6848dcd 100644
--- a/Makefile
+++ b/Makefile
@@ -23,8 +23,11 @@ install:
 uninstall:
 	rm /usr/local/bin/freebayes /usr/local/bin/bamleftalign
 
+test:
+	cd test && make test
+
 clean:
 	cd src && $(MAKE) clean
-	rm -f bin/*
+	rm -fr bin/*
 
-.PHONY: all install uninstall clean
+.PHONY: all install uninstall clean test
diff --git a/README.md b/README.md
index 9a16f4d..f233549 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # *freebayes*, a haplotype-based variant detector
 ## user manual and guide
+
+[![Build Status](https://travis-ci.org/ekg/freebayes.svg)](https://travis-ci.org/ekg/freebayes)
 [![Gitter](https://badges.gitter.im/Join Chat.svg)](https://gitter.im/ekg/freebayes?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
 --------
@@ -74,9 +76,9 @@ nested git submodules for external repositories.
 
 After you've already done the above to clone the most recent development 
 version, if you wish to compile a specific version of FreeBayes from, you 
-can then do: 
+can then do something like the following: 
 
-    git checkout v0.9.18 && git submodule update --recursive
+    git checkout v0.9.20 && git submodule update --recursive
 
 ### Resolving proxy issues with git
 
@@ -155,7 +157,8 @@ copies.  When running with highh --ploidy settings, it may be required to set
 
     freebayes -f ref.fa -p 32 --use-best-n-alleles 4 --pooled-discrete aln.bam >var.vcf
 
-Generate frequency-based calls for all variants passing input thresholds:
+Generate frequency-based calls for all variants passing input thresholds. You'd do
+this in the case that you didn't know the number of samples in the pool.
 
     freebayes -f ref.fa -F 0.01 -C 1 --pooled-continuous aln.bam >var.vcf
 
@@ -173,7 +176,7 @@ Naive variant calling: simply annotate observation counts of SNPs and indels:
     freebayes -f ref.fa --haplotype-length 0 --min-alternate-count 1 \
         --min-alternate-fraction 0 --pooled-continuous --report-monomorphic >var.vcf
 
-Parallel operation (use 36 cores in this case cores):
+Parallel operation (use 36 cores in this case):
 
     freebayes-parallel <(fasta_generate_regions.py ref.fa.fai 100000) 36 \
         -f ref.fa aln.bam >var.vcf
@@ -211,7 +214,7 @@ observation count (AO).
 * (possibly, **Iterate** the variant detection process in response to insight 
 gained from your interpretation)
 
-FreeBayes a standard VCF 4.1 outut stream.  This format is designed for the
+FreeBayes emits a standard VCF 4.1 output stream.  This format is designed for the
 probabilistic description of allelic variants within a population of samples,
 but it is equally suited to describing the probability of variation in a single
 sample.
@@ -486,8 +489,7 @@ Bayesian model.  (Our upcoming publication will discuss this in more detail.)
 A minimal pre-processing pipeline similar to that described in "Calling 
 variants" should be sufficient for most uses.  For more information, please 
 refer to a recent post by Brad Chapman [on minimal BAM preprocessing 
-methods](http://bcbio.wordpress.com/2013/10/21/updated-comparison-of-variant-det
-ection-methods-ensemble-freebayes-and-minimal-bam-preparation-pipelines/).
+methods](http://bcbio.wordpress.com/2013/10/21/updated-comparison-of-variant-detection-methods-ensemble-freebayes-and-minimal-bam-preparation-pipelines/).
 
 For a push-button solution to variant detection, from reads to variant calls, 
 look no further than the [gkno genome analysis platform](http://gkno.me/).
diff --git a/paper/main.blg b/paper/main.blg
index 8f3c05a..91a5948 100644
--- a/paper/main.blg
+++ b/paper/main.blg
@@ -1,4 +1,4 @@
-This is BibTeX, Version 0.99d (TeX Live 2013/Debian)
+This is BibTeX, Version 0.99d (TeX Live 2015/dev/Debian)
 Capacity: max_strings=35307, hash_size=35307, hash_prime=30011
 The top-level auxiliary file: main.aux
 The style file: genome_research.bst
diff --git a/paper/main.pdf b/paper/main.pdf
deleted file mode 100644
index 26bfa1e..0000000
Binary files a/paper/main.pdf and /dev/null differ
diff --git a/scripts/freebayes-parallel b/scripts/freebayes-parallel
index a293846..63bdc62 100755
--- a/scripts/freebayes-parallel
+++ b/scripts/freebayes-parallel
@@ -30,11 +30,11 @@ shift
 ncpus=$1
 shift
 
-command="freebayes $@"
+command=("freebayes" "$@")
 
 (
 #$command | head -100 | grep "^#" # generate header
 # iterate over regions using gnu parallel to dispatch jobs
-cat $regionsfile | parallel -k -j $ncpus "$command --region {}" 
+cat "$regionsfile" | parallel -k -j "$ncpus" "${command[@]}" --region {}
 ) | vcffirstheader \
     | vcfstreamsort -w 1000 | vcfuniq # remove duplicates at region edges
diff --git a/src/AlleleParser.cpp b/src/AlleleParser.cpp
index 5a2b79c..444034e 100644
--- a/src/AlleleParser.cpp
+++ b/src/AlleleParser.cpp
@@ -83,28 +83,6 @@ void AlleleParser::openBams(void) {
 
 }
 
-void AlleleParser::openTraceFile(void) {
-    if (parameters.trace) {
-        traceFile.open(parameters.traceFile.c_str(), ios::out);
-        DEBUG("Opening trace file: " << parameters.traceFile << " ...");
-        if (!traceFile) {
-            ERROR(" unable to open trace file: " << parameters.traceFile );
-            exit(1);
-        }
-    }
-}
-
-void AlleleParser::openFailedFile(void) {
-    if (!parameters.failedFile.empty()) {
-        failedFile.open(parameters.failedFile.c_str(), ios::out);
-        DEBUG("Opening failed alleles file: " << parameters.failedFile << " ...");
-        if (!failedFile) {
-            ERROR(" unable to open failed alleles file: " << parameters.failedFile );
-            exit(1);
-        }
-    }
-}
-
 void AlleleParser::openOutputFile(void) {
     if (parameters.outputFile != "") {
         outputFile.open(parameters.outputFile.c_str(), ios::out);
@@ -456,7 +434,9 @@ string AlleleParser::vcfHeader() {
         << "##INFO=<ID=MQM,Number=A,Type=Float,Description=\"Mean mapping quality of observed alternate alleles\">" << endl
         << "##INFO=<ID=MQMR,Number=1,Type=Float,Description=\"Mean mapping quality of observed reference alleles\">" << endl
         << "##INFO=<ID=PAIRED,Number=A,Type=Float,Description=\"Proportion of observed alternate alleles which are supported by properly paired read fragments\">" << endl
-        << "##INFO=<ID=PAIREDR,Number=1,Type=Float,Description=\"Proportion of observed reference alleles which are supported by properly paired read fragments\">" << endl;
+        << "##INFO=<ID=PAIREDR,Number=1,Type=Float,Description=\"Proportion of observed reference alleles which are supported by properly paired read fragments\">" << endl
+        << "##INFO=<ID=MIN,Number=1,Type=Integer,Description=\"Minimum depth in gVCF output block.\">" << endl
+        << "##INFO=<ID=END,Number=1,Type=Integer,Description=\"Last position (inclusive) in gVCF output record.\">" << endl;
 
     // sequencing technology tags, which vary according to input data
     for (vector<string>::iterator st = sequencingTechnologies.begin(); st != sequencingTechnologies.end(); ++st) {
@@ -479,6 +459,7 @@ string AlleleParser::vcfHeader() {
         << "##FORMAT=<ID=QR,Number=1,Type=Integer,Description=\"Sum of quality of the reference observations\">" << endl
         << "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">" << endl
         << "##FORMAT=<ID=QA,Number=A,Type=Integer,Description=\"Sum of quality of the alternate observations\">" << endl
+        << "##FORMAT=<ID=MIN,Number=1,Type=Integer,Description=\"Minimum depth in gVCF output block.\">" << endl
         //<< "##FORMAT=<ID=SRF,Number=1,Type=Integer,Description=\"Number of reference observations on the forward strand\">" << endl
         //<< "##FORMAT=<ID=SRR,Number=1,Type=Integer,Description=\"Number of reference observations on the reverse strand\">" << endl
         //<< "##FORMAT=<ID=SAF,Number=1,Type=Integer,Description=\"Number of alternate observations on the forward strand\">" << endl
@@ -562,96 +543,64 @@ void AlleleParser::loadFastaReference(void) {
 
 }
 
-// alignment-based method for loading the first bit of our reference sequence
-void AlleleParser::loadReferenceSequence(BamAlignment& alignment) {
-    DEBUG2("loading reference sequence overlapping first alignment");
-    currentPosition = alignment.Position;
-    rightmostHaplotypeBasisAllelePosition = currentPosition;
-    currentSequenceStart = alignment.Position;
-    currentSequenceName = referenceIDToName[alignment.RefID];
-    currentRefID = alignment.RefID;
-    DEBUG2("reference.getSubSequence("<< currentSequenceName << ", " << currentSequenceStart << ", " << alignment.AlignedBases.length() << ")");
-    currentSequence = uppercase(reference.getSubSequence(currentSequenceName, currentSequenceStart, alignment.Length));
-}
-
-// intended to load all the sequence covered by reads which overlap our current target
-// this lets us process the reads fully, checking for suspicious reads, etc.
-// but does not require us to load the whole sequence
-void AlleleParser::loadReferenceSequence(BedTarget* target, int before, int after) {
-    basesBeforeCurrentTarget = before;
-    basesAfterCurrentTarget = after;
-    DEBUG2("loading reference subsequence " << target->seq << " from " << target->left << " - " << before << " to " << target->right + 1 << " + " << after << " + before");
-    string name = reference.sequenceNameStartingWith(target->seq);
-    currentSequence = uppercase(reference.getSubSequence(name, target->left - before, (target->right + 1 - target->left) + after + before));
-    currentReferenceBase = currentReferenceBaseChar();
-}
-
-// used to extend the cached reference subsequence when we encounter a read which extends beyond its right bound
-void AlleleParser::extendReferenceSequence(int rightExtension) {
-    currentSequence += uppercase(reference.getSubSequence(reference.sequenceNameStartingWith(currentSequenceName), 
-                                                 currentTarget->right + basesAfterCurrentTarget,
-                                                 rightExtension));
-    basesAfterCurrentTarget += rightExtension;
+bool AlleleParser::hasMoreInputVariants(void) {
+    pair<int, long> next = nextInputVariantPosition();
+    return next.first != -1;
 }
 
-// maintain a 10bp window around the curent position
-// to guarantee our ability to process sequence
-void AlleleParser::preserveReferenceSequenceWindow(int bp) {
-
-    // establish left difference between ideal window size and current cached sequence
-    int leftdiff = currentSequenceStart - (floor(currentPosition) - bp);
-    // guard against falling off the left end of our sequence
-    //leftdiff = (floor(currentPosition) - leftdiff < 0) ? floor(currentPosition) : leftdiff;
-    leftdiff = (currentSequenceStart - leftdiff < 0) ? currentSequenceStart : leftdiff;
-    // right guard is not needed due to the fact that we are just attempting to
-    // append sequence substr will simply return fewer additional characters if
-    // we go past the right end of the ref
-    int rightdiff = (floor(currentPosition) + bp) - (currentSequenceStart + currentSequence.size());
-
-    if (leftdiff > 0) {
-        //cerr << currentSequenceStart << endl;
-        string left = reference.getSubSequence(currentSequenceName, currentSequenceStart, leftdiff);
-        currentSequence.insert(0, uppercase(left));
-        currentSequenceStart -= left.size();
-
-    }
-    if (rightdiff > 0) {
-        currentSequence += uppercase(reference.getSubSequence(
-                currentSequenceName,
-                (currentSequenceStart + currentSequence.size()),
-                rightdiff));  // always go 10bp past the end of what we need for alignment registration
+bool AlleleParser::loadNextPositionWithAlignmentOrInputVariant(BamAlignment& alignment) {
+    pair<int, long> next = nextInputVariantPosition();
+    if (next.first != -1) {
+        int varRefID = next.first;
+        //cerr << varRefID << " " << alignment.RefID << " " << next.second << " " << alignment.Position << endl;
+        if (!hasMoreAlignments || varRefID < alignment.RefID || varRefID == alignment.RefID && next.second < alignment.Position) {
+            return loadNextPositionWithInputVariant();
+        } else {
+            loadReferenceSequence(alignment);
+        }
+    } else {
+        loadReferenceSequence(alignment);
     }
+    return true;
 }
 
-// ensure we have cached reference sequence according to the current alignment
-void AlleleParser::extendReferenceSequence(BamAlignment& alignment) {
-
-    int leftdiff = currentSequenceStart - alignment.Position;
-    leftdiff = (currentSequenceStart - leftdiff < 0) ? currentSequenceStart : leftdiff;
-    if (leftdiff > 0) {
-        string left = reference.getSubSequence(currentSequenceName,
-                                               currentSequenceStart,
-                                               leftdiff);
-        currentSequenceStart -= left.size();
-        if (currentSequenceStart < 0) currentSequenceStart = 0;
-        currentSequence.insert(0, uppercase(left));
+bool AlleleParser::loadNextPositionWithInputVariant(void) {
+    pair<int, long> next = nextInputVariantPosition();
+    if (next.first != -1) {
+        //cerr << "Next is " << next.first << ":" << next.second << endl;
+        loadReferenceSequence(referenceIDToName[next.first]);
+        currentPosition = next.second;
+        rightmostHaplotypeBasisAllelePosition = currentPosition;
+        return true;
+    } else {
+        return false;
     }
+}
 
-    int rightdiff =
-        (alignment.Position + alignment.AlignedBases.size())
-        - (currentSequenceStart + currentSequence.size());
-    if (rightdiff > 0) {
-        currentSequence += uppercase(reference.getSubSequence(
-                                         currentSequenceName,
-                                         (currentSequenceStart + currentSequence.size()),
-                                         rightdiff));
-    }
+// alignment-based method for loading the first bit of our reference sequence
+void AlleleParser::loadReferenceSequence(BamAlignment& alignment) {
+    loadReferenceSequence(referenceIDToName[alignment.RefID]);
+    currentPosition = alignment.Position;
 }
 
-void AlleleParser::eraseReferenceSequence(int leftErasure) {
-    //cerr << "erasing leftmost " << leftErasure << "bp of cached reference sequence" << endl;
-    currentSequence.erase(0, leftErasure);
-    currentSequenceStart += leftErasure;
+void AlleleParser::loadReferenceSequence(string& seqname) {
+    if (currentSequenceName != seqname) {
+        currentSequenceName = seqname;
+        currentSequenceStart = 0;
+        currentRefID = bamMultiReader.GetReferenceID(currentSequenceName);
+        currentSequence = uppercase(reference.getSequence(currentSequenceName));
+        int i = 0; // check the first few characters and verify they are not garbage
+        for (string::iterator citr = currentSequence.begin();
+             i < 100 && citr != currentSequence.end(); ++citr, ++i) {
+            char c = *citr;
+            if (c != 'A' && c != 'T' && c != 'G' && c != 'C' && c != 'N') {
+                ERROR("Found non-DNA character " << c << " at position " << i << " in " << seqname << endl
+                      << ". Is your reference compressed or corrupted? "
+                      << "freebayes requires an uncompressed reference sequence.");
+                exit(1);
+            }
+        }
+    }
 }
 
 void AlleleParser::loadTargets(void) {
@@ -728,7 +677,7 @@ void AlleleParser::loadTargets(void) {
         // REAL BED format is 0 based, half open (end base not included)
         BedTarget bd(startSeq,
                     (startPos == 0) ? 0 : startPos,
-                    (stopPos == -1) ? reference.sequenceLength(startSeq) : stopPos - 1); // internally, we use 0-base inclusive end
+                    ((stopPos == -1) ? reference.sequenceLength(startSeq) : stopPos) - 1); // internally, we use 0-base inclusive end
         DEBUG("will process reference sequence " << startSeq << ":" << bd.left << ".." << bd.right + 1);
         targets.push_back(bd);
         bedReader.targets.push_back(bd);
@@ -739,9 +688,9 @@ void AlleleParser::loadTargets(void) {
     for (vector<BedTarget>::iterator e = targets.begin(); e != targets.end(); ++e) {
         BedTarget& bd = *e;
         // internally, we use 0-base inclusive end
-        if (bd.left < 0 || bd.right > reference.sequenceLength(bd.seq)) {
+        if (bd.left < 0 || bd.right + 1 > reference.sequenceLength(bd.seq)) {
             ERROR("Target region coordinates (" << bd.seq << " "
-                    << bd.left << " " << bd.right
+                    << bd.left << " " << bd.right + 1
                     << ") outside of reference sequence bounds ("
                     << bd.seq << " " << reference.sequenceLength(bd.seq) << ") terminating.");
             exit(1);
@@ -863,8 +812,6 @@ AlleleParser::AlleleParser(int argc, char** argv) : parameters(Parameters(argc,
     referenceSampleName = "reference_sample";
 
     // initialization
-    openTraceFile();
-    openFailedFile();
     openOutputFile();
 
     loadFastaReference();
@@ -1132,7 +1079,7 @@ void RegisteredAlignment::addAllele(Allele newAllele, bool mergeComplex, int max
                         }
                     }
 
-                    DEBUG("addAllele: mergeAllele/4:"
+                    DEBUG2("addAllele: mergeAllele/4:"
                        << " lastAllele " << lastAllele.typeStr() << "@" << lastAllele.position << ":" << lastAllele.cigar
                        << " newAllele "  << newAllele.typeStr()  << "@" << newAllele.position  << ":" << newAllele.cigar);
 
@@ -1392,10 +1339,6 @@ RegisteredAlignment& AlleleParser::registerAlignment(BamAlignment& alignment, Re
         updateHaplotypeBasisAlleles(sp, alignment.AlignedBases.size());
     }
 
-    if (usingVariantInputAlleles) {
-         updateInputVariants(sp, alignment.AlignedBases.size());
-    }
-
 #ifdef VERBOSE_DEBUG
     if (parameters.debug2) {
         DEBUG2("registering alignment " << rp << " " << csp << " " << sp << endl <<
@@ -1968,7 +1911,7 @@ void AlleleParser::updateAlignmentQueue(long int position,
            << "; currentSequence end = " << currentSequence.size() + currentSequenceStart);
 
     // make sure we have sequence for the *first* alignment
-    extendReferenceSequence(currentAlignment);
+    //extendReferenceSequence(currentAlignment);
 
     // push to the front until we get to an alignment that doesn't overlap our
     // current position or we reach the end of available alignments
@@ -1985,7 +1928,7 @@ void AlleleParser::updateAlignmentQueue(long int position,
         && currentAlignment.RefID == currentRefID) {
         do {
             DEBUG2("top of alignment parsing loop");
-            DEBUG2("currentAlignment.Name == " << currentAlignment.Name);
+            DEBUG("alignment: " << currentAlignment.Name);
             // get read group, and map back to a sample name
             string readGroup;
             if (!currentAlignment.GetTag("RG", readGroup)) {
@@ -2049,7 +1992,7 @@ void AlleleParser::updateAlignmentQueue(long int position,
             // initially skip reads with low mapping quality (what happens if MapQuality is not in the file)
             if (currentAlignment.MapQuality >= parameters.MQL0) {
                 // extend our cached reference sequence to allow processing of this alignment
-                extendReferenceSequence(currentAlignment);
+                //extendReferenceSequence(currentAlignment);
                 // left realign indels
                 if (parameters.leftAlignIndels) {
                     int length = currentAlignment.GetEndPosition() - currentAlignment.Position + 1;
@@ -2129,12 +2072,137 @@ void AlleleParser::updateRegisteredAlleles(void) {
 
     alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
 
-    if (lowestPosition <= currentPosition) {
-        int diff = lowestPosition - currentSequenceStart;
-        // do we have excess bases beyond the current lowest position - cached_reference_window?
-        // if so, erase them
-        if (diff > CACHED_REFERENCE_WINDOW) {
-            eraseReferenceSequence(diff - CACHED_REFERENCE_WINDOW);
+}
+
+pair<int, long int> AlleleParser::nextInputVariantPosition(void) {
+    // are we past the last one in the sequence?
+    if (usingVariantInputAlleles &&
+        ((inputVariantAlleles.find(currentRefID) != inputVariantAlleles.end()
+          && inputVariantAlleles[currentRefID].upper_bound(currentPosition) != inputVariantAlleles[currentRefID].end())
+         || inputVariantAlleles.upper_bound(currentRefID) != inputVariantAlleles.end())) {
+        map<long, vector<Allele> >& inChrom = inputVariantAlleles[currentRefID];
+        map<long, vector<Allele> >::iterator ic = inChrom.upper_bound(currentPosition);
+        if (ic != inChrom.end()) {
+            return make_pair(currentRefID, ic->first);
+        } else {
+            // find next chrom with input alleles
+            map<int, map<long, vector<Allele> > >::iterator nc = inputVariantAlleles.upper_bound(currentRefID);
+            if (nc != inputVariantAlleles.end()) {
+                return make_pair(nc->first, nc->second.begin()->first);
+            } else {
+                return make_pair(-1, 0);
+            }
+        }
+    }
+    return make_pair(-1, 0);
+}
+
+void AlleleParser::getAllInputVariants(void) {
+    string nullstr;
+    getInputVariantsInRegion(nullstr);
+}
+
+void AlleleParser::getInputVariantsInRegion(string& seq, long start, long end) {
+
+    if (!usingVariantInputAlleles) return;
+
+    // get the variants in the target region
+    vcf::Variant var(variantCallInputFile);
+    if (!seq.empty()) {
+        variantCallInputFile.setRegion(seq, start, end);
+    }
+    bool ok;
+    while (ok = variantCallInputFile.getNextVariant(*currentVariant)) {
+
+        long int pos = currentVariant->position - 1;
+        // get alternate alleles
+        bool includePreviousBaseForIndels = true;
+        map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
+        // TODO this would be a nice option: why does it not work?
+        //map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
+        vector< vector<vcf::VariantAllele> > orderedVariantAlleles;
+        for (vector<string>::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) {
+            orderedVariantAlleles.push_back(variantAlleles[*a]);
+        }
+
+        vector<Allele> genotypeAlleles;
+        set<long int> alternatePositions;
+
+        for (vector< vector<vcf::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
+
+            vector<vcf::VariantAllele>& altAllele = *g;
+
+            vector<Allele> alleles;
+
+            for (vector<vcf::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
+                vcf::VariantAllele& variant = *v;
+                long int allelePos = variant.position - 1;
+                AlleleType type;
+                string alleleSequence = variant.alt;
+
+                int len = 0;
+                int reflen = 0;
+                string cigar;
+
+                // XXX
+                // FAIL
+                // you need to add in the reference bases between the non-reference ones!
+                // to allow for complex events!
+
+                if (variant.ref == variant.alt) {
+                    // XXX note that for reference alleles, we only use the first base internally
+                    // but this is technically incorrect, so this hack should be noted
+                    len = variant.ref.size();
+                    reflen = len;
+                    //alleleSequence = alleleSequence.at(0); // take only the first base
+                    type = ALLELE_REFERENCE;
+                    cigar = convert(len) + "M";
+                } else if (variant.ref.size() == variant.alt.size()) {
+                    len = variant.ref.size();
+                    reflen = len;
+                    if (variant.ref.size() == 1) {
+                        type = ALLELE_SNP;
+                    } else {
+                        type = ALLELE_MNP;
+                    }
+                    cigar = convert(len) + "X";
+                } else if (variant.ref.size() > variant.alt.size()) {
+                    type = ALLELE_DELETION;
+                    len = variant.ref.size() - variant.alt.size();
+                    allelePos -= 1;
+                    reflen = len + 2;
+                    alleleSequence =
+                        uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos, 1))
+                        + alleleSequence
+                        + uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos+1+len, 1));
+                    cigar = "1M" + convert(len) + "D" + "1M";
+                } else {
+                    // we always include the flanking bases for these elsewhere, so here too in order to be consistent and trigger use
+                    type = ALLELE_INSERTION;
+                    // add previous base and post base to match format typically used for calling
+                    allelePos -= 1;
+                    alleleSequence =
+                        uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos, 1))
+                        + alleleSequence
+                        + uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos+1, 1));
+                    len = variant.alt.size() - var.ref.size();
+                    cigar = "1M" + convert(len) + "I" + "1M";
+                    reflen = 2;
+                }
+                // TODO deal woth complex subs
+
+                Allele allele = genotypeAllele(type, alleleSequence, (unsigned int) len, cigar, (unsigned int) reflen, allelePos);
+                DEBUG("input allele: " << allele.referenceName << " " << allele);
+                //cerr << "input allele: " << allele.referenceName << " " << allele << endl;
+
+                //alleles.push_back(allele);
+                genotypeAlleles.push_back(allele);
+
+                if (allele.type != ALLELE_REFERENCE) {
+                    inputVariantAlleles[bamMultiReader.GetReferenceID(currentVariant->sequenceName)][allele.position].push_back(allele);
+                    alternatePositions.insert(allele.position);
+                }
+            }
         }
     }
 }
@@ -2150,17 +2218,27 @@ void AlleleParser::updateInputVariants(long int pos, int referenceLength) {
             start = rightmostHaplotypeBasisAllelePosition;
         }
 
-        //stringstream r;
-        //r << currentSequenceName << ":" << start
-        //  << "-" << pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
-        //cerr << "getting variants in " << r.str() << endl;
+        /*
+        stringstream r;
+        r << currentSequenceName << ":" << start
+          << "-" << pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
+        cerr << "getting variants in " << r.str() << endl;
+        */
 
         // tabix expects 1-based, fully closed regions for ti_parse_region()
         // (which is what setRegion() calls eventually)
-        if (variantCallInputFile.setRegion(currentSequenceName,
-                                           start + 1,
-                                           pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1)) {
-            //cerr << "the vcf line " << haplotypeVariantInputFile.line << endl;
+        bool gotRegion = false;
+        if (referenceLength > 0) {
+            gotRegion = variantCallInputFile.setRegion(currentSequenceName,
+                                                       start + 1,
+                                                       pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1);
+        } else {
+            // whole chromosome
+            gotRegion = variantCallInputFile.setRegion(currentSequenceName);
+        }
+
+        if (gotRegion) {
+
             // get the variants in the target region
             vcf::Variant var(variantCallInputFile);
             bool ok;
@@ -2245,13 +2323,13 @@ void AlleleParser::updateInputVariants(long int pos, int referenceLength) {
                         // TODO deal woth complex subs
 
                         Allele allele = genotypeAllele(type, alleleSequence, (unsigned int) len, cigar, (unsigned int) reflen, allelePos);
-                        DEBUG("input allele: " << allele);
+                        DEBUG("input allele: " << allele.referenceName << " " << allele);
 
                         //alleles.push_back(allele);
                         genotypeAlleles.push_back(allele);
 
                         if (allele.type != ALLELE_REFERENCE) {
-                            inputVariantAlleles[allele.position].push_back(allele);
+                            inputVariantAlleles[bamMultiReader.GetReferenceID(allele.referenceName)][allele.position].push_back(allele);
                             alternatePositions.insert(allele.position);
                         }
 
@@ -2261,127 +2339,6 @@ void AlleleParser::updateInputVariants(long int pos, int referenceLength) {
 
                 // store the allele counts, if they are provided
                 //
-                continue;
-
-                // xxx wverything here is skipped
-                if (currentVariant->info.find("AC") != currentVariant->info.end()
-                    && currentVariant->info.find("AN") != currentVariant->info.end()) {
-                    vector<string>& afstrs = currentVariant->info["AC"];
-                    // XXX is the reference allele included?
-                    vector<Allele>& altAlleles = inputVariantAlleles[currentVariant->position - 1];
-                    vector<Allele>::iterator al = altAlleles.begin();
-                    assert(altAlleles.size() == afstrs.size());
-                    // XXX TODO include the reference allele--- how???
-                    // do we add another tag to the VCF, use NS, or some other data?
-                    map<Allele, int>& afs = inputAlleleCounts[currentVariant->position - 1];
-                    int altcountsum = 0;
-                    for (vector<string>::iterator afstr = afstrs.begin(); afstr != afstrs.end(); ++afstr, ++al) {
-                        int c; convert(*afstr, c);
-                        altcountsum += c;
-                        afs[*al] = c;
-                    }
-                    int numalleles; convert(currentVariant->info["AN"].front(), numalleles);
-                    int refcount = numalleles - altcountsum;
-                    assert(refcount >= 0);
-                    if (refcount > 0) {
-                        // make ref allele
-                        string ref = currentVariant->ref.substr(0, 1);
-                        Allele refAllele = genotypeAllele(ALLELE_REFERENCE, ref, ref.size(), "1M", ref.size(), currentVariant->position - 1);
-                        // and also cache the observed count of the reference allele
-                        afs[refAllele] = refcount;
-                    }
-                }
-
-                if (currentVariant->samples.empty()) {
-                    continue;
-                }
-
-                if (alternatePositions.size() == 1) {
-
-                    // there can be only one alternate allele position, per immediately previous check
-                    long int alternatePosition = *alternatePositions.begin();
-
-                    map<int, vector<Genotype> > genotypesByPloidy;
-
-                    // XXX hard-coded to a specific set of ploidys
-                    for (int p = 1; p <= 2; ++p) {
-                        vector<Genotype> genotypes = allPossibleGenotypes(p, genotypeAlleles);
-                        genotypesByPloidy[p] = genotypes;
-                    }
-
-                    map<int, map<Genotype*, int> > vcfGenotypeOrder;
-                    for (map<int, vector<Genotype> >::iterator gtg = genotypesByPloidy.begin(); gtg != genotypesByPloidy.end(); ++gtg) {
-                        int groupPloidy = gtg->first;
-                        vector<Genotype>& genotypes = gtg->second;
-                        for (vector<Genotype>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
-                            Genotype* genotypePtr = &*g;
-                            Genotype& genotype = *g;
-                            vector<int> gtspec;
-                            genotype.relativeGenotype(gtspec, genotypeAlleles);
-                            // XXX TODO ... EVIL HACKS
-                            if (groupPloidy == 2) {
-                                int j = gtspec.front();
-                                int k = gtspec.back();
-                                vcfGenotypeOrder[groupPloidy][genotypePtr] = (k * (k + 1) / 2) + j;
-                            } else if (groupPloidy == 1) {
-                                vcfGenotypeOrder[groupPloidy][genotypePtr] = gtspec.front();
-                            } else {
-                                // XXX TODO ...
-                            }
-                        }
-                    }
-
-                    // get the GLs for each sample, and store for use in later computation
-                    for (map<string, map<string, vector<string> > >::iterator s = currentVariant->samples.begin();
-                            s != currentVariant->samples.end(); ++s) {
-
-                        string sampleName = s->first;
-                        map<string, vector<string> >& sample = s->second;
-                        string& gt = sample["GT"].front();
-                        map<int, int> genotype = vcf::decomposeGenotype(gt);
-
-                        // default case, give a fixed count for each observed allele
-                        int ploidy = 0;
-                        for (map<int, int>::iterator g = genotype.begin(); g != genotype.end(); ++g) {
-                            ploidy += g->second;
-                        }
-
-                        if (ploidy > 2) {
-                            cerr << "warning, cannot handle ploidy > 2 for in the input VCF due to limitations "
-                                 << "of the VCF specification's definition of Genotype ordering" << endl;
-                            continue;
-                        }
-
-                        // in the case that we have genotype likelihoods in the VCF
-                        if (sample.find("GL") != sample.end()) {
-                            vector<string>& gls = sample["GL"];
-                            vector<long double> genotypeLikelihoods;
-                            genotypeLikelihoods.resize(gls.size());
-                            transform(gls.begin(), gls.end(), genotypeLikelihoods.begin(), log10string2ln);
-
-                            // now map the gls into genotype space
-                            map<Genotype*, int>& genotypeOrder = vcfGenotypeOrder[ploidy];
-                            for (map<Genotype*, int>::iterator gto = genotypeOrder.begin(); gto != genotypeOrder.end(); ++gto) {
-                                Genotype& genotype = *gto->first;
-                                int order = gto->second;
-                                map<string, long double>& sampleGenotypeLikelihoods = inputGenotypeLikelihoods[alternatePosition][sampleName];
-                                //cerr << sampleName << ":" << convert(genotype) << ":" << genotypeLikelihoods[order] << endl;
-                                sampleGenotypeLikelihoods[convert(genotype)] = genotypeLikelihoods[order];
-                            }
-                        }
-                    }
-
-                } else {
-                    /*
-                    cerr << "warning, ambiguous VCF record (multiple mixed variant classes), unable to use for genotype likelihood input:" << endl
-                        << *currentVariant << endl;
-                    for (set<long int>::iterator i = alternatePositions.begin(); i != alternatePositions.end(); ++i) {
-                        cerr << "has position: " << *i << endl;
-                    }
-                         //<< currentVariant->sequenceName << ":" << currentVariant->position << endl;
-                    */
-                }
-
             }
 
             if (!ok) hasMoreVariants = false;
@@ -2396,11 +2353,12 @@ void AlleleParser::updateInputVariants(long int pos, int referenceLength) {
         }
         */
         //rightmostHaplotypeBasisAllelePosition = pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
-        rightmostInputAllelePosition = pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
+        //rightmostInputAllelePosition = pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
     }
 
 }
 
+/*
 void AlleleParser::addCurrentGenotypeLikelihoods(map<int, vector<Genotype> >& genotypesByPloidy,
     vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods) {
 
@@ -2462,55 +2420,6 @@ void AlleleParser::getInputAlleleCounts(vector<Allele>& genotypeAlleles, map<str
         }
     }
 }
-
-/*
-void AlleleParser::__removeNonOverlappingAlleles(vector<Allele*>& alleles, int haplotypeLength, bool getAllAllelesInHaplotype) {
-    for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
-        Allele* allele = *a;
-        if (haplotypeLength > 1) {
-            if (allele->type == ALLELE_REFERENCE) {
-                // does the reference allele overlap the haplotype
-                if (getAllAllelesInHaplotype
-                        && !(currentPosition <= allele->position && allele->position < currentPosition + haplotypeLength)) {
-                    *a = NULL;
-                } else if (!(allele->position <= currentPosition
-                        && allele->position + allele->referenceLength >= currentPosition + haplotypeLength)) {
-                    *a = NULL;
-                } else if (currentPosition < allele->position) { // not there yet
-                    allele->processed = false;
-                    *a = NULL;
-                }
-            } else { // snps, insertions, deletions
-                if (getAllAllelesInHaplotype
-                        && !(currentPosition <= allele->position && allele->position < currentPosition + haplotypeLength)) {
-                    *a = NULL;
-                } else if (!(currentPosition == allele->position && allele->referenceLength == haplotypeLength)) {
-                    *a = NULL;
-                } else if (currentPosition + haplotypeLength <= allele->position) {
-                    allele->processed = false;
-                    *a = NULL;
-                }
-            }
-        } else {
-            if (allele->type == ALLELE_REFERENCE) {
-                if (!(allele->position + allele->referenceLength > currentPosition)) {
-                    *a = NULL;
-                } else if (currentPosition < allele->position) {
-                    allele->processed = false;
-                    *a = NULL;
-                }
-            } else { // snps, insertions, deletions
-                if (currentPosition >= allele->position) {
-                    *a = NULL;
-                } else if (currentPosition < allele->position) {
-                    allele->processed = false;
-                    *a = NULL;
-                }
-            }
-        }
-    }
-    alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-}
 */
 
 void AlleleParser::removeAllelesWithoutReadSpan(vector<Allele*>& alleles, int probeLength, int haplotypeLength) {
@@ -2598,16 +2507,17 @@ void AlleleParser::removePreviousAlleles(vector<Allele*>& alleles) {
 // false otherwise
 bool AlleleParser::toNextTarget(void) {
 
-    DEBUG("seeking to next target with alignments...");
+    DEBUG("to next target");
+
+    clearRegisteredAlignments();
 
     // reset haplotype length; there is no last call in this sequence; it isn't relevant
     lastHaplotypeLength = 0;
 
-    // XXX
-    // XXX
-    // TODO cleanup the logic in this section
-    // XXX
-    // XXX
+    if (targets.empty() && usingVariantInputAlleles) {
+        // we are processing everything, so load the entire input variant allele set
+        getAllInputVariants();
+    }
 
     // load first target if we have targets and have not loaded the first
     if (!parameters.useStdin && !targets.empty()) {
@@ -2629,27 +2539,10 @@ bool AlleleParser::toNextTarget(void) {
             }
         }
 
-        if (ok) {
-
-            // XXX hack
-            clearRegisteredAlignments();
-            currentSequenceStart = currentAlignment.Position;
-            currentSequenceName = referenceIDToName[currentAlignment.RefID];
-            currentRefID = currentAlignment.RefID;
-            currentPosition = (currentPosition < currentAlignment.Position) ? currentAlignment.Position : currentPosition;
-            currentSequence = uppercase(reference.getSubSequence(currentSequenceName, currentSequenceStart, currentAlignment.Length));
-            rightmostHaplotypeBasisAllelePosition = currentPosition;
-
-        } else {
-            if (hasMoreVariants) {
-                DEBUG("continuing because we have more variants");
-                return true;
-            } else {
-                return false; // last target, no variants, and couldn't get alignment
-            }
+        if (!ok) {
+            return loadNextPositionWithInputVariant();
         }
 
-
     // stdin, no targets cases
     } else if (!currentTarget && (parameters.useStdin || targets.empty())) {
         // if we have a target for limiting the analysis, use it
@@ -2662,23 +2555,26 @@ bool AlleleParser::toNextTarget(void) {
             ERROR("Could not get first alignment from target");
             return false;
         }
-        clearRegisteredAlignments();
-        loadReferenceSequence(currentAlignment); // this seeds us with new reference sequence
-
+        loadNextPositionWithAlignmentOrInputVariant(currentAlignment);
+        //loadReferenceSequence(currentAlignment); // this seeds us with new reference sequence
+        // however, if we have a target list of variants and we should also respect them
     // we've reached the end of file, or stdin
     } else if (parameters.useStdin || targets.empty()) {
         return false;
     }
 
-    DEBUG("getting first variant");
-    getFirstVariant();
-    DEBUG("got first variant");
+    if (currentTarget && usingVariantInputAlleles) {
+        getInputVariantsInRegion(currentTarget->seq, currentTarget->left, currentTarget->right);
+    }
+
+    loadReferenceSequence(currentSequenceName);
 
     justSwitchedTargets = true;
     return true;
 
 }
 
+
 // TODO refactor this to allow reading from stdin or reading the whole file
 // without loading each sequence as a target
 bool AlleleParser::loadTarget(BedTarget* target) {
@@ -2686,21 +2582,17 @@ bool AlleleParser::loadTarget(BedTarget* target) {
     currentTarget = target;
 
     DEBUG("processing target " << currentTarget->desc << " " <<
-            currentTarget->seq << " " << currentTarget->left << " " <<
-            currentTarget->right + 1);
+          currentTarget->seq << " " << currentTarget->left << " " <<
+          currentTarget->right + 1);
     DEBUG2("loading target reference subsequence");
 
-    currentSequenceName = currentTarget->seq;
-
-    int refSeqID = bamMultiReader.GetReferenceID(currentSequenceName);
-
-    DEBUG2("reference sequence id " << refSeqID);
+    loadReferenceSequence(currentTarget->seq);
 
     DEBUG2("setting new position " << currentTarget->left);
     currentPosition = currentTarget->left;
     rightmostHaplotypeBasisAllelePosition = currentTarget->left;
 
-    if (!bamMultiReader.SetRegion(refSeqID, currentTarget->left, refSeqID, currentTarget->right + 1)) { // bamtools expects 0-based, half-open
+    if (!bamMultiReader.SetRegion(currentRefID, currentTarget->left, currentRefID, currentTarget->right + 1)) { // bamtools expects 0-based, half-open
         ERROR("Could not SetRegion to " << currentTarget->seq << ":" << currentTarget->left << ".." << currentTarget->right + 1);
         cerr << bamMultiReader.GetErrorString() << endl;
         return false;
@@ -2750,9 +2642,9 @@ bool AlleleParser::getFirstAlignment(void) {
         DEBUG2("got first alignment in target region");
     } else {
         if (currentTarget) {
-            WARNING("Could not find any mapped reads in target region " << currentSequenceName << ":" << currentTarget->left << ".." << currentTarget->right + 1);
+            DEBUG("Could not find any mapped reads in target region " << currentSequenceName << ":" << currentTarget->left << ".." << currentTarget->right + 1);
         } else {
-            WARNING("Could not find any mapped reads in target region " << currentSequenceName);
+            DEBUG("Could not find any mapped reads in target region " << currentSequenceName);
         }
         return false;
     }
@@ -2811,75 +2703,82 @@ void AlleleParser::clearRegisteredAlignments(void) {
 // if none exist, return false
 //
 bool AlleleParser::toNextPosition(void) {
-
-    // either bail out
+    // is this our first position? (indicated by empty currentSequenceName)
+    // if so, load it up
+    bool first_pos = false;
     if (currentSequenceName.empty()) {
         DEBUG("loading first target");
         if (!toNextTarget()) {
             return false;
         }
-    } 
-    // or step to the next position
-    else {
-        ++currentPosition;
-    }
-
-    // if we've run off the right edge of a target
-    if (!targets.empty() && currentPosition > currentTarget->right) { // time to move to a new target
-        DEBUG("next position " << (long int) currentPosition <<  " outside of current target right bound " << currentTarget->right + 1);
-        // try to get to the next one, and if this fails, bail out
-        if (!toNextTarget()) {
-            DEBUG("no more targets, finishing");
-            return false;
-        }
+        first_pos = true;
     }
 
-    // in the stdin, or no targets case
     // here we assume we are processing an entire BAM or one contiguous region
-    if ((parameters.useStdin && targets.empty()) || targets.empty()) {
-        // implicit step of target sequence
-        // XXX this must wait for us to clean out all of our alignments at the end of the target
-
+    if (parameters.useStdin || targets.empty()) {
         // here we loop over unaligned reads at the beginning of a target
         // we need to get to a mapped read to figure out where we are
         while (hasMoreAlignments && !currentAlignment.IsMapped()) {
             hasMoreAlignments = bamMultiReader.GetNextAlignment(currentAlignment);
         }
-        // now, if the current position of this alignment is outside of the reference sequence length, switch references
-        if (hasMoreAlignments) {
+        // determine if we have more alignments or not
+        if (!hasMoreAlignments) {
+            if (hasMoreInputVariants()) {
+                // continue as we have more variants
+                DEBUG("continuing because we have more input variants");
+                loadNextPositionWithInputVariant();
+            } else if (registeredAlignments.empty()) {
+                DEBUG("no more alignments in input");
+                return false;
+            } else if (currentPosition >= currentSequence.size() + currentSequenceStart) {
+                DEBUG("no more alignments in input");
+                DEBUG("at end of sequence");
+                return false;
+            } else {
+                ++currentPosition;
+            }
+        } else {
+            // step the position
+            ++currentPosition;
+            // if the current position of this alignment is outside of the reference sequence length
+            // we need to switch references
             if (currentPosition >= reference.sequenceLength(currentSequenceName)
                 || registeredAlignments.empty() && currentRefID != currentAlignment.RefID) {
                 DEBUG("at end of sequence");
                 clearRegisteredAlignments();
-                loadReferenceSequence(currentAlignment);
+                loadNextPositionWithAlignmentOrInputVariant(currentAlignment);
                 justSwitchedTargets = true;
             }
-        // here, if we have run out of alignments
-        } else if (!hasMoreAlignments) {
-            if (registeredAlignments.empty()) {
-                DEBUG("no more alignments in input");
-                return false;
-            } else if (currentPosition >= currentSequence.size() + currentSequenceStart) {
-                DEBUG("at end of sequence");
+        }
+    } else {
+        // or if it's not we should step to the next position
+        ++currentPosition;
+        // if we've run off the right edge of a target, jump
+        if (currentPosition > currentTarget->right) {
+            // time to move to a new target
+            DEBUG("next position " << (long int) currentPosition
+                  <<  " outside of current target right bound " << currentTarget->right + 1);
+            // try to get to the next one, and if this fails, bail out
+            if (!toNextTarget()) {
+                DEBUG("no more targets, finishing");
                 return false;
             }
+            justSwitchedTargets = true;
         }
     }
 
     // so we have to make sure it's still there (this matters in low-coverage)
-    DEBUG2("updating reference sequence cache");
-    preserveReferenceSequenceWindow(CACHED_REFERENCE_WINDOW);
     currentReferenceBase = currentReferenceBaseChar();
 
     // handle the case in which we don't have targets but in which we've switched reference sequence
 
-    DEBUG2("processing position " << (long unsigned int) currentPosition + 1 << " in sequence " << currentSequenceName);
+    DEBUG("processing position " << (long unsigned int) currentPosition + 1 << " in sequence " << currentSequenceName);
     vector<Allele*> newAlleles;
     updateAlignmentQueue(currentPosition, newAlleles);
     addToRegisteredAlleles(newAlleles);
     DEBUG2("updating variants");
     // done typically at each new read, but this handles the case where there is no data for a while
-    updateInputVariants(currentPosition, 1);
+    //updateInputVariants(currentPosition, 1);
 
     DEBUG2("updating registered alleles");
     updateRegisteredAlleles(); // this removes unused left-flanking sequence
@@ -2902,34 +2801,26 @@ bool AlleleParser::toNextPosition(void) {
     registeredAlleles.erase(unique(registeredAlleles.begin(), registeredAlleles.end()), registeredAlleles.end());
 
     // and do the same for the variants from the input VCF
+    /*
     DEBUG2("erasing old input variant alleles");
-    map<long int, vector<Allele> >::iterator v = inputVariantAlleles.find(currentPosition - 3);
-    if (v != inputVariantAlleles.end()) {
-        inputVariantAlleles.erase(v);
+    if (inputVariantAlleles.find(currentSequenceName) != inputVariantAlleles.end()) {
+        map<long int, vector<Allele> >::iterator v = inputVariantAlleles[currentSequenceName].begin();
+        while (v != inputVariantAlleles[currentSequenceName].end() && v->first < currentPosition) {
+            inputVariantAlleles[currentSequenceName].erase(v++);
+        }
     }
+    */
 
     DEBUG2("erasing old input haplotype basis alleles");
-    map<long int, vector<AllelicPrimitive> >::iterator z = haplotypeBasisAlleles.find(currentPosition - 3);
-    if (z != haplotypeBasisAlleles.end()) {
-        haplotypeBasisAlleles.erase(z);
-    }
-
-    DEBUG2("erasing old genotype likelihoods");
-    map<long int, map<string, map<string, long double> > >::iterator l = inputGenotypeLikelihoods.find(currentPosition - 3);
-    if (l != inputGenotypeLikelihoods.end()) {
-        inputGenotypeLikelihoods.erase(l);
-    }
-
-    DEBUG2("erasing old allele frequencies");
-    map<long int, map<Allele, int> >::iterator af = inputAlleleCounts.find(currentPosition - 3);
-    if (af != inputAlleleCounts.end()) {
-        inputAlleleCounts.erase(af);
+    map<long int, vector<AllelicPrimitive> >::iterator z = haplotypeBasisAlleles.begin();
+    while (z != haplotypeBasisAlleles.end() && z->first < currentPosition) {
+        haplotypeBasisAlleles.erase(z++);
     }
 
     DEBUG2("erasing old cached repeat counts");
-    map<long int, map<string, int> >::iterator rc = cachedRepeatCounts.find(currentPosition - 3);
-    if (rc != cachedRepeatCounts.end()) {
-        cachedRepeatCounts.erase(rc);
+    map<long int, map<string, int> >::iterator rc = cachedRepeatCounts.begin();
+    while (rc != cachedRepeatCounts.end() && rc->first < currentPosition) {
+        cachedRepeatCounts.erase(rc++);
     }
 
     return true;
@@ -3258,6 +3149,7 @@ void AlleleParser::buildHaplotypeAlleles(
         vector<Allele*> haplotypeObservations;
         getCompleteObservationsOfHaplotype(samples, haplotypeLength, haplotypeObservations);
         addToRegisteredAlleles(haplotypeObservations);
+        DEBUG("added to registered alleles");
 
         // add partial observations
         // first get all the alleles up to the end of the haplotype window
@@ -3265,6 +3157,7 @@ void AlleleParser::buildHaplotypeAlleles(
         if (parameters.usePartialObservations && haplotypeLength > 1) {
             getPartialObservationsOfHaplotype(samples, haplotypeLength, partialHaplotypeObservations);
         }
+        DEBUG("got partial observations of haplotype");
         //addToRegisteredAlleles(partialHaplotypeObservations);
         // now align the sequences of these alleles to the haplotype alleles
         // and put them into the partials bin in each sample
@@ -3285,6 +3178,7 @@ void AlleleParser::buildHaplotypeAlleles(
             (*p)->setQuality();
             (*p)->update(haplotypeLength);
         }
+        DEBUG("done updating");
 
         // debugging
         /*
@@ -3382,6 +3276,13 @@ void AlleleParser::buildHaplotypeAlleles(
             alleles = alleleUnion(alleles, refAlleleVector);
         }
 
+        // this is where we have established our genotype alleles
+        /*
+        for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
+            cerr << "genotype allele " << &*a << " " << *a << endl;
+        }
+        */
+
         // pick up observations that are potentially partial (not unambiguous)
         // the way to do this is to test the full observations as if they are partial, and if they
         // end up partially supporting multiple observations, removing them from the "complete" observations
@@ -3520,6 +3421,7 @@ bool AlleleParser::getCompleteObservationsOfHaplotype(Samples& samples, int hapl
             }
         }
     }
+    DEBUG("got complete observations of haplotype");
 }
 
 void AlleleParser::unsetAllProcessedFlags(void) {
@@ -3542,7 +3444,9 @@ bool AlleleParser::getPartialObservationsOfHaplotype(Samples& samples, int haplo
     vector<Allele*> newAlleles;
 
     bool gettingPartials = true;
+    DEBUG("in AlleleParser::getPartialObservationsOfHaplotype, updating alignment queue");
     updateAlignmentQueue(currentPosition + haplotypeLength, newAlleles, gettingPartials);
+    DEBUG("in AlleleParser::getPartialObservationsOfHaplotype, done updating alignment queue");
 
     vector<Allele*> otherObs;
     vector<Allele*> partialObs;
@@ -3550,6 +3454,7 @@ bool AlleleParser::getPartialObservationsOfHaplotype(Samples& samples, int haplo
     // get the max alignment end position, iterate to there
     long int maxAlignmentEnd = registeredAlignments.rbegin()->first;
     for (long int i = currentPosition+1; i < maxAlignmentEnd; ++i) {
+        DEBUG("getting partial observations of haplotype @" << i);
         deque<RegisteredAlignment>& ras = registeredAlignments[i];
         for (deque<RegisteredAlignment>::iterator r = ras.begin(); r != ras.end(); ++r) {
             RegisteredAlignment& ra = *r;
@@ -3587,6 +3492,7 @@ bool AlleleParser::getNextAlleles(Samples& samples, int allowedAlleleTypes) {
         if (!toNextPosition()) {
             return false;
         } else {
+            // triggers cleanup
             if (justSwitchedTargets) {
                 nextPosition = 0;
                 justSwitchedTargets = false;
@@ -3928,22 +3834,24 @@ vector<Allele> AlleleParser::genotypeAlleles(
 
     // this needs to be fixed in a big way
     // the alleles have to be put into the local haplotype structure
-    map<long int, vector<Allele> >::iterator v = inputVariantAlleles.find(currentPosition);
-    if (v != inputVariantAlleles.end()) {
-        vector<Allele>& inputalleles = v->second;
-        for (vector<Allele>::iterator a = inputalleles.begin(); a != inputalleles.end(); ++a) {
-            DEBUG("evaluating input allele " << *a);
-            Allele& allele = *a;
-            // check if the allele is already present
-            bool alreadyPresent = false;
-            for (vector<Allele>::iterator r = resultAlleles.begin(); r != resultAlleles.end(); ++r) {
-                if (r->equivalent(allele)) {
-                    alreadyPresent = true;
-                    break;
+    if (inputVariantAlleles.find(currentRefID) != inputVariantAlleles.end()) {
+        map<long int, vector<Allele> >::iterator v = inputVariantAlleles[currentRefID].find(currentPosition);
+        if (v != inputVariantAlleles[currentRefID].end()) {
+            vector<Allele>& inputalleles = v->second;
+            for (vector<Allele>::iterator a = inputalleles.begin(); a != inputalleles.end(); ++a) {
+                DEBUG("evaluating input allele " << *a);
+                Allele& allele = *a;
+                // check if the allele is already present
+                bool alreadyPresent = false;
+                for (vector<Allele>::iterator r = resultAlleles.begin(); r != resultAlleles.end(); ++r) {
+                    if (r->equivalent(allele)) {
+                        alreadyPresent = true;
+                        break;
+                    }
+                }
+                if (!alreadyPresent) {
+                    resultAlleles.push_back(allele);
                 }
-            }
-            if (!alreadyPresent) {
-                resultAlleles.push_back(allele);
             }
         }
     }
@@ -4053,14 +3961,14 @@ bool AlleleParser::isRepeatUnit(const string& seq, const string& unit) {
 
 }
 
-
 bool AlleleParser::hasInputVariantAllelesAtCurrentPosition(void) {
-    map<long int, vector<Allele> >::iterator v = inputVariantAlleles.find(currentPosition);
-    if (v != inputVariantAlleles.end()) {
-        return true;
-    } else {
-        return false;
+    if (inputVariantAlleles.find(currentRefID) != inputVariantAlleles.end()) {
+        map<long int, vector<Allele> >::iterator v = inputVariantAlleles[currentRefID].find(currentPosition);
+        if (v != inputVariantAlleles[currentRefID].end()) {
+            return true;
+        }
     }
+    return false;
 }
 
 bool operator<(const AllelicPrimitive& a, const AllelicPrimitive& b) {
diff --git a/src/AlleleParser.h b/src/AlleleParser.h
index de65014..0eed6be 100644
--- a/src/AlleleParser.h
+++ b/src/AlleleParser.h
@@ -196,12 +196,19 @@ public:
 
     vector<Allele*> registeredAlleles;
     map<long unsigned int, deque<RegisteredAlignment> > registeredAlignments;
-    map<long int, vector<Allele> > inputVariantAlleles; // all variants present in the input VCF, as 'genotype' alleles
+    map<int, map<long int, vector<Allele> > > inputVariantAlleles; // all variants present in the input VCF, as 'genotype' alleles
+    pair<int, long int> nextInputVariantPosition(void);
+    void getInputVariantsInRegion(string& seq, long start = 0, long end = 0);
+    void getAllInputVariants(void);
     //  position         sample     genotype  likelihood
-    map<long int, map<string, map<string, long double> > > inputGenotypeLikelihoods; // drawn from input VCF
-    map<long int, map<Allele, int> > inputAlleleCounts; // drawn from input VCF
+    map<string, map<long int, map<string, map<string, long double> > > > inputGenotypeLikelihoods; // drawn from input VCF
+    map<string, map<long int, map<Allele, int> > > inputAlleleCounts; // drawn from input VCF
     Sample* nullSample;
 
+    bool loadNextPositionWithAlignmentOrInputVariant(BamAlignment& currentAlignment);
+    bool loadNextPositionWithInputVariant(void);
+    bool hasMoreInputVariants(void);
+
     void addCurrentGenotypeLikelihoods(map<int, vector<Genotype> >& genotypesByPloidy,
             vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods);
 
@@ -218,8 +225,6 @@ public:
     vector<string> bamHeaderLines;
  
     void openBams(void);
-    void openTraceFile(void);
-    void openFailedFile(void);
     void openOutputFile(void);
     void getSampleNames(void);
     void getPopulations(void);
@@ -230,12 +235,8 @@ public:
     vector<int> currentPloidies(Samples& samples);
     void loadBamReferenceSequenceNames(void);
     void loadFastaReference(void);
-    void loadReferenceSequence(BedTarget*, int, int);
     void loadReferenceSequence(BamAlignment& alignment);
-    void preserveReferenceSequenceWindow(int bp);
-    void extendReferenceSequence(int);
-    void extendReferenceSequence(BamAlignment& alignment);
-    void eraseReferenceSequence(int leftErasure);
+    void loadReferenceSequence(string& seqname);
     string referenceSubstr(long int position, unsigned int length);
     void loadTargets(void);
     bool getFirstAlignment(void);
@@ -320,7 +321,7 @@ public:
     string currentReferenceHaplotype();
 
     // output files
-    ofstream logFile, outputFile, traceFile, failedFile;
+    ofstream logFile, outputFile;
     ostream* output;
 
     // utility
diff --git a/src/DataLikelihood.cpp b/src/DataLikelihood.cpp
index 8dcb962..1779352 100644
--- a/src/DataLikelihood.cpp
+++ b/src/DataLikelihood.cpp
@@ -16,13 +16,15 @@ probObservedAllelesGivenGenotype(
         map<string, double>& freqs
     ) {
 
+    //cerr << "P(" << genotype << " given" << endl <<  sample;
+
     int observationCount = sample.observationCount();
     vector<long double> alleleProbs = genotype.alleleProbabilities(observationBias);
     vector<int> observationCounts = genotype.alleleObservationCounts(sample);
     int countOut = 0;
     double countIn = 0;
     long double prodQout = 0;  // the probability that the reads not in the genotype are all wrong
-    long double probObsGivenGt = 0;
+    long double prodSample = 0;
     
     if (standardGLs) {
         for (Sample::iterator s = sample.begin(); s != sample.end(); ++s) {
@@ -43,10 +45,6 @@ probObservedAllelesGivenGenotype(
             }
         }
     } else {
-        // problem.
-        // we should do this over all alleles
-        // which have partial support or full support.
-        // this is only over
         vector<Allele*> emptyA;
         vector<Allele*> emptyB;
         for (set<string>::iterator c = sample.supportedAlleles.begin();
@@ -73,12 +71,14 @@ probObservedAllelesGivenGenotype(
                     }
                 }
                 Allele& obs = **a;
+                //cerr << "observation: " << obs << endl;
                 long double probi = 0;
                 ContaminationEstimate& contamination = contaminations.of(obs.readGroupID);
                 double scale = 1;
                 // note that this will underflow if we have mapping quality = 0
                 // we guard against this externally, by ignoring such alignments (quality has to be > MQL0)
-                long double qual = (1 - exp(obs.lnquality)) * (1 - exp(obs.lnmapQuality));
+                long double qual = (1.0 - exp(obs.lnquality)) * (1.0 - exp(obs.lnmapQuality));
+                
                 if (onPartials) {
                     map<Allele*, set<Allele*> >::iterator r = sample.reversePartials.find(*a);
                     if (r != sample.reversePartials.end()) {
@@ -86,11 +86,12 @@ probObservedAllelesGivenGenotype(
                             cerr << "partial " << *a << " has empty reverse" << endl;
                             exit(1);
                         }
-                        //cerr << "partial " << *a << " supports potentially " << sample.reversePartials[*a].size() << " alleles : ";
+                        //cerr << "partial " << *a << " supports potentially " << sample.reversePartials[*a].size() << " alleles : " << endl;
                         //for (set<Allele*>::iterator m = sample.reversePartials[*a].begin();
-                        //     m != sample.reversePartials[*a].end(); ++m) cerr << **m << " ";
+                        //m != sample.reversePartials[*a].end(); ++m) cerr << **m << " ";
                         //cerr << endl;
                         scale = (double)1/(double)sample.reversePartials[*a].size();
+                        qual *= scale;
                     }
                 }
 
@@ -98,57 +99,44 @@ probObservedAllelesGivenGenotype(
                 // how does this work?
                 // each partial obs is recorded as supporting, but with observation probability scaled by the number of possible haplotypes it supports
                 bool isInGenotype = false;
+                long double asampl = genotype.alleleSamplingProb(obs);
 
+                // for each of the unique genotype alleles
                 for (vector<Allele>::iterator b = genotypeAlleles.begin(); b != genotypeAlleles.end(); ++b) {
                     Allele& allele = *b;
-                    const string& base = b->currentBase;
-
-                    long double q;
-                    if (obs.currentBase == base
-                        || (onPartials && sample.observationSupports(*a, &*b))) {
+                    const string& base = allele.currentBase;
+                    if (genotype.containsAllele(base)
+                        && (obs.currentBase == base
+                            || (onPartials && sample.observationSupports(*a, &*b)))) {
                         isInGenotype = true;
-                        q = qual;
-                    } else {
-                        q = 1 - qual;
-                    }
-
-                    if (onPartials) {
-                        q *= scale; // distribute partial support evenly across supported haplotypes
+                        // use the matched allele to estimate the asampl
+                        asampl = max(asampl, (long double)genotype.alleleSamplingProb(allele));
                     }
+                }
 
-                    double asampl = genotype.alleleSamplingProb(base);
-                    //double freq = freqs[base];
-
-                    if (asampl == 0) {
-                        // scale by frequency of (this) possibly contaminating allele
-                        asampl = contamination.probRefGivenHomAlt;
-                    } else if (asampl == 1) {
-                        // scale by frequency of (other) possibly contaminating alleles
-                        asampl = 1 - contamination.probRefGivenHomAlt;
+                if (asampl == 0) {
+                    // scale by frequency of (this) possibly contaminating allele
+                    asampl = contamination.probRefGivenHomAlt;
+                } else if (asampl == 1) {
+                    // scale by frequency of (other) possibly contaminating alleles
+                    asampl = 1 - contamination.probRefGivenHomAlt;
+                } else { //if (genotype.ploidy == 2) {
+                    // to deal with polyploids
+                    // note that this reduces to 1 for diploid heterozygotes
+                    // this term captures reference bias
+                    if (obs.isReference()) {
+                        asampl *= (contamination.probRefGivenHet / 0.5);
                     } else {
-                        // to deal with polyploids
-                        // note that this reduces to 1 for diploid heterozygotes
-                        double scale = asampl / 0.5;
-                        // this term captures reference bias
-                        if (allele.isReference()) {
-                            asampl = scale * contamination.probRefGivenHet;
-                        } else {
-                            asampl = 1 - (scale * contamination.probRefGivenHet);
-                        }
+                        asampl *= ((1 - contamination.probRefGivenHet) / 0.5);
                     }
-
-                    probi += asampl * q;
-
                 }
 
-                if (isInGenotype) {
-                    countIn += scale;
-                }
-
-                // bound to (0,1]
-                if (probi > 0) {
-                    long double lnprobi = log(min(probi, (long double) 1.0));
-                    probObsGivenGt += lnprobi;
+                // distribute observation support across haplotypes
+                if (!isInGenotype) {
+                    prodQout += log(1-qual);
+                    countOut += scale;
+                } else {
+                    prodSample += log(asampl*scale);
                 }
             }
         }
@@ -164,14 +152,15 @@ probObservedAllelesGivenGenotype(
         if (sum(observationCounts) == 0) {
             return prodQout;
         } else {
+            //cerr << "P(obs|" << genotype << ") = " << prodQout + multinomialSamplingProbLn(alleleProbs, observationCounts) << endl << endl << string(80, '@') << endl << endl;
             return prodQout + multinomialSamplingProbLn(alleleProbs, observationCounts);
+            //return prodQout + samplingProbLn(alleleProbs, observationCounts);
         }
     } else {
-        // read dependence factor, but inverted to deal with the new GL implementation
-        if (countIn > 1) {
-            probObsGivenGt *= (1 + (countIn - 1) * dependenceFactor) / countIn;
+        if (countOut > 1) {
+            prodQout *= (1 + (countOut - 1) * dependenceFactor) / countOut;
         }
-        //cerr << "P(obs|" << genotype << ") = " << probObsGivenGt << endl;
+        long double probObsGivenGt = prodQout + prodSample;
         return isinf(probObsGivenGt) ? 0 : probObsGivenGt;
     }
 
@@ -192,6 +181,7 @@ probObservedAllelesGivenGenotypes(
     ) {
     vector<pair<Genotype*, long double> > results;
     for (vector<Genotype*>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
+        Genotype& genotype = **g;
         results.push_back(
 	    make_pair(*g,
                   probObservedAllelesGivenGenotype(
@@ -207,3 +197,103 @@ probObservedAllelesGivenGenotypes(
     }
     return results;
 }
+
+void
+calculateSampleDataLikelihoods(
+    Samples& samples,
+    Results& results,
+    AlleleParser* parser,
+    map<int, vector<Genotype> >& genotypesByPloidy,
+    Parameters& parameters,
+    bool usingNull,
+    Bias& observationBias,
+    vector<Allele>& genotypeAlleles,
+    Contamination& contaminationEstimates,
+    map<string, double>& estimatedAlleleFrequencies,
+    map<string, vector<vector<SampleDataLikelihood> > >& sampleDataLikelihoodsByPopulation,
+    map<string, vector<vector<SampleDataLikelihood> > >& variantSampleDataLikelihoodsByPopulation,
+    map<string, vector<vector<SampleDataLikelihood> > >& invariantSampleDataLikelihoodsByPopulation) {
+
+    for (vector<string>::iterator n = parser->sampleList.begin(); n != parser->sampleList.end(); ++n) {
+        //string sampleName = s->first;
+        string& sampleName = *n;
+        //DEBUG2("sample: " << sampleName);
+        //Sample& sample = s->second;
+        if (samples.find(sampleName) == samples.end()
+            && !(parser->hasInputVariantAllelesAtCurrentPosition()
+                 || parameters.reportMonomorphic)) {
+            continue;
+        }
+        Sample& sample = samples[sampleName];
+        vector<Genotype>& genotypes = genotypesByPloidy[parser->currentSamplePloidy(sampleName)];
+        vector<Genotype*> genotypesWithObs;
+        for (vector<Genotype>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
+            if (parameters.excludePartiallyObservedGenotypes) {
+                if (g->sampleHasSupportingObservationsForAllAlleles(sample)) {
+                    genotypesWithObs.push_back(&*g);
+                }
+            } else if (parameters.excludeUnobservedGenotypes && usingNull) {
+                if (g->sampleHasSupportingObservations(sample)) {
+                    //cerr << sampleName << " has suppporting obs for " << *g << endl;
+                    genotypesWithObs.push_back(&*g);
+                } else if (g->hasNullAllele() && g->homozygous) {
+                    // this genotype will never be added if we are running in observed-only mode, but
+                    // we still need it for consistency
+                    genotypesWithObs.push_back(&*g);
+                }
+            } else {
+                genotypesWithObs.push_back(&*g);
+            }
+        }
+
+        // skip this sample if we have no observations supporting any of the genotypes we are going to evaluate
+        if (genotypesWithObs.empty()) {
+            continue;
+        }
+
+        vector<pair<Genotype*, long double> > probs
+            = probObservedAllelesGivenGenotypes(sample, genotypesWithObs,
+                                                parameters.RDF, parameters.useMappingQuality,
+                                                observationBias, parameters.standardGLs,
+                                                genotypeAlleles,
+                                                contaminationEstimates,
+                                                estimatedAlleleFrequencies);
+            
+#ifdef VERBOSE_DEBUG
+        if (parameters.debug2) {
+            for (vector<pair<Genotype*, long double> >::iterator p = probs.begin(); p != probs.end(); ++p) {
+                cerr << parser->currentSequenceName << "," << (long unsigned int) parser->currentPosition + 1 << ","
+                     << sampleName << ",likelihood," << *(p->first) << "," << p->second << endl;
+            }
+        }
+#endif
+
+        Result& sampleData = results[sampleName];
+        sampleData.name = sampleName;
+        sampleData.observations = &sample;
+        for (vector<pair<Genotype*, long double> >::iterator p = probs.begin(); p != probs.end(); ++p) {
+            sampleData.push_back(SampleDataLikelihood(sampleName, &sample, p->first, p->second, 0));
+        }
+
+        sortSampleDataLikelihoods(sampleData);
+
+        string& population = parser->samplePopulation[sampleName];
+        vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods = sampleDataLikelihoodsByPopulation[population];
+        vector<vector<SampleDataLikelihood> >& variantSampleDataLikelihoods = variantSampleDataLikelihoodsByPopulation[population];
+        vector<vector<SampleDataLikelihood> >& invariantSampleDataLikelihoods = invariantSampleDataLikelihoodsByPopulation[population];
+
+        if (parameters.genotypeVariantThreshold != 0) {
+            if (sampleData.size() > 1
+                && abs(sampleData.at(1).prob - sampleData.front().prob)
+                < parameters.genotypeVariantThreshold) {
+                variantSampleDataLikelihoods.push_back(sampleData);
+            } else {
+                invariantSampleDataLikelihoods.push_back(sampleData);
+            }
+        } else {
+            variantSampleDataLikelihoods.push_back(sampleData);
+        }
+        sampleDataLikelihoods.push_back(sampleData);
+
+    }
+}
diff --git a/src/DataLikelihood.h b/src/DataLikelihood.h
index 7351904..07aaaf4 100644
--- a/src/DataLikelihood.h
+++ b/src/DataLikelihood.h
@@ -17,6 +17,8 @@
 #include "Dirichlet.h"
 #include "Bias.h"
 #include "Contamination.h"
+#include "AlleleParser.h"
+#include "ResultData.h"
 
 using namespace std;
 
@@ -44,4 +46,20 @@ probObservedAllelesGivenGenotypes(
         Contamination& contaminations,
         map<string, double>& freqs);
 
+void
+calculateSampleDataLikelihoods(
+    Samples& samples,
+    Results& results,
+    AlleleParser* parser,
+    map<int, vector<Genotype> >& genotypesByPloidy,
+    Parameters& parameters,
+    bool usingNull,
+    Bias& observationBias,
+    vector<Allele>& genotypeAlleles,
+    Contamination& contaminationEstimates,
+    map<string, double>& estimatedAlleleFrequencies,
+    map<string, vector<vector<SampleDataLikelihood> > >& sampleDataLikelihoodsByPopulation,
+    map<string, vector<vector<SampleDataLikelihood> > >& variantSampleDataLikelihoodsByPopulation,
+    map<string, vector<vector<SampleDataLikelihood> > >& invariantSampleDataLikelihoodsByPopulation);
+
 #endif
diff --git a/src/Genotype.cpp b/src/Genotype.cpp
index 5a1b1a2..779a03d 100644
--- a/src/Genotype.cpp
+++ b/src/Genotype.cpp
@@ -3,10 +3,11 @@
 #include "multipermute.h"
 
 
-vector<Allele> Genotype::uniqueAlleles(void) {
-    vector<Allele> uniques;
-    for (Genotype::iterator g = this->begin(); g != this->end(); ++g)
-        uniques.push_back(g->allele);
+vector<Allele*> Genotype::uniqueAlleles(void) {
+    vector<Allele*> uniques;
+    for (Genotype::iterator g = this->begin(); g != this->end(); ++g) {
+        uniques.push_back(&g->allele);
+    }
     return uniques;
 }
 
@@ -64,6 +65,15 @@ int Genotype::alleleCount(Allele& allele) {
     }
 }
 
+// returns true when the genotype is composed of a subset of the alleles
+bool Genotype::matchesAlleles(vector<Allele>& alleles) {
+    int p = 0;
+    for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
+        p += alleleCount(*a);
+    }
+    return ploidy == p;
+}
+
 double Genotype::alleleSamplingProb(const string& base) {
     map<string, int>::iterator ge = alleleCounts.find(base);
     if (ge == alleleCounts.end()) {
diff --git a/src/Genotype.h b/src/Genotype.h
index ab30ed6..98cdb6b 100644
--- a/src/Genotype.h
+++ b/src/Genotype.h
@@ -71,12 +71,14 @@ public:
 
     }
 
-    vector<Allele> uniqueAlleles(void);
+    vector<Allele*> uniqueAlleles(void);
     int getPloidy(void);
     int alleleCount(const string& base);
     int alleleCount(Allele& allele);
     bool containsAllele(Allele& allele);
     bool containsAllele(const string& base);
+    // returns true when the genotype is composed of a subset of the alleles
+    bool matchesAlleles(vector<Allele>& alleles);
     vector<Allele> alternateAlleles(string& refbase);
     vector<string> alternateBases(string& refbase);
     vector<int> counts(void);
diff --git a/src/IndelAllele.cpp b/src/IndelAllele.cpp
index 237699b..20e6888 100644
--- a/src/IndelAllele.cpp
+++ b/src/IndelAllele.cpp
@@ -23,7 +23,8 @@ bool FBhomopolymer(string sequence) {
 
 ostream& operator<<(ostream& out, const FBIndelAllele& indel) {
     string t = indel.insertion ? "i" : "d";
-    out << t <<  ":" << indel.position << ":" << indel.readPosition << ":" << indel.sequence;
+    out << t <<  ":" << indel.position << ":" << indel.readPosition
+        << ":" << indel.sequence << ":" << (indel.splice?"splice":"");
     return out;
 }
 
@@ -31,7 +32,8 @@ bool operator==(const FBIndelAllele& a, const FBIndelAllele& b) {
     return (a.insertion == b.insertion
             && a.length == b.length
             && a.position == b.position
-            && a.sequence == b.sequence);
+            && a.sequence == b.sequence
+            && a.splice == b.splice);
 }
 
 bool operator!=(const FBIndelAllele& a, const FBIndelAllele& b) {
diff --git a/src/IndelAllele.h b/src/IndelAllele.h
index 0e8d5fd..55c59e6 100644
--- a/src/IndelAllele.h
+++ b/src/IndelAllele.h
@@ -18,11 +18,12 @@ public:
     int position;
     int readPosition;
     string sequence;
+    bool splice;
 
     bool homopolymer(void);
 
-    FBIndelAllele(bool i, int l, int p, int rp, string s)
-        : insertion(i), length(l), position(p), readPosition(rp), sequence(s)
+    FBIndelAllele(bool i, int l, int p, int rp, string s, bool n)
+    : insertion(i), length(l), position(p), readPosition(rp), sequence(s), splice(n)
     { }
 };
 
diff --git a/src/LeftAlign.cpp b/src/LeftAlign.cpp
index 4673a55..78b7046 100644
--- a/src/LeftAlign.cpp
+++ b/src/LeftAlign.cpp
@@ -48,12 +48,17 @@ bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
             sp += l;
             rp += l;
         } else if (t == 'D') { // deletion
-            indels.push_back(FBIndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l)));
+            indels.push_back(FBIndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l), false));
+            alignmentAlignedBases.insert(rp + aabOffset, string(l, '-'));
+            aabOffset += l;
+            sp += l;  // update reference sequence position
+        } else if (t == 'N') {
+            indels.push_back(FBIndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l), true));
             alignmentAlignedBases.insert(rp + aabOffset, string(l, '-'));
             aabOffset += l;
             sp += l;  // update reference sequence position
         } else if (t == 'I') { // insertion
-            indels.push_back(FBIndelAllele(true, l, sp, rp, alignment.QueryBases.substr(rp, l)));
+            indels.push_back(FBIndelAllele(true, l, sp, rp, alignment.QueryBases.substr(rp, l), false));
             alignedReferenceSequence.insert(sp + softBegin.size() + arsOffset, string(l, '-'));
             arsOffset += l;
             rp += l;
@@ -68,8 +73,8 @@ bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
             }
             rp += l;
         } else if (t == 'H') { // hard clip on the read, clipped sequence is not present in the read
-        } else if (t == 'N') { // skipped region in the reference not present in read, aka splice
-            sp += l;
+            //} else if (t == 'N') { // skipped region in the reference not present in read, aka splice
+            //sp += l;
         }
     }
 
@@ -117,6 +122,7 @@ bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
             }
 #endif
             while (steppos >= 0 && readsteppos >= 0
+                   && !indel.splice
                    && indel.sequence == referenceSequence.substr(steppos, indel.length)
                    && indel.sequence == alignment.QueryBases.substr(readsteppos, indel.length)
                    && (id == indels.begin()
@@ -178,7 +184,8 @@ bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
             // if so, do it
             int prev_end_ref = previous->insertion ? previous->position : previous->position + previous->length;
             int prev_end_read = !previous->insertion ? previous->readPosition : previous->readPosition + previous->length;
-            if (previous->insertion == indel.insertion
+            if (!previous->splice && !indel.splice &&
+                previous->insertion == indel.insertion
                     && ((previous->insertion
                         && (previous->position < indel.position
                         && previous->readPosition + previous->readPosition < indel.readPosition))
@@ -240,9 +247,13 @@ bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
     FBIndelAllele last = *id++;
     if (last.position > 0) {
         newCigar.push_back(CigarOp('M', last.position));
-        newCigar.push_back(CigarOp((last.insertion ? 'I' : 'D'), last.length));
+    }
+    if (last.insertion) {
+        newCigar.push_back(CigarOp('I', last.length));
+    } else if (last.splice) {
+        newCigar.push_back(CigarOp('N', last.length));
     } else {
-        newCigar.push_back(CigarOp((last.insertion ? 'I' : 'D'), last.length));
+        newCigar.push_back(CigarOp('D', last.length));
     }
     int lastend = last.insertion ? last.position : (last.position + last.length);
     LEFTALIGN_DEBUG(last << ",");
@@ -259,7 +270,14 @@ bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
             op.Length += indel.length;
         } else if (indel.position >= lastend) {  // also catches differential indels, but with the same position
             newCigar.push_back(CigarOp('M', indel.position - lastend));
-            newCigar.push_back(CigarOp((indel.insertion ? 'I' : 'D'), indel.length));
+            if (indel.insertion) {
+                newCigar.push_back(CigarOp('I', indel.length));
+            } else if (indel.splice) {
+                newCigar.push_back(CigarOp('N', indel.length));
+            } else { // deletion
+                newCigar.push_back(CigarOp('D', indel.length));
+            }
+
         }
         last = *id;
         lastend = last.insertion ? last.position : (last.position + last.length);
diff --git a/src/Makefile b/src/Makefile
index bd64279..86adbb2 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -9,7 +9,7 @@ CXX=g++
 C=gcc
 
 # Compiler flags
-CFLAGS=-O3 -D_FILE_OFFSET_BITS=64
+CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g
 #CFLAGS=-O3 -static -D VERBOSE_DEBUG  # enables verbose debugging via --debug2
 
 BAMTOOLS_ROOT=../bamtools
@@ -61,6 +61,7 @@ OBJECTS=BedReader.o \
 		IndelAllele.o \
 		Bias.o \
 		Contamination.o \
+		NonCall.o \
 		SegfaultHandler.o \
 		../vcflib/tabixpp/tabix.o \
 		../vcflib/tabixpp/bgzf.o \
@@ -151,6 +152,9 @@ ResultData.o: ResultData.cpp ResultData.h Result.h Result.cpp Allele.h Utility.h
 Result.o: Result.cpp Result.h
 	$(CXX) $(CFLAGS) $(INCLUDE) -c Result.cpp
 
+NonCall.o: NonCall.cpp NonCall.h
+	$(CXX) $(CFLAGS) $(INCLUDE) -c NonCall.cpp
+
 BedReader.o: BedReader.cpp BedReader.h
 	$(CXX) $(CFLAGS) $(INCLUDE) -c BedReader.cpp
 
diff --git a/src/Multinomial.cpp b/src/Multinomial.cpp
index 3273e37..7453afb 100644
--- a/src/Multinomial.cpp
+++ b/src/Multinomial.cpp
@@ -37,3 +37,13 @@ long double multinomialCoefficientLn(int n, const vector<int>& counts) {
     transform(counts.begin(), counts.end(), count_factorials.begin(), factorialln);
     return factorialln(n) - sum(count_factorials);
 }
+
+long double samplingProbLn(const vector<long double>& probs, const vector<int>& obs) {
+    vector<long double>::const_iterator p = probs.begin();
+    vector<int>::const_iterator o = obs.begin();
+    long double r = 0;
+    for (; p != probs.end() && o != obs.end(); ++p, ++o) {
+        r += powln(log(*p), *o);
+    }
+    return r;
+}
diff --git a/src/Multinomial.h b/src/Multinomial.h
index eff9102..e2ab0bd 100644
--- a/src/Multinomial.h
+++ b/src/Multinomial.h
@@ -8,4 +8,6 @@ long double multinomialSamplingProb(const vector<long double>& probs, const vect
 long double multinomialSamplingProbLn(const vector<long double>& probs, const vector<int>& obs);
 long double multinomialCoefficientLn(int n, const vector<int>& counts);
 
+long double samplingProbLn(const vector<long double>& probs, const vector<int>& obs);
+
 #endif
diff --git a/src/NonCall.cpp b/src/NonCall.cpp
new file mode 100644
index 0000000..e9a7fe4
--- /dev/null
+++ b/src/NonCall.cpp
@@ -0,0 +1,86 @@
+#include "NonCall.h"
+
+NonCall NonCalls::aggregateAll(void) {
+    NonCall aggregate;
+    bool first = true;
+    for (NonCalls::const_iterator nc = this->begin(); nc != this->end(); ++nc) {
+        for (map<long, map<string, NonCall> >::const_iterator p = nc->second.begin();
+             p != nc->second.end(); ++p) {
+            for (map<string, NonCall>::const_iterator s = p->second.begin();
+                 s != p->second.end(); ++s) {
+                const NonCall& nonCall = s->second;
+                aggregate.refCount += nonCall.refCount;
+                aggregate.altCount += nonCall.altCount;
+                aggregate.reflnQ += nonCall.reflnQ;
+                aggregate.altlnQ += nonCall.altlnQ;
+                if (first) {
+                    aggregate.minDepth = nonCall.refCount + nonCall.altCount;
+                    first = false;
+                } else {
+                    aggregate.minDepth = min(aggregate.minDepth, nonCall.refCount + nonCall.altCount);
+                }
+            }
+        }
+    }
+    return aggregate;
+}
+
+void NonCalls::aggregatePerSample(map<string, NonCall>& perSample) {
+    set<string> seen;
+    for (NonCalls::const_iterator nc = this->begin(); nc != this->end(); ++nc) {
+        for (map<long, map<string, NonCall> >::const_iterator p = nc->second.begin();
+             p != nc->second.end(); ++p) {
+            for (map<string, NonCall>::const_iterator s = p->second.begin();
+                 s != p->second.end(); ++s) {
+                const string& name = s->first;
+                const NonCall& nonCall = s->second;
+                NonCall& aggregate = perSample[name];
+                aggregate.refCount += nonCall.refCount;
+                aggregate.altCount += nonCall.altCount;
+                aggregate.reflnQ += nonCall.reflnQ;
+                aggregate.altlnQ += nonCall.altlnQ;
+                if (!seen.count(name)) {
+                    aggregate.minDepth = nonCall.refCount + nonCall.altCount;
+                    seen.insert(name);
+                } else {
+                    aggregate.minDepth = min(aggregate.minDepth, nonCall.refCount + nonCall.altCount);
+                }
+            }
+        }
+    }
+}
+
+void NonCalls::record(const string& seqName, long pos, const Samples& samples) {
+    map<string, NonCall>& site = (*this)[seqName][pos];
+    for (Samples::const_iterator s = samples.begin(); s != samples.end(); ++s) {
+        // tally ref and non-ref alleles
+        const string& name = s->first;
+        const Sample& sample = s->second;
+        NonCall& noncall = site[name];
+        for (Sample::const_iterator a = sample.begin(); a != sample.end(); ++a) {
+            const vector<Allele*>& alleles = a->second;
+            for (vector<Allele*>::const_iterator o = alleles.begin(); o != alleles.end(); ++o) {
+                Allele& allele = **o;
+                if (allele.isReference()) {
+                    ++noncall.refCount;
+                    noncall.reflnQ += allele.lnquality;
+                } else {
+                    ++noncall.altCount;
+                    noncall.altlnQ += allele.lnquality;
+                }
+            }
+        }
+    }
+}
+
+pair<string, long> NonCalls::firstPos(void) {
+    const string& startChrom = begin()->first;
+    long startPos = begin()->second.begin()->first;
+    return make_pair(startChrom, startPos);
+}
+
+pair<string, long> NonCalls::lastPos(void) {
+    const string& endChrom = rbegin()->first;
+    long endPos = rbegin()->second.rbegin()->first;
+    return make_pair(endChrom, endPos);
+}
diff --git a/src/NonCall.h b/src/NonCall.h
new file mode 100644
index 0000000..c1715a1
--- /dev/null
+++ b/src/NonCall.h
@@ -0,0 +1,48 @@
+#ifndef __NONCALL_H
+#define __NONCALL_H
+
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <utility>
+#include "Utility.h"
+#include "Allele.h"
+#include "Sample.h"
+
+using namespace std;
+
+class NonCall {
+public:
+    NonCall(void)
+        : refCount(0)
+        , reflnQ(0)
+        , altCount(0)
+        , altlnQ(0)
+        , minDepth(0)
+
+    { }
+    NonCall(int rc, long double rq, int ac, long double aq, int mdp)
+        : refCount(rc)
+        , reflnQ(rq)
+        , altCount(ac)
+        , altlnQ(aq)
+        , minDepth(mdp)
+    { }
+    int refCount;
+    int altCount;
+    int minDepth;
+    long double reflnQ;
+    long double altlnQ;
+};
+
+class NonCalls : public map<string, map<long, map<string, NonCall> > > {
+public:
+    void record(const string& seqName, long pos, const Samples& samples);
+    NonCall aggregateAll(void);
+    void aggregatePerSample(map<string, NonCall>& perSite);
+    pair<string, long> firstPos(void);
+    pair<string, long> lastPos(void);
+};
+
+#endif
diff --git a/src/Parameters.cpp b/src/Parameters.cpp
index 06c283b..3dfbf51 100644
--- a/src/Parameters.cpp
+++ b/src/Parameters.cpp
@@ -66,6 +66,9 @@ void Parameters::usage(char** argv) {
         << "    # call variants assuming a diploid sample" << endl
         << "    freebayes -f ref.fa aln.bam >var.vcf" << endl
         << endl
+        << "    # call variants assuming a diploid sample, providing gVCF output" << endl
+        << "    freebayes -f ref.fa --gvcf aln.bam >var.gvcf" << endl
+        << endl
         << "    # require at least 5 supporting observations to consider a variant" << endl
         << "    freebayes -f ref.fa -C 5 aln.bam >var.vcf" << endl
         << endl
@@ -95,13 +98,12 @@ void Parameters::usage(char** argv) {
         << "   -h --help       Prints this help dialog." << endl
         << "   --version       Prints the release number and the git commit id." << endl
         << endl
-        << "input and output:" << endl
+        << "input:" << endl
         << endl
         << "   -b --bam FILE   Add FILE to the set of BAM files to be analyzed." << endl
         << "   -L --bam-list FILE" << endl
         << "                   A file containing a list of BAM files to be analyzed." << endl
         << "   -c --stdin      Read BAM input on stdin." << endl
-        << "   -v --vcf FILE   Output VCF-format results to FILE." << endl
         << "   -f --fasta-reference FILE" << endl
         << "                   Use FILE as the reference sequence for analysis." << endl
         << "                   An index file (FILE.fai) will be created if none exists." << endl
@@ -127,10 +129,14 @@ void Parameters::usage(char** argv) {
         << "                      reference sequence, start, end, sample name, copy number" << endl
         << "                   ... for each region in each sample which does not have the" << endl
         << "                   default copy number as set by --ploidy." << endl
-        << "   --trace FILE    Output an algorithmic trace to FILE." << endl
-        << "   --failed-alleles FILE" << endl
-        << "                   Write a BED file of the analyzed positions which do not" << endl
-        << "                   pass --pvar to FILE." << endl
+        << endl
+        << "output:" << endl
+        << endl
+        << "   -v --vcf FILE   Output VCF-format results to FILE. (default: stdout)" << endl
+        << "   --gvcf" << endl
+        << "                   Write gVCF output, which indicates coverage in uncalled regions." << endl
+        << "   --gvcf-chunk NUM" << endl
+        << "                   When writing gVCF output emit a record for every NUM bases." << endl
         << "   -@ --variant-input VCF" << endl
         << "                   Use variants reported in VCF file as input to the algorithm." << endl
         << "                   Variants in this file will included in the output even if" << endl
@@ -152,9 +158,6 @@ void Parameters::usage(char** argv) {
         << "                   Report even loci which appear to be monomorphic, and report all" << endl
         << "                   considered alleles, even those which are not in called genotypes." << endl
         << "                   Loci which do not have any potential alternates have '.' for ALT." << endl
-        << endl
-        << "reporting:" << endl
-        << endl
         << "   -P --pvar N     Report sites if the probability that there is a polymorphism" << endl
         << "                   at the site is greater than N.  default: 0.0.  Note that post-" << endl
         << "                   filtering is generally recommended over the use of this parameter." << endl
@@ -267,8 +270,10 @@ void Parameters::usage(char** argv) {
         << "                   Require at least this count of observations supporting" << endl
         << "                   an alternate allele within the total population in order" << endl
         << "                   to use the allele in analysis.  default: 1" << endl
-        << "   -! --min-coverage N" << endl
-        << "                   Require at least this coverage to process a site.  default: 0" << endl
+        << "   --min-coverage N" << endl
+        << "                   Require at least this coverage to process a site. default: 0" << endl
+        << "   --max-coverage N" << endl
+        << "                   Do not process sites with greater than this coverage. default: no limit" << endl
         << endl
         << "population priors:" << endl
         << endl
@@ -298,13 +303,9 @@ void Parameters::usage(char** argv) {
         << "                   where the efficiency is 1 if there is no relative observation bias." << endl
         << "   --base-quality-cap Q" << endl
         << "                   Limit estimated observation quality by capping base quality at Q." << endl
-        << "   --experimental-gls" << endl
-        << "                   Generate genotype likelihoods using 'effective base depth' metric" << endl
-        << "                   qual = 1-BaseQual * 1-MapQual.  Incorporate partial observations." << endl
-        << "                   This is the default when contamination estimates are provided." << endl
-        << "                   Optimized for diploid samples." << endl
         << "   --prob-contamination F" << endl
         << "                   An estimate of contamination to use for all samples.  default: 10e-9" << endl
+        << "   --legacy-gls    Use legacy (polybayes equivalent) genotype likelihood calculations" << endl
         << "   --contamination-estimates FILE" << endl
         << "                   A file containing per-sample estimates of contamination, such as" << endl
         << "                   those generated by VerifyBamID.  The format should be:" << endl
@@ -381,13 +382,11 @@ Parameters::Parameters(int argc, char** argv) {
     cnvFile = "";
     output = "vcf";               // -v --vcf
     outputFile = "";
-    traceFile = "";
-    failedFile = "";
+    gVCFout = false;
+    gVCFchunk = 0;
     alleleObservationBiasFile = "";
 
     // operation parameters
-    outputAlleles = false;          //
-    trace = false;                  // -L --trace
     useDuplicateReads = false;      // -E --use-duplicate-reads
     suppressOutput = false;         // -N --suppress-output
     useBestNAlleles = 0;         // -n --use-best-n-alleles
@@ -427,7 +426,7 @@ Parameters::Parameters(int argc, char** argv) {
     reportMonomorphic = false;
     boundIndels = true; // ignore indels at ends of reads
     onlyUseInputAlleles = false;
-    standardGLs = true;
+    standardGLs = false; // use experimental gls by default // XXX
     MQR = 100;                     // -M --reference-mapping-quality
     BQR = 60;                     // -B --reference-base-quality
     ploidy = 2;                  // -p --ploidy
@@ -457,6 +456,7 @@ Parameters::Parameters(int argc, char** argv) {
     probContamination = 10e-9;
     //minAltQSumTotal = 0;
     minCoverage = 0;
+    maxCoverage = 0;
     debuglevel = 0;
     debug = false;
     debug2 = false;
@@ -479,8 +479,8 @@ Parameters::Parameters(int argc, char** argv) {
             {"populations", required_argument, 0, '2'},
             {"cnv-map", required_argument, 0, 'A'},
             {"vcf", required_argument, 0, 'v'},
-            {"trace", required_argument, 0, '&'},
-            {"failed-alleles", required_argument, 0, '8'},
+            {"gvcf", no_argument, 0, '8'},
+            {"gvcf-chunk", required_argument, 0, '&'},
             {"use-duplicate-reads", no_argument, 0, '4'},
             {"no-partial-observations", no_argument, 0, '['},
             {"use-best-n-alleles", required_argument, 0, 'n'},
@@ -527,6 +527,7 @@ Parameters::Parameters(int argc, char** argv) {
             //{"min-alternate-mean-mapq", required_argument, 0, 'k'},
             {"min-alternate-qsum", required_argument, 0, '3'},
             {"min-coverage", required_argument, 0, '!'},
+            {"max-coverage", required_argument, 0, '+'},
             {"genotype-qualities", no_argument, 0, '='},
             {"variant-input", required_argument, 0, '@'},
             {"only-use-input-alleles", no_argument, 0, 'l'},
@@ -539,8 +540,8 @@ Parameters::Parameters(int argc, char** argv) {
             {"haplotype-basis-alleles", required_argument, 0, '9'},
             {"report-genotype-likelihood-max", no_argument, 0, '5'},
             {"report-all-haplotype-alleles", no_argument, 0, '6'},
-            {"experimental-gls", no_argument, 0, ')'},
             {"base-quality-cap", required_argument, 0, '('},
+            {"legacy-gls", no_argument, 0, ')'},
             {"prob-contamination", required_argument, 0, '_'},
             {"contamination-estimates", required_argument, 0, ','},
             {"report-monomorphic", no_argument, 0, '6'},
@@ -552,7 +553,7 @@ Parameters::Parameters(int argc, char** argv) {
     while (true) {
 
         int option_index = 0;
-        c = getopt_long(argc, argv, "hcO4ZKjH[0diN5a)Ik=wl6#uVXJY:b:G:M:x:@:A:f:t:r:s:v:n:B:p:m:q:R:Q:U:$:e:T:P:D:^:S:W:F:C:&:L:8:z:1:3:E:7:2:9:%:(:_:,:",
+        c = getopt_long(argc, argv, "hcO4ZKjH[0diN5a)Ik=wl6#uVXJY:b:G:M:x:@:A:f:t:r:s:v:n:B:p:m:q:R:Q:U:$:e:T:P:D:^:S:W:F:C:&:L:8z:1:3:E:7:2:9:%:_:,:(:!:+:",
                         long_options, &option_index);
 
         if (c == -1) // end of options
@@ -618,20 +619,18 @@ Parameters::Parameters(int argc, char** argv) {
             leftAlignIndels = false;
             break;
 
-            // -L --trace
-        case '&':
-            traceFile = optarg;
-            trace = true;
-            break;
-
             // --bam-list
         case 'L':
             addLinesFromFile(bams, string(optarg));
             break;
 
-            // -8 --failed-alleles
+            // -8 --gvcf
         case '8':
-            failedFile = optarg;
+            gVCFout = true;
+            break;
+
+        case '&':
+            gVCFchunk = atoi(optarg);
             break;
 
             // -4 --use-duplicate-reads
@@ -663,6 +662,14 @@ Parameters::Parameters(int argc, char** argv) {
             }
             break;
 
+            // -+ --max-coverage
+        case '+':
+            if (!convert(optarg, maxCoverage)) {
+                cerr << "could not parse max-coverage" << endl;
+                exit(1);
+            }
+            break;
+
             // -n --use-best-n-alleles
         case 'n':
             if (!convert(optarg, useBestNAlleles)) {
@@ -812,7 +819,7 @@ Parameters::Parameters(int argc, char** argv) {
             }
             break;
 
-	    case '5':
+        case '5':
             reportGenotypeLikelihoodMax = true;
             break;
 
@@ -904,7 +911,7 @@ Parameters::Parameters(int argc, char** argv) {
             break;
 
             // -% --observation-bias
-	    case '%':
+        case '%':
             alleleObservationBiasFile = optarg;
             break;
 
@@ -970,7 +977,7 @@ Parameters::Parameters(int argc, char** argv) {
             variantPriorsFile = optarg;
             break;
 
-	    case '9':
+        case '9':
             haplotypeVariantFile = optarg;
             break;
 
@@ -978,7 +985,7 @@ Parameters::Parameters(int argc, char** argv) {
             onlyUseInputAlleles = true;
             break;
 
-	    case '6':
+        case '6':
         {
             string arg(argv[optind - 1]);
             if (arg == "--report-monomorphic") {
@@ -997,16 +1004,14 @@ Parameters::Parameters(int argc, char** argv) {
                 cerr << "could not parse prob-contamination" << endl;
                 exit(1);
             }
-            standardGLs = false;
             break;
 
         case ',':
             contaminationEstimateFile = optarg;
-            standardGLs = false;
             break;
 
         case ')':
-            standardGLs = false;
+            standardGLs = true;
             break;
 
         case '(':
@@ -1021,12 +1026,12 @@ Parameters::Parameters(int argc, char** argv) {
             ++debuglevel;
             break;
 
-	case '#':
-	    
-	    // --version
+    case '#':
+        
+        // --version
             cout << "version:  " << VERSION_GIT << endl;
-	    exit(0);
-	    break;
+        exit(0);
+        break;
 
         case 'h':
             usage(argv);
@@ -1087,4 +1092,14 @@ Parameters::Parameters(int argc, char** argv) {
         exit(1);
     }
 
+    // check that there aren't duplicates in the bams list
+    for( int i=1; i<bams.size(); ++i ){
+        for( int j=0; j<i; ++j ){
+            if( bams[i] == bams[j] ){
+                cerr << "Error: Duplicate bam file '" << bams[i] << "'" << endl;
+                exit(1);
+            }
+        }
+    }
+
 }
diff --git a/src/Parameters.h b/src/Parameters.h
index 4502bac..80d20ba 100644
--- a/src/Parameters.h
+++ b/src/Parameters.h
@@ -35,8 +35,8 @@ public:
     //string log;
     string output;               // -v --vcf
     string outputFile;
-    string traceFile;
-    string failedFile;    // -l --failed-alleles
+    bool gVCFout;    // -l --gvcf
+    int gVCFchunk;
     string variantPriorsFile;
     string haplotypeVariantFile;
     bool reportAllHaplotypeAlleles;
@@ -50,8 +50,6 @@ public:
     string contaminationEstimateFile;
 
     // operation parameters
-    bool outputAlleles;          //  unused...
-    bool trace;                  // -L --trace
     bool useDuplicateReads;      // -E --use-duplicate-reads
     bool suppressOutput;         // -S --suppress-output
     int useBestNAlleles;         // -n --use-best-n-alleles
@@ -121,6 +119,7 @@ public:
     int minAltCount;             // -C --min-alternate-count
     int minAltTotal;             // -G --min-alternate-total
     int minCoverage;             // -! --min-coverage
+    int maxCoverage;             // -+ --max-coverage
     int debuglevel;              // -d --debug increments
     bool debug; // set if debuglevel >=1
     bool debug2; // set if debuglevel >=2
diff --git a/src/ResultData.cpp b/src/ResultData.cpp
index de85c7b..b92dfac 100644
--- a/src/ResultData.cpp
+++ b/src/ResultData.cpp
@@ -628,3 +628,71 @@ vcf::Variant& Results::vcf(
     return var;
 
 }
+
+
+vcf::Variant& Results::gvcf(
+    vcf::Variant& var,
+    NonCalls& nonCalls,
+    AlleleParser* parser) {
+
+    // what is the first position in the nonCalls?
+    pair<string, long> start = nonCalls.firstPos();
+    const string& startChrom = start.first;
+    long startPos = start.second;
+    // startPos and endPos are zero-based, half-open -- [startPos,endPos)
+
+    // what is the current position? nb: can't be on a different chrom
+    long endPos;
+    if (startChrom != parser->currentSequenceName) {
+        endPos = parser->reference.sequenceLength(startChrom);
+    } else {
+        endPos = parser->currentPosition;
+    }
+    long numSites = endPos - startPos;
+    assert(numSites > 0);
+
+    // set up site call
+    var.ref = parser->currentReferenceBaseString();
+    var.alt.push_back("<*>");
+    var.sequenceName = parser->currentSequenceName;
+    var.position = startPos + 1; // output text field is one-based
+    var.id = ".";
+    var.filter = ".";
+    // TODO change to actual quality
+    var.quality = 0;
+    // set up format string
+    var.format.clear();
+    var.format.push_back("GQ");
+    var.format.push_back("DP");
+    var.format.push_back("MIN");
+    var.format.push_back("QR");
+    var.format.push_back("QA");
+    
+    NonCall total = nonCalls.aggregateAll();
+    var.info["DP"].push_back(convert((total.refCount+total.altCount) / numSites));
+    var.info["MIN"].push_back(convert(total.minDepth));
+    // The text END field is one-based, inclusive. We proudly conflate this
+    // with our zero-based, exclusive endPos.
+    var.info["END"].push_back(convert(endPos));
+
+    // genotype quality is 1- p(polymorphic)
+    
+    map<string, NonCall> perSample;
+    nonCalls.aggregatePerSample(perSample);
+
+    // iterate through the samples and aggregate information about them
+    for (vector<string>::const_iterator s = parser->sampleList.begin();
+         s != parser->sampleList.end(); ++s) {
+        const string& sampleName = *s;
+        const NonCall& nc = perSample[sampleName];
+        map<string, vector<string> >& sampleOutput = var.samples[sampleName];
+        long double qual = nc.reflnQ - nc.altlnQ;
+        sampleOutput["GQ"].push_back(convert(ln2phred(qual)));
+        sampleOutput["DP"].push_back(convert((nc.refCount+nc.altCount) / numSites));
+        sampleOutput["MIN"].push_back(convert(nc.minDepth));
+        sampleOutput["QR"].push_back(convert(ln2phred(nc.reflnQ)));
+        sampleOutput["QA"].push_back(convert(ln2phred(nc.altlnQ)));
+    }
+
+    return var;
+}
diff --git a/src/ResultData.h b/src/ResultData.h
index d067ee1..ef02cae 100644
--- a/src/ResultData.h
+++ b/src/ResultData.h
@@ -11,6 +11,7 @@
 #include "Variant.h"
 #include "version_git.h"
 #include "Result.h"
+#include "NonCall.h"
 
 using namespace std;
 
@@ -49,7 +50,7 @@ public:
         string refbase,
         vector<Allele>& altAlleles,
         map<string, int> repeats,
-	int genotypingIterations,
+        int genotypingIterations,
         vector<string>& sampleNames,
         int coverage,
         GenotypeCombo& genotypeCombo,
@@ -59,6 +60,11 @@ public:
         map<int, vector<Genotype> >& genotypesByPloidy,
         vector<string>& sequencingTechnologies,
         AlleleParser* parser);
+
+    vcf::Variant& gvcf(
+        vcf::Variant& var,
+        NonCalls& noncalls,
+        AlleleParser* parser);
 };
 
 
diff --git a/src/Sample.cpp b/src/Sample.cpp
index 811cb0c..06207cd 100644
--- a/src/Sample.cpp
+++ b/src/Sample.cpp
@@ -346,7 +346,7 @@ int countAlleles(Samples& samples) {
 
 ostream& operator<<(ostream& out, Sample& sample) {
     for (Sample::iterator s = sample.begin(); s != sample.end(); ++s) {
-        out << s->first << ":" << s->second.size() << " ";
+        out << s->first << " #" << s->second.size() << endl << s->second << endl;
     }
     return out;
 }
diff --git a/src/Sample.h b/src/Sample.h
index 2449156..e74d1eb 100644
--- a/src/Sample.h
+++ b/src/Sample.h
@@ -124,8 +124,6 @@ public:
     void setSupportedAlleles(void);
 };
 
-
-
 int countAlleles(Samples& samples);
 // using this one...
 void groupAlleles(Samples& samples, map<string, vector<Allele*> >& alleleGroups);
diff --git a/src/freebayes.cpp b/src/freebayes.cpp
index 83ca759..be412da 100644
--- a/src/freebayes.cpp
+++ b/src/freebayes.cpp
@@ -16,6 +16,7 @@
 #include <cmath>
 #include <time.h>
 #include <float.h>
+#include <stdlib.h>
 
 // private libraries
 #include "api/BamReader.h"
@@ -38,6 +39,7 @@
 
 #include "Bias.h"
 #include "Contamination.h"
+#include "NonCall.h"
 
 
 // local helper debugging macros to improve code readability
@@ -59,6 +61,13 @@
 
 using namespace std; 
 
+// todo
+// generalize the main function to take the parameters as input
+// so that we can invoke the entire algorithm on different regions
+// when requested to run in parallel
+// take the targets (or whole genome) and make small jobs
+// run the main function for each region in an omp parallel for loop
+// only do this if the --parallel flag is set > 1
 
 // freebayes main
 int main (int argc, char *argv[]) {
@@ -71,6 +80,7 @@ int main (int argc, char *argv[]) {
     list<Allele*> alleles;
 
     Samples samples;
+    NonCalls nonCalls;
 
     ostream& out = *(parser->output);
 
@@ -110,6 +120,10 @@ int main (int argc, char *argv[]) {
     if (parameters.output == "vcf") {
         out << parser->variantCallFile.header << endl;
     }
+            
+    if (0 < parameters.maxCoverage) {
+        srand(13);
+    }
 
     Allele nullAllele = genotypeAllele(ALLELE_NULL, "N", 1, "1N");
 
@@ -122,6 +136,19 @@ int main (int argc, char *argv[]) {
 
         DEBUG2("at start of main loop");
 
+        // did we switch chromosomes or exceed our gVCF chunk size?
+        // if so, we may need to output a gVCF record
+        Results results;
+        if (parameters.gVCFout && !nonCalls.empty() &&
+            ( nonCalls.begin()->first != parser->currentSequenceName
+              || (parameters.gVCFchunk &&
+                  nonCalls.lastPos().second - nonCalls.firstPos().second
+                  > parameters.gVCFchunk))) {
+            vcf::Variant var(parser->variantCallFile);
+            out << results.gvcf(var, nonCalls, parser) << endl;
+            nonCalls.clear();
+        }
+
         // don't process non-ATGC's in the reference
         string cb = parser->currentReferenceBaseString();
         if (cb != "A" && cb != "T" && cb != "C" && cb != "G") {
@@ -129,43 +156,80 @@ int main (int argc, char *argv[]) {
             continue;
         }
 
-        if (parameters.trace) {
-            for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
-                const string& name = s->first;
-                for (Sample::iterator g = s->second.begin(); g != s->second.end(); ++g) {
-                    vector<Allele*>& group = g->second;
-                    for (vector<Allele*>::iterator a = group.begin(); a != group.end(); ++a) {
-                        Allele& allele = **a;
-                        parser->traceFile << parser->currentSequenceName << "," << (long unsigned int) parser->currentPosition + 1  
-                                          << ",allele," << name << "," << allele.readID << "," << allele.base() << ","
-                                          << allele.currentQuality() << "," << allele.mapQuality << endl;
-                    }
-                }
-            }
-            DEBUG2("after trace generation");
-        }
-
-        if (!parser->inTarget()) {
-            DEBUG("position: " << parser->currentSequenceName << ":" << (long unsigned int) parser->currentPosition + 1
-                  << " is not inside any targets, skipping");
-            continue;
-        }
-
         int coverage = countAlleles(samples);
 
         DEBUG("position: " << parser->currentSequenceName << ":" << (long unsigned int) parser->currentPosition + 1 << " coverage: " << coverage);
 
+        bool skip = false;
         if (!parser->hasInputVariantAllelesAtCurrentPosition()) {
             // skips 0-coverage regions
             if (coverage == 0) {
                 DEBUG("no alleles left at this site after filtering");
-                continue;
+                skip = true;
             } else if (coverage < parameters.minCoverage) {
                 DEBUG("post-filtering coverage of " << coverage << " is less than --min-coverage of " << parameters.minCoverage);
-                continue;
+                skip = true;
             } else if (parameters.onlyUseInputAlleles) {
                 DEBUG("no input alleles, but using only input alleles for analysis, skipping position");
-                continue;
+                skip = true;
+            } else if (0 < parameters.maxCoverage) {
+                // go through each sample
+                for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
+                    string sampleName = s->first;
+                    Sample& sample = s->second;
+                    // get the coverage for this sample 
+                    int sampleCoverage = 0;
+                    for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
+                        sampleCoverage += sg->second.size();
+                    }
+                    if (sampleCoverage <= parameters.maxCoverage) {
+                        skip = true;
+                        continue;
+                    }
+
+                    DEBUG("coverage " << sampleCoverage << " for sample " << sampleName << " was > " << parameters.maxCoverage << ", so we will remove " << (sampleCoverage - parameters.maxCoverage) << " genotypes");
+                    vector<string> genotypesToErase;
+                    do {
+                        double probRemove = (sampleCoverage - parameters.maxCoverage) / (double)sampleCoverage;
+                        vector<string> genotypesToErase;
+                        // iterate through the genotypes
+                        for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
+                            vector<Allele*> allelesToKeep;
+                            // iterate through each allele
+                            for (int alleleIndex = 0; alleleIndex < sg->second.size(); alleleIndex++) {
+                                // only if we have more alleles to remove
+                                if (parameters.maxCoverage < sampleCoverage) {
+                                    double r = rand() / (double)RAND_MAX;
+                                    if (r < probRemove) { // skip over this allele
+                                        sampleCoverage--;
+                                        continue;
+                                    }
+                                }
+                                // keep it
+                                allelesToKeep.push_back(sg->second[alleleIndex]);
+                            }
+                            // re-assign the alleles to this genotype
+                            if (allelesToKeep.size() < sg->second.size()) {
+                                sg->second.assign(allelesToKeep.begin(), allelesToKeep.end());
+                            }
+                            // if no more alleles for this genotype, remove it later
+                            if (sg->second.empty()) {
+                                genotypesToErase.push_back(sg->first);
+                            }
+                        }
+                        // remove empty genotypes
+                        for (vector<string>::iterator gt = genotypesToErase.begin(); gt != genotypesToErase.end(); ++gt) {
+                            sample.erase(*gt);
+                        }
+                    } while (parameters.maxCoverage < sampleCoverage);
+                    sampleCoverage = 0;
+                    for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
+                        sampleCoverage += sg->second.size();
+                    }
+                    DEBUG("coverage for sample " << sampleName << " is now " << sampleCoverage);
+                }
+                // update coverage
+                coverage = countAlleles(samples);
             }
 
             DEBUG2("coverage " << parser->currentSequenceName << ":" << parser->currentPosition << " == " << coverage);
@@ -175,7 +239,7 @@ int main (int argc, char *argv[]) {
             if (!parameters.reportMonomorphic
                 && !sufficientAlternateObservations(samples, parameters.minAltCount, parameters.minAltFraction)) {
                 DEBUG("insufficient alternate observations");
-                continue;
+                skip = true;
             }
             if (parameters.reportMonomorphic) {
                 DEBUG("calling at site even though there are no alternate observations");
@@ -183,13 +247,22 @@ int main (int argc, char *argv[]) {
         } else {
             /*
             cerr << "has input variants at " << parser->currentSequenceName << ":" << parser->currentPosition << endl;
-            vector<Allele>& inputs = parser->inputVariantAlleles[parser->currentPosition];
+            vector<Allele>& inputs = parser->inputVariantAlleles[parser->currentSequenceName][parser->currentPosition];
             for (vector<Allele>::iterator a = inputs.begin(); a != inputs.end(); ++a) {
                 cerr << *a << endl;
             }
             */
         }
 
+        if (skip) {
+            // record data for gVCF
+            if (parameters.gVCFout) {
+                nonCalls.record(parser->currentSequenceName, parser->currentPosition, samples);
+            }
+            // and step ahead
+            continue;
+        }
+
         // to ensure proper ordering of output stream
         vector<string> sampleListPlusRef;
 
@@ -294,7 +367,6 @@ int main (int argc, char *argv[]) {
         //cerr << "estimated minor count " << estimatedMinorAllelesAtLocus << endl;
         
 
-        Results results;
         map<string, vector<vector<SampleDataLikelihood> > > sampleDataLikelihoodsByPopulation;
         map<string, vector<vector<SampleDataLikelihood> > > variantSampleDataLikelihoodsByPopulation;
         map<string, vector<vector<SampleDataLikelihood> > > invariantSampleDataLikelihoodsByPopulation;
@@ -303,127 +375,28 @@ int main (int argc, char *argv[]) {
         int inputLikelihoodCount = 0;
 
         DEBUG2("calculating data likelihoods");
-        // calculate data likelihoods
-        //for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
-        for (vector<string>::iterator n = parser->sampleList.begin(); n != parser->sampleList.end(); ++n) {
-
-            //string sampleName = s->first;
-            string& sampleName = *n;
-            //DEBUG2("sample: " << sampleName);
-            //Sample& sample = s->second;
-            if (samples.find(sampleName) == samples.end()
-                && !(parser->hasInputVariantAllelesAtCurrentPosition()
-                     || parameters.reportMonomorphic)) {
-                continue;
-            }
-            Sample& sample = samples[sampleName];
-            vector<Genotype>& genotypes = genotypesByPloidy[parser->currentSamplePloidy(sampleName)];
-            vector<Genotype*> genotypesWithObs;
-            for (vector<Genotype>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
-                if (parameters.excludePartiallyObservedGenotypes) {
-                    if (g->sampleHasSupportingObservationsForAllAlleles(sample)) {
-                        genotypesWithObs.push_back(&*g);
-                    }
-                } else if (parameters.excludeUnobservedGenotypes && usingNull) {
-                    if (g->sampleHasSupportingObservations(sample)) {
-                        //cerr << sampleName << " has suppporting obs for " << *g << endl;
-                        genotypesWithObs.push_back(&*g);
-                    } else if (g->hasNullAllele() && g->homozygous) {
-                        // this genotype will never be added if we are running in observed-only mode, but
-                        // we still need it for consistency
-                        genotypesWithObs.push_back(&*g);
-                    }
-                } else {
-                    genotypesWithObs.push_back(&*g);
-                }
-            }
-
-            // skip this sample if we have no observations supporting any of the genotypes we are going to evaluate
-            if (genotypesWithObs.empty()) {
-                continue;
-            }
-
-            vector<pair<Genotype*, long double> > probs
-                = probObservedAllelesGivenGenotypes(sample, genotypesWithObs,
-                                                    parameters.RDF, parameters.useMappingQuality,
-                                                    observationBias, parameters.standardGLs,
-                                                    genotypeAlleles,
-                                                    contaminationEstimates,
-                                                    estimatedAlleleFrequencies);
-            
-#ifdef VERBOSE_DEBUG
-            if (parameters.debug2) {
-                for (vector<pair<Genotype*, long double> >::iterator p = probs.begin(); p != probs.end(); ++p) {
-                    cerr << parser->currentSequenceName << "," << (long unsigned int) parser->currentPosition + 1 << ","
-                         << sampleName << ",likelihood," << *(p->first) << "," << p->second << endl;
-                }
-            }
-#endif
-
-            Result& sampleData = results[sampleName];
-            sampleData.name = sampleName;
-            sampleData.observations = &sample;
-            for (vector<pair<Genotype*, long double> >::iterator p = probs.begin(); p != probs.end(); ++p) {
-                sampleData.push_back(SampleDataLikelihood(sampleName, &sample, p->first, p->second, 0));
-            }
-
-            sortSampleDataLikelihoods(sampleData);
-
-            string& population = parser->samplePopulation[sampleName];
-            vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods = sampleDataLikelihoodsByPopulation[population];
-            vector<vector<SampleDataLikelihood> >& variantSampleDataLikelihoods = variantSampleDataLikelihoodsByPopulation[population];
-            vector<vector<SampleDataLikelihood> >& invariantSampleDataLikelihoods = invariantSampleDataLikelihoodsByPopulation[population];
-
-            if (parameters.genotypeVariantThreshold != 0) {
-                if (sampleData.size() > 1
-                    && abs(sampleData.at(1).prob - sampleData.front().prob)
-                    < parameters.genotypeVariantThreshold) {
-                    variantSampleDataLikelihoods.push_back(sampleData);
-                } else {
-                    invariantSampleDataLikelihoods.push_back(sampleData);
-                }
-            } else {
-                variantSampleDataLikelihoods.push_back(sampleData);
-            }
-            sampleDataLikelihoods.push_back(sampleData);
-
-            DEBUG2("obtaining genotype likelihoods input from VCF");
-            int prevcount = sampleDataLikelihoods.size();
-            parser->addCurrentGenotypeLikelihoods(genotypesByPloidy, sampleDataLikelihoods);
-            // add these sample data likelihoods to 'invariant' likelihoods
-            inputLikelihoodCount += sampleDataLikelihoods.size() - prevcount;
-            parser->addCurrentGenotypeLikelihoods(genotypesByPloidy, invariantSampleDataLikelihoods);
-
-        }
-
-        // if there are not any input GLs, attempt to use the input ACs
-        if (inputLikelihoodCount == 0) {
-            parser->getInputAlleleCounts(genotypeAlleles, inputAlleleCounts);
-        }
+        calculateSampleDataLikelihoods(
+            samples,
+            results,
+            parser,
+            genotypesByPloidy,
+            parameters,
+            usingNull,
+            observationBias,
+            genotypeAlleles,
+            contaminationEstimates,
+            estimatedAlleleFrequencies,
+            sampleDataLikelihoodsByPopulation,
+            variantSampleDataLikelihoodsByPopulation,
+            invariantSampleDataLikelihoodsByPopulation);
 
         DEBUG2("finished calculating data likelihoods");
 
 
-        // this section is a hack to make output of trace identical to BamBayes trace
-        // and also outputs the list of samples
-        vector<bool> samplesWithData;
-        if (parameters.trace) {
-            parser->traceFile << parser->currentSequenceName << "," << (long unsigned int) parser->currentPosition + 1 << ",samples,";
-            for (vector<string>::iterator s = sampleListPlusRef.begin(); s != sampleListPlusRef.end(); ++s) {
-                if (parameters.trace) parser->traceFile << *s << ":";
-                Results::iterator r = results.find(*s);
-                if (r != results.end()) {
-                    samplesWithData.push_back(true);
-                } else {
-                    samplesWithData.push_back(false);
-                }
-            }
-            parser->traceFile << endl;
-        }
-
         // if somehow we get here without any possible sample genotype likelihoods, bail out
         bool hasSampleLikelihoods = false;
-        for (map<string, vector<vector<SampleDataLikelihood> > >::iterator s = sampleDataLikelihoodsByPopulation.begin(); s != sampleDataLikelihoodsByPopulation.end(); ++s) {
+        for (map<string, vector<vector<SampleDataLikelihood> > >::iterator s = sampleDataLikelihoodsByPopulation.begin();
+             s != sampleDataLikelihoodsByPopulation.end(); ++s) {
             if (!s->second.empty()) {
                 hasSampleLikelihoods = true;
                 break;
@@ -457,50 +430,6 @@ int main (int argc, char *argv[]) {
         bool bestOverallComboIsHet = false;
         GenotypeCombo bestCombo; // = NULL;
 
-        // what a hack...
-        /*
-        if (parameters.trace) {
-            for (list<GenotypeCombo>::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) {
-                vector<Genotype*> comboGenotypes;
-                for (GenotypeCombo::iterator g = gc->begin(); g != gc->end(); ++g)
-                    comboGenotypes.push_back((*g)->genotype);
-                long double posteriorProb = gc->posteriorProb;
-                long double dataLikelihoodln = gc->probObsGivenGenotypes;
-                long double priorln = gc->posteriorProb;
-                long double priorlnG_Af = gc->priorProbG_Af;
-                long double priorlnAf = gc->priorProbAf;
-                long double priorlnBin = gc->priorProbObservations;
-
-                parser->traceFile << parser->currentSequenceName << "," << (long unsigned int) parser->currentPosition + 1 << ",genotypecombo,";
-
-                int j = 0;
-                GenotypeCombo::iterator i = gc->begin();
-                for (vector<bool>::iterator d = samplesWithData.begin(); d != samplesWithData.end(); ++d) {
-                    if (*d) {
-                        parser->traceFile << IUPAC(*(*i)->genotype);
-                        ++i;
-                    } else {
-                        parser->traceFile << "?";
-                    }
-                }
-                // TODO cleanup this and above
-                parser->traceFile 
-                    << "," << dataLikelihoodln
-                    << "," << priorln
-                    << "," << priorlnG_Af
-                    << "," << priorlnAf
-                    << "," << priorlnBin
-                    << "," << posteriorProb
-                    << "," << safe_exp(posteriorProb - posteriorNormalizer)
-                    << endl;
-            }
-        }
-        */
-
-        // the second clause guards against float underflow causing us not to output a position
-        // practically, parameters.PVL == 0 means "report all genotypes which pass our input filters"
-
-
         GenotypeCombo bestGenotypeComboByMarginals;
         vector<vector<SampleDataLikelihood> > allSampleDataLikelihoods;
 
@@ -628,14 +557,6 @@ int main (int argc, char *argv[]) {
             }
         }
 
-        // report the maximum a posteriori estimate
-        // unless we're reporting the GL maximum
-        if (parameters.reportGenotypeLikelihoodMax) {
-            bestCombo = glMax;
-        }
-
-        DEBUG("best combo: " << bestCombo);
-
         // odds ratio between the first and second-best combinations
         if (genotypeCombos.size() > 1) {
             bestComboOddsRatio = genotypeCombos.front().posteriorProb - (++genotypeCombos.begin())->posteriorProb;
@@ -690,10 +611,58 @@ int main (int argc, char *argv[]) {
             }
         }
 
-        //if (alts.empty()) alts = genotypeAlleles;
+        // reporting the GL maximum *over all alleles*
+        if (parameters.reportGenotypeLikelihoodMax) {
+            bestCombo = glMax;
+        } else {
+            // the default behavior is to report the GL maximum genotyping over the alleles in the best posterior genotyping
+
+            // select the maximum-likelihood GL given the alternates we have
+            // this is not the same thing as the GL max over all alleles!
+            // it is the GL max over the selected alleles at this point
+
+            vector<Allele> alleles = alts;
+            for (vector<Allele>::iterator a = genotypeAlleles.begin(); a != genotypeAlleles.end(); ++a) {
+                if (a->isReference()) {
+                    alleles.push_back(*a);
+                }
+            }
+            map<string, list<GenotypeCombo> > glMaxComboBasedOnAltsByPop;
+            for (map<string, SampleDataLikelihoods>::iterator p = sampleDataLikelihoodsByPopulation.begin(); p != sampleDataLikelihoodsByPopulation.end(); ++p) {
+                const string& population = p->first;
+                SampleDataLikelihoods& sampleDataLikelihoods = p->second;
+                GenotypeCombo glMaxBasedOnAlts;
+                for (SampleDataLikelihoods::iterator v = sampleDataLikelihoods.begin(); v != sampleDataLikelihoods.end(); ++v) {
+                    SampleDataLikelihood* m = NULL;
+                    for (vector<SampleDataLikelihood>::iterator d = v->begin(); d != v->end(); ++d) {
+                        if (d->genotype->matchesAlleles(alleles)) {
+                            m = &*d;
+                            break;
+                        }
+                    }
+                    assert(m != NULL);
+                    glMaxBasedOnAlts.push_back(m);
+                }
+                glMaxComboBasedOnAltsByPop[population].push_back(glMaxBasedOnAlts);
+            }
+            list<GenotypeCombo> glMaxBasedOnAltsGenotypeCombos; // build new combos into this list
+            combinePopulationCombos(glMaxBasedOnAltsGenotypeCombos, glMaxComboBasedOnAltsByPop);
+            bestCombo = glMaxBasedOnAltsGenotypeCombos.front();
+        }
+
+        DEBUG("best combo: " << bestCombo);
+
+        // output
 
         if (!alts.empty() && (1 - pHom.ToDouble()) >= parameters.PVL || parameters.PVL == 0) {
 
+            // write the last gVCF record(s)
+            if (parameters.gVCFout && !nonCalls.empty()) {
+                vcf::Variant var(parser->variantCallFile);
+                out << results.gvcf(var, nonCalls, parser) << endl;
+                nonCalls.clear();
+            }
+
             vcf::Variant var(parser->variantCallFile);
 
             out << results.vcf(
@@ -716,24 +685,22 @@ int main (int argc, char *argv[]) {
                 parser)
                 << endl;
 
-        } else if (!parameters.failedFile.empty()) {
-            // get the unique alternate alleles in this combo, sorted by frequency in the combo
-            long unsigned int position = parser->currentPosition;
-            for (vector<Allele>::iterator ga =  genotypeAlleles.begin(); ga != genotypeAlleles.end(); ++ga) {
-                if (ga->type == ALLELE_REFERENCE)
-                    continue;
-                parser->failedFile
-                    << parser->currentSequenceName << "\t"
-                    << position << "\t"
-                    << position + ga->length << "\t"
-                    << *ga << endl;
-            }
-            // BED format
+        } else if (parameters.gVCFout) {
+            // record statistics for gVCF output
+            nonCalls.record(parser->currentSequenceName, parser->currentPosition, samples);
         }
         DEBUG2("finished position");
 
     }
 
+    // write the last gVCF record
+    if (parameters.gVCFout && !nonCalls.empty()) {
+        Results results;
+        vcf::Variant var(parser->variantCallFile);
+        out << results.gvcf(var, nonCalls, parser) << endl;
+        nonCalls.clear();
+    }
+
     DEBUG("total sites: " << total_sites << endl
           << "processed sites: " << processed_sites << endl
           << "ratio: " << (float) processed_sites / (float) total_sites);
diff --git a/src/version_release.txt b/src/version_release.txt
index 82f7124..875d109 100644
--- a/src/version_release.txt
+++ b/src/version_release.txt
@@ -1,5 +1,5 @@
-# This file was auto-generated by running "make setversion VERSION=v0.9.20"
-# on Fri Dec 12 16:34:49 EST 2014 .
+# This file was auto-generated by running "make setversion VERSION=v1.0.0"
+# on Sat Nov 14 16:39:28 CET 2015 .
 # Please do not edit or commit this file manually.
 #
-v0.9.20
+v1.0.0
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..6285c24
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,15 @@
+.PHONY: all clean
+
+freebayes=../bin/freebayes
+vcfuniq=../vcflib/bin/vcfuniq
+
+all: test
+
+test: $(freebayes) $(vcfuniq)
+	prove -v t
+
+$(freebayes):
+	cd .. && $(MAKE)
+
+$(vcfuniq):
+	cd ../vcflib && $(MAKE)
diff --git a/test/region-and-target-handling.t b/test/region-and-target-handling.t
new file mode 100644
index 0000000..b99b6ee
--- /dev/null
+++ b/test/region-and-target-handling.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+# vi: set ft=sh :
+test=$(dirname $0)
+root=$(dirname $0)/..
+source $test/test-simple-bash/lib/test-simple.bash \
+    tests 8
+
+PATH=$root/bin:$PATH
+
+ref=$test/`basename $0`.ref
+alt=$test/`basename $0`.alt
+bam=$test/`basename $0`.bam
+bed=$test/`basename $0`.bed
+vcf=$test/`basename $0`.vcf
+
+trap 'rm -f $ref* $alt $bam* $bed $vcf' EXIT
+
+# 01234567	start
+# ATCGGCTA
+# 12345678	end
+
+cat >$ref <<REF
+>ref
+ATCGGCTA
+REF
+samtools faidx $ref
+
+cat >$alt <<ALT
+>alt
+GTTAGGTT
+ALT
+
+samtools view -b - >$bam <<SAM
+ at HD	VN:1.5	SO:coordinate
+ at SQ	SN:ref	LN:8
+alt	0	ref	1	30	1X1=2X1=1X1=1X	*	0	0	GTTAGGTT	*
+SAM
+samtools index $bam
+
+cat >$bed <<BED
+ref	0	1	first_base
+ref	2	4	third_and_fourth_base
+ref	5	6	sixth_base
+ref	7	8	eigth_base
+BED
+
+cat >$vcf <<VCF
+##fileformat=VCFv4.1
+##INFO=<ID=NAME,Number=0,Type=String,Description="Test name">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+ref	1	.	A	G	1234	PASS	NAME=first base
+ref	2	.	T	.	1234	PASS	NAME=second base
+ref	3	.	C	T	1234	PASS	NAME=third base
+ref	4	.	G	A	1234	PASS	NAME=fourth base
+ref	5	.	G	.	1234	PASS	NAME=fifth base
+ref	6	.	C	G	1234	PASS	NAME=sixth base
+ref	7	.	T	.	1234	PASS	NAME=seventh base
+ref	8	.	A	T	1234	PASS	NAME=eigth base
+VCF
+
+PS4='\n+ '
+
+function run_freebayes() {
+    ($root/bin/freebayes "$@" \
+        --haplotype-length 0 --min-alternate-count 1 \
+        --min-alternate-fraction 0 --pooled-continuous --report-monomorphic \
+        --ploidy 1 \
+        -f $ref $bam \
+        | grep -vE '^#' | cut -f1-5)
+}
+
+if [[ -n $TEST_DEBUG ]]; then
+    cat $ref >&2
+    cat $bed >&2
+    cat $vcf >&2
+    vcfannotate --bed $bed --key MATCH $vcf >&2
+    vcfintersect --bed $bed $vcf >&2
+    bedtools intersect -a $vcf -b $bed >&2
+fi
+
+[[ -z $(run_freebayes --region ref:4-5 --region ref:6-7) ]]; ok $? 'ref:4-5 ref:6-7 are empty'
+[[ -z $(run_freebayes --region ref:4   --region ref:6)   ]]; ok $? 'ref:4   ref:6   are empty'
+
+expected=`cat <<END
+ref	6	.	C	G
+ref	8	.	A	T
+END
+`
+[[ $(run_freebayes --region ref:5-6 --region ref:7-8) == $expected ]]; ok $? 'ref:5-6 ref:7-8'
+[[ $(run_freebayes --region ref:5   --region ref:7)   == $expected ]]; ok $? 'ref:5   ref:7'
+[[ $(run_freebayes --region ref:5-)                   == $expected ]]; ok $? 'ref:5-'
+
+expected=`cat <<END
+ref	1	.	A	G
+ref	3	.	CG	TA
+ref	6	.	C	G
+ref	8	.	A	T
+END
+`
+[[ $(run_freebayes --targets $bed) == $expected ]]; ok $? "--targets $bed"
+[[ $(run_freebayes --region ref)   == $expected ]]; ok $? "--region ref"
+
+[[ $(run_freebayes --region ref:1-20 2>&1) =~ "Target region coordinates (ref 1 19) outside of reference sequence bounds (ref 8)" ]]
+    ok $? 'region outside of bounds error'
diff --git a/test/splice/1:883884-887618.bam b/test/splice/1:883884-887618.bam
new file mode 100644
index 0000000..61bdcb9
Binary files /dev/null and b/test/splice/1:883884-887618.bam differ
diff --git a/test/splice/1:883884-887618.bam.bai b/test/splice/1:883884-887618.bam.bai
new file mode 100644
index 0000000..09649e4
Binary files /dev/null and b/test/splice/1:883884-887618.bam.bai differ
diff --git a/test/splice/1:883884-887618.fa b/test/splice/1:883884-887618.fa
new file mode 100644
index 0000000..6f31855
--- /dev/null
+++ b/test/splice/1:883884-887618.fa
@@ -0,0 +1,64 @@
+>1
+CCTGCAGGTTGACATTGGACAGCTTCAGGATCACGGAGAAGTTGATGGGCTTGGAGCTCA
+TGCGCCCTGGCTTCCTGTTGAAGTCGACCTGCTGGAACATCTGCCCCAAGGGCCGTGTCA
+GGCTCTCTCGGCCCCATGCCTGGTCACCCTGGCTTCACCCTGGCTGCACCCTGGTCCCCC
+TGGTCCCTTTGGCCCTGCACCTGGCTGCACCCTGGTCACCCTGGTCCTCAGGCTTCAGCA
+CGAGGAGACCCAACCCGAAACAAGAGATAAGCAGCTAGGCACGGTCCCAGCCCCATGGCC
+CCACACAGCCTGGCGTGTCCCCGAGTGGGGCTCTGATCAGCAGGGAAGGATCAGGAACAC
+GGGCCCTCTCCCAAAATCCTGGGAAGCACTAAGGACCCACGTCTGGAGACCTAGAGGTCC
+CTGCCACACAGAGTACCAGGACAGCCCCCATTCCTGACAACATGGCACTGAGGCCCATCC
+TGGCCCTCAGCACAGCAATGCCACATGGGGCTCCGTCCCGAGCAACCAGAAGACGCAGCA
+GCAACAGCAAAGAGTGCCAGACACAGGACGGGACAGATGGAGGTCACGGGAGGCCTGGGG
+GGCCCCTCCCACACCCTCCACAGAAGCCACAGGCCATGCACCTCCGTAGGACATGCCAGG
+AGGAGCAGAGAAAGAGTCAGCCTGGCCTCTCAGTCTTGTGACCCCTCCCCAACCACTAGG
+AGCCCTCAGGCTGTGAACCAGAGAGATCCAGGGTACATGCTGGGGCACCAGAGGACAGAA
+GCAGATTATGGGGGGCCCAAGGGTGGAGGCCCCAGCAAGACACCCCACTCAGCAGGGGTG
+GCTGCCAGACAGCTGAGCTCAGGTCCAGATGGGAAAACGCCCTGGTACTGATGAACCTCT
+GGGAGGACACTGCCCTTCTCTAGCCTGGGCCAAAAGTATGGGCAGGCGACTGGCTTCCTG
+TCCAGGCACCGGGGAAGGGCAGGCACGGCCTACAGCTGAGGGCTTTGTTTTAGCTTTTAG
+GGTTTGCTTTTTCCGGAGAGGAGAGGCCTGCATCCCTGCGTGAGAGGGGCCTACAGGGTG
+CGGCTGCAGTGAACCAGGGCAGTGCACAAGTGAAGAAACAGACACTGCAAGAGAGTAAGA
+AAGCACGTAGCCAAGGAGATTGGACAAGGGAAACAGACACAAAGCATGCCTGGAGCTGGG
+CAGCAGAGCTGGAGGCCCACAGACACAGGGGCCAGGCCAAGGGCTGGGCAATCAGAGACC
+ACTGCTGGCTGCCATGGGAGTGAACATGGCTTCCTAAAGATGTTCAGCATCCCTGCCTCA
+GTGGAAGGGGAATCAGAACGTGCCTGGGGCTTCCATACCTTCTGGACCCAGCAACCTGGG
+GACCCAGCCCCAAGGACAGCTGTGACACGACTCTGGGCCTTCATGAGGCTGAGAACTCAT
+TAGATAAACAGGGTGGAGATCGTGAATTCCTAAATTTACAGCGGGGTGGGGCCTAACCCA
+CCCACTGAAAGTTGAAACTTCGGACTCCAATCTTTCCCTACCTGAGTGGCATGTGCACGT
+GCAAAGATGCATACACACAGGTACACACAGGCGCACACATGCACAGGCACACACGTAGAC
+GCACGTACATGCACACAGGTGCACACACGCACAGGCACACACAAGCAGACGTGCATGCAT
+GCACCCAAGTGTACAGGTACACGCACAGGTACACACGCACAGTACACACATGCAGATGCA
+AATGCATGCACACAAGTGCACACAGACACGTGTGTATGCACATAGGTGCACACAGGTACA
+TAGATGCACGCAGACACACATGCATGCACACAGATGCACACATGCAAAGGTTCATGCATG
+CACAGGTAGACACATGCAGACGCAAATGCATGCACACAGGTGCACACACGCACAGGTACA
+CACATGCAGACAGGTGCACACAGGTACACACGCATGTGCATGCATACAGGTACACACAGG
+TACACACTGGTATACAAAGACACATGCACAGGTGCACACACGCAAAGGTACACGCAAAGG
+TACACACATGCATACACACAGGTGTACACACGCACAGATGCACACACCTGCAGGCACACA
+GGCATTCATGGATACACGTGCATACACACATACAGATGCACACATGAACTGATATGCACA
+CACACACAGATTTACAGACCCATGCACCCAGATGGTGCACACACACGCATGCATGCACAG
+ACAAACGCACCCACCCAGATGTACACACATCCCCCATACAAATGCATACACATGCACACA
+ACTGTCAGGGCAGGGCACTGTTCAGTGTACAGTTCAGCAATCAGACTTGGGTTTTGGCCC
+ATCTCGCCTCCTGCCTACTCCAGTCTTGGGCAAGCACCTACAGGGTCAGGTCCCGGCATC
+CTCAAGCACAGCCCTAGCAGCAGCCGCCTTCATCCCCAGTGGTCCCACGTTCCCCAACAC
+TGCCCCCAGCACACGGAGGAGCACTGTCCTCGTGACACCCGTGACAAGGAGGCGGCCAGG
+GCTCCGGGAAGTCGCCGGACAACTCAGCACAGACGCCCGGAGCAGCAGATGCTCCCTACA
+GGAGCAGGCAGAGGTGCCACACGCCCACCACAGCCTCACTCACCTCCAGGATGAAAGGCA
+GCACCGGGATGAAGGCCCCCGAGCTCCCCGAGAGCAGCGTCAGGGCACGGATGCAGTGCA
+TTCGCAGCGGGTAGAAGCGGGCAGTGGGGATGAGCCTGGGGGTGGGAAGGCCGAGTGAGC
+AGAGGCCCCGGCTCTGGCAGCCCCTGCCCCTCCCCCTGCTGTCTTGGACCTCCTCCCCCA
+CTCACCGACATCAGACCCCTAAAGCAGGACAAGGCACGTCTGAACCCCAGGGACCTGCAC
+CCCTTGCCCAGCTATGGGGCTGCAGGTACCTTACCCAGCTTTCAGCTTTATTTCAACACG
+GTGGCAGGGGTCATCTGTTCCTGCCTGCTCTCCTCCCTGTTCCCGCCTTTCCTACTTCTT
+CCCTTGGACTTCCCACCTCCCTTCCCAGCCTTGAGTTTCTAAGGCTCAGCCAGACAGATG
+CCCCCTCCTGGCCAAAGAGGCCACAGTACTGCTCCCACCTTTTCCTGCTGCAGCCCCTAC
+CGTGCCTGGACCCTCCTAAGCGCTCACACCCAAAATGCAGTTCACTGCATCAGATGTGCT
+CCACCCAAGAACCCCACGTAATGAACCAAACTCACTGCAAACTGCACACAAGCCCCCAGA
+ACACACGCTGGGTGGCACCCGAGGCGGCTAAACCGCTGTTGGAGGAGCTCATGTCACAGG
+TGCCCACCAGCCCTAGCTCTCCAGAATCCAGAGCATCTCCCCGTAGCAGGTACAGGGACC
+CCAGCCCTTCCTCAGGGACGGCCAGGACACAGACTCAGGCCCCAAGGCCCTGAACGCCAG
+AGGCACCGCCCACACAGTCCCAGCAAATTTGCTTCTCCTGACCCTCCCGCACAACCCTGC
+CCACCCCACAACTCACTTGATACAGCCAATGATGACTTGGGCAAGGGGGTAGACCAAGGG
+CTGGAGGGCTTCGCTGGGGCCCGCAGTGCTCAGGACCCGGCACCACAGGAAGAGGCAGTG
+CACATACTGCCAGTTGTACACAGACTGGTATGTTTCCTGGTCAGAGAGAACCACGTCAGC
+TACTGGCCAGGCTGACAAGTCAGGCTGATGCACGTTCCTCCTGGGCCGGCACAGGAATCA
+TTTCCTCATCTTGCA
diff --git a/test/splice/1:883884-887618.fa.fai b/test/splice/1:883884-887618.fa.fai
new file mode 100644
index 0000000..dc79067
--- /dev/null
+++ b/test/splice/1:883884-887618.fa.fai
@@ -0,0 +1 @@
+1	3735	3	60	61
diff --git a/test/t/01_call_variants.t b/test/t/01_call_variants.t
new file mode 100644
index 0000000..abc25f1
--- /dev/null
+++ b/test/t/01_call_variants.t
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+
+BASH_TAP_ROOT=bash-tap
+. ./bash-tap/bash-tap-bootstrap
+
+PATH=../bin:$PATH # for freebayes
+PATH=../scripts:$PATH # for freebayes-parallel
+PATH=../vcflib/bin:$PATH # for vcf binaries used by freebayes-parallel
+
+plan tests 19
+
+is $(echo "$(comm -12 <(cat tiny/NA12878.chr22.tiny.giab.vcf | grep -v "^#" | cut -f 2 | sort) <(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f 2 | sort) | wc -l) >= 13" | bc) 1 "variant calling recovers most of the GiAB variants in a test region"
+
+by_region=$((for region in \
+    q:180-191 \
+    q:1002-1013 \
+    q:1811-1825 \
+    q:1911-1922 \
+    q:2344-2355 \
+    q:3257-3268 \
+    q:4443-4454 \
+    q:5003-5014 \
+    q:5074-5085 \
+    q:5089-5100 \
+    q:5632-5646 \
+    q:6412-6423 \
+    q:8840-8851 \
+    q:9245-9265 \
+    q:9785-9796 \
+    q:10526-10537 \
+    q:11255-11266 \
+    q:11530-11541 \
+    q:12119-12130;
+do
+    freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -r $region | grep -v "^#"
+done) |wc -l)
+
+at_once=$(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l)
+
+is $by_region $at_once "freebayes produces the same number of calls if targeted per site or called without targets"
+
+cat >targets.bed <<EOF
+q	180	191
+q	1002	1013
+q	1811	1825
+q	1911	1922
+q	2344	2355
+q	3257	3268
+q	4443	4454
+q	5003	5014
+q	5074	5085
+q	5089	5100
+q	5632	5646
+q	6412	6423
+q	8840	8851
+q	9245	9265
+q	9785	9796
+q	10526	10537
+q	11255	11266
+q	11530	11541
+q	12119	12130
+EOF
+
+is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -t targets.bed | grep -v "^#" | wc -l) $by_region "a targets bed file can be used with the same effect as running by region"
+#rm targets.bed
+
+
+is $(samtools view -u tiny/NA12878.chr22.tiny.bam | freebayes -f tiny/q.fa --stdin | grep -v "^#" | wc -l) \
+    $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "reading from stdin or not makes no difference"
+
+is $(samtools view tiny/NA12878.chr22.tiny.bam | wc -l) $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -d 2>&1 | grep ^alignment: | wc -l) "freebayes processes all alignments in input"
+
+# ensure targeting works even when there are no reads
+is $(freebayes -f tiny/q.fa -@ tiny/q.vcf.gz tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 19 "freebayes correctly handles variant input"
+
+# ensure that positions at which no variants exist get put in the out vcf
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes puts required variants in output"
+
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" |  wc -l) 3 "freebayes limits calls to input variants correctly"
+
+is $(freebayes -f tiny/q.fa -@ tiny/q.vcf.gz -l tiny/1read.bam | grep -v "^#" | wc -l) 20 "freebayes reports all input variants even when there is no input data"
+
+# check variant input with region specified
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 2 "freebayes handles region and variant input"
+
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" |  wc -l) 2 "freebayes limits to variant input correctly when region is given"
+
+# check variant input when reading from stdin
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes handles variant input and reading from stdin"
+
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 3 "freebayes limits to variant input when reading from stdin"
+
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 2 "freebayes handles region, stdin, and variant input"
+
+gzip -c tiny/q.fa >tiny/q.fa.gz
+cp tiny/q.fa.fai tiny/q.fa.gz.fai
+freebayes -f tiny/q.fa.gz -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l - < tiny/NA12878.chr22.tiny.bam >/dev/null 2>/dev/null
+is $? 1 "freebayes bails out when given a gzipped or corrupted reference"
+rm tiny/q.fa.gz.*
+
+is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) $(freebayes-parallel tiny/q.regions 2 -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "running in parallel makes no difference"
+
+#is $(freebayes -f 'tiny/q with spaces.fa' tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) $(freebayes-parallel 'tiny/q with spaces.regions' 2 -f 'tiny/q with spaces.fa' tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "freebayes handles spaces in file names"
+
+
+is $(freebayes -f splice/1:883884-887618.fa splice/1:883884-887618.bam | grep ^1 | wc -l) 1 "freebayes can handle spliced reads"
+
+is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam --gvcf | grep '<\*>' | wc -l) 20 "freebayes produces the expected number of lines of gVCF output"
+
+is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam --gvcf --gvcf-chunk 50 | grep '<\*>' | wc -l) 245 "freebayes produces the expected number of lines of gVCF output"
diff --git a/test/tiny/1read.bam b/test/tiny/1read.bam
new file mode 100644
index 0000000..f052290
Binary files /dev/null and b/test/tiny/1read.bam differ
diff --git a/test/tiny/1read.bam.bai b/test/tiny/1read.bam.bai
new file mode 100644
index 0000000..38bec45
Binary files /dev/null and b/test/tiny/1read.bam.bai differ
diff --git a/test/tiny/NA12878.chr22.tiny.bam b/test/tiny/NA12878.chr22.tiny.bam
new file mode 100644
index 0000000..1c7f605
Binary files /dev/null and b/test/tiny/NA12878.chr22.tiny.bam differ
diff --git a/test/tiny/NA12878.chr22.tiny.bam.bai b/test/tiny/NA12878.chr22.tiny.bam.bai
new file mode 100644
index 0000000..066e9bd
Binary files /dev/null and b/test/tiny/NA12878.chr22.tiny.bam.bai differ
diff --git a/test/tiny/NA12878.chr22.tiny.giab.vcf b/test/tiny/NA12878.chr22.tiny.giab.vcf
new file mode 100644
index 0000000..1b27478
--- /dev/null
+++ b/test/tiny/NA12878.chr22.tiny.giab.vcf
@@ -0,0 +1,89 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=Uncertain,Description="Uncertain genotype due to reason in filter INFO field">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth summed across all datasets, excluding MQ0 reads">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Net Genotype quality across all datasets, defined as difference between most likely and next most likely genotype likelihoods">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Net Genotype across all datasets">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods summed across all unfiltered datasets for genotypes as defined in the VCF specification">
+##INFO=<ID=allalts,Number=1,Type=Integer,Description="All ALT alleles originally considered at this position">
+##INFO=<ID=datasetcalls,Number=1,Type=Integer,Description="Number of datasets with any genotype call at this position">
+##INFO=<ID=DPSum,Number=1,Type=Integer,Description="Total read depth summed across all datasets, excluding MQ0 reads">
+##INFO=<ID=Entropy,Number=1,Type=Float,Description="Shannon entropy of variant flanking regions, 12bp on both sides">
+##INFO=<ID=filter,Number=1,Type=String,Description="Reason for filtering this genotype as uncertain">
+##INFO=<ID=geno,Number=1,Type=Integer,Description="Most probable genotype, corresponding to the minimum entry in the PL field (e.g., 1=0/0,2=0/1,3=1/1,4=0/2,etc)">
+##INFO=<ID=genoMapGood,Number=1,Type=Integer,Description="Number of datasets calling this genotype with VQSR mapping tranche <= 95">
+##INFO=<ID=HapNoVar,Number=1,Type=Integer,Description="Number of datasets for which HaplotypeCaller called a variant within 35bp and did not call a variant at this location">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=NoCG,Number=0,Type=Flag,Description="Present if no consensus reached, so looked at all datasets except Complete Genomics since it may have a different representation of complex variants">
+##INFO=<ID=NoPLTot,Number=1,Type=Integer,Description="Number of datasets with likelihood ratio > 20 for a genotype different from the called genotype">
+##INFO=<ID=platforms,Number=1,Type=Integer,Description="Number of different platforms that called this genotype">
+##INFO=<ID=platformbias,Number=.,Type=String,Description="Names of platforms that have at more than twice as many incorrect than correct genotypes at this location, indicating platform-specific bias (ill=Illumina,sol=SOLiD,454=454,ion=Ion Torrent,cg=Complete Genomics)">
+##INFO=<ID=platformnames,Number=.,Type=String,Description="Names of platforms that called this genotype (ill=Illumina,sol=SOLiD,454=454,ion=Ion Torrent,cg=Complete Genomics)">
+##INFO=<ID=PL454WG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~16x 454 whole genome sequencing from 1000 Genomes Project, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLCG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~73x Complete Genomics whole genome sequencing, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLHSWEx,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~66x 2x100bp Illumina exome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLHSWG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~68x 2x100bp Illumina whole genome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLILL250,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~50x 2x250bp Illumina PCR-free whole genome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLILLCLIA,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~80x 2x100bp Illumina whole genome sequencing from Illumina CLIA lab, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLIllPCRFree,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~56x 2x100bp Illumina PCR-free whole genome sequencing from Illumina Platinum Genomes Project, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLILLWEx,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~30x 2x54bp Illumina exome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLILLWG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~39x 2x44bp Illumina whole genome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLIonEx,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~80x mean 237bp Ion Torrent exome sequencing from Life Technologies, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLPlatGen,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~190x 2x100bp Illumina PCR-free whole genome sequencing from Illumina Platinum Genomes Project, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLXIll,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~37x 2x100bp Illumina whole genome sequencing from X Prize, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLXPSolWGLS,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~24x 50bpx35bp SOLiD whole genome sequencing from X Prize, preceded by filtering info if this dataset was not used due to evidence of bias">
+##INFO=<ID=PLminsum,Number=1,Type=Integer,Description="Net Genotype quality across all datasets, defined as difference between most likely and next most likely genotype likelihoods">
+##INFO=<ID=PLminsumOverDP,Number=1,Type=Float,Description="Net Genotype quality across all datasets, defined as difference between most likely and next most likely genotype likelihoods, divided by the depth of coverage">
+##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
+##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
+##INFO=<ID=TrancheABQDmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with abnormal allele balance (AB and QD)">
+##INFO=<ID=TrancheAlignmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with local alignment errors (distance from the end of the read and clipping)">
+##INFO=<ID=TrancheMapmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with mapping errors (mapping quality and depth of coverage)">
+##INFO=<ID=TrancheSSEmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with systematic sequencing errors (strand bias and neighboring base quality)">
+##INFO=<ID=varType,Number=1,Type=String,Description="Type of variant">
+##INFO=<ID=YesPLtot,Number=1,Type=Integer,Description="Number of datasets with likelihood ratio > 20 for the called genotype">
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##fileDate=20130719
+##phasing=none
+##reference=human_g1k_v37.fasta
+##variants_justified=left
+##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
+##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878
+q	186	.	T	C	11488	PASS	DPSum=824;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=353,0,389;PLCG=698,0,750;PLHSWG=1303,0,1514;PLILL250=694,0,744;PLILLCLIA=1984,0,1727;PLILLWG=652,0,578;PLIllPCRFree=749,0,1617;PLNCIIonWG=67,0,116;PLPlatGen=4041,0,4291;PLXIll=515,0,763;PLXPSolWGLS=432,0,608;PLminsum=11488;PLminsumOverDP=13.94;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg,i [...]
+q	1008	.	C	T	6462	PASS	DPSum=569;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=112,0,152;PLCG=471,0,330;PLHSWG=579,0,778;PLILL250=335,0,178;PLILLCLIA=1013,0,1657;PLILLWG=173,0,294;PLIllPCRFree=781,0,698;PLPlatGen=2598,0,2675;PLXIll=303,0,376;PLXPSolWGLS=97,0,57;PLminsum=6462;PLminsumOverDP=11.36;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=10;allalts=T;datasetcalls=10;geno=2;genoMapGood=10;platformbias=none;platformnames=ill,454,sol,cg;platforms=4;varType=SNP	GT: [...]
+q	1817	.	G	A	3061	PASS	DPSum=361;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=98,0,24;PLCG=125,0,138;PLHSWG=164,0,80;PLILL250=325,0,792;PLILLCLIA=316,0,212;PLILLWG=65,0,27;PLIllPCRFree=546,0,489;PLPlatGen=1422,0,2430;PLminsum=3061;PLminsumOverDP=8.48;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=8;allalts=A;datasetcalls=8;geno=2;genoMapGood=8;platformbias=none;platformnames=ill,454,cg;platforms=3;varType=SNP	GT:DP:GQ:PL	0/1:361:3061:3061,0,4192
+q	1820	.	C	T	3594	PASS	DPSum=358;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=97,0,20;PLCG=140,0,160;PLHSWG=165,0,106;PLILL250=332,0,821;PLILLCLIA=317,0,191;PLILLWG=63,0,62;PLIllPCRFree=695,0,582;PLPlatGen=1785,0,2767;PLminsum=3594;PLminsumOverDP=10.04;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=7;allalts=T;datasetcalls=8;geno=2;genoMapGood=7;platformbias=none;platformnames=ill,cg;platforms=2;varType=SNP	GT:DP:GQ:PL	0/1:358:3594:3594,0,4709
+q	1917	.	A	G	5718	PASS	DPSum=430;HRun=0;HapNoVar=0;NoPLTot=0;PLCG=175,0,339;PLHSWG=231,0,186;PLILL250=450,0,787;PLILLCLIA=697,0,510;PLILLWG=76,0,179;PLIllPCRFree=776,0,547;PLNCIIonWG=44,0,22;PLPlatGen=3171,0,2815;PLXIll=98,0,94;PLminsum=5479;PLminsumOverDP=12.74;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=9;allalts=G;datasetcalls=9;geno=2;genoMapGood=9;platformbias=none;platformnames=ill,cg,ion;platforms=3;varType=SNP	GT:DP:GQ:PL	0/1:430:5479:5718,0,5479
+q	4449	.	G	A	11413	PASS	DPSum=725;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=242,0,405;PLCG=685,0,483;PLHSWG=1176,0,998;PLILL250=967,0,487;PLILLCLIA=956,0,1221;PLILLWG=615,0,542;PLIllPCRFree=1346,0,1143;PLNCIIonWG=1,0,192;PLPlatGen=4726,0,3765;PLXIll=699,0,761;PLminsum=9997;PLminsumOverDP=13.79;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=9;allalts=A;datasetcalls=10;geno=2;genoMapGood=9;platformbias=none;platformnames=ill,454,cg;platforms=3;varType=SNP	GT:DP:G [...]
+q	5009	.	C	T	8517	PASS	DPSum=639;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=205,0,204;PLCG=744,0,686;PLHSWG=932,0,971;PLILL250=315,0,370;PLILLCLIA=774,0,943;PLILLWG=273,0,298;PLIllPCRFree=878,0,1020;PLNCIIonWG=28,0,82;PLPlatGen=3611,0,4445;PLXIll=567,0,492;PLXPSolWGLS=190,0,302;PLminsum=8517;PLminsumOverDP=13.33;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=T;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg,ion;pla [...]
+q	6418	.	G	A	9700	PASS	DPSum=777;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=117,0,141;PLCG=519,0,688;PLHSWG=1193,0,1365;PLILL250=760,0,399;PLILLCLIA=1473,0,1473;PLILLWG=477,0,680;PLIllPCRFree=743,0,1372;PLPlatGen=3923,0,3639;PLXIll=382,0,826;PLXPSolWGLS=113,0,198;PLminsum=9700;PLminsumOverDP=12.48;RPA=16,17;RU=A;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=10;allalts=A;datasetcalls=10;geno=2;genoMapGood=10;platformbias=none;platformnames=ill,454,sol,cg;platfor [...]
+q	8846	.	T	C	9797	PASS	DPSum=727;HRun=1;HapNoVar=0;NoPLTot=0;PLCG=470,0,525;PLHSWG=1228,0,1675;PLILL250=516,0,649;PLILLCLIA=1506,0,1304;PLILLWG=541,0,453;PLIllPCRFree=1022,0,801;PLNCIIonWG=36,0,132;PLPlatGen=3724,0,4719;PLXIll=623,0,669;PLXPSolWGLS=131,0,408;PLminsum=9797;PLminsumOverDP=13.48;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=10;allalts=C;datasetcalls=10;geno=2;genoMapGood=10;platformbias=none;platformnames=ill,sol,cg,ion;platforms=4;varType= [...]
+q	9791	.	A	C	10272	PASS	DPSum=777;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=239,0,213;PLCG=839,0,825;PLHSWG=1051,0,1286;PLILL250=337,0,490;PLILLCLIA=1591,0,1298;PLILLWG=458,0,844;PLIllPCRFree=716,0,1064;PLNCIIonWG=254,0,57;PLPlatGen=3778,0,4302;PLXIll=513,0,812;PLXPSolWGLS=496,0,558;PLminsum=10272;PLminsumOverDP=13.22;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg, [...]
+q	10532	.	C	A	8913	PASS	DPSum=705;HRun=2;HapNoVar=0;NoPLTot=0;PLCG=653,0,743;PLHSWG=1626,0,1610;PLILL250=501,0,624;PLILLCLIA=896,0,1250;PLILLWG=320,0,420;PLIllPCRFree=908,0,1146;PLPlatGen=3625,0,4125;PLXIll=384,0,581;PLminsum=8913;PLminsumOverDP=12.64;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=8;allalts=A;datasetcalls=8;geno=2;genoMapGood=8;platformbias=none;platformnames=ill,cg;platforms=2;varType=SNP	GT:DP:GQ:PL	0/1:705:8913:8913,0,10499
+q	11261	.	T	C	9848	PASS	DPSum=709;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=106,0,86;PLCG=471,0,482;PLHSWG=1029,0,914;PLILL250=632,0,656;PLILLCLIA=1189,0,1776;PLILLWG=315,0,760;PLIllPCRFree=987,0,701;PLNCIIonWG=91,0,94;PLPlatGen=4150,0,4008;PLXIll=829,0,904;PLXPSolWGLS=49,0,135;PLminsum=9848;PLminsumOverDP=13.89;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg,ion;pl [...]
+q	11536	.	T	C	11666	PASS	DPSum=841;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=172,0,410;PLCG=652,0,960;PLHSWEx=31,0,32;PLHSWG=1156,0,1240;PLILL250=818,0,685;PLILLCLIA=1490,0,1870;PLILLWG=807,0,626;PLIllPCRFree=1042,0,1136;PLNCIIonWG=303,0,116;PLPlatGen=4105,0,3971;PLXIll=795,0,850;PLXPSolWGLS=295,0,414;PLminsum=11666;PLminsumOverDP=13.87;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=12;allalts=C;datasetcalls=12;geno=2;genoMapGood=12;platformbias=none;platformna [...]
+q	12125	.	T	C	11323	PASS	DPSum=806;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=254,0,312;PLCG=512,0,595;PLHSWG=1073,0,1401;PLILL250=603,0,978;PLILLCLIA=1833,0,1747;PLILLWG=597,0,727;PLIllPCRFree=1219,0,658;PLNCIIonWG=256,0,148;PLPlatGen=4169,0,3563;PLXIll=641,0,783;PLXPSolWGLS=166,0,106;PLminsum=11018;PLminsumOverDP=13.67;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,c [...]
diff --git a/test/tiny/q with spaces.fa b/test/tiny/q with spaces.fa
new file mode 100644
index 0000000..085a809
--- /dev/null
+++ b/test/tiny/q with spaces.fa	
@@ -0,0 +1,207 @@
+>q
+GATTCAACAAATATTTACTGAGTACCTACTATGTGCAATGTGTATACTAGGTGCTGGGGA
+TACAGCAGAGAACAAAGTCCCCACTCTCAATGAAAGGGGCCAGTCTTTTTATTCATCTTT
+AGTTAATTTTAAGCACACAACAGCTATCTCAAACTTTCTTCACACTTTCCAAGCCCCTGA
+TCCCTTATGTACTCTTGGCAGATGCCCCACCTTATCTCTTACCAGAACCTAAGGCCAACT
+AGCATAAAATCCCTTGAGCACACTTAGATTGGCATGCTTTTACTGTGACATCTGTTCCTA
+CAGCACCCTTACTTTCTTCTCTTGAGCTGAGGGTGAAGATCTCTTTCTTTCCCATCTGTA
+ATCTCACAATATGCACAACTACACCAGAGACAATGGAGGCACAATACAGGCCAGAGGTCT
+GTTCCTCATTCCTTCTGCGGGGCCTTCCTCCAGTGAATATCTCTTCTAACATTGTCAATC
+ATATCCAACAGAAAAAGAGATCTCTCCATTGGTACAAAAACCTACAGCTCCTAAGAAGAT
+TTTGACTCTCACTGCTCAACTTCTCAAATAAAAATTCCTCCACTTTAGTTCTAATTATCA
+CTCAAATGCTGTTAGACCTGTGGGCAGGTCACTATATCCCTGCCTAGGTCTGTTTCCCCA
+TCTGCGTAACAGAGAAATAATTGTGCTTTATCTGTCCTACAGAAGTGTGAAGAGGTTTGA
+TGTGACAGCAGTGAAAGCAATTTAAAAAGTACAAAGTGCTGTATAAAAGAAGACGTTTTC
+ATAATTATTCTGTTTTCACATGCTATCTTTATTCCAATGATCTCAAAGTACCTACCCAAG
+CACAGTTCTAACTCTAGTATATGTAAGCAAACGTGCAAGTCTGTCACCTTCCACACAAAG
+AGTCCCCCCACCCCCGCCAAACACACCACTCCCTCTCCCTCTCCTTCTCCCTCTCCCTCT
+CTCCTTCTCCCGCTTTCCACGGTCTCCCTCTGTTGCCGAAGCTGGACCGTACCGCCGTGA
+TCTCCGCTCGCTGCAACCTTCCTGCCTGTTTCTCCTGCCTCAGCCTGCCGAGTGCCTGGG
+ATTGCAGGCGCGCGCCGCCACGCCTGACTGGTTTTTGTATTTTTTGGAGACGGGGTTTCG
+CCATGTTGGATCGGCTGGTCTCCAGCTCCTAACCGCGAGTGATCTGCCTGCCTCGGCTTC
+CCGAGGTGCCGGGATTGCAGACGGAGTCTCGCTCACTCAGTGCTCAATGTTGCCCAGGCT
+GGAGTGCAGTGGCGTGATCTTGGCTCGCTACAACCTCCACCTCCCAGCCACCTGCCTTGG
+CCTCCTAAAGTGCCGAGATTGCAGCCTCTGCCTGGCTGCCACCCCGTCTGGGAAGTGAGG
+AGCGTCTCTGCCTGGCCGCCCATTGTCTGGGATGTGAAGAGCCCCTCTGCCTGGCCGCCC
+AGTCTGGGAAGTGAGGAGCGTCTCTGCCCAGCCGCCCATCGTCTGGGATGTGGGGAGCGC
+CTCTGCCCCGCCGCCCCGTCTGGGATGTGAGGAGCGCCTCTACCCGGCCGCGACCCCGTC
+TGGGAACTGAGGAGCGTCTCTGCCCGGCCGCCACCCCATCTGGGAGGTGAGGAGCGTCTC
+TGCCCAGCCGCCCCGTCTGAGAAGTAAGGAGCCCCTCCGCCCGGCAGCCGCCCCGTCTGG
+GAAGTGAGGAGCGTCTCCGCCCGGCAGCCAGCCCGTCTGGGAGGTGGGGGGCAGCCCCCG
+CCCGGCCAGCCGCCCCGTCCAGGAGGTGGGGGGCACCCCCCGCCCGGCAGCTGCCCCGTC
+GGTGGGGGGCGCCTCCGCCCGGCCGCCCCGTCTGGGAAGTGAGGAGCCCCTCTGCCGGGC
+CGCCACCCCGTCTGGGAGGTGTGCCCGGCAGCTCATTGAGAGCGGGCCATGATGACAATG
+GCGGTTTTGTCGAATAGAGGGGGGAAATGTGGGGAGAGGAGAGAGATCAGATTGTTACTG
+TGTCTGTGTGGAGGGAGGTGGACATGGGAGACTCCATTTTGTTCTGTACTGGGAAAAATT
+CTTCCGCCTTGGGATGCTGTTGATCTATGACCTTACCCCCAACCCCGTGCTCTCTGAAAC
+ATGTGCTGTGTCCACTCAGGGTTAAATGGATTAAGGGCAGTACAAGATGTGCTTTGTTAA
+ACAGATGCTTGAAGGCAGCATGCTAGTTAAGAGTCATCACCACTCCCTAATCTCAAGTAC
+CCAGGGACACGAACACTGAGGAAGGCCGCAGGGTCCTCCTCTGCCTAGGAAAACCAGAGA
+CCTTTGTTCACATGTTTATCTACTGACCTTCCCTCCACTATTGTCCTATGACCCTGCCAA
+ATCCCCCTCTCCGAGAAACACCCAAGAATGATCAATAAATACTAAAAAAAAAAAAAAAAA
+AAAAGGAATATTTTCACGTGCAAATGAAAAAGCCCTAAACAAATCTTAAAATATATAAAG
+TGGTTCAAAAAATGTTTATTGTTATTATTTTAATAGAGGCCTATACATGGAGCATTTATT
+TGGGAAAGTTTTAAATGAATGTTTTCCCCTGTTTTGGTTGTTATCAATCTACACTTTACC
+AATTTTCAATAATGATCATGCCTAATTAATTCAAATTAAAAGGTGTGTAATACAAAAAAA
+AAAAAAAAACTGCAACCAACCCTCGAATCAGCTCAATCTCTCACTGAATTATGGTTATCT
+GTCACCACTGTATCTGCCTATCAGTGATTAGGGAGAGCTCTCTCTGGAGAAAAATGTCAC
+CTAGAGCATCAACAATTTTCATACACAGTATCCAGCATTCACTTGAACGTTACCAAGCAT
+ACAAGGAAACAGAAACGAGAAAAAAGAGACAACATATCCACATATCCACAGTGACCCAGT
+TACTAGTTACTGAAGATGATCTTTAAAATAGCTGTAATTCGGCCGGGCGCAGTGGCTCAC
+GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGAGGTCAGGAGTTCGAGACCAGCCTA
+GCCAACATGGTGAAATCCCGTCTCTAAAAATACAAAAATTAGCTGGGCGTGGTGGGGCGT
+GCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCTAGGAAGCA
+GAGGTTGCAGTGAGCCTAGGGTGTGGTGGGGCGTGCCTGTAATCCCAGCTACTTGGGAGG
+CTGAGGCAGGAGAATCACTTGAACCTAGGAAGCAGAGATTGCACCATTGCACTCCAGCCT
+GGGCGACAAATTGCGACTGTATCAAAAAAAAAAAAAAAAAAAGAGGTAACTCTTGGAAAG
+GAGAAAATATCTGCAGCACATGAATCTAAAAAAAGACTCATCAAAATCTACAAAGAACTC
+CTATAATTTAATGAAACAAAAATCTCCACAGAAAAATTTGCAAAATAACCCTTAAATACT
+TCACGAGATAGCCGAATGTCATAAACATACAAAAAAGAGTTAACCACCAAAAAAGAGCTA
+AAATATCAGAGAAATGCAAACAAAACCCACAATGCATACCACTTCATACCCACTAGAGAG
+GCTAAAAATGATACCAAGTGTTGTTGAGAATATGCACTGGTCTGAACTCTAATACACTGC
+TAATGGAAATATTTTACCACTTTGGAAAACAGTTTGGCACATCTACTAAAATGGAATCTA
+TGCACAGACTGACACAGTCATTCTGCTACTAAATATATACCCAGCAGGACAATGTGGCTC
+ACGTCTATAATCAGCCCAGGCTGGTCTTGAACTCCTGAGCTCAAAGTGATCCTCTCACCT
+TGGCTTCCTAAAGTGTTGGGATTACAGGCATGAGCCACCACATCTGGCCACTTTTCTTTA
+TGTATATTATACATCAATAAAAGTAAAGAATAAAAAGGGAGAAGAGTCTGAAAAGAACAA
+TTATCACAAAATTAAGGAACGTACTCAACTCTTAAAGATTTGAGAAGGCGTTTCTAAGGT
+ACTAGCAATGTTCCATTTTGCAATCTGCACAGTGATGACAAGTATTTAACTTATTCTGTA
+AACAGTACCTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTGCTCTGTCGCCCAGGATAGA
+GCACAGTGGCGCGATCTCAGCTCACGGCAGCCTCTGCCTCCTGGGGTGACGTGATTCTCA
+TGCCTCAGCCTCTGGAGTAGCTAGGATTACAGGCAGGCGCCACCACGCCCAGCTAATTTT
+TGTATTTTTAGTAGAGACAAGATTTTGCCATGTTGGCCAGGCTGGTCTTGAACTCCTGAC
+CTCAAGTGATCTGCCCGCCTTGGCCTTCCAAAGTGCTAGGATTACAGGCATGAGCCACTG
+TGCCCAGCCACAAACAGTACTTATATCTTACCTATGTTTAAATGGATATTCAAAATAAGA
+TTAAAAGAGGTAAGAAAATACAATACTCTAGTAAATGTAAAAATAAGTGAGAAGAATAAA
+TGCCAAGAGAAATATAATTACCAAGAAGAAATTCAAAGTAGGAACAGTCGTATAACCATT
+TTTAAAATAATTTTGAATCAGCTTAACATGTTTTCAACAAAAAACCAAACACATACCAAA
+ACCAAAAAACCACCCAGGTCCAGACTTGTACAGGCAAGTTCTACCAAACTTTGAAGGAAT
+CATTCTAATTTTACAGAAATAAAAACAGAAGAATAGGAAAAGAAACACTTGCCAATTCAT
+TTTGTAAGGGTAGTCTAATTTTCATACCAAACCACATGAGAAAAACCCTATTATTTATTA
+ACAAGCCAAATCCAGCAATATATAATAATAATACATCATGACCAAGTTGGGTTTAATCCC
+AGGAATGCAAAATTGATTCAATATTAGAAAATCTATTAGGGTAACATGAAACTTAAAGAA
+AAAAAACAAGAAAATTTCTATACATGCAATTAAAAAGCATCTGTAGAAATTCAATCATTC
+AGGATTCTTTTTTTCTTTAAGTTTAAAAAAACTTGACAAAGAAAGCTTTATGAAGCCAGG
+TGCTGTGGCATGTGCCTGTAATCCCAGCCATTCAGGAGGGTAAAGTGGGATGATCACTTG
+AGCCCAGGAGTTTGAGACCAGTCTGAGCAACATAGCAAGACCCTGTTTCCGGGGAGGGGG
+TGGGTGGGAAGTAAGCCTTATGAAAAACCAGTAGCCCCTATCACTCAATAAAAGCTTTCC
+TTTTAACATCAGAACAAGTCAGGGATGCCCTCTGTATCACCACTGCTCTTCAACTTTGTA
+ATGGAGAGTCCGGCCAGCACAGTTAAGACAAGATAAAATATATCTAAGTCATGAAAAGAA
+ACAAAATATCATGATTTTCAGATGATATGTTTGTCCTTAAAAAATTTAAAGAGGGATTAC
+TGGAATAAGAGTTTGTTGGACAATCTTTTTATTCCAGTAATTCATCAGTTTACAAAAGTC
+AAATGCATTTCTATAAACCATCTACAGATATCTCTTAGAATATAGCCGGGCATGGTGGTG
+GGCGCCTGTAATCCCAGCTACTAGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAG
+GCGGAGCTTGCAATGAGCTGAGATCGCCACTGCACTCCAGCCTGGGCCACAGAGCGAGAC
+TCTGTCTCAAAAAAAAAAAAAAAAAAAAAAGATATCACTTAGAATAGCAACAACAAACAT
+AAAGTATCAAGGAATCAATTAAACACATGATGTGCAATAATAATAACACAAAACTTTACT
+GGGAAAACAACTTAGGAAAACAATGTGGAATACCCGGTGAATGTACATATGCCCTACCAT
+CCAGCAATTCCACTACTGGGGAATCTGTCTAAAGATCTGTGAAACTCTTGTGTATCAGGA
+GAAATGTGCAAGGTTACTTGCAGGAACACTGTAATAACAACAAACCTAATGTCCATTAAT
+ACTAGAATGGATAAATTGTGGTATATTTATACAATGGTATATAAAGCAGTGAAAACAAGT
+AACTATACTATACTCAAAAACATAATGATGAATGAAAAAAGCTAGTTGAAGAATACACAC
+AGTATGATTTCACTTATATGAAATTCAAAACCAGGAAATACTAAGTACTGTATTGTTTAA
+TAGAAAGATAATAGTCCAGCCCGGGTGCGGCGGCTCAAGCCTGTAATCCCAGCACTTTGG
+GAGGCTGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGACCAACATGG
+AGAAACCCCATCTCTACTAAAAATACAAAACTACCTGGGCGTAGTGGCACATGCCTGTAA
+TTCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCGCTTGAATCTGGGAGGTGGAGGTTGC
+AGTGAGTCAAGATTGCACCATTGCACTCCAGCCTGGGCAACAAGAGCAAAACTTCATCTC
+AAAAAAACAAAAAAAAAAACAACAGAAAAACAAGAAAAAAACAAGAATAAACACATCGAG
+CCGTGATCATGCCACTGCACTCAAGCCTGAGTGAGAGAGTGACACCCTGTCTCAAAGAAA
+ACAAAAGACAAAAATACATAACAGGCTGGGCTCAGTGGCTCACACCTATAATCCCAGGAT
+TTTGGGAGGTAGAGGTGGGTGGATCACTTGAGGTCAGGAGTTAGAGACCACCCTGGCCAA
+GGTGATGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCAGATGTGGTAGCGCACAC
+CTGTAATCCCAGCTACTTGGGAGGCTGAGGCTGGAGAACTGCTTGAACCCGGGAGGTGGA
+GGTTGGATTCAAGTGACAGCGCCACTGCAGTCCAGCCTGGGCGACAGTGAGACTTTGTAT
+CAAAACAACAAAACACATAACAGTCATTAATTACCACACATAAAGGTGCTTAATGGATAA
+TAAGTGCTCAAGGAAATGGCAGTCATGGTGGTGGTTGCAGAGAACAACTGGAGGGGATTA
+AGAGTGGTGGTAGAAACCAAGCAGAAACCAGTCTAAGAAAACCAAAAAAATACCAGGCAA
+ATAGTTTCAAAGGAAAATTATACCATTTTAAGATAAGGTAGACTGGTGGCTGGCGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGTGGGTAGATCACCTGAGGTCAGGAGTTTGAGACC
+AGTCTGGCCAACATAGTGAAACCCCATCTCTACTAAAAATATAAAAATTAGTTACACGTG
+GTGGTGTGCACCTGTAGTCCCAACTTCTTGGGAGGCTGAGGCAGGAGAATCACTTGAACC
+TGGGAGGAAGAGGTTGCAGTCAGCCGAGATCATGCCACTGCATTCCAGCCTGGGTGACAG
+AGCAAGACTCTGTGTCAAAAAAAAAAAAAAAAAGACAAAGTAGTCTTAATGCTATTTCAG
+AACACAGAAAAGGAAAGAAAACTTCCACACTCATTTTATGAAGCAAGTGTAATGGTAACC
+CCAACCTGACAAAGACTGCACAAAAGATACTACAGTTGAATACTACTTACGAATATCAAC
+ACAAAATCTCTGAATATTAGCAAACAGAATCCAATGGCACATTATAAAAAGGAATACAAA
+TAAACAAGCACAGTTTATTCCAGGTATGAGAGATTCCATATAAGGAAATCTATTGACATG
+ATCTTCACATTAACAATGTTAAATGTGATGAAATCATATCATCTTCACGGAGGCAGTAAA
+GACATCTGACAGCTGGGTGCGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAA
+GGCGGGTGGATTGCCTGAGCTCAGGAGTTCACGACCAGCATGGGCAACAGGGTGAAACCC
+TGTCTCTACTAAAATACAAAAAATTACCTGGGGGTGGCAATACGCGCTTGTAGTCCCAGC
+TACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAAACCAGGAGGTGGAGGTTGCAGTGAGC
+TGAGATGGCACCACTGCACTCCAGCCTGAGAGACAGAGCAAGACTCCATCTCAAAAAACA
+AACAAACAAAAAGAAACAAAAAACTAAAATAGGAAATGATGGATACTGCCTTAAGTAAAC
+AAAATACACCTCGGTCCAAAGCCAGCATCTGACTTCATGTGAAAACATTAGAGACTTCCC
+TGCTAACATCACACACCACATAAGGCTAAAATATTTTCCAATTTACATTTTTAGAGGTAA
+GCATTAAATTCTATGATGCCAGACAAACATTTACCAGATTTTTTTTTTTGAGATAGAGTT
+TCACTCTTGTTGCTCAGGCTGGAGTGTAATGGCGCACAATCTCGGCTCGCCACAAGCTCC
+GCCTCCCAGGTTCAAGCGACTCTCCTGCCTCGGCCTCCCGAGTAGCTGGGATTACAGGCA
+TGTGCCACCACACCCAGCTAATTTTGTATTTTTTGTAGAGACGGGGTTTCTCCATGTTGG
+TCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCTGCCTCGGCCTCCCAAAGTGC
+TGGGATTACAGGTGTGAGCCACCGCGTCTGGCCAAAATTTACCAGTTCTTGAATGCTGAA
+TCCTGCTGGGATAACTGGAAACACTCCTAATGTTACTATAAACTCTCACACTGCCTTGTT
+GGCAATAATTAAATAGCTCTTTCTAGCAGTTGTGTTCAAGTGGACAGATAATCACAAATG
+TCTTTTCTTTCTAGCTCCCTTTGGACGAAAGAAAGAAAGAATTTACAACACGTCTAAAGA
+TTGTCACTTTTATCATCCCTTTGAATGATAAGTGATCAGGGCTGTCCCAAAAGAATGCCT
+GAAAACATTACAAAATGTTATCATAAGCAGAATGGCTCACTAAGGGGATGAACATTCTGG
+CAAGTCTTAATCTAGCATCACTACCAACTGTTTTTTCACTGGTCTAAAACATAACATCTA
+ATATTTTTATTTGCCTATATAGCCCTGCAGTAATAATTAACTTGTGAAAGGTCTACACTG
+ATTTTTTTCTTTATAAAGTATTTGCTTCAACCATGGAACAGAAACAAAGTTGATTTAATG
+GGAAAAATGTAAATTTATTTGAGGATAGGTTCAGCACTGCTAAATTGAAAAGGGAAAGAA
+AATTTCAAACAAACCCCAAAAAGAAACATTTTCACATCTGTAGTGAAAAAGCTAAATAGC
+AGTACACTAAACTTGGTTACTGACAAAATAAATCCTTGTGGCCAAGGTGAGGCAAAGATA
+TTACTCTCTACTTTGCTAGCCTACCTTGTGTTCAGTGAATTCAAACATTAATTGGGGCAA
+TGTATTGTTTTTGCTTCATTTTCTTTAGCTGTGCAGTAAAGTCTTTGCTTTCATAGAAAG
+ATTGTGCCAAGTTCTTTCTTTTTCAGAATTGCTTTTAAATCAGTTCTATTTAAGTATTTC
+TCTCTCTCTCAAACTTGACATTTTTCATTACACCTTTTTTTTCCAATGCCAAAATAGCAG
+CATTAACATTCATTCCAGTCACTAGGTAAAAACTCTCTTCAGTCAAACACACATGAATAA
+TCTAATTCGTGAGGTAAACGACTACTGTCATACAGTATTACGAAGTTCCAACCTTACTTG
+CAGAGCAAAAAATAAAAAATAAAAATAGTCATTTTCCTCAAGGGTTTAATTGTGGGCTTT
+TAATGTTGAAGTAAATCAATTTACTAGTTTTTAACCTCAAGTTTTTCCCTATTTCATTAT
+ATCTCAATAATGATTATCCAGAGATTTCTGAGGTAAATTACATTAAAGACAGCGCTAGTA
+ACTGGGAAACAATCCACACATGAATCTTAATATATTGCATATATGCTAATTTCATGTAAT
+GTTCCTGAGGGCGGTGACAATATTAGAGCTGTACGAGACCCTAGAGACCATCCCAATCCC
+TTAACTCCACAGATGAGAAACACAAAAACTTGCAGAAGCAGAGTGACGAGCACATGCCCA
+TGTGACATGTCATCAGAAGCAGGGACCAAGACTCAGGTCTCTTGCCTTCCAGGCCAGTCC
+TCTTTTGGTAGGCTAAAAGCTGCCTTTCACAGCTTCCCACAAATTTCAACACATCCTCAC
+TCAAGTACGTTTCTGAAAGGGACTCACCGACAACTCTTTAGAGAAAAGGAACTATATCTC
+AAACCATAGGAAAAGTTCTATATCTAGCTTATCAAACTTTTAGTCTCTGAAGCATGTGAG
+AGTAAGTTTGCCAGTATTTCTTTCTCATTATCCCCCAATACTTTAGTGACTATTTCCAAC
+AAACAAAACTAGACTATTCTCCCATACAAACAAAATGCTACCATCAAAATCGTCAACGCT
+CACCTGTCAGATTATGACCAAAGGCCTCTAAGTCAGAATCCCGCCCAGGCAAAATGACAT
+GGCAGCCAACTGTGGAGCCTCGGTTGGCCTCGGATAGCTGGTCCCCCCTTCATTCACACC
+ACATATCTGAGGACCAGATATGGAAAGCCTCTCATGCTGGAAAACTTGGCACAGTAGAAA
+GGCGGCCACACTTTTGCCCGTCACACAATGCACATTCACGGGGGACCTGTGCTAAACCAT
+TCACAGACAACCTGTTTCTGGGTCAGGGTTTCGTATACAGCAGAGCAGTCAGCTATCGAC
+ACAAGGGTTTGTAACAAAAAACAAAAAACAACAAAAAAAAGGAAGGACAAACCAAAACAA
+AAAAGAAAATACCAAAAAAAAGAAGAGTTAAAAAAAAAAGAAGATAAAAATAAAATCATT
+AACACAGATACACGACTACAATCTAATTTTCAGACATTCTAGTTTTGCCAATCATGCCGA
+TTCAGAATCATGAACCACATGGAGTTGCCATGTTTCTGCACACTTCTTTAGTGTGGAACA
+ATTCCTCAGTGTTTTCTTGGTTCCATGATCTTGACTTAATTTTGAGGTTTACAGGCCAGT
+TATTTTGCAGAATATCCCTAAACTCGGGTTACCTTGATGTTGCCTCCTGAAGATTTGGAT
+TATGCATGAAACACTTGACTGTCCCTATTCTAACTATATAAAGATGCTCCTTGACTTACT
+ATGGACTTACATCCCAATAAACCCTTCATAAATGGAAAACCTCATAAGGTGAAATGCTTG
+TTTTTTTTGGGAAAGTTTCTTGCTCTGTTGCCAGGCGGGAATGCGGTGACACGACGACGG
+CTCACTGTAGCCTTGACCACTCAGGTTCAAGCGATCCTCAGCCCCAGGCTCCCAAGTAGC
+TGGGACTATAGGCACGTACCACCACACCTGGCTAATTTTTTTTTTTTTTAATTTTTAGTA
+CCTTTAGTACAGAAAAGGCCTCACTATGTTGCCTAGATTGGTCTCAAACTCCTAAGCTCA
+AGGAATCCTCATGCCTCGGTCTCCCAAAGTCCTGGGATTATAGGCATGAGTCCTGCACCC
+AGCCTAGCTGAAAATACATTTAATACACCTAACTTACCAAACATCACAGCTTAGCTTAGC
+CTACCTTAAACATGTTCAGAGCACTTACATTAGCCTACAGTTGGGCAAAATCATCTAACG
+CAAAGCCCATTTTATAATAAAAGTGTTGAGTAACTCATATGATGTATGAAATACTATACT
+GAAAATGAAAAACAGAACAGTTGCGTGGGTACTCGGAAGTAAGGTTTCTACTGCATGTGT
+ATCACTTCCATACCATTGTAAAGTCATCTGACCACCAAGCCAAATATTTGTACTTCTGCA
+CATGCTGCTTTACATAGAGACCGAGACAAAGGGGGATTCACATCTTGTCAACCCTGTTTA
+CTTCAAAGGAATAACAGAGCATTAACTAGGTACCAAGTGCTCAGGACTTCAAGTATACTG
+TTTTATTTAATACTCATGACAATCCTTTGACCCCTTACTTGTGACTGGCAAGTTACCCAA
+CTTTTGTTTCATTTTCCTCATCTGTAAAAACCGGAGAGTGATACACACTCCCTGGAGAAT
+GACAGGGAAAACAGTACCAAGAACTTTAACCTAACAAGTCAGAAATCAATGAGACCCCTT
+TTGACAATTCCCACCTTGCCCTTTAGACATTCCGAGATATGCAGCTTTATTTAATAAATG
+ACCCCACCTTGGGCCTCAGCTGACTAGACCAGAGGCAGACACTCTCCCAGGCTGGAAAGG
+CCAGGTGGAGTGAGCTGTTCTGTGCAGAATGATCTCCCTAACAGTGTTTTTACACATTCT
+GTTCCACTTGGAATTGGATTCAGAGTGTTCTCAGCCTCTGGTCATCAAAAAACCTGAAGA
+CACATACCTTCAGGGAGCTAGGGTCAGCTCTCCTCTGCTGAGTGCATGGGGAAGTGACAA
+AAGAGAAAAGAGGGGGGAAAATCCAGCTGCTGAGAGATGTAGGGTACAAGAAGCTTTTGT
+AACTGTAAAGAGGGAGGGAGGTAGCTACGTTGGTCTATTTTCCAGTTCCTGGTTCCAACT
+ATACCTAAAGCTTTAACTGCACTTTCTATCCTTGCAGTAATATGACAAAGGCCCCC
diff --git a/test/tiny/q with spaces.fa.fai b/test/tiny/q with spaces.fa.fai
new file mode 100644
index 0000000..dd7673b
--- /dev/null
+++ b/test/tiny/q with spaces.fa.fai	
@@ -0,0 +1 @@
+q	12356	3	60	61
diff --git a/test/tiny/q with spaces.regions b/test/tiny/q with spaces.regions
new file mode 100644
index 0000000..d129bcc
--- /dev/null
+++ b/test/tiny/q with spaces.regions	
@@ -0,0 +1,3 @@
+q:0-5000
+q:5000-10000
+q:10000-12356
diff --git a/test/tiny/q.fa b/test/tiny/q.fa
new file mode 100644
index 0000000..085a809
--- /dev/null
+++ b/test/tiny/q.fa
@@ -0,0 +1,207 @@
+>q
+GATTCAACAAATATTTACTGAGTACCTACTATGTGCAATGTGTATACTAGGTGCTGGGGA
+TACAGCAGAGAACAAAGTCCCCACTCTCAATGAAAGGGGCCAGTCTTTTTATTCATCTTT
+AGTTAATTTTAAGCACACAACAGCTATCTCAAACTTTCTTCACACTTTCCAAGCCCCTGA
+TCCCTTATGTACTCTTGGCAGATGCCCCACCTTATCTCTTACCAGAACCTAAGGCCAACT
+AGCATAAAATCCCTTGAGCACACTTAGATTGGCATGCTTTTACTGTGACATCTGTTCCTA
+CAGCACCCTTACTTTCTTCTCTTGAGCTGAGGGTGAAGATCTCTTTCTTTCCCATCTGTA
+ATCTCACAATATGCACAACTACACCAGAGACAATGGAGGCACAATACAGGCCAGAGGTCT
+GTTCCTCATTCCTTCTGCGGGGCCTTCCTCCAGTGAATATCTCTTCTAACATTGTCAATC
+ATATCCAACAGAAAAAGAGATCTCTCCATTGGTACAAAAACCTACAGCTCCTAAGAAGAT
+TTTGACTCTCACTGCTCAACTTCTCAAATAAAAATTCCTCCACTTTAGTTCTAATTATCA
+CTCAAATGCTGTTAGACCTGTGGGCAGGTCACTATATCCCTGCCTAGGTCTGTTTCCCCA
+TCTGCGTAACAGAGAAATAATTGTGCTTTATCTGTCCTACAGAAGTGTGAAGAGGTTTGA
+TGTGACAGCAGTGAAAGCAATTTAAAAAGTACAAAGTGCTGTATAAAAGAAGACGTTTTC
+ATAATTATTCTGTTTTCACATGCTATCTTTATTCCAATGATCTCAAAGTACCTACCCAAG
+CACAGTTCTAACTCTAGTATATGTAAGCAAACGTGCAAGTCTGTCACCTTCCACACAAAG
+AGTCCCCCCACCCCCGCCAAACACACCACTCCCTCTCCCTCTCCTTCTCCCTCTCCCTCT
+CTCCTTCTCCCGCTTTCCACGGTCTCCCTCTGTTGCCGAAGCTGGACCGTACCGCCGTGA
+TCTCCGCTCGCTGCAACCTTCCTGCCTGTTTCTCCTGCCTCAGCCTGCCGAGTGCCTGGG
+ATTGCAGGCGCGCGCCGCCACGCCTGACTGGTTTTTGTATTTTTTGGAGACGGGGTTTCG
+CCATGTTGGATCGGCTGGTCTCCAGCTCCTAACCGCGAGTGATCTGCCTGCCTCGGCTTC
+CCGAGGTGCCGGGATTGCAGACGGAGTCTCGCTCACTCAGTGCTCAATGTTGCCCAGGCT
+GGAGTGCAGTGGCGTGATCTTGGCTCGCTACAACCTCCACCTCCCAGCCACCTGCCTTGG
+CCTCCTAAAGTGCCGAGATTGCAGCCTCTGCCTGGCTGCCACCCCGTCTGGGAAGTGAGG
+AGCGTCTCTGCCTGGCCGCCCATTGTCTGGGATGTGAAGAGCCCCTCTGCCTGGCCGCCC
+AGTCTGGGAAGTGAGGAGCGTCTCTGCCCAGCCGCCCATCGTCTGGGATGTGGGGAGCGC
+CTCTGCCCCGCCGCCCCGTCTGGGATGTGAGGAGCGCCTCTACCCGGCCGCGACCCCGTC
+TGGGAACTGAGGAGCGTCTCTGCCCGGCCGCCACCCCATCTGGGAGGTGAGGAGCGTCTC
+TGCCCAGCCGCCCCGTCTGAGAAGTAAGGAGCCCCTCCGCCCGGCAGCCGCCCCGTCTGG
+GAAGTGAGGAGCGTCTCCGCCCGGCAGCCAGCCCGTCTGGGAGGTGGGGGGCAGCCCCCG
+CCCGGCCAGCCGCCCCGTCCAGGAGGTGGGGGGCACCCCCCGCCCGGCAGCTGCCCCGTC
+GGTGGGGGGCGCCTCCGCCCGGCCGCCCCGTCTGGGAAGTGAGGAGCCCCTCTGCCGGGC
+CGCCACCCCGTCTGGGAGGTGTGCCCGGCAGCTCATTGAGAGCGGGCCATGATGACAATG
+GCGGTTTTGTCGAATAGAGGGGGGAAATGTGGGGAGAGGAGAGAGATCAGATTGTTACTG
+TGTCTGTGTGGAGGGAGGTGGACATGGGAGACTCCATTTTGTTCTGTACTGGGAAAAATT
+CTTCCGCCTTGGGATGCTGTTGATCTATGACCTTACCCCCAACCCCGTGCTCTCTGAAAC
+ATGTGCTGTGTCCACTCAGGGTTAAATGGATTAAGGGCAGTACAAGATGTGCTTTGTTAA
+ACAGATGCTTGAAGGCAGCATGCTAGTTAAGAGTCATCACCACTCCCTAATCTCAAGTAC
+CCAGGGACACGAACACTGAGGAAGGCCGCAGGGTCCTCCTCTGCCTAGGAAAACCAGAGA
+CCTTTGTTCACATGTTTATCTACTGACCTTCCCTCCACTATTGTCCTATGACCCTGCCAA
+ATCCCCCTCTCCGAGAAACACCCAAGAATGATCAATAAATACTAAAAAAAAAAAAAAAAA
+AAAAGGAATATTTTCACGTGCAAATGAAAAAGCCCTAAACAAATCTTAAAATATATAAAG
+TGGTTCAAAAAATGTTTATTGTTATTATTTTAATAGAGGCCTATACATGGAGCATTTATT
+TGGGAAAGTTTTAAATGAATGTTTTCCCCTGTTTTGGTTGTTATCAATCTACACTTTACC
+AATTTTCAATAATGATCATGCCTAATTAATTCAAATTAAAAGGTGTGTAATACAAAAAAA
+AAAAAAAAACTGCAACCAACCCTCGAATCAGCTCAATCTCTCACTGAATTATGGTTATCT
+GTCACCACTGTATCTGCCTATCAGTGATTAGGGAGAGCTCTCTCTGGAGAAAAATGTCAC
+CTAGAGCATCAACAATTTTCATACACAGTATCCAGCATTCACTTGAACGTTACCAAGCAT
+ACAAGGAAACAGAAACGAGAAAAAAGAGACAACATATCCACATATCCACAGTGACCCAGT
+TACTAGTTACTGAAGATGATCTTTAAAATAGCTGTAATTCGGCCGGGCGCAGTGGCTCAC
+GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGAGGTCAGGAGTTCGAGACCAGCCTA
+GCCAACATGGTGAAATCCCGTCTCTAAAAATACAAAAATTAGCTGGGCGTGGTGGGGCGT
+GCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCTAGGAAGCA
+GAGGTTGCAGTGAGCCTAGGGTGTGGTGGGGCGTGCCTGTAATCCCAGCTACTTGGGAGG
+CTGAGGCAGGAGAATCACTTGAACCTAGGAAGCAGAGATTGCACCATTGCACTCCAGCCT
+GGGCGACAAATTGCGACTGTATCAAAAAAAAAAAAAAAAAAAGAGGTAACTCTTGGAAAG
+GAGAAAATATCTGCAGCACATGAATCTAAAAAAAGACTCATCAAAATCTACAAAGAACTC
+CTATAATTTAATGAAACAAAAATCTCCACAGAAAAATTTGCAAAATAACCCTTAAATACT
+TCACGAGATAGCCGAATGTCATAAACATACAAAAAAGAGTTAACCACCAAAAAAGAGCTA
+AAATATCAGAGAAATGCAAACAAAACCCACAATGCATACCACTTCATACCCACTAGAGAG
+GCTAAAAATGATACCAAGTGTTGTTGAGAATATGCACTGGTCTGAACTCTAATACACTGC
+TAATGGAAATATTTTACCACTTTGGAAAACAGTTTGGCACATCTACTAAAATGGAATCTA
+TGCACAGACTGACACAGTCATTCTGCTACTAAATATATACCCAGCAGGACAATGTGGCTC
+ACGTCTATAATCAGCCCAGGCTGGTCTTGAACTCCTGAGCTCAAAGTGATCCTCTCACCT
+TGGCTTCCTAAAGTGTTGGGATTACAGGCATGAGCCACCACATCTGGCCACTTTTCTTTA
+TGTATATTATACATCAATAAAAGTAAAGAATAAAAAGGGAGAAGAGTCTGAAAAGAACAA
+TTATCACAAAATTAAGGAACGTACTCAACTCTTAAAGATTTGAGAAGGCGTTTCTAAGGT
+ACTAGCAATGTTCCATTTTGCAATCTGCACAGTGATGACAAGTATTTAACTTATTCTGTA
+AACAGTACCTTTTTTTTTTTTTTTTTTGAGACAGAGTCTTGCTCTGTCGCCCAGGATAGA
+GCACAGTGGCGCGATCTCAGCTCACGGCAGCCTCTGCCTCCTGGGGTGACGTGATTCTCA
+TGCCTCAGCCTCTGGAGTAGCTAGGATTACAGGCAGGCGCCACCACGCCCAGCTAATTTT
+TGTATTTTTAGTAGAGACAAGATTTTGCCATGTTGGCCAGGCTGGTCTTGAACTCCTGAC
+CTCAAGTGATCTGCCCGCCTTGGCCTTCCAAAGTGCTAGGATTACAGGCATGAGCCACTG
+TGCCCAGCCACAAACAGTACTTATATCTTACCTATGTTTAAATGGATATTCAAAATAAGA
+TTAAAAGAGGTAAGAAAATACAATACTCTAGTAAATGTAAAAATAAGTGAGAAGAATAAA
+TGCCAAGAGAAATATAATTACCAAGAAGAAATTCAAAGTAGGAACAGTCGTATAACCATT
+TTTAAAATAATTTTGAATCAGCTTAACATGTTTTCAACAAAAAACCAAACACATACCAAA
+ACCAAAAAACCACCCAGGTCCAGACTTGTACAGGCAAGTTCTACCAAACTTTGAAGGAAT
+CATTCTAATTTTACAGAAATAAAAACAGAAGAATAGGAAAAGAAACACTTGCCAATTCAT
+TTTGTAAGGGTAGTCTAATTTTCATACCAAACCACATGAGAAAAACCCTATTATTTATTA
+ACAAGCCAAATCCAGCAATATATAATAATAATACATCATGACCAAGTTGGGTTTAATCCC
+AGGAATGCAAAATTGATTCAATATTAGAAAATCTATTAGGGTAACATGAAACTTAAAGAA
+AAAAAACAAGAAAATTTCTATACATGCAATTAAAAAGCATCTGTAGAAATTCAATCATTC
+AGGATTCTTTTTTTCTTTAAGTTTAAAAAAACTTGACAAAGAAAGCTTTATGAAGCCAGG
+TGCTGTGGCATGTGCCTGTAATCCCAGCCATTCAGGAGGGTAAAGTGGGATGATCACTTG
+AGCCCAGGAGTTTGAGACCAGTCTGAGCAACATAGCAAGACCCTGTTTCCGGGGAGGGGG
+TGGGTGGGAAGTAAGCCTTATGAAAAACCAGTAGCCCCTATCACTCAATAAAAGCTTTCC
+TTTTAACATCAGAACAAGTCAGGGATGCCCTCTGTATCACCACTGCTCTTCAACTTTGTA
+ATGGAGAGTCCGGCCAGCACAGTTAAGACAAGATAAAATATATCTAAGTCATGAAAAGAA
+ACAAAATATCATGATTTTCAGATGATATGTTTGTCCTTAAAAAATTTAAAGAGGGATTAC
+TGGAATAAGAGTTTGTTGGACAATCTTTTTATTCCAGTAATTCATCAGTTTACAAAAGTC
+AAATGCATTTCTATAAACCATCTACAGATATCTCTTAGAATATAGCCGGGCATGGTGGTG
+GGCGCCTGTAATCCCAGCTACTAGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAG
+GCGGAGCTTGCAATGAGCTGAGATCGCCACTGCACTCCAGCCTGGGCCACAGAGCGAGAC
+TCTGTCTCAAAAAAAAAAAAAAAAAAAAAAGATATCACTTAGAATAGCAACAACAAACAT
+AAAGTATCAAGGAATCAATTAAACACATGATGTGCAATAATAATAACACAAAACTTTACT
+GGGAAAACAACTTAGGAAAACAATGTGGAATACCCGGTGAATGTACATATGCCCTACCAT
+CCAGCAATTCCACTACTGGGGAATCTGTCTAAAGATCTGTGAAACTCTTGTGTATCAGGA
+GAAATGTGCAAGGTTACTTGCAGGAACACTGTAATAACAACAAACCTAATGTCCATTAAT
+ACTAGAATGGATAAATTGTGGTATATTTATACAATGGTATATAAAGCAGTGAAAACAAGT
+AACTATACTATACTCAAAAACATAATGATGAATGAAAAAAGCTAGTTGAAGAATACACAC
+AGTATGATTTCACTTATATGAAATTCAAAACCAGGAAATACTAAGTACTGTATTGTTTAA
+TAGAAAGATAATAGTCCAGCCCGGGTGCGGCGGCTCAAGCCTGTAATCCCAGCACTTTGG
+GAGGCTGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGACCAACATGG
+AGAAACCCCATCTCTACTAAAAATACAAAACTACCTGGGCGTAGTGGCACATGCCTGTAA
+TTCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCGCTTGAATCTGGGAGGTGGAGGTTGC
+AGTGAGTCAAGATTGCACCATTGCACTCCAGCCTGGGCAACAAGAGCAAAACTTCATCTC
+AAAAAAACAAAAAAAAAAACAACAGAAAAACAAGAAAAAAACAAGAATAAACACATCGAG
+CCGTGATCATGCCACTGCACTCAAGCCTGAGTGAGAGAGTGACACCCTGTCTCAAAGAAA
+ACAAAAGACAAAAATACATAACAGGCTGGGCTCAGTGGCTCACACCTATAATCCCAGGAT
+TTTGGGAGGTAGAGGTGGGTGGATCACTTGAGGTCAGGAGTTAGAGACCACCCTGGCCAA
+GGTGATGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCAGATGTGGTAGCGCACAC
+CTGTAATCCCAGCTACTTGGGAGGCTGAGGCTGGAGAACTGCTTGAACCCGGGAGGTGGA
+GGTTGGATTCAAGTGACAGCGCCACTGCAGTCCAGCCTGGGCGACAGTGAGACTTTGTAT
+CAAAACAACAAAACACATAACAGTCATTAATTACCACACATAAAGGTGCTTAATGGATAA
+TAAGTGCTCAAGGAAATGGCAGTCATGGTGGTGGTTGCAGAGAACAACTGGAGGGGATTA
+AGAGTGGTGGTAGAAACCAAGCAGAAACCAGTCTAAGAAAACCAAAAAAATACCAGGCAA
+ATAGTTTCAAAGGAAAATTATACCATTTTAAGATAAGGTAGACTGGTGGCTGGCGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGTGGGTAGATCACCTGAGGTCAGGAGTTTGAGACC
+AGTCTGGCCAACATAGTGAAACCCCATCTCTACTAAAAATATAAAAATTAGTTACACGTG
+GTGGTGTGCACCTGTAGTCCCAACTTCTTGGGAGGCTGAGGCAGGAGAATCACTTGAACC
+TGGGAGGAAGAGGTTGCAGTCAGCCGAGATCATGCCACTGCATTCCAGCCTGGGTGACAG
+AGCAAGACTCTGTGTCAAAAAAAAAAAAAAAAAGACAAAGTAGTCTTAATGCTATTTCAG
+AACACAGAAAAGGAAAGAAAACTTCCACACTCATTTTATGAAGCAAGTGTAATGGTAACC
+CCAACCTGACAAAGACTGCACAAAAGATACTACAGTTGAATACTACTTACGAATATCAAC
+ACAAAATCTCTGAATATTAGCAAACAGAATCCAATGGCACATTATAAAAAGGAATACAAA
+TAAACAAGCACAGTTTATTCCAGGTATGAGAGATTCCATATAAGGAAATCTATTGACATG
+ATCTTCACATTAACAATGTTAAATGTGATGAAATCATATCATCTTCACGGAGGCAGTAAA
+GACATCTGACAGCTGGGTGCGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAA
+GGCGGGTGGATTGCCTGAGCTCAGGAGTTCACGACCAGCATGGGCAACAGGGTGAAACCC
+TGTCTCTACTAAAATACAAAAAATTACCTGGGGGTGGCAATACGCGCTTGTAGTCCCAGC
+TACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAAACCAGGAGGTGGAGGTTGCAGTGAGC
+TGAGATGGCACCACTGCACTCCAGCCTGAGAGACAGAGCAAGACTCCATCTCAAAAAACA
+AACAAACAAAAAGAAACAAAAAACTAAAATAGGAAATGATGGATACTGCCTTAAGTAAAC
+AAAATACACCTCGGTCCAAAGCCAGCATCTGACTTCATGTGAAAACATTAGAGACTTCCC
+TGCTAACATCACACACCACATAAGGCTAAAATATTTTCCAATTTACATTTTTAGAGGTAA
+GCATTAAATTCTATGATGCCAGACAAACATTTACCAGATTTTTTTTTTTGAGATAGAGTT
+TCACTCTTGTTGCTCAGGCTGGAGTGTAATGGCGCACAATCTCGGCTCGCCACAAGCTCC
+GCCTCCCAGGTTCAAGCGACTCTCCTGCCTCGGCCTCCCGAGTAGCTGGGATTACAGGCA
+TGTGCCACCACACCCAGCTAATTTTGTATTTTTTGTAGAGACGGGGTTTCTCCATGTTGG
+TCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCTGCCTCGGCCTCCCAAAGTGC
+TGGGATTACAGGTGTGAGCCACCGCGTCTGGCCAAAATTTACCAGTTCTTGAATGCTGAA
+TCCTGCTGGGATAACTGGAAACACTCCTAATGTTACTATAAACTCTCACACTGCCTTGTT
+GGCAATAATTAAATAGCTCTTTCTAGCAGTTGTGTTCAAGTGGACAGATAATCACAAATG
+TCTTTTCTTTCTAGCTCCCTTTGGACGAAAGAAAGAAAGAATTTACAACACGTCTAAAGA
+TTGTCACTTTTATCATCCCTTTGAATGATAAGTGATCAGGGCTGTCCCAAAAGAATGCCT
+GAAAACATTACAAAATGTTATCATAAGCAGAATGGCTCACTAAGGGGATGAACATTCTGG
+CAAGTCTTAATCTAGCATCACTACCAACTGTTTTTTCACTGGTCTAAAACATAACATCTA
+ATATTTTTATTTGCCTATATAGCCCTGCAGTAATAATTAACTTGTGAAAGGTCTACACTG
+ATTTTTTTCTTTATAAAGTATTTGCTTCAACCATGGAACAGAAACAAAGTTGATTTAATG
+GGAAAAATGTAAATTTATTTGAGGATAGGTTCAGCACTGCTAAATTGAAAAGGGAAAGAA
+AATTTCAAACAAACCCCAAAAAGAAACATTTTCACATCTGTAGTGAAAAAGCTAAATAGC
+AGTACACTAAACTTGGTTACTGACAAAATAAATCCTTGTGGCCAAGGTGAGGCAAAGATA
+TTACTCTCTACTTTGCTAGCCTACCTTGTGTTCAGTGAATTCAAACATTAATTGGGGCAA
+TGTATTGTTTTTGCTTCATTTTCTTTAGCTGTGCAGTAAAGTCTTTGCTTTCATAGAAAG
+ATTGTGCCAAGTTCTTTCTTTTTCAGAATTGCTTTTAAATCAGTTCTATTTAAGTATTTC
+TCTCTCTCTCAAACTTGACATTTTTCATTACACCTTTTTTTTCCAATGCCAAAATAGCAG
+CATTAACATTCATTCCAGTCACTAGGTAAAAACTCTCTTCAGTCAAACACACATGAATAA
+TCTAATTCGTGAGGTAAACGACTACTGTCATACAGTATTACGAAGTTCCAACCTTACTTG
+CAGAGCAAAAAATAAAAAATAAAAATAGTCATTTTCCTCAAGGGTTTAATTGTGGGCTTT
+TAATGTTGAAGTAAATCAATTTACTAGTTTTTAACCTCAAGTTTTTCCCTATTTCATTAT
+ATCTCAATAATGATTATCCAGAGATTTCTGAGGTAAATTACATTAAAGACAGCGCTAGTA
+ACTGGGAAACAATCCACACATGAATCTTAATATATTGCATATATGCTAATTTCATGTAAT
+GTTCCTGAGGGCGGTGACAATATTAGAGCTGTACGAGACCCTAGAGACCATCCCAATCCC
+TTAACTCCACAGATGAGAAACACAAAAACTTGCAGAAGCAGAGTGACGAGCACATGCCCA
+TGTGACATGTCATCAGAAGCAGGGACCAAGACTCAGGTCTCTTGCCTTCCAGGCCAGTCC
+TCTTTTGGTAGGCTAAAAGCTGCCTTTCACAGCTTCCCACAAATTTCAACACATCCTCAC
+TCAAGTACGTTTCTGAAAGGGACTCACCGACAACTCTTTAGAGAAAAGGAACTATATCTC
+AAACCATAGGAAAAGTTCTATATCTAGCTTATCAAACTTTTAGTCTCTGAAGCATGTGAG
+AGTAAGTTTGCCAGTATTTCTTTCTCATTATCCCCCAATACTTTAGTGACTATTTCCAAC
+AAACAAAACTAGACTATTCTCCCATACAAACAAAATGCTACCATCAAAATCGTCAACGCT
+CACCTGTCAGATTATGACCAAAGGCCTCTAAGTCAGAATCCCGCCCAGGCAAAATGACAT
+GGCAGCCAACTGTGGAGCCTCGGTTGGCCTCGGATAGCTGGTCCCCCCTTCATTCACACC
+ACATATCTGAGGACCAGATATGGAAAGCCTCTCATGCTGGAAAACTTGGCACAGTAGAAA
+GGCGGCCACACTTTTGCCCGTCACACAATGCACATTCACGGGGGACCTGTGCTAAACCAT
+TCACAGACAACCTGTTTCTGGGTCAGGGTTTCGTATACAGCAGAGCAGTCAGCTATCGAC
+ACAAGGGTTTGTAACAAAAAACAAAAAACAACAAAAAAAAGGAAGGACAAACCAAAACAA
+AAAAGAAAATACCAAAAAAAAGAAGAGTTAAAAAAAAAAGAAGATAAAAATAAAATCATT
+AACACAGATACACGACTACAATCTAATTTTCAGACATTCTAGTTTTGCCAATCATGCCGA
+TTCAGAATCATGAACCACATGGAGTTGCCATGTTTCTGCACACTTCTTTAGTGTGGAACA
+ATTCCTCAGTGTTTTCTTGGTTCCATGATCTTGACTTAATTTTGAGGTTTACAGGCCAGT
+TATTTTGCAGAATATCCCTAAACTCGGGTTACCTTGATGTTGCCTCCTGAAGATTTGGAT
+TATGCATGAAACACTTGACTGTCCCTATTCTAACTATATAAAGATGCTCCTTGACTTACT
+ATGGACTTACATCCCAATAAACCCTTCATAAATGGAAAACCTCATAAGGTGAAATGCTTG
+TTTTTTTTGGGAAAGTTTCTTGCTCTGTTGCCAGGCGGGAATGCGGTGACACGACGACGG
+CTCACTGTAGCCTTGACCACTCAGGTTCAAGCGATCCTCAGCCCCAGGCTCCCAAGTAGC
+TGGGACTATAGGCACGTACCACCACACCTGGCTAATTTTTTTTTTTTTTAATTTTTAGTA
+CCTTTAGTACAGAAAAGGCCTCACTATGTTGCCTAGATTGGTCTCAAACTCCTAAGCTCA
+AGGAATCCTCATGCCTCGGTCTCCCAAAGTCCTGGGATTATAGGCATGAGTCCTGCACCC
+AGCCTAGCTGAAAATACATTTAATACACCTAACTTACCAAACATCACAGCTTAGCTTAGC
+CTACCTTAAACATGTTCAGAGCACTTACATTAGCCTACAGTTGGGCAAAATCATCTAACG
+CAAAGCCCATTTTATAATAAAAGTGTTGAGTAACTCATATGATGTATGAAATACTATACT
+GAAAATGAAAAACAGAACAGTTGCGTGGGTACTCGGAAGTAAGGTTTCTACTGCATGTGT
+ATCACTTCCATACCATTGTAAAGTCATCTGACCACCAAGCCAAATATTTGTACTTCTGCA
+CATGCTGCTTTACATAGAGACCGAGACAAAGGGGGATTCACATCTTGTCAACCCTGTTTA
+CTTCAAAGGAATAACAGAGCATTAACTAGGTACCAAGTGCTCAGGACTTCAAGTATACTG
+TTTTATTTAATACTCATGACAATCCTTTGACCCCTTACTTGTGACTGGCAAGTTACCCAA
+CTTTTGTTTCATTTTCCTCATCTGTAAAAACCGGAGAGTGATACACACTCCCTGGAGAAT
+GACAGGGAAAACAGTACCAAGAACTTTAACCTAACAAGTCAGAAATCAATGAGACCCCTT
+TTGACAATTCCCACCTTGCCCTTTAGACATTCCGAGATATGCAGCTTTATTTAATAAATG
+ACCCCACCTTGGGCCTCAGCTGACTAGACCAGAGGCAGACACTCTCCCAGGCTGGAAAGG
+CCAGGTGGAGTGAGCTGTTCTGTGCAGAATGATCTCCCTAACAGTGTTTTTACACATTCT
+GTTCCACTTGGAATTGGATTCAGAGTGTTCTCAGCCTCTGGTCATCAAAAAACCTGAAGA
+CACATACCTTCAGGGAGCTAGGGTCAGCTCTCCTCTGCTGAGTGCATGGGGAAGTGACAA
+AAGAGAAAAGAGGGGGGAAAATCCAGCTGCTGAGAGATGTAGGGTACAAGAAGCTTTTGT
+AACTGTAAAGAGGGAGGGAGGTAGCTACGTTGGTCTATTTTCCAGTTCCTGGTTCCAACT
+ATACCTAAAGCTTTAACTGCACTTTCTATCCTTGCAGTAATATGACAAAGGCCCCC
diff --git a/test/tiny/q.fa.fai b/test/tiny/q.fa.fai
new file mode 100644
index 0000000..dd7673b
--- /dev/null
+++ b/test/tiny/q.fa.fai
@@ -0,0 +1 @@
+q	12356	3	60	61
diff --git a/test/tiny/q.regions b/test/tiny/q.regions
new file mode 100644
index 0000000..d129bcc
--- /dev/null
+++ b/test/tiny/q.regions
@@ -0,0 +1,3 @@
+q:0-5000
+q:5000-10000
+q:10000-12356
diff --git a/test/tiny/q.vcf.gz b/test/tiny/q.vcf.gz
new file mode 100644
index 0000000..87d557e
Binary files /dev/null and b/test/tiny/q.vcf.gz differ
diff --git a/test/tiny/q.vcf.gz.tbi b/test/tiny/q.vcf.gz.tbi
new file mode 100644
index 0000000..0781014
Binary files /dev/null and b/test/tiny/q.vcf.gz.tbi differ
diff --git a/test/tiny/q_spiked.vcf.gz b/test/tiny/q_spiked.vcf.gz
new file mode 100644
index 0000000..16f3ac4
Binary files /dev/null and b/test/tiny/q_spiked.vcf.gz differ
diff --git a/test/tiny/q_spiked.vcf.gz.tbi b/test/tiny/q_spiked.vcf.gz.tbi
new file mode 100644
index 0000000..0f0f0da
Binary files /dev/null and b/test/tiny/q_spiked.vcf.gz.tbi differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/freebayes.git



More information about the debian-med-commit mailing list